;* ======================================================================== *; ;* TEXAS INSTRUMENTS, INC. *; ;* *; ;* DSPLIB DSP Signal Processing Library *; ;* *; ;* Release: Version 1.02 *; ;* CVS Revision: 1.1 Fri Dec 7 02:42:02 2001 (UTC) *; ;* Snapshot date: 18-Apr-2002 *; ;* *; ;* This library contains proprietary intellectual property of Texas *; ;* Instruments, Inc. The library and its source code are protected by *; ;* various copyrights, and portions may also be protected by patents or *; ;* other legal protections. *; ;* *; ;* This software is licensed for use with Texas Instruments TMS320 *; ;* family DSPs. This license was provided to you prior to installing *; ;* the software. You may review this license by consulting the file *; ;* TI_license.PDF which accompanies the files in this library. *; ;* ------------------------------------------------------------------------ *; ;* Copyright (C) 2002 Texas Instruments, Incorporated. *; ;* All Rights Reserved. *; ;* ======================================================================== *; * ========================================================================= * * TEXAS INSTRUMENTS INC. * * * * NAME * * Complex FFT (Radix 4) * * * * REVISION DATE * * 06-Dec-2001 * * * * USAGE * * This routine is C callable and can the called as * * * * void DSP_r4fft(int n, short *restrict x, const short *restrict w); * * * * n --- FFT size (power of 4) (input) * * x[] --- input and output sequences (dim-n) (input/output) * * w[] --- FFT coefficients (dim-n) (input) * * * * If the routine is not to be used as a C callable function, * * then all instructions relating to stack should be removed. * * Refer to comments of individual instructions. You will also * * need to initialize values for all the values passed as these * * are assumed to be in registers as defined by the calling * * convention of the compiler, (refer to the C compiler reference * * guide.) * * * * C CODE * * * * This is the C equivalent of the Assembly Code without the * * assumptions listed below. Note that the assembly code is hand * * optimized and assumptions apply. * * * * SOURCE:Burrus, Parks p .113 * * * * void DSP_r4fft(int n, short *restrict x, const short *restrict w) * * { * * int n1, n2, ie, ia1, ia2, ia3, i0, i1, i2, i3, j, k; * * short t, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3; * * * * n2 = n; * * ie = 1; * * for (k = n; k > 1; k >>= 2) * * { * * n1 = n2; * * n2 >>= 2; * * ia1 = 0; * * for (j = 0; j < n2; j++) * * { * * ia2 = ia1 + ia1; * * ia3 = ia2 + ia1; * * co1 = w[ia1 * 2 + 1]; * * si1 = w[ia1 * 2]; * * co2 = w[ia2 * 2 + 1]; * * si2 = w[ia2 * 2]; * * co3 = w[ia3 * 2 + 1]; * * si3 = w[ia3 * 2]; * * ia1 = ia1 + ie; * * for (i0 = j; i0 < n; i0 += n1) * * { * * i1 = i0 + n2; * * i2 = i1 + n2; * * i3 = i2 + n2; * * r1 = x[2 * i0] + x[2 * i2]; * * r2 = x[2 * i0] - x[2 * i2]; * * t = x[2 * i1] + x[2 * i3]; * * x[2 * i0] = r1 + t; * * r1 = r1 - t; * * s1 = x[2 * i0 + 1] + x[2 * i2 + 1]; * * s2 = x[2 * i0 + 1] - x[2 * i2 + 1]; * * t = x[2 * i1 + 1] + x[2 * i3 + 1]; * * x[2 * i0 + 1] = s1 + t; * * s1 = s1 - t; * * x[2 * i2] = (r1 * co2 + s1 * si2) >> 15; * * x[2 * i2 + 1] = (s1 * co2-r1 * si2)>>15; * * t = x[2 * i1 + 1] - x[2 * i3 + 1]; * * r1 = r2 + t; * * r2 = r2 - t; * * t = x[2 * i1] - x[2 * i3]; * * s1 = s2 - t; * * s2 = s2 + t; * * x[2 * i1] = (r1 * co1 + s1 * si1) >>15; * * x[2 * i1 + 1] = (s1 * co1-r1 * si1)>>15; * * x[2 * i3] = (r2 * co3 + s2 * si3) >>15; * * x[2 * i3 + 1] = (s2 * co3-r2 * si3)>>15; * * } * * } * * ie <<= 2; * * } * * } * * * * DESCRIPTION * * * * This routine is used to compute FFT of a complex sequece * * of size n, a power of 4, with "decimation-in-frequency * * decomposition" method. The output is in digit-reversed * * order. Each complex value is with interleaved 16-bit real * * and imaginary parts. * * * * TECHNIQUES * * 1. Loading input x as well as coefficient w in word. * * 2. Both loops j and i0 shown in the C code are placed in the * * INNERLOOP of the assembly code. * * * * ASSUMPTIONS * * 4 <= n <= 65536 * * x is aligned on a 4*N Byte (N*word) boundary * * w is aligned on an odd word boundary * * x data is stored in the order real[0], image[0], real[1], ... * * w coef is stored in the order k*sin[0*delta], k*cos[0*delta], * * k*sin[1*delta], ... where delta = 2*PI/N, k = 32767 * * * * MEMORY NOTE * * x must be aligned on a 4*N Byte (N*word) boundary for circular * * buffering. w should be aligned on an odd word boundary to * * minimize memory bank hits. There are N/4 memory bank hits total * * * * CYCLES * * LOGBASE4(N) * (10 * N/4 + 29) + 36 + N/4 * * (This includes all function-call overhead, as well as stack * * save and restore overhead.) * * * * CODESIZE * * 736 bytes * * ------------------------------------------------------------------------- * * Copyright (c) 2002 Texas Instruments, Incorporated. * * All Rights Reserved. * * ========================================================================= * .global _DSP_r4fft * ========================================================================= * * End of file: dsp_r4fft.h62 * * ------------------------------------------------------------------------- * * Copyright (c) 2002 Texas Instruments, Incorporated. * * All Rights Reserved. * * ========================================================================= *