;* ======================================================================== *; ;* TEXAS INSTRUMENTS, INC. *; ;* *; ;* DSPLIB DSP Signal Processing Library *; ;* *; ;* Release: Version 1.02 *; ;* CVS Revision: 1.8 Fri Mar 22 02:06:40 2002 (UTC) *; ;* Snapshot date: 18-Apr-2002 *; ;* *; ;* This library contains proprietary intellectual property of Texas *; ;* Instruments, Inc. The library and its source code are protected by *; ;* various copyrights, and portions may also be protected by patents or *; ;* other legal protections. *; ;* *; ;* This software is licensed for use with Texas Instruments TMS320 *; ;* family DSPs. This license was provided to you prior to installing *; ;* the software. You may review this license by consulting the file *; ;* TI_license.PDF which accompanies the files in this library. *; ;* ------------------------------------------------------------------------ *; ;* Copyright (C) 2002 Texas Instruments, Incorporated. *; ;* All Rights Reserved. *; ;* ======================================================================== *; * ========================================================================= * * * * TEXAS INSTRUMENTS, INC. * * * * NAME * * DSP_fft16x16r * * * * * * REVISION DATE * * 12-Sep-2000 * * * * USAGE * * This routine is C-callable and can be called as: * * * * void DSP_fft16x16r * * ( * * int N, * * short *x, * * short *w, * * unsigned char *brev, * * short *y, * * int n_min, * * int offset, * * int nmax * * ); * * * * N : Length of fft in complex samples, power of 2 <=16384 * * x : Pointer to complex data input * * w : Pointer to complex twiddle factor (see below) * * brev : Pointer to bit reverse table containing 64 entries * * y : Pointer to complex data output * * n_min : Smallest fft butterfly used in computation * * used for decomposing fft into subffts, see notes * * offset : Index in complex samples of sub-fft from start of main * * fft * * nmax : Size of main fft in complex samples * * * * DESCRIPTION * * The benchmark performs a mixed radix forward FFT using a special * * sequence of coefficients generated in the following way: * * * * void tw_gen(short *w, int N) * * { * * int j, k; * * double x_t, y_t, theta1, theta2, theta3; * * const double PI = 3.141592654, M = 32767.0; * * /* M is 16383 for scale by 4 */ * * * * for (j=1, k=0; j <= N>>2; j = j<<2) * * { * * for (i=0; i < N>>2; i+=j) * * { * * theta1 = 2*PI*i/N; * * x_t = M*cos(theta1); * * y_t = M*sin(theta1); * * w[k] = (short)x_t; * * w[k+1] = (short)y_t; * * * * theta2 = 4*PI*i/N; * * x_t = M*cos(theta2); * * y_t = M*sin(theta2); * * w[k+2] = (short)x_t; * * w[k+3] = (short)y_t; * * * * theta3 = 6*PI*i/N; * * x_t = M*cos(theta3); * * y_t = M*sin(theta3); * * w[k+4] = (short)x_t; * * w[k+5] = (short)y_t; * * k+=6; * * } * * } * * } * * * * This redundent set of twiddle factors is size 2*N short samples. * * As pointed out later dividing these twiddle factors by 2 will give * * an effective divide by 4 at each stage to guarentee no overflow. * * The function is accurate to about 68dB of signal to noise ratio to * * the DFT function below: * * * * void dft(int n, short x[], short y[]) * * { * * int k,i, index; * * const double PI = 3.14159654; * * short *p_x; * * double arg, fx_0, fx_1, fy_0, fy_1, co, si; * * * * for(k = 0; k radix) * * { * * j = 0; * * fft_jmp = stride + (stride>>1); * * h2 = stride>>1; * * l1 = stride; * * l2 = stride + (stride>>1); * * x = ptr_x; * * w = ptr_w + tw_offset; * * * * for (i = 0; i < n>>1; i += 2) * * { * * co1 = w[j+0]; * * si1 = w[j+1]; * * co2 = w[j+2]; * * si2 = w[j+3]; * * co3 = w[j+4]; * * si3 = w[j+5]; * * j += 6; * * * * x_0 = x[0]; * * x_1 = x[1]; * * x_h2 = x[h2]; * * x_h2p1 = x[h2+1]; * * x_l1 = x[l1]; * * x_l1p1 = x[l1+1]; * * x_l2 = x[l2]; * * x_l2p1 = x[l2+1]; * * * * xh0 = x_0 + x_l1; * * xh1 = x_1 + x_l1p1; * * xl0 = x_0 - x_l1; * * xl1 = x_1 - x_l1p1; * * * * xh20 = x_h2 + x_l2; * * xh21 = x_h2p1 + x_l2p1; * * xl20 = x_h2 - x_l2; * * xl21 = x_h2p1 - x_l2p1; * * * * ptr_x0 = x; * * ptr_x0[0] = ((short)(xh0 + xh20))>>1; * * ptr_x0[1] = ((short)(xh1 + xh21))>>1; * * * * ptr_x2 = ptr_x0; * * x += 2; * * predj = (j - fft_jmp); * * if (!predj) x += fft_jmp; * * if (!predj) j = 0; * * * * xt0 = xh0 - xh20; * * yt0 = xh1 - xh21; * * xt1 = xl0 + xl21; * * yt2 = xl1 + xl20; * * xt2 = xl0 - xl21; * * yt1 = xl1 - xl20; * * * * l1p1 = l1+1; * * h2p1 = h2+1; * * l2p1 = l2+1; * * * * ptr_x2[l1 ] = (xt1 * co1 + yt1 * si1 * * + 0x00008000) >> 16; * * ptr_x2[l1p1] = (yt1 * co1 - xt1 * si1 * * + 0x00008000) >> 16; * * ptr_x2[h2 ] = (xt0 * co2 + yt0 * si2 * * + 0x00008000) >> 16; * * ptr_x2[h2p1] = (yt0 * co2 - xt0 * si2 * * + 0x00008000) >> 16; * * ptr_x2[l2 ] = (xt2 * co3 + yt2 * si3 * * + 0x00008000) >> 16; * * ptr_x2[l2p1] = (yt2 * co3 - xt2 * si3 * * + 0x00008000) >> 16; * * } * * * * tw_offset += fft_jmp; * * stride = stride>>2; * * } /* end while */ * * * * j = offset>>2; * * ptr_x0 = ptr_x; * * y0 = y; * * * * /* determine l0 = _norm(nmax) - 17 */ * * l0 = 31; * * if (((nmax>>31)&1)==1) * * num = ~nmax; * * else * * num = nmax; * * if (!num) * * l0 = 32; * * else * * { * * a=num&0xFFFF0000; if (a) { l0-=16; num=a; } * * a=num&0xFF00FF00; if (a) { l0-= 8; num=a; } * * a=num&0xF0F0F0F0; if (a) { l0-= 4; num=a; } * * a=num&0xCCCCCCCC; if (a) { l0-= 2; num=a; } * * a=num&0xAAAAAAAA; if (a) { l0-= 1; } * * } * * l0 -= 1; * * * * l0 -= 17; * * * * if(radix == 2 || radix == 4) * * for (i = 0; i < n; i += 4) * * { * * /* reversal computation */ * * * * j0 = (j ) & 0x3F; * * j1 = (j >> 6) & 0x3F; * * k0 = brev[j0]; * * k1 = brev[j1]; * * k = (k0 << 6) | k1; * * if (l0 < 0) * * k = k << -l0; * * else * * k = k >> l0; * * j++; /* multiple of 4 index */ * * * * x0 = ptr_x0[0]; x1 = ptr_x0[1]; * * x2 = ptr_x0[2]; x3 = ptr_x0[3]; * * x4 = ptr_x0[4]; x5 = ptr_x0[5]; * * x6 = ptr_x0[6]; x7 = ptr_x0[7]; * * ptr_x0 += 8; * * * * xh0_0 = x0 + x4; * * xh1_0 = x1 + x5; * * xh0_1 = x2 + x6; * * xh1_1 = x3 + x7; * * * * if (radix == 2) * * { * * xh0_0 = x0; * * xh1_0 = x1; * * xh0_1 = x2; * * xh1_1 = x3; * * } * * * * yt0 = xh0_0 + xh0_1; * * yt1 = xh1_0 + xh1_1; * * yt4 = xh0_0 - xh0_1; * * yt5 = xh1_0 - xh1_1; * * * * xl0_0 = x0 - x4; * * xl1_0 = x1 - x5; * * xl0_1 = x2 - x6; * * xl1_1 = x3 - x7; * * * * if (radix == 2) * * { * * xl0_0 = x4; * * xl1_0 = x5; * * xl1_1 = x6; * * xl0_1 = x7; * * } * * * * yt2 = xl0_0 + xl1_1; * * yt3 = xl1_0 - xl0_1; * * * * yt6 = xl0_0 - xl1_1; * * yt7 = xl1_0 + xl0_1; * * * * if (radix == 2) * * { * * yt7 = xl1_0 - xl0_1; * * yt3 = xl1_0 + xl0_1; * * } * * * * y0[k] = yt0; y0[k+1] = yt1; * * k += n>>1; * * y0[k] = yt2; y0[k+1] = yt3; * * k += n>>1; * * y0[k] = yt4; y0[k+1] = yt5; * * k += n>>1; * * y0[k] = yt6; y0[k+1] = yt7; * * } * * } * * * * CYCLES * * 2.5 * N * ceil(log4(N)) - N/2 + 164 * * * * For N = 1024: cycles = 12452 * * For N = 512: cycles = 6308 * * For N = 256: cycles = 2568 * * * * CODESIZE * * 1344 bytes * * * * REFERENCES * * [1] C. S. Burrus and T.W. Parks (1985) "DFT/FFT and Convolution * * Algos - Theory and Implementation", J. Wiley. * * [2] Implementation of Various Precision Fast Fourier Transforms on * * the TMS320C6400 processor - DJH, ESC 2000 * * [3] Burrus - Rice University and Papamichalis - TI (1988) - Paper * * on the convertion of radix4 to radix2 digit reversal. * * * * ------------------------------------------------------------------------- * * Copyright (c) 2002 Texas Instruments, Incorporated. * * All Rights Reserved. * * ========================================================================= * .global _DSP_fft16x16r * ========================================================================= * * End of file: dsp_fft16x16r.h62 * * ------------------------------------------------------------------------- * * Copyright (c) 2002 Texas Instruments, Incorporated. * * All Rights Reserved. * * ========================================================================= *