-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathffft.S
431 lines (396 loc) · 9.59 KB
/
ffft.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
;-----------------------------------------------------------------------------;
; Fixed-point FFT routines for megaAVRs (C)ChaN, 2005
;-----------------------------------------------------------------------------;
; * This program is opened under license policy of following trems.
;
; Copyright (C) 2005, ChaN, all right reserved.
;
; * This program is a free software and there is NO WARRANTY.
; * No restriction on use. You can use, modify and redistribute it for
; personal, non-profit or commercial use UNDER YOUR RESPONSIBILITY.
; * Redistributions of source code must retain the above copyright notice.
;
;-----------------------------------------------------------------------------;
;
; void fft_input (const int16_t *array_src, complex_t *array_bfly);
; void fft_execute (complex_t *array_bfly);
; void fft_output (complex_t *array_bfly, uint16_t *array_dst);
;
; <array_src>: Wave form to be processed.
; <array_bfly>: Complex array for butterfly operations.
; <array_dst>: Spectrum output buffer.
;
; These functions must be called in sequence to do a DFT in FFT algorithm.
; fft_input() fills the complex array with a wave form to prepare butterfly
; operations. A hamming window is applied at the same time.
; fft_execute() executes the butterfly operations.
; fft_output() re-orders the results, converts the complex spectrum into
; scalar spectrum and output it in linear scale.
;
;----------------------------------------------------------------------------;
; THIS CODE HAS BEEN MODIFIED FROM ITS ORIGINAL FORM - it is pared down
; to handle 64 points only, for the Circuit Playground microphone library.
; Original FFFT code can be found on GitHub.
;----------------------------------------------------------------------------;
.list
#define FFT_N 64
#define FFT_B 6
#define T0L r0
#define T0H r1
#define T2L r2
#define T2H r3
#define T4L r4
#define T4H r5
#define T6L r6
#define T6H r7
#define T8L r8
#define T8H r9
#define T10L r10
#define T10H r11
#define T12L r12
#define T12H r13
#define T14L r14
#define T14H r15
#define AL r16
#define AH r17
#define BL r18
#define BH r19
#define CL r20
#define CH r21
#define DL r22
#define DH r23
#define EL r24
#define EH r25
#define XL r26
#define XH r27
#define YL r28
#define YH r29
#define ZL r30
#define ZH r31
.macro ldiw dh,dl, abs
ldi \dl, lo8(\abs)
ldi \dh, hi8(\abs)
.endm
.macro subiw dh,dl, abs
subi \dl, lo8(\abs)
sbci \dh, hi8(\abs)
.endm
.macro addw dh,dl, sh,sl
add \dl, \sl
adc \dh, \sh
.endm
.macro addd d3,d2,d1,d0, s3,s2,s1,s0
add \d0, \s0
adc \d1, \s1
adc \d2, \s2
adc \d3, \s3
.endm
.macro subw dh,dl, sh,sl
sub \dl, \sl
sbc \dh, \sh
.endm
.macro subd d3,d2,d1,d0, s3,s2,s1,s0
sub \d0, \s0
sbc \d1, \s1
sbc \d2, \s2
sbc \d3, \s3
.endm
.macro lddw dh,dl, src
ldd \dl, \src
ldd \dh, \src+1
.endm
.macro ldw dh,dl, src
ld \dl, \src
ld \dh, \src
.endm
.macro stw dst, sh,sl
st \dst, \sl
st \dst, \sh
.endm
.macro clrw dh, dl
clr \dh
clr \dl
.endm
.macro lsrw dh, dl
lsr \dh
ror \dl
.endm
.macro asrw dh, dl
asr \dh
ror \dl
.endm
.macro lslw dh, dl
lsl \dl
rol \dh
.endm
.macro pushw dh, dl
push \dh
push \dl
.endm
.macro popw dh, dl
pop \dl
pop \dh
.endm
.macro lpmw dh,dl, src
lpm \dl, \src
lpm \dh, \src
.endm
.macro rjne lbl
breq 99f
rjmp \lbl
99:
.endm
.macro FMULS16 d3,d2,d1,d0 ,s1h,s1l, s2h,s2l ;Fractional Multiply (19clk)
fmuls \s1h, \s2h
movw \d2, T0L
fmul \s1l, \s2l
movw \d0, T0L
adc \d2, EH ;EH: zero reg.
fmulsu \s1h, \s2l
sbc \d3, EH
add \d1, T0L
adc \d2, T0H
adc \d3, EH
fmulsu \s2h, \s1l
sbc \d3, EH
add \d1, T0L
adc \d2, T0H
adc \d3, EH
.endm
.macro SQRT32 ; 32bit square root (526..542clk)
clr T6L
clr T6H
clr T8L
clr T8H
ldi BL, 1
ldi BH, 0
clr CL
clr CH
ldi DH, 16
90: lsl T2L
rol T2H
rol T4L
rol T4H
rol T6L
rol T6H
rol T8L
rol T8H
lsl T2L
rol T2H
rol T4L
rol T4H
rol T6L
rol T6H
rol T8L
rol T8H
brpl 91f
add T6L, BL
adc T6H, BH
adc T8L, CL
adc T8H, CH
rjmp 92f
91: sub T6L, BL
sbc T6H, BH
sbc T8L, CL
sbc T8H, CH
92: lsl BL
rol BH
rol CL
andi BL, 0b11111000
ori BL, 0b00000101
sbrc T8H, 7
subi BL, 2
dec DH
brne 90b
lsr CL
ror BH
ror BL
lsr CL
ror BH
ror BL
.endm
;----------------------------------------------------------------------------;
; Constant Tables
.global tbl_window
tbl_window: ; tbl_window[] = ... (This is a Hamming window)
.dc.w 2621, 2693, 2910, 3270, 3768, 4401, 5161, 6042, 7036, 8132, 9320, 10588, 11926, 13318, 14753, 16216
.dc.w 17694, 19171, 20634, 22069, 23462, 24799, 26068, 27256, 28352, 29345, 30226, 30987, 31619, 32117, 32477, 32694
.dc.w 32766, 32694, 32477, 32117, 31619, 30987, 30226, 29345, 28352, 27256, 26068, 24799, 23462, 22069, 20634, 19171
.dc.w 17694, 16216, 14753, 13318, 11926, 10588, 9320, 8132, 7036, 6042, 5161, 4401, 3768, 3270, 2910, 2693
tbl_cos_sin: ; Table of {cos(x),sin(x)}, (0 <= x < pi, in FFT_N/2 steps)
.dc.w 32767, 0, 32609, 3211, 32137, 6392, 31356, 9511, 30272, 12539, 28897, 15446, 27244, 18204, 25329, 20787
.dc.w 23169, 23169, 20787, 25329, 18204, 27244, 15446, 28897, 12539, 30272, 9511, 31356, 6392, 32137, 3211, 32609
.dc.w 0, 32766, -3211, 32609, -6392, 32137, -9511, 31356, -12539, 30272, -15446, 28897, -18204, 27244, -20787, 25329
.dc.w -23169, 23169, -25329, 20787, -27244, 18204, -28897, 15446, -30272, 12539, -31356, 9511, -32137, 6392, -32609, 3211
tbl_bitrev: ; tbl_bitrev[] = ...
.dc.w 0*4, 32*4, 16*4, 48*4, 8*4, 40*4, 24*4, 56*4, 4*4, 36*4, 20*4, 52*4, 12*4, 44*4, 28*4, 60*4
.dc.w 2*4, 34*4, 18*4, 50*4, 10*4, 42*4, 26*4, 58*4, 6*4, 38*4, 22*4, 54*4, 14*4, 46*4, 30*4, 62*4
;----------------------------------------------------------------------------;
.global fft_input
.func fft_input
fft_input:
pushw T2H,T2L
pushw AH,AL
pushw YH,YL
movw XL, EL ;X = array_src;
movw YL, DL ;Y = array_bfly;
clr EH ;Zero
ldiw ZH,ZL, tbl_window ;Z = &tbl_window[0];
ldiw AH,AL, FFT_N ;A = FFT_N;
1: lpmw BH,BL, Z+ ;B = *Z++; (window)
ldw CH,CL, X+ ;C = *X++; (I-axis)
FMULS16 DH,DL,T2H,T2L, BH,BL, CH,CL ;D = B * C;
stw Y+, DH,DL ;*Y++ = D;
stw Y+, DH,DL ;*Y++ = D;
subiw AH,AL, 1 ;while(--A)
brne 1b ;/
popw YH,YL
popw AH,AL
popw T2H,T2L
clr r1
ret
.endfunc
;----------------------------------------------------------------------------;
.global fft_execute
.func fft_execute
fft_execute:
pushw T2H,T2L
pushw T4H,T4L
pushw T6H,T6L
pushw T8H,T8L
pushw T10H,T10L
pushw T12H,T12L
pushw T14H,T14L
pushw AH,AL
pushw YH,YL
movw ZL, EL ;Z = array_bfly;
ldiw EH,EL, 1 ;E = 1;
ldiw XH,XL, FFT_N/2 ;X = FFT_N/2;
1: ldi AL, 4 ;T12 = E; (angular speed)
mul EL, AL ;
movw T12L, T0L ;
mul EH, AL ;
add T12H, T0L ;/
movw T14L, EL ;T14 = E;
pushw EH,EL
movw YL, ZL ;Z = &array_bfly[0];
mul XL, AL ;Y = &array_bfly[X];
addw YH,YL, T0H,T0L ;
mul XH, AL ;
add YH, T0L ;/
pushw ZH,ZL
2: clrw T10H,T10L ;T10 = 0 (angle)
clr EH ;Zero reg.
3: lddw AH,AL, Z+0 ;A = *Z - *Y; *Z++ += *Y;
asrw AH,AL ;
lddw DH,DL, Y+0 ;
asrw DH,DL ;
movw CL, AL ;
subw AH,AL, DH,DL ;
addw CH,CL, DH,DL ;
stw Z+, CH,CL ;/
lddw BH,BL, Z+0 ;B = *Z - *Y; *Z++ += *Y;
asrw BH,BL ;
lddw DH,DL, Y+2 ;
asrw DH,DL ;
movw CL, BL ;
subw BH,BL, DH,DL ;
addw CH,CL, DH,DL ;
stw Z+, CH,CL ;/
movw T0L, ZL
ldiw ZH,ZL, tbl_cos_sin ;C = cos(T10); D = sin(T10);
addw ZH,ZL, T10H,T10L ;
lpmw CH,CL, Z+ ;
lpmw DH,DL, Z+ ;/
movw ZL, T0L
FMULS16 T4H,T4L,T2H,T2L, AH,AL, CH,CL ;*Y++ = A * C + B * D;
FMULS16 T8H,T8L,T6H,T6L, BH,BL, DH,DL ;
addd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;
stw Y+, T4H,T4L ;/
FMULS16 T4H,T4L,T2H,T2L, BH,BL, CH,CL ;*Y++ = B * C - A * D;
FMULS16 T8H,T8L,T6H,T6L, AH,AL, DH,DL ;
subd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;
stw Y+, T4H,T4L ;/
addw T10H,T10L, T12H,T12L ;T10 += T12; (next angle)
#if FFT_N >= 128
sbrs T10H, FFT_B - 7 ;while(T10 < pi)
#else
sbrs T10L, FFT_B + 1
#endif
rjmp 3b ;/
ldi AL, 4 ;Y += X; Z += X; (skip split segment)
mul XL, AL
addw YH,YL, T0H,T0L ;
addw ZH,ZL, T0H,T0L ;
mul XH, AL ;
add YH, T0L ;
add ZH, T0L ;/
ldi EL, 1 ;while(--T14)
subw T14H,T14L, EH,EL ;
rjne 2b ;/
popw ZH,ZL
popw EH,EL
lslw EH,EL ;E *= 2;
lsrw XH,XL ;while(X /= 2)
adiw XL, 0 ;
rjne 1b ;/
popw YH,YL
popw AH,AL
popw T14H,T14L
popw T12H,T12L
popw T10H,T10L
popw T8H,T8L
popw T6H,T6L
popw T4H,T4L
popw T2H,T2L
; clr r1
ret
.endfunc
;----------------------------------------------------------------------------;
.global fft_output
.func fft_output
fft_output:
pushw T2H,T2L
pushw T4H,T4L
pushw T6H,T6L
pushw T8H,T8L
pushw T10H,T10L
pushw AH,AL
pushw YH,YL
movw T10L, EL ;T10 = array_bfly;
movw YL, DL ;Y = array_output;
ldiw ZH,ZL, tbl_bitrev ;Z = tbl_bitrev;
clr EH ;Zero
ldiw AH,AL, FFT_N / 2 ;A = FFT_N / 2; (plus only)
1: lpmw XH,XL, Z+ ;X = *Z++;
addw XH,XL, T10H,T10L ;X += array_bfly;
ldw BH,BL, X+ ;B = *X++;
ldw CH,CL, X+ ;C = *X++;
FMULS16 T4H,T4L,T2H,T2L, BH,BL, BH,BL ;T4:T2 = B * B;
FMULS16 T8H,T8L,T6H,T6L, CH,CL, CH,CL ;T8:T6 = C * C;
addd T4H,T4L,T2H,T2L, T8H,T8L,T6H,T6L;T4:T2 += T8:T6;
SQRT32 ;B = sqrt(T4:T2);
stw Y+, BH,BL ;*Y++ = B;
subiw AH,AL, 1 ;while(--A)
rjne 1b ;/
popw YH,YL
popw AH,AL
popw T10H,T10L
popw T8H,T8L
popw T6H,T6L
popw T4H,T4L
popw T2H,T2L
clr r1
ret
.endfunc
;----------------------------------------------------------------------------;
.global fmuls_f
.func fmuls_f
fmuls_f:
movw CL, EL ;C = E;
clr EH ;Zero
FMULS16 ZH,ZL,XH,XL, CH,CL, DH,DL ;Z:X = C * D;
movw EL, ZL
clr r1
ret
.endfunc