/*
 * Test configuration and file-scope state shared by the int32 FFT tests below.
 *
 * NOTE(review): the number at the start of each line looks like a line number
 * carried over from a source listing, and the first line packs several
 * directives together -- confirm against the original file.
 *
 *   TEST_LENGTH_SAMPLES     - upper bound of the fftSize loops; every buffer
 *                             is sized for TEST_LENGTH_SAMPLES * 2 elements.
 *   MIN_LENGTH_SAMPLES_CPX  - first fftSize used by the c2c tests.
 *   MIN_LENGTH_SAMPLES_REAL - first fftSize used by the r2c tests (twice the
 *                             complex minimum).
 *   SNR_THRESHOLD_INT32     - the conformance tests assert that snr is not
 *                             below this value.
 *   TEST_COUNT              - the performance tests use TEST_COUNT / fftSize
 *                             as their loop count.
 *
 * testInput_i32_unscaled / testInput_i32_scaled are refilled with random data
 * at the start of every conformance and performance test.
 */
39 #include "unit_test_common.h" 47 #define TEST_LENGTH_SAMPLES (32768) 48 #define MIN_LENGTH_SAMPLES_CPX (4) 49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX*2) 51 #define SNR_THRESHOLD_INT32 25.0f 53 #define TEST_COUNT 250000 60 static ne10_int32_t testInput_i32_unscaled[TEST_LENGTH_SAMPLES * 2];
61 static ne10_int32_t testInput_i32_scaled[TEST_LENGTH_SAMPLES * 2];
/* Input buffers: guarded_in_* hold the raw allocations, in_* point
 * ARRAY_GUARD_LEN elements into them (set up inside each test). */
62 static ne10_int32_t * guarded_in_c = NULL;
63 static ne10_int32_t * guarded_in_neon = NULL;
64 static ne10_int32_t * in_c = NULL;
65 static ne10_int32_t * in_neon = NULL;
/* Output buffers, laid out the same way as the input buffers. */
67 static ne10_int32_t * guarded_out_c = NULL;
68 static ne10_int32_t * guarded_out_neon = NULL;
69 static ne10_int32_t * out_c = NULL;
70 static ne10_int32_t * out_neon = NULL;
/* Result of the most recent CAL_SNR_FLOAT32 comparison of C vs NEON output. */
72 static ne10_float32_t snr = 0.0f;
/* Timing results used by the performance tests. time_speedup is computed as
 * time_c / time_neon and time_savings as (time_c - time_neon) / time_c * 100.
 * NOTE(review): the code that fills time_c / time_neon is not visible here. */
74 static ne10_int64_t time_c = 0;
75 static ne10_int64_t time_neon = 0;
76 static ne10_float32_t time_speedup = 0.0f;
77 static ne10_float32_t time_savings = 0.0f;
/*
 * Conformance test for the complex-to-complex int32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_CPX up to TEST_LENGTH_SAMPLES
 * (doubling each time) the test runs four stages. Each stage loads identical
 * input into in_c and in_neon, guards the output buffers, converts both
 * outputs to float and asserts that their SNR is not below
 * SNR_THRESHOLD_INT32. Stages 1-2 use the unscaled input, stages 3-4 the
 * scaled input.
 *
 * NOTE(review): the lines that allocate the cfg objects and invoke the C and
 * NEON transforms are not visible in this listing (as are the braces and the
 * declaration of i). Presumably the stages are unscaled FFT, unscaled IFFT,
 * scaled FFT and scaled IFFT -- confirm against the complete file.
 */
79 void test_fft_c2c_1d_int32_conformance()
83 ne10_int32_t fftSize = 0;
/* Float copies of the int32 outputs, needed because the SNR helper used
 * below (CAL_SNR_FLOAT32) is given float arrays. */
86 ne10_float32_t * out_c_tmp = NULL;
87 ne10_float32_t * out_neon_tmp = NULL;
89 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Input buffers: room for TEST_LENGTH_SAMPLES * 2 values plus
 * ARRAY_GUARD_LEN * 2 extra elements; in_c / in_neon start ARRAY_GUARD_LEN
 * elements into the allocation. */
92 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
93 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
94 in_c = guarded_in_c + ARRAY_GUARD_LEN;
95 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, same layout as the inputs. */
98 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
99 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
100 out_c = guarded_out_c + ARRAY_GUARD_LEN;
101 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
103 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
104 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
/* Fill both reference inputs with random data: the unscaled set is
 * drand48() * 8192 shifted down by 4096, the scaled set is
 * drand48() * NE10_F2I32_MAX shifted down by NE10_F2I32_MAX / 2. */
106 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
108 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
109 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
/* Sweep the FFT sizes, doubling from the complex minimum to the maximum. */
111 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
113 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Error reporting for failed cfg allocation; the allocation calls and the
 * first NULL check are on lines not visible here. */
118 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
123 if (cfg_neon == NULL)
126 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Stage 1: unscaled input, 2 * fftSize int32 values per buffer. */
131 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
132 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
/* Arm the guards around the output regions, then check them afterwards.
 * NOTE(review): GUARD_ARRAY_UINT8 / CHECK_ARRAY_GUARD_UINT8 are defined
 * elsewhere; presumably they detect writes outside the given byte length. */
134 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
135 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
138 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
139 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
/* Convert both outputs to float and require the SNR between them to reach
 * the threshold. */
142 for (i = 0; i < fftSize * 2; i++)
144 out_c_tmp[i] = (ne10_float32_t) out_c[i];
145 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
147 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
148 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 2: unscaled input reloaded, same guard / compare sequence. */
151 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
152 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
154 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
155 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
158 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
159 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
162 for (i = 0; i < fftSize * 2; i++)
164 out_c_tmp[i] = (ne10_float32_t) out_c[i];
165 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
167 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
168 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 3: scaled input, same guard / compare sequence. */
171 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
172 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
174 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
175 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
178 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
179 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
182 for (i = 0; i < fftSize * 2; i++)
184 out_c_tmp[i] = (ne10_float32_t) out_c[i];
185 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
187 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
188 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 4: scaled input reloaded, same guard / compare sequence. */
191 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
192 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
194 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
195 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
198 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 *
sizeof (ne10_int32_t));
199 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 *
sizeof (ne10_int32_t));
202 for (i = 0; i < fftSize * 2; i++)
204 out_c_tmp[i] = (ne10_float32_t) out_c[i];
205 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
207 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
208 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Release the NEON cfg for this size.
 * NOTE(review): the matching release of the C cfg is not visible here. */
211 NE10_FREE (cfg_neon);
/* Release the guarded allocations (not the offset in_* / out_* pointers)
 * and the float scratch buffers. */
214 NE10_FREE (guarded_in_c);
215 NE10_FREE (guarded_in_neon);
216 NE10_FREE (guarded_out_c);
217 NE10_FREE (guarded_out_neon);
218 NE10_FREE (out_c_tmp);
219 NE10_FREE (out_neon_tmp);
/*
 * Performance test for the complex-to-complex int32 FFT.
 *
 * Prints a table header, then for every fftSize from MIN_LENGTH_SAMPLES_CPX
 * up to TEST_LENGTH_SAMPLES (doubling each time) reports four rows through
 * ne10_log: "unscaled FFT", "unscaled IFFT", "scaled FFT" and "scaled IFFT".
 * Each row shows time_c, time_neon, the percentage saved and the C:NEON ratio.
 *
 * NOTE(review): the bodies of the "for (i = 0; i < test_loop; i++)" loops and
 * the code that sets time_c / time_neon are not visible in this listing;
 * presumably the first loop of each pair times the C routine and the second
 * the NEON routine -- confirm against the complete file.
 */
222 void test_fft_c2c_1d_int32_performance()
226 ne10_int32_t fftSize = 0;
/* Number of repetitions per measurement; set to TEST_COUNT / fftSize below. */
229 ne10_int32_t test_loop = 0;
231 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headings for the result rows logged below. */
232 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Input buffers: TEST_LENGTH_SAMPLES * 2 values plus ARRAY_GUARD_LEN * 2
 * extra elements; in_c / in_neon start ARRAY_GUARD_LEN elements in. */
235 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
236 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
237 in_c = guarded_in_c + ARRAY_GUARD_LEN;
238 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, same layout as the inputs. */
241 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
242 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
243 out_c = guarded_out_c + ARRAY_GUARD_LEN;
244 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Fill both reference inputs with random data. */
246 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
248 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
249 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
/* Sweep the FFT sizes, doubling from the complex minimum to the maximum. */
251 for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
253 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Row "unscaled FFT": both implementations start from the unscaled input. */
256 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
257 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (ne10_int32_t));
/* Error reporting for failed cfg allocation; the allocation calls and the
 * first NULL check are on lines not visible here. */
261 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
266 if (cfg_neon == NULL)
269 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Fewer repetitions for larger transforms (integer division). */
273 test_loop = TEST_COUNT / fftSize;
279 for (i = 0; i < test_loop; i++)
287 for (i = 0; i < test_loop; i++)
/* Ratio of C time to NEON time, and percentage of C time saved. */
291 time_speedup = (ne10_float32_t) time_c / time_neon;
292 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
293 ne10_log (__FUNCTION__,
" unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "unscaled IFFT": both inputs are loaded from out_c, so the C and NEON
 * runs start from the same data. */
296 memcpy (in_c, out_c, 2 * fftSize *
sizeof (ne10_int32_t));
297 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (ne10_int32_t));
303 for (i = 0; i < test_loop; i++)
311 for (i = 0; i < test_loop; i++)
316 time_speedup = (ne10_float32_t) time_c / time_neon;
317 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
318 ne10_log (__FUNCTION__,
"unscaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "scaled FFT": both implementations start from the scaled input. */
321 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
322 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (ne10_int32_t));
328 for (i = 0; i < test_loop; i++)
336 for (i = 0; i < test_loop; i++)
340 time_speedup = (ne10_float32_t) time_c / time_neon;
341 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
342 ne10_log (__FUNCTION__,
" scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "scaled IFFT": again both inputs are loaded from out_c. */
345 memcpy (in_c, out_c, 2 * fftSize *
sizeof (ne10_int32_t));
346 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (ne10_int32_t));
352 for (i = 0; i < test_loop; i++)
360 for (i = 0; i < test_loop; i++)
365 time_speedup = (ne10_float32_t) time_c / time_neon;
366 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
367 ne10_log (__FUNCTION__,
" scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Release the NEON cfg for this size.
 * NOTE(review): the matching release of the C cfg is not visible here. */
370 NE10_FREE (cfg_neon);
/* Release the guarded allocations (not the offset in_* / out_* pointers). */
373 NE10_FREE (guarded_in_c);
374 NE10_FREE (guarded_in_neon);
375 NE10_FREE (guarded_out_c);
376 NE10_FREE (guarded_out_neon);
/*
 * Conformance test for the real-to-complex / complex-to-real int32 FFT.
 *
 * For every fftSize from MIN_LENGTH_SAMPLES_REAL up to TEST_LENGTH_SAMPLES
 * (doubling each time) the test runs four stages and, after each one, asserts
 * that the SNR between the C and NEON outputs is not below
 * SNR_THRESHOLD_INT32:
 *   1. fftSize unscaled values in, (fftSize / 2 + 1) * 2 int32 values compared;
 *   2. a mirrored input built from the unscaled data, fftSize values compared;
 *   3. as stage 1 with the scaled data;
 *   4. as stage 2 with the scaled data.
 *
 * NOTE(review): the lines that allocate the cfg and invoke the C and NEON
 * transforms are not visible in this listing (as are the braces and the
 * declaration of i). Presumably stages 1 and 3 are the r2c transform and
 * stages 2 and 4 the c2r transform -- confirm against the complete file.
 */
379 void test_fft_r2c_1d_int32_conformance()
383 ne10_int32_t fftSize = 0;
/* Float copies of the int32 outputs, used for the SNR computation. */
385 ne10_float32_t * out_c_tmp = NULL;
386 ne10_float32_t * out_neon_tmp = NULL;
388 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Input buffers: TEST_LENGTH_SAMPLES * 2 values plus ARRAY_GUARD_LEN * 2
 * extra elements; in_c / in_neon start ARRAY_GUARD_LEN elements in. */
391 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
392 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
393 in_c = guarded_in_c + ARRAY_GUARD_LEN;
394 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, same layout as the inputs. */
397 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
398 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
399 out_c = guarded_out_c + ARRAY_GUARD_LEN;
400 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
402 out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
403 out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) *
sizeof (ne10_float32_t));
/* Fill both reference inputs with random data. */
405 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
407 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
408 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
/* Sweep the FFT sizes, doubling from the real minimum to the maximum. */
410 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
412 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Error reporting for failed cfg allocation; the allocation call and its
 * NULL check are on lines not visible here. */
417 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Stage 1: fftSize unscaled values as input. */
422 memcpy (in_c, testInput_i32_unscaled, fftSize *
sizeof (ne10_int32_t));
423 memcpy (in_neon, testInput_i32_unscaled, fftSize *
sizeof (ne10_int32_t));
/* Guard, then check, an output region of (fftSize / 2 + 1) * 2 int32 values.
 * NOTE(review): GUARD_ARRAY_UINT8 / CHECK_ARRAY_GUARD_UINT8 are defined
 * elsewhere; presumably they detect writes outside the given byte length. */
425 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
426 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
431 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
432 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
/* Convert both outputs to float and require the SNR to reach the threshold. */
435 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
437 out_c_tmp[i] = (ne10_float32_t) out_c[i];
438 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
440 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
441 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 2: build a mirrored input from the unscaled data. Pair i (elements
 * 2*i and 2*i+1) is copied to pair (fftSize - i) with the second element
 * negated, i.e. a conjugate-symmetric layout if the pairs are read as
 * interleaved real / imaginary parts. */
444 for (i = 1; i < (fftSize / 2); i++)
446 in_c[2 * i] = testInput_i32_unscaled[2 * i];
447 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
448 in_c[2 * (fftSize - i)] = in_c[2 * i];
449 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and the middle pair (fftSize / 2) are set individually; the middle
 * pair's second element is forced to zero. */
451 in_c[0] = testInput_i32_unscaled[0];
453 in_c[fftSize] = testInput_i32_unscaled[1];
454 in_c[fftSize + 1] = 0;
/* The NEON run gets an exact copy of the constructed input. */
455 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int32_t));
/* This stage guards and compares fftSize int32 output values. */
457 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int32_t));
458 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int32_t));
463 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int32_t));
464 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int32_t));
467 for (i = 0; i < fftSize; i++)
469 out_c_tmp[i] = (ne10_float32_t) out_c[i];
470 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
472 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
473 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 3: same as stage 1, using the scaled input. */
476 memcpy (in_c, testInput_i32_scaled, fftSize *
sizeof (ne10_int32_t));
477 memcpy (in_neon, testInput_i32_scaled, fftSize *
sizeof (ne10_int32_t));
479 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
480 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
485 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
486 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 *
sizeof (ne10_int32_t));
489 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
491 out_c_tmp[i] = (ne10_float32_t) out_c[i];
492 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
494 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
495 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Stage 4: same as stage 2, with the mirrored input built from the scaled
 * data. */
498 for (i = 1; i < (fftSize / 2); i++)
500 in_c[2 * i] = testInput_i32_scaled[2 * i];
501 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
502 in_c[2 * (fftSize - i)] = in_c[2 * i];
503 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
505 in_c[0] = testInput_i32_scaled[0];
507 in_c[fftSize] = testInput_i32_scaled[1];
508 in_c[fftSize + 1] = 0;
509 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int32_t));
511 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int32_t));
512 GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int32_t));
517 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize *
sizeof (ne10_int32_t));
518 CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize *
sizeof (ne10_int32_t));
521 for (i = 0; i < fftSize; i++)
523 out_c_tmp[i] = (ne10_float32_t) out_c[i];
524 out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
526 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
527 assert_false ( (snr < SNR_THRESHOLD_INT32));
/* Release the guarded allocations (not the offset in_* / out_* pointers)
 * and the float scratch buffers.
 * NOTE(review): the release of the cfg is not visible in this listing. */
533 NE10_FREE (guarded_in_c);
534 NE10_FREE (guarded_in_neon);
535 NE10_FREE (guarded_out_c);
536 NE10_FREE (guarded_out_neon);
537 NE10_FREE (out_c_tmp);
538 NE10_FREE (out_neon_tmp);
/*
 * Performance test for the real-to-complex / complex-to-real int32 FFT.
 *
 * Prints a table header, then for every fftSize from MIN_LENGTH_SAMPLES_REAL
 * up to TEST_LENGTH_SAMPLES (doubling each time) reports four rows through
 * ne10_log: "Int32 unscaled RFFT", "Int32 unscaled RIFFT", "Int32 scaled RFFT"
 * and "Int32 scaled RIFFT". Each row shows time_c, time_neon, the percentage
 * saved and the C:NEON ratio.
 *
 * NOTE(review): the bodies of the "for (i = 0; i < test_loop; i++)" loops and
 * the code that sets time_c / time_neon are not visible in this listing;
 * presumably the first loop of each pair times the C routine and the second
 * the NEON routine -- confirm against the complete file.
 */
541 void test_fft_r2c_1d_int32_performance()
545 ne10_int32_t fftSize = 0;
/* Number of repetitions per measurement; set to TEST_COUNT / fftSize below. */
547 ne10_int32_t test_loop = 0;
549 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Column headings for the result rows logged below. */
550 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time in ms",
"NEON Time in ms",
"Time Savings",
"Performance Ratio");
/* Input buffers: TEST_LENGTH_SAMPLES * 2 values plus ARRAY_GUARD_LEN * 2
 * extra elements; in_c / in_neon start ARRAY_GUARD_LEN elements in. */
553 guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
554 guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
555 in_c = guarded_in_c + ARRAY_GUARD_LEN;
556 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
/* Output buffers, same layout as the inputs. */
559 guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
560 guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) *
sizeof (ne10_int32_t));
561 out_c = guarded_out_c + ARRAY_GUARD_LEN;
562 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Fill both reference inputs with random data. */
564 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
566 testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
567 testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
/* Sweep the FFT sizes, doubling from the real minimum to the maximum. */
569 for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
571 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Error reporting for failed cfg allocation; the allocation call and its
 * NULL check are on lines not visible here. */
576 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Fewer repetitions for larger transforms (integer division). */
579 test_loop = TEST_COUNT / fftSize;
/* Row "Int32 unscaled RFFT": fftSize unscaled values as input. */
581 memcpy (in_c, testInput_i32_unscaled, fftSize *
sizeof (ne10_int32_t));
582 memcpy (in_neon, testInput_i32_unscaled, fftSize *
sizeof (ne10_int32_t));
588 for (i = 0; i < test_loop; i++)
596 for (i = 0; i < test_loop; i++)
/* Ratio of C time to NEON time, and percentage of C time saved. */
601 time_speedup = (ne10_float32_t) time_c / time_neon;
602 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
603 ne10_log (__FUNCTION__,
"Int32 unscaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "Int32 unscaled RIFFT": build a mirrored input from the unscaled data.
 * Pair i (elements 2*i and 2*i+1) is copied to pair (fftSize - i) with the
 * second element negated. */
606 for (i = 1; i < (fftSize / 2); i++)
608 in_c[2 * i] = testInput_i32_unscaled[2 * i];
609 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
610 in_c[2 * (fftSize - i)] = in_c[2 * i];
611 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and the middle pair are set individually; the middle pair's second
 * element is forced to zero. */
613 in_c[0] = testInput_i32_unscaled[0];
615 in_c[fftSize] = testInput_i32_unscaled[1];
616 in_c[fftSize + 1] = 0;
/* The NEON run gets an exact copy of the constructed input. */
617 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int32_t));
623 for (i = 0; i < test_loop; i++)
631 for (i = 0; i < test_loop; i++)
636 time_speedup = (ne10_float32_t) time_c / time_neon;
637 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
638 ne10_log (__FUNCTION__,
"Int32 unscaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "Int32 scaled RFFT": fftSize scaled values as input. */
641 memcpy (in_c, testInput_i32_scaled, fftSize *
sizeof (ne10_int32_t));
642 memcpy (in_neon, testInput_i32_scaled, fftSize *
sizeof (ne10_int32_t));
648 for (i = 0; i < test_loop; i++)
656 for (i = 0; i < test_loop; i++)
661 time_speedup = (ne10_float32_t) time_c / time_neon;
662 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
663 ne10_log (__FUNCTION__,
"Int32 scaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Row "Int32 scaled RIFFT": mirrored input built from the scaled data, in the
 * same way as for the unscaled RIFFT row. */
666 for (i = 1; i < (fftSize / 2); i++)
668 in_c[2 * i] = testInput_i32_scaled[2 * i];
669 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
670 in_c[2 * (fftSize - i)] = in_c[2 * i];
671 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
673 in_c[0] = testInput_i32_scaled[0];
675 in_c[fftSize] = testInput_i32_scaled[1];
676 in_c[fftSize + 1] = 0;
677 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (ne10_int32_t));
683 for (i = 0; i < test_loop; i++)
691 for (i = 0; i < test_loop; i++)
696 time_speedup = (ne10_float32_t) time_c / time_neon;
697 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
698 ne10_log (__FUNCTION__,
"Int32 scaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Release the guarded allocations (not the offset in_* / out_* pointers).
 * NOTE(review): the release of the cfg is not visible in this listing. */
703 NE10_FREE (guarded_in_c);
704 NE10_FREE (guarded_in_neon);
705 NE10_FREE (guarded_out_c);
706 NE10_FREE (guarded_out_neon);
/**
 * Entry point for the complex-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined.
 *
 * The guard uses defined() for both macros. The previous form,
 * "defined (SMOKE_TEST)||(REGRESSION_TEST)", evaluated REGRESSION_TEST as a
 * value: it only worked when the macro expanded to a non-zero number and
 * failed to preprocess when the macro was defined with no value. Note that
 * REGRESSION_TEST defined as 0 now enables the conformance test as well.
 */
void test_fft_c2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int32_performance();
#endif
}
/**
 * Entry point for the real-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined.
 *
 * The guard uses defined() for both macros. The previous form,
 * "defined (SMOKE_TEST)||(REGRESSION_TEST)", evaluated REGRESSION_TEST as a
 * value: it only worked when the macro expanded to a non-zero number and
 * failed to preprocess when the macro was defined with no value. Note that
 * REGRESSION_TEST defined as 0 now enables the conformance test as well.
 */
void test_fft_r2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int32_performance();
#endif
}
/*
 * Per-test setup hook passed to fixture_setup() by the fixtures in this file.
 * Points ne10_log_buffer_ptr back at ne10_log_buffer.
 * NOTE(review): both names are declared elsewhere; presumably this rewinds
 * the log write position to the start of the buffer -- confirm.
 */
731 static void my_test_setup (
void)
733 ne10_log_buffer_ptr = ne10_log_buffer;
/*
 * Test fixture for the complex-to-complex int32 FFT tests: starts the
 * fixture, registers my_test_setup as the setup hook and runs
 * test_fft_c2c_1d_int32 through run_test().
 * NOTE(review): the embedded line numbers jump after run_test, so any closing
 * call of the fixture is on lines not visible here.
 */
736 void test_fixture_fft_c2c_1d_int32 (
void)
738 test_fixture_start();
740 fixture_setup (my_test_setup);
742 run_test (test_fft_c2c_1d_int32);
/*
 * Test fixture for the real-to-complex int32 FFT tests: starts the fixture,
 * registers my_test_setup as the setup hook and runs test_fft_r2c_1d_int32
 * through run_test().
 * NOTE(review): any closing call of the fixture is on lines not visible here.
 */
747 void test_fixture_fft_r2c_1d_int32 (
void)
749 test_fixture_start();
751 fixture_setup (my_test_setup);
753 run_test (test_fft_r2c_1d_int32);
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
Structure for the 32-bit fixed-point FFT function.
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.