summaryrefslogtreecommitdiffstats
path: root/lib/ffts/src/patterns.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ffts/src/patterns.c')
-rw-r--r--lib/ffts/src/patterns.c208
1 files changed, 208 insertions, 0 deletions
diff --git a/lib/ffts/src/patterns.c b/lib/ffts/src/patterns.c
new file mode 100644
index 0000000..93fe7f7
--- /dev/null
+++ b/lib/ffts/src/patterns.c
@@ -0,0 +1,208 @@
+/*
+
+ This file is part of FFTS -- The Fastest Fourier Transform in the South
+
+ Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
+ Copyright (c) 2012, The University of Waikato
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the organization nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "patterns.h"
+
+void permute_addr(int N, int offset, int stride, int *d) {
+ int i, a[4] = {0,2,1,3};
+ for(i=0;i<4;i++) {
+ d[i] = offset + (a[i] << stride);
+ if(d[i] < 0) d[i] += N;
+ }
+}
+
+void ffts_hardcodedleaf_is_rec(ptrdiff_t **is, int bigN, int N, int poffset, int offset, int stride, int even, int VL) {
+
+ if(N > 4) {
+ ffts_hardcodedleaf_is_rec(is, bigN, N/2, poffset, offset, stride + 1, even, VL);
+ if(N/4 >= 4) ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset+(1<<stride),offset+(N/2), stride + 2, 0, VL);
+ if(N/4 >= 4) ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset-(1<<stride),offset+(3*N/4), stride + 2, 0, VL);
+ else {
+ int temp = poffset+(1<<stride);
+ if(temp < 0) temp += bigN;
+ temp *= 2;
+
+ if(!(temp % (VL*2))) {
+ (*is)[0] = poffset+(1<<stride);
+ (*is)[1] = poffset+(1<<stride)+(1<<(stride+2));
+ (*is)[2] = poffset-(1<<stride);
+ (*is)[3] = poffset-(1<<stride)+(1<<(stride+2));
+ int i;
+ for(i=0;i<4;i++) if((*is)[i] < 0) (*is)[i] += bigN;
+ for(i=0;i<4;i++) (*is)[i] *= 2;
+ *is += 4;
+ }
+ }
+ }else if(N == 4) {
+ int perm[4];
+ permute_addr(bigN, poffset, stride, perm);
+ if(!((perm[0]*2) % (VL*2))) {
+ int i;
+ for(i=0;i<4;i++) {
+ (*is)[i] = perm[i] * 2;
+ }
+ *is += 4;
+ }
+ }
+}
+
+void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL) {
+ int i, i0 = N/leafN/3+1, i1=N/leafN/3, i2 = N/leafN/3;
+ int stride = log(N/leafN)/log(2);
+
+ p->is = malloc(N/VL * sizeof(ptrdiff_t));
+
+ ptrdiff_t *is = p->is;
+
+ if((N/leafN) % 3 > 1) i1++;
+
+ for(i=0;i<i0;i++) ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
+ for(i=i0;i<i0+i1;i++) {
+ ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i, 0, stride+1, 1, VL);
+ ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i-(1<<stride), 0, stride+1, 1, VL);
+ }
+ for(i=0-i2;i<0;i++) ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
+
+
+//for(i=0;i<N/VL;i++) {
+// printf("%td ", p->is[i]);
+// if(i % 16 == 15) printf("\n");
+//}
+
+ p->i0 = i0; p->i1 = i1;
+}
+/**
+ *
+ *
+ */
+void ffts_elaborate_offsets(ptrdiff_t *offsets, int leafN, int N, int ioffset, int ooffset, int stride, int even) {
+ if((even && N == leafN) || (!even && N <= leafN)) {
+ offsets[2*(ooffset/leafN)] = ioffset*2;
+ offsets[2*(ooffset/leafN)+1] = ooffset;
+ }else if(N > 4) {
+ ffts_elaborate_offsets(offsets, leafN, N/2, ioffset, ooffset, stride+1, even);
+ ffts_elaborate_offsets(offsets, leafN, N/4, ioffset+(1<<stride), ooffset+N/2, stride+2, 0);
+ if(N/4 >= leafN)
+ ffts_elaborate_offsets(offsets, leafN, N/4, ioffset-(1<<stride), ooffset+3*N/4, stride+2, 0);
+ }
+
+}
+
+int compare_offsets(const void *a, const void *b) {
+ return ((ptrdiff_t *)a)[0] - ((ptrdiff_t *)b)[0];
+}
+
+uint32_t reverse_bits(uint32_t a, int n) {
+ uint32_t x = 0;
+
+ int i;
+ for(i=0;i<n;i++) {
+ if(a & (1 << i)) x |= 1 << (n-i-1);
+ }
+ return x;
+}
+
+
+void ffts_init_offsets(ffts_plan_t *p, int N, int leafN) {
+
+ ptrdiff_t *offsets = malloc(2 * N/leafN * sizeof(ptrdiff_t));
+
+ ffts_elaborate_offsets(offsets, leafN, N, 0, 0, 1, 1);
+
+ size_t i;
+ for(i=0;i<2*N/leafN;i+=2) {
+ if(offsets[i] < 0) offsets[i] = N + offsets[i];
+ }
+
+ qsort(offsets, N/leafN, 2 * sizeof(ptrdiff_t), compare_offsets);
+ //elaborate_is(p, N, 0, 0, 1);
+ p->offsets = malloc(N/leafN * sizeof(ptrdiff_t));
+ for(i=0;i<N/leafN;i++) {
+ p->offsets[i] = offsets[i*2+1]*2;
+ }
+//for(i=0;i<N/leafN;i++) {
+// printf("%4d %4d\n", p->offsets[i], reverse_bits(p->offsets[i], __builtin_ctzl(2*N)));
+//}
+ free(offsets);
+}
+
+/*
+int tree_count(int N, int leafN, int offset) {
+
+ if(N <= leafN) return 0;
+ int count = 0;
+ count += tree_count(N/4, leafN, offset);
+ count += tree_count(N/8, leafN, offset + N/4);
+ count += tree_count(N/8, leafN, offset + N/4 + N/8);
+ count += tree_count(N/4, leafN, offset + N/2);
+ count += tree_count(N/4, leafN, offset + 3*N/4);
+
+ return 1 + count;
+}
+
+void elaborate_tree(transform_index_t **p, int N, int leafN, int offset) {
+
+ if(N <= leafN) return;
+ elaborate_tree(p, N/4, leafN, offset);
+ elaborate_tree(p, N/8, leafN, offset + N/4);
+ elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
+ elaborate_tree(p, N/4, leafN, offset + N/2);
+ elaborate_tree(p, N/4, leafN, offset + 3*N/4);
+
+ (*p)[0] = N;
+ (*p)[1] = offset*2;
+
+ (*p)+=2;
+}
+
+void ffts_init_tree(ffts_plan_t *p, int N, int leafN) {
+
+ int count = tree_count(N, leafN, 0) + 1;
+ transform_index_t *ps = p->transforms = malloc(count * 2 * sizeof(transform_index_t));
+
+//printf("count = %d\n", count);
+
+ elaborate_tree(&ps, N, leafN, 0);
+ #ifdef __ARM_NEON__
+ ps -= 2;
+ #endif
+ ps[0] = 0;
+ ps[1] = 0;
+//int i;
+//for(i=0;i<count;i++) {
+// fprintf(stderr, "%lu %lu - %d\n", p->transforms[i*2], p->transforms[i*2+1],
+// __builtin_ctzl(p->transforms[i*2]) - 5);
+//}
+
+}
+*/