diff options
author | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2016-10-01 17:09:04 -0500 |
---|---|---|
committer | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2016-10-01 17:09:04 -0500 |
commit | e4e92bf2b00ed469141029640f192579c0ba1025 (patch) | |
tree | 6cd4352f84cfe4488277c879b70b5c43fd3fdd90 /lib/ffts/src/macros-alpha.h | |
parent | d8856bdf08c7fcbfe1608b692c632e2023d6dd06 (diff) | |
download | ulab-e4e92bf2b00ed469141029640f192579c0ba1025.tar.gz ulab-e4e92bf2b00ed469141029640f192579c0ba1025.zip |
Switch FFTS to linkotec branch for cross-arch support
Diffstat (limited to 'lib/ffts/src/macros-alpha.h')
-rw-r--r-- | lib/ffts/src/macros-alpha.h | 324 |
1 files changed, 191 insertions, 133 deletions
diff --git a/lib/ffts/src/macros-alpha.h b/lib/ffts/src/macros-alpha.h index 06daf4a..f7795d4 100644 --- a/lib/ffts/src/macros-alpha.h +++ b/lib/ffts/src/macros-alpha.h @@ -1,206 +1,264 @@ /* - - This file is part of FFTS -- The Fastest Fourier Transform in the South - - Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> - Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the organization nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ +This file is part of FFTS -- The Fastest Fourier Transform in the South + +Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz> +Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com> + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the organization nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#ifndef __MACROS_ALPHA_H__ -#define __MACROS_ALPHA_H__ +*/ -#include <math.h> +#ifndef FFTS_MACROS_ALPHA_H +#define FFTS_MACROS_ALPHA_H -#ifdef __alpha__ -#define restrict +#if defined (_MSC_VER) && (_MSC_VER >= 1020) +#pragma once #endif -typedef struct {float r1, i1, r2, i2;} V; +#include "ffts_attributes.h" -#define FFTS_MALLOC(d,a) malloc(d) -#define FFTS_FREE(d) free(d) +#ifdef HAVE_STRING_H +#include <string.h> +#endif -#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3}) +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif -static inline V VADD(V x, V y) +typedef union { + struct { + float r1; + float i1; + float r2; + float i2; + } r; + uint32_t u[4]; +} V4SF; + +#define FFTS_MALLOC(d,a) (malloc(d)) +#define FFTS_FREE(d) (free(d)) + +static FFTS_ALWAYS_INLINE V4SF +V4SF_LIT4(float f3, float f2, float f1, float f0) { - V z; - z.r1 = x.r1 + y.r1; - z.i1 = x.i1 + y.i1; - z.r2 = x.r2 + y.r2; - z.i2 = x.i2 + y.i2; + V4SF z; + + z.r.r1 = f0; + z.r.i1 = f1; + z.r.r2 = f2; + z.r.i2 = f3; + return z; } - -static inline V VSUB(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_ADD(V4SF x, V4SF y) { - V z; - z.r1 = x.r1 - y.r1; - z.i1 = x.i1 - y.i1; - z.r2 = x.r2 - y.r2; - z.i2 = x.i2 - y.i2; + V4SF z; + + z.r.r1 = x.r.r1 + y.r.r1; + z.r.i1 = x.r.i1 + y.r.i1; + z.r.r2 = x.r.r2 + y.r.r2; + z.r.i2 = x.r.i2 + y.r.i2; + return z; } - -static inline V VMUL(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_SUB(V4SF x, V4SF y) { - V z; - z.r1 = x.r1 * y.r1; - z.i1 = x.i1 * y.i1; - z.r2 = x.r2 * y.r2; - z.i2 = x.i2 * y.i2; + V4SF z; + + z.r.r1 = x.r.r1 - y.r.r1; + z.r.i1 = x.r.i1 - y.r.i1; + z.r.r2 = x.r.r2 - y.r.r2; + z.r.i2 = x.r.i2 - y.r.i2; + return z; } -static inline V VXOR(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_MUL(V4SF x, V4SF y) { - V r; - r.r1 = (uint32_t)x.r1 ^ (uint32_t)y.r1; - r.i1 = (uint32_t)x.i1 ^ (uint32_t)y.i1; - r.r2 = (uint32_t)x.r2 ^ (uint32_t)y.r2; - r.i2 = (uint32_t)x.i2 ^ (uint32_t)y.i2; - return r; + V4SF z; + + z.r.r1 = x.r.r1 * y.r.r1; + z.r.i1 = x.r.i1 * y.r.i1; + z.r.r2 = x.r.r2 * y.r.r2; + z.r.i2 = x.r.i2 * y.r.i2; + + return z; } -static inline V VSWAPPAIRS(V x) +static FFTS_ALWAYS_INLINE V4SF +V4SF_XOR(V4SF x, V4SF y) { - V z; - z.r1 = x.i1; - z.i1 = x.r1; - z.r2 = x.i2; - z.i2 = x.r2; + V4SF z; + + z.u[0] = x.u[0] ^ y.u[0]; + z.u[1] = x.u[1] ^ y.u[1]; + z.u[2] = x.u[2] ^ y.u[2]; + z.u[3] = x.u[3] ^ y.u[3]; + return z; } - -static inline V VBLEND(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_SWAP_PAIRS(V4SF x) { - V z; - z.r1 = x.r1; - z.i1 = x.i1; - z.r2 = y.r2; - z.i2 = y.i2; + V4SF z; + + z.r.r1 = x.r.i1; + z.r.i1 = x.r.r1; + z.r.r2 = x.r.i2; + z.r.i2 = x.r.r2; + return z; } -static inline V VUNPACKHI(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_BLEND(V4SF x, V4SF y) { - V z; - z.r1 = x.r2; - z.i1 = x.i2; - z.r2 = y.r2; - z.i2 = y.i2; + V4SF z; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = y.r.r2; + z.r.i2 = y.r.i2; + return z; } -static inline V VUNPACKLO(V x, V y) +static FFTS_ALWAYS_INLINE V4SF +V4SF_UNPACK_HI(V4SF x, V4SF y) { - V z; - z.r1 = x.r1; - z.i1 = x.i1; - z.r2 = y.r1; - z.i2 = y.i1; + V4SF z; + + z.r.r1 = x.r.r2; + z.r.i1 = x.r.i2; + z.r.r2 = y.r.r2; + z.r.i2 = y.r.i2; + return z; } -static inline V VDUPRE(V x) +static FFTS_ALWAYS_INLINE V4SF +V4SF_UNPACK_LO(V4SF x, V4SF y) { - V z; - z.r1 = x.r1; - z.i1 = x.r1; - z.r2 = x.r2; - z.i2 = x.r2; + V4SF z; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = y.r.r1; + z.r.i2 = y.r.i1; + return z; } -static inline V VDUPIM(V x) +static FFTS_ALWAYS_INLINE V4SF +V4SF_DUPLICATE_RE(V4SF x) { - V z; - z.r1 = x.i1; - z.i1 = x.i1; - z.r2 = x.i2; - z.i2 = x.i2; + V4SF z; + + z.r.r1 = x.r.r1; + z.r.i1 = x.r.r1; + z.r.r2 = x.r.r2; + z.r.i2 = x.r.r2; + return z; } -static inline V IMUL(V d, V re, V im) +static FFTS_ALWAYS_INLINE V4SF +V4SF_DUPLICATE_IM(V4SF x) { - re = VMUL(re, d); - im = VMUL(im, VSWAPPAIRS(d)); - return VSUB(re, im); + V4SF z; + + z.r.r1 = x.r.i1; + z.r.i1 = x.r.i1; + z.r.r2 = x.r.i2; + z.r.i2 = x.r.i2; + + return z; } +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMUL(V4SF d, V4SF re, V4SF im) +{ + re = V4SF_MUL(re, d); + im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d)); + return V4SF_SUB(re, im); +} -static inline V IMULJ(V d, V re, V im) +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMULJ(V4SF d, V4SF re, V4SF im) { - re = VMUL(re, d); - im = VMUL(im, VSWAPPAIRS(d)); - return VADD(re, im); + re = V4SF_MUL(re, d); + im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d)); + return V4SF_ADD(re, im); } -static inline V MULI(int inv, V x) +static FFTS_ALWAYS_INLINE V4SF +V4SF_MULI(int inv, V4SF x) { - V z; + V4SF z; if (inv) { - z.r1 = -x.r1; - z.i1 = x.i1; - z.r2 = -x.r2; - z.i2 = x.i2; - }else{ - z.r1 = x.r1; - z.i1 = -x.i1; - z.r2 = x.r2; - z.i2 = -x.i2; + z.r.r1 = -x.r.r1; + z.r.i1 = x.r.i1; + z.r.r2 = -x.r.r2; + z.r.i2 = x.r.i2; + } else { + z.r.r1 = x.r.r1; + z.r.i1 = -x.r.i1; + z.r.r2 = x.r.r2; + z.r.i2 = -x.r.i2; } + return z; } - -static inline V IMULI(int inv, V x) +static FFTS_ALWAYS_INLINE V4SF +V4SF_IMULI(int inv, V4SF x) { - return VSWAPPAIRS(MULI(inv, x)); + return V4SF_SWAP_PAIRS(V4SF_MULI(inv, x)); } - -static inline V VLD(const void *s) +static FFTS_ALWAYS_INLINE V4SF +V4SF_LD(const void *s) { - V *d = (V *)s; - return *d; + V4SF z; + memcpy(&z, s, sizeof(z)); + return z; } - -static inline void VST(void *d, V s) +static FFTS_ALWAYS_INLINE void +V4SF_ST(void *d, V4SF s) { - V *r = (V *)d; + V4SF *r = (V4SF*) d; *r = s; } -#endif +#endif /* FFTS_MACROS_ALPHA_H */
\ No newline at end of file |