diff options
Diffstat (limited to 'glm-master/glm/simd/neon.h')
-rw-r--r-- | glm-master/glm/simd/neon.h | 155 |
1 files changed, 0 insertions, 155 deletions
diff --git a/glm-master/glm/simd/neon.h b/glm-master/glm/simd/neon.h deleted file mode 100644 index f85947f..0000000 --- a/glm-master/glm/simd/neon.h +++ /dev/null @@ -1,155 +0,0 @@ -/// @ref simd_neon -/// @file glm/simd/neon.h - -#pragma once - -#if GLM_ARCH & GLM_ARCH_NEON_BIT -#include <arm_neon.h> - -namespace glm { - namespace neon { - static inline float32x4_t dupq_lane(float32x4_t vsrc, int lane) { - switch(lane) { -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT - case 0: return vdupq_laneq_f32(vsrc, 0); - case 1: return vdupq_laneq_f32(vsrc, 1); - case 2: return vdupq_laneq_f32(vsrc, 2); - case 3: return vdupq_laneq_f32(vsrc, 3); -#else - case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0)); - case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1)); - case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2)); - case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3)); -#endif - } - assert(!"Unreachable code executed!"); - return vdupq_n_f32(0.0f); - } - - static inline float32x2_t dup_lane(float32x4_t vsrc, int lane) { - switch(lane) { -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT - case 0: return vdup_laneq_f32(vsrc, 0); - case 1: return vdup_laneq_f32(vsrc, 1); - case 2: return vdup_laneq_f32(vsrc, 2); - case 3: return vdup_laneq_f32(vsrc, 3); -#else - case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0)); - case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1)); - case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2)); - case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3)); -#endif - } - assert(!"Unreachable code executed!"); - return vdup_n_f32(0.0f); - } - - static inline float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) { -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT - switch(dlane) { - case 0: - switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 1: - switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 2: - switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3); - } - assert(!"Unreachable code executed!"); - case 3: - switch(slane) { - case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0); - case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1); - case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2); - case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3); - } - assert(!"Unreachable code executed!"); - } -#else - - float l; - switch(slane) { - case 0: l = vgetq_lane_f32(vsrc, 0); break; - case 1: l = vgetq_lane_f32(vsrc, 1); break; - case 2: l = vgetq_lane_f32(vsrc, 2); break; - case 3: l = vgetq_lane_f32(vsrc, 3); break; - default: - assert(!"Unreachable code executed!"); - } - switch(dlane) { - case 0: return vsetq_lane_f32(l, vdst, 0); - case 1: return vsetq_lane_f32(l, vdst, 1); - case 2: return vsetq_lane_f32(l, vdst, 2); - case 3: return vsetq_lane_f32(l, vdst, 3); - } -#endif - assert(!"Unreachable code executed!"); - return vdupq_n_f32(0.0f); - } - - static inline float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) { -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT - switch(lane) { - case 0: return vmulq_laneq_f32(v, vlane, 0); break; - case 1: return vmulq_laneq_f32(v, vlane, 1); break; - case 2: return vmulq_laneq_f32(v, vlane, 2); break; - case 3: return vmulq_laneq_f32(v, vlane, 3); break; - default: - assert(!"Unreachable code executed!"); - } - assert(!"Unreachable code executed!"); - return vdupq_n_f32(0.0f); -#else - return vmulq_f32(v, dupq_lane(vlane, lane)); -#endif - } - - static inline float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) { -#if GLM_ARCH & GLM_ARCH_ARMV8_BIT -#ifdef GLM_CONFIG_FORCE_FMA -# define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) -#else -# define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) -#endif - - switch(lane) { - case 0: - FMADD_LANE(acc, v, vlane, 0); - return acc; - case 1: - FMADD_LANE(acc, v, vlane, 1); - return acc; - case 2: - FMADD_LANE(acc, v, vlane, 2); - return acc; - case 3: - FMADD_LANE(acc, v, vlane, 3); - return acc; - default: - assert(!"Unreachable code executed!"); - } - assert(!"Unreachable code executed!"); - return vdupq_n_f32(0.0f); -# undef FMADD_LANE -#else - return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane))); -#endif - } - } //namespace neon -} // namespace glm -#endif // GLM_ARCH & GLM_ARCH_NEON_BIT |