From f1a8bcd876a6e5e714e512340e69916f6173610a Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Tue, 7 Apr 2015 13:55:12 -0700
Subject: [PATCH] use single-precision floats for ARM

NEON doesn't support doubles ;_;
anyway, this turns the two-times slowdown caused by previous commits
into a two-times speedup.
---
 Makefile       | 4 ++--
 include/util.h | 6 ++++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index adfdb79..393afe1 100644
--- a/Makefile
+++ b/Makefile
@@ -70,8 +70,8 @@ linux: ALL_CXXFLAGS += $(OPT_FLAGS) -fpic
 linux: VST_FLAGS += -D__cdecl=
 linux: all
 
-linux-arm: ALL_CFLAGS += -Ofast -mfpu=neon -fpic
-linux-arm: ALL_CXXFLAGS += -Ofast -mfpu=neon -fpic
+linux-arm: ALL_CFLAGS += -Ofast -march=native -DFORCE_SINGLE -fpic
+linux-arm: ALL_CXXFLAGS += -Ofast -march=native -DFORCE_SINGLE -fpic
 linux-arm: VST_FLAGS += -D__cdecl=
 linux-arm: all
 
diff --git a/include/util.h b/include/util.h
index 665ff8e..3422c33 100644
--- a/include/util.h
+++ b/include/util.h
@@ -7,7 +7,13 @@
 #define INNER static inline
 #define PURE __attribute__((pure))
 #define CONST __attribute__((const))
+
+#ifndef FORCE_SINGLE
 typedef double v2df __attribute__((vector_size(16), aligned(16)));
+#else
+typedef float v2df __attribute__((vector_size(8), aligned(8)));
+#endif
+
 typedef float v4sf __attribute__((vector_size(16), aligned(16)));
 typedef unsigned long ulong; // __attribute((aligned(16)));
 