use single-precision floats for ARM

NEON doesn't support doubles ;_; Anyway, this turns the two-times slowdown caused by previous commits into a two-times speedup.
2015-04-07 13:55:12 -07:00 · 2015-04-07 13:55:12 -07:00 · f1a8bcd876
commit f1a8bcd876
parent 95c3e58ff9
2 changed files with 8 additions and 2 deletions
--- a/4
+++ b/4
@@ -70,8 +70,8 @@ linux: ALL_CXXFLAGS += $(OPT_FLAGS) -fpic
 linux: VST_FLAGS += -D__cdecl=
 linux: all

-linux-arm: ALL_CFLAGS += -Ofast -mfpu=neon -fpic
-linux-arm: ALL_CXXFLAGS += -Ofast -mfpu=neon -fpic
+linux-arm: ALL_CFLAGS += -Ofast -march=native -DFORCE_SINGLE -fpic
+linux-arm: ALL_CXXFLAGS += -Ofast -march=native -DFORCE_SINGLE -fpic
 linux-arm: VST_FLAGS += -D__cdecl=
 linux-arm: all

--- a/include/util.h
+++ b/include/util.h
@@ -7,7 +7,13 @@
 #define INNER static inline
 #define PURE __attribute__((pure))
 #define CONST __attribute__((const))
+
+#ifndef FORCE_SINGLE
 typedef double v2df __attribute__((vector_size(16), aligned(16)));
+#else
+typedef float v2df __attribute__((vector_size(8), aligned(8)));
+#endif
+
 typedef float v4sf __attribute__((vector_size(16), aligned(16)));
 typedef unsigned long ulong; // __attribute((aligned(16)));