diff --git a/Makefile b/Makefile index d645cfd..f98b960 100644 --- a/Makefile +++ b/Makefile @@ -34,8 +34,9 @@ VST_SRC = ${VST_CPP:%=$(VST_CPP_DIR)/%} VST_OBJ = ${VST_CPP:%.cpp=$(BIN)/%.o} VST_DEF = $(VST_SDK_DIR)/public.sdk/samples/vst2.x/win/vstplug.def -GENERAL_FLAGS = -Wall -Wno-unused-function -I include -ALL_CFLAGS = $(GENERAL_FLAGS) -std=gnu99 $(CFLAGS) +INLINE_FLAGS = -Winline -finline-limit=1000 +GENERAL_FLAGS = -Wall -Wno-unused-function -I include $(INLINE_FLAGS) +ALL_CFLAGS = $(GENERAL_FLAGS) -std=gnu11 $(CFLAGS) ALL_CXXFLAGS = $(GENERAL_FLAGS) $(CXXFLAGS) ALL_LDFLAGS = -lm $(LDFLAGS) @@ -43,6 +44,8 @@ LADSPA_FLAGS = VST_FLAGS = -Wno-write-strings -Wno-narrowing VST_FLAGS += -I $(VST_SDK_DIR) -DBUILDING_DLL=1 +# specifying core2 as the target architecture +# seems significantly faster, even on newer processors. ymmv. OPT_FLAGS = -Ofast -march=core2 -mfpmath=sse # any possibly produced files besides intermediates diff --git a/crap/delay_test.h b/crap/delay_test.h index f7cf78b..d5344e5 100644 --- a/crap/delay_test.h +++ b/crap/delay_test.h @@ -27,7 +27,7 @@ typedef struct { channel c[2]; } personal; -static double +INNER double fir_up(double *x, double s) { x[0] = s; @@ -55,7 +55,7 @@ fir_up(double *x, double s) return s; } -static double +INNER double fir_down(double *x, double s) { x[0] = s; @@ -69,7 +69,7 @@ fir_down(double *x, double s) return s; } -static double +INNER double process_one(channel *c, double s) { s = fir_down(c->down, biquad_run(&c->filter, fir_up(c->up, s))); @@ -79,7 +79,7 @@ process_one(channel *c, double s) return s; } -static void +INNER void process(personal *data, float *in_L, float *in_R, float *out_L, float *out_R, @@ -91,7 +91,7 @@ process(personal *data, } } -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, @@ -103,23 +103,23 @@ process_double(personal *data, } } -static void +INNER void construct(personal *data) {} -static void +INNER void destruct(personal *data) {} -static void +INNER void resume(personal *data) {} -static void +INNER void pause(personal *data) {} -static void +INNER void adjust(personal *data, ulong fs) { for (int k = 0; k < 2; k++) { diff --git a/crap/eq.h b/crap/eq.h index 2d33491..578ca90 100644 --- a/crap/eq.h +++ b/crap/eq.h @@ -18,7 +18,7 @@ typedef struct { float fs; } personal; -static double +INNER double process_one(biquad *filters, double samp) { for (int i = 0; i < BANDS; i++) @@ -26,7 +26,7 @@ process_one(biquad *filters, double samp) return samp; } -static void +INNER void process(personal *data, float *in_L, float *in_R, float *out_L, float *out_R, @@ -39,7 +39,7 @@ process(personal *data, } } -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, @@ -52,7 +52,7 @@ process_double(personal *data, } } -static void +INNER void resume(personal *data) { biquad *filters = data->filters[0]; @@ -61,11 +61,11 @@ resume(personal *data) memcpy(data->filters[1], filters, BANDS*sizeof(biquad)); } -static void +INNER void pause(personal *data) {} -static void +INNER void construct_params(param *params) { for (int i = 0; i < BANDS; i++) { @@ -94,15 +94,15 @@ construct_params(param *params) } } -static void +INNER void construct(personal *data) {} -static void +INNER void destruct(personal *data) {} -static void +INNER void adjust(personal *data, param *params, unsigned long fs) { data->fs = fs; @@ -115,7 +115,7 @@ adjust(personal *data, param *params, unsigned long fs) resume(data); } -static void +INNER void adjust_one(personal *data, param *params, unsigned int index) { float fs = data->fs; diff --git a/crap/eq_const.h b/crap/eq_const.h index 66c716e..d69221e 100644 --- a/crap/eq_const.h +++ b/crap/eq_const.h @@ -14,7 +14,7 @@ typedef struct { biquad filters[2][BANDS]; } personal; -static double +INNER double process_one(biquad *filters, double samp) { for (int i = 0; i < BANDS; i++) @@ -22,7 +22,7 @@ process_one(biquad *filters, double samp) return samp; } -static void +INNER void process(personal *data, float *in_L, float *in_R, float *out_L, float *out_R, @@ -35,7 +35,7 @@ process(personal *data, } } -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, @@ -48,15 +48,15 @@ process_double(personal *data, } } -static void +INNER void construct(personal *data) {} -static void +INNER void destruct(personal *data) {} -static void +INNER void resume(personal *data) { biquad *filters = data->filters[0]; @@ -65,11 +65,11 @@ resume(personal *data) memcpy(data->filters[1], filters, BANDS*sizeof(biquad)); } -static void +INNER void pause(personal *data) {} -static void +INNER void adjust(personal *data, unsigned long fs) { biquad *filters = data->filters[0]; diff --git a/crap/noise.h b/crap/noise.h index 7bcc6ab..0116a7d 100644 --- a/crap/noise.h +++ b/crap/noise.h @@ -10,7 +10,7 @@ typedef struct { } personal; -static void +INNER void process(personal *data, float *in_L, float *in_R, float *out_L, float *out_R, @@ -23,7 +23,7 @@ process(personal *data, out_R[pos] = whitenoise(); } -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, @@ -35,22 +35,22 @@ process_double(personal *data, out_R[pos] = whitenoise(); } -static void +INNER void construct(personal *data) {} -static void +INNER void destruct(personal *data) {} -static void +INNER void resume(personal *data) {} -static void +INNER void pause(personal *data) {} -static void +INNER void adjust(personal *data, unsigned long fs) {} diff --git a/crap/tube.h b/crap/tube.h index 3d64bd5..aa37ed5 100644 --- a/crap/tube.h +++ b/crap/tube.h @@ -1,3 +1,4 @@ +#include #include #include @@ -13,6 +14,10 @@ #define PARAMETERS 2 #define OVERSAMPLING 2 +#define BLOCK_SIZE 256 +#define FULL_SIZE (BLOCK_SIZE*OVERSAMPLING) + +typedef unsigned long ulong; typedef struct { double desired, actual, speed; @@ -24,7 +29,7 @@ typedef struct { smoothval drive, wet; } personal; -static double +INNER double smooth(smoothval *val) { double a = val->actual; @@ -44,62 +49,116 @@ smooth(smoothval *val) return a; } -static double +INNER double distort(double x) { return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.; } -static double +INNER double process_one(double x, double drive, double wet) { return (distort(x*drive)/drive*0.79 - x)*wet + x; } -static double -process_os(personal *data, double x, int right) -{ - halfband_t *hbu = (!right) ? &data->hbu_L : &data->hbu_R; - halfband_t *hbd = (!right) ? &data->hbd_L : &data->hbd_R; - double y; - - #define doit(SAMP) \ - decimate(hbd, process_one(interpolate(hbu, SAMP), \ - smooth(&data->drive), smooth(&data->wet))) - doit(x); - y = doit(x); - #undef doit - - return y; -} - -static void -process(personal *data, - float *in_L, float *in_R, - float *out_L, float *out_R, - unsigned long count) -{ - disable_denormals(); - for (unsigned long pos = 0; pos < count; pos++) { - out_L[pos] = process_os(data, in_L[pos], 0); - out_R[pos] = process_os(data, in_R[pos], 1); - } -} - -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, - unsigned long count) + ulong count) { disable_denormals(); - for (unsigned long pos = 0; pos < count; pos++) { - out_L[pos] = process_os(data, in_L[pos], 0); - out_R[pos] = process_os(data, in_R[pos], 1); + + double drives[FULL_SIZE], wets[FULL_SIZE]; + double in_os[FULL_SIZE], out_os[FULL_SIZE]; + + for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { + ulong rem = BLOCK_SIZE; + if (pos + BLOCK_SIZE > count) + rem = count - pos; + + for (ulong i = 0; i < rem*OVERSAMPLING; i++) + drives[i] = smooth(&data->drive); + for (ulong i = 0; i < rem*OVERSAMPLING; i++) + wets[i] = smooth(&data->wet); + + halfband_t *hb; + + // left channel + hb = &data->hbu_L; + for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + in_os[i+0] = interpolate(hb, in_L[j]); + in_os[i+1] = interpolate(hb, in_L[j]); + } + + for (ulong i = 0; i < rem*OVERSAMPLING; i++) { + out_os[i] = process_one(in_os[i], drives[i], wets[i]); + } + + hb = &data->hbd_L; + for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + decimate(hb, out_os[i+0]); + out_L[j] = decimate(hb, out_os[i+1]); + } + + // right channel + hb = &data->hbu_R; + for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + in_os[i+0] = interpolate(hb, in_R[j]); + in_os[i+1] = interpolate(hb, in_R[j]); + } + + for (ulong i = 0; i < rem*OVERSAMPLING; i++) { + out_os[i] = process_one(in_os[i], drives[i], wets[i]); + } + + hb = &data->hbd_R; + for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + decimate(hb, out_os[i+0]); + out_R[j] = decimate(hb, out_os[i+1]); + } + + in_L += BLOCK_SIZE; + in_R += BLOCK_SIZE; + out_L += BLOCK_SIZE; + out_R += BLOCK_SIZE; } } -static void +INNER void +process(personal *data, + float *in_L, float *in_R, + float *out_L, float *out_R, + ulong count) +{ + double in_L2[BLOCK_SIZE], in_R2[BLOCK_SIZE]; + double out_L2[BLOCK_SIZE], out_R2[BLOCK_SIZE]; + + for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { + ulong rem = BLOCK_SIZE; + if (pos + BLOCK_SIZE > count) + rem = count - pos; + + for (ulong i = 0; i < rem; i++) + in_L2[i] = in_L[i]; + for (ulong i = 0; i < rem; i++) + in_R2[i] = in_R[i]; + + process_double(data, in_L2, in_R2, out_L2, out_R2, rem); + + for (ulong i = 0; i < rem; i++) + out_L[i] = out_L2[i]; + for (ulong i = 0; i < rem; i++) + out_R[i] = out_R2[i]; + + in_L += BLOCK_SIZE; + in_R += BLOCK_SIZE; + out_L += BLOCK_SIZE; + out_R += BLOCK_SIZE; + } +} + +INNER void resume(personal *data) { memset(&data->hbu_L, 0, sizeof(halfband_t)); @@ -108,17 +167,17 @@ resume(personal *data) memset(&data->hbd_R, 0, sizeof(halfband_t)); } -static void +INNER void pause(personal *data) {} -static void +INNER void construct(personal *data) { memset(data, 0, sizeof(personal)); } -static void +INNER void construct_params(param *params) { sprintf(params[0].name, "Drive"); @@ -137,12 +196,12 @@ construct_params(param *params) param_reset(¶ms[1]); } -static void +INNER void destruct(personal *data) {} -static void -adjust(personal *data, param *params, unsigned long fs_long) +INNER void +adjust(personal *data, param *params, ulong fs_long) { resume(data); double fs = fs_long; @@ -156,7 +215,7 @@ adjust(personal *data, param *params, unsigned long fs_long) data->wet.log = 0; } -static void +INNER void adjust_one(personal *data, param *params, unsigned int index) { data->drive.desired = DB2LIN(params[0].value); diff --git a/include/os2piir.h b/include/os2piir.h index e08d1b5..4da9704 100644 --- a/include/os2piir.h +++ b/include/os2piir.h @@ -16,7 +16,7 @@ typedef struct { int i; } halfband_t; -static void +INNER void halfband_a(double a[8], double ao[8], double x0, double x2) { a[0] = x2 + (x0 - ao[0])*0.006185967461045014; @@ -29,7 +29,7 @@ halfband_a(double a[8], double ao[8], double x0, double x2) a[7] = ao[6] + (a[6] - ao[7])*0.862917812650502936; } -static void +INNER void halfband_b(double b[8], double bo[8], double x1, double x3) { b[0] = x3 + (x1 - bo[0])*0.024499027624721819; @@ -42,7 +42,7 @@ halfband_b(double b[8], double bo[8], double x1, double x3) b[7] = bo[6] + (b[6] - bo[7])*0.952428157718303137; } -static double +INNER double halfband(halfband_t *h, double x0) { double a[8], b[8]; @@ -58,7 +58,7 @@ halfband(halfband_t *h, double x0) return (a[7] + b[7])*0.5; } -static double +INNER double decimate(halfband_t *h, double x0) { double c[8]; @@ -78,7 +78,7 @@ decimate(halfband_t *h, double x0) } // note: do not zero-stuff! send the input each time. -static double +INNER double interpolate(halfband_t *h, double x0) { double c[8]; diff --git a/include/util.h b/include/util.h index 8ec4f57..49a9890 100644 --- a/include/util.h +++ b/include/util.h @@ -4,7 +4,9 @@ #include #endif -static void +#define INNER static inline + +INNER void disable_denormals(); #define LIMIT(v,l,u) ((v)<(l)?(l):((v)>(u)?(u):(v))) @@ -22,10 +24,10 @@ typedef struct { double b0, b1, b2, a0, a1, a2; } biquad_interim; -static float +INNER float whitenoise(); -static void +INNER void biquad_init(biquad *bq); typedef enum { @@ -41,16 +43,16 @@ typedef enum { FILT_GAIN } filter_t; -static biquad +INNER biquad biquad_gen(filter_t type, double fc, double gain, double bw, double fs); /* s-plane to z-plane */ -static biquad_interim +INNER biquad_interim design(double cw, double sw, double num0, double num1, double num2, double den0, double den1, double den2); -static double +INNER double biquad_run(biquad *bq, double x); #include "util_def.h" diff --git a/include/util_def.h b/include/util_def.h index 3d84f32..10814dd 100644 --- a/include/util_def.h +++ b/include/util_def.h @@ -2,7 +2,7 @@ #include #include -static void +INNER void disable_denormals() { #if __SSE2__ @@ -13,7 +13,7 @@ disable_denormals() /* via http://www.rgba.org/articles/sfrand/sfrand.htm */ static unsigned int mirand = 1; -static float +INNER float whitenoise() { union either { @@ -27,13 +27,13 @@ whitenoise() /* used to resemble https://github.com/swh/ladspa/blob/master/util/biquad.h */ -static void +INNER void biquad_init(biquad *bq) { bq->x1 = bq->x2 = bq->y1 = bq->y2 = 0; } -static biquad_interim +INNER biquad_interim design(double cw, double sw, double num0, double num1, double num2, double den0, double den1, double den2) @@ -48,7 +48,7 @@ design(double cw, double sw, }; } -static biquad +INNER biquad biquad_gen(filter_t type, double fc, double gain, double bw, double fs) { double w0, cw, sw, A, As, Q; @@ -88,7 +88,7 @@ biquad_gen(filter_t type, double fc, double gain, double bw, double fs) return out; } -static double +INNER double biquad_run(biquad *bq, double x) { double y; diff --git a/util/bench.c b/util/bench.c index 6f303b3..9ce1ad9 100644 --- a/util/bench.c +++ b/util/bench.c @@ -1,28 +1,28 @@ +#include #include #include #include #include +#include #include "dlfcn.h" #include "ladspa.h" #include "util.h" -enum { - BLOCK_SIZE=2048 -}; +#define BLOCK_SIZE 2048 void *plug = NULL; static float *audio_buffer; static int audio_count = 0; -static void +INNER void cleanup() { dlclose(plug); if (audio_count) free(audio_buffer); } -static const LADSPA_Descriptor* +INNER const LADSPA_Descriptor* load_ladspa(char *path) { plug = dlopen(path, RTLD_NOW); @@ -38,6 +38,52 @@ load_ladspa(char *path) return d; } +INNER float +between(float percent, float min, float max, int logscale) +{ + if (logscale) + return log(min/percent)/log(min/max); + else + return (min - percent)/(min - max); +} + +INNER float +get_default(LADSPA_PortRangeHint hint) +{ + float x = 0; + int hd = hint.HintDescriptor; + float min = hint.LowerBound; + float max = hint.UpperBound; + float logscale = LADSPA_IS_HINT_LOGARITHMIC(hd); + if (LADSPA_IS_HINT_DEFAULT_0(hd)) + x = 0; + if (LADSPA_IS_HINT_DEFAULT_1(hd)) + x = 1; + if (LADSPA_IS_HINT_DEFAULT_100(hd)) + x = 100; + if (LADSPA_IS_HINT_DEFAULT_440(hd)) + x = 440; + if (LADSPA_IS_HINT_DEFAULT_MINIMUM(hd)) + x = min; + if (LADSPA_IS_HINT_DEFAULT_LOW(hd)) + x = between(0.25, min, max, logscale); + if (LADSPA_IS_HINT_DEFAULT_MIDDLE(hd)) + x = between(0.50, min, max, logscale); + if (LADSPA_IS_HINT_DEFAULT_HIGH(hd)) + x = between(0.75, min, max, logscale); + if (LADSPA_IS_HINT_DEFAULT_MAXIMUM(hd)) + x = max; + if (LADSPA_IS_HINT_INTEGER(hd)) + x = round(x); + if (LADSPA_IS_HINT_TOGGLED(hd)) { + float mid = between(0.50, min, max, logscale); + x = x >= mid ? max : min; + } + if (x < min) x = min; + if (x > max) x = max; + return x; +} + int main(int argc, char **argv) { @@ -56,9 +102,15 @@ main(int argc, char **argv) audio_buffer = calloc(audio_count*BLOCK_SIZE, sizeof(float)); int a = 0; - for (int i = 0; i < d->PortCount; i++) - if (LADSPA_IS_PORT_AUDIO(d->PortDescriptors[i])) + for (int i = 0; i < d->PortCount; i++) { + if (LADSPA_IS_PORT_AUDIO(d->PortDescriptors[i])) { d->connect_port(h, i, audio_buffer + a++*BLOCK_SIZE); + } else { + float *x = alloca(sizeof(float)); + *x = get_default(d->PortRangeHints[i]); + d->connect_port(h, i, x); + } + } mirand = time(NULL); for (int i = 0; i < audio_count*BLOCK_SIZE; i++) diff --git a/util/benchtime b/util/benchtime index 3cec957..9748972 100755 --- a/util/benchtime +++ b/util/benchtime @@ -13,4 +13,4 @@ for i in {1..8}; do time "$bench" "$against" done 2>&1 >/dev/null | awk 'BEGIN{m=999;printf " …\033[90m"} {a+=$1;n++;m=$1