From a39438455556d2c74cd825f55018a35648c8869a Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Sat, 4 Apr 2015 06:28:59 -0700 Subject: [PATCH] optimize by splitting into blocks +67% faster on this machine --- crap/tube.h | 153 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 46 deletions(-) diff --git a/crap/tube.h b/crap/tube.h index 3d64bd5..39a6cb5 100644 --- a/crap/tube.h +++ b/crap/tube.h @@ -1,3 +1,4 @@ +#include #include #include @@ -13,6 +14,12 @@ #define PARAMETERS 2 #define OVERSAMPLING 2 +#define BLOCK_SIZE 256 +#define FULL_SIZE (BLOCK_SIZE*OVERSAMPLING) + +#define INNER static inline + +typedef unsigned long ulong; typedef struct { double desired, actual, speed; @@ -24,7 +31,7 @@ typedef struct { smoothval drive, wet; } personal; -static double +INNER double smooth(smoothval *val) { double a = val->actual; @@ -44,62 +51,116 @@ smooth(smoothval *val) return a; } -static double +INNER double distort(double x) { return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.; } -static double +INNER double process_one(double x, double drive, double wet) { return (distort(x*drive)/drive*0.79 - x)*wet + x; } -static double -process_os(personal *data, double x, int right) -{ - halfband_t *hbu = (!right) ? &data->hbu_L : &data->hbu_R; - halfband_t *hbd = (!right) ? &data->hbd_L : &data->hbd_R; - double y; - - #define doit(SAMP) \ - decimate(hbd, process_one(interpolate(hbu, SAMP), \ - smooth(&data->drive), smooth(&data->wet))) - doit(x); - y = doit(x); - #undef doit - - return y; -} - -static void -process(personal *data, - float *in_L, float *in_R, - float *out_L, float *out_R, - unsigned long count) -{ - disable_denormals(); - for (unsigned long pos = 0; pos < count; pos++) { - out_L[pos] = process_os(data, in_L[pos], 0); - out_R[pos] = process_os(data, in_R[pos], 1); - } -} - -static void +INNER void process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, - unsigned long count) + ulong count) { disable_denormals(); - for (unsigned long pos = 0; pos < count; pos++) { - out_L[pos] = process_os(data, in_L[pos], 0); - out_R[pos] = process_os(data, in_R[pos], 1); + + double drives[FULL_SIZE], wets[FULL_SIZE]; + double in_os[FULL_SIZE], out_os[FULL_SIZE]; + + for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { + ulong rem = BLOCK_SIZE; + if (pos + BLOCK_SIZE > count) + rem = count - pos; + + for (int i = 0; i < rem*OVERSAMPLING; i++) + drives[i] = smooth(&data->drive); + for (int i = 0; i < rem*OVERSAMPLING; i++) + wets[i] = smooth(&data->wet); + + halfband_t *hb; + + // left channel + hb = &data->hbu_L; + for (int i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + in_os[i+0] = interpolate(hb, in_L[j]); + in_os[i+1] = interpolate(hb, in_L[j]); + } + + for (int i = 0; i < rem*OVERSAMPLING; i++) { + out_os[i] = process_one(in_os[i], drives[i], wets[i]); + } + + hb = &data->hbd_L; + for (int i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + decimate(hb, out_os[i+0]); + out_L[j] = decimate(hb, out_os[i+1]); + } + + // right channel + hb = &data->hbu_R; + for (int i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + in_os[i+0] = interpolate(hb, in_R[j]); + in_os[i+1] = interpolate(hb, in_R[j]); + } + + for (int i = 0; i < rem*OVERSAMPLING; i++) { + out_os[i] = process_one(in_os[i], drives[i], wets[i]); + } + + hb = &data->hbd_R; + for (int i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { + decimate(hb, out_os[i+0]); + out_R[j] = decimate(hb, out_os[i+1]); + } + + in_L += BLOCK_SIZE; + in_R += BLOCK_SIZE; + out_L += BLOCK_SIZE; + out_R += BLOCK_SIZE; } } -static void +INNER void +process(personal *data, + float *in_L, float *in_R, + float *out_L, float *out_R, + ulong count) +{ + double in_L2[BLOCK_SIZE], in_R2[BLOCK_SIZE]; + double out_L2[BLOCK_SIZE], out_R2[BLOCK_SIZE]; + + for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { + ulong rem = BLOCK_SIZE; + if (pos + BLOCK_SIZE > count) + rem = count - pos; + + for (int i = 0; i < rem; i++) + in_L2[i] = in_L[i]; + for (int i = 0; i < rem; i++) + in_R2[i] = in_R[i]; + + process_double(data, in_L2, in_R2, out_L2, out_R2, rem); + + for (int i = 0; i < rem; i++) + out_L[i] = out_L2[i]; + for (int i = 0; i < rem; i++) + out_R[i] = out_R2[i]; + + in_L += BLOCK_SIZE; + in_R += BLOCK_SIZE; + out_L += BLOCK_SIZE; + out_R += BLOCK_SIZE; + } +} + +INNER void resume(personal *data) { memset(&data->hbu_L, 0, sizeof(halfband_t)); @@ -108,17 +169,17 @@ resume(personal *data) memset(&data->hbd_R, 0, sizeof(halfband_t)); } -static void +INNER void pause(personal *data) {} -static void +INNER void construct(personal *data) { memset(data, 0, sizeof(personal)); } -static void +INNER void construct_params(param *params) { sprintf(params[0].name, "Drive"); @@ -137,12 +198,12 @@ construct_params(param *params) param_reset(¶ms[1]); } -static void +INNER void destruct(personal *data) {} -static void -adjust(personal *data, param *params, unsigned long fs_long) +INNER void +adjust(personal *data, param *params, ulong fs_long) { resume(data); double fs = fs_long; @@ -156,7 +217,7 @@ adjust(personal *data, param *params, unsigned long fs_long) data->wet.log = 0; } -static void +INNER void adjust_one(personal *data, param *params, unsigned int index) { data->drive.desired = DB2LIN(params[0].value);