diff --git a/crap/tube.h b/crap/tube.h index 159fec9..b322e2c 100644 --- a/crap/tube.h +++ b/crap/tube.h @@ -15,7 +15,7 @@ #include "util.h" #include "param.h" -#include "os2piir.h" +#include "os2piir_stereo.h" typedef struct { double desired, actual, speed; @@ -23,7 +23,7 @@ typedef struct { } smoothval; typedef struct { - halfband_t hbu_L, hbu_R, hbd_L, hbd_R; + halfband_t hb_up, hb_down; smoothval drive, wet; } personal; @@ -47,16 +47,16 @@ smooth(smoothval *val) return a; } -INNER double -distort(double x) +INNER CONST v2df +distort(v2df x) { - return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.; + return (V(27.)*x + V(9.)) / (V(9.)*x*x + V(6.)*x + V(19.)) - V(9./19.); } -INNER double -process_one(double x, double drive, double wet) +INNER CONST v2df +process_one(v2df x, v2df drive, v2df wet) { - return (distort(x*drive)/drive*0.79 - x)*wet + x; + return (distort(x*drive)/drive*V(0.79) - x)*wet + x; } static void @@ -64,74 +64,20 @@ process_double(personal *data, double *in_L, double *in_R, double *out_L, double *out_R, ulong count) -{ - disable_denormals(); +#include "process_nonlinear.h" - double drives[FULL_SIZE], wets[FULL_SIZE]; - double in_os[FULL_SIZE], out_os[FULL_SIZE]; - - for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { - ulong rem = BLOCK_SIZE; - if (pos + BLOCK_SIZE > count) - rem = count - pos; - - for (ulong i = 0; i < rem*OVERSAMPLING; i++) - drives[i] = smooth(&data->drive); - for (ulong i = 0; i < rem*OVERSAMPLING; i++) - wets[i] = smooth(&data->wet); - - halfband_t *hb; - - // left channel - hb = &data->hbu_L; - for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { - in_os[i+0] = interpolate(hb, in_L[j]); - in_os[i+1] = interpolate(hb, in_L[j]); - } - - for (ulong i = 0; i < rem*OVERSAMPLING; i++) { - out_os[i] = process_one(in_os[i], drives[i], wets[i]); - } - - hb = &data->hbd_L; - for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { - decimate(hb, out_os[i+0]); - out_L[j] = decimate(hb, out_os[i+1]); - } - - // right channel - hb = &data->hbu_R; - for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { - in_os[i+0] = interpolate(hb, in_R[j]); - in_os[i+1] = interpolate(hb, in_R[j]); - } - - for (ulong i = 0; i < rem*OVERSAMPLING; i++) { - out_os[i] = process_one(in_os[i], drives[i], wets[i]); - } - - hb = &data->hbd_R; - for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) { - decimate(hb, out_os[i+0]); - out_R[j] = decimate(hb, out_os[i+1]); - } - - in_L += BLOCK_SIZE; - in_R += BLOCK_SIZE; - out_L += BLOCK_SIZE; - out_R += BLOCK_SIZE; - } -} - -#include "process.h" +static void +process(personal *data, + float *in_L, float *in_R, + float *out_L, float *out_R, + ulong count) +#include "process_nonlinear.h" INNER void resume(personal *data) { - memset(&data->hbu_L, 0, sizeof(halfband_t)); - memset(&data->hbu_R, 0, sizeof(halfband_t)); - memset(&data->hbd_L, 0, sizeof(halfband_t)); - memset(&data->hbd_R, 0, sizeof(halfband_t)); + memset(&data->hb_up, 0, sizeof(halfband_t)); + memset(&data->hb_down, 0, sizeof(halfband_t)); } INNER void diff --git a/include/os2piir_stereo.h b/include/os2piir_stereo.h new file mode 100644 index 0000000..5710154 --- /dev/null +++ b/include/os2piir_stereo.h @@ -0,0 +1,98 @@ +/* halfband polyphase IIR filter + coefficients designed with halfband program: + https://gist.github.com/3be345efb6c97d757398#file-halfband-c + parameters: 16 coefficients, 0.1 transition band + stopband: ~-150dB + overall delay: ~8 samples +*/ + +#define copy(dst, src) memcpy(dst, src, sizeof(v2df)*8) +//#define copy(dst, src) _copy(dst, src) + +// all should be initialized to 0 +typedef struct { + v2df ao[8], bo[8]; + v2df at[8], bt[8]; + v2df x1, x2, x3; + int i; +} halfband_t; + +INNER void +halfband_a(v2df a[8], v2df ao[8], v2df x0, v2df x2) +{ + a[0] = x2 + (x0 - ao[0])*V(0.006185967461045014); + a[1] = ao[0] + (a[0] - ao[1])*V(0.054230780876613788); + a[2] = ao[1] + (a[1] - ao[2])*V(0.143280861566087270); + a[3] = ao[2] + (a[2] - ao[3])*V(0.262004358403954640); + a[4] = ao[3] + (a[3] - ao[4])*V(0.398796973552973666); + a[5] = ao[4] + (a[4] - ao[5])*V(0.545323651071132232); + a[6] = ao[5] + (a[5] - ao[6])*V(0.698736833646440347); + a[7] = ao[6] + (a[6] - ao[7])*V(0.862917812650502936); +} + +INNER void +halfband_b(v2df b[8], v2df bo[8], v2df x1, v2df x3) +{ + b[0] = x3 + (x1 - bo[0])*V(0.024499027624721819); + b[1] = bo[0] + (b[0] - bo[1])*V(0.094283481125726432); + b[2] = bo[1] + (b[1] - bo[2])*V(0.199699579426327684); + b[3] = bo[2] + (b[2] - bo[3])*V(0.328772348316831664); + b[4] = bo[3] + (b[3] - bo[4])*V(0.471167216679969414); + b[5] = bo[4] + (b[4] - bo[5])*V(0.621096845120503893); + b[6] = bo[5] + (b[5] - bo[6])*V(0.778944517099529166); + b[7] = bo[6] + (b[6] - bo[7])*V(0.952428157718303137); +} + +INNER v2df +halfband(halfband_t *h, v2df x0) +{ + v2df a[8], b[8]; + halfband_a(a, h->ao, x0, h->x2); + halfband_b(b, h->bo, h->x1, h->x3); + copy(h->ao, h->at); + copy(h->bo, h->bt); + copy(h->at, a); + copy(h->bt, b); + h->x3 = h->x2; + h->x2 = h->x1; + h->x1 = x0; + return (a[7] + b[7])*V(0.5); +} + +INNER v2df +decimate(halfband_t *h, v2df x0) +{ + v2df c[8]; + if ((h->i = !h->i)) { + halfband_b(c, h->bo, x0, h->x2); + copy(h->bo, c); + h->x2 = h->x1; + h->x1 = x0; + return V(0); + } else { + halfband_a(c, h->ao, x0, h->x2); + copy(h->ao, c); + h->x2 = h->x1; + h->x1 = x0; + return (c[7] + h->bo[7])*V(0.5); + } +} + +// note: do not zero-stuff! send the input each time. +INNER v2df +interpolate(halfband_t *h, v2df x0) +{ + v2df c[8]; + if ((h->i = !h->i)) { + halfband_a(c, h->ao, x0, h->x1); + copy(h->ao, c); + return c[7]; + } else { + halfband_b(c, h->bo, x0, h->x1); + copy(h->bo, c); + h->x1 = x0; + return c[7]; + } +} + +#undef copy diff --git a/include/process_nonlinear.h b/include/process_nonlinear.h new file mode 100644 index 0000000..603e489 --- /dev/null +++ b/include/process_nonlinear.h @@ -0,0 +1,58 @@ +{ + disable_denormals(); + + v2df drives[FULL_SIZE], wets[FULL_SIZE]; + v2df buf[BLOCK_SIZE]; + v2df over[FULL_SIZE]; + + halfband_t *hb_up = &data->hb_up; + halfband_t *hb_down = &data->hb_down; + + for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) { + ulong rem = BLOCK_SIZE; + if (pos + BLOCK_SIZE > count) + rem = count - pos; + + ulong rem2 = rem*OVERSAMPLING; + + for (ulong i = 0; i < rem2; i++) { + double y = smooth(&data->drive); + drives[i] = V(y); + } + for (ulong i = 0; i < rem2; i++) { + double y = smooth(&data->wet); + wets[i] = V(y); + } + + for (ulong i = 0; i < rem; i++) { + buf[i][0] = in_L[i]; + buf[i][1] = in_R[i]; + } + + for (ulong i = 0; i < rem; i++) { + hb_up->i = 0; // so compiler can optimize + over[i*2+0] = interpolate(hb_up, buf[i]); + over[i*2+1] = interpolate(hb_up, buf[i]); + } + + for (ulong i = 0; i < rem2; i++) { + over[i] = process_one(over[i], drives[i], wets[i]); + } + + for (ulong i = 0; i < rem; i++) { + hb_down->i = 0; // so compiler can optimize + decimate(hb_down, over[i*2+0]); + buf[i] = decimate(hb_down, over[i*2+1]); + } + + for (ulong i = 0; i < rem; i++) { + out_L[i] = buf[i][0]; + out_R[i] = buf[i][1]; + } + + in_L += BLOCK_SIZE; + in_R += BLOCK_SIZE; + out_L += BLOCK_SIZE; + out_R += BLOCK_SIZE; + } +} diff --git a/include/util.h b/include/util.h index 971fb8f..665ff8e 100644 --- a/include/util.h +++ b/include/util.h @@ -1,12 +1,17 @@ #include "math.h" #ifdef __SSE2__ -#include +#include #endif #define INNER static inline +#define PURE __attribute__((pure)) +#define CONST __attribute__((const)) typedef double v2df __attribute__((vector_size(16), aligned(16))); -typedef unsigned long ulong; +typedef float v4sf __attribute__((vector_size(16), aligned(16))); +typedef unsigned long ulong; // __attribute((aligned(16))); + +#define V(x) (v2df){(x), (x)} INNER void disable_denormals();