From 4de1731d9315038196b849893c769f5996275fa9 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Wed, 5 Feb 2014 20:32:16 -0800 Subject: [PATCH] tube plugin, begin on SSE2 optimizations --- Makefile | 2 +- README.md | 14 ++++-- crap_tube.h | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 crap_tube.h diff --git a/Makefile b/Makefile index cb94fc9..f185ecb 100755 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ DISTNAME = crap VERSION = git FULLNAME = ${DISTNAME}-${VERSION} -BOTH = crap_eq crap_eq_const +BOTH = crap_eq crap_eq_const crap_tube LADSPA_ONLY = crap_noise VST_ONLY = crap_delay_test LADSPA = ${BOTH:=-ladspa} ${LADSPA_ONLY:=-ladspa} diff --git a/README.md b/README.md index 37833c7..9aab33f 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,14 @@ __crap\_noise (0xEC57A71C)__ white noise generator. loud, full-range, 0dBFS. don't say i didn't warn you. +### crap Tube Distortion + +__crap\_tube (0x50F7BA11)__ + +static waveshaper with 4x oversampling, sounds kinda like a tube I guess? +be aware that the oversampling is a bit naive and attenuates the signal +past 17kHz, assuming a 44.1kHz sample rate. + ### crap delay test __crap\_delay\_test (0xDEDEDEDE)__ @@ -37,7 +45,8 @@ experimentation with delay compensation and EQ oversampling, not for use. a `benchmark` target is included, however it doesn't build on Windows. -try `CFLAGS="-O3 -ffast-math -march=core2"` +for speed, try `CFLAGS="-O3 -ffast-math -march=core2 -mfpmath=sse"` +and the same for CXXFLAGS. on Linux, you'll need `CFLAGS+=" -fpic" CXXFLAGS+=" -fpic -D__cdecl="` @@ -47,10 +56,9 @@ remember to export `VST_SDK_DIR` to the path of your `vstsdk2.4/` * convert crap\_noise to the template format * rename plugins (fix capitalization consistency and such) -* make style consistent +* make code style consistent * remove crap\_ prefixes? * move to subdirs? -* make crap faster (hand-written SSE2? compiler directives?) 
/*
 * Patch context carried over verbatim from the original (newline-collapsed)
 * diff so that nothing is lost: the tail of the README.md TODO hunk followed
 * by the header of the hunk that creates crap_tube.h.
 *
 *     * reduce input/output buffers on biquads (shared)
 *     * ease up on the preprocessor ifs
 *     * polish parameter support
 *
 *     diff --git a/crap_tube.h b/crap_tube.h
 *     new file mode 100644
 *     index 0000000..f0035c7
 *     --- /dev/null
 *     +++ b/crap_tube.h
 *     @@ -0,0 +1,135 @@
 *
 * Everything below is the body of crap_tube.h: a static waveshaper that is
 * run at four times the host sample rate (zero-stuffing, low-pass, distort,
 * low-pass, keep one sample in four).
 */

#include <string.h> /* memset */

#ifdef __SSE2__
#include <xmmintrin.h> /* _mm_getcsr, _mm_setcsr */
#ifndef __SSE2_MATH__
#warning SSE2 enabled but not forced, beware denormals
#endif
#else
#warning built without SSE2, denormals will be painful
#endif

/* Plugin metadata. NOTE(review): presumably consumed by the LADSPA/VST
 * template that includes this header; the template is not visible here. */
#define ID 0x50F7BA11
#define LABEL "crap_tube"
#define NAME "crap Tube Distortion"
#define AUTHOR "Connor Olding"
#define COPYRIGHT "MIT"
#define PARAMETERS 0

/*
 * Per-instance filter state, one 64-entry history per channel.
 * Layout of each history, as used by process_one():
 *   [ 0..15]  upsample()   input  history (8 stages x 2 samples)
 *   [16..31]  upsample()   output history
 *   [32..47]  downsample() input  history
 *   [48..63]  downsample() output history
 */
typedef struct {
	double history_L[64];
	double history_R[64];
} personal;

/*
 * Set bits 0x8000 and 0x0040 of the SSE control/status register
 * (flush-to-zero and denormals-are-zero per the Intel documentation) so
 * that decaying filter tails do not hit slow denormal arithmetic.
 * Compiles to a no-op when SSE2 is not available.
 */
static void
disable_denormals(void)
{
#ifdef __SSE2__
	_mm_setcsr(_mm_getcsr() | 0x8040);
#endif
}

/*
 * Static waveshaping curve.
 *
 * The denominator equals (3x + 1)^2 + 18 and is therefore never zero.
 * The constant 9/19 is the value of the rational term at x == 0, so
 * subtracting it makes distort(0) exactly 0.
 */
static double
distort(double x)
{
	return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.;
}

/*
 * One biquad low-pass stage operating on the caller's locals `x`, `y`,
 * `xn` and `yn`:
 *
 *   y = b0*x + b1*x[n-1] + b0*x[n-2] - a1*y[n-1] - a2*y[n-2]
 *
 * b2 is always b0 with lowpasses.
 * a0 is already factored into the rest of the coefficients.
 *
 * Stage `i` keeps its two previous inputs in xn[2i], xn[2i+1] and its two
 * previous outputs in yn[2i], yn[2i+1]. On exit `x` holds the stage output
 * so that stages can be chained. Wrapped in do/while(0) and with
 * parenthesised arguments so it behaves as a single statement.
 */
#define LOWPASS(i, b0, b1, a1, a2) \
	do { \
		y = (b0)*x + (b1)*xn[(i)*2] + (b0)*xn[(i)*2 + 1] \
		  - (a1)*yn[(i)*2] - (a2)*yn[(i)*2 + 1]; \
		xn[(i)*2 + 1] = xn[(i)*2]; \
		xn[(i)*2] = x; \
		yn[(i)*2 + 1] = yn[(i)*2]; \
		yn[(i)*2] = y; \
		x = y; \
	} while (0)

/*
 * Run one sample through the eight cascaded low-pass stages applied
 * before the waveshaper.
 *
 * xn, yn: 16-entry input/output histories, updated in place.
 * x:      input sample at the oversampled rate.
 * Returns the output of the last stage.
 */
static double
upsample(double xn[16], double yn[16], double x)
{
	double y;
	LOWPASS(0, +0.71327159,+0.00688573,-0.45391337,+0.88734229);
	LOWPASS(1, +0.63347126,+0.05572752,-0.36946634,+0.69213639);
	LOWPASS(2, +0.55963645,+0.13990391,-0.26487901,+0.52405582);
	LOWPASS(3, +0.49037095,+0.24706928,-0.14763065,+0.37544183);
	LOWPASS(4, +0.42692239,+0.36379839,-0.02763286,+0.24527604);
	LOWPASS(5, +0.37268890,+0.47433865,+0.08224090,+0.13747554);
	LOWPASS(6, +0.33241251,+0.56148939,+0.16727062,+0.05904378);
	LOWPASS(7, +0.31079382,+0.60975767,+0.21392163,+0.01742368);
	return y;
}

/*
 * Run one sample through the eight cascaded low-pass stages applied
 * after the waveshaper.
 *
 * xn, yn: 16-entry input/output histories, updated in place.
 * x:      input sample at the oversampled rate.
 * Returns the output of the last stage.
 */
static double
downsample(double xn[16], double yn[16], double x)
{
	double y;
	LOWPASS(0, +0.62136966,-0.87573986,-1.56336581,+0.93036527);
	LOWPASS(1, +0.56540370,-0.77393348,-1.44258778,+0.79946170);
	LOWPASS(2, +0.49824084,-0.63630306,-1.31114921,+0.67132784);
	LOWPASS(3, +0.41949184,-0.46466704,-1.16600279,+0.54031944);
	LOWPASS(4, +0.33172375,-0.26684785,-1.00993399,+0.40653364);
	LOWPASS(5, +0.24269774,-0.06242297,-0.85492245,+0.27789496);
	LOWPASS(6, +0.16673206,+0.11379847,-0.72421195,+0.17147454);
	LOWPASS(7, +0.12199271,+0.21811002,-0.64769184,+0.10978728);
	return y;
}

/*
 * Process a single host-rate sample for one channel.
 *
 * The sample is followed by three zeros (zero-stuffing to 4x the rate);
 * each of the four oversampled values goes through upsample(), distort()
 * and downsample(). Only the first downsample() result is returned; the
 * other three calls are still required because they advance the filter
 * state. NOTE(review): the factor 4 presumably restores the level lost to
 * zero-stuffing, and 0.71 is a fixed output trim; neither is explained in
 * the original.
 *
 * h: 64-entry history for this channel (layout documented on `personal`).
 */
static double
process_one(double h[64], double x)
{
	double y;
	y = downsample(h+32, h+48, distort(4*upsample(h, h+16, x)));
	downsample(h+32, h+48, distort(4*upsample(h, h+16, 0)));
	downsample(h+32, h+48, distort(4*upsample(h, h+16, 0)));
	downsample(h+32, h+48, distort(4*upsample(h, h+16, 0)));
	return y*0.71;
}

/*
 * Process `count` stereo frames of single-precision audio.
 * Internal arithmetic is done in double; results are narrowed to float.
 */
static void
process(personal *data,
    float *in_L, float *in_R,
    float *out_L, float *out_R,
    unsigned long count) {
	disable_denormals();
	for (unsigned long pos = 0; pos < count; pos++) {
		out_L[pos] = (float) process_one(data->history_L, in_L[pos]);
		out_R[pos] = (float) process_one(data->history_R, in_R[pos]);
	}
}

/* Process `count` stereo frames of double-precision audio. */
static void
process_double(personal *data,
    double *in_L, double *in_R,
    double *out_L, double *out_R,
    unsigned long count) {
	disable_denormals();
	for (unsigned long pos = 0; pos < count; pos++) {
		out_L[pos] = process_one(data->history_L, in_L[pos]);
		out_R[pos] = process_one(data->history_R, in_R[pos]);
	}
}

/*
 * Clear all filter state for both channels.
 *
 * memset counts bytes, so the size must be the size of the whole array
 * (64 doubles), not the element count. All-bits-zero is 0.0 for IEEE 754
 * doubles.
 */
static void
resume(personal *data) {
	memset(data->history_L, 0, sizeof data->history_L);
	memset(data->history_R, 0, sizeof data->history_R);
}

/* Nothing to do when processing is suspended. */
static void
pause(personal *data) {
	(void) data;
}

/* Bring a new instance into a known state by clearing its histories. */
static void
construct(personal *data) {
	resume(data);
}

/* The instance owns no resources, so there is nothing to release. */
static void
destruct(personal *data) {
	(void) data;
}

/*
 * Called with the sample rate `fs`. The filter coefficients are fixed, so
 * the rate is ignored and only the filter state is cleared.
 */
static void
adjust(personal *data, unsigned long fs) {
	(void) fs;
	resume(data);
}