vectorize tube

This commit is contained in:
Connor Olding 2015-04-07 11:25:26 -07:00
parent 8e7dde59f6
commit 33c0ef8f14
4 changed files with 180 additions and 73 deletions

View File

@ -15,7 +15,7 @@
#include "util.h"
#include "param.h"
#include "os2piir.h"
#include "os2piir_stereo.h"
typedef struct {
double desired, actual, speed;
@ -23,7 +23,7 @@ typedef struct {
} smoothval;
typedef struct {
halfband_t hbu_L, hbu_R, hbd_L, hbd_R;
halfband_t hb_up, hb_down;
smoothval drive, wet;
} personal;
@ -47,16 +47,16 @@ smooth(smoothval *val)
return a;
}
INNER double
distort(double x)
INNER CONST v2df
distort(v2df x)
{
return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.;
return (V(27.)*x + V(9.)) / (V(9.)*x*x + V(6.)*x + V(19.)) - V(9./19.);
}
INNER double
process_one(double x, double drive, double wet)
INNER CONST v2df
process_one(v2df x, v2df drive, v2df wet)
{
return (distort(x*drive)/drive*0.79 - x)*wet + x;
return (distort(x*drive)/drive*V(0.79) - x)*wet + x;
}
static void
@ -64,74 +64,20 @@ process_double(personal *data,
double *in_L, double *in_R,
double *out_L, double *out_R,
ulong count)
{
disable_denormals();
#include "process_nonlinear.h"
double drives[FULL_SIZE], wets[FULL_SIZE];
double in_os[FULL_SIZE], out_os[FULL_SIZE];
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
ulong rem = BLOCK_SIZE;
if (pos + BLOCK_SIZE > count)
rem = count - pos;
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
drives[i] = smooth(&data->drive);
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
wets[i] = smooth(&data->wet);
halfband_t *hb;
// left channel
hb = &data->hbu_L;
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
in_os[i+0] = interpolate(hb, in_L[j]);
in_os[i+1] = interpolate(hb, in_L[j]);
}
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
}
hb = &data->hbd_L;
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
decimate(hb, out_os[i+0]);
out_L[j] = decimate(hb, out_os[i+1]);
}
// right channel
hb = &data->hbu_R;
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
in_os[i+0] = interpolate(hb, in_R[j]);
in_os[i+1] = interpolate(hb, in_R[j]);
}
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
}
hb = &data->hbd_R;
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
decimate(hb, out_os[i+0]);
out_R[j] = decimate(hb, out_os[i+1]);
}
in_L += BLOCK_SIZE;
in_R += BLOCK_SIZE;
out_L += BLOCK_SIZE;
out_R += BLOCK_SIZE;
}
}
#include "process.h"
static void
process(personal *data,
float *in_L, float *in_R,
float *out_L, float *out_R,
ulong count)
#include "process_nonlinear.h"
INNER void
resume(personal *data)
{
memset(&data->hbu_L, 0, sizeof(halfband_t));
memset(&data->hbu_R, 0, sizeof(halfband_t));
memset(&data->hbd_L, 0, sizeof(halfband_t));
memset(&data->hbd_R, 0, sizeof(halfband_t));
memset(&data->hb_up, 0, sizeof(halfband_t));
memset(&data->hb_down, 0, sizeof(halfband_t));
}
INNER void

98
include/os2piir_stereo.h Normal file
View File

@ -0,0 +1,98 @@
/* halfband polyphase IIR filter
coefficients designed with halfband program:
https://gist.github.com/3be345efb6c97d757398#file-halfband-c
parameters: 16 coefficients, 0.1 transition band
stopband: ~-150dB
overall delay: ~8 samples
*/
#define copy(dst, src) memcpy(dst, src, sizeof(v2df)*8)
//#define copy(dst, src) _copy(dst, src)
// all should be initialized to 0
typedef struct {
v2df ao[8], bo[8];
v2df at[8], bt[8];
v2df x1, x2, x3;
int i;
} halfband_t;
INNER void
halfband_a(v2df a[8], v2df ao[8], v2df x0, v2df x2)
{
a[0] = x2 + (x0 - ao[0])*V(0.006185967461045014);
a[1] = ao[0] + (a[0] - ao[1])*V(0.054230780876613788);
a[2] = ao[1] + (a[1] - ao[2])*V(0.143280861566087270);
a[3] = ao[2] + (a[2] - ao[3])*V(0.262004358403954640);
a[4] = ao[3] + (a[3] - ao[4])*V(0.398796973552973666);
a[5] = ao[4] + (a[4] - ao[5])*V(0.545323651071132232);
a[6] = ao[5] + (a[5] - ao[6])*V(0.698736833646440347);
a[7] = ao[6] + (a[6] - ao[7])*V(0.862917812650502936);
}
INNER void
halfband_b(v2df b[8], v2df bo[8], v2df x1, v2df x3)
{
b[0] = x3 + (x1 - bo[0])*V(0.024499027624721819);
b[1] = bo[0] + (b[0] - bo[1])*V(0.094283481125726432);
b[2] = bo[1] + (b[1] - bo[2])*V(0.199699579426327684);
b[3] = bo[2] + (b[2] - bo[3])*V(0.328772348316831664);
b[4] = bo[3] + (b[3] - bo[4])*V(0.471167216679969414);
b[5] = bo[4] + (b[4] - bo[5])*V(0.621096845120503893);
b[6] = bo[5] + (b[5] - bo[6])*V(0.778944517099529166);
b[7] = bo[6] + (b[6] - bo[7])*V(0.952428157718303137);
}
INNER v2df
halfband(halfband_t *h, v2df x0)
{
v2df a[8], b[8];
halfband_a(a, h->ao, x0, h->x2);
halfband_b(b, h->bo, h->x1, h->x3);
copy(h->ao, h->at);
copy(h->bo, h->bt);
copy(h->at, a);
copy(h->bt, b);
h->x3 = h->x2;
h->x2 = h->x1;
h->x1 = x0;
return (a[7] + b[7])*V(0.5);
}
INNER v2df
decimate(halfband_t *h, v2df x0)
{
v2df c[8];
if ((h->i = !h->i)) {
halfband_b(c, h->bo, x0, h->x2);
copy(h->bo, c);
h->x2 = h->x1;
h->x1 = x0;
return V(0);
} else {
halfband_a(c, h->ao, x0, h->x2);
copy(h->ao, c);
h->x2 = h->x1;
h->x1 = x0;
return (c[7] + h->bo[7])*V(0.5);
}
}
// note: do not zero-stuff! send the input each time.
INNER v2df
interpolate(halfband_t *h, v2df x0)
{
v2df c[8];
if ((h->i = !h->i)) {
halfband_a(c, h->ao, x0, h->x1);
copy(h->ao, c);
return c[7];
} else {
halfband_b(c, h->bo, x0, h->x1);
copy(h->bo, c);
h->x1 = x0;
return c[7];
}
}
#undef copy

View File

@ -0,0 +1,58 @@
{
disable_denormals();
v2df drives[FULL_SIZE], wets[FULL_SIZE];
v2df buf[BLOCK_SIZE];
v2df over[FULL_SIZE];
halfband_t *hb_up = &data->hb_up;
halfband_t *hb_down = &data->hb_down;
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
ulong rem = BLOCK_SIZE;
if (pos + BLOCK_SIZE > count)
rem = count - pos;
ulong rem2 = rem*OVERSAMPLING;
for (ulong i = 0; i < rem2; i++) {
double y = smooth(&data->drive);
drives[i] = V(y);
}
for (ulong i = 0; i < rem2; i++) {
double y = smooth(&data->wet);
wets[i] = V(y);
}
for (ulong i = 0; i < rem; i++) {
buf[i][0] = in_L[i];
buf[i][1] = in_R[i];
}
for (ulong i = 0; i < rem; i++) {
hb_up->i = 0; // so compiler can optimize
over[i*2+0] = interpolate(hb_up, buf[i]);
over[i*2+1] = interpolate(hb_up, buf[i]);
}
for (ulong i = 0; i < rem2; i++) {
over[i] = process_one(over[i], drives[i], wets[i]);
}
for (ulong i = 0; i < rem; i++) {
hb_down->i = 0; // so compiler can optimize
decimate(hb_down, over[i*2+0]);
buf[i] = decimate(hb_down, over[i*2+1]);
}
for (ulong i = 0; i < rem; i++) {
out_L[i] = buf[i][0];
out_R[i] = buf[i][1];
}
in_L += BLOCK_SIZE;
in_R += BLOCK_SIZE;
out_L += BLOCK_SIZE;
out_R += BLOCK_SIZE;
}
}

View File

@ -1,12 +1,17 @@
#include "math.h"
#ifdef __SSE2__
#include <xmmintrin.h>
#include <emmintrin.h>
#endif
#define INNER static inline
#define PURE __attribute__((pure))
#define CONST __attribute__((const))
typedef double v2df __attribute__((vector_size(16), aligned(16)));
typedef unsigned long ulong;
typedef float v4sf __attribute__((vector_size(16), aligned(16)));
typedef unsigned long ulong; // __attribute((aligned(16)));
#define V(x) (v2df){(x), (x)}
INNER void
disable_denormals();