vectorize tube
This commit is contained in:
parent
8e7dde59f6
commit
33c0ef8f14
4 changed files with 180 additions and 73 deletions
88
crap/tube.h
88
crap/tube.h
|
@ -15,7 +15,7 @@
|
|||
|
||||
#include "util.h"
|
||||
#include "param.h"
|
||||
#include "os2piir.h"
|
||||
#include "os2piir_stereo.h"
|
||||
|
||||
typedef struct {
|
||||
double desired, actual, speed;
|
||||
|
@ -23,7 +23,7 @@ typedef struct {
|
|||
} smoothval;
|
||||
|
||||
typedef struct {
|
||||
halfband_t hbu_L, hbu_R, hbd_L, hbd_R;
|
||||
halfband_t hb_up, hb_down;
|
||||
smoothval drive, wet;
|
||||
} personal;
|
||||
|
||||
|
@ -47,16 +47,16 @@ smooth(smoothval *val)
|
|||
return a;
|
||||
}
|
||||
|
||||
INNER double
|
||||
distort(double x)
|
||||
INNER CONST v2df
|
||||
distort(v2df x)
|
||||
{
|
||||
return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.;
|
||||
return (V(27.)*x + V(9.)) / (V(9.)*x*x + V(6.)*x + V(19.)) - V(9./19.);
|
||||
}
|
||||
|
||||
INNER double
|
||||
process_one(double x, double drive, double wet)
|
||||
INNER CONST v2df
|
||||
process_one(v2df x, v2df drive, v2df wet)
|
||||
{
|
||||
return (distort(x*drive)/drive*0.79 - x)*wet + x;
|
||||
return (distort(x*drive)/drive*V(0.79) - x)*wet + x;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -64,74 +64,20 @@ process_double(personal *data,
|
|||
double *in_L, double *in_R,
|
||||
double *out_L, double *out_R,
|
||||
ulong count)
|
||||
{
|
||||
disable_denormals();
|
||||
#include "process_nonlinear.h"
|
||||
|
||||
double drives[FULL_SIZE], wets[FULL_SIZE];
|
||||
double in_os[FULL_SIZE], out_os[FULL_SIZE];
|
||||
|
||||
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
|
||||
ulong rem = BLOCK_SIZE;
|
||||
if (pos + BLOCK_SIZE > count)
|
||||
rem = count - pos;
|
||||
|
||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
|
||||
drives[i] = smooth(&data->drive);
|
||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
|
||||
wets[i] = smooth(&data->wet);
|
||||
|
||||
halfband_t *hb;
|
||||
|
||||
// left channel
|
||||
hb = &data->hbu_L;
|
||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
||||
in_os[i+0] = interpolate(hb, in_L[j]);
|
||||
in_os[i+1] = interpolate(hb, in_L[j]);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
|
||||
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
|
||||
}
|
||||
|
||||
hb = &data->hbd_L;
|
||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
||||
decimate(hb, out_os[i+0]);
|
||||
out_L[j] = decimate(hb, out_os[i+1]);
|
||||
}
|
||||
|
||||
// right channel
|
||||
hb = &data->hbu_R;
|
||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
||||
in_os[i+0] = interpolate(hb, in_R[j]);
|
||||
in_os[i+1] = interpolate(hb, in_R[j]);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
|
||||
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
|
||||
}
|
||||
|
||||
hb = &data->hbd_R;
|
||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
||||
decimate(hb, out_os[i+0]);
|
||||
out_R[j] = decimate(hb, out_os[i+1]);
|
||||
}
|
||||
|
||||
in_L += BLOCK_SIZE;
|
||||
in_R += BLOCK_SIZE;
|
||||
out_L += BLOCK_SIZE;
|
||||
out_R += BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
#include "process.h"
|
||||
static void
|
||||
process(personal *data,
|
||||
float *in_L, float *in_R,
|
||||
float *out_L, float *out_R,
|
||||
ulong count)
|
||||
#include "process_nonlinear.h"
|
||||
|
||||
INNER void
|
||||
resume(personal *data)
|
||||
{
|
||||
memset(&data->hbu_L, 0, sizeof(halfband_t));
|
||||
memset(&data->hbu_R, 0, sizeof(halfband_t));
|
||||
memset(&data->hbd_L, 0, sizeof(halfband_t));
|
||||
memset(&data->hbd_R, 0, sizeof(halfband_t));
|
||||
memset(&data->hb_up, 0, sizeof(halfband_t));
|
||||
memset(&data->hb_down, 0, sizeof(halfband_t));
|
||||
}
|
||||
|
||||
INNER void
|
||||
|
|
98
include/os2piir_stereo.h
Normal file
98
include/os2piir_stereo.h
Normal file
|
@ -0,0 +1,98 @@
|
|||
/* halfband polyphase IIR filter
|
||||
coefficients designed with halfband program:
|
||||
https://gist.github.com/3be345efb6c97d757398#file-halfband-c
|
||||
parameters: 16 coefficients, 0.1 transition band
|
||||
stopband: ~-150dB
|
||||
overall delay: ~8 samples
|
||||
*/
|
||||
|
||||
#define copy(dst, src) memcpy(dst, src, sizeof(v2df)*8)
|
||||
//#define copy(dst, src) _copy(dst, src)
|
||||
|
||||
// all should be initialized to 0
|
||||
typedef struct {
|
||||
v2df ao[8], bo[8];
|
||||
v2df at[8], bt[8];
|
||||
v2df x1, x2, x3;
|
||||
int i;
|
||||
} halfband_t;
|
||||
|
||||
INNER void
|
||||
halfband_a(v2df a[8], v2df ao[8], v2df x0, v2df x2)
|
||||
{
|
||||
a[0] = x2 + (x0 - ao[0])*V(0.006185967461045014);
|
||||
a[1] = ao[0] + (a[0] - ao[1])*V(0.054230780876613788);
|
||||
a[2] = ao[1] + (a[1] - ao[2])*V(0.143280861566087270);
|
||||
a[3] = ao[2] + (a[2] - ao[3])*V(0.262004358403954640);
|
||||
a[4] = ao[3] + (a[3] - ao[4])*V(0.398796973552973666);
|
||||
a[5] = ao[4] + (a[4] - ao[5])*V(0.545323651071132232);
|
||||
a[6] = ao[5] + (a[5] - ao[6])*V(0.698736833646440347);
|
||||
a[7] = ao[6] + (a[6] - ao[7])*V(0.862917812650502936);
|
||||
}
|
||||
|
||||
INNER void
|
||||
halfband_b(v2df b[8], v2df bo[8], v2df x1, v2df x3)
|
||||
{
|
||||
b[0] = x3 + (x1 - bo[0])*V(0.024499027624721819);
|
||||
b[1] = bo[0] + (b[0] - bo[1])*V(0.094283481125726432);
|
||||
b[2] = bo[1] + (b[1] - bo[2])*V(0.199699579426327684);
|
||||
b[3] = bo[2] + (b[2] - bo[3])*V(0.328772348316831664);
|
||||
b[4] = bo[3] + (b[3] - bo[4])*V(0.471167216679969414);
|
||||
b[5] = bo[4] + (b[4] - bo[5])*V(0.621096845120503893);
|
||||
b[6] = bo[5] + (b[5] - bo[6])*V(0.778944517099529166);
|
||||
b[7] = bo[6] + (b[6] - bo[7])*V(0.952428157718303137);
|
||||
}
|
||||
|
||||
INNER v2df
|
||||
halfband(halfband_t *h, v2df x0)
|
||||
{
|
||||
v2df a[8], b[8];
|
||||
halfband_a(a, h->ao, x0, h->x2);
|
||||
halfband_b(b, h->bo, h->x1, h->x3);
|
||||
copy(h->ao, h->at);
|
||||
copy(h->bo, h->bt);
|
||||
copy(h->at, a);
|
||||
copy(h->bt, b);
|
||||
h->x3 = h->x2;
|
||||
h->x2 = h->x1;
|
||||
h->x1 = x0;
|
||||
return (a[7] + b[7])*V(0.5);
|
||||
}
|
||||
|
||||
INNER v2df
|
||||
decimate(halfband_t *h, v2df x0)
|
||||
{
|
||||
v2df c[8];
|
||||
if ((h->i = !h->i)) {
|
||||
halfband_b(c, h->bo, x0, h->x2);
|
||||
copy(h->bo, c);
|
||||
h->x2 = h->x1;
|
||||
h->x1 = x0;
|
||||
return V(0);
|
||||
} else {
|
||||
halfband_a(c, h->ao, x0, h->x2);
|
||||
copy(h->ao, c);
|
||||
h->x2 = h->x1;
|
||||
h->x1 = x0;
|
||||
return (c[7] + h->bo[7])*V(0.5);
|
||||
}
|
||||
}
|
||||
|
||||
// note: do not zero-stuff! send the input each time.
|
||||
INNER v2df
|
||||
interpolate(halfband_t *h, v2df x0)
|
||||
{
|
||||
v2df c[8];
|
||||
if ((h->i = !h->i)) {
|
||||
halfband_a(c, h->ao, x0, h->x1);
|
||||
copy(h->ao, c);
|
||||
return c[7];
|
||||
} else {
|
||||
halfband_b(c, h->bo, x0, h->x1);
|
||||
copy(h->bo, c);
|
||||
h->x1 = x0;
|
||||
return c[7];
|
||||
}
|
||||
}
|
||||
|
||||
#undef copy
|
58
include/process_nonlinear.h
Normal file
58
include/process_nonlinear.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
{
|
||||
disable_denormals();
|
||||
|
||||
v2df drives[FULL_SIZE], wets[FULL_SIZE];
|
||||
v2df buf[BLOCK_SIZE];
|
||||
v2df over[FULL_SIZE];
|
||||
|
||||
halfband_t *hb_up = &data->hb_up;
|
||||
halfband_t *hb_down = &data->hb_down;
|
||||
|
||||
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
|
||||
ulong rem = BLOCK_SIZE;
|
||||
if (pos + BLOCK_SIZE > count)
|
||||
rem = count - pos;
|
||||
|
||||
ulong rem2 = rem*OVERSAMPLING;
|
||||
|
||||
for (ulong i = 0; i < rem2; i++) {
|
||||
double y = smooth(&data->drive);
|
||||
drives[i] = V(y);
|
||||
}
|
||||
for (ulong i = 0; i < rem2; i++) {
|
||||
double y = smooth(&data->wet);
|
||||
wets[i] = V(y);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem; i++) {
|
||||
buf[i][0] = in_L[i];
|
||||
buf[i][1] = in_R[i];
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem; i++) {
|
||||
hb_up->i = 0; // so compiler can optimize
|
||||
over[i*2+0] = interpolate(hb_up, buf[i]);
|
||||
over[i*2+1] = interpolate(hb_up, buf[i]);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem2; i++) {
|
||||
over[i] = process_one(over[i], drives[i], wets[i]);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem; i++) {
|
||||
hb_down->i = 0; // so compiler can optimize
|
||||
decimate(hb_down, over[i*2+0]);
|
||||
buf[i] = decimate(hb_down, over[i*2+1]);
|
||||
}
|
||||
|
||||
for (ulong i = 0; i < rem; i++) {
|
||||
out_L[i] = buf[i][0];
|
||||
out_R[i] = buf[i][1];
|
||||
}
|
||||
|
||||
in_L += BLOCK_SIZE;
|
||||
in_R += BLOCK_SIZE;
|
||||
out_L += BLOCK_SIZE;
|
||||
out_R += BLOCK_SIZE;
|
||||
}
|
||||
}
|
|
@ -1,12 +1,17 @@
|
|||
#include "math.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#define INNER static inline
|
||||
#define PURE __attribute__((pure))
|
||||
#define CONST __attribute__((const))
|
||||
typedef double v2df __attribute__((vector_size(16), aligned(16)));
|
||||
typedef unsigned long ulong;
|
||||
typedef float v4sf __attribute__((vector_size(16), aligned(16)));
|
||||
typedef unsigned long ulong; // __attribute((aligned(16)));
|
||||
|
||||
#define V(x) (v2df){(x), (x)}
|
||||
|
||||
INNER void
|
||||
disable_denormals();
|
||||
|
|
Loading…
Reference in a new issue