vectorize tube
This commit is contained in:
parent
8e7dde59f6
commit
33c0ef8f14
4 changed files with 180 additions and 73 deletions
88
crap/tube.h
88
crap/tube.h
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
#include "util.h"
|
#include "util.h"
|
||||||
#include "param.h"
|
#include "param.h"
|
||||||
#include "os2piir.h"
|
#include "os2piir_stereo.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
double desired, actual, speed;
|
double desired, actual, speed;
|
||||||
|
@ -23,7 +23,7 @@ typedef struct {
|
||||||
} smoothval;
|
} smoothval;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
halfband_t hbu_L, hbu_R, hbd_L, hbd_R;
|
halfband_t hb_up, hb_down;
|
||||||
smoothval drive, wet;
|
smoothval drive, wet;
|
||||||
} personal;
|
} personal;
|
||||||
|
|
||||||
|
@ -47,16 +47,16 @@ smooth(smoothval *val)
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
INNER double
|
INNER CONST v2df
|
||||||
distort(double x)
|
distort(v2df x)
|
||||||
{
|
{
|
||||||
return (27*x + 9) / (9*x*x + 6*x + 19) - 9/19.;
|
return (V(27.)*x + V(9.)) / (V(9.)*x*x + V(6.)*x + V(19.)) - V(9./19.);
|
||||||
}
|
}
|
||||||
|
|
||||||
INNER double
|
INNER CONST v2df
|
||||||
process_one(double x, double drive, double wet)
|
process_one(v2df x, v2df drive, v2df wet)
|
||||||
{
|
{
|
||||||
return (distort(x*drive)/drive*0.79 - x)*wet + x;
|
return (distort(x*drive)/drive*V(0.79) - x)*wet + x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -64,74 +64,20 @@ process_double(personal *data,
|
||||||
double *in_L, double *in_R,
|
double *in_L, double *in_R,
|
||||||
double *out_L, double *out_R,
|
double *out_L, double *out_R,
|
||||||
ulong count)
|
ulong count)
|
||||||
{
|
#include "process_nonlinear.h"
|
||||||
disable_denormals();
|
|
||||||
|
|
||||||
double drives[FULL_SIZE], wets[FULL_SIZE];
|
static void
|
||||||
double in_os[FULL_SIZE], out_os[FULL_SIZE];
|
process(personal *data,
|
||||||
|
float *in_L, float *in_R,
|
||||||
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
|
float *out_L, float *out_R,
|
||||||
ulong rem = BLOCK_SIZE;
|
ulong count)
|
||||||
if (pos + BLOCK_SIZE > count)
|
#include "process_nonlinear.h"
|
||||||
rem = count - pos;
|
|
||||||
|
|
||||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
|
|
||||||
drives[i] = smooth(&data->drive);
|
|
||||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++)
|
|
||||||
wets[i] = smooth(&data->wet);
|
|
||||||
|
|
||||||
halfband_t *hb;
|
|
||||||
|
|
||||||
// left channel
|
|
||||||
hb = &data->hbu_L;
|
|
||||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
|
||||||
in_os[i+0] = interpolate(hb, in_L[j]);
|
|
||||||
in_os[i+1] = interpolate(hb, in_L[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
|
|
||||||
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
hb = &data->hbd_L;
|
|
||||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
|
||||||
decimate(hb, out_os[i+0]);
|
|
||||||
out_L[j] = decimate(hb, out_os[i+1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// right channel
|
|
||||||
hb = &data->hbu_R;
|
|
||||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
|
||||||
in_os[i+0] = interpolate(hb, in_R[j]);
|
|
||||||
in_os[i+1] = interpolate(hb, in_R[j]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (ulong i = 0; i < rem*OVERSAMPLING; i++) {
|
|
||||||
out_os[i] = process_one(in_os[i], drives[i], wets[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
hb = &data->hbd_R;
|
|
||||||
for (ulong i = 0, j = 0; j < rem; i += OVERSAMPLING, j++) {
|
|
||||||
decimate(hb, out_os[i+0]);
|
|
||||||
out_R[j] = decimate(hb, out_os[i+1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
in_L += BLOCK_SIZE;
|
|
||||||
in_R += BLOCK_SIZE;
|
|
||||||
out_L += BLOCK_SIZE;
|
|
||||||
out_R += BLOCK_SIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "process.h"
|
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
resume(personal *data)
|
resume(personal *data)
|
||||||
{
|
{
|
||||||
memset(&data->hbu_L, 0, sizeof(halfband_t));
|
memset(&data->hb_up, 0, sizeof(halfband_t));
|
||||||
memset(&data->hbu_R, 0, sizeof(halfband_t));
|
memset(&data->hb_down, 0, sizeof(halfband_t));
|
||||||
memset(&data->hbd_L, 0, sizeof(halfband_t));
|
|
||||||
memset(&data->hbd_R, 0, sizeof(halfband_t));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
|
|
98
include/os2piir_stereo.h
Normal file
98
include/os2piir_stereo.h
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
/* halfband polyphase IIR filter
|
||||||
|
coefficients designed with halfband program:
|
||||||
|
https://gist.github.com/3be345efb6c97d757398#file-halfband-c
|
||||||
|
parameters: 16 coefficients, 0.1 transition band
|
||||||
|
stopband: ~-150dB
|
||||||
|
overall delay: ~8 samples
|
||||||
|
*/
|
||||||
|
|
||||||
|
// copy(dst, src): duplicate one bank of eight v2df values from src into dst.
// Both arguments must refer to storage of at least 8 v2df elements.
#define copy(dst, src) memcpy((dst), (src), 8*sizeof(v2df))
// alternative implementation, currently disabled:
//#define copy(dst, src) _copy(dst, src)
|
||||||
|
|
||||||
|
// all should be initialized to 0
|
||||||
|
typedef struct {
|
||||||
|
v2df ao[8], bo[8];
|
||||||
|
v2df at[8], bt[8];
|
||||||
|
v2df x1, x2, x3;
|
||||||
|
int i;
|
||||||
|
} halfband_t;
|
||||||
|
|
||||||
|
INNER void
|
||||||
|
halfband_a(v2df a[8], v2df ao[8], v2df x0, v2df x2)
|
||||||
|
{
|
||||||
|
a[0] = x2 + (x0 - ao[0])*V(0.006185967461045014);
|
||||||
|
a[1] = ao[0] + (a[0] - ao[1])*V(0.054230780876613788);
|
||||||
|
a[2] = ao[1] + (a[1] - ao[2])*V(0.143280861566087270);
|
||||||
|
a[3] = ao[2] + (a[2] - ao[3])*V(0.262004358403954640);
|
||||||
|
a[4] = ao[3] + (a[3] - ao[4])*V(0.398796973552973666);
|
||||||
|
a[5] = ao[4] + (a[4] - ao[5])*V(0.545323651071132232);
|
||||||
|
a[6] = ao[5] + (a[5] - ao[6])*V(0.698736833646440347);
|
||||||
|
a[7] = ao[6] + (a[6] - ao[7])*V(0.862917812650502936);
|
||||||
|
}
|
||||||
|
|
||||||
|
INNER void
|
||||||
|
halfband_b(v2df b[8], v2df bo[8], v2df x1, v2df x3)
|
||||||
|
{
|
||||||
|
b[0] = x3 + (x1 - bo[0])*V(0.024499027624721819);
|
||||||
|
b[1] = bo[0] + (b[0] - bo[1])*V(0.094283481125726432);
|
||||||
|
b[2] = bo[1] + (b[1] - bo[2])*V(0.199699579426327684);
|
||||||
|
b[3] = bo[2] + (b[2] - bo[3])*V(0.328772348316831664);
|
||||||
|
b[4] = bo[3] + (b[3] - bo[4])*V(0.471167216679969414);
|
||||||
|
b[5] = bo[4] + (b[4] - bo[5])*V(0.621096845120503893);
|
||||||
|
b[6] = bo[5] + (b[5] - bo[6])*V(0.778944517099529166);
|
||||||
|
b[7] = bo[6] + (b[6] - bo[7])*V(0.952428157718303137);
|
||||||
|
}
|
||||||
|
|
||||||
|
INNER v2df
|
||||||
|
halfband(halfband_t *h, v2df x0)
|
||||||
|
{
|
||||||
|
v2df a[8], b[8];
|
||||||
|
halfband_a(a, h->ao, x0, h->x2);
|
||||||
|
halfband_b(b, h->bo, h->x1, h->x3);
|
||||||
|
copy(h->ao, h->at);
|
||||||
|
copy(h->bo, h->bt);
|
||||||
|
copy(h->at, a);
|
||||||
|
copy(h->bt, b);
|
||||||
|
h->x3 = h->x2;
|
||||||
|
h->x2 = h->x1;
|
||||||
|
h->x1 = x0;
|
||||||
|
return (a[7] + b[7])*V(0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
INNER v2df
|
||||||
|
decimate(halfband_t *h, v2df x0)
|
||||||
|
{
|
||||||
|
v2df c[8];
|
||||||
|
if ((h->i = !h->i)) {
|
||||||
|
halfband_b(c, h->bo, x0, h->x2);
|
||||||
|
copy(h->bo, c);
|
||||||
|
h->x2 = h->x1;
|
||||||
|
h->x1 = x0;
|
||||||
|
return V(0);
|
||||||
|
} else {
|
||||||
|
halfband_a(c, h->ao, x0, h->x2);
|
||||||
|
copy(h->ao, c);
|
||||||
|
h->x2 = h->x1;
|
||||||
|
h->x1 = x0;
|
||||||
|
return (c[7] + h->bo[7])*V(0.5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// note: do not zero-stuff! send the input each time.
|
||||||
|
INNER v2df
|
||||||
|
interpolate(halfband_t *h, v2df x0)
|
||||||
|
{
|
||||||
|
v2df c[8];
|
||||||
|
if ((h->i = !h->i)) {
|
||||||
|
halfband_a(c, h->ao, x0, h->x1);
|
||||||
|
copy(h->ao, c);
|
||||||
|
return c[7];
|
||||||
|
} else {
|
||||||
|
halfband_b(c, h->bo, x0, h->x1);
|
||||||
|
copy(h->bo, c);
|
||||||
|
h->x1 = x0;
|
||||||
|
return c[7];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef copy
|
58
include/process_nonlinear.h
Normal file
58
include/process_nonlinear.h
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
{
	/* Shared function body for the stereo nonlinear processors.

	   This fragment is #included directly after a function signature and
	   expects these names in scope: data (with hb_up, hb_down, drive, wet),
	   in_L, in_R, out_L, out_R and count. It also relies on process_one(),
	   smooth(), BLOCK_SIZE, FULL_SIZE and OVERSAMPLING from the includer.

	   Audio is handled in chunks of at most BLOCK_SIZE frames. Left and
	   right samples are packed into the two lanes of a v2df, upsampled,
	   run through process_one(), downsampled, and unpacked again. */
	disable_denormals();

	v2df drives[FULL_SIZE], wets[FULL_SIZE];
	v2df buf[BLOCK_SIZE];
	v2df over[FULL_SIZE];

	halfband_t *hb_up = &data->hb_up;
	halfband_t *hb_down = &data->hb_down;

	for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
		// number of frames in this chunk; shorter only for the last one
		ulong rem = BLOCK_SIZE;
		if (pos + BLOCK_SIZE > count)
			rem = count - pos;

		ulong rem2 = rem*OVERSAMPLING;

		// one smoothed parameter value per oversampled frame,
		// broadcast to both lanes; V() evaluates its argument twice,
		// so smooth() is called into a temporary first
		for (ulong i = 0; i < rem2; i++) {
			double y = smooth(&data->drive);
			drives[i] = V(y);
		}
		for (ulong i = 0; i < rem2; i++) {
			double y = smooth(&data->wet);
			wets[i] = V(y);
		}

		// pack: lane 0 = left, lane 1 = right
		for (ulong i = 0; i < rem; i++) {
			buf[i][0] = in_L[i];
			buf[i][1] = in_R[i];
		}

		// upsample: two interpolate() calls per input frame
		// (NOTE(review): the literal 2 assumes OVERSAMPLING == 2 -- confirm)
		for (ulong i = 0; i < rem; i++) {
			hb_up->i = 0; // so compiler can optimize
			over[i*2+0] = interpolate(hb_up, buf[i]);
			over[i*2+1] = interpolate(hb_up, buf[i]);
		}

		for (ulong i = 0; i < rem2; i++) {
			over[i] = process_one(over[i], drives[i], wets[i]);
		}

		// downsample: the first decimate() call of each pair only
		// advances filter state, the second yields the output frame
		for (ulong i = 0; i < rem; i++) {
			hb_down->i = 0; // so compiler can optimize
			decimate(hb_down, over[i*2+0]);
			buf[i] = decimate(hb_down, over[i*2+1]);
		}

		// unpack back into the separate channel buffers
		for (ulong i = 0; i < rem; i++) {
			out_L[i] = buf[i][0];
			out_R[i] = buf[i][1];
		}

		// advance by the frames actually consumed; stepping a full
		// BLOCK_SIZE after a short final chunk would form pointers
		// beyond one-past-the-end of the caller's buffers (undefined)
		in_L += rem;
		in_R += rem;
		out_L += rem;
		out_R += rem;
	}
}
|
|
@ -1,12 +1,17 @@
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#include <xmmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define INNER static inline
|
#define INNER static inline
|
||||||
|
#define PURE __attribute__((pure))
|
||||||
|
#define CONST __attribute__((const))
|
||||||
typedef double v2df __attribute__((vector_size(16), aligned(16)));
|
typedef double v2df __attribute__((vector_size(16), aligned(16)));
|
||||||
typedef unsigned long ulong;
|
typedef float v4sf __attribute__((vector_size(16), aligned(16)));
|
||||||
|
typedef unsigned long ulong; // __attribute((aligned(16)));
|
||||||
|
|
||||||
|
#define V(x) (v2df){(x), (x)}
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
disable_denormals();
|
disable_denormals();
|
||||||
|
|
Loading…
Reference in a new issue