optimize biquads
Using process.h and running in-place is 19% faster on this machine. From there, using intrinsics yields another 94%, for a total speedup of 130%.
This commit is contained in:
parent
db0cd0a7dd
commit
75fa193a90
4 changed files with 98 additions and 36 deletions
|
@ -1,7 +1,5 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
|
|
||||||
#define ID 0x0DEFACED
|
#define ID 0x0DEFACED
|
||||||
#define LABEL "crap_eq_const"
|
#define LABEL "crap_eq_const"
|
||||||
#define NAME "crap Constant Equalizer"
|
#define NAME "crap Constant Equalizer"
|
||||||
|
@ -9,45 +7,59 @@
|
||||||
#define COPYRIGHT "MIT"
|
#define COPYRIGHT "MIT"
|
||||||
#define PARAMETERS 0
|
#define PARAMETERS 0
|
||||||
|
|
||||||
|
#define BLOCK_SIZE 256
|
||||||
|
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
#define BANDS 12
|
#define BANDS 12
|
||||||
typedef struct {
|
typedef struct {
|
||||||
biquad filters[2][BANDS];
|
biquad filters[2][BANDS];
|
||||||
} personal;
|
} personal;
|
||||||
|
|
||||||
INNER double
|
static void
|
||||||
process_one(biquad *filters, double samp)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < BANDS; i++)
|
|
||||||
samp = biquad_run(&filters[i], samp);
|
|
||||||
return samp;
|
|
||||||
}
|
|
||||||
|
|
||||||
INNER void
|
|
||||||
process(personal *data,
|
|
||||||
float *in_L, float *in_R,
|
|
||||||
float *out_L, float *out_R,
|
|
||||||
unsigned long count)
|
|
||||||
{
|
|
||||||
disable_denormals();
|
|
||||||
for (unsigned long pos = 0; pos < count; pos++) {
|
|
||||||
out_L[pos] = process_one(data->filters[0], in_L[pos]);
|
|
||||||
out_R[pos] = process_one(data->filters[1], in_R[pos]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
INNER void
|
|
||||||
process_double(personal *data,
|
process_double(personal *data,
|
||||||
double *in_L, double *in_R,
|
double *in_L, double *in_R,
|
||||||
double *out_L, double *out_R,
|
double *out_L, double *out_R,
|
||||||
unsigned long count)
|
ulong count)
|
||||||
{
|
{
|
||||||
disable_denormals();
|
disable_denormals();
|
||||||
for (unsigned long pos = 0; pos < count; pos++) {
|
|
||||||
out_L[pos] = process_one(data->filters[0], in_L[pos]);
|
double buf[2*BLOCK_SIZE];
|
||||||
out_R[pos] = process_one(data->filters[1], in_R[pos]);
|
|
||||||
|
biquad *f0, *f1;
|
||||||
|
|
||||||
|
for (ulong pos = 0; pos < count; pos += BLOCK_SIZE) {
|
||||||
|
ulong rem = BLOCK_SIZE;
|
||||||
|
if (pos + BLOCK_SIZE > count)
|
||||||
|
rem = count - pos;
|
||||||
|
|
||||||
|
for (ulong i = 0; i < rem; i++) {
|
||||||
|
buf[i*2+0] = in_L[i];
|
||||||
|
buf[i*2+1] = in_R[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
f0 = data->filters[0];
|
||||||
|
f1 = data->filters[1];
|
||||||
|
for (ulong i = 0; i < BANDS; i++) {
|
||||||
|
biquad_run_block_stereo(f0, f1, buf, rem);
|
||||||
|
f0++;
|
||||||
|
f1++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ulong i = 0; i < rem; i++) {
|
||||||
|
out_L[i] = buf[i*2+0];
|
||||||
|
out_R[i] = buf[i*2+1];
|
||||||
|
}
|
||||||
|
|
||||||
|
in_L += BLOCK_SIZE;
|
||||||
|
in_R += BLOCK_SIZE;
|
||||||
|
out_L += BLOCK_SIZE;
|
||||||
|
out_R += BLOCK_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "process.h"
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
construct(personal *data)
|
construct(personal *data)
|
||||||
{}
|
{}
|
||||||
|
@ -70,7 +82,7 @@ pause(personal *data)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
adjust(personal *data, unsigned long fs)
|
adjust(personal *data, ulong fs)
|
||||||
{
|
{
|
||||||
biquad *filters = data->filters[0];
|
biquad *filters = data->filters[0];
|
||||||
filters[ 0] = biquad_gen(FILT_PEAKING, 62.0, 5.3, 0.55, fs);
|
filters[ 0] = biquad_gen(FILT_PEAKING, 62.0, 5.3, 0.55, fs);
|
||||||
|
|
10
crap/tube.h
10
crap/tube.h
|
@ -2,10 +2,6 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
#include "param.h"
|
|
||||||
#include "os2piir.h"
|
|
||||||
|
|
||||||
#define ID 0x50F7BA11
|
#define ID 0x50F7BA11
|
||||||
#define LABEL "crap_tube"
|
#define LABEL "crap_tube"
|
||||||
#define NAME "crap Tube Distortion"
|
#define NAME "crap Tube Distortion"
|
||||||
|
@ -17,7 +13,9 @@
|
||||||
#define BLOCK_SIZE 256
|
#define BLOCK_SIZE 256
|
||||||
#define FULL_SIZE (BLOCK_SIZE*OVERSAMPLING)
|
#define FULL_SIZE (BLOCK_SIZE*OVERSAMPLING)
|
||||||
|
|
||||||
typedef unsigned long ulong;
|
#include "util.h"
|
||||||
|
#include "param.h"
|
||||||
|
#include "os2piir.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
double desired, actual, speed;
|
double desired, actual, speed;
|
||||||
|
@ -61,7 +59,7 @@ process_one(double x, double drive, double wet)
|
||||||
return (distort(x*drive)/drive*0.79 - x)*wet + x;
|
return (distort(x*drive)/drive*0.79 - x)*wet + x;
|
||||||
}
|
}
|
||||||
|
|
||||||
INNER void
|
static void
|
||||||
process_double(personal *data,
|
process_double(personal *data,
|
||||||
double *in_L, double *in_R,
|
double *in_L, double *in_R,
|
||||||
double *out_L, double *out_R,
|
double *out_L, double *out_R,
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define INNER static inline
|
#define INNER static inline
|
||||||
|
typedef unsigned long ulong;
|
||||||
|
|
||||||
INNER void
|
INNER void
|
||||||
disable_denormals();
|
disable_denormals();
|
||||||
|
|
|
@ -48,7 +48,7 @@ design(double cw, double sw,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
INNER biquad
|
static biquad
|
||||||
biquad_gen(filter_t type, double fc, double gain, double bw, double fs)
|
biquad_gen(filter_t type, double fc, double gain, double bw, double fs)
|
||||||
{
|
{
|
||||||
double w0, cw, sw, A, As, Q;
|
double w0, cw, sw, A, As, Q;
|
||||||
|
@ -102,3 +102,54 @@ biquad_run(biquad *bq, double x)
|
||||||
|
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
INNER void
|
||||||
|
biquad_run_block_stereo(biquad *bq_L, biquad *bq_R,
|
||||||
|
double *buf, ulong count)
|
||||||
|
#ifdef __SSE2__
|
||||||
|
{
|
||||||
|
__m128d b0, b1, b2, a1, a2, x1, x2, y1, y2;
|
||||||
|
|
||||||
|
b0 = _mm_set1_pd(bq_L->b0);
|
||||||
|
b1 = _mm_set1_pd(bq_L->b1);
|
||||||
|
b2 = _mm_set1_pd(bq_L->b2);
|
||||||
|
a1 = _mm_set1_pd(bq_L->a1);
|
||||||
|
a2 = _mm_set1_pd(bq_L->a2);
|
||||||
|
|
||||||
|
x1 = _mm_setr_pd(bq_L->x1, bq_R->x1);
|
||||||
|
x2 = _mm_setr_pd(bq_L->x2, bq_R->x2);
|
||||||
|
y1 = _mm_setr_pd(bq_L->y1, bq_R->y1);
|
||||||
|
y2 = _mm_setr_pd(bq_L->y2, bq_R->y2);
|
||||||
|
|
||||||
|
for (int i = 0; i < 2*count; i += 2) {
|
||||||
|
__m128d x = _mm_load_pd(buf + i);
|
||||||
|
__m128d y = b0*x + b1*x1 + b2*x2 + a1*y1 + a2*y2;
|
||||||
|
x2 = x1;
|
||||||
|
y2 = y1;
|
||||||
|
x1 = x;
|
||||||
|
y1 = y;
|
||||||
|
_mm_store_pd(buf + i, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
double temp[8];
|
||||||
|
_mm_store_pd(temp+0, x1);
|
||||||
|
_mm_store_pd(temp+2, x2);
|
||||||
|
_mm_store_pd(temp+4, y1);
|
||||||
|
_mm_store_pd(temp+6, y2);
|
||||||
|
bq_L->x1 = temp[0];
|
||||||
|
bq_R->x1 = temp[1];
|
||||||
|
bq_L->x2 = temp[2];
|
||||||
|
bq_R->x2 = temp[3];
|
||||||
|
bq_L->y1 = temp[4];
|
||||||
|
bq_R->y1 = temp[5];
|
||||||
|
bq_L->y2 = temp[6];
|
||||||
|
bq_R->y2 = temp[7];
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
{
|
||||||
|
for (ulong i = 0; i < 2*count; i += 2) {
|
||||||
|
buf[i+0] = biquad_run(bq_L, buf[i+0]);
|
||||||
|
buf[i+1] = biquad_run(bq_R, buf[i+1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue