conv:conv
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revisionNext revisionBoth sides next revision | ||
conv:conv [2008/01/04 12:32] – deva | conv:conv [2008/01/24 11:54] – deva | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | =====Convolution implementation pages===== | + | =====Convolution implementation pages (Spring 2008 studies group under Ole Caprani)===== |
+ | Our goal is to implement a realtime plugin to do convolution.\\ | ||
+ | The basic "Input Side" is way too slow (convolution implemented with two for loops), so we will be experimenting with a dft-multiply-idft algorithm (sometimes called "The Overlap-and-Save algorithm" | ||
+ | To do the actual fft we will use the fftw3 library (http:// | ||
+ | The next step is to implement at test the " | ||
+ | |||
+ | ====Group==== | ||
+ | * Jonas Suhr Christensen - // | ||
+ | * Bent Bisballe Nyeng - // | ||
+ | |||
+ | ====Analysis==== | ||
+ | The following graphs show the execution time of a single convolution iteration, using the overlap and save algorithm with various optimizations enabled. | ||
+ | The algorithm optimizations are named as follows: | ||
+ | * **Double** This is an implementation with no particular optimizations. | ||
+ | * **Float** This is a simple implementation using float as the internal datatype. | ||
+ | * **Real Double** This is an implementation using real ffts instead of complex. | ||
+ | * **Real Float** This is an implementation using real ffts instead of complex, with float as internal datatype. | ||
+ | * **MT Double** This is an implementation running on multiple CPUs. | ||
+ | * **MT Float** This is an implementation with float as internal datatype, running on multiple CPUs. | ||
+ | * **MT Real Double** This is an implementation using real ffts instead of complex, running on multiple CPUs. | ||
+ | * **MT Real Float** This is an implementation using real ffts instead of complex, with float as internal datatype, running on multiple CPUs. | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
====Links==== | ====Links==== | ||
- | * http:// | + | |
- | * http:// | + | |
- | * http:// | + | * http:// |
- | * http:// | + | * http:// |
+ | * http:// | ||
+ | * http:// | ||
+ | * http:// | ||
+ | * http:// | ||
====Code==== | ====Code==== | ||
Compile with: | Compile with: | ||
< | < | ||
- | g++ conv.cc -o conv -lfftw | + | g++ -O2 -mmmx -msse -msse2 conv_opt.cc -o conv_opt -lfftw3 -lsndfile |
+ | </ | ||
+ | or | ||
+ | < | ||
+ | g++ -O2 -mmmx -msse -msse2 conv_opt.cc -o conv_opt -DFFTW_FLOAT -lfftw3f | ||
</ | </ | ||
+ | to use floating point fftw instead of double.\\ | ||
+ | The actual code is shown below: | ||
<code c> | <code c> | ||
#include < | #include < | ||
#include < | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #include " | ||
- | // | + | //#define BUFFER_SIZE_MS 113 //ms 113, // Find a size with small prime factors |
+ | #define BUFFER_SIZE_MS 1000 //ms 113, // Find a size with small prime factors | ||
+ | #define SAMPLE_RATE_HZ 44100 //hz | ||
+ | #define BUFFER_SIZE_SAMPLES 4096 | ||
+ | // | ||
+ | |||
+ | //#define MEASURE | ||
+ | |||
+ | #ifdef FFTW_FLOAT | ||
+ | #define FFTW_DATATYPE fftwf_complex | ||
+ | #define FFTW_PLAN_DFT fftwf_plan_dft_1d | ||
+ | #define FFTW_EXECUTE fftwf_execute | ||
+ | #define FFTW_PLAN fftwf_plan | ||
+ | #define FFTW_DESTROY_PLAN fftwf_destroy_plan | ||
+ | #define FFTW_FREE fftwf_free | ||
+ | #define FFTW_MALLOC fftwf_malloc | ||
+ | typedef | ||
+ | #else | ||
+ | #define FFTW_DATATYPE fftw_complex | ||
+ | #define FFTW_PLAN_DFT fftw_plan_dft_1d | ||
+ | #define FFTW_EXECUTE fftw_execute | ||
+ | #define FFTW_PLAN fftw_plan | ||
+ | #define FFTW_DESTROY_PLAN fftw_destroy_plan | ||
+ | #define FFTW_FREE fftw_free | ||
+ | #define FFTW_MALLOC fftw_malloc | ||
typedef double sample_t; | typedef double sample_t; | ||
+ | #endif | ||
typedef struct { | typedef struct { | ||
Line 34: | Line 99: | ||
} | } | ||
- | samples_t *read_data(const char *filename) | + | samples_t *read_data(const char *filename, double scale) |
{ | { | ||
- | size_t size = 3; | + | printf(" |
+ | |||
+ | SndfileHandle sfh(filename); | ||
+ | size_t size = sfh.seek(0, SEEK_END); | ||
+ | sfh.seek(0, | ||
+ | // size_t size = 3; | ||
+ | | ||
sample_t *data = (sample_t*)malloc(sizeof(sample_t) * size); | sample_t *data = (sample_t*)malloc(sizeof(sample_t) * size); | ||
- | data[0] = 1; | + | sfh.read(data, size); |
- | data[1] = 1; | + | |
- | data[2] = 1; | + | printf(" |
+ | |||
+ | for(int i = 0; i < size; i++) { | ||
+ | data[i] = scale * data[i]; | ||
+ | } | ||
+ | |||
+ | // for(int i = size / 2; i < (size / 2) +20; i++) printf(" | ||
samples_t *samples = new_samples(data, | samples_t *samples = new_samples(data, | ||
+ | printf(" | ||
+ | |||
return samples; | return samples; | ||
} | } | ||
Line 48: | Line 128: | ||
void write_data(const char *filename, samples_t *samples) | void write_data(const char *filename, samples_t *samples) | ||
{ | { | ||
- | for(int i = 0; i < samples-> | + | printf(" |
- | printf(" | + | |
- | } | + | // for(int i = samples-> |
+ | |||
+ | SndfileHandle sfh(filename, | ||
+ | |||
+ | sfh.write(samples-> | ||
+ | |||
+ | printf(" | ||
} | } | ||
Line 56: | Line 142: | ||
#define IM 1 | #define IM 1 | ||
- | samples_t | + | |
+ | static FFTW_DATATYPE | ||
+ | static FFTW_DATATYPE | ||
+ | static FFTW_DATATYPE *out = NULL; | ||
+ | static FFTW_DATATYPE *inx = NULL; | ||
+ | static FFTW_DATATYPE *xfft = NULL; | ||
+ | void init_fftw(samples_t *h) | ||
{ | { | ||
- | int size = x->size + h->size - 1; | + | FFTW_PLAN p; |
- | sample_t *data = (sample_t*)calloc(sizeof(sample_t), | + | |
- | samples_t | + | int size = BUFFER_SIZE_SAMPLES + h->size - 1; |
+ | hfft = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | FFTW_DATATYPE *inh = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
- | // FFTW | + | for(int i = 0; i < size; i++) { |
- | fftw_plan p; | + | inh[i][RE] = inh[i][IM] = 0; |
- | + | } | |
- | int N = x->size; | + | for(int i = 0; i < h-> |
+ | inh[i][RE] = h-> | ||
+ | } | ||
- | fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); | + | p = FFTW_PLAN_DFT(size, |
- | fftw_complex *xfft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); | + | |
- | fftw_complex *hfft = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * N); | + | FFTW_DESTROY_PLAN(p); |
- | + | ||
- | p = fftw_plan_dft_r2c_1d(x->size, x->data, xfft, 0); | + | |
- | fftw_execute(p); | + | |
- | + | ||
- | /* | + | |
- | for(int i = 0; i < N; i++) { | + | |
- | printf(" | + | |
- | } | + | |
- | */ | + | |
- | + | ||
- | p = fftw_plan_dft_r2c_1d(h-> | + | |
- | fftw_execute(p); | + | |
- | + | ||
- | for(int i = 0; i < N; i++ ) { | + | |
- | out[i][RE] = hfft[i][RE] * xfft[i][RE] - hfft[i][IM] * xfft[i][IM]; | + | |
- | out[i][IM] = xfft[i][IM] * hfft[i][RE] + xfft[i][RE] * hfft[i][IM]; | + | |
- | } | + | |
- | + | ||
- | // invers FFTW | + | |
- | + | ||
- | p = fftw_plan_dft_c2r_1d(N, | + | |
- | fftw_execute(p); | + | |
- | for(int i = 0; i < N; i++ ) { | + | FFTW_FREE(inh); |
- | data[i] /= N; | + | |
- | } | + | |
- | fftw_destroy_plan(p); | + | outm = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); |
- | fftw_free(xfft); | + | out = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); |
- | fftw_free(hfft); | + | inx = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); |
- | // end of FFTW | + | for(int i = 0; i < size; i++) { |
+ | inx[i][RE] = inx[i][IM] = 0; | ||
+ | } | ||
+ | |||
+ | | ||
+ | |||
+ | |||
+ | } | ||
+ | |||
+ | void deinit_fftw() | ||
+ | { | ||
+ | FFTW_FREE(hfft); | ||
+ | | ||
+ | FFTW_FREE(outm); | ||
+ | FFTW_FREE(inx); | ||
+ | FFTW_FREE(xfft); | ||
+ | } | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | long static time_used; | ||
+ | #endif | ||
+ | samples_t *conv_fftwc_preprocess(samples_t *x, size_t hsize/*samples_t *h*/) | ||
+ | { | ||
+ | assert(hfft); | ||
+ | assert(out); | ||
+ | assert(outm); | ||
+ | assert(inx); | ||
+ | assert(xfft); | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | struct timeb systime; | ||
+ | long begin = 0; | ||
+ | long end = 0; | ||
+ | ftime(& | ||
+ | begin = systime.time * 1000 + systime.millitm; | ||
+ | #endif | ||
+ | |||
+ | FFTW_PLAN p; | ||
+ | |||
+ | int size = x->size + hsize - 1; | ||
+ | sample_t *data = (sample_t*)calloc(sizeof(sample_t), | ||
+ | samples_t *y = new_samples(data, | ||
+ | for(int i = 0; i < x->size; i++) { | ||
+ | inx[i][RE] = x-> | ||
+ | } | ||
+ | |||
+ | p = FFTW_PLAN_DFT(size, | ||
+ | FFTW_EXECUTE(p); | ||
+ | |||
+ | |||
+ | for(int i = 0; i < size; i++ ) { | ||
+ | outm[i][RE] = hfft[i][RE] * xfft[i][RE] - hfft[i][IM] * xfft[i][IM]; | ||
+ | outm[i][IM] = xfft[i][IM] * hfft[i][RE] + xfft[i][RE] * hfft[i][IM]; | ||
+ | } | ||
+ | |||
+ | |||
+ | p = FFTW_PLAN_DFT(size, | ||
+ | FFTW_EXECUTE(p); | ||
+ | |||
+ | |||
+ | |||
+ | for(int i = 0; i < size; i++) { | ||
+ | data[i] = out[i][RE] / size; | ||
+ | } | ||
+ | |||
+ | FFTW_DESTROY_PLAN(p); | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | ftime(& | ||
+ | end = systime.time * 1000 + systime.millitm; | ||
+ | printf(" | ||
+ | time_used = end - begin; | ||
+ | #endif | ||
+ | |||
+ | |||
return y; | return y; | ||
} | } | ||
- | samples_t | + | sample_t |
{ | { | ||
- | int size = x-> | + | unsigned |
- | sample_t *data = (sample_t*)calloc(sizeof(sample_t), size); | + | static |
+ | static unsigned int p = 0; | ||
- | samples_t *y = new_samples(data, size); | + | samples_t *input_s |
+ | samples_t *y; | ||
+ | |||
+ | y = conv_fftwc_preprocess(input_s, | ||
+ | |||
+ | for(int i = 0; i < BUFFER_SIZE_SAMPLES; | ||
+ | buffer[(i + p + filter_size - 1) % buffer_size] = 0; // | ||
+ | } | ||
+ | |||
+ | for(int i = 0/* BUFFER_SIZE_SAMPLES*/; | ||
+ | buffer[(i + p) % buffer_size] += y-> | ||
+ | } | ||
- | for(int xn = 0; xn < x->size; xn++) { | + | // alloker returbuffer |
- | for(int | + | sample_t *output_buffer = (sample_t*)malloc(BUFFER_SIZE_SAMPLES * sizeof(sample_t)); |
- | y-> | + | |
- | } | + | // kopier retsultat til output buffer |
+ | for(int | ||
+ | output_buffer[i] = buffer[(i + p) % buffer_size]; | ||
} | } | ||
- | return | + | p += BUFFER_SIZE_SAMPLES; |
+ | |||
+ | // frigør | ||
+ | free(input_s); | ||
+ | |||
+ | return output_buffer; | ||
} | } | ||
+ | sample_t puls = 0.5; | ||
int main(int argc, char *argv[]) | int main(int argc, char *argv[]) | ||
{ | { | ||
// | // | ||
+ | if(argc != 4) { | ||
+ | printf(" | ||
+ | exit(1); | ||
+ | } | ||
+ | |||
int xsz, hsz; | int xsz, hsz; | ||
- | samples_t *h = read_data("h.dat"); | + | const char *filterfile = argv[1]; |
- | samples_t *x = read_data("x.dat"); | + | const char *inputfile = argv[2]; |
+ | const char *outputfile = argv[3]; | ||
+ | |||
+ | samples_t *h = read_data(filterfile, 0.08); | ||
+ | // | ||
+ | samples_t *x = read_data(inputfile, 0.5); | ||
+ | // | ||
+ | |||
+ | |||
+ | size_t s = h->size + x->size - 1; | ||
+ | sample_t *d = (sample_t*)malloc(s | ||
+ | |||
+ | samples_t *y = new_samples(d, | ||
+ | |||
+ | |||
+ | // 0 padding | ||
+ | sample_t *pd = (sample_t*)malloc((y-> | ||
+ | for(int i = 0; i < x->size; i++) { | ||
+ | pd[i] = x-> | ||
+ | } | ||
+ | for(int i = x->size; i < y->size + (y->size % BUFFER_SIZE_SAMPLES); | ||
+ | pd[i] = 0; | ||
+ | } | ||
+ | x = new_samples(pd, | ||
+ | |||
+ | init_fftw(h); | ||
+ | |||
+ | int e = 0; | ||
+ | #ifdef MEASURE | ||
+ | // find a good delta value (ie. small prime factors) | ||
+ | int o = 20000; | ||
+ | for(int i = 0; i < 20; i++) { | ||
+ | printf("Measuring %d: ", i); | ||
+ | filter(& | ||
+ | if(time_used < o) { e = i; o = time_used; } | ||
+ | } | ||
+ | #endif | ||
+ | |||
+ | for(int i = 0; i < y->size; i += BUFFER_SIZE_SAMPLES) { | ||
+ | #ifdef MEASURE | ||
+ | printf(" | ||
+ | #endif | ||
+ | sample_t *data = filter(& | ||
+ | |||
+ | for(int j = 0; j < BUFFER_SIZE_SAMPLES; | ||
+ | y-> | ||
+ | } | ||
+ | |||
+ | free(data); | ||
+ | } | ||
- | printf(" | + | deinit_fftw(); |
- | samples_t *y = conv_simple(x, | + | |
- | write_data(" | + | |
- | free(y); | + | |
+ | write_data(outputfile, | ||
- | printf(" | ||
- | y = conv_fftw(x, | ||
- | write_data(" | ||
free(y); | free(y); | ||
conv/conv.txt · Last modified: 2008/09/04 12:09 by deva