conv:conv
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revisionNext revisionBoth sides next revision | ||
conv:conv [2008/01/03 15:01] – deva | conv:conv [2008/01/24 11:54] – deva | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | =====Convolution implementation pages===== | + | =====Convolution implementation pages (Spring 2008 studies group under Ole Caprani)===== |
+ | Our goal is to implement a realtime plugin to do convolution.\\ | ||
+ | The basic "Input Side" is way too slow (convolution implemented with two for loops), so we will be experimenting with a dft-multiply-idft algorithm (sometimes called "The Overlap-and-Save algorithm" | ||
+ | To do the actual fft we will use the fftw3 library (http:// | ||
+ | The next step is to implement at test the " | ||
+ | |||
+ | ====Group==== | ||
+ | * Jonas Suhr Christensen - // | ||
+ | * Bent Bisballe Nyeng - // | ||
+ | |||
+ | ====Analysis==== | ||
+ | The following graphs show the execution time of a single convolution iteration, using the overlap and save algorithm with various optimizations enabled. | ||
+ | The algorithm optimizations are named as follows: | ||
+ | * **Double** This is an implementation with no particular optimizations. | ||
+ | * **Float** This is a simple implementation using float as the internal datatype. | ||
+ | * **Real Double** This is an implementation using real ffts instead of complex. | ||
+ | * **Real Float** This is an implementation using real ffts instead of complex, with float as internal datatype. | ||
+ | * **MT Double** This is an implementation running on multiple CPUs. | ||
+ | * **MT Float** This is an implementation with float as internal datatype, running on multiple CPUs. | ||
+ | * **MT Real Double** This is an implementation using real ffts instead of complex, running on multiple CPUs. | ||
+ | * **MT Real Float** This is an implementation using real ffts instead of complex, with float as internal datatype, running on multiple CPUs. | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
+ | {{: | ||
====Links==== | ====Links==== | ||
- | * http:// | + | |
- | * http:// | + | |
- | * http:// | + | * http:// |
- | * http:// | + | * http:// |
+ | * http:// | ||
+ | * http:// | ||
+ | * http:// | ||
+ | * http:// | ||
+ | |||
+ | ====Code==== | ||
+ | Compile with: | ||
+ | < | ||
+ | g++ -O2 -mmmx -msse -msse2 conv_opt.cc -o conv_opt -lfftw3 -lsndfile | ||
+ | </ | ||
+ | or | ||
+ | < | ||
+ | g++ -O2 -mmmx -msse -msse2 conv_opt.cc -o conv_opt -DFFTW_FLOAT -lfftw3f -lsndfile | ||
+ | </ | ||
+ | to use floating point fftw instead of double.\\ | ||
+ | |||
+ | The actual code is shown below: | ||
+ | <code c> | ||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #include < | ||
+ | #include " | ||
+ | |||
+ | //#define BUFFER_SIZE_MS 113 //ms 113, // Find a size with small prime factors | ||
+ | #define BUFFER_SIZE_MS 1000 //ms 113, // Find a size with small prime factors | ||
+ | #define SAMPLE_RATE_HZ 44100 //hz | ||
+ | #define BUFFER_SIZE_SAMPLES 4096 | ||
+ | // | ||
+ | |||
+ | //#define MEASURE | ||
+ | |||
+ | #ifdef FFTW_FLOAT | ||
+ | #define FFTW_DATATYPE fftwf_complex | ||
+ | #define FFTW_PLAN_DFT fftwf_plan_dft_1d | ||
+ | #define FFTW_EXECUTE fftwf_execute | ||
+ | #define FFTW_PLAN fftwf_plan | ||
+ | #define FFTW_DESTROY_PLAN fftwf_destroy_plan | ||
+ | #define FFTW_FREE fftwf_free | ||
+ | #define FFTW_MALLOC fftwf_malloc | ||
+ | typedef float sample_t; | ||
+ | #else | ||
+ | #define FFTW_DATATYPE fftw_complex | ||
+ | #define FFTW_PLAN_DFT fftw_plan_dft_1d | ||
+ | #define FFTW_EXECUTE fftw_execute | ||
+ | #define FFTW_PLAN fftw_plan | ||
+ | #define FFTW_DESTROY_PLAN fftw_destroy_plan | ||
+ | #define FFTW_FREE fftw_free | ||
+ | #define FFTW_MALLOC fftw_malloc | ||
+ | typedef double sample_t; | ||
+ | #endif | ||
+ | |||
+ | typedef struct { | ||
+ | sample_t *data; | ||
+ | size_t size; | ||
+ | } samples_t; | ||
+ | |||
+ | |||
+ | samples_t *new_samples(sample_t *data, size_t size) | ||
+ | { | ||
+ | samples_t *samples = (samples_t*)malloc(sizeof(samples_t)); | ||
+ | samples-> | ||
+ | samples-> | ||
+ | return samples; | ||
+ | } | ||
+ | |||
+ | samples_t *read_data(const char *filename, double scale) | ||
+ | { | ||
+ | printf(" | ||
+ | |||
+ | SndfileHandle sfh(filename); | ||
+ | size_t size = sfh.seek(0, SEEK_END); | ||
+ | sfh.seek(0, | ||
+ | // size_t size = 3; | ||
+ | |||
+ | sample_t *data = (sample_t*)malloc(sizeof(sample_t) * size); | ||
+ | sfh.read(data, | ||
+ | |||
+ | printf(" | ||
+ | |||
+ | for(int i = 0; i < size; i++) { | ||
+ | data[i] = scale * data[i]; | ||
+ | } | ||
+ | |||
+ | // for(int i = size / 2; i < (size / 2) +20; i++) printf(" | ||
+ | |||
+ | samples_t *samples = new_samples(data, | ||
+ | printf(" | ||
+ | |||
+ | return samples; | ||
+ | } | ||
+ | |||
+ | |||
+ | void write_data(const char *filename, samples_t *samples) | ||
+ | { | ||
+ | printf(" | ||
+ | |||
+ | // for(int i = samples-> | ||
+ | |||
+ | SndfileHandle sfh(filename, | ||
+ | |||
+ | sfh.write(samples-> | ||
+ | |||
+ | printf(" | ||
+ | } | ||
+ | |||
+ | #define RE 0 | ||
+ | #define IM 1 | ||
+ | |||
+ | |||
+ | static FFTW_DATATYPE *hfft = NULL; | ||
+ | static FFTW_DATATYPE *outm = NULL; | ||
+ | static FFTW_DATATYPE *out = NULL; | ||
+ | static FFTW_DATATYPE *inx = NULL; | ||
+ | static FFTW_DATATYPE *xfft = NULL; | ||
+ | void init_fftw(samples_t *h) | ||
+ | { | ||
+ | FFTW_PLAN p; | ||
+ | |||
+ | int size = BUFFER_SIZE_SAMPLES + h->size - 1; | ||
+ | hfft = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | FFTW_DATATYPE *inh = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | |||
+ | for(int i = 0; i < size; i++) { | ||
+ | inh[i][RE] = inh[i][IM] = 0; | ||
+ | } | ||
+ | for(int i = 0; i < h->size; i++) { | ||
+ | inh[i][RE] = h-> | ||
+ | } | ||
+ | |||
+ | p = FFTW_PLAN_DFT(size, | ||
+ | FFTW_EXECUTE(p); | ||
+ | FFTW_DESTROY_PLAN(p); | ||
+ | |||
+ | FFTW_FREE(inh); | ||
+ | |||
+ | outm = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | out = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | inx = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | for(int i = 0; i < size; i++) { | ||
+ | inx[i][RE] = inx[i][IM] = 0; | ||
+ | } | ||
+ | |||
+ | xfft = (FFTW_DATATYPE*) FFTW_MALLOC(sizeof(FFTW_DATATYPE) * size); | ||
+ | |||
+ | |||
+ | } | ||
+ | |||
+ | void deinit_fftw() | ||
+ | { | ||
+ | FFTW_FREE(hfft); | ||
+ | FFTW_FREE(out); | ||
+ | FFTW_FREE(outm); | ||
+ | FFTW_FREE(inx); | ||
+ | FFTW_FREE(xfft); | ||
+ | } | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | long static time_used; | ||
+ | #endif | ||
+ | samples_t *conv_fftwc_preprocess(samples_t *x, size_t hsize/ | ||
+ | { | ||
+ | assert(hfft); | ||
+ | assert(out); | ||
+ | assert(outm); | ||
+ | assert(inx); | ||
+ | assert(xfft); | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | struct timeb systime; | ||
+ | long begin = 0; | ||
+ | long end = 0; | ||
+ | ftime(& | ||
+ | begin = systime.time * 1000 + systime.millitm; | ||
+ | #endif | ||
+ | |||
+ | FFTW_PLAN p; | ||
+ | |||
+ | int size = x->size + hsize - 1; | ||
+ | sample_t *data = (sample_t*)calloc(sizeof(sample_t), | ||
+ | samples_t *y = new_samples(data, | ||
+ | |||
+ | for(int i = 0; i < x->size; i++) { | ||
+ | inx[i][RE] = x-> | ||
+ | } | ||
+ | |||
+ | p = FFTW_PLAN_DFT(size, | ||
+ | FFTW_EXECUTE(p); | ||
+ | |||
+ | |||
+ | for(int i = 0; i < size; i++ ) { | ||
+ | outm[i][RE] = hfft[i][RE] * xfft[i][RE] - hfft[i][IM] * xfft[i][IM]; | ||
+ | outm[i][IM] = xfft[i][IM] * hfft[i][RE] + xfft[i][RE] * hfft[i][IM]; | ||
+ | } | ||
+ | |||
+ | |||
+ | p = FFTW_PLAN_DFT(size, | ||
+ | FFTW_EXECUTE(p); | ||
+ | |||
+ | |||
+ | |||
+ | for(int i = 0; i < size; i++) { | ||
+ | data[i] = out[i][RE] / size; | ||
+ | } | ||
+ | |||
+ | FFTW_DESTROY_PLAN(p); | ||
+ | |||
+ | #ifdef MEASURE | ||
+ | ftime(& | ||
+ | end = systime.time * 1000 + systime.millitm; | ||
+ | printf(" | ||
+ | time_used = end - begin; | ||
+ | #endif | ||
+ | |||
+ | |||
+ | return y; | ||
+ | } | ||
+ | |||
+ | |||
+ | sample_t *filter(const sample_t *input, size_t filter_size) | ||
+ | { | ||
+ | unsigned int buffer_size = BUFFER_SIZE_SAMPLES + filter_size - 1; | ||
+ | static sample_t *buffer = (sample_t*)calloc(buffer_size, | ||
+ | static unsigned int p = 0; | ||
+ | |||
+ | samples_t *input_s = new_samples((sample_t *)input, BUFFER_SIZE_SAMPLES); | ||
+ | |||
+ | samples_t *y; | ||
+ | |||
+ | y = conv_fftwc_preprocess(input_s, | ||
+ | |||
+ | for(int i = 0; i < BUFFER_SIZE_SAMPLES; | ||
+ | buffer[(i + p + filter_size - 1) % buffer_size] = 0; // | ||
+ | } | ||
+ | |||
+ | for(int i = 0/* BUFFER_SIZE_SAMPLES*/; | ||
+ | buffer[(i + p) % buffer_size] += y-> | ||
+ | } | ||
+ | |||
+ | // alloker returbuffer | ||
+ | sample_t *output_buffer = (sample_t*)malloc(BUFFER_SIZE_SAMPLES * sizeof(sample_t)); | ||
+ | |||
+ | // kopier retsultat til output buffer | ||
+ | for(int i = 0; i < BUFFER_SIZE_SAMPLES; | ||
+ | output_buffer[i] = buffer[(i + p) % buffer_size]; | ||
+ | } | ||
+ | |||
+ | p += BUFFER_SIZE_SAMPLES; | ||
+ | |||
+ | // frigør y | ||
+ | free(input_s); | ||
+ | |||
+ | return output_buffer; | ||
+ | } | ||
+ | |||
+ | |||
+ | sample_t puls = 0.5; | ||
+ | int main(int argc, char *argv[]) | ||
+ | { | ||
+ | // | ||
+ | |||
+ | if(argc != 4) { | ||
+ | printf(" | ||
+ | exit(1); | ||
+ | } | ||
+ | |||
+ | int xsz, hsz; | ||
+ | |||
+ | const char *filterfile = argv[1]; | ||
+ | const char *inputfile = argv[2]; | ||
+ | const char *outputfile = argv[3]; | ||
+ | |||
+ | samples_t *h = read_data(filterfile, | ||
+ | // | ||
+ | samples_t *x = read_data(inputfile, | ||
+ | // | ||
+ | |||
+ | |||
+ | size_t s = h->size + x->size - 1; | ||
+ | sample_t *d = (sample_t*)malloc(s | ||
+ | |||
+ | samples_t *y = new_samples(d, | ||
+ | |||
+ | |||
+ | // 0 padding | ||
+ | sample_t *pd = (sample_t*)malloc((y-> | ||
+ | for(int i = 0; i < x->size; i++) { | ||
+ | pd[i] = x-> | ||
+ | } | ||
+ | for(int i = x->size; i < y->size + (y->size % BUFFER_SIZE_SAMPLES); | ||
+ | pd[i] = 0; | ||
+ | } | ||
+ | x = new_samples(pd, | ||
+ | |||
+ | init_fftw(h); | ||
+ | |||
+ | int e = 0; | ||
+ | #ifdef MEASURE | ||
+ | // find a good delta value (ie. small prime factors) | ||
+ | int o = 20000; | ||
+ | for(int i = 0; i < 20; i++) { | ||
+ | printf(" | ||
+ | filter(& | ||
+ | if(time_used < o) { e = i; o = time_used; } | ||
+ | } | ||
+ | #endif | ||
+ | |||
+ | for(int i = 0; i < y->size; i += BUFFER_SIZE_SAMPLES) { | ||
+ | #ifdef MEASURE | ||
+ | printf(" | ||
+ | #endif | ||
+ | sample_t *data = filter(& | ||
+ | |||
+ | for(int j = 0; j < BUFFER_SIZE_SAMPLES; | ||
+ | y-> | ||
+ | } | ||
+ | |||
+ | free(data); | ||
+ | } | ||
+ | |||
+ | |||
+ | deinit_fftw(); | ||
+ | |||
+ | write_data(outputfile, | ||
+ | |||
+ | free(y); | ||
+ | |||
+ | free(h); | ||
+ | free(x); | ||
+ | } | ||
+ | </ |
conv/conv.txt · Last modified: 2008/09/04 12:09 by deva