先上源代码!
#include <stdio.h>
#include <stdint.h>
#include <math.h>
typedef struct {
float r;
float i;
}complex;
static void butter_fly(complex* a, complex* b, const complex* c) {
complex bc;
bc.r = b->r * c->r - b->i * c->i;
bc.i = b->r * c->i + b->i * c->r;
b->r = a->r - bc.r;
b->i = a->i - bc.i;
a->r += bc.r;
a->i += bc.i;
}
static uint32_t bits_reverse(uint32_t index, uint32_t bits) {
uint32_t left, right;
left = index << 16;
right = index >> 16;
index = left | right;
left = (index << 8) & 0xff00ff00;
right = (index >> 8) & 0x00ff00ff;
index = left | right;
left = (index << 4) & 0xf0f0f0f0;
right = (index >> 4) & 0x0f0f0f0f;
index = left | right;
left = (index << 2) & 0xc3c3c3c3;
right = (index >> 2) & 0x3c3c3c3c;
index = left | right;
left = (index << 1) & 0xa5a5a5a5;
right = (index >> 1) & 0x5a5a5a5a;
index = left | right;
return index >> (32 - bits);
}
static void fft_k(complex* dat, const complex* w, uint32_t k, uint32_t k_all) {
uint32_t i, j;
complex* dat1;
k_all = 1 << (k_all - k - 1);
k = 1 << k;
dat1 = dat + k;
for (i = 0; i < k_all; i++) {
for (j = 0; j < k; j++) {
butter_fly(dat++, dat1++, w + j * k_all);
}
dat += k;
dat1 += k;
}
}
void fft_init(complex* w, uint32_t k) {
float beta = 0.0f, dbeta = 3.1415926535f / k ;
for ( ; k; k--) {
w->r = cosf(beta);
w->i = sinf(beta);
beta += dbeta;
w++;
}
}
void fft(complex* dat, const complex* w, uint32_t k) {
uint32_t i, j, n;
complex temp;
n = 1 << k;
for (i = 0; i < n; i++) {
j = bits_reverse(i, k);
if (i <= j) {
continue;
}
temp = dat[i];
dat[i] = dat[j];
dat[j] = temp;
}
for (i = 0; i < k; i++) {
fft_k(dat, w, i, k);
}
}
再上代码的使用方法:
#define K 4
#define N (1 << K)
static complex w[N / 2];
static complex dat[N];
int main() {
uint32_t i;
for (i = 0; i < N; i++) {
dat[i].r = 0.0f;
dat[i].i = 0.0f;
}
dat[0].r = 1.0f;
dat[1].r = 1.0f;
fft_init((complex*)w, N / 2);
fft((complex*)dat, (const complex*)w, K);
for (i = 0; i < N; i++) {
printf((const char*)"dat[%d] = %f + %f * i\n", i, dat[i].r, dat[i].i);
}
return 0;
}
输出结果:
对比matlab输出结果:
最近编辑记录 xxzouzhichao (2018-08-20 11:23:56)
离线
运算速度如何? 和ST的库比较一下哪个快点
最近编辑记录 演技担当黄晓明 (2018-08-20 11:30:37)
离线
厉害
离线
运算速度如何? 和ST的库比较一下哪个快点
stm32f030 16点fft 400us
stm32f030 64点fft 3900us
stm32f030 128点fft 9200us
开优化后128点fft 8800us
最近编辑记录 xxzouzhichao (2018-08-20 11:43:15)
离线
运算速度如何? 和ST的库比较一下哪个快点
mdk的库128点fft约6300us
离线
st的库能不需移植直接给别的CPU跑吗?
离线
大神这是软硬通吃,我等望尘莫及.
2013年毕业前夕照着数字信号处理的书写的,在amobbs上开源了,如今朝花夕拾,优化了一下代码风格,在挖坑网开源
离线
st的库能不需移植直接给别的CPU跑吗?
我没用过stm的库,我用的mdk自带的dsp库对比的,128点比mdk的库慢了2500us
离线
感谢楼主分享!牛X的很!
离线
大神! 我等望尘莫及!
离线
更正一段代码:
static uint32_t bits_reverse(uint32_t index, uint32_t bits) {
uint32_t left, right;
left = index << 16;
right = index >> 16;
index = left | right;
left = (index << 8) & 0xff00ff00;
right = (index >> 8) & 0x00ff00ff;
index = left | right;
left = (index << 4) & 0xf0f0f0f0;
right = (index >> 4) & 0x0f0f0f0f;
index = left | right;
left = (index << 2) & 0xc3c3c3c3;
right = (index >> 2) & 0x3c3c3c3c;
index = left | right;
left = (index << 1) & 0xa5a5a5a5;
right = (index >> 1) & 0x5a5a5a5a;
index = left | right;
return index >> (32 - bits);
}
更正后:
static uint32_t bits_reverse(uint32_t index, uint32_t bits) {
uint32_t left, right;
left = index << 16;
right = index >> 16;
index = left | right;
left = (index << 8) & 0xff00ff00;
right = (index >> 8) & 0x00ff00ff;
index = left | right;
left = (index << 4) & 0xf0f0f0f0;
right = (index >> 4) & 0x0f0f0f0f;
index = left | right;
left = (index << 2) & 0xc3c3c3c3;
right = (index >> 2) & 0x3c3c3c3c;
index = left | right;
left = (index << 1) & 0xaaaaaaaa;
right = (index >> 1) & 0x55555555;
index = left | right;
return index >> (32 - bits);
}
离线
更正一段代码:
static uint32_t bits_reverse(uint32_t index, uint32_t bits) { uint32_t left, right; left = index << 16; right = index >> 16; index = left | right; left = (index << 8) & 0xff00ff00; right = (index >> 8) & 0x00ff00ff; index = left | right; left = (index << 4) & 0xf0f0f0f0; right = (index >> 4) & 0x0f0f0f0f; index = left | right; left = (index << 2) & 0xc3c3c3c3; right = (index >> 2) & 0x3c3c3c3c; index = left | right; left = (index << 1) & 0xa5a5a5a5; right = (index >> 1) & 0x5a5a5a5a; index = left | right; return index >> (32 - bits); }
更正后:
static uint32_t bits_reverse(uint32_t index, uint32_t bits) { uint32_t left, right; left = index << 16; right = index >> 16; index = left | right; left = (index << 8) & 0xff00ff00; right = (index >> 8) & 0x00ff00ff; index = left | right; left = (index << 4) & 0xf0f0f0f0; right = (index >> 4) & 0x0f0f0f0f; index = left | right; left = (index << 2) & 0xcccccccc; right = (index >> 2) & 0x33333333; index = left | right; left = (index << 1) & 0xaaaaaaaa; right = (index >> 1) & 0x55555555; index = left | right; return index >> (32 - bits); }
离线
mark
离线
离线
谢谢分享,虽然做过24点更精简的
离线
那个bits_reverse函数如果用编译器的intrinsic替代的话会快很多。
离线
谢谢分享,可以方便用于对比学习。
离线
那个bits_reverse函数如果用编译器的intrinsic替代的话会快很多。
这个支持cotex M0 M3 M4 吗?
离线
离线
niubility!
离线
牛皮 插眼收藏一波
离线
arm的dsp库也是源码的,不需要移植,直接用的
离线
arm的dsp库也是源码的,不需要移植,直接用的
你可以运行它(修改后)但性能很糟糕,我测试了 800MHZ 的性能甚至低于 100MHz Cortex M4
离线
离线
这个是要做优化的,利用硬件的加速模块,合理的安排计算的tap
离线