基于FFT的吉他音符识别实验
我个人是吉他爱好者,想研究吉他音频究竟能不能转换为MIDI输入到电脑中。由于对很多方面的算法还不太熟悉,在多个音符同时出现时存在不少识别不准确的问题,希望与更多电子爱好者或音乐爱好者交流,请多提宝贵意见。
首先说一下我对音符识别的基本思路:1、首先进行FFT运算,寻找波峰以确定频率 。2、如果计算出的频率在某个音符频率范围内,则输出这个音符。
3、如果该点的幅值小于设定阈值,则关闭音符(尽量遵循MIDI规范)。
接下来是难点:
1、多个音符出现时,频谱会出现多个波峰,如何计算出波峰的最大值、次大值、第三大值、第四大值等等?
2、谐波如何滤波(假设检测到一个261HZ的正弦波信号,其谐波为522HZ,1044HZ…… 261HZ为C4,523HZ为C5,低音C和高音C容易识别错误)
对于1、我的解决办法是对频谱进行多次遍历,第一次发现最大值后,记录其下标,下一次遍历时跳过它,从而找到次大峰值。2、基频的识别有待研究
硬件采用的是arduino due开发板,显示用3.2寸TFT,
可直插到DUE开发板中。
电路连接:声卡音频输出接到ARDUINO DUE的A0口;A0口用两个电阻分压得到 3.3/2 (V) 的偏置电压,并串联电容隔离直流。
为了更清晰地调试,我保留了频谱显示。
以下是测试视频:
http://pan.baidu.com/s/1c29qDkk
库:1、SplitRadixRealP //FFT库
2、UTFT //TFT显示屏
以下是代码:
#include <SplitRadixRealP.h>
#include <UTFT.h>
// Fonts used by the display (declared here, defined elsewhere).
extern uint8_t SmallFont[];
extern uint8_t BigFont[];
extern uint8_t SevenSegNumFont[];
UTFT myGLCD(ILI9481,38,39,40,41); // TFT screen
#define SMP_RATE 10240UL // sample rate in Hz; changing it changes the frequency resolution and the upper frequency limit
// upper frequency limit = sample rate / 2
#define CLK_MAIN 84000000UL // main clock in Hz
// Parenthesized so the macro stays a single value inside larger expressions.
#define TMR_CNTR (CLK_MAIN / (2 * SMP_RATE))
// FFT_SIZE IS DEFINED in Header file Radix4.h
#define FFT_SIZE 2048 // number of samples per FFT (4096 is the other option)
#define MIRROR (FFT_SIZE / 2)
#define INP_BUFF FFT_SIZE
volatile uint16_t sptr = 0 ; // written by ADC_Handler: index of the buffer queued next
volatile int16_t flag = 0 ;  // written by ADC_Handler, cleared by loop(): 0 = nothing to process
uint16_t inp[2][INP_BUFF] = { 0}; // DMA likes ping-pongs buffer
int f_r[FFT_SIZE] = { 0};         // FFT working buffer
int out1[MIRROR] = { 0};          // magnitudes (used for the spectrum display)
int out2[MIRROR] = { 0};          // magnitudes (used for note detection)
int LastVal[MIRROR]= { 0};        // magnitude drawn on the previous display pass
float Ratio[MIRROR]= { 0};        // per-bin rise/fall step used by the display
// Frequency resolution of one FFT bin in Hz (5 Hz at 10240 / 2048).
// Computed in floating point so that non-integral ratios (e.g. FFT_SIZE 4096
// -> 2.5 Hz) are not truncated by integer division.
float NX=static_cast<float>(SMP_RATE)/FFT_SIZE;
int STEP=2;                        // number of display passes used to move a bar to its new level
int jump[4][10]={{0},{0},{0},{0}}; // per note slot: bin indices of its harmonics to skip
int LASTmaxF[4]={0};               // per note slot: bin index of the detected peak
int sw[4]={0};                     // per note slot: 1 = note on, 0 = note off
int k;                             // note slot currently being filled; advances by one per detected note
int note_num=4;                    // number of note slots
const int dc_offset = 2047;        // subtracted from every ADC sample before windowing
uint8_t print_inp = 0; // print switch
unsigned long time_start;
// Elapsed microseconds of each processing stage, measured in loop().
unsigned int time_hamng, time_revbn, time_radix, time_gainr, time_sqrtl, time_sqrtl2;
SplitRadixRealP radix;
// Pitch-class names, indexed by (note number % 12).
// Stored as strings: a multi-character constant such as '#C' does not fit in
// a char (it is rejected as a narrowing conversion, or truncated so that the
// sharp sign is lost), so every sharp would print as its natural note.
const char* note[12] = {"C","#C","D","#D","E","F","#F","G","#G","A","#A","B"};
// Centre frequency in Hz of each note, 128 entries, 12 per row (one octave
// per row, C first). Index 60 is 261.626 Hz and index 69 is 440.000 Hz.
// NOTE(review): the index appears to be the MIDI note number (0..127) - confirm.
static const float pitchFrequency[] = {
8.176, 8.662, 9.177, 9.723, 10.301, 10.913, 11.562, 12.250, 12.978, 13.750, 14.568, 15.434,
16.352, 17.324, 18.354, 19.445, 20.602, 21.827, 23.125, 24.500, 25.957, 27.500, 29.135, 30.868,
32.703, 34.648, 36.708, 38.891, 41.203, 43.654, 46.249, 48.999, 51.913, 55.000, 58.270, 61.735,
65.406, 69.296, 73.416, 77.782, 82.407, 87.307, 92.499, 97.999, 103.826, 110.000, 116.541, 123.471,
130.813, 138.591, 146.832, 155.563, 164.814, 174.614, 184.997, 195.998, 207.652, 220.000, 233.082, 246.942,
261.626, 277.183, 293.665, 311.127, 329.628, 349.228, 369.994, 391.995, 415.305, 440.000, 466.164, 493.883,
523.251, 554.365, 587.330, 622.254, 659.255, 698.456, 739.989, 783.991, 830.609, 880.000, 932.328, 987.767,
1046.502, 1108.731, 1174.659, 1244.508, 1318.510, 1396.913, 1479.978, 1567.982, 1661.219, 1760.000, 1864.655, 1975.533,
2093.005, 2217.461, 2349.318, 2489.016, 2637.020, 2793.826, 2959.955, 3135.963, 3322.438, 3520.000, 3729.310, 3951.066,
4186.009, 4434.922, 4698.636, 4978.032, 5274.041, 5587.652, 5919.911, 6271.927, 6644.875, 7040.000, 7458.620, 7902.133,
8372.018, 8869.844, 9397.273, 9956.063, 10548.082, 11175.303, 11839.822, 12543.854,
};
// Upper and lower frequency limit of each note's detection band; filled in by
// setup() for indices 0..127 (the arrays are declared with 130 elements).
float pitchFrequencyUP[130] = {};
float pitchFrequencyDW[130] = {};
// One-time initialisation: opens the serial port, builds and prints the
// per-note detection bands, then starts the ADC, the sampling timer and the
// TFT display.
void setup()
{
  Serial.begin (115200) ;
  Serial.println(" ");
  Serial.println("FREQ");
  // Each note's band spans centre / 1.0293 .. centre * 1.0293.
  // 1.0293 is approximately 2^(1/24), so neighbouring bands meet half-way
  // between two adjacent semitones.
  for (int i=0;i<=127;i++){
    pitchFrequencyUP[i]=pitchFrequency[i]*1.0293; // upper band limit of note i
    pitchFrequencyDW[i]=pitchFrequency[i]/1.0293; // lower band limit of note i
    Serial.print(" UP");Serial.print(i);Serial.print("=");Serial.print(pitchFrequencyUP[i]);
    Serial.print(" F");Serial.print(i);Serial.print("=");Serial.print(pitchFrequency[i]);
    Serial.print(" DOWN");Serial.print(i);Serial.print("=");Serial.println(pitchFrequencyDW[i]);
  }
  //------------ ADC and TFT initialisation ------------------
  adc_setup ();
  tmr_setup ();
  myGLCD.InitLCD();
  myGLCD.clrScr();
  myGLCD.setFont(SmallFont);
  myGLCD.print(" Audio Spectrum Analyzer", CENTER, 0);
}// setup done
//====================================================
// Multiplies two ints and scales the product down by 2^12 with a right shift
// (used to apply a window coefficient to a sample).
inline int mult_shft12( int a, int b)
{
  const int product = a * b;
  return product >> 12;
}
//====================================================
// Main loop: whenever ADC_Handler reports a filled sample buffer (flag != 0),
// window it, run the FFT, compute magnitudes, draw the spectrum and run note
// detection. The duration of each stage is recorded in the time_* globals.
void loop()
{
  if ( flag )
  {
    uint16_t indx_a = flag -1; // flag is 1 or 2; selects ping-pong buffer 0 or 1
    uint16_t indx_b = 0;
    time_start = micros();
    // Remove the DC offset and apply the window. The window table has NWAVE
    // entries, so it is read with a stride of NWAVE / FFT_SIZE.
    // (The stride was previously named k, shadowing the global note-slot index.)
    for ( uint16_t i = 0, stride = (NWAVE / FFT_SIZE); i < FFT_SIZE; i++ )
    {
      uint16_t windw = Hamming[i * stride];
      f_r[i] = mult_shft12((inp[indx_a][indx_b++] - dc_offset), windw);
    }
    time_hamng = micros() - time_start;
    time_start = micros();
    radix.rev_bin( f_r, FFT_SIZE);
    time_revbn = micros() - time_start;
    time_start = micros();
    radix.fft_split_radix_real( f_r, LOG2_FFT);
    time_radix = micros() - time_start;
    time_start = micros();
    radix.gain_Reset( f_r, LOG2_FFT -1);
    time_gainr = micros() - time_start;
    time_start = micros();
    radix.get_Magnit1( f_r, out1);
    time_sqrtl = micros() - time_start;
    time_start = micros();
    radix.get_Magnit2( f_r, out2);
    time_sqrtl2 = micros() - time_start;
    prnt_out(out1, MIRROR); // spectrum display
    MAXFREQ(out2);          // note detection
    flag = 0;               // mark the buffer as consumed
  }
}
// Configures timer/counter TC0 channel 0 in waveform mode with period
// TMR_CNTR; adc_setup() selects this channel (ADC_TRIG_TIO_CH_0) as the ADC
// conversion trigger. The register writes below are order-dependent.
void tmr_setup ()
{
pmc_enable_periph_clk(TC_INTERFACE_ID + 0 *3 + 0); // clock the TC0 channel 0
TcChannel * t = &(TC0->TC_CHANNEL)[0] ; // pointer to TC0 registers for its channel 0
t->TC_CCR = TC_CCR_CLKDIS ; // disable internal clocking while setup regs
t->TC_IDR = 0xFFFFFFFF ; // disable interrupts
t->TC_SR ; // read int status reg to clear pending
t->TC_CMR = TC_CMR_TCCLKS_TIMER_CLOCK1 | // use TCLK1 (prescale by 2, = 42MHz)
TC_CMR_WAVE | // waveform mode
TC_CMR_WAVSEL_UP_RC | // count-up PWM using RC as threshold
TC_CMR_EEVT_XC0 | // Set external events from XC0 (this setup TIOB as output)
TC_CMR_ACPA_CLEAR | TC_CMR_ACPC_CLEAR |
TC_CMR_BCPB_CLEAR | TC_CMR_BCPC_CLEAR ;
t->TC_RC = TMR_CNTR; // counter resets on RC, so sets period in terms of 42MHz clock
t->TC_RA = TMR_CNTR /2; // roughly square wave
// Clear bits 16..19 of the mode register, then select "clear on RA compare,
// set on RC compare" (this overrides the ACPC_CLEAR written above).
t->TC_CMR = (t->TC_CMR & 0xFFF0FFFF) | TC_CMR_ACPA_CLEAR | TC_CMR_ACPC_SET ; // set clear and set from RA and RC compares
t->TC_CCR = TC_CCR_CLKEN | TC_CCR_SWTRG ; // re-enable local clocking and switch to hardware trigger source.
}
// Configures the ADC to sample one channel on the timer trigger and to store
// the results through DMA into the two halves of inp[][] (ping-pong), raising
// the ADC interrupt on the RXBUFF condition.
void adc_setup ()
{
pmc_enable_periph_clk(ID_ADC);
adc_init(ADC, SystemCoreClock, ADC_FREQ_MAX, ADC_STARTUP_FAST);
NVIC_EnableIRQ (ADC_IRQn); // enable ADC interrupt vector
adc_disable_all_channel(ADC);
adc_enable_interrupt(ADC, ADC_IER_RXBUFF);
// Current and next receive buffers, INP_BUFF samples each.
ADC->ADC_RPR = (uint32_t) inp[0]; // DMA buffer
ADC->ADC_RCR = INP_BUFF;
ADC->ADC_RNPR = (uint32_t) inp[1]; // next DMA buffer
ADC->ADC_RNCR = INP_BUFF;
ADC->ADC_PTCR = 1; // NOTE(review): presumably enables the DMA receive transfer - confirm against the datasheet
adc_set_bias_current(ADC, 0x01);
// adc_enable_tag(ADC);
adc_enable_channel(ADC, ADC_CHANNEL_7); // AN0
adc_configure_trigger(ADC, ADC_TRIG_TIO_CH_0, 0); // conversions are started by the timer set up in tmr_setup()
adc_start(ADC);
}
// ADC interrupt handler. On the RXBUFF condition it publishes a buffer number
// to loop() through `flag` and queues the other half of inp[][] as the next
// DMA receive buffer.
void ADC_Handler (void)
{
if((adc_get_status(ADC) & ADC_ISR_RXBUFF) == ADC_ISR_RXBUFF) {
flag = ++sptr; // flag becomes 1 or 2; loop() uses flag-1 as the buffer index
sptr &= 0x01; // wrap sptr back into 0..1
ADC->ADC_RNPR = (uint32_t) inp[sptr]; // queue the next DMA buffer
ADC->ADC_RNCR = INP_BUFF;
}
}
//---------------------------- peak search over all bins --------------------
// Finds the strongest spectral peak that is not already claimed by an active
// note slot, turns a new note on when that peak exceeds the note-on
// threshold, and turns active notes off when their bin falls below the
// note-off threshold. Also prints the state of every slot on the TFT.
//
// array: magnitude spectrum; it is modified in place (bins 0 and 1 are
//        zeroed and every scanned bin is doubled).
//
// Multiple simultaneous notes are found over successive calls: each call
// skips the bins of already-detected notes and their harmonics, so the next
// largest peak is picked up by the next free slot `k`.
void MAXFREQ(int *array){
  const int TH1=30;   // note-off threshold
  const int TH2=80;   // note-on threshold
  int maxval=TH1;     // largest magnitude seen so far in this scan
  int DSmaxF[5]={0};  // bin index of the largest peak, stored per slot
  array[0]=0;array[1]=0; // drop the DC component
  for (int i=10;i<=704;i++){ // 50 Hz .. 3520 Hz at 5 Hz per bin
    // Skip the bins recorded for active notes (fundamental and harmonics):
    // on a match, jump three bins ahead.
    for (int j=0;j<note_num;j++){       // j: note slot
      for (int m=0;m<=5;m++){           // m: harmonic number
        if (sw[j]==1 && i==jump[j][m]){i+=3;break;}
      }
    }
    array[i]*=2; // level gain
    if (array[i]>maxval){maxval=array[i];DSmaxF[k]=i;} // track the largest peak
    // DSmaxF[k] is the bin index of the peak, array[DSmaxF[k]] its magnitude;
    // frequency = sample rate / FFT size * bin index.
  }
  if (sw[k]==0 && maxval>TH2){
    // Peak is above the note-on threshold: remember its bin, mark the slot
    // as on and report it.
    LASTmaxF[k]=DSmaxF[k];sw[k]=1;NOTE_ON(k);
    // Record the bins to skip on later scans: one bin below the fundamental
    // and below each of its next five multiples.
    for (int b=0;b<=5;b++){
      jump[k][b]=LASTmaxF[k]*(b+1)-1;
    }
    k++; // move on to the next note slot
  }
  //------------------- decay check and status display for every slot -------------------
  for (int j=0;j<note_num;j++){
    if (sw[j]==1 && array[LASTmaxF[j]]<TH1){sw[j]=0;NOTE_OFF(j);} // below the note-off threshold: turn it off
    myGLCD.setColor(255,255,255);
    myGLCD.printNumI(LASTmaxF[j],10,j*10+5,2);
    myGLCD.printNumI(array[LASTmaxF[j]],50,j*10+5,3);
    myGLCD.printNumI(sw[j],100,j*10+5,2);
  }
  myGLCD.printNumI(k,180,30,2);
  if (k>=note_num){k=0;} // wrap around to the first slot
}
//---------------------------- peak search over all bins --------------------
//------------------------------ 95-bar level display ------------------------
// Draws the first 95 spectrum bins as vertical bars on the TFT.
//
// array: magnitude spectrum; bins 0 and 1 are zeroed in place.
// dlina: number of valid entries in array; never more than this many bars
//        are drawn.
//
// Each bar is moved from its previous level to the new one in STEP passes.
void prnt_out( int *array, int dlina)
{
  array[0]=0;array[1]=0; // drop the DC component
  const int H1 = 200; const int H2 = 50; // bottom and top edge of the plot (screen Y)
  const int w1 = 5; const int w2 = 3;    // background width and bar width
  const int x = 0; float DSVAL=0;        // left margin; bar height
  for (int j=1;j<=STEP;j++){
    for (int i = 0; i < 95 && i < dlina; i++) {
      // screen is 480 x 320 (width x height)
      int VAL=array[i];
      Ratio[i] = (VAL - LastVal[i])/STEP; // rise or fall per pass
      DSVAL = LastVal[i]+Ratio[i];        // level to draw on this pass
      // Logarithmic scale. Levels below 1 are drawn as an empty bar: log()
      // of zero or a negative value is -inf/NaN, which would then be passed
      // on as a pixel coordinate.
      DSVAL = (DSVAL > 1) ? log(DSVAL)*20 : 0;
      if (DSVAL>=155){DSVAL=155;} // clamp the peak
      DSVAL = H1 - DSVAL; // convert the height to a screen Y coordinate
      myGLCD.setColor(255,255,255); // white: erase the column
      myGLCD.fillRect(x+(i*w1),H1,x+(i*w1)+w1,H2);
      myGLCD.setColor(255,0,0); // red: draw the bar
      myGLCD.fillRect(x+(i*w1),H1,x+(i*w1)+w2,DSVAL);
      LastVal[i] = VAL; // remember this level for the next pass
    }// bars
  }// STEP
}
//------------------------------ 95-bar level display ------------------------
//------------------------------ note on/off output to the serial port ------------------------
// Returns the pitch class (0..11, index into note[]) of the peak stored in
// note slot t, or -1 when its frequency lies in no note's detection band.
static int pitchClassOfSlot(int t){
  const int FREQ=LASTmaxF[t]*NX; // bin index * Hz per bin
  int NOTE=-1;                   // previously uninitialised when no band matched
  for (int i=0;i<=127;i++){
    // The frequency lies between the lower and upper limit of note i.
    if (FREQ>pitchFrequencyDW[i] && FREQ<pitchFrequencyUP[i]){NOTE=i;}
  }
  return (NOTE<0) ? -1 : NOTE%12; // 12 semitones per octave
}
// Prints "<name>=OFF" for the note held in slot t; prints nothing when the
// frequency matches no note.
void NOTE_OFF(int t){
  const int NOTE=pitchClassOfSlot(t);
  if (NOTE<0){return;}
  Serial.print(note[NOTE]); Serial.println("=OFF");
}
// Prints "<name>=ON" for the note held in slot t; prints nothing when the
// frequency matches no note.
void NOTE_ON(int t){
  const int NOTE=pitchClassOfSlot(t);
  if (NOTE<0){return;}
  Serial.print(note[NOTE]); Serial.println("=ON");
}
|