本帖最后由 freebit 于 2016-4-30 01:57 编辑
刚入手一块科大讯飞XFS5152CE语音合成芯片模块,配合Arduino开发板就可以来学习语音合成方面的应用了。
芯片模块的某宝价格大概在100元左右,如果单买芯片自行制作PCB的话,估计在50元左右。相对来说比其它同类型的语音模块偏贵了些。但是科大讯飞的语音识别还是做得不错的。所以值得学习。
下面我们一起来看看这个模块的相关简介:
芯片的语音合成效果可以参考下面的视频:
芯片支持三种与单片机的通讯模式:TTL电平串口、I2C、SPI。具体的二次开发资料详见附件内容。
操作开始前,请按下面的连线顺序连接模块与Arduino开发板:
模块的:GND-----Arduino开发板:GND
模块的:3V3-----Arduino开发板:3.3V
模块的:RXD-----Arduino开发板:TX(Arduino UNO的D1引脚)
模块的:TXD-----Arduino开发板:RX(Arduino UNO的D0引脚)
- /*语音编码数据,每个汉字对应两个16进制的编码*/
- char voice_encodeData[]={0xFD,0x02,0x5D,0x01,0x01,0xC4,0xFA,0xBA,0xC3,0xA3,0xAC,0xBB,0xB6,0xD3,0xAD,0xCA,0xB9,0xD3,0xC3,0xBF,0xC6,0xB4,0xF3,0xD1,0xB6,0xB7,0xC9,0xD3,0xEF,0xD2,0xF4,0xBA,0xCF,0xB3,0xC9,0xCF,0xB5,0xC1,0xD0,0xD0,0xBE,0xC6,0xAC,0xA3,0xA1,0x0D,0x0A,0xCF,0xC2,0xC3,0xE6,0xBD,0xAB,0xB7,0xD6,0xC0,0xE0,0xBD,0xF8,0xD0,0xD0,0xD0,0xBE,0xC6,0xAC,0xB9,0xA6,0xC4,0xDC,0xCB,0xB5,0xC3,0xF7,0xA3,0xBA,0x0D,0x0A,0x31,0xA1,0xA2,0x20,0xD0,0xBE,0xC6,0xAC,0xBF,0xC9,0xD2,0xD4,0xD7,0xD4,0xB6,0xAF,0xCA,0xB6,0xB1,0xF0,0xBA,0xC5,0xC2,0xEB,0xA1,0xA2,0xC8,0xD5,0xC6,0xDA,0xA1,0xA2,0xCA,0xB1,0xBC,0xE4,0xA1,0xA2,0xCA,0xFD,0xD1,0xA7,0xB7,0xFB,0xBA,0xC5,0xA1,0xA2,0xBC,0xC6,0xC1,0xBF,0xB5,0xA5,0xCE,0xBB,0xB5,0xC8,0xCC,0xD8,0xCA,0xE2,0xB8,0xF1,0xCA,0xBD,0xB5,0xC4,0xCE,0xC4,0xB1,0xBE,0xA3,0xAC,0xB2,0xA2,0xB0,0xB4,0xC8,0xCB,0xC3,0xC7,0xB5,0xC4,0xCF,0xB0,0xB9,0xDF,0xB7,0xBD,0xCA,0xBD,0xC0,0xCA,0xB6,0xC1,0xA1,0xA3,0x0D,0x0A,0x20,0x20,0x20,0xC0,0xFD,0xC8,0xE7,0xA3,0xBA,0x0D,0x0A,0x20,0x20,0x20,0xD7,0xDC,0xCC,0xA8,0xCC,0xFD,0xB2,0xBB,0xB5,0xBD,0xC4,0xFA,0xBD,0xB2,0xBB,0xB0,0xA3,0xAC,0xCA,0xD6,0xBB,0xFA,0xB4,0xF2,0xB2,0xBB,0xCD,0xA8,0xA3,0xAC,0xD3,0xD0,0xCA,0xC2,0xC7,0xEB,0xBA,0xF4,0x39,0x36,0x31,0x30,0x33,0xA1,0xA3,0x0D,0x0A,0x20,0x20,0x20,0xC1,0xAA,0xCF,0xB5,0xB7,0xBD,0xCA,0xBD,0xA3,0xBA,0x30,0x31,0x30,0xA1,0xAA,0x36,0x35,0x33,0x33,0x31,0x38,0x36,0x33,0xA1,0xAA,0x31,0x32,0x33,0x34,0x0D,0x0A,0x20,0x20,0x20,0xCF,0xD6,0xD4,0xDA,0xCA,0xB1,0xBC,0xE4,0xCA,0xC7,0xA3,0xAC,0x31,0x33,0x2D,0x35,0x2D,0x31,0x36,0xA3,0xAC,0x31,0x30,0x3A,0x33,0x36,0x3A,0x32,0x38,0xA1,0xA3,0x0D,0x0A,0x20,0x20,0x20,0xBD,0xF1,0xCC,0xEC,0xC6,0xF0,0xC8,0xBC,0xD3,0xCD,0xBC,0xDB,0xB8,0xF1,0xC9,0xCF,0xD5,0xC7,0x35,0x25,0xA3,0xAC,0x0D,0x0A,0x20,0x20,0x20,0xC3,0xF7,0xCC,0xEC,0xB5,0xC4,0xCE,0xC2,0xB6,0xC8,0xD7,0xEE,0xB8,0xDF,0xB4,0xEF,0xB5,0xBD,0x32,0x38,0xA1,0xE6,0xA3,0xAC,0x0D,0x0A,0x20,0x20,0x20,0xBE,0xA9,0xBB,0xA6,0xB8,0xDF,0xCC,0xFA,0xC8,0xAB,0xB3,0xA4,0x31,0x33,0x31,0x38,0x6B,0x6D,0xA3,0xAC,0x0D,0x0A,0x20,0x20,0x20,0xD4,0xCB,0xB
B,0xF5,0xCE,0xEF,0xC3,0xBF,0xBC,0xFE,0xB2,0xBB,0xC4,0xDC,0xB3,0xAC,0xB9,0xFD,0x35,0x30,0x6B,0x67,0xA1,0xA3,0x0D,0x0A,0x0D,0x0A,0x20,0x20,0x20,0xD0,0xBE,0xC6,0xAC,0xBE,0xDF,0xD3,0xD0,0xBD,0xCF,0xC7,0xBF,0xB5,0xC4,0xB6,0xE0,0xD2,0xF4,0xD7,0xD6,0xC5,0xD0,0xB1,0xF0,0xC4,0xDC,0xC1,0xA6,0xA3,0xAC,0x0D,0x0A,0x20,0x20,0x20,0xC0,0xFD,0xC8,0xE7,0xA3,0xBA,0x0D,0x0A,0x20,0x20,0x20,0xB5,0xB1,0xC7,0xB0,0xB9,0xA4,0xD7,0xF7,0xB5,0xC4,0xD6,0xD8,0xD6,0xD0,0xD6,0xAE,0xD6,0xD8,0xCA,0xC7,0xD2,0xAA,0xD4,0xDA,0xD6,0xD8,0xD6,0xD8,0xC0,0xA7,0xC4,0xD1,0xD6,0xD0,0xB1,0xA3,0xD6,0xA4,0xD6,0xD8,0xC7,0xEC,0xCA,0xD0,0xB5,0xC4,0xD6,0xD8,0xB5,0xE3,0xB9,0xA4,0xB3,0xCC,0xB5,0xC4,0xCB,0xB3,0xC0,0xFB,0xBD,0xF8,0xD0,0xD0,0xA3,0xAC,0xBC,0xE1,0xBE,0xF6,0xBE,0xDC,0xBE,0xF8,0xD6,0xD8,0xB8,0xB4,0xBD,0xA8,0xC9,0xE8,0xA1,0xA3,0x0D,0x0A,0x20,0x20,0x20,0xD2,0xF8,0xD0,0xD0,0xD0,0xD0,0xB3,0xA4,0xB4,0xA9,0xB9,0xFD,0xC8,0xCB,0xD0,0xD0,0xB5,0xC0,0xCF,0xF2,0xC6,0xEF,0xD7,0xC5,0xD7,0xD4,0xD0,0xD0,0xB3,0xB5,0xB5,0xC4,0xD2,0xF8,0xD0,0xD0,0xD6,0xB0,0xD4,0xB1,0xD0,0xD0,0xD7,0xDF,0xB9,0xFD,0xC8,0xA5,0xA1,0xA3};
- void setup() {
- // put your setup code here, to run once:
- Serial.begin(9600);//模块默认的波特率为9600bps,该波特率可以根据芯片手册通过芯片的波特率设置两个引脚接不同电阻值来进行设置,默认时如果改成其它波特率可能造成无法通讯的情况
- }
- void loop() {
- // put your main code here, to run repeatedly:
- Serial.write(voice_encodeData,sizeof(voice_encodeData));//送出语音编码数据进行合成
- delay(90000);//延时90秒,差不多是上面的内容朗读完的时间。
- }
复制代码
好了,上面已经给出了简单的语音合成程序,但是难不成每次要合成不同的内容均需要用其它的第三方编码软件把汉字编译成GB2312编码后再重新下载到控制板上面吗?显然,这不现实,也不科学。但是找了很久,实在是没有发现能够将汉字转换成GB2312编码的代码或是库,无奈之下,想到我们平时在进行网页设计时也经常需要对网页进行编码,例如UTF-8编码、GBK编码、BIG5编码等,那么就想着应该有一种可能性,通过网页的形式来输入想要合成语音的汉字,再通过译码后POST给指定页面进行处理,然后由单片机向串口进行输出给语音合成模块进行合成。经过长时间的测试摸索,终于基本上实现了这样的功能。
由于要进行网页处理,故需要用到Arduino的Ethernet W5100 网络扩展板模块,该扩展模块自带SD卡读写器,后面可以再把通过网页输入的汉字译码后存放在SD卡中。既然是通过网页输入,那么无论是有线、无线WIFI就均可以了,如果网络扩展模块通过无线路由器接入网络的,还可以使用手机上的网页浏览器进行设置。暂时SD卡还没有去学习操作,所以将目前已经写好的部分代码放上来共同学习交流。在测试的过程中,发现如果输入的文字过长的话就会出现问题,所以在POST页面的处理时,POST参数的字节长度我暂时处理成了[255]个字节,因为再长的话POST表单的时候就会卡死,证明程序还是存在问题,清楚问题的也请你们将修正后的代码贴上来共同学习。最好再加上SD卡读写这一个功能。
首先先将网络扩展板堆叠在开发板上面,然后再与语音合成模块进行连线,具体的连线如下:
模块的:GND-----Arduino开发板:GND
模块的:3V3-----Arduino开发板:3.3V
模块的:RXD-----Arduino开发板:TX(Arduino UNO的D1引脚)
模块的:TXD-----Arduino开发板:RX(Arduino UNO的D0引脚)
模块的:RDY-----Arduino开发板:2(Arduino UNO的D2引脚)这一根线用于检测语音模块是否正处于语音合成中,如果是,后面推送过来的新内容暂时就不处理。
具体的代码如下:
- #include <SPI.h>//引用头文件
- #include <Ethernet.h>//引用头文件
- #include<WebServer.h>
- #include<Streaming.h>
- boolean make_voice(char voice_value[],int count)//合成语音,每个汉字占用两个字节,count为voice_value[]数组的长度
- {
-
- int i=0;
- for(i=0;i<count;i++){if(voice_value[i]==0){count=i;break;}}//当前数组元素为空时跳出循环体,并将非空值的数组元素个数赋值给count
- byte make_sound_head= 0xFD;//头标识
- byte make_sound_length[2];//数据区长度
- /*构造数据区长度开始*/
- if((count+2)>255)
- {
- make_sound_length[0]=(count+2)/255;
- make_sound_length[1]=(count+2)%255;
- }else{
- make_sound_length[0]= 0x00;
- make_sound_length[1]=(count+2);
- }
- /*构造数据区长度结束*/
- byte make_sound_command[2]={0x01,0x00};//启用语音合成命令功能,编码格式为GB3121
-
- Serial.write(make_sound_head);//送出头标识,一个字节
- Serial.write(make_sound_length[0]);//送出数据区长度高位,一个字节
- Serial.write(make_sound_length[1]);//送出数据区长度低位,一个字节
- Serial.write(make_sound_command,sizeof(make_sound_command));//送出命令和编码格式
- Serial.write(voice_value,count);//送出语音编码数据
- }
-
- byte mac[] = { 0xDE,0xAD,0xBE,0xEF,0xFE,0xED };//定义MAC地址
- IPAddress ip(192,168,1,188);//定义手动IP
- WebServer webserver("",80);//创建实例
- P(home_head)=
- "<!doctype html>"
- "<html>"
- "<head>"
- "<meta charset="gb2312"/>"
- "<title>"
- "网页编码并合成语音界面[设计开发:比特虫]"
- "</title>"
- "</head>"
- "<body>";
-
- P(home_body)=
- "请在下面文本框中输入中文,长度最长为122个汉字(含标点符号)!";
- P(home_form)=
- "<form action= /postpage.html method="post" content-Type: application/x-www-form-urlencoded accept-charset="gb2312" onsubmit="document.charset='gb2312';">"
- "声音播放内容:<input name="voice" type="text" width=1000px><br/>"
- "<input type="submit" value="设置">";
-
- P(home_foot)=
- "</body>"
- "</html>";
- void homeCmd(WebServer &server,WebServer::ConnectionType type,char *,bool)
- {
- server.httpSuccess();
- if(type!=WebServer::HEAD){
- server.printP(home_head);
- server.printP(home_body);
- server.printP(home_form);
- server.printP(home_foot);
- }
- }
- void postCmd(WebServer &server,WebServer::ConnectionType type,char *,bool)
- {
- char name[255],value[255];
- server.httpSuccess();
- if(type == WebServer::POST)
- {
- while(server.readPOSTparam(name,255,value,255))
- {
- server << "<!doctype html><html><head><meta charset="gb2312"/><title>POST表单处理页面</title></head><body>";
- server << "<p>参数" << name << "的值是:" << value << "</p>";
- server << "</body></html>";
- if(strcmp(name,"voice")==0){
- //if(test_busy()){make_voice(value,sizeof(value));}
- if(!digitalRead(2)){make_voice(value,sizeof(value));}
-
- }
- }
- }
- }
- void setup()
- {
- Serial.begin(9600); //开启串口
- Ethernet.begin(mac,ip);//开启以太网连接
- webserver.setDefaultCommand(&homeCmd);
- webserver.addCommand("postpage.html",&postCmd);
- webserver.begin();//启动服务器
- pinMode(2,INPUT);//这是语音合成芯片的状态检测端口,接芯片上面的RDY,另一端接开发板的数字端口2
- }
- void loop()
- {
- webserver.processConnection();
-
- }
- /************************************************/
复制代码
写入程序后,将开发板的网络扩展板接入网线,然后打开浏览器,输入网址:192.168.1.188(这个网址是由程序中的 IPAddress ip(192,168,1,188); 定义的),然后我们就可以看到如下的页面,我们在文本框中输入下面的内容:
“[v7][s3]sound217请[z1]#VIP[n1]108号客户到7号窗口办理业务。sound218请[z1]#VIP[n1]108号客户到7号窗口办理业务。”(该段内容含有芯片的文本控制标记的使用,具体可以详见附件中的《XFS5152CE语音合成芯片用户开发指南V1.2.pdf》)
还有需要求助大家的是:I2C的通讯方式搞不定,因为我使用的是Arduino uno,只有一个串口,所以还是想着最好能够使用I2C的通讯方式来控制模块的工作,哪位大神能够懂的,请指教一下,十分感谢。根据开发手册,芯片的I2C地址是:0x80 |