学院与科大讯飞合作“厦门大学-科大讯飞闽南语语音与语言联合实验室”,第一阶段的语音识别演示系统,可能只是简单的闽南语孤立词识别。现成的演示系统有去年写的android演示程序。打算再写个PC端的演示系统,基本的引擎已经搭建好,后续界面和数据库方面再调整优化。再来,最近学习Golang,怎么可以不用上呢?web版演示系统,golang(Beego框架)(后端) + HTML5(前端) + MongoDB(数据库)。
本节,主要讲解web前端的录音工作,以及通过HTML5 websocket传输音频流数据到后端并保存。
来看下代码:
record.html:
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Chat by Web Sockets</title>
<script type="text/javascript" src="/static/lib/recorder.js"></script>
<script type="text/javascript" src="/static/lib/jquery-1.10.1.min.js"></script>
<style type="text/css">
</style>
</head>
<body>
<audio controls autoplay></audio>
<form>
<input type="button" id="record" value="录音">
<input type="button" id="export" value="发送">
</form>
<div id="message"></div>
<script type="text/javascript">
  // Recorder instance; stays undefined until microphone access has been granted.
  var rec;
  // Handle of the periodic upload timer; null while no recording is running.
  var intervalKey = null;
  // Number of exportWAV requests whose blob has not come back from the worker yet.
  var pendingExports = 0;
  // True after the send button was clicked, until the final chunk has been sent.
  var stopRequested = false;
  // The <audio> element on this page has no id, so it is selected by tag name.
  var audio = document.querySelector('audio');

  // Open the WebSocket to the backend as soon as the page loads.
  var ws = new WebSocket('ws://' + window.location.host + '/record/join');
  ws.onopen = function () {
    console.log("Openened connection to websocket");
  };
  ws.onclose = function () {
    console.log("Close connection to websocket");
  };
  ws.onerror = function () {
    console.log("Cannot connection to websocket");
  };
  // Play back whatever binary payload the server sends.
  ws.onmessage = function (e) {
    audio.src = URL.createObjectURL(e.data);
  };

  var onFail = function (e) {
    console.log('Rejected!', e);
  };

  // Called with the granted media stream: wrap it in an audio graph and a Recorder.
  var onSuccess = function (s) {
    var AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    var context = new AudioContextCtor();
    var mediaStreamSource = context.createMediaStreamSource(s);
    rec = new Recorder(mediaStreamSource);
  };

  navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;

  // Asks the browser for microphone access; recording itself starts on button click.
  function startRecording() {
    if (navigator.getUserMedia) {
      navigator.getUserMedia({audio: true}, onSuccess, onFail);
    } else {
      console.log('navigator.getUserMedia not present');
    }
  }
  startRecording();

  // Receives one encoded chunk from the recorder worker and uploads it.
  // "stop" is only sent once every requested chunk has arrived, so the command
  // can never overtake audio that is still being encoded.
  function onChunk(blob) {
    pendingExports -= 1;
    if (ws.readyState !== WebSocket.OPEN) {
      return;
    }
    ws.send(blob);
    if (stopRequested && pendingExports === 0) {
      stopRequested = false;
      ws.send("stop");
      $("#message").text("已发送到服务器!");
    }
  }

  // Requests the audio captured so far and empties the worker's buffers.
  // Both commands are posted back to back: the worker handles messages in
  // order, so no sample recorded in between is thrown away.
  function exportChunk() {
    pendingExports += 1;
    rec.exportWAV(onChunk);
    rec.clear();
  }

  //--------------------
  $('#record').click(function () {
    if (!rec || ws.readyState !== WebSocket.OPEN) {
      $("#message").text("Microphone or server connection is not ready yet");
      return;
    }
    if (intervalKey !== null) {
      // Already recording; a second timer would never be cleared.
      return;
    }
    stopRequested = false;
    rec.record();
    ws.send("start");
    $("#message").text("Click export to stop recording");
    // Export a WAV chunk every 3 seconds so it can be sent over the WebSocket.
    intervalKey = setInterval(exportChunk, 3000);
  });

  $('#export').click(function () {
    if (!rec || intervalKey === null) {
      return;
    }
    rec.stop();
    clearInterval(intervalKey);
    intervalKey = null;
    // Upload what was recorded since the last tick; onChunk then sends "stop".
    stopRequested = true;
    exportChunk();
  });
</script>
</body>
</html>
这段代码关键在于navigator.getUserMedia来获得客户端的媒体资源。进入该页面,将向chrome浏览器客户端请求媒体资源。请求成功后:
// Create the WebKit-prefixed audio context
var context = new webkitAudioContext();
// Wrap the granted media stream `s` in an audio source node
var mediaStreamSource = context.createMediaStreamSource(s);
// Create the recorder instance on top of that source
rec = new Recorder(mediaStreamSource);开始录音,执行rec.record(),看下recorder.js:
(function(window) {
var WORKER_PATH = ‘/static/lib/recorderWorker.js‘;
var Recorder = function(source, cfg) {
var config = cfg || {};
var bufferLen = config.bufferLen || 4096*2;
this.context = source.context;
this.node = this.context.createJavaScriptNode(bufferLen, 2, 2);
var worker = new Worker(config.workerPath || WORKER_PATH);
worker.postMessage({
command: ‘init‘,
config: {
sampleRate: 16000/*this.context.sampleRate*/
}
});
var recording = false,
currCallback;
this.node.onaudioprocess = function(e) {
if (!recording) return;
worker.postMessage({
command: ‘record‘,
buffer: [
e.inputBuffer.getChannelData(0)
,
e.inputBuffer.getChannelData(1)
]
});
}
this.configure = function(cfg) {
for (var prop in cfg) {
if (cfg.hasOwnProperty(prop)) {
config[prop] = cfg[prop];
}
}
}
this.record = function() {
recording = true;
}
this.stop = function() {
recording = false;
}
this.clear = function() {
worker.postMessage({
command: ‘clear‘
});
}
this.getBuffer = function(cb) {
currCallback = cb || config.callback;
worker.postMessage({
command: ‘getBuffer‘
})
}
this.exportWAV = function(cb, type) {
currCallback = cb || config.callback;
type = type || config.type || ‘audio/wav‘;
if (!currCallback) throw new Error(‘Callback not set‘);
worker.postMessage({
command: ‘exportWAV‘,
type: type
});
}
worker.onmessage = function(e) {
var blob = e.data;
currCallback(blob);
}
source.connect(this.node);
this.node.connect(this.context.destination); //this should not be necessary
};
Recorder.forceDownload = function(blob, filename) {
var url = (window.URL || window.webkitURL).createObjectURL(blob);
alert(url);
var link = window.document.createElement(‘a‘);
link.href = url;
link.download = filename || ‘output.wav‘;
var click = document.createEvent("Event");
click.initEvent("click", true, true);
link.dispatchEvent(click);
}
window.Recorder = Recorder;
})(window); 开始录音后,执行this.node.onaudioprocess,从录音缓冲去录音samples数据,注意:
worker.postMessage({
command: ‘record‘,
buffer: [
e.inputBuffer.getChannelData(0)
,
e.inputBuffer.getChannelData(1)
]
});buffer将从录音设备获取两个声道的数据。
recorderWorker.js:
// Worker state: total samples buffered per channel, the buffered chunks of the
// left and right channel, and the sample rate written into exported WAV headers.
var recLength = 0,
  recBuffersL = [],
  recBuffersR = [],
  sampleRate;

// Dispatches each command posted to the worker to its handler.
this.onmessage = function(e) {
  switch (e.data.command) {
    case 'init':
      init(e.data.config);
      break;
    case 'record':
      record(e.data.buffer);
      break;
    case 'exportWAV':
      exportWAV(e.data.type);
      break;
    case 'getBuffer':
      getBuffer();
      break;
    case 'clear':
      clear();
      break;
  }
};
// Stores the sample rate that encodeWAV writes into the WAV header.
// Honors config.sampleRate and falls back to 16000 when it is missing.
// The value only labels the data: the buffered samples are not resampled.
function init(config) {
  sampleRate = (config && config.sampleRate) || 16000;
}
// Buffers one block of audio: inputBuffer[0] is appended to the left-channel
// chunks, inputBuffer[1] to the right-channel chunks, and the running sample
// count grows by the length of the left block.
function record(inputBuffer) {
  var leftBlock = inputBuffer[0];
  var rightBlock = inputBuffer[1];
  recBuffersL.push(leftBlock);
  recBuffersR.push(rightBlock);
  recLength = recLength + leftBlock.length;
}
// Encodes everything buffered so far as an interleaved stereo WAV and posts
// the result back as a Blob of the requested MIME type.
function exportWAV(type) {
  var left = mergeBuffers(recBuffersL, recLength);
  var right = mergeBuffers(recBuffersR, recLength);
  var wavView = encodeWAV(interleave(left, right));
  this.postMessage(new Blob([wavView], { type: type }));
}
// Merges the buffered chunks of each channel and posts the pair
// [left, right] back to the page.
function getBuffer() {
  var channels = [
    mergeBuffers(recBuffersL, recLength),
    mergeBuffers(recBuffersR, recLength)
  ];
  this.postMessage(channels);
}
// Discards all buffered audio: both channel lists are replaced by fresh empty
// arrays and the sample count is reset. The parameter is not used.
function clear(inputBuffer) {
  recBuffersL = [];
  recBuffersR = [];
  recLength = 0;
}
// Concatenates the chunks in recBuffers, in order, into a single
// Float32Array of recLength samples.
function mergeBuffers(recBuffers, recLength) {
  var merged = new Float32Array(recLength);
  recBuffers.reduce(function(writePos, chunk) {
    merged.set(chunk, writePos);
    return writePos + chunk.length;
  }, 0);
  return merged;
}
// Interleaves the two channels sample by sample (L0, R0, L1, R1, ...) into a
// new Float32Array whose length is the sum of both input lengths.
function interleave(inputL, inputR) {
  var total = inputL.length + inputR.length;
  var mixed = new Float32Array(total);
  for (var frame = 0, out = 0; out < total; frame += 1, out += 2) {
    mixed[out] = inputL[frame];
    mixed[out + 1] = inputR[frame];
  }
  return mixed;
}
// Writes the samples of `input` into `output` (a DataView) as little-endian
// 16-bit integers starting at byte `offset`. Each sample is clamped to
// [-1, 1]; negative values are scaled by 0x8000, the others by 0x7FFF.
function floatTo16BitPCM(output, offset, input) {
  var pos = offset;
  for (var k = 0; k < input.length; k += 1) {
    var clamped = Math.max(-1, Math.min(1, input[k]));
    var scale = clamped < 0 ? 0x8000 : 0x7FFF;
    output.setInt16(pos, clamped * scale, true);
    pos += 2;
  }
}
// Writes the character codes of `string` into `view` as consecutive bytes,
// starting at byte `offset`.
function writeString(view, offset, string) {
  string.split('').forEach(function(ch, k) {
    view.setUint8(offset + k, ch.charCodeAt(0));
  });
}
//写入44位 wav数据头
function encodeWAV(samples) {
var buffer = new ArrayBuffer(44 + samples.length * 2);
var view = new DataView(buffer);
/* RIFF identifier */
writeString(view, 0, ‘RIFF‘);
/* file length */
view.setUint32(4, 32 + samples.length * 2, true);
/* RIFF type */
writeString(view, 8, ‘WAVE‘);
/* format chunk identifier */
writeString(view, 12, ‘fmt ‘);
/* format chunk length */
view.setUint32(16, 16, true);
/* sample format (raw) */
view.setUint16(20, 1, true);
/* channel count */
view.setUint16(22, 2, true);
/* sample rate */
view.setUint32(24, sampleRate, true);
/* byte rate (sample rate * block align) */
view.setUint32(28, sampleRate * 4, true);
/* block align (channel count * bytes per sample) */
view.setUint16(32, 4, true);
/* bits per sample */
view.setUint16(34, 16, true);
/* data chunk identifier */
writeString(view, 36, ‘data‘);
/* data chunk length */
view.setUint32(40, samples.length * 2, true);
floatTo16BitPCM(view, 44, samples);
return view;
}目前,只能录制22050Hz 16Bit Stereo 数据。我调整了录制参数,所需目标格式为8000Hz 16Bit Mono语音数据,但是失败了,录制出的数据仍然是22050Hz 16Bit Stereo。由于对前端javascript代码完全不了解,后续再来研究怎么解决这个录音格式的问题。
再回头看record.html中:
//进入页面服务器发送websocket握手请求
var ws = new WebSocket(‘ws://‘ + window.location.host + ‘/record/join‘);
//握手成功
ws.onopen = function () {
console.log("Openened connection to websocket");
};
//断开连接
ws.onclose = function (){
console.log("Close connection to websocket");
}
//握手失败
ws.onerror = function (){
console.log("Cannot connection to websocket");
}每次刷新登入该页面,客户端就会向服务器发送websocket握手请求,握手成功后,js代码中录好音之后 将ws.send(数据)对应到button上,点击按钮就可发送数据了。
golang beego框架后端怎么来处理数据呢?在页面对应的controllers上的代码上定义controller的join方法,代码较为简陋,初步实现功能,后续加上channel等来完善:
package controllers
import (
	"bufio"
	"net"
	"net/http"
	"os"
	"path"
	"strings"

	"github.com/astaxie/beego"
	"github.com/garyburd/go-websocket/websocket"
)
// RecordController embeds beego.Controller; its Join method handles the
// WebSocket endpoint that receives the recorded audio.
type RecordController struct {
beego.Controller
}
func (this *RecordController) Join() {
//获取请求端的IP地址
remoteAddr := strings.Split(this.Ctx.Request.RemoteAddr, ":")[0]
mlogger.i("Reciving Record Data From Host: " + remoteAddr)
//获取websocket的连接实例
ws, err := websocket.Upgrade(this.Ctx.ResponseWriter, this.Ctx.Request.Header, nil, 1024, 1024)
if _, ok := err.(websocket.HandshakeError); ok {
http.Error(this.Ctx.ResponseWriter, "Not a websocket handshake", 400)
return
} else if err != nil {
beego.Error("Cannot setup WebSocket connection:", err)
return
}
//以IP地址作为保存wav文件的文件名
wavName := "record/" + remoteAddr + ".wav"
os.MkdirAll(path.Dir(wavName), os.ModePerm)
_, e := os.Stat(wavName)
if e == nil {
//删除已有wav文件
os.Remove(wavName)
}
f, err := os.Create(wavName)
mlogger.i("Host: " + remoteAddr + " creating file handler ...")
defer f.Close()
if err != nil {
mlogger.e(err)
return
}
w := bufio.NewWriter(f)
for {
//从websocket上读取数据流
_, p, err := ws.ReadMessage()
if err != nil {
mlogger.i("Host: " + remoteAddr + " disconnected ...")
break
}
length := len(p)
if length == 4 || length == 5 {
//length == 4,说明在web上发送ws.send(‘stop‘)
//length == 5,说明在web上发送ws.send(‘start‘)
action := string(p)
mlogger.i("Client‘s action: " + action + " recording !")
if action == "stop" {
goto SAVE
} else {
goto RESTART
}
}
w.Write(p)
continue
SAVE:
mlogger.i("Host: " + remoteAddr + " saving wav file wav ...")
w.Flush()
mlogger.i("Host: " + remoteAddr + " flushing writer ...")
f.Close()
mlogger.i("Host: " + remoteAddr + " closing the file handler ...")
continue
RESTART:
os.Remove(wavName)
f, err = os.Create(wavName)
mlogger.i("Host: " + remoteAddr + " creating file handler ...")
// defer f.Close()
if err != nil {
mlogger.e(err)
return
}
w = bufio.NewWriter(f)
}
return
}在路由设置上:
// Register RecordController for the /record path.
beego.Router("/record", &controllers.RecordController{})
// Map GET requests on /record/join to RecordController.Join; this is the URL the page's WebSocket connects to.
beego.Router("/record/join", &controllers.RecordController{}, "get:Join") 这样,从前端录音,到websocket传输数据,再到beego后端读写数据到服务器本地就可实现了
本文出自 “Programming in XMU” 博客,请务必保留此出处http://liuxp0827.blog.51cto.com/5013343/1364435
Golang 基于chrome浏览器语音识别web演示系统WebHTK开发之(一)websocket编程,HTML5录音,布布扣,bubuko.com
Golang 基于chrome浏览器语音识别web演示系统WebHTK开发之(一)websocket编程,HTML5录音
原文:http://liuxp0827.blog.51cto.com/5013343/1364435