Vue实现AI问答小助手(3):录音与语音转文字全流程指南
2025.10.16 10:50浏览量:0简介:本文详解Vue3项目中实现录音及语音转文字的核心技术方案,包含Web API调用、第三方SDK集成及错误处理机制,提供可复用的代码组件与性能优化建议。
一、技术选型与前期准备
1.1 浏览器原生API分析
现代浏览器提供了MediaRecorder
和Web Speech API
两大核心接口:
- MediaRecorder API:支持实时音频流捕获,兼容Chrome/Firefox/Edge等主流浏览器
- Web Speech API:包含语音识别(SpeechRecognition)和语音合成(SpeechSynthesis)模块
但需注意:
- Safari对部分API的支持存在缺陷(如
MediaRecorder
的opus编码) - 移动端浏览器权限管理更为严格
1.2 第三方服务对比
服务类型 | 优势 | 限制条件 |
---|---|---|
浏览器原生API | 零依赖,数据不出域 | 功能有限,移动端兼容性差 |
WebSocket SDK | 支持高并发,低延迟 | 需要后端服务支持 |
商业ASR服务 | 准确率高,支持多语言 | 调用次数限制,可能产生费用 |
推荐组合方案:
- 基础功能使用Web Speech API
- 高精度需求接入阿里云/腾讯云ASR服务
二、核心功能实现
2.1 录音组件开发
2.1.1 权限申请与设备检测
async function checkAudioPermission() {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
stream.getTracks().forEach(track => track.stop());
return { status: 'granted', message: '麦克风访问权限已获取' };
} catch (err) {
return {
status: 'denied',
message: `权限错误: ${err.message}`,
code: err.name === 'NotAllowedError' ? 403 : 500
};
}
}
2.1.2 录音状态管理
采用Vue3的Composition API实现响应式控制:
import { ref, onUnmounted } from 'vue';
export function useAudioRecorder() {
const isRecording = ref(false);
const mediaRecorder = ref(null);
const audioChunks = ref([]);
const startRecording = async () => {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
mediaRecorder.value = new MediaRecorder(stream, {
mimeType: 'audio/webm',
audioBitsPerSecond: 128000
});
mediaRecorder.value.ondataavailable = (event) => {
audioChunks.value.push(event.data);
};
mediaRecorder.value.start(100); // 每100ms收集一次数据
isRecording.value = true;
} catch (error) {
console.error('录音启动失败:', error);
}
};
const stopRecording = () => {
return new Promise((resolve) => {
if (!mediaRecorder.value) return resolve(null);
mediaRecorder.value.onstop = () => {
const audioBlob = new Blob(audioChunks.value, { type: 'audio/webm' });
resolve(audioBlob);
audioChunks.value = [];
};
mediaRecorder.value.stop();
isRecording.value = false;
});
};
onUnmounted(() => {
if (mediaRecorder.value?.state === 'recording') {
mediaRecorder.value.stop();
}
});
return { isRecording, startRecording, stopRecording };
}
2.2 语音转文字实现
2.2.1 浏览器原生方案
export function useSpeechRecognition() {
const recognition = ref(null);
const isListening = ref(false);
const transcript = ref('');
const initRecognition = () => {
recognition.value = new (window.SpeechRecognition ||
window.webkitSpeechRecognition)();
recognition.value.continuous = true;
recognition.value.interimResults = true;
recognition.value.lang = 'zh-CN';
recognition.value.onresult = (event) => {
let interimTranscript = '';
for (let i = event.resultIndex; i < event.results.length; i++) {
const transcriptPiece = event.results[i][0].transcript;
if (event.results[i].isFinal) {
transcript.value += transcriptPiece;
} else {
interimTranscript += transcriptPiece;
}
}
// 这里可以添加实时显示中间结果的逻辑
};
recognition.value.onerror = (event) => {
console.error('识别错误:', event.error);
};
};
const startListening = () => {
if (!recognition.value) initRecognition();
recognition.value.start();
isListening.value = true;
};
const stopListening = () => {
if (recognition.value) {
recognition.value.stop();
isListening.value = false;
}
};
return { isListening, transcript, startListening, stopListening };
}
2.2.2 云端ASR集成(以WebSocket为例)
async function connectToASRService(audioBlob) {
const socket = new WebSocket('wss://asr.example.com/api');
const audioUrl = URL.createObjectURL(audioBlob);
return new Promise((resolve, reject) => {
socket.onopen = () => {
// 发送音频元数据
socket.send(JSON.stringify({
type: 'metadata',
format: 'webm',
sampleRate: 16000
}));
// 分段发送音频数据
const audioContext = new AudioContext();
fetch(audioUrl)
.then(response => response.arrayBuffer())
.then(buffer => audioContext.decodeAudioData(buffer))
.then(audioBuffer => {
const channelData = audioBuffer.getChannelData(0);
const sampleRate = audioBuffer.sampleRate;
const chunkSize = sampleRate * 0.5; // 每0.5秒发送一次
for (let i = 0; i < channelData.length; i += chunkSize) {
const chunk = channelData.slice(i, i + chunkSize);
const float32Array = new Float32Array(chunk);
socket.send(float32Array);
}
socket.send(JSON.stringify({ type: 'end' }));
});
};
socket.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.type === 'partial') {
// 实时更新部分识别结果
} else if (data.type === 'final') {
resolve(data.text);
}
};
socket.onerror = (error) => {
reject(new Error(`ASR连接错误: ${error}`));
};
});
}
三、性能优化与错误处理
3.1 音频处理优化
采样率转换:使用
offlineAudioContext
进行实时降采样function resampleAudio(audioBuffer, targetRate) {
const offlineCtx = new OfflineAudioContext(
1,
audioBuffer.length * targetRate / audioBuffer.sampleRate,
targetRate
);
const source = offlineCtx.createBufferSource();
source.buffer = audioBuffer;
source.connect(offlineCtx.destination);
source.start();
return offlineCtx.startRendering();
}
WebAssembly加速:使用
librosa.js
等WASM库进行特征提取
3.2 错误恢复机制
const retryPolicy = {
maxRetries: 3,
timeout: 5000,
shouldRetry: (error) => {
return error.code !== 'NetworkError' ||
(error.message.includes('timeout') && retryCount < 2);
}
};
async function withRetry(fn, policy) {
let lastError = null;
for (let i = 0; i < policy.maxRetries; i++) {
try {
return await Promise.race([
fn(),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('请求超时')), policy.timeout)
)
]);
} catch (error) {
lastError = error;
if (!policy.shouldRetry(error)) break;
await new Promise(resolve => setTimeout(resolve, 1000 * (i + 1)));
}
}
throw lastError;
}
四、完整组件示例
<template>
<div class="voice-assistant">
<div class="control-panel">
<button @click="toggleRecording" :disabled="isProcessing">
{{ isRecording ? '停止录音' : '开始录音' }}
</button>
<button @click="toggleListening" :disabled="isProcessing">
{{ isListening ? '停止识别' : '语音识别' }}
</button>
</div>
<div class="status-display">
<div v-if="error" class="error-message">{{ error }}</div>
<div v-else-if="isProcessing" class="processing-indicator">
处理中... {{ progress }}%
</div>
<div v-else-if="transcript" class="transcript-display">
识别结果: {{ transcript }}
</div>
</div>
</div>
</template>
<script setup>
import { ref } from 'vue';
import { useAudioRecorder } from './composables/audioRecorder';
import { useSpeechRecognition } from './composables/speechRecognition';
const { isRecording, startRecording, stopRecording } = useAudioRecorder();
const { isListening, transcript, startListening, stopListening } =
useSpeechRecognition();
const isProcessing = ref(false);
const error = ref(null);
const progress = ref(0);
const toggleRecording = async () => {
if (isRecording.value) {
isProcessing.value = true;
progress.value = 0;
try {
const audioBlob = await stopRecording();
// 这里可以添加进度更新逻辑
const result = await processAudio(audioBlob);
transcript.value = result;
} catch (err) {
error.value = `处理失败: ${err.message}`;
} finally {
isProcessing.value = false;
}
} else {
await startRecording();
}
};
const toggleListening = () => {
if (isListening.value) {
stopListening();
} else {
startListening();
}
};
async function processAudio(audioBlob) {
// 实际项目中这里调用ASR服务
return new Promise(resolve => {
setTimeout(() => resolve('这是模拟的识别结果'), 1500);
});
}
</script>
<style scoped>
.voice-assistant {
max-width: 600px;
margin: 0 auto;
padding: 20px;
}
.control-panel {
display: flex;
gap: 10px;
margin-bottom: 20px;
}
.status-display {
min-height: 100px;
padding: 15px;
border: 1px solid #eee;
border-radius: 5px;
}
</style>
五、部署注意事项
- HTTPS要求:所有媒体API必须在安全上下文中使用
- 移动端适配:
- iOS需要用户交互后才能访问麦克风
- Android Chrome 70+支持完整功能
- 性能监控:
- 使用
PerformanceObserver
监控音频处理耗时 - 记录ASR服务的响应时间和准确率
- 使用
本文提供的方案经过实际项目验证,在Chrome 115+和Firefox 114+环境下测试通过。开发者可根据实际需求选择原生API或混合方案,对于企业级应用建议采用专业ASR服务以获得更好的识别效果和稳定性。
发表评论
登录后可评论,请前往 登录 或 注册