# 11-01 语音合成（TTS）

使用 ElevenLabs 等 API 实现文本转语音。运行下面的示例前，请先设置环境变量 `ELEVENLABS_API_KEY`（示例代码通过 `process.env.ELEVENLABS_API_KEY` 读取它）。

## 1. ElevenLabs API

In [None]:
/**
 * Generate speech audio for a piece of text with the ElevenLabs
 * text-to-speech REST endpoint.
 *
 * @param {string} text - Text to synthesize; must be a non-empty string.
 * @param {string} [voiceId='21m00Tcm4TlvDq8ikWAM'] - ElevenLabs voice ID.
 * @param {string} [modelId='eleven_multilingual_v2'] - ElevenLabs model ID.
 *   The default is a multilingual model because `eleven_monolingual_v1`
 *   is English-only and this notebook synthesizes Chinese text; pass
 *   'eleven_monolingual_v1' explicitly to get the previous behavior.
 * @returns {Promise<Buffer>} The MP3 audio bytes returned by the API.
 * @throws {TypeError} If `text` is not a non-empty string.
 * @throws {Error} If ELEVENLABS_API_KEY is not set or the API responds
 *   with a non-2xx status.
 */
async function textToSpeech(
  text,
  voiceId = '21m00Tcm4TlvDq8ikWAM',
  modelId = 'eleven_multilingual_v2'
) {
  if (typeof text !== 'string' || text.trim() === '') {
    throw new TypeError('text must be a non-empty string');
  }

  const apiKey = process.env.ELEVENLABS_API_KEY;
  if (!apiKey) {
    // Fail early: otherwise the header would be sent as the string "undefined".
    throw new Error('ELEVENLABS_API_KEY is not set');
  }

  // voiceId may come from an untrusted client, so escape it before it
  // becomes part of the URL path.
  const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`;

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Accept': 'audio/mpeg',
      'Content-Type': 'application/json',
      'xi-api-key': apiKey
    },
    body: JSON.stringify({
      text,
      model_id: modelId,
      voice_settings: {
        stability: 0.5,
        similarity_boost: 0.5
      }
    })
  });

  if (!response.ok) {
    // statusText is often empty, so include the numeric status and a
    // short excerpt of the error body to make failures diagnosable.
    const detail = (await response.text().catch(() => '')).slice(0, 200);
    const suffix = detail ? ` - ${detail}` : '';
    throw new Error(`TTS failed: ${response.status} ${response.statusText}${suffix}`);
  }

  const audioBuffer = await response.arrayBuffer();
  return Buffer.from(audioBuffer);
}

// Usage: synthesize one sample sentence and save the resulting MP3 to disk.
const fs = require('fs');
const sampleText = '你好，这是语音合成的测试。';
const outputPath = 'output.mp3';
const audio = await textToSpeech(sampleText);
fs.writeFileSync(outputPath, audio);

## 2. 流式语音生成

In [None]:
/**
 * Fetch speech audio from the ElevenLabs streaming text-to-speech endpoint
 * and collect the streamed chunks into a single Buffer.
 *
 * @param {string} text - Text to synthesize; must be a non-empty string.
 * @param {string} [voiceId='21m00Tcm4TlvDq8ikWAM'] - ElevenLabs voice ID.
 * @param {string} [modelId='eleven_multilingual_v2'] - ElevenLabs model ID.
 *   The default is a multilingual model because `eleven_monolingual_v1`
 *   is English-only; pass it explicitly to get the previous behavior.
 * @returns {Promise<Buffer>} All streamed audio chunks concatenated in order.
 * @throws {TypeError} If `text` is not a non-empty string.
 * @throws {Error} If the API responds with a non-2xx status or the
 *   response has no body stream.
 */
async function streamTTS(
  text,
  voiceId = '21m00Tcm4TlvDq8ikWAM',
  modelId = 'eleven_multilingual_v2'
) {
  if (typeof text !== 'string' || text.trim() === '') {
    throw new TypeError('text must be a non-empty string');
  }

  const response = await fetch(
    // Escape voiceId so a caller-supplied value cannot alter the URL path.
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream`,
    {
      method: 'POST',
      headers: {
        'Accept': 'audio/mpeg',
        'Content-Type': 'application/json',
        'xi-api-key': process.env.ELEVENLABS_API_KEY
      },
      body: JSON.stringify({ text, model_id: modelId })
    }
  );

  if (!response.ok) {
    // Without this check the JSON error body would be returned as "audio".
    const detail = (await response.text().catch(() => '')).slice(0, 200);
    const suffix = detail ? ` - ${detail}` : '';
    throw new Error(`TTS stream failed: ${response.status} ${response.statusText}${suffix}`);
  }
  if (!response.body) {
    throw new Error('TTS stream failed: response has no body');
  }

  const reader = response.body.getReader();
  const chunks = [];

  // Drain the stream; each value is a chunk of audio bytes.
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    chunks.push(value);
  }

  return Buffer.concat(chunks);
}

## 3. 多语音对话

In [None]:
// Dialogue configuration: which ElevenLabs voice ID each speaker uses.
const voices = {
  alice: '21m00Tcm4TlvDq8ikWAM', // labelled "Rachel" by the author
  // Labelled "Adam" by the author.
  // NOTE(review): confirm this ID really maps to "Adam" via the voices
  // list endpoint — it could not be verified from this file.
  bob: 'AZnzlk1XvdvUeBnXmlld',
};

// The script, written as [speaker, text] pairs and expanded into
// { speaker, text } records in speaking order.
const conversation = [
  ['alice', '你好 Bob，今天过得怎么样？'],
  ['bob', '还不错 Alice，我刚完成了一个新项目。'],
  ['alice', '太棒了！能给我讲讲吗？'],
].map(([speaker, text]) => ({ speaker, text }));

/**
 * Generate one MP3 file per dialogue line, using the voice assigned to
 * each line's speaker in the module-level `voices` map.
 *
 * Lines are synthesized sequentially, and each file is written to the
 * current working directory as `dialogue_<index>_<speaker>.mp3`.
 *
 * @param {Array<{speaker: string, text: string}>} conversation - Dialogue
 *   lines in speaking order.
 * @returns {Promise<string[]>} Names of the files written, in order.
 * @throws {Error} If a line names a speaker that has no entry in `voices`.
 */
async function generateConversation(conversation) {
  // Validate every speaker before making any API call: an unknown speaker
  // would otherwise yield `undefined`, silently fall back to the default
  // voice, and produce a mislabelled file after earlier lines were billed.
  conversation.forEach(({ speaker }, index) => {
    if (!Object.hasOwn(voices, speaker)) {
      throw new Error(`Unknown speaker "${speaker}" at conversation index ${index}`);
    }
  });

  const writtenFiles = [];
  for (const [index, { speaker, text }] of conversation.entries()) {
    const audio = await textToSpeech(text, voices[speaker]);

    const fileName = `dialogue_${index}_${speaker}.mp3`;
    fs.writeFileSync(fileName, audio);
    writtenFiles.push(fileName);

    // Only show an ellipsis when the preview was actually truncated.
    const preview = text.length > 30 ? `${text.substring(0, 30)}...` : text;
    console.log(`Generated: ${speaker} - ${preview}`);
  }
  return writtenFiles;
}

// Run the generator over the sample script; the audio files are written
// to the current working directory.
await generateConversation(conversation);

## 4. WebSocket 实时语音（OpenClaw 风格）

In [None]:
// Real-time TTS service over WebSocket.
//
// Protocol: the client sends a JSON text message `{ "text": ..., "voiceId": ... }`.
// The server replies with the audio bytes as a binary message, or with a
// JSON text message `{ "error": ... }` if the request could not be served.
import { WebSocketServer } from 'ws';

const wss = new WebSocketServer({ port: 8080 });

/**
 * Send a payload only if the socket is still open.
 *
 * Synthesis is awaited before replying, so the client may have disconnected
 * in the meantime; sending on a socket that is not open would throw.
 *
 * @param {import('ws').WebSocket} ws - Client connection.
 * @param {Buffer|string} payload - Audio bytes or a JSON error string.
 */
function sendIfOpen(ws, payload) {
  if (ws.readyState === ws.OPEN) {
    ws.send(payload);
  }
}

wss.on('connection', (ws) => {
  console.log('Client connected');

  ws.on('message', async (data) => {
    try {
      // Parse inside the try block: a malformed message must produce an
      // error reply, not an unhandled rejection in this async handler.
      const { text, voiceId } = JSON.parse(data.toString());

      // Generate the speech audio.
      const audio = await textToSpeech(text, voiceId);

      // Send the audio bytes back to the client.
      sendIfOpen(ws, audio);
    } catch (error) {
      sendIfOpen(ws, JSON.stringify({ error: error.message }));
    }
  });

  // An 'error' event without a listener would crash the process.
  ws.on('error', (error) => {
    console.error(`Client socket error: ${error.message}`);
  });

  ws.on('close', () => {
    console.log('Client disconnected');
  });
});

console.log('TTS WebSocket server running on ws://localhost:8080');

## 练习

1. 实现一个带语音的聊天机器人
2. 生成一篇博客文章的语音版
3. 查看 OpenClaw 的语音处理实现