In [2]:
%pip install websocket-client

Collecting websocket-client
  Downloading websocket_client-1.6.1-py3-none-any.whl (56 kB)
     -------------------------------------- 56.9/56.9 kB 249.6 kB/s eta 0:00:00
Installing collected packages: websocket-client
Successfully installed websocket-client-1.6.1
Note: you may need to restart the kernel to use updated packages.


In [8]:
# -*- coding:utf-8 -*-
#
#   author: iflytek
#
#  本demo测试时运行的环境为：Windows + Python3.7
#  本demo测试成功运行时所安装的第三方库及其版本如下，您可自行逐一或者复制到一个新的txt文件利用pip一次性安装：
#   cffi==1.12.3
#   gevent==1.4.0
#   greenlet==0.4.15
#   pycparser==2.19
#   six==1.12.0
#   websocket==0.2.1
#   websocket-client==0.56.0
#
#  语音听写流式 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/rest_api/语音听写（流式版）.html
#  webapi 听写服务参考帖子（必看）：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
#  语音听写流式WebAPI 服务，热词使用方式：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--个性化热词，
#  设置热词
#  注意：热词只能在识别的时候会增加热词的识别权重，需要注意的是增加相应词条的识别率，但并不是绝对的，具体效果以您测试为准。
#  语音听写流式WebAPI 服务，方言试用方法：登陆开放平台https://www.xfyun.cn/后，找到控制台--我的应用---语音听写（流式）---服务管理--识别语种列表
#  可添加语种或方言，添加后会显示该方言的参数值
#  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import websocket
import datetime
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
import time
import ssl
from wsgiref.handlers import format_date_time
from datetime import datetime
from time import mktime
import _thread as thread

STATUS_FIRST_FRAME = 0  # 第一帧的标识
STATUS_CONTINUE_FRAME = 1  # 中间帧标识
STATUS_LAST_FRAME = 2  # 最后一帧的标识


class Ws_Param(object):
    # 初始化
    def __init__(self, APPID, APIKey, APISecret, AudioFile):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile

        # 公共参数(common)
        self.CommonArgs = {"app_id": self.APPID}
        # 业务参数(business)，更多个性化参数可在官网查看
        self.BusinessArgs = {"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vinfo":1,"vad_eos":10000}

    # 生成url
    def create_url(self):
        url = 'wss://ws-api.xfyun.cn/v2/iat'
        # 生成RFC1123格式的时间戳
        now = datetime.now()
        date = format_date_time(mktime(now.timetuple()))

        # 拼接字符串
        signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
        signature_origin += "date: " + date + "\n"
        signature_origin += "GET " + "/v2/iat " + "HTTP/1.1"
        # 进行hmac-sha256进行加密
        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')

        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
        # 将请求的鉴权参数组合为字典
        v = {
            "authorization": authorization,
            "date": date,
            "host": "ws-api.xfyun.cn"
        }
        # 拼接鉴权参数，生成url
        url = url + '?' + urlencode(v)
        # print("date: ",date)
        # print("v: ",v)
        # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释，比对相同参数时生成的url与自己代码生成的url是否一致
        # print('websocket url :', url)
        return url


# 收到websocket消息的处理
def on_message(ws, message):
    try:
        code = json.loads(message)["code"]
        sid = json.loads(message)["sid"]
        if code != 0:
            errMsg = json.loads(message)["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))

        else:
            data = json.loads(message)["data"]["result"]["ws"]
            # print(json.loads(message))
            result = ""
            for i in data:
                for w in i["cw"]:
                    result += w["w"]
            print("sid:%s call success!,data is:%s" % (sid, json.dumps(data, ensure_ascii=False)))
    except Exception as e:
        print("receive msg,but parse exception:", e)



# 收到websocket错误的处理
def on_error(ws, error):
    print("### error:", error)


# 收到websocket关闭的处理
def on_close(ws,a,b):
    print("### closed ###")


# 收到websocket连接建立的处理
def on_open(ws):
    def run(*args):
        frameSize = 8000  # 每一帧的音频大小
        intervel = 0.04  # 发送音频间隔(单位:s)
        status = STATUS_FIRST_FRAME  # 音频的状态信息，标识音频是第一帧，还是中间帧、最后一帧

        with open(wsParam.AudioFile, "rb") as fp:
            while True:
                buf = fp.read(frameSize)
                # 文件结束
                if not buf:
                    status = STATUS_LAST_FRAME
                # 第一帧处理
                # 发送第一帧音频，带business 参数
                # appid 必须带上，只需第一帧发送
                if status == STATUS_FIRST_FRAME:

                    d = {"common": wsParam.CommonArgs,
                         "business": wsParam.BusinessArgs,
                         "data": {"status": 0, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    d = json.dumps(d)
                    ws.send(d)
                    status = STATUS_CONTINUE_FRAME
                # 中间帧处理
                elif status == STATUS_CONTINUE_FRAME:
                    d = {"data": {"status": 1, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                # 最后一帧处理
                elif status == STATUS_LAST_FRAME:
                    d = {"data": {"status": 2, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                    time.sleep(1)
                    break
                # 模拟音频采样间隔
                time.sleep(intervel)
        ws.close()

    thread.start_new_thread(run, ())


if __name__ == "__main__":
    # 测试时候在此处正确填写相关信息即可运行
    time1 = datetime.now()
    wsParam = Ws_Param(APPID='xxx', APISecret='xxx',
                       APIKey='xxx',
                       AudioFile=r'tmp.pcm')
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    time2 = datetime.now()
    print(time2-time1)


sid:iat000df4fd@dx1890a4b2e31a11f802 call success!,data is:[{"bg": 175, "cw": [{"w": "您好", "sc": 0}]}]
sid:iat000df4fd@dx1890a4b2e31a11f802 call success!,data is:[{"cw": [{"sc": 0, "w": "，"}], "bg": 286}, {"bg": 286, "cw": [{"sc": 0, "w": "现在"}]}, {"bg": 362, "cw": [{"sc": 0, "w": "是"}]}, {"bg": 454, "cw": [{"sc": 0, "w": "中午"}]}]
sid:iat000df4fd@dx1890a4b2e31a11f802 call success!,data is:[{"bg": 557, "cw": [{"sc": 0, "w": "，"}]}, {"bg": 557, "cw": [{"sc": 0, "w": "你"}]}, {"bg": 581, "cw": [{"sc": 0, "w": "想"}]}, {"bg": 617, "cw": [{"sc": 0, "w": "干什么"}]}, {"bg": 661, "cw": [{"sc": 0, "w": "吗"}]}]
sid:iat000df4fd@dx1890a4b2e31a11f802 call success!,data is:[{"bg": 801, "cw": [{"sc": 0, "w": "？"}]}, {"bg": 801, "cw": [{"sc": 0, "w": "今天"}]}, {"bg": 885, "cw": [{"sc": 0, "w": "天气"}]}, {"bg": 929, "cw": [{"sc": 0, "w": "不错"}]}]
sid:iat000df4fd@dx1890a4b2e31a11f802 call success!,data is:[{"bg": 984, "cw": [{"sc": 0, "w": "。"}]}]
### closed ###
0:00:03.174497


In [7]:
# 录制音频，并保存为pcm格式
import pyaudio
import wave
import time
import os
import sys
import threading
import queue
import datetime
import time
import os

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "tmp.pcm"
# WAVE_OUTPUT_FILENAME = "output.wav"

def record():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,input_device_index=1,
                    frames_per_buffer=CHUNK)
    print("开始录音,请说话......")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("录音结束,请闭嘴!")
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

record()

开始录音,请说话......
录音结束,请闭嘴!


整合上面的代码

In [1]:
# 写个按钮, 点击后录音
# 1. 用ipywidgets写个按钮
# 2. 点击后录音
# 3. 点击后停止录音
# 4. 保存录音为tmp.pcm

import pyaudio
import wave
import time
import os
import sys
import threading
import queue
import datetime
import ipywidgets as widgets
import websocket
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
import ssl
from wsgiref.handlers import format_date_time
from datetime import datetime
from time import mktime
import _thread as thread

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "tmp.pcm"
wsParam = None
# WAVE_OUTPUT_FILENAME = "output.wav"

STATUS_FIRST_FRAME = 0  # 第一帧的标识
STATUS_CONTINUE_FRAME = 1  # 中间帧标识
STATUS_LAST_FRAME = 2  # 最后一帧的标识
tmp_sentence = ""

class Ws_Param(object):
    # 初始化
    def __init__(self, APPID, APIKey, APISecret, AudioFile):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile

        # 公共参数(common)
        self.CommonArgs = {"app_id": self.APPID}
        # 业务参数(business)，更多个性化参数可在官网查看
        self.BusinessArgs = {"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vinfo":1,"vad_eos":10000}

    # 生成url
    def create_url(self):
        url = 'wss://ws-api.xfyun.cn/v2/iat'
        # 生成RFC1123格式的时间戳
        now = datetime.now()
        date = format_date_time(mktime(now.timetuple()))

        # 拼接字符串
        signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
        signature_origin += "date: " + date + "\n"
        signature_origin += "GET " + "/v2/iat " + "HTTP/1.1"
        # 进行hmac-sha256进行加密
        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')

        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
        # 将请求的鉴权参数组合为字典
        v = {
            "authorization": authorization,
            "date": date,
            "host": "ws-api.xfyun.cn"
        }
        # 拼接鉴权参数，生成url
        url = url + '?' + urlencode(v)
        # print("date: ",date)
        # print("v: ",v)
        # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释，比对相同参数时生成的url与自己代码生成的url是否一致
        # print('websocket url :', url)
        return url


# 收到websocket消息的处理
def on_message(ws, message):
    try:
        code = json.loads(message)["code"]
        sid = json.loads(message)["sid"]
        if code != 0:
            errMsg = json.loads(message)["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))

        else:
            data = json.loads(message)["data"]["result"]["ws"]
            print(data)
            # print(json.loads(message))
            result = ""
            for i in data:
                for w in i["cw"]:
                    result += w["w"]
            print("sid:%s call success!,data is:%s" % (sid, json.dumps(data, ensure_ascii=False)))
            global tmp_sentence
            for i in data:
                for w in i["cw"]:
                    tmp_sentence += w["w"]
            print(tmp_sentence)
    except Exception as e:
        print("receive msg,but parse exception:", e)



# 收到websocket错误的处理
def on_error(ws, error):
    print("### error:", error)


# 收到websocket关闭的处理
def on_close(ws,a,b):
    print("### closed ###")


# 收到websocket连接建立的处理
def on_open(ws):
    def run(*args):
        global wsParam
        frameSize = 8000  # 每一帧的音频大小
        intervel = 0.04  # 发送音频间隔(单位:s)
        status = STATUS_FIRST_FRAME  # 音频的状态信息，标识音频是第一帧，还是中间帧、最后一帧

        with open(wsParam.AudioFile, "rb") as fp:
            while True:
                buf = fp.read(frameSize)
                # 文件结束
                if not buf:
                    status = STATUS_LAST_FRAME
                # 第一帧处理
                # 发送第一帧音频，带business 参数
                # appid 必须带上，只需第一帧发送
                if status == STATUS_FIRST_FRAME:

                    d = {"common": wsParam.CommonArgs,
                         "business": wsParam.BusinessArgs,
                         "data": {"status": 0, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    d = json.dumps(d)
                    ws.send(d)
                    status = STATUS_CONTINUE_FRAME
                # 中间帧处理
                elif status == STATUS_CONTINUE_FRAME:
                    d = {"data": {"status": 1, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                # 最后一帧处理
                elif status == STATUS_LAST_FRAME:
                    d = {"data": {"status": 2, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                    time.sleep(1)
                    break
                # 模拟音频采样间隔
                time.sleep(intervel)
        ws.close()

    thread.start_new_thread(run, ())

def record():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,input_device_index=1,
                    frames_per_buffer=CHUNK)
    print("开始录音,请说话......")
    # 判断麦克风是否录入声音，如果没有声音则退出程序

    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("录音结束,请闭嘴!")
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

def recognize():
    global wsParam
    # 测试时候在此处正确填写相关信息即可运行
    # 输入更改为来自麦克风的音频流
    # record()
    wsParam = Ws_Param(APPID='xxx', APISecret='xxx',
                       APIKey='xxx',
                       AudioFile=r'tmp.pcm')
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

# 1. 用ipywidgets写个按钮
button1 = widgets.Button(description="录音")
display(button1)

# 2. 点击后录音
def on_button_clicked1(b):
    record()

# 3. 点击后停止录音
# 4. 保存录音为tmp.pcm

# 5. 用ipywidgets写个按钮 识别
button2 = widgets.Button(description="识别")
display(button2)

# 6. 点击后识别
def on_button_clicked2(b):
    recognize()

button1.on_click(on_button_clicked1)
button2.on_click(on_button_clicked2)




Button(description='录音', style=ButtonStyle())

Button(description='识别', style=ButtonStyle())

开始录音,请说话......
录音结束,请闭嘴!
[{'bg': 265, 'cw': [{'sc': 0, 'w': '现在'}]}, {'bg': 297, 'cw': [{'w': '是', 'sc': 0}]}, {'bg': 305, 'cw': [{'sc': 0, 'w': '北京时间'}]}, {'bg': 369, 'cw': [{'w': '下午', 'sc': 0}]}, {'bg': 405, 'cw': [{'sc': 0, 'w': '2:23'}]}]
sid:iat000ddebf@dx189115c27dfa11f802 call success!,data is:[{"bg": 265, "cw": [{"sc": 0, "w": "现在"}]}, {"bg": 297, "cw": [{"w": "是", "sc": 0}]}, {"bg": 305, "cw": [{"sc": 0, "w": "北京时间"}]}, {"bg": 369, "cw": [{"w": "下午", "sc": 0}]}, {"bg": 405, "cw": [{"sc": 0, "w": "2:23"}]}]
现在是北京时间下午2:23
[{'cw': [{'w': '，', 'sc': 0}], 'bg': 600}, {'bg': 600, 'cw': [{'sc': 0, 'w': '然后'}]}, {'bg': 624, 'cw': [{'sc': 0, 'w': '现在'}]}, {'bg': 652, 'cw': [{'sc': 0, 'w': '是'}]}, {'bg': 672, 'cw': [{'sc': 0, 'w': '08.9'}]}]
sid:iat000ddebf@dx189115c27dfa11f802 call success!,data is:[{"cw": [{"w": "，", "sc": 0}], "bg": 600}, {"bg": 600, "cw": [{"sc": 0, "w": "然后"}]}, {"bg": 624, "cw": [{"sc": 0, "w": "现在"}]}, {"bg": 652, "cw": [{"sc": 0, "w": "是"}]}, {"bg": 672, "cw": 

In [None]:
import pyaudio
import numpy as np
import ipywidgets as widgets

# 设置参数
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
THRESHOLD = 400  # 阈值，用于判断是否有人声传入

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
               channels=CHANNELS,
               rate=RATE,
               input=True,
               frames_per_buffer=CHUNK)

print("开始监听麦克风...")

text_box = widgets.Textarea()
display(text_box)

while True:
    # time.sleep(0.1)
    data = stream.read(CHUNK)
    audio_data = np.frombuffer(data, dtype=np.int16)
    energy = np.sum(np.abs(audio_data)) / CHUNK
    if energy > THRESHOLD:
        # print("有人声传入")
        text_box.value = "有人声传入"
    else:
        # print("没有人声传入")
        text_box.value = "没有人声传入"

stream.stop_stream()
stream.close()
p.terminate()


合并监听和代码

In [14]:
# %pip install openai
%pip install pyttsx3

Collecting pyttsx3
  Downloading pyttsx3-2.90-py3-none-any.whl (39 kB)
Collecting comtypes (from pyttsx3)
  Downloading comtypes-1.2.0-py2.py3-none-any.whl (184 kB)
     ------------------------------------ 184.3/184.3 kB 484.9 kB/s eta 0:00:00
Collecting pypiwin32 (from pyttsx3)
  Downloading pypiwin32-223-py3-none-any.whl (1.7 kB)
Installing collected packages: comtypes, pypiwin32, pyttsx3
Successfully installed comtypes-1.2.0 pypiwin32-223 pyttsx3-2.90
Note: you may need to restart the kernel to use updated packages.


In [6]:
# 写个按钮, 点击后录音
# 1. 用ipywidgets写个按钮
# 2. 点击后录音
# 3. 点击后停止录音
# 4. 保存录音为tmp.pcm

import pyaudio
import wave
import time
import os
import sys
import threading
import queue
import datetime
import ipywidgets as widgets
import websocket
import hashlib
import base64
import hmac
import json
from urllib.parse import urlencode
import ssl
from wsgiref.handlers import format_date_time
from datetime import datetime
from time import mktime
import _thread as thread
import numpy as np
import openai
import pyttsx3
import ctypes
import inspect

# 设置参数
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "tmp.pcm"
wsParam = None
# WAVE_OUTPUT_FILENAME = "output.wav"

STATUS_FIRST_FRAME = 0  # 第一帧的标识
STATUS_CONTINUE_FRAME = 1  # 中间帧标识
STATUS_LAST_FRAME = 2  # 最后一帧的标识
tmp_sentence = ""

THRESHOLD = 500  # 阈值，用于判断是否有人声传入
frames = []
messages = []
silence_flag = False
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,input_device_index=1,
                frames_per_buffer=CHUNK)
speak_thread = None

class Ws_Param(object):
    # 初始化
    def __init__(self, APPID, APIKey, APISecret, AudioFile):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile

        # 公共参数(common)
        self.CommonArgs = {"app_id": self.APPID}
        # 业务参数(business)，更多个性化参数可在官网查看
        self.BusinessArgs = {"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vinfo":1,"vad_eos":10000}

    # 生成url
    def create_url(self):
        url = 'wss://ws-api.xfyun.cn/v2/iat'
        # 生成RFC1123格式的时间戳
        now = datetime.now()
        date = format_date_time(mktime(now.timetuple()))

        # 拼接字符串
        signature_origin = "host: " + "ws-api.xfyun.cn" + "\n"
        signature_origin += "date: " + date + "\n"
        signature_origin += "GET " + "/v2/iat " + "HTTP/1.1"
        # 进行hmac-sha256进行加密
        signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'),
                                 digestmod=hashlib.sha256).digest()
        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')

        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
        # 将请求的鉴权参数组合为字典
        v = {
            "authorization": authorization,
            "date": date,
            "host": "ws-api.xfyun.cn"
        }
        # 拼接鉴权参数，生成url
        url = url + '?' + urlencode(v)
        # print("date: ",date)
        # print("v: ",v)
        # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释，比对相同参数时生成的url与自己代码生成的url是否一致
        # print('websocket url :', url)
        return url


# 收到websocket消息的处理
def on_message(ws, message):
    try:
        code = json.loads(message)["code"]
        sid = json.loads(message)["sid"]
        if code != 0:
            errMsg = json.loads(message)["message"]
            # print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))

        else:
            data = json.loads(message)["data"]["result"]["ws"]
            # print(data)
            # print(json.loads(message))
            result = ""
            for i in data:
                for w in i["cw"]:
                    result += w["w"]
            # print("sid:%s call success!,data is:%s" % (sid, json.dumps(data, ensure_ascii=False)))
            global tmp_sentence
            for i in data:
                for w in i["cw"]:
                    tmp_sentence += w["w"]
            # print(tmp_sentence)
    except Exception as e:
        print("receive msg,but parse exception:", e)



# 收到websocket错误的处理
def on_error(ws, error):
    print("### error:", error)


# 收到websocket关闭的处理
def on_close(ws,a,b):
    # print("### closed ###")
    pass



# 收到websocket连接建立的处理
def on_open(ws):
    def run(*args):
        global wsParam
        frameSize = 8000  # 每一帧的音频大小
        intervel = 0.04  # 发送音频间隔(单位:s)
        status = STATUS_FIRST_FRAME  # 音频的状态信息，标识音频是第一帧，还是中间帧、最后一帧

        with open(wsParam.AudioFile, "rb") as fp:
            while True:
                buf = fp.read(frameSize)
                # 文件结束
                if not buf:
                    status = STATUS_LAST_FRAME
                # 第一帧处理
                # 发送第一帧音频，带business 参数
                # appid 必须带上，只需第一帧发送
                if status == STATUS_FIRST_FRAME:

                    d = {"common": wsParam.CommonArgs,
                         "business": wsParam.BusinessArgs,
                         "data": {"status": 0, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    d = json.dumps(d)
                    ws.send(d)
                    status = STATUS_CONTINUE_FRAME
                # 中间帧处理
                elif status == STATUS_CONTINUE_FRAME:
                    d = {"data": {"status": 1, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                # 最后一帧处理
                elif status == STATUS_LAST_FRAME:
                    d = {"data": {"status": 2, "format": "audio/L16;rate=16000",
                                  "audio": str(base64.b64encode(buf), 'utf-8'),
                                  "encoding": "raw"}}
                    ws.send(json.dumps(d))
                    time.sleep(1)
                    break
                # 模拟音频采样间隔
                time.sleep(intervel)
        ws.close()

    thread.start_new_thread(run, ())

def record():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,input_device_index=1,
                    frames_per_buffer=CHUNK)
    print("开始录音,请说话......")

    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("录音结束,请闭嘴!")
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

def recognize():
    global wsParam
    # 测试时候在此处正确填写相关信息即可运行
    wsParam = Ws_Param(APPID='xxx', APISecret='xxx',
                       APIKey='xxx',
                       AudioFile=r'tmp.pcm')
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

def gpt(messages):
    openai.api_key = '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'
    global counter
    # global messages
    # messages.append({"role": "user", "content": message})
    # print(message)

    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages,
        temperature = 1
    )
    reply = chat.choices[0].message.content

    # messages.append({"role": "assistant", "content": reply})
    # time.sleep(5)
    return reply

print("开始监听麦克风...")

text_box = widgets.Textarea()
display(text_box)

# 当有人声传入时，开始录音
# 当没有人声传入,并持续2秒钟时，停止录音
# 当停止录音时，开始识别

counter_box = widgets.Textarea()
display(counter_box)

reset_button = widgets.Button(description="重置")
display(reset_button)

def on_button_clicked_reset(b):
    global messages, frames, tmp_sentence, counter, silence_flag, stream, p, wf, text_box, counter_box
    messages = []
    text_box.value = ""
    counter_box.value = ""

reset_button.on_click(on_button_clicked_reset)

engine = pyttsx3.init()

while True:
    # time.sleep(0.1)
    data = stream.read(CHUNK)
    audio_data = np.frombuffer(data, dtype=np.int16)
    energy = np.sum(np.abs(audio_data)) / CHUNK
    frames.append(data)
    counter_box.value = str(len(frames))
    # 一定时间清空缓存
    if len(frames) > 1000:
        frames = []
    
    if energy > THRESHOLD:
        # start recording
        frames = frames[-5:]
        time_silence = 0
        while True:
            data = stream.read(CHUNK)
            # 排除电脑自身播放的声音
            audio_data = np.frombuffer(data, dtype=np.int16)
            energy = np.sum(np.abs(audio_data)) / CHUNK
            
            frames.append(data)
            text_box.value = "正在录音..."
            if energy < THRESHOLD:
                if not silence_flag:
                    silence_flag = True
                    time_silence = time.time()
                else:
                    if time.time() - time_silence > 1:
                        text_box.value = "停止录音，正在保存"
                        # stream.stop_stream()
                        # stream.close()
                        # p.terminate()
                        wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
                        wf.setnchannels(CHANNELS)
                        wf.setsampwidth(p.get_sample_size(FORMAT))
                        wf.setframerate(RATE)
                        wf.writeframes(b''.join(frames))
                        wf.close()

                        silence_flag = False
                        frames = []

                        break
                    else:
                        pass
            else:
                silence_flag = False
                time_silence = 0
        
        # recognize
        recognize()
        if tmp_sentence != "":
            print(f"你：{tmp_sentence}")
            messages.append({"role": "user", "content": tmp_sentence})
            reply = gpt(messages)
            messages.append({"role": "assistant", "content": reply})
            print(f"助手：{reply}")
            # 打开一个新的进程，用于播放语音，如果有语音播放，则中断之前的播放
            
            if engine._inLoop:
                engine.endLoop()
            def run():
                engine.say(reply)
                engine.runAndWait()
            thread.start_new_thread(run, ())

            tmp_sentence = ""
        else:
            print("没有识别到声音，请重试")
        
    else:
        text_box.value = "没有人声传入"



开始监听麦克风...


Textarea(value='')

Textarea(value='')

Button(description='重置', style=ButtonStyle())

KeyboardInterrupt: 