In [None]:
import asyncio
import os
import wave

import grpc

from proto import vnpt_asr_pb2 as rasr
from proto import vnpt_asr_pb2_grpc as rasr_srv
from proto import vnpt_audio_pb2 as ra

# Connection settings for the ASR gRPC endpoint.
# Values are read from the environment so that credentials are not stored in
# the notebook itself; each one falls back to '' (the previous placeholder
# value) when the variable is not set.
server = os.environ.get('VNPT_ASR_SERVER', '')  # target string handed to the gRPC channel
authorization = os.environ.get('VNPT_ASR_AUTHORIZATION', '')
token_id = os.environ.get('VNPT_ASR_TOKEN_ID', '')
token_key = os.environ.get('VNPT_ASR_TOKEN_KEY', '')

# (key, value) pairs passed as `metadata=` on every Recognize call below.
metadata = (
    ('authorization', authorization),
    ('token-id', token_id),
    ('token-key', token_key),
)


def run_sync_one(audio_file):
    """Transcribe one WAV file with a blocking ``Recognize`` call and print the result.

    Args:
        audio_file: Anything ``wave.open`` accepts (typically a path to a WAV file).

    Returns:
        None. The transcript of the first alternative of the first result is
        printed; if the response carries no result or no alternative, a short
        notice is printed instead of raising ``IndexError``.

    Raises:
        Whatever ``wave.open`` raises for an unreadable file, and whatever the
        gRPC stub raises when the call fails (``grpc.RpcError`` for the
        synchronous API).
    """
    # Read the whole file up front; the request carries all frames at once.
    with wave.open(audio_file, 'rb') as wf:
        frame_rate = wf.getframerate()
        audio_data = wf.readframes(wf.getnframes())

    # Build the request before opening the channel so that a bad config does
    # not leave a connection open for nothing.
    config = rasr.RecognitionConfig(
        language_code='vi-VN',
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=frame_rate,
        max_alternatives=1,
        enable_automatic_punctuation=False,
    )
    request = rasr.RecognizeRequest(config=config, audio=audio_data)

    with grpc.secure_channel(server, grpc.ssl_channel_credentials()) as channel:
        # or grpc.insecure_channel depending on the server
        client = rasr_srv.VnptSpeechRecognitionStub(channel)
        response = client.Recognize(request, metadata=metadata)

    # The server may return no results / alternatives; do not index blindly.
    results = response.results
    if not results or not results[0].alternatives:
        print('(no transcript returned)')
        return

    best = results[0].alternatives[0]
    print(best.transcript)


async def add_prints_for_demo_purpose(uucall, file):
    """Await a recognition call and print progress lines around it.

    Args:
        uucall: Awaitable that resolves to a recognition response, i.e. an
            object with ``results[i].alternatives[j].transcript``.
        file: Label printed alongside the progress messages (the file name).

    Returns:
        The response produced by ``uucall``, unchanged.

    Raises:
        Whatever awaiting ``uucall`` raises. A response with no results or no
        alternatives does NOT raise: a placeholder line is printed and the
        response is still returned, so the caller does not lose it.
    """
    print('calling file', file)
    response = await uucall
    print('done', file)

    # Guard the indexing: an empty response must not turn into an IndexError
    # that replaces the (valid) response in the caller's result list.
    results = response.results
    if results and results[0].alternatives:
        print('  result:', results[0].alternatives[0].transcript)
    else:
        print('  result: (no transcript returned)')
    return response

async def _recognize_one_file(client, file):
    """Read one WAV file, send it through ``client.Recognize`` and return the response."""
    # note that this is blocking file reading, for simplicity
    # we may get a bit better throughput with non-blocking file reading
    with wave.open(file, 'rb') as wf:
        frame_rate = wf.getframerate()
        audio_data = wf.readframes(wf.getnframes())

    config = rasr.RecognitionConfig(
        language_code='vi-VN',
        encoding=ra.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=frame_rate,
        max_alternatives=1,
        enable_automatic_punctuation=False,
    )
    request = rasr.RecognizeRequest(config=config, audio=audio_data)

    # With the aio stub this returns an awaitable call object
    # (grpc.aio.UnaryUnaryCall), similar to asyncio.Task.
    call = client.Recognize(request, metadata=metadata)
    return await add_prints_for_demo_purpose(call, file)


async def run_async_many_files(wav_files: list[str]):
    """Transcribe several WAV files concurrently over a single async channel.

    Args:
        wav_files: Paths of the WAV files to send.

    Returns:
        A list with one entry per input file, in the same order as
        ``wav_files``. Each entry is the recognition response, or the
        exception raised while handling that file (unreadable file or failed
        call), because ``asyncio.gather`` is used with
        ``return_exceptions=True``.
    """
    async with grpc.aio.secure_channel(server, grpc.ssl_channel_credentials()) as channel:
        client = rasr_srv.VnptSpeechRecognitionStub(channel)
        # One coroutine per file. Doing the file read inside the coroutine
        # means a bad file is reported in the result list like any other
        # per-file failure, instead of aborting the loop and leaving the
        # coroutines created so far un-awaited.
        pending = [_recognize_one_file(client, file) for file in wav_files]
        responses = await asyncio.gather(*pending, return_exceptions=True)
        return responses

In [None]:
# Transcribe a single clip with the blocking client; run_sync_one prints the transcript.
audio_file = 'audio/3s.wav'
run_sync_one(audio_file)

Quay lại đây. Ai ngờ nó quay lại thật tất nhiên, không đội mũ bảo hiểm


In [None]:
# Send both clips concurrently over one async channel.
# The bare top-level `await` relies on the notebook kernel running cells inside an event loop.
wav_files = ['audio/4s.wav', 'audio/3s.wav']
responses = await run_async_many_files(wav_files)

calling file file-4s.wav
calling file file-3s.wav
done file-3s.wav
  result: Quay lại đây. Ai ngờ nó quay lại thật tất nhiên, không đội mũ bảo hiểm
done file-4s.wav
  result: Quay lại đây, ai ngờ nó quay lại thật thanh niên không đội mũ bảo hiểm bon bon trên đường


In [4]:
# Inspect the raw return value: one entry per input file.
# An entry is an exception object instead of a response if that file's call
# failed, since gather() is invoked with return_exceptions=True.
print(type(responses[0]))
responses

<class 'vnpt_asr_pb2.RecognizeResponse'>


[results {
   alternatives {
     transcript: "Quay lại đây, ai ngờ nó quay lại thật thanh niên không đội mũ bảo hiểm bon bon trên đường"
     confidence: -1.52288747
   }
   channel_tag: 1
   audio_processed: 4.8
 },
 results {
   alternatives {
     transcript: "Quay lại đây. Ai ngờ nó quay lại thật tất nhiên, không đội mũ bảo hiểm"
     confidence: -1.59368098
   }
   channel_tag: 1
   audio_processed: 4.8
 }]