In [10]:
from google.cloud import speech


In [11]:
# Sample recording (WebM container) that the cells below send to each speech service
audio_file_path = "./../data/audio/audio_109_10.webm"


In [12]:

# Function to process audio with Google Speech-to-Text
def google_stt_process_audio(audio_file_path, language_code="en-IN", sample_rate_hertz=48000):
    """Transcribe a WebM/Opus audio file with Google Cloud Speech-to-Text.

    The file is read fully into memory and sent inline through the
    synchronous ``recognize`` call.

    Args:
        audio_file_path: Path of the audio file to read.
        language_code: Language tag placed in the recognition config.
        sample_rate_hertz: Sample rate declared in the recognition config.

    Returns:
        The top alternative of every returned result, stripped and joined
        with single spaces. An empty string is returned when the service
        yields no results or when the request raises.

    Raises:
        OSError: If the audio file cannot be opened or read (this happens
            before the guarded request, so it is not swallowed).
    """
    client = speech.SpeechClient()

    with open(audio_file_path, "rb") as audio_file:
        audio_data = audio_file.read()

    # Set up the audio and configuration
    audio = speech.RecognitionAudio(content=audio_data)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,  # Adjust if using a different encoding
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )

    # Perform the speech recognition request
    try:
        response = client.recognize(config=config, audio=audio)
        if not response.results:
            print("No transcription results found.")
            return ""

        # The response may carry several results; reading only the first one
        # would drop everything after the first recognised segment.
        # Results without alternatives are skipped instead of raising IndexError.
        pieces = [
            result.alternatives[0].transcript.strip()
            for result in response.results
            if result.alternatives
        ]
        return " ".join(piece for piece in pieces if piece)
    except Exception as e:
        # Deliberate best-effort: report the failure and hand back an empty transcript.
        print(f"Error during speech recognition: {e}")
        return ""



In [13]:
# Send the sample clip through Google Speech-to-Text and show what came back
transcription = google_stt_process_audio(audio_file_path)
print("Transcription: {}".format(transcription))


I0000 00:00:1727422881.439340 2300288 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Transcription: yes I enjoy it with my friend


In [19]:
from openai import OpenAI
client = OpenAI()

# Open the clip in a context manager so the file handle is closed as soon as
# the upload finishes instead of lingering in the kernel.
with open(audio_file_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
# NOTE(review): the recorded output for this clip is in Devanagari script while
# the other services returned English text; passing an explicit language hint
# may be worth trying — confirm against the API documentation.
print(transcription)

Transcription(text='यह, मैं वो उद्धाने करता हूँ कि वो वोड़्ड के लिए जोड़ा हूँ।')


In [22]:
from openai import OpenAI
client = OpenAI()

# Open the clip in a context manager so the file handle is closed as soon as
# the upload finishes instead of lingering in the kernel.
with open(audio_file_path, "rb") as audio_file:
    # The translations endpoint is used here rather than transcriptions;
    # the recorded output for this clip is English text.
    transcript = client.audio.translations.create(
        model="whisper-1",
        file=audio_file,
    )
print(transcript)

Translation(text='Yes, I enjoy it with my friend.')


In [28]:
import os
import time

import boto3

# Read the AWS credentials from the environment so that secrets never live in
# the notebook source. Unset or blank variables become None rather than '';
# NOTE(review): boto3 is expected to fall back to its default credential
# chain when both values are None — confirm against the boto3 credentials guide.
aws_access_key = os.environ.get('AWS_ACCESS_KEY_ID') or None
aws_secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') or None


def transcribe_file(job_name, file_uri, transcribe_client,
                    media_format='webm', language_code='en-IN',
                    max_tries=60, poll_interval=10):
    """Start a transcription job and poll it until it finishes or polling runs out.

    Args:
        job_name: Name given to the transcription job and used when polling.
        file_uri: URI of the media file, passed as ``MediaFileUri``.
        transcribe_client: Client object exposing ``start_transcription_job``
            and ``get_transcription_job``.
        media_format: Value passed as ``MediaFormat``.
        language_code: Value passed as ``LanguageCode``.
        max_tries: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The last response returned by ``get_transcription_job`` (whatever its
        status), or None when ``max_tries`` allows no status check at all.
    """
    # Start a transcription job
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={
            'MediaFileUri': file_uri
        },
        MediaFormat=media_format,
        LanguageCode=language_code
    )

    # Poll the status of the job
    job = None
    for attempt in range(max_tries):
        job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
        details = job['TranscriptionJob']
        job_status = details['TranscriptionJobStatus']

        if job_status in ('COMPLETED', 'FAILED'):
            print(f"Job {job_name} is {job_status}.")
            if job_status == 'COMPLETED':
                print(
                    f"Download the transcript from\n"
                    f"\t{details['Transcript']['TranscriptFileUri']}.")
            else:
                # Surface why the job failed instead of only reporting the status.
                print(f"Failure reason: {details.get('FailureReason', 'not reported')}.")
            return job

        print(f"Waiting for {job_name}. Current status is {job_status}.")
        # No point in sleeping after the final status check.
        if attempt < max_tries - 1:
            time.sleep(poll_interval)

    # Reaching this point means the job never hit a terminal state in time.
    print(f"Gave up waiting for {job_name} after {max_tries} status checks.")
    return job

def main():
    """Run one Amazon Transcribe job for the sample clip stored in S3."""
    # S3 location of the uploaded recording; point this at your own object
    s3_audio_uri = 's3://chatfriend-prod-data/data/audio/audio_109_10.webm'

    client = boto3.client(
        'transcribe',
        region_name='us-east-1',
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
    )

    # NOTE(review): the job name is fixed, so a second run with the same name
    # will presumably be rejected by the service — confirm and vary it if needed.
    transcribe_file('audio_109_10_job', s3_audio_uri, client)


if __name__ == '__main__':
    main()


Waiting for audio_109_10_job. Current status is IN_PROGRESS.
Waiting for audio_109_10_job. Current status is IN_PROGRESS.
Waiting for audio_109_10_job. Current status is IN_PROGRESS.
Job audio_109_10_job is COMPLETED.
Download the transcript from
	https://s3.us-east-1.amazonaws.com/aws-transcribe-us-east-1-prod/905418190201/audio_109_10_job/e2def99d-9395-4783-b7a1-c01ce88647a2/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEOD%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIAj7PCIPEwV%2FOEU%2BhFeyfzWuPaArj7nIvO1goYRwXSmuAiEA5Iz%2BLEWcbMWMxTuJHoCIBFIPoMM1xF0IgP%2Fe%2FUetgjMqsgUIKBAEGgwyNzY2NTY0MzMxNTMiDNQQJQjUL4XJYNQPyiqPBd96zYTZUvC2v0C12fMRIzc9zjXo%2BLttZ5hEoaxm8Nk6MsogLmoor5sgR1bdBej0NkGbo56e63WqZNvXLSP4JkHFtFkgAJIg2LEv7xPWrjOV8vZLB7WqDv43cL7mcSjuJF5UtwEjvz2LTE7xb%2BSJC%2BNHzeoE0gxLMVwAjD6saHMz5FE1id24o4d0lPVmupWlwotSq%2BV0UMTk1kNAFcbg9rwikuFS00cs3ovFiKYCFbjez1nKWsVsimcTNmVQGP%2FQtAR4o0Ka4JYDayd93q4K0%2BAVDBEZRXespAgGnByb47%2F5nDcu%2BUzx%2BbQWtlkEbg3T3%2B8MME6%2BFOqaBWDS82%2

In [30]:
# Output: contents of the transcript JSON produced by job audio_109_10_job
# 
# {"jobName":"audio_109_10_job","accountId":"","status":"COMPLETED","results":{"transcripts":[{"transcript":"Yes, I enjoy it with my friend."}],"items":[{"id":0,"type":"pronunciation","alternatives":[{"confidence":"0.999","content":"Yes"}],"start_time":"1.57","end_time":"1.95"},{"id":1,"type":"punctuation","alternatives":[{"confidence":"0.0","content":","}]},{"id":2,"type":"pronunciation","alternatives":[{"confidence":"0.999","content":"I"}],"start_time":"1.96","end_time":"2.029"},{"id":3,"type":"pronunciation","alternatives":[{"confidence":"0.999","content":"enjoy"}],"start_time":"2.039","end_time":"2.589"},{"id":4,"type":"pronunciation","alternatives":[{"confidence":"0.995","content":"it"}],"start_time":"2.599","end_time":"2.63"},{"id":5,"type":"pronunciation","alternatives":[{"confidence":"0.998","content":"with"}],"start_time":"2.64","end_time":"2.869"},{"id":6,"type":"pronunciation","alternatives":[{"confidence":"0.999","content":"my"}],"start_time":"2.88","end_time":"3.029"},{"id":7,"type":"pronunciation","alternatives":[{"confidence":"0.996","content":"friend"}],"start_time":"3.039","end_time":"3.509"},{"id":8,"type":"punctuation","alternatives":[{"confidence":"0.0","content":"."}]}],"audio_segments":[{"id":0,"transcript":"Yes, I enjoy it with my friend.","start_time":"1.559","end_time":"3.759","items":[0,1,2,3,4,5,6,7,8]}]}}