In [None]:
"""
Whisper model details

Size    | Parameters | English-only model | Multilingual model
--------+------------+--------------------+-------------------
tiny    | 39 M       | ✓                  | ✓
base    | 74 M       | ✓                  | ✓
small   | 244 M      | ✓                  | ✓
medium  | 769 M      | ✓                  | ✓
large   | 1550 M     | —                  | ✓

"""

In [None]:
!pip install -U openai-whisper # for using Whisper AI

In [None]:
# import modules
import os
import whisper
from tqdm import tqdm
from datetime import timedelta

In [None]:
def set_up_whisper_client(model_size):
    """Load a Whisper model and hand it back to the caller.

    Progress messages are printed before and after loading.

    Args:
        model_size: Value forwarded unchanged to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model(model_size)``.
    """
    separator = "-------------------------------------------------"
    print("Loading whisper model...")
    loaded_model = whisper.load_model(model_size)
    # Report completion, then close the section with a visual divider.
    for message in ("Whisper model complete.", separator):
        print(message)
    return loaded_model

In [None]:
def get_file_names_and_count(root_path):
    """List the files located directly inside ``root_path`` and count them.

    Only regular files at the top level of ``root_path`` are returned.
    Sub-directories (e.g. ``.ipynb_checkpoints``) are skipped and are NOT
    searched recursively, because the names returned here are later joined
    straight onto ``root_path`` and handed to the model. No filtering by file
    extension is done. Names are sorted so the processing order is the same
    on every run (``os.listdir`` gives no ordering guarantee).

    Args:
        root_path: Directory containing the audio files to transcribe.

    Returns:
        tuple: ``(file_name_list, number_of_files)`` where ``file_name_list``
        is a sorted list of bare file names (no directory part) and
        ``number_of_files`` is its length.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``root_path`` cannot be
            listed (for example, it does not exist).
    """
    print("Getting files names to transcribe...")
    file_name_list = sorted(
        entry
        for entry in os.listdir(root_path)
        # Keep regular files only; directories cannot be transcribed.
        if os.path.isfile(os.path.join(root_path, entry))
    )
    number_of_files = len(file_name_list)
    print(file_name_list)
    print("Number of files:", number_of_files)
    print("-------------------------------------------------")
    return file_name_list, number_of_files

In [None]:
def _format_srt_timestamp(seconds):
    """Convert an offset in seconds into the SRT ``HH:MM:SS,mmm`` form."""
    total_ms = max(0, round(float(seconds) * 1000))
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    secs, millis = divmod(total_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def transcribe_audio(model, file_name, file_path, exit_path, language="hi", task="translate"):
    """Run the model on one audio file and write the result as an ``.srt`` file.

    The output is written to ``exit_path`` under the input's base name with the
    extension replaced by ``.srt``. The file is opened once in write mode, so
    re-running the function replaces a previous result instead of appending
    duplicate cues to it. A file is written even when the model returns no
    segments (it is then empty).

    Args:
        model: Object exposing ``transcribe(path, **options)`` that returns a
            mapping with a ``'segments'`` list; each segment must provide
            ``'id'``, ``'start'``, ``'end'`` (seconds) and ``'text'``.
        file_name: Name of the audio file; used to derive the ``.srt`` name
            and in the completion message.
        file_path: Path of the audio file passed to ``model.transcribe``.
        exit_path: Existing directory the ``.srt`` file is written into.
        language: ``language`` option forwarded to ``model.transcribe``.
            Defaults to ``"hi"``.
        task: ``task`` option forwarded to ``model.transcribe``.
            Defaults to ``"translate"``.

    Returns:
        None

    Raises:
        OSError: If the ``.srt`` file cannot be opened for writing (for
            example, ``exit_path`` does not exist).
    """
    result = model.transcribe(file_path, language=language, fp16=False, verbose=True, task=task)

    cues = []
    for segment in result['segments']:
        # Keep sub-second precision; truncating to whole seconds makes
        # subtitles drift from the audio.
        start_time = _format_srt_timestamp(segment['start'])
        end_time = _format_srt_timestamp(segment['end'])
        # strip() also copes with an empty text, where indexing text[0] would fail.
        text = segment['text'].strip()
        # Segment ids are used as zero-based here; SRT cue numbers start at 1.
        cues.append(f"{segment['id'] + 1}\n{start_time} --> {end_time}\n{text}\n\n")

    # splitext drops only the final extension, so "talk.part1.wav" becomes
    # "talk.part1.srt" rather than colliding on "talk.srt".
    srt_file = os.path.join(exit_path, f"{os.path.splitext(file_name)[0]}.srt")
    with open(srt_file, 'w', encoding='utf-8') as srt_handle:
        srt_handle.writelines(cues)

    print(f"{file_name} transcription complete.\n")
    return

In [None]:
def transcribe_all_audios(root_path, exit_path, file_name_list, number_of_files, model=None):
    """Transcribe the listed files one after another.

    Args:
        root_path: Directory the audio files are read from.
        exit_path: Directory passed on to ``transcribe_audio`` for the output.
        file_name_list: File names relative to ``root_path``.
        number_of_files: How many entries of ``file_name_list`` to process,
            counted from the start of the list.
        model: Model passed on to ``transcribe_audio``. When omitted, the
            notebook-level global ``model`` is used, which keeps existing
            four-argument calls working; passing it explicitly avoids
            depending on which cells happened to run earlier.

    Returns:
        None

    Raises:
        NameError: If ``model`` is not given and no global ``model`` exists.
    """
    if model is None:
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError(
                "name 'model' is not defined; pass model=... or define a global `model`"
            ) from None

    for file_name in file_name_list[:number_of_files]:
        file_path = os.path.join(root_path, file_name)
        transcribe_audio(model, file_name, file_path, exit_path)

In [None]:
# Model size handed to whisper.load_model
model_size = "large"

# Folder the audio files are read from, and folder the .srt files are written to
root_path = "/content/drive/MyDrive/Colab_Notebooks/input_files"
exit_path = "/content/drive/MyDrive/Colab_Notebooks/output_files"

# Create the output folder up front; otherwise a missing directory only
# surfaces as an error after the model has been loaded and the first file
# has already been transcribed.
os.makedirs(exit_path, exist_ok=True)

# Load the model, list the input files, then transcribe each of them
model = set_up_whisper_client(model_size)
file_name_list, number_of_files = get_file_names_and_count(root_path)
transcribe_all_audios(root_path, exit_path, file_name_list, number_of_files)

print("Transcription of all files completed successfully!!!")
