##### `os` lets us navigate the file system of our operating system, and `pydub` lets us work with audio files.


In [None]:
import os
from pydub import AudioSegment

In [None]:
def load_transcriptions(route_txt: str) -> dict:
    """Load a transcription file into a dictionary.

    Each non-blank line is expected to have the form ``<id> <text>``. The
    ID and the text may be separated by any run of whitespace (spaces or
    tabs). Lines that contain only an ID and no text are skipped.

    Args:
        route_txt: Path to the UTF-8 encoded transcription file.

    Returns:
        A dictionary mapping each audio ID to its transcription text.
    """
    trans = {}
    with open(route_txt, 'r', encoding='utf-8') as file:
        for line in file:
            # Strip first so the text carries no trailing newline, then
            # split once on any whitespace so tab-separated lines are
            # not silently discarded.
            parts = line.strip().split(None, 1)
            if len(parts) == 2:
                id_audio, text = parts
                trans[id_audio] = text
    return trans

##### This function processes a chapter to create new audio and text files.

In [None]:
def _export_segment(segment: "AudioSegment", lines: list, dir_chapter: str, index: int) -> None:
    """Write one merged segment as ``segment_<index>.flac`` plus its ``.txt`` file."""
    name = f"segment_{index}"
    file_route = os.path.join(dir_chapter, name + ".flac")
    txt_route = os.path.join(dir_chapter, name + ".txt")
    # export() hands back the output file object still open; close it so
    # file handles do not pile up while processing many segments.
    segment.export(file_route, format="flac").close()

    with open(txt_route, "w", encoding="utf-8") as file:
        file.writelines(f"{i}: {line}\n" for i, line in enumerate(lines))


def process_chapter(route_chapter: str, route_txt: str, dest_dir: str,
                    max_duration: int = 30 * 1000) -> None:
    """Merge a chapter's ``.flac`` clips into segments of bounded length.

    The clips in ``route_chapter`` are read in sorted filename order and
    concatenated. When adding the next clip would push the current segment
    past ``max_duration``, the segment is written to
    ``<dest_dir>/<chapter name>/segment_<n>.flac`` together with a
    ``segment_<n>.txt`` file. That text file lists, for each clip in the
    segment, its ID, its start/end offset in seconds within the segment,
    and its transcription. A single clip longer than ``max_duration`` is
    written as a segment of its own.

    Args:
        route_chapter: Directory containing the chapter's ``.flac`` files.
        route_txt: Path to the chapter's transcription file.
        dest_dir: Root output directory; a sub-directory named after the
            chapter is created inside it.
        max_duration: Maximum segment length in milliseconds (default 30 s).
    """
    trans = load_transcriptions(route_txt)
    # Sorting the audio files to ensure they are processed in order
    audios = sorted(f for f in os.listdir(route_chapter) if f.endswith(".flac"))

    # Create the directory for the chapter. normpath drops a trailing
    # separator, which would otherwise make basename() return "" and send
    # every chapter's output straight into dest_dir.
    chapter_name = os.path.basename(os.path.normpath(route_chapter))
    dir_chapter = os.path.join(dest_dir, chapter_name)
    os.makedirs(dir_chapter, exist_ok=True)

    # Audio accumulated for the current segment
    act_segment = AudioSegment.empty()
    # Transcription lines for the current segment
    act_trans = []
    # Duration (ms) accumulated in the current segment
    act_duration = 0
    count = 0

    for audio in audios:
        # Drop the extension to obtain the transcription ID
        id_audio = os.path.splitext(audio)[0]
        audio_seg = AudioSegment.from_file(os.path.join(route_chapter, audio))
        duration = len(audio_seg)

        # Flush only when something has been accumulated: a first clip that
        # is longer than the limit must not produce an empty segment.
        if len(act_segment) > 0 and len(act_segment) + duration > max_duration:
            _export_segment(act_segment, act_trans, dir_chapter, count)
            count += 1
            act_segment = AudioSegment.empty()
            act_trans = []
            act_duration = 0

        # Start and end time of this clip inside the current segment
        start = act_duration
        end = act_duration + duration

        # Look for the transcription in the dictionary
        trans_line = trans.get(id_audio, " ")
        act_trans.append(f"{id_audio} [{start/1000:.2f}s - {end/1000:.2f}s]: {trans_line}")

        act_segment += audio_seg
        act_duration += duration

    # Write whatever remains after the last clip
    if len(act_segment) > 0:
        _export_segment(act_segment, act_trans, dir_chapter, count)

##### This function goes through all the chapters to process them.

In [None]:
def process_all_chapters(root_dir: str, dest_dir: str) -> None:
    """Process every chapter directory found directly under ``root_dir``.

    Each sub-directory that contains at least one ``.txt`` file is passed
    to ``process_chapter``. Entries that are not directories, and
    directories without a ``.txt`` file, are skipped. Directories and
    candidate transcription files are visited in sorted order so that
    repeated runs behave identically; if a chapter holds several ``.txt``
    files, the alphabetically first one is used.

    Args:
        root_dir: Directory whose sub-directories are the chapters.
        dest_dir: Root output directory handed on to ``process_chapter``.
    """
    # os.listdir() returns entries in arbitrary order, hence the sorting.
    for chapter in sorted(os.listdir(root_dir)):
        chapter_path = os.path.join(root_dir, chapter)
        if not os.path.isdir(chapter_path):
            continue
        files_txt = sorted(f for f in os.listdir(chapter_path) if f.endswith(".txt"))
        if not files_txt:
            continue
        route_txt = os.path.join(chapter_path, files_txt[0])
        process_chapter(chapter_path, route_txt, dest_dir)
        print(f"Processed chapter: {chapter}")

In [None]:
# Placeholder paths: replace them with the directory that holds the chapter
# folders and the directory where the generated segments should be written.
root_dir = "your_root_directory/"
dest_dir = "your_destination_directory/"

# Make sure the destination exists, then process every chapter under root_dir.
os.makedirs(dest_dir, exist_ok=True)
process_all_chapters(root_dir, dest_dir)