In [7]:
import csv
import os

# Function to create a dictionary representing an interval with its start time, 
# end time, and associated text
def create_interval_data_dict(xmin, xmax, sentence):
    return { 'xmin': xmin, 'xmax': xmax, 'text': sentence }

# Function to print the interval data in a formatted way
def print_interval_data_dict(idx, data):
    print(f"intervals [{idx}]:\n\t\txmin = {data['xmin']}\n\t\txmax = {data['xmax']}\n\t\ttext = \"{data['text']}\"")

# Function to write intervals to a TextGrid file
def write_textgrid_file(intervals, output_file_path, total_xmax):
    print(f'writing {output_file_path}')
    with open(output_file_path, 'w') as f:
        f.write('File type = "ooTextFile"\n')
        f.write('Object class = "TextGrid"\n')
        f.write('\n')  # Add an empty line
        f.write('xmin = 0\n')
        f.write(f'xmax = {str(float(total_xmax) + 0.001)}\n')
        f.write('tiers? <exists>\n')
        f.write('size = 1\n')
        f.write('item []:\n')
        f.write('    item [1]:\n')
        f.write('        class = "IntervalTier"\n')
        f.write('        name = "Intonational Unit"\n')  # Set the name of the interval tier
        f.write('        xmin = 0\n')
        f.write(f'        xmax = {str(float(total_xmax) + 0.001)}\n')
        f.write(f'        intervals: size = {len(intervals) + 1}\n')  # Size of intervals

        for idx, interval in enumerate(intervals):
            f.write(f'        intervals [{idx + 1}]:\n')  # Interval index starts at 1
            f.write(f'            xmin = {interval["xmin"]}\n')
            f.write(f'            xmax = {interval["xmax"]}\n')
            f.write(f'            text = "{interval["text"]}"\n')
        
        # Add the final interval to keep the last boundary
        f.write(f'        intervals [{len(intervals) + 1}]:\n')
        f.write(f'            xmin = {intervals[-1]["xmax"]}\n')
        final_boundary = str(float(intervals[-1]["xmax"]) + 0.001)
        f.write(f'            xmax = {final_boundary}\n')
        f.write(f'            text = ""\n')
        

# Create a directory for TextGrid files if it doesn't exist
output_directory = 'textgrids'
os.makedirs(output_directory, exist_ok=True)

# Open the CSV file containing the predicted intonational units (IUs)
with open('spice_segmentation_predictedIUs.csv') as csvfile:
    reader = csv.reader(csvfile)
    next(reader) # Skip the header row
    count = 0
    iu_xmin = 0
    iu_xmax = 0
    intervals = [] # List to hold the intervals for the TextGrid
    words = [] # List to hold the words in the current intonational unit

    prev_filename = None

    # Loop through each row in the CSV file
    for row in reader:
        filename = row[1]
        xmin = row[2]
        xmax = row[3]
        text = row[4]
        is_iu_start_pred = row[5] == 'TRUE' # Check if this word marks the start of an IU

        if prev_filename!= None and prev_filename != filename:
            write_textgrid_file(intervals, os.path.join(output_directory, f"{prev_filename}.TextGrid"), intervals[-1]['xmax'])
            iu_xmin = 0
            iu_xmax = 0
            intervals = [] # List to hold the intervals for the TextGrid
            words = [] # List to hold the words in the current intonational unit

        prev_filename = filename

        # If this word indicates the start of a new intonational unit
        if is_iu_start_pred:
            # Start case: if there are no intervals yet, set prev_xmax to 0
            if len(intervals) == 0:
                prev_xmax = 0
            else:
                prev_xmax = intervals[-1]['xmax'] # Get the end time of the last interval

            # If the previous interval's end time does not match the current IU's start time
            if prev_xmax != iu_xmin:
                # Create an empty interval for the gap and append it to intervals
                interval_data_dict = create_interval_data_dict(prev_xmax, iu_xmin, '')
                intervals.append(interval_data_dict)

            # If there are words collected in the current IU
            if len(words) > 0:
                # Create an interval for the previous IU with the collected words and append it to intervals
                interval_data_dict = create_interval_data_dict(iu_xmin, iu_xmax, ' '.join(words))
                intervals.append(interval_data_dict)
                words = [] # Reset the words list for the new IU
                
            iu_xmin = xmin # Set the start time for the new IU

        
        words.append(text) # Add the word to the current list of words
        
        iu_xmax = xmax # Update the end time for the current IU

        # Uncomment to limit the number of processed rows for debugging purposes
        # if (count > 10):
        #     break
        # count +=1

    # Print the collected intervals in the required format
    for idx,interval in enumerate(intervals):
        print_interval_data_dict(idx+1, interval) # Print each interval with its index
    print(intervals) # Uncomment to see the full list of intervals in dictionary format
    
    # Write the intervals to a TextGrid file
    output_file_path = 'output.TextGrid'  # Specify your output file path
    write_textgrid_file(intervals, output_file_path, total_xmax)
    print(f'TextGrid file has been created: {output_file_path}')

writing textgrids/VM21B_English_I1_20190313.TextGrid
writing textgrids/VF19B_English_I1_20190213.TextGrid
writing textgrids/VF27A_English_I1_20181120.TextGrid
writing textgrids/VF22A_English_I2_20181206.TextGrid
writing textgrids/VM19D_English_I2_20200211.TextGrid
writing textgrids/VF19D_English_I2_20190308.TextGrid
writing textgrids/VM21E_English_I2_20200309.TextGrid
writing textgrids/VF23C_English_I2_20190128.TextGrid
writing textgrids/VM22B_English_I1_20200309.TextGrid
writing textgrids/VM21D_English_I2_20200309.TextGrid
writing textgrids/VF33B_English_I1_20190206.TextGrid
writing textgrids/VF21B_English_I2_20190204.TextGrid
writing textgrids/VM19A_English_I1_20191031.TextGrid
writing textgrids/VF23B_English_I1_20190121.TextGrid
writing textgrids/VM22A_English_I2_20181210.TextGrid
writing textgrids/VF20A_English_I2_20181119.TextGrid
writing textgrids/VM21C_English_I2_20190403.TextGrid
writing textgrids/VF32A_English_I2_20190213.TextGrid
writing textgrids/VF21A_English_I1_20190130.Te

IndexError: list index out of range