In [76]:
import re
import csv

# Lookup from the day label found in the raw talk listing to the MM/DD/YYYY
# string written into the CSV date columns.
day_dict = {
    "Sun, Oct 15": "10/15/2023",
    "Mon, Oct 16": "10/16/2023",
    "Tue, Oct 17": "10/17/2023",
    "Wed, Oct 18": "10/18/2023",
}

# Column names (and column order) of the generated CSV file.
fieldnames = [
    'Subject',
    'Start Date',
    'Start Time',
    'End Date',
    'End Time',
    'Description',
    'Location',
]

def _parse_time_range(time_line):
    """Turn a "START - END" time line into a ``(start, end)`` pair of strings.

    Each side must begin with ``H:MM``, optionally followed by ``AM`` or
    ``PM``; the sides are separated by a hyphen or an en dash.

    A start time without an AM/PM marker borrows the end's marker, unless that
    would put the start after the end on the 12-hour clock (for example
    ``10:45 - 12:00 PM``); in that case the opposite marker is used. If neither
    side has a marker, the bare times are returned unchanged.

    Raises:
        ValueError: if the line is missing or does not hold two times.
    """
    time_pattern = r'^(\d+:\d+)(?:\s*(AM|PM))?'

    parts = re.split(r'\s*[–-]\s*', time_line or "")
    if len(parts) < 2:
        raise ValueError("Expected a 'start - end' time range, got: %r" % (time_line,))

    start_match = re.match(time_pattern, parts[0])
    end_match = re.match(time_pattern, parts[1])
    if start_match is None or end_match is None:
        raise ValueError("Could not parse the times in: %r" % (time_line,))

    start_time, start_ampm = start_match.groups()
    end_time, end_ampm = end_match.groups()

    def clock_minutes(hhmm):
        # Minutes since 12:00 within one AM/PM half-day (12:xx counts as 0:xx).
        hour, minute = hhmm.split(":")
        return (int(hour) % 12) * 60 + int(minute)

    if start_ampm is None and end_ampm is not None:
        start_ampm = end_ampm
        # Sharing the end's marker must not make the talk start after it ends;
        # if it would, the range crosses noon/midnight and the start is on the
        # other side.
        if clock_minutes(start_time) > clock_minutes(end_time):
            start_ampm = "AM" if end_ampm == "PM" else "PM"

    start = start_time if start_ampm is None else start_time + " " + start_ampm
    end = end_time if end_ampm is None else end_time + " " + end_ampm
    return start, end


def process_entry(entry):
    """Read one talk record from an iterator of lines and map it to CSV fields.

    After skipping leading blank lines, the lines are consumed in this order:
    talk title (``"<number>. <title>"``), session title, date, time range,
    location.

    Args:
        entry: iterator yielding the input lines one string at a time.

    Returns:
        ``(True, row)`` where ``row`` is a dict with the keys ``Subject``,
        ``Description``, ``Start Date``, ``End Date``, ``Start Time``,
        ``End Time`` and ``Location``; or ``(False, None)`` once the iterator
        has no more non-blank lines.

    Raises:
        KeyError: if the date line is missing or is not a key of ``day_dict``.
        ValueError: if the time line is missing or cannot be parsed.
    """
    # Burn any leading blank lines.
    line = next(entry, None)
    while line is not None and not line.strip():
        line = next(entry, None)

    if not line:
        return False, None

    # Separate the talk number from the title.
    match = re.match(r'(\d+)\.\s*(.*)', line)
    if match:
        talk_number = int(match.group(1))
        subject = "Talk " + str(talk_number) + " " + match.group(2)
    else:
        # Warn, but keep going with the raw line as the subject so the row is
        # still produced instead of failing on an undefined variable.
        print("The talk title is not as expected. ")
        print(line)
        subject = line

    parsed_data = {"Subject": subject}

    # The session title and the date follow directly.
    parsed_data["Description"] = next(entry, None)
    date = next(entry, None)
    parsed_data["Start Date"] = day_dict[date]
    parsed_data["End Date"] = day_dict[date]

    # The time range needs real parsing (optional AM/PM on the start).
    start, end = _parse_time_range(next(entry, None))
    parsed_data["Start Time"] = start
    parsed_data["End Time"] = end

    parsed_data["Location"] = next(entry, None)

    return True, parsed_data

def line_generator(file):
    """Lazily yield each line of ``file`` with surrounding whitespace removed."""
    yield from (raw_line.strip() for raw_line in file)

def write_row_to_csv(file_pointer, data_dict):
    """Write ``data_dict`` as one CSV row to the open file ``file_pointer``.

    Columns are laid out according to the module-level ``fieldnames`` list.
    """
    csv.DictWriter(file_pointer, fieldnames=fieldnames).writerow(data_dict)
        

In [77]:
# Create (or truncate) the output CSV so that it holds just the header row;
# the data rows are appended to this same file afterwards.
output_file = "output.csv"
with open(output_file, mode='w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()


In [78]:
# Parse every talk in the raw text listing and append one CSV row per talk.
# Rows are appended, so output_file is expected to already contain the header
# row written by the earlier cell.
with open('raw_my_informs_talks.txt', 'r') as file:
    entry = line_generator(file)
    # Open the output once for the whole run rather than once per row.
    with open(output_file, 'a', newline='') as csvfile:  # Open in append mode
        while True:
            can_continue, result = process_entry(entry)
            if not can_continue:
                break
            write_row_to_csv(csvfile, result)
        

In [70]:
# Scratch check that str() turns an int into its decimal text; presumably left
# over from writing the str(talk_number) call in process_entry. Nothing else in
# the visible cells uses this result.
str(3)

'3'