# IPSyn Score Extraction from CHILDES Transcripts
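**Important:** run this notebook from the directory that contains the transcripts; it searches the current working directory and all of its subfolders for CLAN IPSyn output files.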

In [None]:
import os
import csv
from collections import Counter

## Extract the IPSyn score for each transcript and write one CSV per child folder

### 50 Utterances

In [None]:
# Collect the CLAN IPSyn total (50-utterance samples) for every transcript
# under the current working directory and write one summary CSV per folder
# that contains at least one '.ipsyn.cex' file.
for root, subfolders, files in os.walk('.'):
    # Resolve the path before taking the basename: for the top-level folder
    # os.path.basename('.') is '.', which would label every row '.' and
    # produce a hidden "._ipsyn50.csv" file.
    folder_name = os.path.basename(os.path.abspath(root))

    # Sorted so the CSV rows come out in the same order on every run
    # (directory listing order is otherwise arbitrary).
    ipsyn_files = sorted(name for name in files if name.endswith('.ipsyn.cex'))
    if not ipsyn_files:
        continue  # No IPSyn output here; do not create an empty CSV.

    total_scores = []  # (folder, transcript file, IPSyn total) rows
    for file_name in ipsyn_files:
        file_path = os.path.join(root, file_name)
        # Only the ASCII "Total = N" line is needed, so a stray undecodable
        # byte elsewhere in the file must not abort the whole run.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            content = file.read()

        # The first line starting with 'Total =' carries the score.
        total_score_line = next(
            (line for line in content.splitlines() if line.startswith('Total =')),
            None,
        )
        if total_score_line is None:
            continue  # File has no total; leave it out of the CSV.

        total_score = int(total_score_line.split('=')[1].strip())
        # Report the score against the transcript name: swap the
        # '.ipsyn.cex' suffix for '.cha'.
        new_file_name = file_name[:-len('.ipsyn.cex')] + '.cha'
        total_scores.append((folder_name, new_file_name, total_score))

    # Write the folder's CSV once, after every file has been read, instead of
    # rewriting it from scratch for each matching file.
    output_csv_name = os.path.join(root, f"{folder_name}_ipsyn50.csv")
    with open(output_csv_name, mode='w', newline='') as output_csv:
        csv_writer = csv.writer(output_csv)
        csv_writer.writerow(['Folder', 'File', 'IPSyn Score'])
        csv_writer.writerows(total_scores)


### 100 Utterances

In [None]:
# Collect the CLAN IPSyn total (100-utterance samples) for every transcript
# under the current working directory and write one summary CSV per folder
# that contains at least one '.ipsyn-100.cex' file.
for root, subfolders, files in os.walk('.'):
    # Resolve the path before taking the basename: for the top-level folder
    # os.path.basename('.') is '.', which would label every row '.' and
    # produce a hidden "._ipsyn100.csv" file.
    folder_name = os.path.basename(os.path.abspath(root))

    # Sorted so the CSV rows come out in the same order on every run
    # (directory listing order is otherwise arbitrary).
    ipsyn_files = sorted(name for name in files if name.endswith('.ipsyn-100.cex'))
    if not ipsyn_files:
        continue  # No IPSyn output here; do not create an empty CSV.

    total_scores = []  # (folder, transcript file, IPSyn total) rows
    for file_name in ipsyn_files:
        file_path = os.path.join(root, file_name)
        # Only the ASCII "Total = N" line is needed, so a stray undecodable
        # byte elsewhere in the file must not abort the whole run.
        with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
            content = file.read()

        # The first line starting with 'Total =' carries the score.
        total_score_line = next(
            (line for line in content.splitlines() if line.startswith('Total =')),
            None,
        )
        if total_score_line is None:
            continue  # File has no total; leave it out of the CSV.

        total_score = int(total_score_line.split('=')[1].strip())
        # Report the score against the transcript name: swap the
        # '.ipsyn-100.cex' suffix for '.cha'.
        new_file_name = file_name[:-len('.ipsyn-100.cex')] + '.cha'
        total_scores.append((folder_name, new_file_name, total_score))

    # Write the folder's CSV once, after every file has been read, instead of
    # rewriting it from scratch for each matching file.
    output_csv_name = os.path.join(root, f"{folder_name}_ipsyn100.csv")
    with open(output_csv_name, mode='w', newline='') as output_csv:
        csv_writer = csv.writer(output_csv)
        csv_writer.writerow(['Folder', 'File', 'IPSyn Score'])
        csv_writer.writerows(total_scores)

### Note: CLAN IPSyn can only be computed on samples of size 50 or 100