#### Section 1:
Run these cells to score transcripts on the Index of Productive Syntax (IPSyn) with an OpenAI chat model.

In [3]:
# Install required libraries

!pip install PyGithub openai

In [21]:
# Import libraries

import os
import requests
from github import Github
from openai import OpenAI

In [30]:
# Mount Google Drive at /content/drive so output files can be saved there

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [5]:
# Set up the OpenAI API key

from getpass import getpass

# getpass prompts without echoing, so the key does not end up in the cell output.
# Pasted keys often carry stray whitespace; strip it before storing the key.
api_key = getpass('Enter your OpenAI API key: ').strip()
if not api_key:
    # Fail here rather than at the first API call, where the cause is less obvious.
    raise ValueError('No OpenAI API key entered; re-run this cell and paste your key.')
os.environ['OPENAI_API_KEY'] = api_key

Enter your OpenAI API key: ··········


In [40]:
# Configuration: the values a reader is expected to tune before running the notebook.

GITHUB_REPO_URL = 'https://github.com/lfedronic/Ipsyn'
GITHUB_FOLDER_PATH = 'Sarah_Transcripts'  # Replace with desired folder of transcripts from the git repo
# Folder that receives one evaluation file per transcript.
# (The original line ended with a stray '.', which is a SyntaxError.)
OUTPUT_FOLDER = '/content/drive/My Drive/llm-ipsyn'
# Not referenced by the other visible cells; presumably the OpenAI client reads
# the OPENAI_API_KEY environment variable itself -- TODO confirm before removing.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
MODEL_NAME = 'gpt-4o-mini'  # passed as `model` to the chat completion call
TEMP = 0  # passed as `temperature` to the chat completion call
MAX_TOKENS = 7500  # Configured for the current prompt so that the model fully completes ipsyn

# Create output directory (no error if it already exists)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)


In [22]:
# Function to download files from GitHub
def download_files_from_github(repo_url, folder_path):
    """Download every ``.txt`` file of a GitHub folder into the working directory.

    Only direct children of ``folder_path`` are considered; sub-directories are
    not descended into. Each file is saved under its own name in the current
    working directory, overwriting any existing file of that name.

    Args:
        repo_url: Repository URL ending in ``owner/repo``, e.g.
            ``https://github.com/owner/repo``. A trailing slash or ``.git``
            suffix is tolerated.
        folder_path: Path inside the repository whose contents are listed.

    Raises:
        ValueError: If ``owner/repo`` cannot be extracted from ``repo_url``.
        requests.HTTPError: If a file download answers with an error status.
    """
    # Normalise first: with a trailing '/' the naive split yields 'repo/' and
    # the repository lookup fails.
    trimmed_url = repo_url.strip().rstrip('/')
    if trimmed_url.endswith('.git'):
        trimmed_url = trimmed_url[:-len('.git')]
    repo_name = '/'.join(trimmed_url.split('/')[-2:])
    owner, _, repo_part = repo_name.partition('/')
    if not owner or not repo_part:
        raise ValueError(f'Cannot extract "owner/repo" from {repo_url!r}')

    # Created without credentials, i.e. an anonymous GitHub API client.
    g = Github()
    repo = g.get_repo(repo_name)
    contents = repo.get_contents(folder_path)
    # get_contents returns a single object (not a list) when the path is a file.
    if not isinstance(contents, list):
        contents = [contents]

    for content_file in contents:
        if content_file.type != 'file' or not content_file.name.endswith('.txt'):
            continue
        response = requests.get(content_file.download_url, timeout=30)
        # Without this an HTTP error page would be saved as if it were a transcript.
        response.raise_for_status()
        # Write the raw bytes so the local copy matches the repository file exactly,
        # independent of charset guessing and the platform's default encoding.
        with open(content_file.name, 'wb') as f:
            f.write(response.content)
        print(f'Downloaded {content_file.name}')

In [None]:
# Fetch the .txt transcripts from the configured repo folder into the current working directory
download_files_from_github(GITHUB_REPO_URL, GITHUB_FOLDER_PATH)

In [41]:
# Function to process transcript and call OpenAI API
def process_transcript(file_name):
    """Score one transcript file against the IPSyn table with an OpenAI chat model.

    The file's full text is embedded in a fixed prompt and sent as a single user
    message, using the notebook-level ``MODEL_NAME``, ``MAX_TOKENS`` and ``TEMP``.

    Args:
        file_name: Path of the transcript text file to evaluate.

    Returns:
        The text of the model's first choice. An empty string is returned when
        the API delivers no text content, so callers can always write the result.
    """
    client = OpenAI()
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(file_name, 'r', encoding='utf-8') as file:
        transcript = file.read()
    # You can edit this prompt
    # NOTE(review): the "Steps" list below numbers two steps "3)". The text is
    # left unchanged here because any edit to the prompt can change model output.
    prompt = f"""Objective:
You are to evaluate the following transcript according to the Index of Productive Syntax. Specifically, you should go through the transcript one line at a time, checking for the presence of each grammatical item in the scoring table. You should keep track of how many times each scoring table item is satisfied by the line from the transcript. Each instance adds one to the item’s score. Once an item reaches a score of 2, stop looking for further instances–2 is the maximum score any one item can receive. Do not award points based on implied or inferred words; just use what is explicitly stated in the transcript.

Scoring Table:
N1: Noun
N2: Pronoun
N3: Modifier
N4:Two-word Noun Phrase
N5: Determiner + Noun
N6: Verb + Two-word Noun Phrase
N7: Noun Plural
N8: Two-word Noun Phrase + Verb
N9: Three-word Noun Phrase
N10: Noun Phrase + Adverb
N11: Bound Morpheme
V1: Verb
V2: Verb Particle or Preposition
V3: Prepositional Phrase
V4: Noun + Copula + Noun
V5: Catenative
V6: Auxiliary Be, Do, Have
V7: Progressive -ing
V8: Adverb
V9: Modal + Verb
V10: Third-person Singular Present
V11: Past Tense Modal
V12: Regular Past Tense
V13: Past Tense Auxiliary
V14: Medial Adverb
V15: Ellipsis
V16: Past Copula
V17: Bound Morpheme
Q1: contains a question mark
Q2: wh-word alone; routine question with or without a verb
Q3: Simple Negation
Q4: Wh-Question + Verb
Q5: Subject + negation + Verb
Q6: Wh-Question with Subject-Auxiliary inversion
Q7: Negation copula, modal or auxiliary
Q8: Yes/No Question with Subject–Auxiliary Inversion
Q9: Wh-Question
Q10: Tag Question
Q11: Negation Question with Subject–Auxiliary Inversion
S1: Two words
S2: Subject + Verb
S3: Verb + Object
S4: Subject + Verb + Object
S5: Any Conjunction
S6: Any Two Verbs
S7: Conjoined Phrases
S8: Infinitive
S9: Let/Make/Help/Watch
S10: Subordinating Conjunction
S11: Mental State Verb
S12: Conjoined Clauses
S13: If or Wh-Clause
S14: Bitransitive Predicate
S15: Three or More Verbs
S16: Relative Clause
S17: Infinitival Clause
S18: Gerund
S19: Left or Center-Embedded Clause
S20: Passive

{transcript}

Steps:
1) Go through the transcript line by line, checking for the presence of each grammatical item.
2) For each item you encounter, only list the applicable item(s) and scores.
3) Once you have finished parsing the entire transcript, compile each item's score
3) Finally, present the final score by capping each item at a maximum of 2 and responding in this format:
Final score:
N1, #
N2, #
...
S20, #
"""
    completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": prompt}
        ],
        model=MODEL_NAME,
        max_tokens=MAX_TOKENS,
        temperature=TEMP
    )

    first_choice = completion.choices[0]
    # A reply cut off at the token limit most likely lacks the "Final score" section.
    if getattr(first_choice, 'finish_reason', None) == 'length':
        print(f'Warning: evaluation of {file_name} was cut off at MAX_TOKENS={MAX_TOKENS}')

    content = first_choice.message.content
    # content may be None; returning '' keeps the return type a string.
    return content if content is not None else ''


In [None]:
# Process each downloaded transcript and save the result in the google drive folder you set in configuration
failed_files = []  # transcripts that produced no saved evaluation in this run

# Sorted so every run handles the transcripts in the same, predictable order.
for file_name in sorted(os.listdir()):
    if not file_name.endswith('.txt'):
        continue
    try:
        result = process_transcript(file_name)
    except Exception as error:
        # One failed request must not discard the rest of the batch;
        # report it and move on to the next transcript.
        failed_files.append(file_name)
        print(f'Failed to evaluate {file_name}: {error}')
        continue
    if not result:
        # Nothing to save (None or empty text); do not create an empty output file.
        failed_files.append(file_name)
        print(f'No evaluation text returned for {file_name}')
        continue
    output_file_name = os.path.join(OUTPUT_FOLDER, f'evaluation_{file_name}')
    # Explicit UTF-8 so non-ASCII characters in the model's answer are always writable.
    with open(output_file_name, 'w', encoding='utf-8') as output_file:
        output_file.write(result)
    print(f'Saved evaluation for {file_name} to {output_file_name}')

if failed_files:
    print(f'{len(failed_files)} transcript(s) were not evaluated: {", ".join(failed_files)}')