In [None]:
import pandas as pd
import os
import ollama
from tqdm.auto import tqdm
import glob
from collections import Counter

# Load the policy CSV into memory; the bare trailing expression makes the
# notebook display the number of rows that were read.
df = pd.read_csv(filepath_or_buffer='new_final_2024.csv')
df.shape[0]

1480

In [None]:
# Name of the Ollama model that every classification request is sent to.
OLLAMA_MODEL = "gpt-oss:20b"


# Closed set of labels accepted as a classification result. The first five
# are the categories offered to the model in the prompt; the last two are
# assigned by the surrounding code rather than chosen by the model.
POLICY_CATEGORIES = [
    # Explicitly encourages or allows wide AI use.
    "Permissive",
    # Allows AI use but requires adherence to strict citation/disclosure rules.
    "Guideline-Based",
    # Strictly limits or completely prohibits AI use for assignments.
    "Restrictive",
    # Defers the policy to the instructor on a per-assignment basis.
    "Case-by-Case",
    # Mentions AI but the rules are unclear or vague.
    "Ambiguous",
    # Use this for empty/irrelevant text.
    "No Policy Found",
    # Use this if the model fails.
    "Analysis Error",
]

def create_prompt(policy_text):
    """Build the single-label classification prompt for the LLM.

    The prompt lists five policy categories with a one-line description each,
    instructs the model to reply with the label only, and embeds
    ``policy_text`` between explicit start/end markers.

    Args:
        policy_text: The policy text to classify; it is formatted into the
            prompt as-is.

    Returns:
        The complete prompt string, ending with ``Category:``.
    """
    prompt_lines = (
        "",  # the prompt starts with a newline
        "Analyze the following academic AI policy and classify it into ONE of the following categories:",
        "",
        "- Permissive: Explicitly encourages or allows wide AI use.",
        "- Guideline-Based: Allows AI use but requires adherence to strict citation/disclosure rules.",
        "- Restrictive: Strictly limits or completely prohibits AI use for assignments.",
        "- Case-by-Case: Defers the policy to the instructor on a per-assignment basis.",
        "- Ambiguous: Mentions AI but the rules are unclear or vague.",
        "",
        "Your response MUST BE ONLY ONE of the category labels listed above. Do not add explanations.",
        "",
        "--- POLICY TEXT ---",
        f"{policy_text}",
        "--- END OF TEXT ---",
        "",
        "Category:",
    )
    return "\n".join(prompt_lines)


def analyze_policy(policy_text):
    """
    Sends the policy text to Ollama and returns a single category label.

    Args:
        policy_text: Raw policy text. Anything that is not a non-blank string
            is treated as "no policy" and is never sent to the model.

    Returns:
        One of the labels in POLICY_CATEGORIES:
        - "No Policy Found" for missing or blank input;
        - the matching label when the model's reply names a known category
          (quotes, letter case and surrounding punctuation/markdown are
          ignored, so "restrictive." or "**Restrictive**" still match);
        - "Ambiguous" when the reply matches no known category;
        - "Analysis Error" when the Ollama call or response parsing raises.
    """
    if not isinstance(policy_text, str) or not policy_text.strip():
        return "No Policy Found"

    prompt = create_prompt(policy_text)

    try:
        response = ollama.chat(
            model=OLLAMA_MODEL,
            messages=[{'role': 'user', 'content': prompt}]
        )
        # Extract the model's response and drop any quote characters.
        category = response['message']['content'].strip().replace("'", "").replace('"', '')

        # Fast path: the reply is already exactly one of the expected labels.
        if category in POLICY_CATEGORIES:
            return category

        # Tolerant path: models frequently decorate the label with a trailing
        # period, markdown emphasis, a list bullet or different casing. Strip
        # that decoration and compare case-insensitively before giving up.
        normalized = category.strip(" \t\r\n.,:;!*_`#-").casefold()
        for label in POLICY_CATEGORIES:
            if label.casefold() == normalized:
                return label

        print(f"\nWarning: Model returned an unexpected category: '{category}'. Marking as Ambiguous.")
        return "Ambiguous"

    except Exception as e:
        # Broad on purpose: one failed request must not abort a multi-hour batch.
        print(f"\nERROR: Could not connect to or get a valid response from Ollama. Reason: {e}")
        return "Analysis Error"

def process_csv_with_ollama(csv_path, out_suffix='_with_sentiment_1.csv'):
    """
    Reads a CSV, analyzes the 'AI Policy' column using an LLM, adds the
    results, and saves to a NEW file.

    Args:
        csv_path: Path of the input CSV. It must contain an 'AI Policy' column.
        out_suffix: Text appended to the input path (minus a trailing ".csv"
            extension, matched case-insensitively) to build the output path.

    Returns:
        None. On success a copy of the input with an added 'Policy Category'
        column is written to the output path. Nothing is written when the
        file is missing, the 'AI Policy' column is absent, or no row holds
        non-blank policy text.
    """
    print(f"\n--- Processing file: {os.path.basename(csv_path)} ---")

    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"  ERROR: File not found. Skipping.")
        return

    if 'AI Policy' not in df.columns:
        print("  ERROR: 'AI Policy' column not found. Skipping.")
        return

    # Build the output path up front. Appending to the stem (instead of
    # str.replace on ".csv") guarantees the result differs from the input even
    # when the path has no lowercase ".csv" in it, so the source file is never
    # overwritten.
    stem, extension = os.path.splitext(csv_path)
    output_path = (stem if extension.lower() == '.csv' else csv_path) + out_suffix

    # Identify which rows actually need processing. A per-value check is used
    # rather than the .str accessor, which raises when the column holds no
    # strings at all (an entirely empty column is read as float NaN).
    policies = df['AI Policy']
    has_text = policies.map(lambda value: isinstance(value, str) and value.strip() != '').astype(bool)
    rows_to_process = policies[has_text]

    if rows_to_process.empty:
        print("  INFO: No AI policies found to analyze in this file.")
        return

    print(f"  Found {len(rows_to_process)} policies to analyze. Starting analysis...")

    # Initialize the new column with a default value
    df['Policy Category'] = "No Policy Found"

    # Use tqdm for a progress bar as this will be slow
    for index, policy_text in tqdm(rows_to_process.items(), total=len(rows_to_process), desc="Analyzing Policies"):
        # Use .at for efficient cell setting
        df.at[index, 'Policy Category'] = analyze_policy(policy_text)

    # --- Save to a NEW file for safety ---
    try:
        df.to_csv(output_path, index=False, encoding='utf-8-sig')
        print(f"\n  SUCCESS: Analysis complete. Results saved to:\n  {output_path}")
    except Exception as e:
        print(f"\n  ERROR: Could not save the updated file. Reason: {e}")



  from .autonotebook import tqdm as notebook_tqdm


In [None]:
if __name__ == '__main__':
    # Classification pass over the dataset. No out_suffix is given, so the
    # function's default output suffix is used.
    print(f"{'='*30}\nStarting AI Policy Analysis with Ollama ({OLLAMA_MODEL})\n{'='*30}")

    target_csv_file = 'new_final_2024.csv'
    process_csv_with_ollama(csv_path=target_csv_file)

    print(f"\n{'='*30}\nAnalysis script finished.\n{'='*30}")

Starting AI Policy Analysis with Ollama (gpt-oss:20b)

--- Processing file: new_final_2024.csv ---
  Found 510 policies to analyze. Starting analysis...


Analyzing Policies: 100%|██████████| 510/510 [2:57:20<00:00, 20.86s/it]  


  SUCCESS: Analysis complete. Results saved to:
  new_final_2024_with_sentiment_1.csv

Analysis script finished.





In [None]:
if __name__ == '__main__':
    # Repeat classification pass over the same input, written to its own
    # "_with_sentiment_2.csv" output file.
    print(f"{'='*30}\nStarting AI Policy Analysis with Ollama ({OLLAMA_MODEL})\n{'='*30}")

    target_csv_file = 'new_final_2024.csv'
    process_csv_with_ollama(
        csv_path=target_csv_file,
        out_suffix='_with_sentiment_2.csv',
    )

    print(f"\n{'='*30}\nAnalysis script finished.\n{'='*30}")

Starting AI Policy Analysis with Ollama (gpt-oss:20b)

--- Processing file: new_final_2024.csv ---
  Found 510 policies to analyze. Starting analysis...


Analyzing Policies: 100%|██████████| 510/510 [2:57:03<00:00, 20.83s/it]  


  SUCCESS: Analysis complete. Results saved to:
  new_final_2024_with_sentiment_2.csv

Analysis script finished.





In [None]:
if __name__ == '__main__':
    # Repeat classification pass over the same input, written to its own
    # "_with_sentiment_3.csv" output file.
    print(f"{'='*30}\nStarting AI Policy Analysis with Ollama ({OLLAMA_MODEL})\n{'='*30}")

    target_csv_file = 'new_final_2024.csv'
    process_csv_with_ollama(
        csv_path=target_csv_file,
        out_suffix='_with_sentiment_3.csv',
    )

    print(f"\n{'='*30}\nAnalysis script finished.\n{'='*30}")

Starting AI Policy Analysis with Ollama (gpt-oss:20b)

--- Processing file: new_final_2024.csv ---
  Found 510 policies to analyze. Starting analysis...


Analyzing Policies: 100%|██████████| 510/510 [2:54:32<00:00, 20.53s/it]  


  SUCCESS: Analysis complete. Results saved to:
  new_final_2024_with_sentiment_3.csv

Analysis script finished.





In [None]:
if __name__ == '__main__':
    # Repeat classification pass over the same input, written to its own
    # "_with_sentiment_4.csv" output file.
    print(f"{'='*30}\nStarting AI Policy Analysis with Ollama ({OLLAMA_MODEL})\n{'='*30}")

    target_csv_file = 'new_final_2024.csv'
    process_csv_with_ollama(
        csv_path=target_csv_file,
        out_suffix='_with_sentiment_4.csv',
    )

    print(f"\n{'='*30}\nAnalysis script finished.\n{'='*30}")

Starting AI Policy Analysis with Ollama (gpt-oss:20b)

--- Processing file: new_final_2024.csv ---
  Found 510 policies to analyze. Starting analysis...


Analyzing Policies: 100%|██████████| 510/510 [3:08:14<00:00, 22.15s/it]  


  SUCCESS: Analysis complete. Results saved to:
  new_final_2024_with_sentiment_4.csv

Analysis script finished.





In [None]:
if __name__ == '__main__':
    # Repeat classification pass over the same input, written to its own
    # "_with_sentiment_5.csv" output file.
    print(f"{'='*30}\nStarting AI Policy Analysis with Ollama ({OLLAMA_MODEL})\n{'='*30}")

    target_csv_file = 'new_final_2024.csv'
    process_csv_with_ollama(
        csv_path=target_csv_file,
        out_suffix='_with_sentiment_5.csv',
    )

    print(f"\n{'='*30}\nAnalysis script finished.\n{'='*30}")

Starting AI Policy Analysis with Ollama (gpt-oss:20b)

--- Processing file: new_final_2024.csv ---
  Found 510 policies to analyze. Starting analysis...


Analyzing Policies: 100%|██████████| 510/510 [3:10:43<00:00, 22.44s/it]  


  SUCCESS: Analysis complete. Results saved to:
  new_final_2024_with_sentiment_5.csv

Analysis script finished.





In [None]:
# Collapses each fine-grained policy label into a coarse sentiment bucket.
# The two bookkeeping labels ('No Policy Found', 'Analysis Error') get
# buckets of their own.
SENTIMENT_MAP = dict([
    ('Permissive', 'Positive'),
    ('Guideline-Based', 'Neutral'),
    ('Case-by-Case', 'Neutral'),
    ('Ambiguous', 'Neutral'),
    ('Restrictive', 'Negative'),
    ('No Policy Found', 'No Policy'),
    ('Analysis Error', 'Error'),
])

def calculate_final_sentiment(row):
    """
    Analyzes the classification results from all runs for a single row.
    Returns the mode of the classification, the mode of the broad sentiment,
    and the confidence score for that sentiment.

    Run results are read from every ``Run_<n>_Result`` entry present in
    ``row`` (in ascending ``n``), so any number of runs is supported instead
    of exactly five.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) holding the per-run
            classification labels under ``Run_<n>_Result`` keys.

    Returns:
        pd.Series indexed by 'Final Classification', 'Broad Sentiment' and
        'Sentiment Confidence'. Confidence is the share of runs that agree
        with the winning sentiment. Ties go to the value seen first, i.e. the
        lowest-numbered run. With no run entries at all the result is
        ('No Policy Found', 'No Policy', 0.0).

    Note:
        The classification mode and the sentiment mode are computed
        independently, so the winning classification does not necessarily map
        to the winning sentiment.
    """
    result_index = ['Final Classification', 'Broad Sentiment', 'Sentiment Confidence']

    def _run_number(name):
        # The digits between the fixed "Run_" prefix and "_Result" suffix.
        return name[len('Run_'):-len('_Result')]

    run_columns = sorted(
        (
            name for name in row.keys()
            if isinstance(name, str)
            and name.startswith('Run_')
            and name.endswith('_Result')
            and _run_number(name).isdecimal()
        ),
        key=lambda name: int(_run_number(name)),
    )

    classifications = [row[name] for name in run_columns]

    if not classifications:
        return pd.Series(['No Policy Found', 'No Policy', 0.0], index=result_index)

    # Labels outside SENTIMENT_MAP (including missing values) become 'Unknown'.
    sentiments = [SENTIMENT_MAP.get(cat, 'Unknown') for cat in classifications]

    sentiment_counts = Counter(sentiments)
    final_sentiment = sentiment_counts.most_common(1)[0][0]

    confidence_score = sentiment_counts[final_sentiment] / len(sentiments)

    classification_counts = Counter(classifications)
    final_classification = classification_counts.most_common(1)[0][0]

    return pd.Series([final_classification, final_sentiment, confidence_score],
                     index=result_index)


def aggregate_sentiment_runs(source_csv_path, run_files_pattern):
    """
    Reads all run files, aggregates them, calculates sentiment confidence,
    and creates a final summary file with clear column names and logical order.

    Args:
        source_csv_path: Path of the original CSV whose rows and column
            layout form the basis of the report.
        run_files_pattern: Glob pattern matching the per-run result CSVs.
            Matches are sorted by filename and numbered 1..N in that order;
            each must contain a 'Policy Category' column and have as many
            rows as the source CSV.

    Returns:
        None. On success the report is written beside the source file with a
        "_final_aggregated.csv" suffix. Problems (fewer than two run files,
        unreadable files, a missing 'Policy Category' column, a row-count
        mismatch, a failed save) are printed and the function returns without
        raising.
    """
    print(f"\n--- Aggregating results for: {os.path.basename(source_csv_path)} ---")

    run_files = sorted(glob.glob(run_files_pattern))

    if len(run_files) < 2:
        print(f"  ERROR: Found only {len(run_files)} result file(s). Need at least 2. Halting.")
        return

    print(f"  Found {len(run_files)} result files to aggregate.")

    try:
        final_df = pd.read_csv(source_csv_path)
        run_dfs = [pd.read_csv(f) for f in run_files]
    except Exception as e:
        print(f"  ERROR: Could not read a required file. Reason: {e}")
        return

    # Capture the source layout now, before result columns are appended, so
    # the file does not have to be read a second time for its header.
    original_cols = list(final_df.columns)

    for run_number, (file_path, run_df) in enumerate(zip(run_files, run_dfs), start=1):
        run_name = os.path.basename(file_path)
        if 'Policy Category' not in run_df.columns:
            print(f"  ERROR: 'Policy Category' column not found in {run_name}. Halting.")
            return
        # Results are matched to source rows by position; a different row
        # count would silently misalign them or leave gaps.
        if len(run_df) != len(final_df):
            print(f"  ERROR: {run_name} has {len(run_df)} rows but the source has "
                  f"{len(final_df)}. Halting.")
            return
        final_df[f'Run_{run_number}_Result'] = run_df['Policy Category']

    print("  Calculating final classifications and sentiment confidence scores...")
    analysis_results = final_df.apply(calculate_final_sentiment, axis=1)

    final_df = final_df.join(analysis_results)

    new_analysis_cols = ['Final Classification', 'Broad Sentiment', 'Sentiment Confidence']
    run_cols = [f'Run_{i}_Result' for i in range(1, len(run_files) + 1)]

    # Place the summary columns right after 'AI Policy' when it exists;
    # otherwise append them after the original columns.
    try:
        policy_index = original_cols.index('AI Policy')
        final_col_order = (original_cols[:policy_index+1] +
                           new_analysis_cols +
                           original_cols[policy_index+1:] +
                           run_cols)
    except ValueError:
        final_col_order = original_cols + new_analysis_cols + run_cols

    final_df = final_df[final_col_order]

    # Append to the stem rather than str.replace(".csv", ...): the latter
    # returns the source path unchanged when it has no lowercase ".csv",
    # which would overwrite the source file.
    stem, extension = os.path.splitext(source_csv_path)
    final_output_path = (stem if extension.lower() == '.csv' else source_csv_path) + "_final_aggregated.csv"
    try:
        final_df.to_csv(final_output_path, index=False, encoding='utf-8-sig')
        print(f"\n  SUCCESS: Aggregation complete. Report with new naming and order saved to:\n  {final_output_path}")
    except Exception as e:
        print(f"\n  ERROR: Could not save the final aggregated file. Reason: {e}")


if __name__ == '__main__':
    # Combine the per-run result files matched by the glob pattern into a
    # single report based on the original CSV.
    original_csv_file = 'new_final_2024.csv'
    results_pattern = 'new_final_2024_with_sentiment_*.csv'

    aggregate_sentiment_runs(
        source_csv_path=original_csv_file,
        run_files_pattern=results_pattern,
    )


--- Aggregating results for: new_final_2024.csv ---
  Found 5 result files to aggregate.
  Calculating final classifications and sentiment confidence scores...

  SUCCESS: Aggregation complete. Report with new naming and order saved to:
  new_final_2024_final_aggregated.csv
