<a href="https://colab.research.google.com/github/vnaut/Professor-Rating-ML-Project/blob/main/PlanetTerp_Professors_ML_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Flair, which the review sentiment-analysis cell later in this notebook imports
!pip install flair --quiet

In [None]:
import requests
import time
import json

# Root URL of the PlanetTerp API; endpoint paths such as /professors are appended to it.
BASE_URL = "https://planetterp.com/api/v1"
# Seconds passed to time.sleep() between consecutive requests; 0 means no pause.
DELAY = 0

In [None]:
def get_professors(limit=100, offset=0, include_reviews=False, prof_type=None):
    """
    Request one page of professor records from the ``/professors`` endpoint.

    :param limit: page size sent as the ``limit`` query parameter (1-100)
    :param offset: number of records to skip, sent as ``offset``
    :param include_reviews: when truthy, adds ``reviews=true`` to the query
    :param prof_type: 'professor' or 'ta'; left out of the query when falsy
    :return: decoded JSON body (expected to be a list of professor dicts)
    :raises requests.HTTPError: if the response carries an error status
    """
    # Optional filters, in the order they are added to the query;
    # None marks a filter that must be left out entirely.
    optional = {
        'reviews': 'true' if include_reviews else None,
        'type': prof_type if prof_type else None,
    }
    query = {'limit': limit, 'offset': offset}
    query.update({key: value for key, value in optional.items() if value is not None})

    response = requests.get(f"{BASE_URL}/professors", params=query)
    response.raise_for_status()
    return response.json()

In [None]:
def fetch_all_professors(include_reviews=False, prof_type=None):
    """
    Collect every professor record by requesting 100-record pages until the
    API returns an empty page. Sleeps DELAY seconds after each non-empty page.
    """
    def next_page(skip):
        # One page of up to 100 records, starting after `skip` records.
        return get_professors(limit=100, offset=skip,
                              include_reviews=include_reviews,
                              prof_type=prof_type)

    collected = []
    page = next_page(0)
    while page:
        collected.extend(page)
        time.sleep(DELAY)
        # Everything gathered so far is exactly what the next request must skip.
        page = next_page(len(collected))
    return collected

In [None]:
def get_courses(department=None, limit=100, offset=0, include_reviews=False):
    """
    Request one page of course records from the ``/courses`` endpoint.

    :param department: 4-char dept code, e.g. 'CMSC'; left out when falsy
    :param limit: page size sent as the ``limit`` query parameter
    :param offset: number of records to skip, sent as ``offset``
    :param include_reviews: when truthy, adds ``reviews=true`` to the query
    :return: decoded JSON body of the response
    :raises requests.HTTPError: if the response carries an error status
    """
    query = dict(limit=limit, offset=offset)
    if department:
        query.update(department=department)
    if include_reviews:
        query.update(reviews='true')

    response = requests.get(f"{BASE_URL}/courses", params=query)
    response.raise_for_status()
    return response.json()

In [None]:
def fetch_all_courses(department=None, include_reviews=False):
    """
    Collect every course record (optionally restricted to one department) by
    requesting 100-record pages until the API returns an empty page.
    Sleeps DELAY seconds after each non-empty page.
    """
    def pages():
        # Yield successive non-empty pages; stop at the first empty one.
        skip = 0
        while True:
            page = get_courses(department=department,
                               limit=100, offset=skip,
                               include_reviews=include_reviews)
            if not page:
                return
            yield page
            skip += len(page)
            time.sleep(DELAY)

    all_courses = []
    for page in pages():
        all_courses.extend(page)
    return all_courses

In [None]:
def get_grades(course=None, professor=None, semester=None, section=None):
    """
    Fetch grade distributions from the ``/grades`` endpoint.

    At least one of ``course`` or ``professor`` must be provided; ``semester``
    and ``section`` only narrow the result and are not sufficient on their own.

    :param course: course code string e.g. 'CMSC320'
    :param professor: full name string e.g. 'Jon Snow'
    :param semester: 'YYYY01' for spring, 'YYYY08' for fall
    :param section: section code string, e.g. '0101'
    :return: decoded JSON body of the response
    :raises ValueError: if neither ``course`` nor ``professor`` is given
    :raises requests.HTTPError: if the response carries an error status
    """
    # Validate before building the query: checking "is the query empty?" would
    # wrongly accept a call that passes only semester and/or section.
    if not course and not professor:
        raise ValueError("Must specify at least course or professor")

    candidates = {
        'course': course,
        'professor': professor,
        'semester': semester,
        'section': section,
    }
    # Only truthy values are sent, so unset filters never reach the API.
    params = {key: value for key, value in candidates.items() if value}

    resp = requests.get(f"{BASE_URL}/grades", params=params)
    resp.raise_for_status()
    return resp.json()

In [None]:
def save_json(data, filename):
    """
    Serialize ``data`` as 2-space-indented JSON into ``filename``.

    The file is opened in text write mode, so any existing content is replaced.
    """
    with open(filename, mode='w') as out:
        json.dump(data, out, indent=2)

In [None]:
def main():
    """
    Download professors (with reviews), the courses they list, and the grade
    distribution of every professor/course pair, saving each data set to
    professors.json, courses.json and grades.json respectively.

    A failed course or grade request is reported on stdout and skipped so that
    one bad record does not abort the whole download.
    """
    # Example: Fetch all professors with reviews
    print("Fetching all professors with reviews...")
    professors = fetch_all_professors(include_reviews=True)
    print(f"Retrieved {len(professors)} professors")
    save_json(professors, 'professors.json')

    # Collect unique courses from professors
    course_codes = set()
    for prof in professors:
        # `or []` also covers a 'courses' key that is present but null.
        course_codes.update(prof.get('courses') or [])

    # Fetch course details for each unique code
    print(f"Fetching details for {len(course_codes)} courses...")
    courses = {}
    for code in course_codes:
        try:
            resp = requests.get(f"{BASE_URL}/course", params={'name': code})
            # Without this check an error payload would be stored as if it
            # were real course data.
            resp.raise_for_status()
            courses[code] = resp.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a response body that is not valid JSON.
            print(f"Error fetching course {code}: {e}")
        time.sleep(DELAY)
    save_json(courses, 'courses.json')

    # Fetch grade distributions for each professor-course pair
    print("Fetching grade distributions...")
    grades = []
    for prof in professors:
        name = prof['name']
        for code in prof.get('courses') or []:
            try:
                gd = get_grades(course=code, professor=name)
                grades.extend(gd)
            except Exception as e:
                # Deliberately best-effort: report the failure and carry on.
                print(f"Error fetching grades for {code} by {name}: {e}")
            time.sleep(DELAY)
    save_json(grades, 'grades.json')

    print("Data fetching complete. Files saved: professors.json, courses.json, grades.json")


if __name__ == '__main__':
    main()

In [None]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import json

# Root URL of the PlanetTerp API; endpoint paths such as /grades are appended to it.
BASE_URL = "https://planetterp.com/api/v1"
# Size of the thread pool used to fetch per-course grades concurrently.
MAX_WORKERS = 10  # adjust based on your network/API limits

def _fetch_paginated(session, endpoint, limit):
    """
    Return every record of a paginated list endpoint.

    Requests ``{BASE_URL}/{endpoint}`` repeatedly with ``limit``/``offset``
    query parameters, advancing the offset by the size of each page, and stops
    at the first empty page.

    :param session: object whose ``get(url, params=...)`` performs the request
    :param endpoint: path segment appended to BASE_URL, e.g. 'professors'
    :param limit: page size sent with every request
    :raises requests.HTTPError: if any page comes back with an error status
    """
    records, offset = [], 0
    while True:
        resp = session.get(f"{BASE_URL}/{endpoint}", params={'limit': limit, 'offset': offset})
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            return records
        records.extend(batch)
        # Advance by what was actually received, not by the requested limit.
        offset += len(batch)


def fetch_all_professors(session, limit=100):
    """Return all professor records, fetched ``limit`` at a time via ``session``."""
    return _fetch_paginated(session, "professors", limit)


def fetch_all_courses(session, limit=100):
    """Return all course records, fetched ``limit`` at a time via ``session``."""
    return _fetch_paginated(session, "courses", limit)

def fetch_grades_for_course(session, course_code):
    """
    Return the decoded ``/grades`` response for a single course code.

    :raises requests.HTTPError: if the response carries an error status
    """
    query = {'course': course_code}
    response = session.get(f"{BASE_URL}/grades", params=query)
    response.raise_for_status()
    payload = response.json()
    return payload

def main():
    """
    Download professors, courses and per-course grade distributions, then
    write them to professors.json, courses.json and grades.json.

    Grades are fetched concurrently with a pool of MAX_WORKERS threads; a
    failed grade request is printed and skipped.

    NOTE(review): professors are fetched here without reviews, and the result
    overwrites any professors.json written earlier with reviews. The sentiment
    cell reads 'reviews' from that file -- confirm the intended run order.
    """
    # The `with` block closes the session (and the connections it holds) once
    # all fetching is finished; previously it was never closed.
    # NOTE(review): the session is shared by all worker threads, as before --
    # confirm that is acceptable for the requests version in use.
    with requests.Session() as session:
        # 1) Bulk fetch professors (no reviews) and courses
        print("Fetching professors...")
        profs = fetch_all_professors(session)
        print(f"  → Retrieved {len(profs)} professors.")

        print("Fetching courses...")
        courses = fetch_all_courses(session)
        print(f"  → Retrieved {len(courses)} courses.")

        # 2) Concurrently fetch grades per course
        # Codes are built once here and reused as the keys of courses.json.
        course_codes = [f"{c['department']}{c['course_number']}" for c in courses]
        print("Fetching grades for each course in parallel...")
        grades = []
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            futures = {
                executor.submit(fetch_grades_for_course, session, code): code
                for code in course_codes
            }
            for future in as_completed(futures):
                code = futures[future]
                try:
                    grades.extend(future.result())
                except Exception as e:
                    # Deliberately best-effort: report the failure and carry on.
                    print(f"Error fetching grades for {code}: {e}")

    # 3) Save to files
    with open('professors.json', 'w') as f:
        json.dump(profs, f, indent=2)
    with open('courses.json', 'w') as f:
        # Keyed by course code; a repeated code keeps the last record seen.
        json.dump(dict(zip(course_codes, courses)), f, indent=2)
    with open('grades.json', 'w') as f:
        json.dump(grades, f, indent=2)

    print("Done! Data saved to professors.json, courses.json, grades.json")

if __name__ == "__main__":
    main()

In [None]:
# json is imported here so this cell also runs on its own (e.g. after a kernel
# restart); it previously relied on an earlier cell having imported it.
import json

import pandas as pd

# 1) Load the raw JSON dumps (produced by fetch_planetterp_data.py)
with open('professors.json') as f:
    profs = json.load(f)
with open('courses.json') as f:
    courses = json.load(f)
with open('grades.json')  as f:
    grades = json.load(f)

# 2) Make DataFrames
# prof_df: one row per professor; later steps read its name, slug, courses
# and average_rating columns.
prof_df   = pd.DataFrame(profs)
# courses_df: courses.json is a dict keyed by course code, so the keys become
# the 'course_code' column and each value's fields become the other columns.
courses_df = (
    pd.DataFrame.from_dict(courses, orient='index')
      .rename_axis('course_code')
      .reset_index()
)
# grades_df: one row per grade-distribution record in grades.json.
grades_df = pd.DataFrame(grades)

# 3) Course-level aggregates per professor
#    - num_courses:    how many distinct courses they've taught
#    - avg_credits:    average credits of those courses found in courses_df
#    - avg_course_gpa: average of those courses' average_gpa
course_feats = (
    prof_df
      .explode('courses')
      .merge(courses_df[['course_code','credits','average_gpa']],
             left_on='courses', right_on='course_code', how='left')
      .groupby('name')
      .agg(
         # Count the professor's own 'courses' entries: after the left merge
         # 'course_code' is NaN for any course missing from courses_df, so
         # counting it would under-report the courses actually taught.
         num_courses     = ('courses',       'nunique'),
         avg_credits     = ('credits',       'mean'),
         avg_course_gpa  = ('average_gpa',   'mean')
      )
)

# 4) Grade-distribution aggregates per professor
#    Counts are summed over all of a professor's rows, then each grade column
#    is expressed as a share of that professor's total.
grade_cols = ['A+','A','A-','B+','B','B-','C+','C','C-','D+','D','D-','F','W','Other']
grades_sum = grades_df.groupby('professor')[grade_cols].sum()
grades_sum['total'] = grades_sum.sum(axis=1)
# One row-wise division yields every prop_<grade> column at once.
grades_sum = grades_sum.join(
    grades_sum[grade_cols].div(grades_sum['total'], axis=0).add_prefix('prop_')
)

# 5) Build the modelling table: one row per professor, indexed by name while
#    the course features and selected grade proportions are joined on.
_kept_props = [f'prop_{g}' for g in ['A+','A','A-','B+','B','B-','W']]
_targets = prof_df.set_index('name')[['slug','average_rating']]
dataset = _targets.join(course_feats).join(grades_sum[_kept_props]).reset_index()

# 6) Show the last few rows as a quick check
print(dataset.tail())


In [None]:
import json
import pandas as pd

# 1) Load raw professors + their reviews
with open('professors.json') as f:
    profs = json.load(f)

# 2) Flatten out reviews into a DataFrame, one row per review.
#    Each professor dict may carry a "reviews" list; a missing or null list
#    is treated as "no reviews".
#    NOTE(review): the text was originally read only from "review_text";
#    PlanetTerp review objects appear to use "review" instead, so that key is
#    tried as a fallback -- confirm against a real payload.
rows = [
    {
        'professor':       p['name'],
        'text':            r.get('review_text') or r.get('review') or '',
        'expected_grade':  r.get('expected_grade'),
    }
    for p in profs
    for r in (p.get('reviews') or [])
]

# Explicit columns keep the frame usable even when there are no reviews at
# all (an empty row list would otherwise give a frame with no columns).
rev_df = pd.DataFrame(rows, columns=['professor', 'text', 'expected_grade'])

# Peek at the first available review. Indexing profs[0]['reviews'][0]
# directly raised IndexError whenever the first professor had no reviews.
import pprint
sample_review = next((r for p in profs for r in (p.get('reviews') or [])), None)
if sample_review is None:
    print("No reviews found in professors.json")
else:
    pprint.pprint(sample_review)


# 3) Run sentiment analysis
#    Example using Flair:
from flair.models import TextClassifier
from flair.data   import Sentence

# Load the 'en-sentiment' classifier once at module level; flair_sentiment()
# below reuses this single instance for every review.
classifier = TextClassifier.load('en-sentiment')

def flair_sentiment(txt):
    """
    Return the first label value the classifier assigns to ``txt``
    (either "POSITIVE" or "NEGATIVE").

    Returns None when ``txt`` is not a string or is empty/whitespace-only,
    and when prediction leaves the sentence without any label, instead of
    failing on ``labels[0]``.
    """
    # Nothing to classify: skip the model entirely.
    if not isinstance(txt, str) or not txt.strip():
        return None
    s = Sentence(txt)
    classifier.predict(s)
    if not s.labels:
        return None
    return s.labels[0].value

# Label every review with the classifier's sentiment
rev_df['sentiment'] = rev_df['text'].apply(flair_sentiment)

# 4) Numeric value of each letter grade, so expected grades can be averaged
#    (grades missing from this table map to NaN)
grade_to_num = {
    'A+': 4.0,
    'A':  4.0,
    'A-': 3.7,
    'B+': 3.3,
    'B':  3.0,
    'B-': 2.7,
    'C+': 2.3,
    'C':  2.0,
    'C-': 1.7,
    'D+': 1.3,
    'D':  1.0,
    'D-': 0.7,
    'F':  0.0,
}
rev_df['grade_num'] = rev_df['expected_grade'].map(grade_to_num)

# 5) Per-professor aggregates
# Reviews per (professor, sentiment) pair, pivoted to one column per label
_pair_sizes = rev_df.groupby(['professor','sentiment']).size()
sent_counts = _pair_sizes.unstack(fill_value=0)

# Total review count and mean numeric expected grade for each professor
_per_prof = rev_df.groupby('professor')
num_ratings   = _per_prof.size().rename('num_ratings')
avg_grade_num = _per_prof['grade_num'].mean().rename('avg_grade_num')

# 6) Convert back to a letter grade (round to nearest standard cut)
def num_to_letter(x):
    """
    Convert a numeric grade-point value to a letter grade ('A' through 'F').

    Each letter covers the values at or above its cutoff and below the next
    higher cutoff; anything under 0.5 is 'F'.

    Returns None when ``x`` is None or NaN. NaN fails every ``>=`` comparison,
    so it used to fall through to 'F' and mislabel professors who simply had
    no usable expected grades.
    """
    import math  # local import: keeps this function independent of other cells

    if x is None or math.isnan(x):
        return None

    # (lowest value that still earns the letter, letter), highest first
    cutoffs = (
        (3.85, 'A'),
        (3.5,  'A-'),
        (3.15, 'B+'),
        (2.85, 'B'),
        (2.5,  'B-'),
        (2.15, 'C+'),
        (1.85, 'C'),
        (1.5,  'C-'),
        (1.15, 'D+'),
        (0.85, 'D'),
        (0.5,  'D-'),
    )
    for floor, letter in cutoffs:
        if x >= floor:
            return letter
    return 'F'

# Combine the per-professor sentiment counts, review totals and mean expected
# grade into one table with 'professor' as a regular column.
summary = (
    sent_counts
      .join(num_ratings)
      .join(avg_grade_num)
      .reset_index()
)

# unstack() only creates a column for a sentiment label that actually occurs,
# so a data set with no positive (or no negative) reviews would make the
# column selection below raise KeyError. Add any missing label as all zeros.
for label in ('POSITIVE', 'NEGATIVE'):
    if label not in summary.columns:
        summary[label] = 0

summary['avg_expected_grade'] = summary['avg_grade_num'].apply(num_to_letter)

# 7) Final table
print(summary[['professor','POSITIVE','NEGATIVE','num_ratings','avg_expected_grade']])
