In [15]:
import pandas as pd
from collections import Counter

In [17]:
# Read the two input tables: the per-major course catalog and the recorded course pathways.
course_catalog, course_pathways = (
    pd.read_csv(csv_name)
    for csv_name in ("course_catalog_by_major.csv", "course_pathways.csv")
)

In [19]:
# Majors whose pathways are analysed below.
selected_majors = ['Data science', 'Artificial intelligence', 'Cybersecurity']

# Keep only CECS pathways for the selected majors; copy so adding columns
# does not touch the frame loaded from disk.
in_cecs = course_pathways['Program'].eq('CECS')
in_selected_major = course_pathways['Major'].isin(selected_majors)
filtered_pathways = course_pathways.loc[in_cecs & in_selected_major].copy()

# Split each 'A -> B -> C' path string into its ordered list of course names.
filtered_pathways['FullCourseList'] = filtered_pathways['CoursePath'].str.split(' -> ')

# Map each major to the set of its catalog courses whose Type is 'Required'.
required_courses = course_catalog.loc[course_catalog['Type'].eq('Required')]
required_courses_dict = {
    major: set(courses)
    for major, courses in required_courses.groupby('Major')['Course']
}

def preserve_ordered_required_courses(row, required_by_major=None):
    """Return the row's required courses in the order they appear in its pathway.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Major' and 'FullCourseList' (the pathway as a list of
        course names).
    required_by_major : dict, optional
        Maps a major to the collection of its required course names.
        Defaults to the notebook-level ``required_courses_dict``.

    Returns
    -------
    list
        The courses of 'FullCourseList' that are required for the row's major,
        in pathway order. Empty when the major has no entry in the mapping or
        when the pathway is missing.
    """
    if required_by_major is None:
        required_by_major = required_courses_dict

    full_courses = row['FullCourseList']
    # A missing CoursePath is still NaN (a float) after .str.split(); iterating
    # it would raise TypeError, so treat it as a pathway with no courses.
    if not isinstance(full_courses, (list, tuple)):
        return []

    required = required_by_major.get(row['Major'], set())
    return [course for course in full_courses if course in required]

# Reduce every pathway to its required courses, keeping pathway order.
# Built row by row instead of DataFrame.apply(axis=1): on an empty frame,
# apply returns a DataFrame rather than a Series, which cannot be assigned
# to a single column.
filtered_pathways['OrderedRequiredCourseList'] = pd.Series(
    [preserve_ordered_required_courses(row) for _, row in filtered_pathways.iterrows()],
    index=filtered_pathways.index,
    dtype=object,
)

# Every consecutive (course, next course) pair across all pathways.
# zip() yields nothing for lists shorter than two, so no length check is needed.
all_transitions = [
    pair
    for ordered_courses in filtered_pathways['OrderedRequiredCourseList']
    for pair in zip(ordered_courses, ordered_courses[1:])
]

# How many pathways contain each transition.
transition_counts = Counter(all_transitions)

# Explicit columns keep the Source/Target/Count schema even when there are no
# transitions, so downstream column access and the CSV header still work.
transitions_df = pd.DataFrame(
    [(src, tgt, cnt) for (src, tgt), cnt in transition_counts.items()],
    columns=['Source', 'Target', 'Count'],
)

transitions_df

Unnamed: 0,Source,Target,Count
0,Intro to Python,Statistics I,40
1,Statistics I,Linear Algebra,40
2,Linear Algebra,Data Wrangling,40
3,Data Wrangling,Data Visualization,40
4,Data Visualization,Machine Learning,40
5,Machine Learning,Big Data Analytics,40
6,Big Data Analytics,Database Systems,40
7,Database Systems,Data Mining,40
8,Data Mining,Data Science Capstone 401,40
9,Intro to Python,Probability & Statistics,40


In [23]:
# Persist the transition counts; the DataFrame index is not written.
transitions_df.to_csv(
    path_or_buf="transitions.csv",
    index=False,
)