In [14]:
import pandas as pd 

# Given a term and a student level (undergraduate, graduate, or professional), build a single row that sums the enrollment data of every record at that level in that term
def generateCumulativeRow(term, level, data_dir='../data'):
    """Build a one-row DataFrame of enrollment totals for one student level in one term.

    Reads ``{data_dir}/summary_{term}.csv``, keeps the rows whose 'Student Level'
    equals ``level`` exactly, and sums every numeric column across those rows.

    Parameters
    ----------
    term : str
        Term code used in the file name and written to 'Term/Year Code' (e.g. 'fa05').
    level : str
        Value matched against 'Student Level'. The comparison is exact, so any
        trailing whitespace present in the data must be part of this string.
    data_dir : str, optional
        Directory that holds the summary CSV files. Defaults to '../data'.

    Returns
    -------
    pandas.DataFrame
        Exactly one row. 'Term/Year Code' comes first, followed by 'Student Level'
        and the numeric totals in the order they appear in the file. 'Name', 'Code'
        and any other non-numeric column are left out. If no row matches ``level``
        the totals are NaN.

    Raises
    ------
    FileNotFoundError
        If the summary file for ``term`` does not exist.
    KeyError
        If the file has no 'Student Level' column.
    """
    df = pd.read_csv(f'{data_dir}/summary_{term}.csv')
    level_df = df[df['Student Level'] == level]

    # Only numeric columns carry meaningful totals. Summing text columns would
    # concatenate strings, so they are excluded up front instead of being
    # aggregated and thrown away afterwards.
    excluded = {'Term/Year Code', 'Student Level', 'Name', 'Code'}
    numeric_cols = {
        col for col in level_df.select_dtypes(include='number').columns
        if col not in excluded
    }
    has_rows = not level_df.empty

    # Summing column by column keeps each column's own dtype (integer counts stay
    # integers) and always yields a single row, whatever 'Term/Year Code' values
    # the file contains.
    row = {'Term/Year Code': [term]}
    for col in df.columns:
        if col == 'Student Level':
            row[col] = [level]
        elif col in numeric_cols:
            # A level with no records is reported as missing (NaN), not as zero.
            row[col] = [level_df[col].sum() if has_rows else float('nan')]
    return pd.DataFrame(row)
    

# Given a term, we want to generate rows of data containing the cumulative statistics for all levels in said term. Eventually these will be concatenated
def generateCumulativeTerm(term):
    """Return the cumulative rows for ``term``, one per student level.

    The result is a list ordered undergraduate, graduate, professional; each
    element is whatever generateCumulativeRow returns for that level.
    """
    # The trailing space is part of each label handed to generateCumulativeRow.
    student_levels = ('Undergraduate ', 'Graduate ', 'Professional ')
    return [generateCumulativeRow(term, student_level) for student_level in student_levels]

In [15]:
# Now, we generate cumulative term data for all semesters in /data and merge into one DataFrame

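# The loop below covers 2005 through 2022: range(5, 23) stops at 22, and zfill(2) pads single-digit years to two digits (5 -> '05'). Update the range bounds when more years are added.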
# Term codes for two-digit years 05 through 22, in fall / spring / summer order
# within each year. range() stops before 23, and zfill(2) pads single-digit
# years (5 -> '05').
term_codes = [
    f'{season}{str(yr).zfill(2)}'
    for yr in range(5, 23)
    for season in ('fa', 'sp', 'su')
]

# Each term contributes a list of per-level rows; gather them all in one flat list.
df_list = []
for term_code in term_codes:
    df_list.extend(generateCumulativeTerm(term_code))

# 2023 is added by hand: the range above ends at 22, and only the spring term is
# loaded because there was no FA23 / SU23 data yet.
sp23 = generateCumulativeTerm('sp23')
df_list.extend(sp23)

# Stack every row into one frame with a fresh 0..n-1 index; sort=False keeps the
# columns in the order they are first encountered.
df_merged = pd.concat(df_list, ignore_index=True, sort=False)
df_merged

Unnamed: 0,Term/Year Code,Student Level,Total,Men,Women,Unreported Gender,Caucasian,Asian American,African American,Hispanic,...,Non-Illinois,Part time,Full time,Hawaiian/Pacific Isl,Multiracial,All African American,All Native American,All Hawaiian/ Pac Isl,All Asian,URM
0,fa05,Undergraduate,30449,16206,14211,32,20593,3840,1994,1942,...,3367,537,29912,,,,,,,
1,fa05,Graduate,9032,4901,4127,4,4273,599,364,289,...,6099,642,8390,,,,,,,
2,fa05,Professional,1029,472,556,1,661,133,40,55,...,247,35,994,,,,,,,
3,sp05,Undergraduate,27754,14505,13231,18,18892,3541,1876,1733,...,3105,776,26978,,,,,,,
4,sp05,Graduate,8691,4846,3844,1,4092,573,307,268,...,5825,706,7985,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,su22,Graduate,12114,6875,5190,49,4379,1476,519,827,...,9274,6196,5918,11.0,207.0,638.0,98.0,30.0,1640.0,1452.0
161,su22,Professional,379,154,225,0,191,95,11,28,...,244,53,326,0.0,19.0,15.0,6.0,3.0,109.0,49.0
162,sp23,Undergraduate,33186,17598,15561,27,12985,7275,1885,4506,...,8951,1814,31372,10.0,1193.0,2401.0,364.0,126.0,8324.0,6816.0
163,sp23,Graduate,18818,10564,8182,72,6449,2247,784,1226,...,14231,7705,11113,14.0,355.0,983.0,169.0,51.0,2517.0,2195.0


In [16]:
# Clean up df_merged and export it to CSV
# df_merged is rebound to its own cleaned version, so this cell must tolerate
# being run more than once: errors='ignore' stops drop() from raising KeyError
# when 'URM' has already been removed, and rename() skips labels that are absent.
df_merged = (
    df_merged
    .drop(columns=['URM'], errors='ignore')
    .rename(columns={'Term/Year Code': 'Term'})
)
# index=False keeps the row index out of the exported file.
df_merged.to_csv(rf'../data/cumulative_fa05-sp23.csv', index=False)

In [17]:
# Display df_merged to inspect the result of the cleanup step.
df_merged

Unnamed: 0,Term,Student Level,Total,Men,Women,Unreported Gender,Caucasian,Asian American,African American,Hispanic,...,Illinois,Non-Illinois,Part time,Full time,Hawaiian/Pacific Isl,Multiracial,All African American,All Native American,All Hawaiian/ Pac Isl,All Asian
0,fa05,Undergraduate,30449,16206,14211,32,20593,3840,1994,1942,...,27082,3367,537,29912,,,,,,
1,fa05,Graduate,9032,4901,4127,4,4273,599,364,289,...,2933,6099,642,8390,,,,,,
2,fa05,Professional,1029,472,556,1,661,133,40,55,...,782,247,35,994,,,,,,
3,sp05,Undergraduate,27754,14505,13231,18,18892,3541,1876,1733,...,24649,3105,776,26978,,,,,,
4,sp05,Graduate,8691,4846,3844,1,4092,573,307,268,...,2866,5825,706,7985,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,su22,Graduate,12114,6875,5190,49,4379,1476,519,827,...,2840,9274,6196,5918,11.0,207.0,638.0,98.0,30.0,1640.0
161,su22,Professional,379,154,225,0,191,95,11,28,...,135,244,53,326,0.0,19.0,15.0,6.0,3.0,109.0
162,sp23,Undergraduate,33186,17598,15561,27,12985,7275,1885,4506,...,24235,8951,1814,31372,10.0,1193.0,2401.0,364.0,126.0,8324.0
163,sp23,Graduate,18818,10564,8182,72,6449,2247,784,1226,...,4587,14231,7705,11113,14.0,355.0,983.0,169.0,51.0,2517.0
