In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Load the master funding table from disk.
# The CSV's first column has no header, so pandas labels it 'Unnamed: 0';
# it is renamed to STNAME and promoted to the row index.
file_path = "../datasets/master_funding.csv"

funding_master = (
    pd.read_csv(file_path, encoding='utf-8')
    .rename(columns={'Unnamed: 0': 'STNAME'})
    .set_index('STNAME')
)
funding_master

Unnamed: 0_level_0,STABR,2006_R1A,2006_R1B,2006_R1C,2006_R1D,2006_R1E,2006_R1F,2006_R1G,2006_R1H,2006_R1I,...,2010_A14B,2010_PPE15,2010_MEMBR09,2010_ARRASTE1,2010_ARRATE5,2010_ARRAE81Z,2010_ARRATE10,2010_ARRASTE6,2010_ARRATLEIZ,2010_ARRASTE4
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alabama,AL,-2,-2,837722644,507421446,3563199,902980,0,86597,80153773,...,698208,8651,748889,369660033,381295043,138409,6187052,2689876,99711346,0
alaska,AK,-2,-2,169308873,180834960,0,0,0,0,11988478,...,-2,16668,131661,23470386,49963570,0,3835787,1228974,13751946,0
arizona,AZ,2610582721,-2,-2,40960642,580878,21656088,129949,403161,59366011,...,-2,8255,1077831,539065103,550231077,548353,29137598,232566,253804938,-2
arkansas,AR,1094994437,1245972,1520969,792048,4678927,2982173,60826,135367,45949378,...,-2,9481,480559,76412045,134544527,427520,61383167,76377545,37711346,-2
california,CA,13875619650,450091925,168400,-2,199551,216962720,23243683,36124922,1056239256,...,-2,9229,6263438,2332892784,3209610616,69829727,20955494,8622373,452377212,-1
colorado,CO,2828139818,71445787,-2,-2,47598986,5266907,5184240,111686,106929973,...,-2,9024,832368,69641156,111859035,6107904,10567842,72984,47800312,0
connecticut,CT,-2,-2,4771725433,-2,3053979,160635678,0,9311737,1146385,...,548787,15619,563968,368176090,368176090,1085172,10041669,-1,39247400,1753315
delaware,DE,355448768,-2,-2,-2,0,47385484,0,0,9687604,...,119879,12415,126801,47318827,60815739,53106,1762749,0,11821846,0
district of columbia,DC,-2,-2,228167996,815773094,557725,0,233388,0,45624,...,-2,20460,69433,94922392,100828760,796006,1268635,0,22900442,0
florida,FL,9914711981,-2,-2,-2,338296,0,10111304,4953368,403759340,...,-2,8597,2634522,850169705,1440267844,17253688,36363687,95990170,177551767,11026737


In [3]:
# Read the variable catalogue from the meta data file. The 2006 meta file is always
# used so that variables introduced in later years (and missing from the early years)
# are never selected.
meta_path = "../resources/funding_data/2006_meta.txt"
meta_columns = ['Variable','Data Type', 'Data Element Position', 'Description']

meta = pd.read_csv(
    meta_path,
    sep=r'\s{2,}',      # fields are split on runs of two or more whitespace characters
    engine='python',    # regex separators need the python parser engine
    header=None,
    names=meta_columns,
)

# Discard the first two parsed rows (presumably header lines of the text file -- confirm)
leading_rows = meta.index[0:2]
meta = meta.drop(leading_rows)

# Preview the rows at positions 27-40: the instructional expenditure variables
meta.iloc[27:41, :]

Unnamed: 0,Variable,Data Type,Data Element Position,Description
29,E11,N,29,INSTRUCTIONAL EXPENDITURES SALARIES
30,E12,N,30,INSTRUCTIONAL EXPENDITURES EMPLOYEE BENEFITS
31,E13,N,31,INSTRUCTIONAL EXPENDITURES PURCHASED SERVICES
32,E14,N,32,INSTRUCTIONAL EXPENDITURES TUITION TO PRIVATE ...
33,E15,N,33,INSTRUCTIONAL EXPENDITURES TUITION TO OTHER LE...
34,E16,N,34,INSTRUCTIONAL EXPENDITURES SUPPLIES
35,E17,N,35,INSTRUCTIONAL EXPENDITURES PROPERTY
36,E18,N,36,INSTRUCTIONAL EXPENDITURES OTHER
37,STE1,N,37,INSTRUCTIONAL EXPENDITURES SUBTOTAL
38,E11A,N,38,TEACHER SALARIES REGULAR PROGRAMS


In [4]:
# Create Instructional Expenditure Subset

# Years will always be the same for all subsets
years = ['2006', '2007', '2008', '2009', '2010']

# Subtotal code in the selected meta rows. It is left out of the subset because
# we will be creating our own totals and custom subtotals to avoid any double counting.
subtotal_variable = 'STE1'

# Forge the master column names from year and variable code, using the variable
# codes from the meta rows shown above (positions 27-40) minus the subtotal.
# Order is year-major, then meta order within each year.
inst_ex_columns = [
    year + '_' + variable
    for year in years
    for variable in meta['Variable'].iloc[27:41]
    if variable != subtotal_variable
]

# Keep only strictly positive amounts; 0, -1 and -2 are N/A codes and become missing.
# Each column is masked as a whole instead of writing cell by cell with chained
# indexing (df[col][i] = value): that pattern raises SettingWithCopyWarning, does not
# write through under pandas Copy-on-Write, and relied on integer positions being
# accepted on the string-labelled STNAME index.
# Columns stay numeric (missing entries are NaN) rather than becoming object-typed,
# and the result keeps funding_master's STNAME index.
inst_ex_df = funding_master[inst_ex_columns].apply(
    lambda amounts: amounts.where(amounts > 0)
)
inst_ex_df

Unnamed: 0_level_0,2006_E11,2006_E12,2006_E13,2006_E14,2006_E15,2006_E16,2006_E17,2006_E18,2006_E11A,2006_E11B,...,2010_E14,2010_E15,2010_E16,2010_E17,2010_E18,2010_E11A,2010_E11B,2010_E11C,2010_E11D,2010_E2
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alabama,2175489042,785451414,88963202,2899939.0,463446.0,267903434,6410373,12374024.0,1547398697.0,270249489.0,...,1853069.0,395947.0,258190506,5451356,11889250.0,1792844992.0,188274018.0,110278708.0,24837500.0,16553537.0
alaska,551604528,223009277,49074469,,,42269974,7561600,8645424.0,,,...,108520.0,197429.0,59980844,13023961,10549173.0,,,,,
arizona,3434476964,634617858,156342973,6053177.0,19084602.0,159898431,125084667,26840242.0,,,...,10873343.0,28777468.0,187008767,128741211,135885169.0,2084843217.0,334131348.0,66336523.0,116676673.0,71375105.0
arkansas,1591695691,409857095,65222296,8077867.0,5541051.0,200079253,29174514,17153301.0,1049757024.0,161487476.0,...,12001037.0,7316257.0,214753424,45831168,18506817.0,1141046676.0,182872151.0,86305441.0,153053949.0,25140497.0
california,21714002608,6906542947,1244453344,693884790.0,322128498.0,1682409648,36235279,3273287.0,16320132495.0,2471220681.0,...,748356725.0,395138436.0,1365473021,26828100,3808507.0,17138076886.0,2809219250.0,130419485.0,778490284.0,316585759.0
colorado,2547089097,569916903,83895288,49376931.0,6386936.0,263930515,50750526,62780246.0,1834671034.0,300246185.0,...,42116539.0,9699904.0,274122107,64290163,59574416.0,2151622195.0,356613211.0,87514203.0,108268215.0,54033013.0
connecticut,3107054505,1108456056,140708880,284136388.0,50356321.0,114910156,30961436,7075003.0,2027218289.0,329526257.0,...,364609254.0,66784612.0,126809130,45669776,7226164.0,2283622951.0,389208722.0,42420860.0,130350148.0,
delaware,539576609,231696607,20142985,6776803.0,51426965.0,40626996,3038210,9439002.0,334621519.0,103399476.0,...,6474801.0,63743941.0,48807224,2670322,9691939.0,377124374.0,114761878.0,17015297.0,29011528.0,8112869.0
district of columbia,347191986,41199792,17803211,140103284.0,,4450500,19995979,,194712000.0,33896736.0,...,166575039.0,,18708260,10932490,5732696.0,327223565.0,55070245.0,4284889.0,19538010.0,111616709.0
florida,8030592580,2222370951,1401906889,666407.0,,601227999,130252063,95981694.0,4699901623.0,1827213910.0,...,943321.0,,502706147,77618831,99453572.0,5249057514.0,1803179638.0,211792426.0,345545169.0,169016528.0


In [5]:
# Break Instructional Expenditures Subset down by Year.
# Columns are picked by their '<year>_' name prefix rather than by fixed positional
# offsets, so each subset stays correct even if the number of variables per year changes.
def _year_subset(frame, year):
    """Return the columns of `frame` whose names start with '<year>_', in their original order."""
    return frame.loc[:, frame.columns.str.startswith(year + '_')]

inst_ex_df_2006 = _year_subset(inst_ex_df, '2006')
inst_ex_df_2007 = _year_subset(inst_ex_df, '2007')
inst_ex_df_2008 = _year_subset(inst_ex_df, '2008')
inst_ex_df_2009 = _year_subset(inst_ex_df, '2009')
inst_ex_df_2010 = _year_subset(inst_ex_df, '2010')
inst_ex_df_2010

Unnamed: 0_level_0,2010_E11,2010_E12,2010_E13,2010_E14,2010_E15,2010_E16,2010_E17,2010_E18,2010_E11A,2010_E11B,2010_E11C,2010_E11D,2010_E2
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
alabama,2527833534,998529795,104085015,1853069.0,395947.0,258190506,5451356,11889250.0,1792844992.0,188274018.0,110278708.0,24837500.0,16553537.0
alaska,641431359,384228399,59288785,108520.0,197429.0,59980844,13023961,10549173.0,,,,,
arizona,3359861432,859390181,206437720,10873343.0,28777468.0,187008767,128741211,135885169.0,2084843217.0,334131348.0,66336523.0,116676673.0,71375105.0
arkansas,1757604021,479523820,90422003,12001037.0,7316257.0,214753424,45831168,18506817.0,1141046676.0,182872151.0,86305441.0,153053949.0,25140497.0
california,23503106187,7756666360,1679025181,748356725.0,395138436.0,1365473021,26828100,3808507.0,17138076886.0,2809219250.0,130419485.0,778490284.0,316585759.0
colorado,3059053410,724107986,110502483,42116539.0,9699904.0,274122107,64290163,59574416.0,2151622195.0,356613211.0,87514203.0,108268215.0,54033013.0
connecticut,3544680769,1371504932,204105857,364609254.0,66784612.0,126809130,45669776,7226164.0,2283622951.0,389208722.0,42420860.0,130350148.0,
delaware,607857133,265699807,14029383,6474801.0,63743941.0,48807224,2670322,9691939.0,377124374.0,114761878.0,17015297.0,29011528.0,8112869.0
district of columbia,458470012,60639939,13029947,166575039.0,,18708260,10932490,5732696.0,327223565.0,55070245.0,4284889.0,19538010.0,111616709.0
florida,8699434850,2680150045,2129008654,943321.0,,502706147,77618831,99453572.0,5249057514.0,1803179638.0,211792426.0,345545169.0,169016528.0


In [6]:
# Write each yearly instructional-expenditure subset to its own CSV file.
# Output locations, one per year:
inst_ex_out_2006 = '../datasets/subsets/inst_ex/inst_ex_2006.csv'
inst_ex_out_2007 = '../datasets/subsets/inst_ex/inst_ex_2007.csv'
inst_ex_out_2008 = '../datasets/subsets/inst_ex/inst_ex_2008.csv'
inst_ex_out_2009 = '../datasets/subsets/inst_ex/inst_ex_2009.csv'
inst_ex_out_2010 = '../datasets/subsets/inst_ex/inst_ex_2010.csv'

# Pair every yearly frame with its destination and save them in year order
for yearly_frame, destination in (
    (inst_ex_df_2006, inst_ex_out_2006),
    (inst_ex_df_2007, inst_ex_out_2007),
    (inst_ex_df_2008, inst_ex_out_2008),
    (inst_ex_df_2009, inst_ex_out_2009),
    (inst_ex_df_2010, inst_ex_out_2010),
):
    yearly_frame.to_csv(destination)