In [1]:
# Import Dependencies
import pandas as pd
# matplotlib's plotting interface lives in the `pyplot` submodule
import matplotlib.pyplot as plt

In [2]:
# Load in Master Funding Data
# The CSV's first (unnamed) column is renamed to STNAME and used as the index.
file_path = "../datasets/master_funding.csv"

funding_master = (
    pd.read_csv(file_path, encoding='utf-8')
    .rename(columns={'Unnamed: 0': 'STNAME'})
    .set_index(['STNAME'])
)
funding_master

Unnamed: 0_level_0,STABR,2006_R1A,2006_R1B,2006_R1C,2006_R1D,2006_R1E,2006_R1F,2006_R1G,2006_R1H,2006_R1I,...,2010_A14B,2010_PPE15,2010_MEMBR09,2010_ARRASTE1,2010_ARRATE5,2010_ARRAE81Z,2010_ARRATE10,2010_ARRASTE6,2010_ARRATLEIZ,2010_ARRASTE4
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alabama,AL,-2,-2,837722644,507421446,3563199,902980,0,86597,80153773,...,698208,8651,748889,369660033,381295043,138409,6187052,2689876,99711346,0
alaska,AK,-2,-2,169308873,180834960,0,0,0,0,11988478,...,-2,16668,131661,23470386,49963570,0,3835787,1228974,13751946,0
arizona,AZ,2610582721,-2,-2,40960642,580878,21656088,129949,403161,59366011,...,-2,8255,1077831,539065103,550231077,548353,29137598,232566,253804938,-2
arkansas,AR,1094994437,1245972,1520969,792048,4678927,2982173,60826,135367,45949378,...,-2,9481,480559,76412045,134544527,427520,61383167,76377545,37711346,-2
california,CA,13875619650,450091925,168400,-2,199551,216962720,23243683,36124922,1056239256,...,-2,9229,6263438,2332892784,3209610616,69829727,20955494,8622373,452377212,-1
colorado,CO,2828139818,71445787,-2,-2,47598986,5266907,5184240,111686,106929973,...,-2,9024,832368,69641156,111859035,6107904,10567842,72984,47800312,0
connecticut,CT,-2,-2,4771725433,-2,3053979,160635678,0,9311737,1146385,...,548787,15619,563968,368176090,368176090,1085172,10041669,-1,39247400,1753315
delaware,DE,355448768,-2,-2,-2,0,47385484,0,0,9687604,...,119879,12415,126801,47318827,60815739,53106,1762749,0,11821846,0
district of columbia,DC,-2,-2,228167996,815773094,557725,0,233388,0,45624,...,-2,20460,69433,94922392,100828760,796006,1268635,0,22900442,0
florida,FL,9914711981,-2,-2,-2,338296,0,10111304,4953368,403759340,...,-2,8597,2634522,850169705,1440267844,17253688,36363687,95990170,177551767,11026737


In [3]:
# Read in desired variables from Meta Data.
# Always use the 2006 meta so that variables introduced in later years
# (and not included in the beginning yrs) are never picked up.
meta_path = "../resources/funding_data/2006_meta.txt"
meta_columns = ['Variable', 'Data Type', 'Data Element Position', 'Description']

# Fields are separated by runs of two or more whitespace characters; the regex
# separator requires the python parsing engine.
meta = pd.read_csv(
    meta_path,
    sep=r'\s{2,}',
    engine='python',
    header=None,
    names=meta_columns,
)

# Discard the first two parsed rows
leading_rows = meta.index[0:2]
meta = meta.drop(leading_rows)

# Show the slice of variables used for the support expenditure subset
meta.iloc[41:97, :]

Unnamed: 0,Variable,Data Type,Data Element Position,Description
43,E212,N,43,SUPPORT EXPENDITURES SALARIES STUDENT SUPPORT ...
44,E213,N,44,SUPPORT EXPENDITURES SALARIES INSTRUCTIONAL ST...
45,E214,N,45,SUPPORT EXPENDITURES SALARIES GENERAL ADMINIST...
46,E215,N,46,SUPPORT EXPENDITURES SALARIES SCHOOL ADMINISTR...
47,E216,N,47,SUPPORT EXPENDITURES SALARIES OPERATION & MAIN...
48,E217,N,48,SUPPORT EXPENDITURES SALARIES PUPIL TRANSPORTA...
49,E218,N,49,SUPPORT EXPENDITURES SALARIES OTHER SERVICES
50,TE21,N,50,SUPPORT EXPENDITURES SALARIES SUBTOTAL
51,E222,N,51,SUPPORT EXPENDITURES EMPLOYEE BENEFITS STUDENT...
52,E223,N,52,SUPPORT EXPENDITURES EMPLOYEE BENEFITS INSTRUC...


In [4]:
# Create Support Expenditure Subset

# Years will always be the same for all subsets
years = ['2006', '2007', '2008', '2009', '2010']

# Variable codes for this subset: the slice of the meta set shown above.
# .iloc makes it explicit that 41:97 are row positions, not index labels.
supp_ex_vars = meta['Variable'].iloc[41:97]

# Subtotal and total codes are left out of the subset. We will be creating our own
# totals and custom subtotals to avoid any double counting.
supp_ex_totals = {'TE21', 'TE22', 'TE23', 'TE24', 'TE25', 'TE26',
                  'STE22', 'STE23', 'STE24', 'STE25', 'STE26', 'STE27',
                  'STE28', 'STE2T'}

# Forge the master column names as '<year>_<code>', grouped by year and, within a
# year, in the order the codes appear in the meta set.
supp_ex_cols = [year + '_' + variable
                for year in years
                for variable in supp_ex_vars
                if variable not in supp_ex_totals]

# Select every column in one step; the result keeps the STNAME index of the master set.
# A column name missing from funding_master raises KeyError here.
supp_ex_master = funding_master[supp_ex_cols]

# Keep only strictly positive values: 0, -1 and -2 are N/A codes and become missing.
# This replaces the per-cell loop, which relied on chained-indexing assignment
# (df[col][i] = ...) and on integer positions into a string-labelled Series.
supp_ex_df = supp_ex_master.where(supp_ex_master > 0)
supp_ex_df

Unnamed: 0_level_0,2006_E212,2006_E213,2006_E214,2006_E215,2006_E216,2006_E217,2006_E218,2006_E222,2006_E223,2006_E224,...,2010_E256,2010_E257,2010_E258,2010_E262,2010_E263,2010_E264,2010_E265,2010_E266,2010_E267,2010_E268
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alabama,170933354,159719812,64853947,241821448,149774596,121849909,52587518,59536612,52488754,18518498,...,2591054,28081293.0,1005098,2623851.0,2420693.0,44556645.0,2321614.0,102694.0,32899.0,12467481
alaska,59312386,42691284,10380525,60529665,63211936,5442576,28589081,23935300,16381460,4355269,...,3112254,881388.0,2552213,699036.0,937807.0,1594375.0,202034.0,97588.0,19146.0,1192126
arizona,279269396,117199517,52085805,260069781,255072799,134602626,132643115,67946540,28399745,13168238,...,29557098,37906410.0,15331541,166015761.0,617284.0,14084639.0,3796531.0,20684173.0,4740376.0,116972608
arkansas,118655299,141483083,60719930,157962962,117603294,70189530,49761951,29422498,33317607,17068318,...,14933203,26643636.0,8632978,389568.0,4183023.0,8998587.0,534305.0,2224300.0,1094394.0,8623816
california,1684068985,2049972094,240074615,2518366818,2048972809,512680186,1203752431,531731963,650044906,94581339,...,39876486,54394672.0,37914004,301854.0,2461823.0,10959161.0,1752262.0,211580.0,105304.0,6609466
colorado,204937662,208914194,42364521,309060835,237435459,112835479,144713931,46408849,46576312,9125679,...,17433800,17608108.0,35004708,4050893.0,2997323.0,4572864.0,3276488.0,626892.0,157431.0,35316609
connecticut,306128211,137283895,73252820,293988633,255675257,22304000,84499822,107040376,47558960,30626452,...,25090172,1824001.0,6179027,1405946.0,2232680.0,4235978.0,1745798.0,2476017.0,665253.0,5706487
delaware,42165566,11228135,7991579,51300458,45754353,16542988,37684197,18112579,4755518,3390456,...,1350757,780225.0,52751,,,,,,,4360901
district of columbia,48637112,35207502,12294616,48015949,47201152,48228656,16434736,6125723,5439012,1881654,...,6589045,291792.0,3653975,1177247.0,3238653.0,4790979.0,2719073.0,9453403.0,9096991.0,1343261
florida,711563699,900875784,84874811,900819137,772008504,463954066,305255765,197161216,237326277,25212041,...,13664588,5138183.0,3288934,2112243.0,23728794.0,96640103.0,2914120.0,40081507.0,16935019.0,9493009


In [5]:
# Break Support Expenditures Subset down by Year, Subsets start at E212 and end at E268
# Columns are selected by their '<year>_' name prefix instead of fixed positions, so the
# split does not silently misalign if the number of variables per year changes.
supp_ex_df_2006 = supp_ex_df.filter(regex=r'^2006_')
supp_ex_df_2007 = supp_ex_df.filter(regex=r'^2007_')
supp_ex_df_2008 = supp_ex_df.filter(regex=r'^2008_')
supp_ex_df_2009 = supp_ex_df.filter(regex=r'^2009_')
supp_ex_df_2010 = supp_ex_df.filter(regex=r'^2010_')
supp_ex_df_2010

Unnamed: 0_level_0,2010_E212,2010_E213,2010_E214,2010_E215,2010_E216,2010_E217,2010_E218,2010_E222,2010_E223,2010_E224,...,2010_E256,2010_E257,2010_E258,2010_E262,2010_E263,2010_E264,2010_E265,2010_E266,2010_E267,2010_E268
STNAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
alabama,220488723,184476547,71493964,280776328,165131520,149547462,62316315,85123368,68865117,23191359,...,2591054,28081293.0,1005098,2623851.0,2420693.0,44556645.0,2321614.0,102694.0,32899.0,12467481
alaska,95734277,52637955,12806684,75052699,73973820,6539742,35131039,57222518,30263959,7999478,...,3112254,881388.0,2552213,699036.0,937807.0,1594375.0,202034.0,97588.0,19146.0,1192126
arizona,668304116,143684549,55706616,291270090,285247340,160075206,145323561,90869569,40678470,15432502,...,29557098,37906410.0,15331541,166015761.0,617284.0,14084639.0,3796531.0,20684173.0,4740376.0,116972608
arkansas,148990616,202960147,60390984,171815993,136138317,80375581,58876822,39778419,51935012,19632452,...,14933203,26643636.0,8632978,389568.0,4183023.0,8998587.0,534305.0,2224300.0,1094394.0,8623816
california,2027949626,2063980966,272519858,2707883115,2251783898,541300725,1355186206,664384854,683154304,108123418,...,39876486,54394672.0,37914004,301854.0,2461823.0,10959161.0,1752262.0,211580.0,105304.0,6609466
colorado,260010247,270814705,53039842,375110918,274723582,135569776,185876478,61025965,61123963,12094263,...,17433800,17608108.0,35004708,4050893.0,2997323.0,4572864.0,3276488.0,626892.0,157431.0,35316609
connecticut,355547687,149722949,89891269,340049560,291176470,26569799,98928319,139441128,56642141,38943095,...,25090172,1824001.0,6179027,1405946.0,2232680.0,4235978.0,1745798.0,2476017.0,665253.0,5706487
delaware,47162177,10671441,9582691,56766545,52523940,16049399,33558605,20624203,4637840,4060189,...,1350757,780225.0,52751,,,,,,,4360901
district of columbia,55398742,57226609,31672538,76356603,47340896,62866053,31776004,7679387,7694583,3892164,...,6589045,291792.0,3653975,1177247.0,3238653.0,4790979.0,2719073.0,9453403.0,9096991.0,1343261
florida,754322297,1006255042,83370889,984555309,817316097,468604552,328479400,234224181,289730611,34009396,...,13664588,5138183.0,3288934,2112243.0,23728794.0,96640103.0,2914120.0,40081507.0,16935019.0,9493009


In [6]:
# Save Datasets to CSVs
# One output file per year, all under the supp_ex subsets directory.
supp_ex_out_2006 = '../datasets/subsets/supp_ex/supp_ex_2006.csv'
supp_ex_out_2007 = '../datasets/subsets/supp_ex/supp_ex_2007.csv'
supp_ex_out_2008 = '../datasets/subsets/supp_ex/supp_ex_2008.csv'
supp_ex_out_2009 = '../datasets/subsets/supp_ex/supp_ex_2009.csv'
supp_ex_out_2010 = '../datasets/subsets/supp_ex/supp_ex_2010.csv'

# Write each yearly frame to its matching path, in year order
for yearly_frame, out_path in (
    (supp_ex_df_2006, supp_ex_out_2006),
    (supp_ex_df_2007, supp_ex_out_2007),
    (supp_ex_df_2008, supp_ex_out_2008),
    (supp_ex_df_2009, supp_ex_out_2009),
    (supp_ex_df_2010, supp_ex_out_2010),
):
    yearly_frame.to_csv(out_path)