In [1]:
import pandas as pd
import numpy as np

In [2]:
# Two-letter state abbreviation: compared against STABBR when filtering the
# federal data and used as the file name of the merged CSV written at the end.
abbr = 'ME'

In [3]:
# File name of the state workbook; it is appended to
# '../../data/state_data_raw/' when the state data is read.
file = 'maine2018.xlsx'

Read in federal level data

In [4]:
# Load the federal fiscal table from its SAS (sas7bdat) file, decoding text
# columns as ISO-8859-1.
fiscal = pd.read_sas(
    '../../data/fiscal2018',
    format='sas7bdat',
    encoding='iso-8859-1',
)

Generate list of districts in the state in the federal data

In [5]:
# Keep the federal rows whose STABBR equals this state's abbreviation and whose
# GSHI value is the string '12'.
state_fiscal = fiscal.loc[fiscal['STABBR'].eq(abbr) & fiscal['GSHI'].eq('12')]

In [6]:
# Number of federal rows retained for this state.
state_fiscal.shape[0]

249

In [7]:
# Preview the first rows of the filtered federal data.
state_fiscal.head()

Unnamed: 0,LEAID,CENSUSID,FIPST,CONUM,CSA,CBSA,NAME,STNAME,STABBR,SCHLEV,...,FL_66V,FL_W01,FL_W31,FL_W61,FL_V95,FL_V02,FL_K14,FL_CE1,FL_CE2,FL_CE3
6051,2300004,20300200730100,23,23003,N,N,Bridgewater Public Schools,Maine,ME,6,...,M,M,R,R,R,R,R,R,R,R
6052,2300005,20300202430100,23,23003,N,N,Grand Isle Public Schools,Maine,ME,6,...,M,M,R,R,R,R,R,R,R,R
6053,2300008,20300600930100,23,23011,N,12300,Fayette Public Schools,Maine,ME,1,...,M,M,R,R,R,R,R,I,I,R
6054,2300009,20300320130100,23,23005,438,38860,Long Island Public Schools,Maine,ME,1,...,M,M,R,R,R,R,R,R,R,R
6055,2300049,20000000010100,23,23003,N,N,ME Sch of Science & Mathematics,Maine,ME,2,...,M,M,R,R,R,R,R,I,I,R


Read in state level data

In [17]:
# Read the 'details' sheet of the state workbook named by `file`.
state_grads = pd.read_excel(
    f'../../data/state_data_raw/{file}',
    sheet_name='details',
)

In [18]:
# Preview the first rows of the raw state data.
state_grads.head()

Unnamed: 0,Year Code,District ID,District Name,School ID,School Name,Graduation,Disaggregated,Population,Adjusted Cohort Count,Graduate Count,Student Rate,District Rate,Statewide Rate
0,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,All Students,All Students,27,9,33.33,,86.74
1,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,Children in Foster Care,No,24,9,37.50,,86.86
2,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,Children in Foster Care,Yes,*,*,*,*,*
3,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,Children with Disabilities,No,*,*,*,*,*
4,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,Children with Disabilities,Yes,19,6,31.58,,73.55


Filter results.

In [23]:
# Keep only the rows that are the 'All Schools' line, use the 'GRADRT4YRADJ'
# graduation measure, and are not disaggregated ('All Students').
state_grads = state_grads.loc[
    state_grads['School Name'].eq('All Schools')
    & state_grads['Graduation'].eq('GRADRT4YRADJ')
    & state_grads['Disaggregated'].eq('All Students')
]

In [24]:
# Display the filtered frame for visual inspection.
state_grads

Unnamed: 0,Year Code,District ID,District Name,School ID,School Name,Graduation,Disaggregated,Population,Adjusted Cohort Count,Graduate Count,Student Rate,District Rate,Statewide Rate
0,2018,1069.0,Arthur R. Gould Sch--LCYDC,,All Schools,GRADRT4YRADJ,All Students,All Students,27,9,33.33,,86.74
96,2018,14.0,Auburn Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,244,190,77.87,,86.74
192,2018,28.0,Augusta Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,185,147,79.46,,86.74
288,2018,38.0,Baileyville Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,25,24,96,,86.74
384,2018,42.0,Bangor Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,280,237,84.64,,86.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11808,2018,518.0,Winslow Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,116,103,88.79,,86.74
11904,2018,524.0,Winthrop Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,53,46,86.79,,86.74
12000,2018,1671.0,Wiscasset Public Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,45,37,82.22,,86.74
12096,2018,537.0,Yarmouth Schools,,All Schools,GRADRT4YRADJ,All Students,All Students,129,128,99.22,,86.74


Select and rename columns.

In [25]:
# Narrow the frame to the district name, cohort count, and student rate columns.
state_grads = state_grads.loc[
    :,
    ['District Name', 'Adjusted Cohort Count', 'Student Rate'],
]

In [26]:
# Relabel the three retained columns, in order, with the names used from here on.
state_grads = state_grads.set_axis(
    ['District Name', 'Total', 'Graduation Rate'],
    axis='columns',
)

In [27]:
# Display the frame after column selection and renaming.
state_grads

Unnamed: 0,District Name,Total,Graduation Rate
0,Arthur R. Gould Sch--LCYDC,27,33.33
96,Auburn Public Schools,244,77.87
192,Augusta Public Schools,185,79.46
288,Baileyville Public Schools,25,96
384,Bangor Public Schools,280,84.64
...,...,...,...
11808,Winslow Schools,116,88.79
11904,Winthrop Public Schools,53,86.79
12000,Wiscasset Public Schools,45,82.22
12096,Yarmouth Schools,129,99.22


In [28]:
# Show column dtypes and non-null counts before converting to numeric types.
state_grads.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120 entries, 0 to 12192
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   District Name    120 non-null    object
 1   Total            120 non-null    object
 2   Graduation Rate  119 non-null    object
dtypes: object(3)
memory usage: 3.8+ KB


Convert data types.

In [33]:
# '*' appears in place of counts and rates in the state file (presumably
# suppressed values -- confirm against the state's documentation). Replace it
# with a real missing value (NaN) instead of an empty string, so the numeric
# conversion that follows does not depend on how empty strings are parsed.
state_grads = state_grads.replace('*', np.nan)

In [34]:
# Convert both measure columns to numeric dtypes; the rate is divided by 100.
# NOTE: re-running this cell divides the rate by 100 again.
state_grads = state_grads.assign(**{
    'Total': pd.to_numeric(state_grads['Total']),
    'Graduation Rate': pd.to_numeric(state_grads['Graduation Rate']) / 100,
})

Check for matches and non-matches in the two lists

In [39]:
# District names from the state data that also appear as NAME in the federal
# data, sorted alphabetically. `isin` does the membership test in one
# vectorised pass instead of rebuilding and scanning a list for every name.
Matches = sorted(
    state_grads.loc[
        state_grads['District Name'].isin(state_fiscal['NAME']),
        'District Name',
    ]
)
Matches

['Arthur R. Gould Sch--LCYDC',
 'Auburn Public Schools',
 'Augusta Public Schools',
 'Baileyville Public Schools',
 'Bangor Public Schools',
 'Baxter Academy for Technology and Science',
 'Biddeford Public Schools',
 'Boothbay-Boothbay Hbr CSD',
 'Brewer Public Schools',
 'Brunswick Public Schools',
 'Calais Public Schools',
 'Cape Elizabeth Public Schools',
 'Cornville Regional Charter School',
 'Deer Isle-Stonington CSD',
 'East Millinocket Public Schools',
 'Easton Public Schools',
 'Eastport Public Schools',
 'Ellsworth Public Schools',
 'Falmouth Public Schools',
 'Five Town CSD',
 'Gorham Public Schools',
 'Greenville Public Schools',
 'Harpswell Coastal Academy',
 'Hermon Public Schools',
 'Islesboro Public Schools',
 'Kittery Public Schools',
 'Lewiston Public Schools',
 'Lisbon Public Schools',
 'ME Sch of Science & Mathematics',
 'MSAD 27',
 'MSAD 46',
 'Machias Public Schools',
 'Madawaska Public Schools',
 'Maine Academy of Natural Sciences',
 'Maine Connections Academy',
 

In [37]:
# District names present in the state data but absent from the federal NAME
# column, sorted alphabetically. `isin` replaces the per-name list rebuild and
# linear scan of the original comprehension.
A = sorted(
    state_grads.loc[
        ~state_grads['District Name'].isin(state_fiscal['NAME']),
        'District Name',
    ]
)
A

['Statewide']

In [38]:
# Federal NAME values that have no counterpart among the state district names,
# sorted alphabetically. `isin` replaces the per-name list rebuild and linear
# scan of the original comprehension.
B = sorted(
    state_fiscal.loc[
        ~state_fiscal['NAME'].isin(state_grads['District Name']),
        'NAME',
    ]
)
B

['Acton Public Schools',
 'Airline CSD',
 'Alexander Public Schools',
 'Andover Public Schools',
 'Athens Public Schools',
 'Baring Plt Public Schools',
 'Beaver Cove Public Schools',
 'Beddington Public Schools',
 'Blue Hill Public Schools',
 'Bowerbank Public Schools',
 'Bremen Public Schools',
 'Bridgewater Public Schools',
 'Brighton Plt School Department',
 'Bristol Public Schools',
 'Brooklin Public Schools',
 'Brooksville Public Schools',
 'Burlington Public Schools',
 'Byron Public Schools',
 'Caratunk Public Schools',
 'Carrabassett Valley Public Schools',
 'Carroll Plt Public Schools',
 'Cary Plantation',
 'Castine Public Schools',
 'Caswell Public Schools',
 'Charlotte Public Schools',
 'Chebeague Island Public Schools',
 'Cherryfield Public Schools',
 'Cooper Public Schools',
 'Coplin Plt Public Schools',
 'Cranberry Isles Public Schools',
 'Crawford Public Schools',
 'Cutler Public Schools',
 'Damariscotta Public Schools',
 'Dayton Public Schools',
 'Deblois Public Schools

Rename the districts for which I can find a match.

In [29]:
# Mapping applied to the federal data before the merge:
#     '<NAME as spelled in the federal data>': '<District Name in the state data>'
#
# The previous entries were left over from another state's notebook (Louisiana
# school names) and could not match any Maine district. The only state-side
# name without a federal match is the aggregate 'Statewide' row, which is not a
# district, so no renames are needed for this state.
state_grads_rename = {}

In [30]:
# Apply the name mapping to the NAME column only. Calling replace on the whole
# frame would also rewrite any other column that happened to hold one of the
# mapped strings, and would scan every column unnecessarily.
state_fiscal = state_fiscal.assign(
    NAME=state_fiscal['NAME'].replace(state_grads_rename)
)

Merge federal and state data, keeping only matches between the two. 

In [31]:
# Inner-join the federal rows to the state rows on district name, so only
# districts present in both sources are kept.
state_grads_merged = state_fiscal.merge(
    state_grads,
    how='inner',
    left_on='NAME',
    right_on='District Name',
)

Save cleaned data. 

In [33]:
# Write the merged table to the per-state CSV, without the index column.
state_grads_merged.to_csv(
    f'../../data/state_data_merged/{abbr}.csv',
    index=False,
)