In [None]:
import geopandas as gpd
from tqdm import tqdm
import libpysal as lp
import json

### Get the data in the folder ./LCPS_data for pre-processing. Let's call this `retrieved data`.

In [None]:
def retrieve_data(sy, data_dir="./LCPS_data"):
    """
    Load the three input shapefiles for a given school year (sy).

    Parameters
    ----------
    sy : str
        School-year tag embedded in the file names, e.g. '2017_2018'.
    data_dir : str, optional
        Folder that holds the shapefiles. Defaults to "./LCPS_data".

    Returns
    -------
    tuple
        (schools, students, spas), i.e. whatever `gpd.read_file` returns for
        LCPS_Sites_<sy>.shp, Students_<sy>.shp and PlanningZones_<sy>.shp.
    """
    def _read_layer(stem):
        # Every layer follows the same <data_dir>/<stem>_<sy>.shp naming scheme.
        return gpd.read_file('{}/{}_{}.shp'.format(data_dir, stem, sy))

    schools = _read_layer('LCPS_Sites')
    students = _read_layer('Students')
    spas = _read_layer('PlanningZones')

    return schools, students, spas

In [None]:
# School-year tag: passed to retrieve_data() to pick the input shapefiles and
# to write_data() to name the output files.
sy = '2017_2018'

In [None]:
schools, students, spas = retrieve_data(sy)

#### Print out a data instance of the SPAs, SCHOOLs and STUDENTs

In [None]:
# Show a single SPA record (the first row) to inspect the available fields.
for _, first_spa in spas.head(1).iterrows():
    print(first_spa)

In [None]:
# Show a single school record (the first row) to inspect the available fields.
for _, first_school in schools.head(1).iterrows():
    print(first_school)

In [None]:
# Show a single student record (the first row) to inspect the available fields.
for _, first_student in students.head(1).iterrows():
    print(first_student)

### Print out the CRS of the SPAs, SCHOOLs and STUDENTs

In [None]:
spas.crs    # Printout the CRS

In [None]:
spas.keys()

In [None]:
# PLANNING_1 values that begin with 'DS07'.
[zone_code for zone_code in spas.PLANNING_1 if zone_code.startswith('DS07')]

In [None]:
students.crs

In [None]:
schools.crs

### Read the data (supplied with the algorithm) in the folder './data'. Let's call this `new data`.

In [None]:
spas_new = gpd.read_file('./data/SPAs.json')

In [None]:
# SPA values of the supplied data that begin with 'DS07'.
[spa_code for spa_code in spas_new.SPA if spa_code.startswith('DS07')]

In [None]:
# Show a single record (the first row) of the supplied SPA data.
for _, first_new_spa in spas_new.head(1).iterrows():
    print(first_new_spa)

#### We have to modify the SPAs in `retrieved data` to match the entries in `new data`

In [None]:
# Columns of the retrieved SPA layer that are not carried over.
fields = ['ELEM_1', 'MID_1', 'HIGH_1', 'DISTRICT_3', 'MID_2_2019', 'HIGH__2019',
          'PLANNING_1', 'UTILITIE_2', 'STUDENTS_P', 'Field39', 'Field40']
# Rebind instead of mutating in place, and ignore names that are absent, so the
# cell can be re-run and also tolerates layers that lack some of these columns.
spas = spas.drop(columns=fields, errors='ignore')

In [None]:
# Adding new fields and renaming some existing ones
# Rename existing columns and append the population counters, all starting at 0.
spas = spas.rename(columns={"INT_CODE": "MID_CODE", "STDYAREA": "SPA"})

# Population of ELEM, MID, HIGH and TOTAL
new_fields = ['ELEM_POP', 'MID_POP', 'HIGH_POP', 'TOT_POP']
spas = spas.assign(**dict.fromkeys(new_fields, 0))

##### Do point-in-polygon test to find out number of students attending public schools in LCPS and residing within Loudoun county, VA.

In [None]:
spas.columns

In [None]:
# spas['SPA'] = spas['STDYAREA_1']

In [None]:
set(students['GRADE'])    # Possible values of grade

In [None]:
# Point-in-polygon pass: attribute every student to the first SPA polygon that
# contains the student's location and tally the SPA's population per school level.
count = 0     # students located inside some SPA polygon
others = 0    # located students whose grade matches none of the levels below

# Start from zero so that re-running this cell does not double count.
for pop_field in ('ELEM_POP', 'MID_POP', 'HIGH_POP', 'TOT_POP'):
    spas[pop_field] = 0

# Hoisted out of the student loop: iterating spas.iterrows() for every student
# rebuilt a row Series per SPA each time, although only (index, geometry) is needed.
spa_shapes = list(zip(spas.index, spas.geometry))

for student_location, student_grade in tqdm(zip(students.geometry, students['GRADE']),
                                            total=len(students)):
    for ind, spa_shape in spa_shapes:
        if not student_location.within(spa_shape):
            continue

        count += 1
        # NOTE(review): grade 13 is counted as elementary; presumably it encodes
        # kindergarten ("KG-5") -- confirm against set(students['GRADE']).
        if 0 < student_grade < 6 or student_grade == 13:    # ELEMENTARY School has grades KG-5
            level_field = 'ELEM_POP'
        elif 5 < student_grade < 9:    # MIDDLE School has grades 6-8
            level_field = 'MID_POP'
        elif 8 < student_grade < 13:    # HIGH School has grades 9-12
            level_field = 'HIGH_POP'
        else:
            level_field = None

        if level_field is None:
            # Inside the boundary but not in a recognised grade: not added to any counter.
            others += 1
        else:
            spas.at[ind, level_field] += 1
            spas.at[ind, 'TOT_POP'] += 1

        break    # stop at the first containing SPA

total_students = len(students)


In [None]:
# Summarise how the student records were split by the point-in-polygon pass.
considered_students = count - others
outside_students = total_students - count

print("{}/{} students living inside LCPS' boundary are considered for redistricting".format(
    considered_students, total_students))
print("{}/{} students living inside LCPS' boundary are ignored".format(others, total_students))
print("{}/{} students living outside LCPS' boundary are ignored".format(
    outside_students, total_students))

In [None]:
# Point-in-polygon pass: tag every school with the STDYAREA_1 value of the first
# SPA polygon that contains the school's location.
# NOTE: `count` and `others` reuse the names of the student counters above and
# overwrite them; re-running the student summary cell after this one reports
# school counts instead.
count = 0     # schools located inside some SPA polygon
others = 0    # located schools whose containing SPA has a falsy STDYAREA_1

# Hoisted out of the school loop: only the id and the geometry of each SPA are
# needed, so avoid rebuilding a row Series per SPA for every school.
spa_lookup = list(zip(spas['STDYAREA_1'], spas.geometry))

for index, school_location in tqdm(zip(schools.index, schools.geometry),
                                   total=len(schools)):
    for spa_id, spa_shape in spa_lookup:
        if not school_location.within(spa_shape):
            continue

        count += 1
        if spa_id:
            schools.at[index, 'SPA'] = str(spa_id)
        else:
            others += 1

        break    # stop at the first containing SPA
#         assert 0, 'not contained'
            


In [None]:
len(spas)

In [None]:
# Derive the short school-type code from CLASS; any other CLASS keeps ''.
schools['SCHOOL_TYP'] = ''

for class_name, type_code in (('ELEMENTARY', 'ES'), ('MIDDLE', 'MS'), ('HIGH', 'HS')):
    schools.loc[schools['CLASS'] == class_name, 'SCHOOL_TYP'] = type_code

In [None]:
spas.keys()

### Transform the CRS of the `retrieved data` to match `new data`. We need to reproject the shapefiles into the new coordinate system so that all the data we work with share a common projection. For more details refer to the [link](https://geopandas.org/projections.html).

In [None]:
new_crs = spas_new.crs

In [None]:
new_crs

In [None]:
spas = spas.to_crs(new_crs)

In [None]:
# fillna() returns a new Series; assign it back so the column is actually updated.
schools['SCHOOL_TYP'] = schools['SCHOOL_TYP'].fillna('')

In [None]:
schools = schools.to_crs(new_crs)

In [None]:
# Print the CRS of both layers after the to_crs() calls above to check the reprojection
print(spas.crs, schools.crs)

In [None]:
spas.keys()

In [None]:
# Build Rook weights over the SPAs (ids taken from STDYAREA_1), reduce every
# entry to the list of its keys, and save the mapping as JSON.
rook_weights = lp.weights.Rook.from_dataframe(spas, idVariable="STDYAREA_1")
adjacency_matrix = {
    spa_id: list(neighbours.keys())
    for spa_id, neighbours in dict(rook_weights).items()
}

with open("data/nbrlist_SPA.json", "w") as out_file:
    json.dump(adjacency_matrix, out_file)

### Write out the updated data as geojson files

In [None]:
def write_data(schools, spas, sy, data_dir="data"):
    """
    Write the processed schools and SPAs for a given school year (sy) as GeoJSON.

    Parameters
    ----------
    schools, spas
        Objects exposing `to_file(path, driver=...)` (the GeoDataFrames built above).
    sy : str
        School-year tag embedded in the output file names, e.g. '2017_2018'.
    data_dir : str, optional
        Output folder. Defaults to "data".

    The files are written to <data_dir>/Schools_<sy>.json and <data_dir>/SPAs_<sy>.json.
    """
    # Write the data files
    schools.to_file('{}/Schools_{}.json'.format(data_dir, sy), driver='GeoJSON')
    spas.to_file('{}/SPAs_{}.json'.format(data_dir, sy), driver='GeoJSON')


In [None]:
write_data(schools, spas, sy)