In [1]:
# Switch the working directory to the project root so that the `scripts.*`
# imports and the `conf/config.yaml` lookup below resolve.
# The notebook is expected to start one level below the root. If
# `conf/config.yaml` is already visible from the current directory, the cell
# has been run before (or the kernel was started at the root), so we stay put.
# This makes the cell safe to re-run without restarting the kernel.
import os
if os.path.isfile(os.path.join(os.getcwd(), 'conf', 'config.yaml')):
    wd = os.getcwd().replace('\\', '/')
else:
    wd = os.path.dirname(os.getcwd()).replace('\\', '/')
os.chdir(wd)

import pandas as pd
import geopandas as gpd
import scripts.preprocess_helper as ph
import scripts.cleanup_helper as ch
from scripts.cleanup_helper import LowIncomeCleanup
import scripts.overlay_map_helper as omh
import omegaconf
import warnings
import tobler
from tobler.area_weighted import area_interpolate

# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')
# Show all columns when a DataFrame is displayed (None removes the cap).
pd.options.display.max_columns = None

In [59]:
# Load the project configuration from <wd>/conf/config.yaml and keep handy
# references to its `paths` and `constants` sections.
yaml_path = os.path.join(wd, 'conf', 'config.yaml')
cfg = omegaconf.OmegaConf.load(yaml_path)
paths, consts = cfg.paths, cfg.constants

In [5]:
%%time
# Load every configured dataset into `data`, keyed by a short name.
data = {}
datasets = {
    # paths.utilities
    'coops': paths.utilities.rural_coops_path,
    'muns': paths.utilities.municipal_utils_path,
    # community layers
    'j40': paths.j40.j40_clean_path,
    'coal': paths.energy.coal_clean_path,
    'ffe': paths.energy.ffe_clean_path,
    'lic': paths.low_income.low_inc_clean_path,
    'dci': paths.dci.dci_clean_path,
    # paths.population
    'tract_pop': paths.population.tract_pop_shp,
    'msa_pop': paths.population.msa_pop_shp,
    'cty_pop': paths.population.cty_pop_shp,
    # paths.boundaries
    'cty': paths.boundaries.ct_clean_path,
    'st': paths.boundaries.st_clean_path,
}

for name, file_path in datasets.items():
    # Anything that is not a shapefile is handed to the loader as CSV.
    if file_path.endswith('.shp'):
        file_type = 'shp'
    else:
        file_type = 'csv'
    data[name] = ph.load_data(file_path, file_type)

CPU times: total: 1min 50s
Wall time: 5min 2s


In [29]:
%%time
# Build one overlay per (community, utility type) pair, keep it in `overlays`
# under '<community>_coop' / '<community>_muni', and write it to the path
# configured in paths.overlays.coops / paths.overlays.munis.
#
# `overlays` is created here so the cell works on a fresh kernel; every key is
# recomputed below, so re-running the cell yields the same dictionary.
overlays = {}

community_list = ['j40', 'coal', 'ffe', 'lic', 'dci']
community_data_list = [data[community_name] for community_name in community_list]

# (key suffix, utility dataset passed as `coops_utils`, config group with output paths)
utility_groups = [
    ('_coop', data['coops'], paths.overlays.coops),
    ('_muni', data['muns'], paths.overlays.munis),
]

for community, community_data in zip(community_list, community_data_list):
    for key_suffix, utils_data, overlay_paths in utility_groups:
        overlay_key = community + key_suffix
        overlay_path = overlay_paths[community + '_overlay_path']
        overlays[overlay_key] = omh.overlays(
            community=community,
            coops_utils=utils_data,
            community_data=community_data,
            consts=consts.overlays,
        )
        # Make sure the destination folder exists before writing the overlay.
        os.makedirs(os.path.dirname(overlay_path), exist_ok=True)
        overlays[overlay_key].to_file(overlay_path)


CPU times: total: 7min 38s
Wall time: 9min 20s


In [49]:
def interpolate_population(source_df, target_df):
    """Attach an area-weighted `total_pop` estimate to each row of an overlay.

    Args:
        source_df: GeoDataFrame handed to `area_interpolate` as the source
            layer; it must carry a `total_pop` column, which is interpolated
            as an extensive variable.
        target_df: GeoDataFrame of overlay polygons that receive the estimate.

    Returns:
        `target_df` with a fresh 0..n-1 index and the interpolated `total_pop`
        column appended. If `target_df` already has a `total_pop` column, the
        usual pandas merge suffixes (`_x` / `_y`) apply.

    Raises:
        ValueError: if the interpolation result does not have exactly one row
            per target row, in which case a positional join would be unsafe.
    """
    results = area_interpolate(source_df=source_df, target_df=target_df, extensive_variables=['total_pop'])

    if len(results) != len(target_df):
        raise ValueError(
            f'area_interpolate returned {len(results)} rows for {len(target_df)} target rows'
        )

    # area_interpolate yields one row per target feature, in target order, so
    # the estimate is joined back by row position. Joining on the geometry
    # column instead would multiply rows whenever two overlay features share
    # the same geometry, and it depends on exact geometry equality.
    target_by_position = target_df.reset_index(drop=True)
    pop_by_position = results[['total_pop']].reset_index(drop=True)
    overlay_df_with_pop = target_by_position.merge(
        pop_by_position, left_index=True, right_index=True, how='left'
    )
    return overlay_df_with_pop

In [50]:
%%time
# Interpolate population onto every overlay. Overlays whose key starts with
# j40/coal/lic/dci use the tract-level population layer; 'ffe' overlays use the
# county-level layer. Keys matching neither group are skipped.
tract_interpolated_results, county_interpolated_results = {}, {}

for overlay_key, overlay_gdf in overlays.items():
    if overlay_key.startswith(('j40', 'coal', 'lic', 'dci')):
        tract_interpolated_results[overlay_key] = interpolate_population(data['tract_pop'], overlay_gdf)
        continue
    if overlay_key.startswith('ffe'):
        county_interpolated_results[overlay_key] = interpolate_population(data['cty_pop'], overlay_gdf)

CPU times: total: 14min 25s
Wall time: 30min 46s


In [64]:
def save_interpolated_results(interpolated_results, paths):
    """Write each interpolated overlay to its configured shapefile path.

    Args:
        interpolated_results: mapping of overlay key -> dataframe to save.
        paths: config object; the destination for a key is read from
            `paths.population['<overlay_key>_pop']`.
    """
    for overlay_key in interpolated_results:
        path_key = overlay_key + '_pop'
        output_path = paths.population[path_key]
        # Create the destination folder if it does not exist yet.
        target_dir = os.path.dirname(output_path)
        os.makedirs(target_dir, exist_ok=True)
        ph.save_data(interpolated_results[overlay_key], output_path, type='shp')

In [65]:
%%time
# Persist the tract-based results first, then the county-based ones.
for interpolated in (tract_interpolated_results, county_interpolated_results):
    save_interpolated_results(interpolated, paths)

CPU times: total: 7min 18s
Wall time: 15min 18s
