# Step 5 - Gravity Modeling

The gravity modeling process can be broken into two primary sections -- one in which the OD matrices are manipulated into shapes compatiable with the gravity modeling tools, and one in which the actual gravity models are developed and exported for origins and destinations. This notebook covers both.

In [109]:
import os, sys
import time
import pandas as pd
import geopandas as gpd
import numpy as np

import networkx as nx
from scipy import spatial
from functools import partial
import pyproj

import shapely
from shapely.ops import transform
from shapely import wkt
from shapely.geometry import Point, MultiPoint
from shapely.wkt import loads

import GOSTnets as gn
import geopy

### Define stuff

Fixed parameters

In [210]:
simplif_meters = 25

In [211]:
source_epsg = 4326
target_epsg = 3106

Variable parameters

In [340]:
scenario = 'Current'
# scenario = 'Padma'

In [341]:
# WorldPop data determinants

constraint_status = 'constrained'
# constraint_status = 'unconstrained'

wp_res = 100
# wp_res = 250
# wp_res = '1k'

In [316]:
# Production date for outputs being used

prod_date = '210329'

In [342]:
origins = 'PopOrigins'
# origins = 'CityOrigins'
# origins = 'All_SEZ_Origins'
# origins = 'Active_SEZ_Origins'

Path names and file names

In [343]:
input_pth = r'inputs\\dests'
interm_pth = r'intermediate'
fin_pth = r'final'
res_pth = r'results'

tab_pth = r'../../../Tabular'
geo_pth = r'../../../GEO'
origin_folder = r'..\..\..\GEO\Population'

Settings and functions

In [344]:
# pd.set_option('max_columns',None)

In [345]:
# the gravity model throws error messages that reduce readibility, let's get rid of them

import warnings

def fxn():
    warnings.warn("deprecated", DeprecationWarning)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()

In [346]:
# function for sorting alphanumerically

import re

def sorted_nicely( l ): 
    """ Sort the given iterable in the way that humans expect.""" 
    convert = lambda text: int(text) if text.isdigit() else text 
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(l, key = alphanum_key)

# funciton for sorting matrices smallest to largest, by origin ID then destination ID

def sort_od_matrix(od_matrix):
    
    # sort by O_IDs, then dest node IDs
    od_matrix = od_matrix.sort_values('Unnamed: 0').reindex(sorted_nicely(od_matrix.columns), axis=1)

#     # reset O_ID column to the front
#     od_matrix = od_matrix[ ['Unnamed: 0'] + [ col for col in od_matrix.columns if col != 'Unnamed: 0' ] ]

    # set the Dest_ID column back to index so the shape is the same as the dWeight shape
    od_matrix.set_index('Unnamed: 0',inplace=True)

In [347]:
# spatial join admin file to origin file based on within location

def georef_origins(orig_fil,adm_fil):
    orig_fil['geometry'] = orig_fil['geometry'].apply(wkt.loads)
    orig_fil = gpd.GeoDataFrame(orig_fil,geometry='geometry')
    orig_fil = gpd.sjoin(orig_fil,adm_fil,how='left',op='within')

In [348]:
# Optimization functions to slim the file sizes of our many large input files and thus speed up processing / export

from typing import List

def optimize_floats(df: pd.DataFrame) -> pd.DataFrame:
    floats = df.select_dtypes(include=['float64']).columns.tolist()
    df[floats] = df[floats].apply(pd.to_numeric, downcast='float')
    return df

def optimize_ints(df: pd.DataFrame) -> pd.DataFrame:
    ints = df.select_dtypes(include=['int64']).columns.tolist()
    df[ints] = df[ints].apply(pd.to_numeric, downcast='integer')
    return df

def optimize(df: pd.DataFrame, datetime_features: List[str] = []):
    return optimize_floats(optimize_ints(df))

## Data Preparation

Create a dictionary of destination DFs

In [223]:
# Destination DFs

City_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'All_cities_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
Dhaka_Chitt_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'Dhaka_Chitt_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
Minor_cities_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'Minor_cities_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
Dry_ports_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'Dry_ports_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
River_ports_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'River_ports_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
Deep_sea_ports_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'Deep_sea_ports_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
All_SEZs_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'All_SEZs_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))
Active_SEZs_df = pd.read_csv(os.path.join(fin_pth,prod_date,\
                        f'Active_SEZs_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification_snapped.csv'))


In [228]:
# Dest dictionary

dests = {"All_cities" : City_df, "Dhaka_Chitt" : Dhaka_Chitt_df, "Minor_cities" : Minor_cities_df, \
         "Dry_ports" : Dry_ports_df, "River_ports" : River_ports_df, "Deep_sea_ports" : Deep_sea_ports_df, \
         "All_SEZs" : All_SEZs_df, "Active_SEZs" : Active_SEZs_df}

Admin data

In [229]:
# load admin spatial data

adm2 = gpd.read_file(os.path.join(geo_pth,'Boundaries/bd_adm_bbs.gpkg'),layer='bd_adm2_wgs84')
adm2.crs = 'epsg:4326'

adm3 = gpd.read_file(os.path.join(geo_pth,'Boundaries/bd_adm_bbs.gpkg'),layer='bd_adm3_wgs84')
adm3.crs = 'epsg:4326'

adm4 = gpd.read_file(os.path.join(geo_pth,'Boundaries/bd_adm_bbs.gpkg'),layer='bd_adm4_wgs84')
adm4.crs = 'epsg:4326'

adm5 = gpd.read_file(os.path.join(geo_pth,'Boundaries/bd_adm_bbs.gpkg'),layer='bd_adm5_wgs84')
adm5.crs = 'epsg:4326'

In [230]:
# Fix types for later joining
adm3['adm3_pcode'] = adm3['adm3_pcode'].astype(str)
adm4['adm4_pcode'] = adm4['adm4_pcode'].astype(str)

# Fix names for later display
adm3['adm3_en'] = adm3['adm3_en'].str.title()

#### Data prep

set an index on the NN, reindex by an alphanumerically sorted list of IDs, then reset_index to return the NNs to the data frame

In [231]:
for dest, df in dests.items():
    df[['Unweighted']] = 1
    df = df.sort_values('NN').set_index('NN').reset_index() # sort for correct joining, set/reset index to move NN column to the front for easy checking
    dests.update({dest:df})

Check results, especially for IDs for use in gravity modeling

In [232]:
# check first key of dests dict
dests[list(dests.keys())[0]].head()

Unnamed: 0.1,NN,Unnamed: 0,Number,City,Area_sqkm,Households,Pop_all_2011,Pop_male_2011,Pop_female_2011,Pop_all_2021,...,Pop_female_2021,Literacy_7yr_plus,Ranking_by_pop_2011,Type,Longitude,Latitude,2021_growth_rate,geometry,NN_dist,Unweighted
0,6253,4,5,Brahmanbaria,22.49,38329,193814,96468,97346,229393.9,...,115216.5,64.4,18,Municipal Corporation,91.111674,23.966521,0.183578,POINT (91.11167399999999 23.966521),51.070479,1
1,6613,25,25,Mymenshingh,70.98,82687,389918,200053,189865,454965.5,...,221538.9,66.4,8,City Corporation,90.3972,24.76127,0.166823,POINT (90.3972 24.76127),55.394221,1
2,21729,17,17,Jamalpur,55.25,35619,150172,75068,75104,168327.7,...,84184.03,49.3,30,Municipal Corporation,89.94455,24.92712,0.120899,POINT (89.94455000000001 24.92712),62.479275,1
3,38279,12,12,Dhaka,316.0,2034146,8906039,4931802,3974237,9733517.0,...,4343491.0,74.6,1,City Corporation,90.409879,23.728216,0.092912,POINT (90.409879 23.728216),27.815607,1
4,53188,15,15,Feni,22.0,31468,156971,82554,74417,180060.2,...,85363.17,69.7,27,Municipal Corporation,91.39552,23.00631,0.147092,POINT (91.39551999999999 23.00631),107.214995,1


#### OD matrix data prep

##### Prepare OD files

In [349]:
# origin to destination
orig_raw = gpd.read_file(os.path.join(res_pth,prod_date,'spatial',f'{scenario}_origins_{wp_res}m_TTs.gpkg'),layer=f"{origins}",driver='GPKG')
orig_raw = optimize(orig_raw)
orig_raw = orig_raw.sort_values('O_ID')
orig_raw.rename({f'{scenario}_{origins}_DhakaChitt_mins' : f'{scenario}_{origins}_Dhaka_Chitt_mins', \
                 f'{scenario}_{origins}_deep_ports_mins' : f'{scenario}_{origins}_deep_sea_ports_mins'},axis=1,inplace=True)

# Summarize origin populations by origin node, to enable appropriate population weighting
# population assigned to each origin node

if origins == 'PopOrigins':
    raw_orig = pd.read_csv(os.path.join(fin_pth,prod_date,f'bgd_wp_{constraint_status}_origins_{wp_res}m_2020_snapped_25m.csv'))
    raw_orig.rename({'NN':'O_ID','VALUE':'Pop'},axis=1,inplace=True)
    orig_wtd = pd.pivot_table(raw_orig,values='Pop',index='O_ID',aggfunc='sum')
    orig_wtd = optimize(orig_wtd)

elif origins == 'All_SEZ_Origins':
    raw_orig = pd.read_csv(os.path.join(fin_pth,prod_date,f'All_SEZs_{constraint_status}_{wp_res}m_res_25m_simplification_snapped.csv'))
    raw_orig.rename({'NN':'O_ID','VALUE':'Pop'},axis=1,inplace=True)
    orig_wtd = pd.pivot_table(raw_orig,values='All_SEZ_count',index='O_ID',aggfunc='sum')
    orig_wtd = optimize(orig_wtd)
    
elif origins == 'Active_SEZ_Origins':
    raw_orig = pd.read_csv(os.path.join(fin_pth,prod_date,f'Active_SEZs_{constraint_status}_{wp_res}m_res_25m_simplification_snapped.csv'))
    raw_orig.rename({'NN':'O_ID','VALUE':'Pop'},axis=1,inplace=True)
    orig_wtd = pd.pivot_table(raw_orig,values='Active_SEZ_count',index='O_ID',aggfunc='sum')
    orig_wtd = optimize(orig_wtd)

In [350]:
orig_wtd.head()

Unnamed: 0_level_0,Pop
O_ID,Unnamed: 1_level_1
3,269.673126
4,1180.427368
5,105.337547
6,9139.359375
8,633.812378


#### Populate a dictionary with dests as keys and a list containing the vehicle ODM and a filtered raw origins file as the value

In [351]:
orig_raw.columns

Index(['O_ID', 'Row_ID', 'VALUE', 'Current_PopOrigins_All_cities_mins',
       'Current_PopOrigins_Dhaka_Chitt_mins',
       'Current_PopOrigins_Minor_cities_mins',
       'Current_PopOrigins_Dry_ports_mins',
       'Current_PopOrigins_River_ports_mins',
       'Current_PopOrigins_Deep_sea_ports_mins',
       'Current_PopOrigins_All_SEZs_mins',
       'Current_PopOrigins_Active_SEZs_mins', 'adm5_pcode', 'adm4_pcode',
       'adm1_pcode', 'adm2_pcode', 'adm3_pcode', 'adm2_pop', 'adm3_pop',
       'adm4_pop', 'adm5_pop', 'Current_PopOrigins_All_cities_mins_WT_adm2',
       'Current_PopOrigins_All_cities_mins_WT_adm3',
       'Current_PopOrigins_All_cities_mins_WT_adm4',
       'Current_PopOrigins_All_cities_mins_WT_adm5',
       'Current_PopOrigins_Dhaka_Chitt_mins_WT_adm2',
       'Current_PopOrigins_Dhaka_Chitt_mins_WT_adm3',
       'Current_PopOrigins_Dhaka_Chitt_mins_WT_adm4',
       'Current_PopOrigins_Dhaka_Chitt_mins_WT_adm5',
       'Current_PopOrigins_Minor_cities_mins_WT_adm2',

In [352]:
odm_dct= {}

for dest, dest_gdf in dests.items():
    
    # filter origins to just relevant information for this destination
    
    if origins == 'PopOrigins':
        dest_orig = orig_raw[['geometry','O_ID','Row_ID','VALUE',f'{scenario}_{origins}_{dest}_mins','adm2_pcode','adm3_pcode','adm4_pcode','adm5_pcode','adm2_pop','adm3_pop','adm4_pop','adm5_pop']]
    else:
        dest_orig = orig_raw
        
    dest_orig = dest_orig.set_index('O_ID')
    
    # Read in raw OD grid
    
    dest_odm = pd.read_csv(os.path.join(fin_pth,prod_date,f'OD_matrix_{scenario}_{origins}_to_{dest}_{constraint_status}_{wp_res}m_res_{simplif_meters}m_simplification.csv'))
    dest_odm = optimize(dest_odm)
    
    # Put the origins and destinations in alphanumeric order by node so that they line up and process correctly within the gravity model. Sort by O_IDs, then dest node IDs
    # Should work as a function but the function is returning unsorted data -- while raw code works. Curious?
    
    dest_odm = dest_odm.sort_values('Unnamed: 0').reindex(sorted_nicely(dest_odm.columns), axis=1)

    # set the O_ID column back to index so the shape is the same as the dWeight shape
    dest_odm.set_index('Unnamed: 0',inplace=True)
    
    # populate a dictionary to use in gravity modeling
    
    odm_dct.update({dest:[dest_orig,dest_odm,dest_gdf]})


## Gravity models

In [353]:
import GOSTnets.calculate_od_raw as calcOD
# calcOD.calculate_gravity??

Check out weighting options

In [354]:
rename_decay_dct = {
    'd_0.01' : '2.9mins_decay',
    'd_0.005' : '5.8mins_decay',
    'd_0.001' : '11.5mins_decay',
    'd_0.0007701635' :'15mins_decay',
    'd_0.0003850818' :'30mins_decay',
    'd_0.0001925409' :'60mins_decay',
    'd_9.62704e-05' : '120mins_decay',
    'd_3.85082e-05' : '300mins_decay',
    'd_1e-05' : '1200mins_decay' }

### Origins

#### Weights

Define your gravity weights

In [355]:
grav_models_Orig_wtless_dct = {
    'All_cities' : dict.fromkeys(['Pop_all_2011','Pop_all_2021','Unweighted']),
    'Dhaka_Chitt' : dict.fromkeys(['Pop_all_2011','Pop_all_2021','Unweighted']),
    'Minor_cities' : dict.fromkeys(['Pop_all_2011','Pop_all_2021','Unweighted']),
    'Dry_ports' : dict.fromkeys(['Unweighted']),
    'River_ports' : dict.fromkeys(['EXIM','Unweighted']),
    'Deep_sea_ports' : dict.fromkeys(['Annual_capacity_TEU','Annual_capacity_TEU_2025','Unweighted']),
    'All_SEZs' : dict.fromkeys(['All_SEZ_count']),
    'Active_SEZs' : dict.fromkeys(['Active_SEZ_count'])
}

#### Gravity calculations

Create a function to generate the gravity models and append them to a dictionary

In [356]:
def orig_grav_loop(orig_file,od_matrix,orig_id_col,orig_weight_col,dest_weight_col):
    
    orig_gravity = calcOD.calculate_gravity(od = np.array(od_matrix), \
                                            oWeight = orig_weight_col,\
                                            dWeight = dest_weight_col)

    orig_gravity['O_ID'] = np.array(orig_id_col) # will assign incorrectly if not converted to an array first
    orig_gravity = orig_gravity.set_index('O_ID')
    
    # merge on indices to speed this step up
    orig_gravity_gdf = pd.merge(orig_file, orig_gravity, how='left',left_index=True,right_index=True)

    return orig_gravity_gdf

Use the function to create gravity models for all the desired weights and export them to a geopackage

In [357]:
from pprint import pprint
pprint(grav_models_Orig_wtless_dct)

{'Active_SEZs': {'Active_SEZ_count': None},
 'All_SEZs': {'All_SEZ_count': None},
 'All_cities': {'Pop_all_2011': None, 'Pop_all_2021': None, 'Unweighted': None},
 'Deep_sea_ports': {'Annual_capacity_TEU': None,
                    'Annual_capacity_TEU_2025': None,
                    'Unweighted': None},
 'Dhaka_Chitt': {'Pop_all_2011': None,
                 'Pop_all_2021': None,
                 'Unweighted': None},
 'Dry_ports': {'Unweighted': None},
 'Minor_cities': {'Pop_all_2011': None,
                  'Pop_all_2021': None,
                  'Unweighted': None},
 'River_ports': {'EXIM': None, 'Unweighted': None}}


In [358]:
for dest_key, weight_dct in grav_models_Orig_wtless_dct.items():
    
    for dest, input_list in odm_dct.items():
        
        # match on destination names so as to call the right weights
        
        if dest_key == dest:
            
            print(dest_key)
            
            for wt in weight_dct:

                print(wt)

                # Start timer
                func_start = time.time()

                # we call in the original destinations file here, referencing the weights column calculated above
                df = orig_grav_loop(input_list[0],input_list[1],orig_wtd.index,[1],input_list[2][wt]) 

                # create geodataframe, rename decay dict
                df = gpd.GeoDataFrame(df,geometry='geometry')
                df.rename(columns=rename_decay_dct,inplace=True)

                # export to geopackage
                gpkg_layername = str(dest_key) + '_' + wt # rename as needed
                df.to_file(os.path.join(f'results\\{prod_date}\\spatial\\origins_gravity_{prod_date}_{scenario}_{origins}_no_orig_weight.gpkg'),\
                                           layer=gpkg_layername,\
                                           driver="GPKG",\
                                           ignore_errors=True)

                # populate nested weight dict with gravity model
                grav_models_Orig_wtless_dct[dest_key][wt] = df

                # Report time

                func_end = time.time()
                print('\n time elapsed for function')
                print(str((func_end - func_start) / 60) + ' minutes \n')

All_cities
Pop_all_2011

 time elapsed for function
22.75905690987905 minutes 

Pop_all_2021

 time elapsed for function
23.530368304252626 minutes 

Unweighted

 time elapsed for function
23.82742187579473 minutes 

Dhaka_Chitt
Pop_all_2011

 time elapsed for function
23.817124489943186 minutes 

Pop_all_2021

 time elapsed for function
24.154891757170358 minutes 

Unweighted

 time elapsed for function
23.197356200218202 minutes 

Minor_cities
Pop_all_2011

 time elapsed for function
22.830280526479086 minutes 

Pop_all_2021

 time elapsed for function
22.74421218633652 minutes 

Unweighted

 time elapsed for function
24.439106325308483 minutes 

Dry_ports
Unweighted

 time elapsed for function
24.71010059515635 minutes 

River_ports
EXIM

 time elapsed for function
23.31399001677831 minutes 

Unweighted

 time elapsed for function
24.555287166436514 minutes 

Deep_sea_ports
Annual_capacity_TEU

 time elapsed for function
24.80245396296183 minutes 

Annual_capacity_TEU_2025

 time el

Currently the gravity statistics are per origin point. Here we aggregate these statistics at the adm4 level, weighting by origin population

In [35]:
# grav_models_Orig_wtless_dct

In [359]:
# iterate through the resulting dict of gravity models, joining their results to adm3 and adm4 files for export and visualization

adm3_grav = adm3.set_index('adm3_pcode')
adm4_grav = adm4.set_index('adm4_pcode')

# adm3_grav.set_index('adm3_pcode',inplace=True)
# adm4_grav.set_index('adm4_pcode',inplace=True)

for key, weight_dct in grav_models_Orig_wtless_dct.items():
    
    print(key)
    
    for wt, layer in weight_dct.items():

        print(wt)

        # Start timer
        func_start = time.time()

        # Create separate adm3 and adm4 layers

        adm3_layer = layer.filter(regex='mins_decay|adm3_pcode|VALUE|adm3_pop')
        adm4_layer = layer.filter(regex='mins_decay|adm4_pcode|VALUE|adm4_pop')

        # weight index by population
        # Note from previous troubleshooting -- perhaps this WITH the OWeight is causing weird patterns

        for col in adm3_layer.loc[:,'2.9mins_decay':'1200mins_decay']:
            adm3_layer[col] = (adm3_layer[col] * (adm3_layer['VALUE'] / adm3_layer['adm3_pop'])) # weighting by share of overall adm population

        for col in adm4_layer.loc[:,'2.9mins_decay':'1200mins_decay']:
            adm4_layer[col] = (adm4_layer[col] * (adm4_layer['VALUE'] / adm4_layer['adm4_pop'])) # weighting by share of overall adm population

        # aggregate weighted indices by upazila

        upz = adm3_layer.filter(regex='mins_decay|adm3_pcode').groupby('adm3_pcode').apply(lambda x: x.sum()) # new version works with non-sequential columns
        upz.drop(columns='adm3_pcode',inplace=True)
        upz = upz.add_prefix(key + '_' + wt + '_') # if running scenarios, modify by scenario here

        # aggregate weighted indices by union

        union = adm4_layer.filter(regex='mins_decay|adm4_pcode').groupby('adm4_pcode').apply(lambda x: x.sum()) # new version works with non-sequential columns
        union.drop(columns='adm4_pcode',inplace=True)
        union = union.add_prefix(key + '_' + wt + '_')

        # join to adm4 file

        adm3_grav = pd.merge(adm3_grav,upz,how='left',left_index=True,right_index=True)
        adm4_grav = pd.merge(adm4_grav,union,how='left',left_index=True,right_index=True) 
        
        # Report time

        func_end = time.time()
        print('\n time elapsed for function')
        print(str((func_end - func_start) / 60) + ' minutes \n')

All_cities
Pop_all_2011


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)



 time elapsed for function
1.7493743817011516 minutes 

Pop_all_2021

 time elapsed for function
1.7659995635350545 minutes 

Unweighted

 time elapsed for function
1.8022161841392517 minutes 

Dhaka_Chitt
Pop_all_2011

 time elapsed for function
1.779349426428477 minutes 

Pop_all_2021

 time elapsed for function
1.8566164056460062 minutes 

Unweighted

 time elapsed for function
2.002782408396403 minutes 

Minor_cities
Pop_all_2011

 time elapsed for function
1.969515860080719 minutes 

Pop_all_2021

 time elapsed for function
1.7474492232004801 minutes 

Unweighted

 time elapsed for function
1.7091326435407004 minutes 

Dry_ports
Unweighted

 time elapsed for function
1.6135910232861836 minutes 

River_ports
EXIM

 time elapsed for function
1.627599330743154 minutes 

Unweighted

 time elapsed for function
1.7166908820470175 minutes 

Deep_sea_ports
Annual_capacity_TEU

 time elapsed for function
1.6451241930325826 minutes 

Annual_capacity_TEU_2025

 time elapsed for function
1.6

Export the finished adm3 and adm4 layers for data analysis and visualization

In [360]:
# adm3

adm3_grav.drop('geometry',axis=1).to_csv(os.path.join(res_pth,prod_date,f'tables\\adm3_gravity_models_{prod_date}_{origins}_nopop_wt.csv'))
adm3_grav.to_file(os.path.join(res_pth,prod_date,f'spatial\\adm3_gravity_models_{scenario}_{prod_date}_{origins}_nopop_wt.geojson'),driver="GeoJSON")
adm3_grav.to_file(os.path.join(res_pth,prod_date,f'spatial\\adm3_gravity_models_{scenario}_{prod_date}_{origins}_nopop_wt.gpkg'),driver="GPKG", ignore_errors=True)

# adm4
adm4_grav.drop('geometry',axis=1).to_csv(os.path.join(res_pth,prod_date,f'tables\\adm4_gravity_models_{scenario}_{prod_date}_{origins}_nopop_wt.csv'))
adm4_grav.to_file(os.path.join(res_pth,prod_date,f'spatial\\adm4_gravity_models_{scenario}_{prod_date}_{origins}_nopop_wt.geojson'),driver="GeoJSON")
adm4_grav.to_file(os.path.join(res_pth,prod_date,f'spatial\\adm4_gravity_models_{scenario}_{prod_date}_{origins}_nopop_wt.gpkg'),driver="GPKG", ignore_errors=True)


#### Destinations

In [None]:
# {scenario}_mkts_odm.transpose()
# .iloc[1:]

In [None]:
# List previously created weights

gm_weights = ['good_jobs_tot_','bad_jobs_tot_','good_jobs_4and5','bad_jobs_4and5',\
              'n_firms_4and5','n_firms_tot','tw_tot','tw_tot_4and5','tw_ser_4and5',\
              'tw_ind_4and5','no_weight']

# Insert weights into dict

grav_models_dict = dict.fromkeys(gm_weights)

Create a function to generate the gravity models per destination

In [73]:
def dest_grav_loop(orig_file,od_matrix,grav_param,dest_df,orig_weight_df):
    dest_gravity = calcOD.calculate_gravity(np.array(od_matrix), oWeight=dest_df[grav_param], dWeight = orig_weight_df['Pop'])
    dest_gravity['NN'] = dest_df['NN']
    dest_gravity_gdf = pd.merge(mkts, dest_gravity, how='left',on='NN')

    return dest_gravity_gdf

# OLD

# def dest_grav_loop(orig_file,od_matrix,grav_param,oW_df,dW):
#     dest_gravity = calcOD.calculate_gravity(np.array(od_matrix), oWeight=oW_df[grav_param], dWeight = dW['Pop'])
#     dest_gravity['NN'] = oW_df['NN']
#     dest_gravity_gdf = pd.merge(mkts, dest_gravity, how='left',on='NN')

#     return dest_gravity_gdf

Apply the function and append results to a dictionary

In [74]:
for key, v in grav_models_dict.items():
    
    print(key)
    
    df = dest_grav_loop(current_orig_mkts,current_mkts_odm.transpose(),key,mkts,cur_orig_grp)
    
    gpkg_layername = 'mkts_cur_' + key
    df.to_file(os.path.join('results\\spatial\\gravity_models_mkts.gpkg'),layer=gpkg_layername,driver="GPKG", ignore_errors=True)
    
    grav_models_dict[key] = df

## OLD code

Sorting process

In [121]:
# # OLD manual process

# # sort by O_IDs, then dest node IDs

# current_mkts_odm = current_mkts_odm.sort_values('Unnamed: 0').reindex(sorted_nicely(current_mkts_odm.columns), axis=1)
# ua_mkts_odm = ua_mkts_odm.sort_values('Unnamed: 0').reindex(sorted_nicely(ua_mkts_odm.columns), axis=1)

# # reset O_ID column to the front
# current_mkts_odm = current_mkts_odm[ ['Unnamed: 0'] + [ col for col in current_mkts_odm.columns if col != 'Unnamed: 0' ] ]
# ua_mkts_odm = ua_mkts_odm[ ['Unnamed: 0'] + [ col for col in ua_mkts_odm.columns if col != 'Unnamed: 0' ] ]

# # set the Dest_ID column back to index so the shape is the same as the dWeight shape
# current_mkts_odm.set_index('Unnamed: 0',inplace=True)
# ua_mkts_odm.set_index('Unnamed: 0',inplace=True)

Georeference, spatial join files

In [125]:
# # Old manual routine 

# current_orig_mkts['geometry'] = current_orig_mkts['geometry'].apply(wkt.loads)
# current_orig_mkts = gpd.GeoDataFrame(current_orig_mkts,geometry='geometry')
# current_orig_mkts = gpd.sjoin(current_orig_mkts,adm4[['geometry','adm3_en','adm4_en','adm3_pcode','adm4_pcode']],how='left',op='within')

# ua_orig_mkts['geometry'] = ua_orig_mkts['geometry'].apply(wkt.loads)
# ua_orig_mkts = gpd.GeoDataFrame(ua_orig_mkts,geometry='geometry')
# ua_orig_mkts = gpd.sjoin(ua_orig_mkts,adm4[['geometry','adm3_en','adm4_en','adm3_pcode','adm4_pcode']],how='left',op='within')

  "(%s != %s)" % (left_df.crs, right_df.crs)


Scenario based approach to gravity stats

Now repeat the process for the all updated roads

In [None]:
# for key, v in grav_models_dict.items():
    
#     print(key)
    
#     df = orig_grav_loop(ua_orig_mkts,ua_mkts_odm,ua_orig_grp,mkts[key])
    
#     gpkg_layername = 'mkts_ua_' + key
#     df.to_file(os.path.join('results\\spatial\\gravity_models.gpkg'),layer=gpkg_layername,driver="GPKG", ignore_errors=True)
    
#     grav_models_dict[key] = df

In [None]:
# # iterate through the dict

# # adm4_grav = adm4_grav

# for key, layer in grav_models_dict.items():
    
#     print(key)
    
#     # weight index by population
    
#     layer['adm4_pop'] = layer.groupby('adm4_pcode')['VALUE'].transform(np.sum)
    
#     for col in layer.loc[:,'d_0.01':'d_1e-05']:
#         layer[col] = (layer[col] * (layer['VALUE'] / layer['adm4_pop']))
    
# #     [ col * (layer['VALUE'].astype(float) / layer['adm4_pop'].astype(float)) for col in layer.loc[:,'d_0.01':'d_1e-05']] # failed atempt at list comprehension
    
#     # aggregate weighted indices by union
    
#     union = layer.loc[:,'adm4_pcode':'d_1e-05'].groupby('adm4_pcode').apply(lambda x: x.sum())
#     union.drop(columns='adm4_pcode',inplace=True)
#     union = union.add_prefix(key + '_ua_')
#     union.reset_index(inplace=True)
#     union['adm4_pcode'] = union['adm4_pcode'].astype(str)

#     # join to adm4 file

#     adm4_grav = pd.merge(adm4_grav,union,how='left',on=['adm4_pcode'])  

If doing a scenario, calculate changes in gravity resulting from that scenario

In [None]:
# adm4_grav['delta60_gj_tot'] = adm4_grav['good_jobs_tot__ua_d_0.0001925409'] - adm4_grav['good_jobs_tot__cur_d_0.0001925409']
# adm4_grav['delta60_bj_tot'] = adm4_grav['bad_jobs_tot__ua_d_0.0001925409'] - adm4_grav['bad_jobs_tot__cur_d_0.0001925409']
# adm4_grav['delta30_bj_tot'] = adm4_grav['bad_jobs_tot__ua_d_0.0003850818'] - adm4_grav['bad_jobs_tot__cur_d_0.0003850818']

# adm4_grav['delta60_gj_4and5'] = adm4_grav['good_jobs_4and5_ua_d_0.0001925409'] - adm4_grav['good_jobs_4and5_cur_d_0.0001925409']
# adm4_grav['delta60_bj_4and5'] = adm4_grav['bad_jobs_4and5_ua_d_0.0001925409'] - adm4_grav['bad_jobs_4and5_cur_d_0.0001925409']

# adm4_grav['delta60_tw_tot'] = adm4_grav['tw_tot_ua_d_0.0001925409'] - adm4_grav['tw_tot_cur_d_0.0001925409']
# adm4_grav['delta60_nf_tot'] = adm4_grav['n_firms_tot_ua_d_0.0001925409'] - adm4_grav['n_firms_tot_cur_d_0.0001925409']

# adm4_grav['delta60_tw_4and5'] = adm4_grav['tw_tot_4and5_ua_d_0.0001925409'] - adm4_grav['tw_tot_4and5_cur_d_0.0001925409']
# adm4_grav['delta60_nf_4and5'] = adm4_grav['n_firms_4and5_ua_d_0.0001925409'] - adm4_grav['n_firms_4and5_cur_d_0.0001925409']

# adm4_grav['delta60_tw_ser4and5'] = adm4_grav['tw_ser_4and5_ua_d_0.0001925409'] - adm4_grav['tw_ser_4and5_cur_d_0.0001925409']
# adm4_grav['delta60_tw_ind4and5'] = adm4_grav['tw_ind_4and5_ua_d_0.0001925409'] - adm4_grav['tw_ind_4and5_cur_d_0.0001925409']

# adm4_grav['delta60_now'] = adm4_grav['no_weight_ua_d_0.0001925409'] - adm4_grav['no_weight_cur_d_0.0001925409']
# adm4_grav['delta30_now'] = adm4_grav['no_weight_ua_d_0.0003850818'] - adm4_grav['no_weight_cur_d_0.0003850818']