In [134]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import statsmodels.api as sm
import numpy as np
import geopandas as gpd
import networkx as nx
from ipfn import ipfn
# import pycombo
from sklearn.linear_model import LinearRegression
from sklearn.metrics import normalized_mutual_info_score as nmi
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error
import seaborn as sns
pd.options.mode.chained_assignment = None  # default='warn'
from sttn.data.lehd import OriginDestinationEmploymentDataProvider
provider = OriginDestinationEmploymentDataProvider()

import math
from sttn.network import SpatioTemporalNetwork
from sttn.utils import add_distance
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [135]:
# data format: city, state usps code, list of counties
cities = [
    ('New York City', 'ny', ['New York County, NY', 'Queens County, NY','Kings County, NY','Bronx County, NY','Richmond County, NY']),
    ('Los Angeles', 'ca', ['Los Angeles County, CA']),
    ('Chicago', 'il', ['Cook County, IL']),
    ('Houston', 'tx', ['Harris County, TX']),
    ('Boston', 'ma', ['Suffolk County, MA', 'Middlesex County, MA']),
    ('Phoenix', 'az', ['Maricopa County, AZ']),
    ('Philadelphia', 'pa', ['Philadelphia County, PA']),
    ('San Antonio', 'tx', ['Bexar County, TX']),
    ('San Diego', 'ca', ['San Diego County, CA']),
    ('Dallas', 'tx', ['Dallas County, TX']),
    ('San Jose', 'ca', ['Santa Clara County, CA']),
    ('Austin', 'tx', ['Travis County, TX']),
]

In [126]:
!mkdir cities
job_column = 'S000'
comp_aggs={job_column: 'sum'}
for city, state, conties in cities:
    state_network = provider.get_data(state=state, year=2018)
    city_network = state_network.filter_nodes(state_network.nodes.county.isin(conties))
    
    with_distance = add_distance(city_network).edges
    with_distance = with_distance.loc[(with_distance.SE01 > 0) & 
                                              (with_distance.SE02 > 0) &
                                              (with_distance.SE03 > 0)]
#     with_weights = compute_weights(city_network, column)
    
    city_jobs = city_network.agg_adjacent_edges(aggs=comp_aggs, outgoing=False).rename(columns={job_column: 'jobs'}).reset_index()
    city_pop = city_network.agg_adjacent_edges(aggs=comp_aggs, outgoing=True).rename(columns={job_column: 'residence'}).reset_index()
    
    city_dist = with_distance.merge(city_jobs, on='destination')
    city_cum = city_dist.merge(city_pop, on='origin')
    city_cum.to_csv('cities/%s.csv'%city,index=False)

mkdir: cannot create directory ‘cities’: File exists


### unconstrained power law

In [183]:
import scipy.optimize as optimize
def power_law(x, a,b):
    return a*(x**b)
def unconstrained_model(data, separate_income=False):
    y_target = ['S000']  # target = total commute if no income segregation
    data.loc[data.distance == 0, 'distance'] = 0.2
    origin = df.groupby(['origin']).agg({'S000':sum}).reset_index()
    origin.columns = ['origin','S000residence']
    destination = df.groupby(['destination']).agg({'S000':sum}).reset_index()
    destination.columns = ['destination','S000jobs']
    data = data.merge(origin,on=['origin'])
    data = data.merge(destination,on=['destination'])
    
    if separate_income == True:
        
        y_target = ['SE01', 'SE02', 'SE03'] # target = individual income commute if income segregation
        origin = df.groupby(['origin']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        origin.columns = ['origin','SE01residence','SE02residence','SE03residence']
        destination = df.groupby(['destination']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        destination.columns = ['destination','SE01jobs','SE02jobs','SE03jobs']
        data = data.merge(origin,on=['origin'])
        data = data.merge(destination,on=['destination'])
    dataF = []
    for target in y_target:
        X = data.distance.values
        y = data[target]/(data[target+'jobs']*data[target+'residence'])
        pars, cov = optimize.curve_fit(f=power_law, xdata=X, ydata=y, bounds=(-np.inf, np.inf))
#         print(pars)
        data[target+'a'] = pars[0]
        data[target+'b'] = pars[1]
        data[target+'pred'] = data[target+'a']*(data['distance']**data[target+'b'])*data[target+'jobs']*data[target+'residence']
    return data

        

In [184]:
!mkdir unconstrainPowerlawPredictionResultsCT/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=False)
    df.to_csv('unconstrainPowerlawPredictionResultsCT/'+city,index=False)


mkdir: cannot create directory ‘unconstrainPowerlawPredictionResultsCT/’: File exists


In [128]:
!mkdir unconstrainPowerlawPredictionResultsCTIncome/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=True)
    df.to_csv('unconstrainPowerlawPredictionResultsCTIncome/'+city,index=False)


mkdir: cannot create directory ‘unconstrainPowerlawPredictionResultsCTIncome/’: File exists


### full power law

In [185]:

X = df[['distance','jobs','residence']].values
y = df['S000'].values
def power_law(x,k,a,b,c):
    return k*(x[:,0]**a)*(x[:,1]**b)*(x[:,2]**c)
def unconstrained_model(data, separate_income=False):
    data.loc[data.distance == 0, 'distance'] = 0.2
    y_target = ['S000']  # target = total commute if no income segregation.2
    origin = df.groupby(['origin']).agg({'S000':sum}).reset_index()
    origin.columns = ['origin','S000residence']
    destination = df.groupby(['destination']).agg({'S000':sum}).reset_index()
    destination.columns = ['destination','S000jobs']
    data = data.merge(origin,on=['origin'])
    data = data.merge(destination,on=['destination'])
    if separate_income == True:
        
        y_target = ['SE01', 'SE02', 'SE03'] # target = individual income commute if income segregation
        origin = df.groupby(['origin']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        origin.columns = ['origin','SE01residence','SE02residence','SE03residence']
        destination = df.groupby(['destination']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        destination.columns = ['destination','SE01jobs','SE02jobs','SE03jobs']
        data = data.merge(origin,on=['origin'])
        data = data.merge(destination,on=['destination'])
    dataF = []
    for target in y_target:
        X = data[['distance',target+'jobs',target+'residence']].values
        y = data[target].values
        pars, cov = optimize.curve_fit(f=power_law, xdata=X, ydata=y, bounds=(-np.inf, np.inf))
#         print(pars)
        data[target+'k'] = pars[0]
        data[target+'a'] = pars[1]
        data[target+'b'] = pars[2]
        data[target+'c'] = pars[3]
        data[target+'pred'] = data[target+'k']*(data['distance']**data[target+'a'])*\
                        (data[target+'jobs']**data[target+'b'])*(data[target+'residence']**data[target+'c'])
    return data

        

In [187]:
!mkdir unconstrainFullPowerlawPredictionResultsCT/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=False)
    df.to_csv('unconstrainFullPowerlawPredictionResultsCT/'+city,index=False)


mkdir: cannot create directory ‘unconstrainFullPowerlawPredictionResultsCT/’: File exists


In [186]:
!mkdir unconstrainFullPowerlawPredictionResultsCTIncome/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=True)
    df.to_csv('unconstrainFullPowerlawPredictionResultsCTIncome/'+city,index=False)


mkdir: cannot create directory ‘unconstrainFullPowerlawPredictionResultsCTIncome/’: File exists


### expotential

In [175]:
import scipy.optimize as optimize
def power_law(x, a,b):
    return a*(np.e**(b*x))
def unconstrained_model(data, separate_income=False):
    y_target = ['S000']  # target = total commute if no income segregation
    data.loc[data.distance == 0, 'distance'] = 0.2
    origin = df.groupby(['origin']).agg({'S000':sum}).reset_index()
    origin.columns = ['origin','S000residence']
    destination = df.groupby(['destination']).agg({'S000':sum}).reset_index()
    destination.columns = ['destination','S000jobs']
    data = data.merge(origin,on=['origin'])
    data = data.merge(destination,on=['destination'])
    if separate_income == True:
        
        y_target = ['SE01', 'SE02', 'SE03'] 
        origin = df.groupby(['origin']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        origin.columns = ['origin','SE01residence','SE02residence','SE03residence']
        destination = df.groupby(['destination']).agg({'SE01':sum,'SE02':sum,'SE03':sum}).reset_index()
        destination.columns = ['destination','SE01jobs','SE02jobs','SE03jobs']
        data = data.merge(origin,on=['origin'])
        data = data.merge(destination,on=['destination'])
    dataF = []
    for target in y_target:
        X = data.distance.values
        y = data[target]/(data[target+'jobs']*data[target+'residence'])
        pars, cov = optimize.curve_fit(f=power_law, xdata=X, ydata=y, bounds=(-np.inf, np.inf))
#         print(pars)
        data[target+'a'] = pars[0]
        data[target+'b'] = pars[1]
        data[target+'pred'] = data[target+'a']*(np.e**(data['distance']*data[target+'b']))*data[target+'jobs']*data[target+'residence']
    return data

        

In [181]:
!mkdir unconstrainExpPredictionResultsCT/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=False)
    df.to_csv('unconstrainExpPredictionResultsCT/'+city,index=False)


mkdir: cannot create directory ‘unconstrainExpPredictionResultsCT/’: File exists


In [132]:
!mkdir unconstrainExpPredictionResultsCTIncome/
import os
cities = os.listdir('cities/')
for city in cities:
    df = pd.read_csv('cities/'+city)
    df = unconstrained_model(df,separate_income=True)
    df.to_csv('unconstrainExpPredictionResultsCTIncome/'+city,index=False)


mkdir: cannot create directory ‘unconstrainExpPredictionResultsCTIncome/’: File exists
