## Generating Accessibility Vars

Arezoo Besharati, Paul Waddell, UrbanSim, July 2018 


In [None]:
import os; os.chdir('../')
import numpy as np, pandas as pd 
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap

import seaborn as sns
from scipy.stats import norm
import orca

import warnings;
warnings.simplefilter('ignore')
%load_ext autoreload
%autoreload 2

### Load data

In [None]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

### Generate Node variables

In [None]:
%%capture
orca.run(["initialize_network_drive", 
          "network_aggregations_drive",
          "initialize_network_small", 
          "network_aggregations_small",
          "initialize_network_walk", 
          "network_aggregations_walk"])

In [None]:
# Display the names of all tables currently registered with Orca.
registered_tables = orca.list_tables()
registered_tables

## Inspect the data

In [None]:
# Materialise the three Orca node tables as DataFrames; the order of the names
# in the tuple matches the order of the variables being assigned.
nodesdrive, nodessmall, nodeswalk = (
    orca.get_table(table_name).to_frame()
    for table_name in ('nodesdrive', 'nodessmall', 'nodeswalk')
)

# Column/dtype/non-null summary of the drive-network node table.
nodesdrive.info()

In [None]:
# Write each node table to its own CSV file under data/ (paths are relative to
# the current working directory).
for csv_path, node_frame in (
    ('data/nodesdrive_vars.csv', nodesdrive),
    ('data/nodessmall_vars.csv', nodessmall),
    ('data/nodeswalk_vars.csv', nodeswalk),
):
    node_frame.to_csv(csv_path)

### Explore `med_income_1500`

In [None]:
# Report how many drive-network nodes lack a median-income value.
# This cell treats a med_income_1500 of exactly -1 as "no data" (see the final
# message). The first message previously said "median rent zero or below",
# which described neither the column nor the condition being counted.
minus_one = int((nodesdrive.med_income_1500 == -1).sum())
print('{} nodes have a median income of -1 (no data)'.format(minus_one))

total_nodes = len(nodesdrive)
print('Total nodes count {}'.format(total_nodes))

# Share of nodes without income data, as a percentage of all drive nodes.
print('{0:.2f} percent of nodes have no data for med_income'.format(minus_one/total_nodes*100))

In [None]:
%matplotlib notebook

ax = sns.distplot(nodesdrive.med_income_1500, bins=500,  fit=norm, kde=False)

In [None]:
#Find the outliers
%matplotlib notebook
ax = sns.boxplot(x = nodesdrive.med_income_1500, palette="Set2", width=0.5)


In [None]:
# Value of med_income_1500 at selected quantile levels: the minimum, every
# decile up to 0.9, the 0.95 level, and the maximum.
quantile_levels = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1]
nodesdrive.med_income_1500.quantile(quantile_levels)


In [None]:
# Number of drive nodes whose med_income_1500 is above 200000.
high_income_mask = nodesdrive.med_income_1500 > 200000
len(nodesdrive.loc[high_income_mask])

### Explore `med_rent_sqft_1500`

In [None]:
# Report how many drive-network nodes lack a median-rent value.
# Negative med_rent_sqft_1500 values are counted as "no data" (see the final
# message). The first message previously said "zero or below", but the filter
# is strictly `< 0`, so zero-valued nodes are not part of this count.
minus_one = int((nodesdrive.med_rent_sqft_1500 < 0).sum())
print('{} nodes have median rent below zero'.format(minus_one))

total_nodes = len(nodesdrive)
print('Total nodes count {}'.format(total_nodes))

# Share of nodes without rent data, as a percentage of all drive nodes.
print('{0:.2f} percent of nodes have no data for med_rent'.format(minus_one/total_nodes*100))



In [None]:
%matplotlib notebook
filtered = nodesdrive.med_rent_sqft_1500[nodesdrive.med_rent_sqft_1500 <10]
ax = sns.distplot(filtered, bins=50, kde=False)

In [None]:
# NOTE(review): disabled experiment -- histogram of log1p(population_1500).
# `nodes` is not defined in the cells visible here (the loaded frames are
# nodesdrive / nodessmall / nodeswalk); confirm the intended frame before
# re-enabling.
# %matplotlib notebook
# plt.hist(np.log1p(nodes.population_1500))

In [None]:
# NOTE(review): disabled experiment -- histogram of the cube root of
# population_1500. `nodes` is not defined in the cells visible here (the loaded
# frames are nodesdrive / nodessmall / nodeswalk); confirm the intended frame
# before re-enabling.
# %matplotlib notebook
# plt.hist(np.power(nodes.population_1500,1/3))