In [1]:
from notebooks import projectfunctions as pf
import segregation
from segregation.local import MultiLocationQuotient, MultiLocalDiversity, MultiLocalEntropy, MultiLocalSimpsonInteraction, MultiLocalSimpsonConcentration, LocalRelativeCentralization
%matplotlib inline

#### Note: Please look at the LODES technical documentation to fully understand this notebook:
https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.4.pdf#page=9

## Import + Calc datasets

In [2]:
# Build the San Diego block-level frame via the project helper from the 2017 CA LODES
# WAC file and the 2010 tabulation-block shapefile under data\blocks\sd.
# '6073' is presumably the county identifier used to filter the statewide file -- confirm
# in projectfunctions.cleanlink_wac.
# Backslashes are doubled in both paths so the literals contain no invalid escape
# sequences (`\l`, `\c`); the resulting strings are unchanged.
# NOTE(review): these are Windows-style separators; the paths may not resolve on other
# platforms -- confirm how cleanlink_wac opens them.
SDlehd_blocks = pf.cleanlink_wac(
    'data\\lodes\\ca_wac_S000_JT00_2017.csv.gz',
    '6073',
    'data\\blocks\\sd\\tl_2010_06073_tabblock10.shp',
)

In [3]:
#Need these columns to keep other functions working
# Compute two local multi-group indices on the San Diego frame, using the group columns
# returned by pf.gen_jobs, and store each index's `.statistics` as a new column.
# NOTE(review): pf.gen_jobs is called a second time after 'Job_Local_Simpson_Concentration'
# has been added to the frame; this assumes it returns the same job columns both times -- confirm.
JobsSimpsonIndex = MultiLocalSimpsonConcentration(SDlehd_blocks, pf.gen_jobs(SDlehd_blocks))
SDlehd_blocks['Job_Local_Simpson_Concentration'] = JobsSimpsonIndex.statistics
JobLocalEntropyIndex = MultiLocalEntropy(SDlehd_blocks, pf.gen_jobs(SDlehd_blocks))
SDlehd_blocks['Job_Local_Entropy'] = JobLocalEntropyIndex.statistics

In [4]:
# Run the project's location-quotient helper on the San Diego frame.
# Presumably this adds the LQ_* columns in place (LQ_CNS20 is read from this frame in the
# sanity checks and is not created in any other visible cell) -- confirm in projectfunctions.
pf.calc_lq(SDlehd_blocks)

In [5]:
# Build the Riverside block-level frame via the project helper from the 2017 CA LODES
# WAC file and the 2010 tabulation-block shapefile under data\blocks\rv.
# '6065' is presumably the county identifier used to filter the statewide file -- confirm
# in projectfunctions.cleanlink_wac.
# Backslashes are doubled in both paths so the literals contain no invalid escape
# sequences (`\l`, `\c`); the resulting strings are unchanged.
# NOTE(review): these are Windows-style separators; the paths may not resolve on other
# platforms -- confirm how cleanlink_wac opens them.
RVlehd_blocks = pf.cleanlink_wac(
    'data\\lodes\\ca_wac_S000_JT00_2017.csv.gz',
    '6065',
    'data\\blocks\\rv\\tl_2010_06065_tabblock10.shp',
)

In [6]:
#Need these columns to keep other functions working
# Same two local multi-group indices as computed for San Diego, now on the Riverside frame;
# each index's `.statistics` is stored as a new column.
# NOTE: this rebinds JobsSimpsonIndex and JobLocalEntropyIndex, so the San Diego index
# objects are no longer reachable under those names after this cell runs.
# NOTE(review): pf.gen_jobs is called a second time after 'Job_Local_Simpson_Concentration'
# has been added to the frame; this assumes it returns the same job columns both times -- confirm.
JobsSimpsonIndex = MultiLocalSimpsonConcentration(RVlehd_blocks, pf.gen_jobs(RVlehd_blocks))
RVlehd_blocks['Job_Local_Simpson_Concentration'] = JobsSimpsonIndex.statistics
JobLocalEntropyIndex = MultiLocalEntropy(RVlehd_blocks, pf.gen_jobs(RVlehd_blocks))
RVlehd_blocks['Job_Local_Entropy'] = JobLocalEntropyIndex.statistics

In [7]:
# Run the project's location-quotient helper on the Riverside frame.
# Presumably this adds the LQ_* columns in place (LQ_CNS20 is read from this frame in the
# sanity checks and is not created in any other visible cell) -- confirm in projectfunctions.
pf.calc_lq(RVlehd_blocks)

## Sanity Checks

Let's make sure that we're getting the kind of results that we would expect out of our dataset, or at least that we don't see something that we know is insane.

#### Transportation and Warehousing

In [8]:
# Largest CNS08 (Transportation and Warehousing) job count in any single Riverside block.
RVlehd_blocks['CNS08'].max()

7360.0

In [9]:
# Share of all Riverside jobs (C000) that are in CNS08 -- a proportion, not a job count.
RVlehd_blocks['CNS08'].sum() / RVlehd_blocks['C000'].sum()

0.05955138044946841

In [10]:
# Largest CNS08 (Transportation and Warehousing) job count in any single San Diego block.
SDlehd_blocks['CNS08'].max()

1165.0

In [11]:
# Share of all San Diego jobs (C000) that are in CNS08 -- a proportion, not a job count.
SDlehd_blocks['CNS08'].sum() / SDlehd_blocks['C000'].sum()

0.018862034978894503

Riverside County has a higher share of its employment in the Transportation and Warehousing sector (about 6.0% versus about 1.9% in San Diego County), as well as a higher max value for a single block. This fits in with our expectations.

#### Professional, Scientific, Technical LQ

In [12]:
# Largest CNS12 (Professional, Scientific, Technical) job count in any single San Diego block.
# This is a raw count, not a location quotient.
SDlehd_blocks['CNS12'].max()

3703.0

In [13]:
# Share of all San Diego jobs (C000) that are in CNS12 -- a proportion, not a job count.
SDlehd_blocks['CNS12'].sum()/ SDlehd_blocks['C000'].sum()

0.09774734551083282

In [14]:
# Largest CNS12 (Professional, Scientific, Technical) job count in any single Riverside block.
# This is a raw count, not a location quotient.
RVlehd_blocks['CNS12'].max()

425.0

In [15]:
# Share of all Riverside jobs (C000) that are in CNS12 -- a proportion, not a job count.
RVlehd_blocks['CNS12'].sum() / RVlehd_blocks['C000'].sum()

0.029664631774034958

San Diego County has a higher share of its employment in the Professional, Scientific, and Technical sector (about 9.8% versus about 3.0% in Riverside County), as well as a higher max value for a single block. This fits in with our expectations.

#### Total Employment

In [16]:
# Total employment (C000) summed over all San Diego blocks.
SDlehd_blocks['C000'].sum()

1403189.0

In [29]:
# Total employment (C000) summed over all Riverside blocks.
RVlehd_blocks['C000'].sum()

706835.0

San Diego has nearly double the employment of Riverside County. This fits our expectations.

#### Jobs with a wage over $3333/month.

In [19]:
# Divide CE03 (jobs with a wage over $3333/month) by total jobs (C000) to get the proportion
# of high-paying jobs in Riverside; a proportion is better than a raw count for comparing counties.
RVlehd_blocks['CE03'].sum() / RVlehd_blocks['C000'].sum()

0.37087439077012313

In [20]:
# Divide CE03 (jobs with a wage over $3333/month) by total jobs (C000) to get the proportion
# of high-paying jobs in San Diego; a proportion is better than a raw count for comparing counties.
SDlehd_blocks['CE03'].sum() / SDlehd_blocks['C000'].sum()

0.4744635255835101

This all looks good so far. Let's calculate some standard deviations to make sure that there aren't any inadvertent wacky values in the dataset.

In [21]:
# Standard deviation of raw CNS20 job counts across San Diego blocks.
# The sector behind CNS20 is not stated in this notebook; see the LODES technical documentation.
SDlehd_blocks['CNS20'].std()

84.90972403391845

In [22]:
# Standard deviation of the LQ_CNS20 column across San Diego blocks.
# LQ_CNS20 is presumably the CNS20 location quotient added by pf.calc_lq -- confirm.
SDlehd_blocks['LQ_CNS20'].std()

1.7773065646062383

In [24]:
# Standard deviation of raw CNS20 job counts across Riverside blocks.
# The sector behind CNS20 is not stated in this notebook; see the LODES technical documentation.
RVlehd_blocks['CNS20'].std()

43.01202586091644

In [25]:
# Standard deviation of the LQ_CNS20 column across Riverside blocks.
# LQ_CNS20 is presumably the CNS20 location quotient added by pf.calc_lq -- confirm.
RVlehd_blocks['LQ_CNS20'].std()

1.3186986712583686