In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from ld_script import *
# Do not truncate cell contents when displaying frames; the column labels in this table are long.
pd.set_option('display.max_colwidth', None)

## S0802 - Means of Transportation to Work by Selected Characteristics

In [2]:
# Path to the unzipped S0802 subject table download (data CSV and metadata CSV live here).
data_dir = 'data/subject_tables/unzipped_files/ACSST5Y2019.S0802_2021-01-12T120347'

In [3]:
# Read the two CSV files of the S0802 download from `data_dir`:
#   S0802d - the data table (low_memory=False so each column is type-inferred in one pass)
#   S0802m - the metadata table
S0802d = pd.read_csv(
    os.path.join(data_dir, 'ACSST5Y2019.S0802_data_with_overlays_2021-01-12T102914.csv'),
    low_memory=False,
)
S0802m = pd.read_csv(
    os.path.join(data_dir, 'ACSST5Y2019.S0802_metadata_2021-01-12T102914.csv'),
)

In [4]:
# Report the (rows, columns) shape of both tables as a quick check on the load.
print(
    f'Shape of data table proper is {S0802d.shape} \n'
    f'Shape of metadata table is {S0802m.shape}'
)

Shape of data table proper is (5266, 810) 
Shape of metadata table is (809, 2)


In [5]:
# Preview the first two rows of the data table; row 0 carries the descriptive
# label of each column rather than data.
S0802d.head(2)

Unnamed: 0,GEO_ID,NAME,S0802_C01_001E,S0802_C01_001M,S0802_C01_002E,S0802_C01_002M,S0802_C01_003E,S0802_C01_003M,S0802_C01_004E,S0802_C01_004M,...,S0802_C04_097E,S0802_C04_097M,S0802_C04_098E,S0802_C04_098M,S0802_C04_099E,S0802_C04_099M,S0802_C04_100E,S0802_C04_100M,S0802_C04_101E,S0802_C04_101M
0,id,Geographic Area Name,Estimate!!Total!!Workers 16 years and over,Margin of Error!!Total!!Workers 16 years and over,Estimate!!Total!!Workers 16 years and over!!AGE!!16 to 19 years,Margin of Error!!Total!!Workers 16 years and over!!AGE!!16 to 19 years,Estimate!!Total!!Workers 16 years and over!!AGE!!20 to 24 years,Margin of Error!!Total!!Workers 16 years and over!!AGE!!20 to 24 years,Estimate!!Total!!Workers 16 years and over!!AGE!!25 to 44 years,Margin of Error!!Total!!Workers 16 years and over!!AGE!!25 to 44 years,...,Estimate!!Public transportation (excluding taxicab)!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!3 or more vehicles available,Margin of Error!!Public transportation (excluding taxicab)!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!3 or more vehicles available,Estimate!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Means of transportation to work,Margin of Error!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Means of transportation to work,Estimate!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Time of departure to go to work,Margin of Error!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Time of departure to go to work,Estimate!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Travel time to work,Margin of Error!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Travel time to work,Estimate!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Vehicles available,Margin of Error!!Public transportation (excluding taxicab)!!PERCENT ALLOCATED!!Vehicles available
1,1400000US48001950100,"Census Tract 9501, Anderson County, Texas",1785,293,2.6,2.2,6.7,4.8,38.2,10.9,...,-,**,(X),(X),(X),(X),(X),(X),(X),(X)


In [6]:
# Preview the metadata table, which pairs each column code with its descriptive label.
S0802m.head(2)

Unnamed: 0,GEO_ID,id
0,NAME,Geographic Area Name
1,S0802_C01_001E,Estimate!!Total!!Workers 16 years and over


In [7]:
# Custom function defined in ld_script.py (in the project folder) that breaks the
# GEO_ID column apart into its component identifier columns.
S0802d = parse_geo_id(S0802d)

In [8]:
# Rebind BOTH tables to the helper's return values so that later cells work on the
# processed data table as well as the processed metadata. The data table must be
# bound to `S0802d` (the name every later cell reads); binding it to any other
# name silently discards the result.
# NOTE(review): drop_and_normalize lives in ld_script and is not visible here -
# confirm that cells reading row 0 of S0802d as labels still behave as intended.
S0802d, S0802m = drop_and_normalize(S0802d, S0802m)

In [9]:
# Positions of the metadata rows to keep. Each (start, stop) pair is a half-open
# span walked in steps of two (i.e. every other row), and every position is then
# shifted down by 3 to give the positional index used with S0802m.iloc.
# NOTE(review): the offset of 3 presumably reflects rows/columns removed upstream - confirm.
index_locations = [
    position - 3
    for start, stop in (
        (3, 4),
        (87, 99),
        (101, 119),
        (141, 181),
        (205, 206),
        (385, 386),
        (391, 399),
        (407, 408),
    )
    for position in range(start, stop, 2)
]

In [10]:
# Display the metadata rows at the chosen positions to check the selection by eye.
S0802m.iloc[index_locations]

Unnamed: 0,GEO_ID,id
0,S0802_C01_001E,Estimate!!Total!!Workers 16 years and over
84,S0802_C01_043E,"Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Management, business, science, and arts occupations"
86,S0802_C01_044E,Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Service occupations
88,S0802_C01_045E,Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Sales and office occupations
90,S0802_C01_046E,"Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Natural resources, construction, and maintenance occupations"
92,S0802_C01_047E,"Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Production, transportation, and material moving occupations"
94,S0802_C01_048E,Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Military specific occupations
98,S0802_C01_050E,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Construction
100,S0802_C01_051E,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Manufacturing
102,S0802_C01_052E,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Wholesale trade


In [11]:
# Build the column selection from the chosen metadata positions using the
# ld_script helper. NOTE(review): presumably a list of column labels - confirm.
wc = create_wanted_columns(S0802d, S0802m, index_locations)

In [12]:
# Narrow the data table to the wanted selection (rebinds S0802d to the subset).
S0802d = S0802d[wc]

In [13]:
# Preview the narrowed table (default head() shows the first five rows).
S0802d.head()

Unnamed: 0,S0802_C01_001E,S0802_C01_043E,S0802_C01_044E,S0802_C01_045E,S0802_C01_046E,S0802_C01_047E,S0802_C01_048E,S0802_C01_050E,S0802_C01_051E,S0802_C01_052E,...,S0802_C02_091E,S0802_C02_094E,S0802_C02_095E,S0802_C02_096E,S0802_C02_097E,S0802_C03_001E,FIPS,STATEFP,COUNTYFP,TRACTCE
0,Estimate!!Total!!Workers 16 years and over,"Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Management, business, science, and arts occupations",Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Service occupations,Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Sales and office occupations,"Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Natural resources, construction, and maintenance occupations","Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Production, transportation, and material moving occupations",Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Military specific occupations,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Construction,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Manufacturing,Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Wholesale trade,...,"Estimate!!Car, truck, or van -- drove alone!!Workers 16 years and over in households","Estimate!!Car, truck, or van -- drove alone!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!No vehicle available","Estimate!!Car, truck, or van -- drove alone!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!1 vehicle available","Estimate!!Car, truck, or van -- drove alone!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!2 vehicles available","Estimate!!Car, truck, or van -- drove alone!!Workers 16 years and over in households!!VEHICLES AVAILABLE!!3 or more vehicles available","Estimate!!Car, truck, or van -- carpooled!!Workers 16 years and over",_fips,_statefp,_countyfp,_tractce
1,1785,33.8,15.2,17.2,13.3,20.4,0.0,8.4,7.1,4.3,...,1500,0.7,14.4,36.9,48.0,147,48001950100,48,001,950100
2,145,8.3,55.9,20.7,7.6,7.6,0.0,0.0,0.0,0.0,...,145,0.0,16.6,68.3,15.2,0,48001950401,48,001,950401
3,163,27.0,26.4,41.1,0.0,5.5,0.0,0.0,2.5,0.0,...,163,0.0,6.7,63.8,29.4,0,48001950402,48,001,950402
4,1966,16.9,19.2,15.7,17.0,31.3,0.0,11.0,9.8,1.8,...,1674,0.0,28.1,55.1,16.8,182,48001950500,48,001,950500


In [14]:
# Map every column name to the value stored in the first row of S0802d, which
# holds the descriptive label for that column.
meta_dict = {
    column_code: description
    for column_code, description in zip(S0802d.columns, S0802d.iloc[0, :])
}

In [15]:
# Display the column-code -> label mapping.
meta_dict

{'S0802_C01_001E': 'Estimate!!Total!!Workers 16 years and over',
 'S0802_C01_043E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Management, business, science, and arts occupations',
 'S0802_C01_044E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Service occupations',
 'S0802_C01_045E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Sales and office occupations',
 'S0802_C01_046E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Natural resources, construction, and maintenance occupations',
 'S0802_C01_047E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Production, transportation, and material moving occupations',
 'S0802_C01_048E': 'Estimate!!Total!!Workers 16 years and over!!OCCUPATION!!Military specific occupations',
 'S0802_C01_050E': 'Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Construction',
 'S0802_C01_051E': 'Estimate!!Total!!Workers 16 years and over!!INDUSTRY!!Manufacturing',
 'S0802_C01_052E': 'Estimate!!Total!