In [14]:
# import libraries
# Standard library
import datetime
import os
import pathlib
from collections import defaultdict
from pathlib import Path

# Must be set before geopandas is imported: geopandas reads USE_PYGEOS at import time.
os.environ['USE_PYGEOS'] = '0'

# Third-party
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pygeos
import seaborn as sns
import shapely
from google.cloud.exceptions import NotFound
from shapely.geometry import shape, Point, LineString, Polygon
# Census Data
import censusdata

# Working directory: wherever the notebook kernel was started
BASE_DIR = pathlib.Path.cwd()

# Project directory: two levels above the working directory
project_folder = BASE_DIR.parent.parent
print(project_folder)

/tmp/pycharm_project_316/BEAM_Freight


In [15]:
# Upload outputs to the bucket storage under one of the following paths:
# "gs://beam-core-outputs/output/city_name/simulation_name/Output/*" or
# "gs://beam-core-outputs/output/city_name/simulation_name/Output/plot/*"

from google.cloud import storage
# Upload file to Google Cloud service
def upload_blob(_bucket_name, _source_file_name, _destination_blob_name):
    """Upload a local file to a Google Cloud Storage bucket.

    _bucket_name: ID of the GCS bucket, e.g. "your-bucket-name".
    _source_file_name: path of the local file to upload, e.g. "local/path/to/file".
    _destination_blob_name: ID of the GCS object to create, e.g. "storage-object-name".

    The upload carries a generation-match precondition of 0, which is the value
    to use for a destination object that does not yet exist; the request is
    aborted if the object's generation number does not match the precondition.
    Prints a confirmation line once the upload call returns.
    """
    target_blob = storage.Client().bucket(_bucket_name).blob(_destination_blob_name)
    # Precondition 0 guards against race conditions / data corruption on upload.
    target_blob.upload_from_filename(_source_file_name, if_generation_match=0)
    print(f"File {_source_file_name} uploaded to {_destination_blob_name}.")

def delete_blob(_bucket_name, _blob_name):
    """Delete an object from a Google Cloud Storage bucket.

    _bucket_name: ID of the GCS bucket, e.g. "your-bucket-name".
    _blob_name: ID of the GCS object to remove, e.g. "your-object-name".

    The blob's metadata is fetched first so that the delete can be sent with a
    generation-match precondition; the request is aborted if the object's
    generation number no longer matches. Prints a confirmation line afterwards.
    """
    doomed_blob = storage.Client().bucket(_bucket_name).blob(_blob_name)
    # Fetch blob metadata so the current generation number is available.
    doomed_blob.reload()
    doomed_blob.delete(if_generation_match=doomed_blob.generation)
    print(f"Blob {_blob_name} deleted.")

In [16]:
# Basic Reference Points
city_name = "austin"
gcloud_bucket = "gs://beam-core-outputs/output/austin"
linkstats_file_path = "0.linkstats.csv.gz"
simulation_name = "austin-base-with-freight-2018__2023-04-14_16-11-13_yoq"
analysis_type = "Freight"

# Make sure the local plot folder exists. exist_ok=True turns an already
# existing folder into a no-op, while genuine failures (e.g. missing
# permissions) still raise instead of being silently swallowed.
project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot").mkdir(parents=True, exist_ok=True)

In [17]:
# load synthetic population for validation
reference_data_dir = project_folder.joinpath("Reference_Data", city_name)
households = pd.read_csv(reference_data_dir / "households.csv")
persons = pd.read_csv(reference_data_dir / "persons.csv")

In [18]:
# scenario variables
# The arithmetic below treats lcm_county_id as state * 1000 + county.
# Integer arithmetic plus plain column assignment is used on purpose:
# `df.loc[:, col] = ...` writes into the existing column in place under
# pandas 2, which keeps a float dtype and yields strings such as '48.0'.
county_fips = households['lcm_county_id'].astype(int)
households['state'] = (county_fips // 1000).astype(str).str.zfill(2)
households['county'] = (county_fips % 1000).astype(str).str.zfill(3)

# Counties (3-digit codes) comprising the Austin metropolitan area in the synthetic population
list_of_fips = households.county.unique()

year = 2018
validation_source = 'acs5'
state = '48'

In [19]:
# fetch data from census website using census package.
# Key inputs include the year = "2018", data source = "acs5" and the state = "texas" | 48
data = censusdata.download(validation_source, year,censusdata.censusgeo([('state', state),('county', '*')]),
                           ['B11001_001E', 'B01001_001E', 'B01001_002E', 'B01001_026E', # hh, person, male, female
                            'B08202_001E', 'B08202_002E', 'B08202_003E', 'B08202_004E', 'B08202_005E',  # household by numbers of workers
                            'B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E',
                            'B19001_007E', 'B19001_008E', 'B19001_009E', 'B19001_010E', 'B19001_011E',
                            'B19001_012E', 'B19001_013E', 'B19001_014E', 'B19001_015E', 'B19001_016E',
                            'B19001_017E', # income
                            'B08201_002E', 'B08201_003E', 'B08201_004E', 'B08201_005E', 'B08201_006E',  # vehicle ownership
                            'B09018_002E',  # Children
                            'B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_007E',
                            'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E',
                            'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E',
                            'B01001_018E', 'B01001_019E', 'B01001_020E', 'B01001_021E', 'B01001_022E',
                            'B01001_023E', 'B01001_024E', 'B01001_025E', #male age group
                            'B01001_027E', 'B01001_028E', 'B01001_029E', 'B01001_030E', 'B01001_031E',
                            'B01001_032E', 'B01001_033E', 'B01001_034E', 'B01001_035E', 'B01001_036E',
                            'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E',
                            'B01001_042E', 'B01001_043E', 'B01001_044E', 'B01001_045E', 'B01001_046E',
                            'B01001_047E', 'B01001_048E', 'B01001_049E', # female age group
                            'B02001_002E', 'B02001_003E', 'B02001_004E', 'B02001_005E', 'B02001_006E',
                            'B02001_007E', 'B02001_008E', 'B02001_009E', 'B02001_010E',  # race
                            'B23025_002E'])   # worker status
data = data.reset_index()

# The former index holds the geography of each row; use its string form.
data['index'] = data['index'].astype(str)
# The last three characters of that string are taken as the county code.
data['county'] = data['index'].str[-3:]
# County label for plots: text before the first comma, minus a trailing
# " County". Unlike keeping only the first word, this preserves multi-word
# names (e.g. "Fort Bend").
# NOTE(review): assumes the string starts like "Travis County, Texas: ..." - confirm.
data['county name'] = (data['index'].str.split(',').str[0]
                       .str.replace(r'\s+County$', '', regex=True))

# Keep records of only counties which belong to our interest - in this case Austin
austin_data = data.loc[data['county'].isin(list_of_fips)].copy()

In [20]:
# compare total households
# BEAM side: number of synthetic households per county
modeled_household_count = (
    households.groupby(['county'])[['household_id']]
    .count()
    .rename(columns={'household_id': 'BEAM households'})
    .reset_index()
)

# ACS side: B11001_001E per county, relabelled for the legend
acs_household_count = austin_data[['county', 'county name', 'B11001_001E']].rename(
    columns={'B11001_001E': 'ACS households'})

household_count_comparison = acs_household_count.merge(modeled_household_count, on='county', how='left')

# Side-by-side bars per county, saved to the simulation's plot folder
ax = household_count_comparison.plot.bar(x='county name')
ax.set_ylabel('count of households')
plt.xticks(rotation=0, ha='center')
ax.set_title('Total households comparison')
plt_file_name = "household_count_by_county.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

# Region-wide totals and the relative difference of BEAM vs. ACS
total_synth_hh = household_count_comparison['BEAM households'].sum()
total_acs_hh = household_count_comparison['ACS households'].sum()
print(total_synth_hh, total_acs_hh)
print(total_synth_hh / total_acs_hh - 1)

In [21]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)


In [22]:
# compare total persons
# BEAM side: sum of the household-level `persons` column per county
modeled_person_count = (
    households.groupby(['county'])[['persons']]
    .sum()
    .rename(columns={'persons': 'BEAM persons'})
    .reset_index()
)

# ACS side: B01001_001E per county, relabelled for the legend
acs_person_count = austin_data[['county', 'county name', 'B01001_001E']].rename(
    columns={'B01001_001E': 'ACS persons'})

person_count_comparison = acs_person_count.merge(modeled_person_count, on='county', how='left')

ax = person_count_comparison.plot.bar(x='county name')
ax.set_ylabel('count of persons')
ax.set_title('Total persons')
plt.xticks(rotation=0, ha='center')
plt_file_name = "person_count_by_county.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

# Region-wide totals and the relative difference of BEAM vs. ACS
total_synth_person = person_count_comparison['BEAM persons'].sum()
total_acs_person = person_count_comparison['ACS persons'].sum()
print(total_synth_person, total_acs_person)
print(total_synth_person / total_acs_person - 1)

In [23]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [24]:
# compare household by number of workers
modeled_household_by_workers = (
    households.groupby('hh_workers')[['household_id']]
    .count()
    .rename(columns={'household_id': 'BEAM households'})
    .reset_index()
)

# ACS B08202 columns per worker bin; 004 and 005 are pooled into 'two or more'
worker_bins = {
    'none': austin_data['B08202_002E'],
    'one': austin_data['B08202_003E'],
    'two or more': austin_data['B08202_004E'] + austin_data['B08202_005E'],
}
for bin_label, bin_counts in worker_bins.items():
    austin_data.loc[:, bin_label] = bin_counts

# Long format, then total over counties for each worker bin
acs_household_by_workers = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['none', 'one', 'two or more'],
                     var_name='hh_workers', value_name='ACS households')
    .groupby('hh_workers')[['ACS households']]
    .sum()
    .reset_index()
)

household_by_worker_comparison = acs_household_by_workers.merge(
    modeled_household_by_workers, on='hh_workers', how='left')

ax = household_by_worker_comparison.plot.bar(x='hh_workers')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('number of workers in each household')
ax.set_ylabel('count of households')
ax.set_title('Total households by number of workers comparison')
plt_file_name = "household_count_by_workers.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

In [25]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [26]:
# household income
# Display label for each income bin key
inc_group_lookup = {'lt30': '< $30k',
                    'gt30-lt60': '>=$30k, <$60k',
                    'gt60-lt100': '>=$60k, <$100k',
                    'gt100-lt150': '>=$100k, <$150k',
                    'gt150': '>=$150k'}
# Value per bin used only to order the bars from lowest to highest income
ub_lookup = {'lt30': 30000,
             'gt30-lt60': 60000,
             'gt60-lt100': 100000,
             'gt100-lt150': 150000,
             'gt150': 200000}

modeled_household_by_income = (
    households.groupby('hh_income')[['household_id']]
    .count()
    .rename(columns={'household_id': 'BEAM households'})
    .reset_index()
)

# ACS B19001 brackets pooled into each income bin
income_bin_vars = {
    'lt30': ['B19001_002E', 'B19001_003E', 'B19001_004E', 'B19001_005E', 'B19001_006E'],
    'gt30-lt60': ['B19001_007E', 'B19001_008E', 'B19001_009E', 'B19001_010E', 'B19001_011E'],
    'gt60-lt100': ['B19001_012E', 'B19001_013E'],
    'gt100-lt150': ['B19001_014E', 'B19001_015E'],
    'gt150': ['B19001_016E', 'B19001_017E'],
}
for income_bin, bracket_vars in income_bin_vars.items():
    # Plain element-wise addition, left to right, as in a chained `+`
    bin_total = austin_data[bracket_vars[0]]
    for bracket_var in bracket_vars[1:]:
        bin_total = bin_total + austin_data[bracket_var]
    austin_data.loc[:, income_bin] = bin_total

# Long format, then total over counties for each income bin
acs_household_by_income = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['lt30', 'gt30-lt60', 'gt60-lt100', 'gt100-lt150',  'gt150'],
                     var_name='hh_income', value_name='ACS households')
    .groupby('hh_income')[['ACS households']]
    .sum()
    .reset_index()
)

# Attach the display label and sort key, then order bins by income
household_by_income_comparison = (
    acs_household_by_income.merge(modeled_household_by_income, on='hh_income', how='left')
    .assign(income_group=lambda frame: frame['hh_income'].map(inc_group_lookup),
            ub=lambda frame: frame['hh_income'].map(ub_lookup))
    .sort_values('ub', ascending=True)
)

income_plot_columns = ['income_group', 'ACS households', 'BEAM households']
ax = household_by_income_comparison[income_plot_columns].plot.bar(x='income_group')
ax.set_ylabel('count of households')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('income group category')
ax.set_title('Total households by income group comparison')
plt_file_name = "household_count_by_income.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

In [27]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [28]:
# compare household vehicle ownership
modeled_household_by_vehicle = (
    households.groupby('hh_cars')[['household_id']]
    .count()
    .rename(columns={'household_id': 'BEAM households'})
    .reset_index()
)

# ACS B08201 columns per ownership bin; 004, 005 and 006 are pooled into 'two or more'
vehicle_bins = {
    'none': austin_data['B08201_002E'],
    'one': austin_data['B08201_003E'],
    'two or more': austin_data['B08201_004E'] + austin_data['B08201_005E'] + austin_data['B08201_006E'],
}
for bin_label, bin_counts in vehicle_bins.items():
    austin_data.loc[:, bin_label] = bin_counts

# Long format, then total over counties for each ownership bin
acs_household_by_vehicle = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['none', 'one', 'two or more'],
                     var_name='hh_cars', value_name='ACS households')
    .groupby('hh_cars')[['ACS households']]
    .sum()
    .reset_index()
)

household_by_vehicle_comparison = acs_household_by_vehicle.merge(
    modeled_household_by_vehicle, on='hh_cars', how='left')

ax = household_by_vehicle_comparison.plot.bar(x='hh_cars')
ax.set_ylabel('count of households')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('car ownership')
ax.set_title('Total households by car ownership comparison')
plt_file_name = "household_count_by_vehicle.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

In [29]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [30]:
# compare household with children
modeled_household_with_child = (
    households.groupby('hh_children')[['household_id']]
    .count()
    .rename(columns={'household_id': 'BEAM households'})
    .reset_index()
)

# 'yes' is B09018_002E; 'no' is the remainder of all households (B11001_001E).
# NOTE(review): B09018_002E looks like a count of children rather than of
# households with children - confirm against the ACS table definition.
child_bins = {
    'yes': austin_data['B09018_002E'],
    'no': austin_data['B11001_001E'] - austin_data['B09018_002E'],
}
for bin_label, bin_counts in child_bins.items():
    austin_data.loc[:, bin_label] = bin_counts

# Long format, then total over counties for each bin
acs_household_with_child = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['yes', 'no'],
                     var_name='hh_children', value_name='ACS households')
    .groupby('hh_children')[['ACS households']]
    .sum()
    .reset_index()
)

household_by_child_comparison = acs_household_with_child.merge(
    modeled_household_with_child, on='hh_children', how='left')

ax = household_by_child_comparison.plot.bar(x='hh_children')
ax.set_ylabel('count of households')
ax.set_title('Total households by children presence comparison')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('Children presence in household')
plt_file_name = "household_count_by_child.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

In [31]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [32]:
# compare person by gender
person_by_gender = (
    persons.groupby('person_sex')[['person_id']]
    .count()
    .rename(columns={'person_id': 'BEAM persons'})
    .reset_index()
)

# ACS B01001: 002 is copied to 'male', 026 to 'female'
gender_bins = {
    'male': austin_data['B01001_002E'],
    'female': austin_data['B01001_026E'],
}
for bin_label, bin_counts in gender_bins.items():
    austin_data.loc[:, bin_label] = bin_counts

# Long format, then total over counties for each gender
acs_person_by_gender = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['male', 'female'],
                     var_name='person_sex', value_name='ACS persons')
    .groupby('person_sex')[['ACS persons']]
    .sum()
    .reset_index()
)

person_by_gender_comparison = acs_person_by_gender.merge(
    person_by_gender, on='person_sex', how='left')

ax = person_by_gender_comparison.plot.bar(x='person_sex')
ax.set_ylabel('count of persons')
ax.set_title('Total persons by gender comparison')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('Gender')
plt_file_name = "person_count_by_gender.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')

In [33]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [34]:
# compare person by age
person_by_age = (
    persons.groupby('person_age')[['person_id']]
    .count()
    .rename(columns={'person_id': 'BEAM persons'})
    .reset_index()
)

# ACS B01001 columns that make up each of the first three age groups
group1_var = ['B01001_003E', 'B01001_004E', 'B01001_005E', 'B01001_006E', 'B01001_007E',
              'B01001_027E', 'B01001_028E', 'B01001_029E', 'B01001_030E', 'B01001_031E']
group2_var = ['B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E',
              'B01001_032E', 'B01001_033E', 'B01001_034E', 'B01001_035E', 'B01001_036E']
group3_var = ['B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E',
              'B01001_037E', 'B01001_038E', 'B01001_039E', 'B01001_040E', 'B01001_041E']

age_group_vars = {
    '19 and under': group1_var,
    '20 to 35': group2_var,
    '35 to 60': group3_var,
}
for age_label, age_vars in age_group_vars.items():
    austin_data.loc[:, age_label] = austin_data[age_vars].sum(axis=1)

# The oldest group is whatever remains of the total population (B01001_001E)
austin_data.loc[:, 'above 60'] = (
    austin_data['B01001_001E']
    .sub(austin_data['19 and under'])
    .sub(austin_data['20 to 35'])
    .sub(austin_data['35 to 60'])
)

# Long format, then total over counties for each age group
acs_person_by_age = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['19 and under', '20 to 35', '35 to 60', 'above 60'],
                     var_name='person_age', value_name='ACS persons')
    .groupby('person_age')[['ACS persons']]
    .sum()
    .reset_index()
)

person_by_age_comparison = acs_person_by_age.merge(
    person_by_age, on='person_age', how='left')

ax = person_by_age_comparison.plot.bar(x='person_age')
ax.set_ylabel('count of persons')
ax.set_title('Total persons by age group comparison')
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('Age groups')
plt_file_name = "person_count_by_age.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')
plt.show()

In [35]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [36]:
# compare person by race
person_by_race = (
    persons.groupby('race')[['person_id']]
    .count()
    .rename(columns={'person_id': 'BEAM persons'})
    .reset_index()
)

# ACS B02001: 002 -> 'white', 003 -> 'black', 005 -> 'asian'
race_bins = {
    'white': 'B02001_002E',
    'black': 'B02001_003E',
    'asian': 'B02001_005E',
}
for race_label, race_var in race_bins.items():
    austin_data.loc[:, race_label] = austin_data[race_var]

# 'other' is whatever remains of the total population (B01001_001E)
austin_data.loc[:, 'other'] = (
    austin_data['B01001_001E']
    .sub(austin_data['white'])
    .sub(austin_data.loc[:, 'black'])
    .sub(austin_data['asian'])
)

# Long format, then total over counties for each race group
acs_person_by_race = (
    austin_data.melt(id_vars=['county name'],
                     value_vars=['white', 'black', 'asian', 'other'],
                     var_name='race', value_name='ACS persons')
    .groupby('race')[['ACS persons']]
    .sum()
    .reset_index()
)

# Largest ACS group first
person_by_race_comparison = (
    acs_person_by_race.merge(person_by_race, on='race', how='left')
    .sort_values('ACS persons', ascending=False)
)

ax = person_by_race_comparison.plot.bar(x='race')
ax.set_ylabel('count of persons')
ax.set_title('Total persons by race comparison')
plt.legend(loc=1)
plt.xticks(rotation=0, ha='center')
ax.set_xlabel('Race')
plt_file_name = "person_count_by_race.png"
plot_path = project_folder / "Output" / city_name / simulation_name / analysis_type / "plot" / plt_file_name
plt.savefig(plot_path, dpi=600, bbox_inches='tight')
plt.show()

In [37]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [38]:
# compare person by worker status
person_by_worker = persons.groupby('worker')[['person_id']].count()
person_by_worker.columns = ['BEAM persons']
person_by_worker = person_by_worker.reset_index()
# The ACS side uses the string keys '1' / '0', so the merge key must be a string too
person_by_worker['worker'] = person_by_worker['worker'].astype(str)

# '1' is B23025_002E; '0' is the remainder of the total population (B01001_001E)
austin_data.loc[:, '1'] = austin_data['B23025_002E']
austin_data.loc[:, '0'] = austin_data['B01001_001E'] - austin_data['1']

# Long format, then total over counties for each worker status
acs_person_by_worker = pd.melt(austin_data, id_vars=['county name'],
                               value_vars=['1', '0'],
                               var_name = 'worker', value_name = 'ACS persons')
acs_person_by_worker = acs_person_by_worker.groupby('worker')[['ACS persons']].sum()
acs_person_by_worker = acs_person_by_worker.reset_index()

person_by_worker_comparison = pd.merge(acs_person_by_worker,
                                       person_by_worker,
                                       on = 'worker', how = 'left')

ax = person_by_worker_comparison.plot(x = 'worker', kind = 'bar')
ax.set_ylabel('count of persons')
ax.set_title('Total persons by working status comparison')

# Show readable tick labels instead of the raw '0' / '1' codes. The labels are
# derived from the plotted rows, so they stay aligned whatever the row order;
# an unexpected code is shown unchanged.
worker_status_labels = {'0': 'No', '1': 'Yes'}
labels = [worker_status_labels.get(code, code) for code in person_by_worker_comparison['worker']]
ax.set_xticklabels(labels, rotation=0, ha='center')
ax.set_xlabel('Working status')

plt_file_name = "person_count_by_worker.png"
plt.savefig(project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name), dpi = 600, bbox_inches = 'tight')
plt.show()

In [39]:
# Publish the most recently saved plot to the shared GCS bucket.
bucket_name = "beam-core-outputs"
source_file_name = project_folder.joinpath("Output", city_name, simulation_name, analysis_type, "plot",plt_file_name)
destination_blob_name = f"output/{city_name}/{simulation_name}/Output/plot/{plt_file_name}"

# Remove a previous copy of the object, if any, before uploading. A missing
# object is the only expected failure; anything else (credentials,
# permissions, network) is raised instead of being silently ignored.
try:
    delete_blob(_bucket_name=bucket_name, _blob_name=destination_blob_name)
except NotFound:
    pass
# and upload the file
upload_blob(bucket_name, source_file_name, destination_blob_name)

In [40]:
# Display the ACS vs. BEAM person counts by worker status as a table
person_by_worker_comparison

In [41]:
# Look up ACS 5-year 2018 table B23025 and print it for reference
b23025_table = censusdata.censustable('acs5', 2018, 'B23025')
censusdata.printtable(b23025_table)