In [None]:
import pandas as pd
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import numpy as np

import matplotlib.ticker as mtick
from adjustText import adjust_text

# --- Load source tables -------------------------------------------------
# STCC4 is forced to text on read so the commodity codes are not parsed as
# numbers.
clark_county_ts2021 = pd.read_csv("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/Clark_County_TS2021.csv", dtype={'STCC4': str})
US_2020_CSA = pd.read_csv("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/2020_US_CSA.csv")

bridges_excel = pd.ExcelFile("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/bridges.xlsx")

# STCC <-> NAICS bridge (first sheet of the workbook), STCC4 kept as text.
naics_stcc = pd.read_excel("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/STCC_NAICS.xlsx", dtype={'STCC4': str})

# --- Lookup sheets from the bridges workbook ----------------------------
stcc = bridges_excel.parse("STCC")
regions = bridges_excel.parse("Regions")
modes = bridges_excel.parse("Modes")
county_region = bridges_excel.parse("County to Region")
# naics_stcc is NOT re-parsed here: the previous `naics.parse(...)` call
# referenced a name that is never defined and raised NameError on a fresh
# kernel. The read_excel call above is the single source for the bridge.

# --- Drop exact duplicate rows ------------------------------------------
clark_county_ts2021 = clark_county_ts2021.drop_duplicates()
US_2020_CSA = US_2020_CSA.drop_duplicates()



# Two views of the region lookup, one keyed for each end of a shipment, so
# the same table can be joined twice without the key columns colliding.
origin_regions = regions.rename(
    columns={"Region": "Origin Region", "Region Name": "Origin Region Name"}
)
destination_regions = regions.rename(
    columns={"Region": "Destination Region", "Region Name": "Destination Region Name"}
)

# STCC4 is cast to str in every frame that carries it.
# NOTE(review): astype(str) turns missing values into the literal text 'nan';
# confirm that is acceptable for the later STCC4 join.
for _frame in (clark_county_ts2021, naics_stcc, stcc):
    _frame['STCC4'] = _frame['STCC4'].astype(str)

# Attach, in order: origin region attributes, destination region attributes,
# the mode lookup, and the STCC lookup. All joins are left joins, so every
# shipment row is kept even when a lookup has no match. Non-key columns that
# appear on both sides of a join receive pandas' default _x / _y suffixes.
clark_county_ts2021 = (
    clark_county_ts2021
    .merge(origin_regions, on='Origin Region', how='left')
    .merge(destination_regions, on='Destination Region', how='left')
    .merge(modes, on='Mode', how='left')
    .merge(stcc, on='STCC', how='left')
)


# The two region joins leave suffixed duplicates (_x from the origin join,
# _y from the destination join). Give them explicit Origin/Destination names.
_region_column_names = {
    'State_x': 'Origin State',
    'State_y': 'Destination State',
    'BEA_x': 'Origin BEA',
    'BEA_y': 'Destination BEA',
    'BEA Name_x': 'Origin BEA Name',
    'BEA Name_y': 'Destination BEA Name',
    'Country_x': 'Origin Country',
    'Country_y': 'Destination Country',
}
clark_county_ts2021 = clark_county_ts2021.rename(columns=_region_column_names)

def format_func(x):
    """Render a number with thousands separators and two decimals (e.g. 1,234.50)."""
    return '{:,.2f}'.format(x)


# Apply the format at DISPLAY time only. Tons / Units / Value / Average Miles
# stay numeric so that later groupby(...).sum() calls add numbers; writing
# the formatted text back into the columns would make those sums concatenate
# strings instead.
pd.options.display.float_format = format_func



In [None]:
# Show the current state of clark_county_ts2021 as the cell output.
clark_county_ts2021

In [None]:
# Replace missing STCC4 codes with '0' and make sure the column is text.
# NOTE(review): if STCC4 was already passed through astype(str) earlier,
# missing values are the text 'nan' and fillna has nothing left to fill.
_stcc4_text = clark_county_ts2021['STCC4'].fillna('0').astype(str)
clark_county_ts2021['STCC4'] = _stcc4_text

clark_county_ts2021

In [None]:
# Keep the bridge table's STCC4 key as text, then show the table.
naics_stcc = naics_stcc.astype({'STCC4': str})

naics_stcc

In [None]:
# Left-join the STCC -> NAICS bridge onto the shipment rows by STCC4, keeping
# every shipment row whether or not a bridge entry exists.
clark_county_ts2021 = clark_county_ts2021.merge(naics_stcc, on='STCC4', how='left')
clark_county_ts2021

In [None]:
# Classify every shipment row by where it starts and ends relative to
# Clark County. The two boolean masks are computed once and combined.
_clark = "Clark County, NV"
_from_clark = clark_county_ts2021['Origin Region Name'] == _clark
_to_clark = clark_county_ts2021['Destination Region Name'] == _clark

# Outbound: starts in Clark County, ends elsewhere
clark_county_outbound = clark_county_ts2021[_from_clark & ~_to_clark]

# Inbound: ends in Clark County, starts elsewhere
clark_county_inbound = clark_county_ts2021[_to_clark & ~_from_clark]

# Through: neither end is Clark County
clark_county_through = clark_county_ts2021[~_from_clark & ~_to_clark]

# Intra: both ends are Clark County
clark_county_intra = clark_county_ts2021[_from_clark & _to_clark]

In [None]:
def _as_number(series):
    """Return `series` as numeric.

    Columns that are already numeric pass through untouched. Text columns
    (e.g. values previously rendered as '1,234.50') have their thousands
    separators stripped and are parsed; anything unparseable becomes NaN.
    Digits dropped by earlier rounding to two decimals cannot be recovered.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series
    stripped = series.astype(str).str.replace(',', '', regex=False)
    return pd.to_numeric(stripped, errors='coerce')


def _summarize_flow(flow_df, prefix):
    """Total Value and Tons per (Year, Name) for one flow direction.

    The result columns are named '<prefix>_Value' and '<prefix>_Tons' so the
    four directions can sit side by side after merging.
    """
    numeric = flow_df.assign(
        Value=_as_number(flow_df['Value']),
        Tons=_as_number(flow_df['Tons']),
    )
    totals = numeric.groupby(['Year', 'Name'])[['Value', 'Tons']].sum().reset_index()
    return totals.rename(columns={'Value': f'{prefix}_Value', 'Tons': f'{prefix}_Tons'})


# One summary per flow direction
outbound_summary = _summarize_flow(clark_county_outbound, 'OB')
inbound_summary = _summarize_flow(clark_county_inbound, 'IB')
through_summary = _summarize_flow(clark_county_through, 'Thr')
intra_summary = _summarize_flow(clark_county_intra, 'Intr')

# Outer-merge on (Year, Name) so a mode that is missing from one direction
# still appears in the combined table (with NaN in that direction's columns).
CC_Modes_Summary = outbound_summary
for _summary in (inbound_summary, through_summary, intra_summary):
    CC_Modes_Summary = pd.merge(CC_Modes_Summary, _summary, on=['Year', 'Name'], how='outer')

# Print the final DataFrame
print(CC_Modes_Summary)


In [None]:
# Column inventory of each table, printed in the order: shipments, STCC
# lookup, STCC-NAICS bridge, modes lookup.
for _table in (clark_county_ts2021, stcc, naics_stcc, modes):
    print(_table.columns)

In [None]:
import pandas as pd
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as mtick
from adjustText import adjust_text

# Load data. This cell rebuilds clark_county_ts2021 from the raw CSV, so any
# columns attached to it by earlier cells are discarded.
clark_county_ts2021 = pd.read_csv("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/Clark_County_TS2021.csv")
bridges_excel = pd.ExcelFile("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/bridges.xlsx")

# Load necessary sheets from the Excel file.
# STCC4 is read as text here because the bridge table below reads it as
# text too; a join on STCC4 needs the same key dtype on both sides.
stcc = bridges_excel.parse("STCC", dtype={'STCC4': str})
modes = bridges_excel.parse("Modes")

naics_stcc = pd.read_excel("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/STCC_NAICS.xlsx", dtype={'STCC4': str})

# Drop exact duplicate shipment rows
clark_county_ts2021 = clark_county_ts2021.drop_duplicates()

# Attach NAICS information to the STCC lookup via STCC4.
# NOTE(review): if the bridge lists several rows per STCC4, this left join
# repeats STCC rows and, in turn, shipment rows below - verify key uniqueness.
stcc = pd.merge(stcc, naics_stcc, how='left', on='STCC4')

# Attach the enriched STCC lookup to the shipments via STCC
clark_county_ts2021 = pd.merge(clark_county_ts2021, stcc, how='left', on='STCC')

# Attach the mode lookup via Mode
clark_county_ts2021 = pd.merge(clark_county_ts2021, modes, how='left', on='Mode')

# Print DataFrame to check
print(clark_county_ts2021)
