In [50]:
import pandas as pd
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.ticker as mtick

# Load data:
clark_county_ts2021 = pd.read_csv("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/Clark_County_TS2021.csv")
bridges_excel = pd.ExcelFile("H:/My Drive/7. SNV Industry Study/SNV_ArcGIS/IHS Files/bridges.xlsx")

# Load necessary sheets from the Excel file
stcc = bridges_excel.parse("STCC")
modes = bridges_excel.parse("Modes")
regions = bridges_excel.parse("Regions")

# Define origin_regions and destination_regions separately
origin_regions = regions.rename(columns={"Region": "Origin Region", "Region Name": "Origin Region Name"})
destination_regions = regions.rename(columns={"Region": "Destination Region", "Region Name": "Destination Region Name"})


# Ensure 'STCC4' column in both dataframes is of type string
# For example, replace NaN with a known string
stcc['STCC4'] = stcc['STCC4'].astype(str)
# naics_stcc['STCC4'] = naics_stcc['STCC4'].astype(str)
# naics_stcc['NAICS_code'] = naics_stcc['NAICS_code'].astype(str)
# Create a new DataFrame from 'stcc' that only includes rows with 2-digit 'STCC' codes
stcc_2digit = stcc[stcc['STCC'].str.len() == 2].copy()
# Ensure 'STCC4' is of type string
stcc_2digit['STCC4'] = stcc_2digit['STCC4'].astype(str)
# Merge stcc_2digit and naics_stcc using STCC4
#stcc_2digit = pd.merge(stcc_2digit, naics_stcc, how='left', on='STCC4')
#stcc = pd.merge(stcc, naics_stcc, how='left', on='STCC4')

# Merge stcc (which now includes naics_stcc data) into clark_county_ts2021 using STCC
clark_county_ts2021 = pd.merge(clark_county_ts2021, stcc, how='left', on='STCC')

# Merge modes into clark_county_ts2021 using Mode
clark_county_ts2021 = pd.merge(clark_county_ts2021, modes, how='left', on='Mode')
# Merge origin_regions and destination_regions into clark_county_ts2021
clark_county_ts2021 = pd.merge(clark_county_ts2021, origin_regions, on='Origin Region', how='left')
clark_county_ts2021 = pd.merge(clark_county_ts2021, destination_regions, on='Destination Region', how='left')

# Create 'STCC_2digit' column in 'clark_county_ts2021' DataFrame
clark_county_ts2021['STCC_2digit'] = clark_county_ts2021['STCC'].str[:2]

# Print DataFrame to check
#print(clark_county_ts2021)


In [51]:
# Rename the rest of the columns
clark_county_ts2021.rename(columns={
    'State_x': 'Origin State', 
    'BEA_x': 'Origin BEA', 
    'BEA Name_x': 'Origin BEA Name', 
    'Country_x': 'Origin Country', 
    'State_y': 'Destination State', 
    'BEA_y': 'Destination BEA', 
    'BEA Name_y': 'Destination BEA Name', 
    'Country_y': 'Destination Country'}, inplace=True)


In [52]:
print(clark_county_ts2021.columns)


Index(['Year', 'Origin Region', 'Destination Region', 'STCC', 'Trade Type',
       'Mode', 'Tons', 'Units', 'Value', 'Average Miles', 'First Node',
       'Last Node', 'From FIPS', 'To FIPS', 'Entry Road', 'Exit Road',
       'Commodity', 'STCC4', 'Code', 'Name', 'Group', 'Origin Region Name',
       'Origin State', 'Origin BEA', 'Origin BEA Name', 'Origin Country',
       'Destination Region Name', 'Destination State', 'Destination BEA',
       'Destination BEA Name', 'Destination Country', 'STCC_2digit'],
      dtype='object')


In [53]:
selected_columns = [
    'Year', 'Origin Region', 'Origin Region Name',  'Origin BEA Name', 'Origin State', 'Origin Country', 
    'Destination Region', 'Destination Region Name', 'Destination BEA Name', 
    'Destination State', 'Destination Country', 'STCC_2digit', 'STCC', 'Commodity', 'Value', 'Tons'
]

clark_county_ts2021_filtered = clark_county_ts2021[selected_columns]


In [54]:
region_code = 32003 # Replace with the specific code
outbound = clark_county_ts2021[(clark_county_ts2021['Origin Region'] == region_code) & (clark_county_ts2021['Destination Region'] != region_code)]
inbound = clark_county_ts2021[(clark_county_ts2021['Origin Region'] != region_code) & (clark_county_ts2021['Destination Region'] == region_code)]
through = clark_county_ts2021[(clark_county_ts2021['Origin Region'] != region_code) & (clark_county_ts2021['Destination Region'] != region_code)]
intra = clark_county_ts2021[(clark_county_ts2021['Origin Region'] == region_code) & (clark_county_ts2021['Destination Region'] == region_code)]


In [55]:
print(clark_county_ts2021.columns)
print(intra.columns)


Index(['Year', 'Origin Region', 'Destination Region', 'STCC', 'Trade Type',
       'Mode', 'Tons', 'Units', 'Value', 'Average Miles', 'First Node',
       'Last Node', 'From FIPS', 'To FIPS', 'Entry Road', 'Exit Road',
       'Commodity', 'STCC4', 'Code', 'Name', 'Group', 'Origin Region Name',
       'Origin State', 'Origin BEA', 'Origin BEA Name', 'Origin Country',
       'Destination Region Name', 'Destination State', 'Destination BEA',
       'Destination BEA Name', 'Destination Country', 'STCC_2digit'],
      dtype='object')
Index(['Year', 'Origin Region', 'Destination Region', 'STCC', 'Trade Type',
       'Mode', 'Tons', 'Units', 'Value', 'Average Miles', 'First Node',
       'Last Node', 'From FIPS', 'To FIPS', 'Entry Road', 'Exit Road',
       'Commodity', 'STCC4', 'Code', 'Name', 'Group', 'Origin Region Name',
       'Origin State', 'Origin BEA', 'Origin BEA Name', 'Origin Country',
       'Destination Region Name', 'Destination State', 'Destination BEA',
       'Destination BE

In [56]:
columns_to_group_by = ['Year', 'Origin Region','Origin Region Name', 'Origin BEA Name', 'Origin State', 'Origin Country','Destination Region', 'Destination Region Name', 'Destination State', 'Destination BEA Name', 'Destination Country']

outbound_summary = outbound.groupby(columns_to_group_by)[['Value', 'Tons']].sum().reset_index()
inbound_summary = inbound.groupby(columns_to_group_by)[['Value', 'Tons']].sum().reset_index()
through_summary = through.groupby(columns_to_group_by)[['Value', 'Tons']].sum().reset_index()
intra_summary = intra.groupby(columns_to_group_by)[['Value', 'Tons']].sum().reset_index()


In [36]:
# Rename the columns
outbound_summary.rename(columns={'Value': 'OB_Value', 'Tons': 'OB_Tons'}, inplace=True)
inbound_summary.rename(columns={'Value': 'IB_Value', 'Tons': 'IB_Tons'}, inplace=True)
through_summary.rename(columns={'Value': 'Thr_Value', 'Tons': 'Thr_Tons'}, inplace=True)
intra_summary.rename(columns={'Value': 'Intr_Value', 'Tons': 'Intr_Tons'}, inplace=True)

# Merge all the DataFrames on Year and selected columns as before
summary_final = pd.merge(outbound_summary, inbound_summary, on=columns_to_group_by, how='outer')
summary_final = pd.merge(summary_final, through_summary, on=columns_to_group_by, how='outer')
summary_final = pd.merge(summary_final, intra_summary, on=columns_to_group_by, how='outer')

# Replace NaN with 0 for specific columns
columns_to_replace_nan = ['OB_Value', 'OB_Tons', 'IB_Value', 'IB_Tons', 'Thr_Value', 'Thr_Tons', 'Intr_Value', 'Intr_Tons']
summary_final[columns_to_replace_nan] = summary_final[columns_to_replace_nan].fillna(0)

# Calculate the Total Value and Total Tons for each row
summary_final['Total_Value'] = summary_final['OB_Value'] + summary_final['IB_Value'] + summary_final['Thr_Value'] + summary_final['Intr_Value']
summary_final['Total_Tons'] = summary_final['OB_Tons'] + summary_final['IB_Tons'] + summary_final['Thr_Tons'] + summary_final['Intr_Tons']

# Print the final DataFrame with Total Value and Total Tons columns
print(summary_final)


       Year  Origin Region Origin Region Name Origin BEA Name Origin State  \
0      2015          32003   Clark County, NV   Las Vegas, NV           NV   
1      2015          32003   Clark County, NV   Las Vegas, NV           NV   
2      2015          32003   Clark County, NV   Las Vegas, NV           NV   
3      2015          32003   Clark County, NV   Las Vegas, NV           NV   
4      2015          32003   Clark County, NV   Las Vegas, NV           NV   
...     ...            ...                ...             ...          ...   
13884  2050            390            Yucatan         Yucatan           YU   
13885  2015          32003   Clark County, NV   Las Vegas, NV           NV   
13886  2019          32003   Clark County, NV   Las Vegas, NV           NV   
13887  2021          32003   Clark County, NV   Las Vegas, NV           NV   
13888  2050          32003   Clark County, NV   Las Vegas, NV           NV   

      Origin Country  Destination Region            Destination

In [37]:
summary_final

Unnamed: 0,Year,Origin Region,Origin Region Name,Origin BEA Name,Origin State,Origin Country,Destination Region,Destination Region Name,Destination State,Destination BEA Name,...,OB_Value,OB_Tons,IB_Value,IB_Tons,Thr_Value,Thr_Tons,Intr_Value,Intr_Tons,Total_Value,Total_Tons
0,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,1,Alabama Portion of Dothan BEA,AL,"Dothan, AL",...,189923.032039,70.791488,0.0,0.0,0.0000,0.000000,0.000000e+00,0.000000e+00,1.899230e+05,7.079149e+01
1,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,2,Alabama Portion of Columbus BEA,AL,"Columbus, GA",...,370467.262467,120.912749,0.0,0.0,0.0000,0.000000,0.000000e+00,0.000000e+00,3.704673e+05,1.209127e+02
2,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,3,Alabama Portion of Atlanta BEA,AL,"Atlanta, GA",...,46547.517217,17.804393,0.0,0.0,0.0000,0.000000,0.000000e+00,0.000000e+00,4.654752e+04,1.780439e+01
3,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,4,Alabama Portion of Huntsville BEA,AL,"Huntsville, AL",...,999368.040124,428.722780,0.0,0.0,0.0000,0.000000,0.000000e+00,0.000000e+00,9.993680e+05,4.287228e+02
4,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,5,Alabama Portion of Tupelo BEA,AL,"Tupelo, MS",...,32617.455678,4.103792,0.0,0.0,0.0000,0.000000,0.000000e+00,0.000000e+00,3.261746e+04,4.103792e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13884,2050,390,Yucatan,Yucatan,YU,MX,74,"Peoria, IL BEA",IL,"Peoria, IL",...,0.000000,0.000000,0.0,0.0,19298.4536,11.972698,0.000000e+00,0.000000e+00,1.929845e+04,1.197270e+01
13885,2015,32003,"Clark County, NV","Las Vegas, NV",NV,US,32003,"Clark County, NV",NV,"Las Vegas, NV",...,0.000000,0.000000,0.0,0.0,0.0000,0.000000,8.165316e+09,1.610371e+07,8.165316e+09,1.610371e+07
13886,2019,32003,"Clark County, NV","Las Vegas, NV",NV,US,32003,"Clark County, NV",NV,"Las Vegas, NV",...,0.000000,0.000000,0.0,0.0,0.0000,0.000000,9.045947e+09,1.633485e+07,9.045947e+09,1.633485e+07
13887,2021,32003,"Clark County, NV","Las Vegas, NV",NV,US,32003,"Clark County, NV",NV,"Las Vegas, NV",...,0.000000,0.000000,0.0,0.0,0.0000,0.000000,9.073076e+09,1.606648e+07,9.073076e+09,1.606648e+07


In [49]:
summary_final['State'] = summary_final['Destination  State']
summary_final['Country'] = summary_final['Destination  Country']


KeyError: 'Destination  State'

In [39]:
summary_final.drop(columns=['Origin State', 'Destination State'], inplace=True)


In [40]:
def map_state_to_region(state):
    for region, states in region_mapping.items():
        if state in states:
            return region
    return 'Unknown' # in case a state doesn't match any region


In [41]:
region_mapping = {
    'Northeast': ['CT', 'ME', 'MA', 'NH', 'RI', 'VT', 'NJ', 'NY', 'PA'],
    'Midwest': ['IL', 'IN', 'MI', 'OH', 'WI', 'IA', 'KS', 'MN', 'MO', 'NE', 'ND', 'SD'],
    'South': ['DE', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'DC', 'WV', 'AL', 'KY', 'MS', 'TN', 'AR', 'LA', 'OK', 'TX'],
    'West': ['AZ', 'CO', 'ID', 'MT', 'NV', 'NM', 'UT', 'WY', 'AK', 'CA', 'HI', 'OR', 'WA']
}


In [42]:
summary_final['Region'] = summary_final['State'].apply(map_state_to_region)


In [43]:
summary_final = summary_final.groupby(['Year', 'State', 'Region']).agg({
    'Total_Value': 'sum',
    'Total_Tons': 'sum',
    'IB_Value': 'sum',
    'OB_Value': 'sum',
    'Thr_Value': 'sum',
    'Intr_Value': 'sum',
    'IB_Tons': 'sum',
    'OB_Tons': 'sum',
    'Thr_Tons': 'sum',
    'Intr_Tons': 'sum'
}).reset_index()


In [46]:
# Calculating percentage of IB, OB, Thr, and Intra for both Values and Tons
summary_final['%_of_IB_Value'] = (summary_final['IB_Value'] / summary_final['Total_Value']) * 100
summary_final['%_of_OB_Value'] = (summary_final['OB_Value'] / summary_final['Total_Value']) * 100
summary_final['%_of_Thr_Value'] = (summary_final['Thr_Value'] / summary_final['Total_Value']) * 100
summary_final['%_of_Intr_Value'] = (summary_final['Intr_Value'] / summary_final['Total_Value']) * 100

summary_final['%_of_IB_Tons'] = (summary_final['IB_Tons'] / summary_final['Total_Tons']) * 100
summary_final['%_of_OB_Tons'] = (summary_final['OB_Tons'] / summary_final['Total_Tons']) * 100
summary_final['%_of_Thr_Tons'] = (summary_final['Thr_Tons'] / summary_final['Total_Tons']) * 100
summary_final['%_of_Intr_Tons'] = (summary_final['Intr_Tons'] / summary_final['Total_Tons']) * 100

# Print the final DataFrame with new percentage columns
print(summary_final)


     Year State   Region   Total_Value    Total_Tons      IB_Value  OB_Value  \
0    2015    AB  Unknown  6.367931e+08  6.876947e+05  1.065181e+08       0.0   
1    2015    AG  Unknown  1.187450e+06  1.424971e+02  1.152363e+06       0.0   
2    2015    AK     West  5.931368e+07  5.404941e+03  5.888916e+07       0.0   
3    2015    AL    South  1.611292e+08  5.146988e+04  1.506466e+08       0.0   
4    2015    AR    South  1.706922e+08  7.399478e+04  1.499748e+08       0.0   
..    ...   ...      ...           ...           ...           ...       ...   
351  2050    WI  Midwest  1.278996e+10  4.214185e+06  5.499365e+08       0.0   
352  2050    WV    South  5.428521e+07  2.263442e+04  5.355891e+07       0.0   
353  2050    WY     West  1.090075e+09  1.592301e+06  8.141409e+07       0.0   
354  2050    YU  Unknown  1.065466e+05  7.355694e+01  5.699608e+04       0.0   
355  2050    ZT  Unknown  2.915731e+06  3.999509e+02  2.915731e+06       0.0   

        Thr_Value  Intr_Value        IB

In [48]:
summary_final

Unnamed: 0,Year,State,Region,Total_Value,Total_Tons,IB_Value,OB_Value,Thr_Value,Intr_Value,IB_Tons,...,Thr_Tons,Intr_Tons,%_of_IB_Value,%_of_OB_Value,%_of_Thr_Value,%_of_Intr_Value,%_of_IB_Tons,%_of_OB_Tons,%_of_Thr_Tons,%_of_Intr_Tons
0,2015,AB,Unknown,6.367931e+08,6.876947e+05,1.065181e+08,0.0,5.302750e+08,0.0,157962.241245,...,5.297324e+05,0.0,16.727277,0.0,83.272723,0.0,22.969822,0.0,77.030178,0.0
1,2015,AG,Unknown,1.187450e+06,1.424971e+02,1.152363e+06,0.0,3.508684e+04,0.0,133.478185,...,9.018925e+00,0.0,97.045194,0.0,2.954806,0.0,93.670802,0.0,6.329198,0.0
2,2015,AK,West,5.931368e+07,5.404941e+03,5.888916e+07,0.0,4.245201e+05,0.0,5253.533728,...,1.514070e+02,0.0,99.284280,0.0,0.715720,0.0,97.198730,0.0,2.801270,0.0
3,2015,AL,South,1.611292e+08,5.146988e+04,1.506466e+08,0.0,1.048254e+07,0.0,48457.066252,...,3.012813e+03,0.0,93.494326,0.0,6.505674,0.0,94.146454,0.0,5.853546,0.0
4,2015,AR,South,1.706922e+08,7.399478e+04,1.499748e+08,0.0,2.071734e+07,0.0,63698.492881,...,1.029628e+04,0.0,87.862744,0.0,12.137256,0.0,86.085121,0.0,13.914879,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351,2050,WI,Midwest,1.278996e+10,4.214185e+06,5.499365e+08,0.0,1.224003e+10,0.0,202715.465505,...,4.011469e+06,0.0,4.299750,0.0,95.700250,0.0,4.810313,0.0,95.189687,0.0
352,2050,WV,South,5.428521e+07,2.263442e+04,5.355891e+07,0.0,7.262964e+05,0.0,22492.124199,...,1.422989e+02,0.0,98.662073,0.0,1.337927,0.0,99.371316,0.0,0.628684,0.0
353,2050,WY,West,1.090075e+09,1.592301e+06,8.141409e+07,0.0,1.008660e+09,0.0,115097.118511,...,1.477204e+06,0.0,7.468672,0.0,92.531328,0.0,7.228350,0.0,92.771650,0.0
354,2050,YU,Unknown,1.065466e+05,7.355694e+01,5.699608e+04,0.0,4.955055e+04,0.0,6.208941,...,6.734800e+01,0.0,53.494025,0.0,46.505975,0.0,8.440999,0.0,91.559001,0.0
