For background information, refer to the ReadMe on github
* https://github.com/wpbSabi/sustainable_supply_chain

In [1]:
# !pip install -U googlemaps 
import folium
import numpy as np
import pandas as pd
import googlemaps

# Data

## Geocoding

My first choice would have been to use geopy / nominatum to obtain latitudes and longitudes based on city names. However, there was an issue on April 8, 2023 with one of the services geopy relies on.  The good news is that on April 18, 2023 the service was running again.
* https://geopy.readthedocs.io/en/stable/

In [None]:
# from geopy.geocoders import Nominatim # OpenStreetMap API for geocoding
# geolocator = Nominatim(user_agent="april_8")
# city = "Chicago"
# country = "USA"
# loc = geolocator.geocode(city + ',' + country)
# print("latitude is: ", loc.latitude, "\nlongtitude is:-", loc.longitude)

Instead, I signed up for a google maps api

In [4]:
# gmaps_key = googlemaps.Client(key='<YOUR_KEY_HERE')

# # Import or set format for data
# df = pd.read_csv('data/location_lat_long.csv')
# # Create a list, to retrieve multiple locations at a time
# list_of_cities = ['Salt Lake City, Utah',
#                   'New Orleans, Louisiana',
#                   'Hartford, Connecticut',
#                   'Buffalo, New York',
#                   'Birmingham, Alabama']
# for city in list_of_cities:
#     g = gmaps_key.geocode(city)
#     address = g[0]['formatted_address']
#     lat = g[0]['geometry']['location']['lat']
#     long = g[0]['geometry']['location']['lng']
#     df.loc[len(df), 'location'] = address
#     df.loc[len(df) - 1, 'latitude'] = lat
#     df.loc[len(df) - 1, 'longitude'] = long

In [None]:
# def map_locations(df):
#     # Verify the new data by creating a map (or look at the df)
#     m = folium.Map([40, -95],  zoom_start=4)
#     # Add orange circles
#     for i in range(len(df)): folium.CircleMarker(
#         location=[df.iloc[i]['latitude'],
#                 df.iloc[i]['longitude']],
#         tooltip=df.iloc[i]['location'],
#         color='orange',
#         fill=True,
#         fill_opacity=0.7,
#         radius=5 # df.iloc[i]['demand_fix'] #5 #df.iloc[i]['Demand FY21']
#         ).add_to(m)
#     # To enable the toggle between map base layers
#     folium.TileLayer('OpenStreetMap').add_to(m)
#     folium.TileLayer('Stamen Terrain').add_to(m)
#     folium.TileLayer('Stamen Toner').add_to(m)
#     folium.TileLayer('Stamen Water Color').add_to(m)
#     folium.TileLayer('CartoDB positron').add_to(m)
#     folium.TileLayer('CartoDB dark_matter').add_to(m)
#     folium.LayerControl().add_to(m)
#     # m.save('images/python_folium_bubble_map.html')
#     return m
# map_locations(lat_long_data)

In [None]:
# Overwrite the data file with the updates
# df.to_csv('data/location_lat_long.csv', index=False)

## Greatest Circle Distance (GCD)

In [2]:
lat_long_data = pd.read_csv('data/location_lat_long.csv')

In [3]:
def haversine_distance(lat1, 
                       lon1, 
                       lat2, 
                       lon2):
    """
    Calculate the greatest circle distance between two points on Earth
    
    Utilizes the haversine calculation via latitude and longitude data
    """
    r = 6371
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi / 2) ** 2 \
        + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2) ** 2
    res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return np.round(res, 2)

In [4]:
lat_long_data.loc[0]

location     Portland, OR, USA
latitude             45.515232
longitude          -122.678385
Name: 0, dtype: object

In [5]:
# portland to boise, used in the greenhouse gas example, about right
haversine_distance(lat_long_data.loc[0, 'latitude'],
                   lat_long_data.loc[0, 'longitude'],
                   43.6150,
                   -116.2023)

554.64

In [6]:
# Could wrap the following into a function
def create_cartesian(df):
    """
    Creates a data frame of the possible Origin / Destination pairs

    """
    origins = df # [['location']].drop_duplicates()
    destinations = df #[['location']].drop_duplicates()
    combinations = origins.merge(destinations, how='cross')
    return combinations
origin_destination_pairs = create_cartesian(lat_long_data)

In [7]:
# Could wrap the following into this function instead
def calculate_distances(df):
    """
    Utilize haversine_distance() and the df for origin / destination pairs
    
    Calculate the distances between all origin / destination pairs
    """
    df['distance'] = df\
    .apply(lambda x: haversine_distance(x['latitude_x'],
                   x['longitude_x'],
                   x['latitude_y'],
                   x['longitude_y']),
          axis=1)

    # For shipments within a city, set the distance to 10 miles
    df['distance'] = np.where(
        df['location_x']==df['location_y'],
        10,
        df['distance']
    )
    # Choosing not to remove the shipments within a city
    # combinations = combinations[combinations['distance']>0]\
    #  .reset_index(drop=True)
    return df 

distance_between_od_pairs = calculate_distances(origin_destination_pairs)

## Scenarios / Greenhouse gas calculations
Create shipments for scenarios, and assumptions:
* 0.36 grams co2e per mile for parcels shipments or LTL shipments
* 400 parcels per truck / shipment
* DC footprint - 443,000 grams co2e per day

### One Distribution Center

In [8]:
# Scenario 1: Memphis Only
# Distribution Center at Memphis
# 1 daily shipment to each of the 50 cities
scen1 = distance_between_od_pairs[
    distance_between_od_pairs['location_x']=='Memphis, TN, USA']
co2e_per_mile = 0.36
co2e_for_facility = 443000
scen1['co2e_shipment'] = scen1['distance'] * co2e_per_mile

# Total co2e 
print(round(sum(scen1['co2e_shipment']) + co2e_for_facility)) 
# ~466,759 co2e
# Print co2e from transportation
print(round(sum(scen1['co2e_shipment']))) # 23,759

466759
23759


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scen1['co2e_shipment'] = scen1['distance'] * co2e_per_mile


In [9]:
# Scenario 2: St Louis Only
# Distribution Center at St Louis
# 1 daily shipment to each of the 50 cities
scen2 = distance_between_od_pairs[
    distance_between_od_pairs['location_x']=='St. Louis, MO, USA']
co2e_per_mile = 0.36
co2e_for_facility_daily = 443000
scen2['co2e_shipment'] = scen2['distance'] * co2e_per_mile

# Total co2e 
print(round(sum(scen2['co2e_shipment']) + co2e_for_facility_daily)) 
# ~466,196 co2e
# Print co2e from transportation
print(round(sum(scen2['co2e_shipment']))) # 23,197

466197
23197


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scen2['co2e_shipment'] = scen2['distance'] * co2e_per_mile


In [10]:
# Scenario 3: Seattle Only
# Distribution Center at Seattle (probably the worst choice)
# 1 daily shipment to each of the 50 cities
scen3 = distance_between_od_pairs[
    distance_between_od_pairs['location_x']=='Seattle, WA, USA']

co2e_per_mile = 0.36
co2e_for_facility_daily = 443000
scen3['co2e_shipment'] = scen3['distance'] * co2e_per_mile

# Total co2e 
print(round(sum(scen3['co2e_shipment']) + co2e_for_facility_daily)) 
# ~494,124 co2e
# Print co2e from transportation
print(round(sum(scen3['co2e_shipment']))) # 51,124

494124
51124


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scen3['co2e_shipment'] = scen3['distance'] * co2e_per_mile


In [11]:
print((494124 - 466197) / 494124)
print((494124 - 466197) / 466197)

0.056518201908832603
0.059903860385201964


### Three Distribution Centers

In [12]:
# Scenario 4: Riverside, Houston, and Columbus
# Distribution Center at Seattle (probably the worst choice)
# 1 daily shipment to each of the 50 cities
s4 = distance_between_od_pairs[
    (distance_between_od_pairs['location_x']=='Riverside, CA, USA') | 
    (distance_between_od_pairs['location_x']=='Houston, TX, USA') | 
    (distance_between_od_pairs['location_x']=='Columbus, OH, USA')
]

In [13]:
co2e_per_mile = 0.36
co2e_per_facility_daily = round(443000 / np.sqrt(3))
print('Average facility size:', co2e_per_facility_daily)

Average facility size: 255766


In [18]:
# For each destination, use the closest facility
scen4 = s4[['location_y', 'distance']]\
            .groupby(['location_y'], as_index=False)\
            .min()

In [19]:
scen4['co2e_shipment'] = scen4['distance'] * co2e_per_mile

# Print Total co2e 
print(round(sum(scen4['co2e_shipment']) + (co2e_per_facility_daily * 3)) )
# ~778,617 co2e
# Print co2e from transportation
print(round(sum(scen4['co2e_shipment']))) # 11,319

778617
11319
