# Corporate Decarbonization Research
---

## Import modules

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

---

## Data Cleaning

In this section, I import the four sector datasets (food & agriculture, energy, auto, and tech) and clean and standardize them so they can be combined into a single table.

In [None]:
# Read the four per-sector CSV exports, one DataFrame per sector.
foodag, energy, auto, tech = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/food&ag.csv',
        'data/energy.csv',
        'data/auto.csv',
        'data/tech.csv',
    )
)


In [None]:
# Tag every row with its sector so the frames can be told apart after concatenation.
# A scalar assignment is broadcast to every row, so no per-row list is needed.

foodag['SECTOR'] = 'Food & Agriculture'
energy['SECTOR'] = 'Energy'
auto['SECTOR'] = 'Auto'
tech['SECTOR'] = 'Tech'
foodag.shape, energy.shape, auto.shape, tech.shape


In [None]:
# clean tech to standardize columns across all df's, shorten CI column name

# tech's CI header is spelled differently ("CO2e/ $ Sales) \ncalculated"); align it with the others first
tech = tech.rename(columns={'CARBON INTENSITY\n(Scope 1 & 2 g CO2e/ $ Sales) \ncalculated' :
'CARBON INTENSITY\n(Scope 1 & 2 g CO2e / $ Sales)'})

# renaming a column that is already renamed is a no-op, so this loop is safe to re-run
df_array = [foodag, energy, auto, tech]
for df in df_array:
    df.rename(columns={'CARBON INTENSITY\n(Scope 1 & 2 g CO2e / $ Sales)': 'CARBON INTENSITY'}, inplace=True)

# convert CI to type float
# The energy CI values carry thousands separators (e.g. "1,234"). Casting to str first keeps
# this cell re-runnable: once the column is float, the .str accessor would otherwise raise.
energy['CARBON INTENSITY'] = (
    energy['CARBON INTENSITY']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(float)
)


In [None]:
# Stack the four sector frames into one table and drop the combined Scope 1 + 2 column

sectors = (
    pd.concat([foodag, energy, auto, tech])
    .drop(columns='SCOPE 1 + SCOPE 2 EMISSIONS')
)
sectors.shape

In [None]:
# convert revenues to USD

# USD value of ONE unit of each currency (approximate, hard-coded rates).
# Built once at module level instead of on every toUSD call.
USD_PER_UNIT = {
    'AUD': 0.75,   # 1 AUD to USD
    'CAD': 0.78,   # 1 CAD to USD
    'CHF': 1.09,   # 1 CHF to USD
    'DKK': 0.15,   # 1 DKK to USD
    'EUR': 1.16,   # 1 EUR to USD
    'GBP': 1.37,   # 1 GBP to USD
    'JPY': 0.009,  # 1 JPY to USD
    'KRW': 0.0009, # 1 KRW to USD
    'NOK': 0.11,   # 1 NOK to USD
    'RUB': 0.014,  # 1 RUB to USD
    'SEK': 0.11,   # 1 SEK to USD
    'RMB': 0.14,   # 1 RMB to USD (reciprocal of the ~7.29 RMB-per-USD quote)
    'TWD': 0.03,   # 1 TWD to USD
    'USD': 1       # No Change
}

def toUSD(i):
    """Convert a ``[currency, revenue]`` pair to a revenue expressed in USD.

    Parameters
    ----------
    i : sequence of length 2
        ``i[0]`` is the currency code (a key of ``USD_PER_UNIT``) and
        ``i[1]`` is the revenue amount in that currency.

    Returns
    -------
    float
        ``revenue * USD_PER_UNIT[currency]``, or NaN when either element is
        missing (a real NaN/None, or the literal string ``'nan'``).

    Raises
    ------
    KeyError
        If the currency code has no entry in ``USD_PER_UNIT``.
    """
    currency, revenue = i[0], i[1]

    # Missing values arrive from pandas as float NaN, not as the string 'nan';
    # check both so a NaN currency no longer falls through to the dict lookup.
    if pd.isna(currency) or pd.isna(revenue) or currency == 'nan' or revenue == 'nan':
        return np.nan

    # The revenue multiplied by the corresponding USD conversion rate
    return revenue * USD_PER_UNIT[currency]

# sectors['Revenue (USD)'] = sectors.apply(toUSD, 'TOTAL REVENUE (miillion $)')

In [None]:
# Helper that pairs each row's currency code with its reported revenue

def currAndRev(df):
    """Add a 'REVENUE IN USD' column of ``[currency, revenue]`` pairs to ``df`` in place.

    Each cell of the new column is a two-element list built from the row's
    'CURRENCY' and 'TOTAL REVENUE (miillion $)' values. The values are only
    paired here; no currency conversion is performed by this function.
    Returns None.
    """
    currencies = df['CURRENCY'].values
    revenues = df['TOTAL REVENUE (miillion $)'].values
    df['REVENUE IN USD'] = [[code, amount] for code, amount in zip(currencies, revenues)]

# Attach the [currency, revenue] pair column to the combined frame (modifies `sectors` in place)
currAndRev(sectors)


In [None]:
# Keep only fully populated rows, convert each [currency, revenue] pair to USD,
# then discard the intermediate pair column

sectors_nona = (
    sectors
    .copy()
    .dropna()
    .assign(**{'USD REVENUE': lambda complete: complete.loc[:, 'REVENUE IN USD'].apply(toUSD)})
    .drop(columns=['REVENUE IN USD'])
)
sectors_nona.head()

In [None]:
# Adjusted carbon intensity: (Scope 1 + location-based Scope 2) divided by USD revenue, rounded to 2 decimals
sectors_nona['ADJUSTED CI'] = (
    sectors_nona['SCOPE 1']
    .add(sectors_nona['SCOPE 2 (location-based)'])
    .div(sectors_nona['USD REVENUE'])
    .round(2)
)
sectors_nona.head()

In [None]:
# Recode 'SUBMIT TO CDP' from 'Yes'/'No' to True/False for categorical analysis;
# any other value is left untouched by replace()

cdp_flags = {'Yes' : True, 'No' : False}
sectors['SUBMIT TO CDP'] = sectors['SUBMIT TO CDP'].replace(cdp_flags)

sectors.head()

---

## Cross-Sector Exploratory Data Analysis (EDA)

In [None]:
# Per-sector means of the numeric columns (YEAR excluded), rounded to whole numbers
# and ordered from highest to lowest mean total emissions

(
    sectors
    .groupby('SECTOR')
    .mean(numeric_only=True)
    .drop(columns=['YEAR'])
    .round(0)
    .sort_values(by='TOTAL EMISSIONS', ascending=False)
)

In [None]:
# Mean USD revenue per sector, highest first
rev_summary = (
    sectors_nona
    .groupby('SECTOR')
    .mean(numeric_only=True)
    .round(0)
    .loc[:, ['USD REVENUE']]
    .rename(columns={'USD REVENUE' : 'Mean USD Revenue'})
    .sort_values(by='Mean USD Revenue', ascending=False)
)
rev_summary

In [None]:
# Per-company means of the numeric columns (YEAR excluded), highest mean total emissions first
(
    sectors
    .groupby('COMPANY NAME')
    .mean(numeric_only=True)
    .drop(columns=['YEAR'])
    .sort_values(by='TOTAL EMISSIONS', ascending=False)
)

In [None]:
# Per-country means of the numeric columns (YEAR and raw revenue excluded),
# rounded and ordered by mean total emissions, highest first
country = (
    sectors
    .groupby('COUNTRY OF ORIGIN')
    .mean(numeric_only=True)
    .drop(columns=['YEAR', 'TOTAL REVENUE (miillion $)'])
    .round(0)
    .sort_values(by='TOTAL EMISSIONS', ascending=False)
)
country

The first row corresponds to Shell; the last row corresponds to Molson Coors.

---

## Helper Functions

In [None]:
# graph using matplotlib

def graphCoEmissions(df, co_name, years_arr):
    """Plot one company's annual emissions as stacked bars with carbon intensity overlaid.

    Scope 1, Scope 2 (location-based) and Scope 3 emissions are stacked on the
    left y-axis; carbon intensity is drawn as a line on a twin right y-axis.
    The figure is left open on the current pyplot state so the notebook
    backend displays it; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'COMPANY NAME', 'YEAR', 'SCOPE 1',
        'SCOPE 2 (location-based)', 'SCOPE 3' and 'CARBON INTENSITY'.
    co_name : str
        Value of 'COMPANY NAME' selecting the rows to plot.
    years_arr : sequence
        Tick positions for the x-axis.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``co_name``.
    """
    # select the company's rows once instead of re-filtering for every column
    rows = df.loc[df['COMPANY NAME']==co_name]
    if rows.empty:
        # fail before creating a figure so no blank plot is left behind
        raise ValueError(f'No rows found for company {co_name!r}')

    # format plots and add right axis for CI
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    # create x axis array
    years = rows['YEAR']

    # create bar chart stacks and CI array
    scope1 = rows['SCOPE 1']
    scope2 = rows['SCOPE 2 (location-based)']
    scope3 = rows['SCOPE 3']
    ci = rows['CARBON INTENSITY']

    # Treat a missing scope as 0 when stacking, so one unreported scope does not
    # turn the bar offsets (and the axis limit) into NaN for that year.
    base2 = scope1.fillna(0)
    base3 = base2 + scope2.fillna(0)
    total = base3 + scope3.fillna(0)

    # plot bar chart
    b1 = ax1.bar(years, scope1, color=plt.cm.plasma(0))
    b2 = ax1.bar(years, scope2, bottom=base2, color=plt.cm.plasma(0.4))
    b3 = ax1.bar(years, scope3, bottom=base3, color=plt.cm.plasma(0.8))

    # plot CI; plot() returns a list of lines, so unpack the single Line2D
    # to get a handle the legend can actually draw
    l1, = ax2.plot(years, ci, color = plt.cm.viridis(0.5), marker = 'o')

    # finish formatting plots
    # Series.max() skips NaN, unlike the builtin max(), whose result depends on
    # where a NaN sits in the sequence.
    ci_peak = ci.max()
    if pd.isna(ci_peak):
        ci_peak = 0  # no CI reported at all: fall back to the fixed headroom only
    ax1.set_xticks(years_arr)
    ax1.set_ylim(0, total.max()+9000000)
    ax2.set_ylim(0, ci_peak+20)

    # combine and add legend (placed outside the top-right corner of the axes)
    lines = [b1, b2, b3, l1]
    labels = ['SCOPE 1','SCOPE 2','SCOPE 3','CI' ]
    ax1.legend(lines, labels, loc='lower left', bbox_to_anchor=(1, 1))


    # add labels and titles
    plt.suptitle('Annual GHG Emissions', fontsize=14, fontweight='bold')
    plt.title(co_name, fontsize=10)
    ax1.set_xlabel('Year')
    ax1.set_ylabel('Carbon Emissions (metric ton CO2e)')
    # NOTE(review): the source column header reads "g CO2e / $ Sales" — confirm the unit in this label
    ax2.set_ylabel('Carbon Intensity (g CO2e / $ million)')

    

In [None]:
# graph using plotly

import plotly.graph_objects as go

def graphCoEmissionsPlotly(df, co_name, years_arr):
    """Show an interactive Plotly chart of one company's annual emissions and carbon intensity.

    Scope 1, Scope 2 (location-based) and Scope 3 emissions are drawn as
    stacked bars on the left y-axis; carbon intensity is a marker+line trace
    on a secondary right y-axis. The figure is displayed with ``fig.show()``;
    nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'COMPANY NAME', 'YEAR', 'SCOPE 1',
        'SCOPE 2 (location-based)', 'SCOPE 3' and 'CARBON INTENSITY'.
    co_name : str
        Value of 'COMPANY NAME' selecting the rows to plot.
    years_arr : sequence
        Tick values for the x-axis.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``co_name``.
    """
    # select the company's rows once instead of re-filtering for every column
    rows = df.loc[df['COMPANY NAME']==co_name]
    if rows.empty:
        raise ValueError(f'No rows found for company {co_name!r}')

    # create x axis array
    years = rows['YEAR']

    # create bar chart stacks and CI array
    scope1 = rows['SCOPE 1']
    scope2 = rows['SCOPE 2 (location-based)']
    scope3 = rows['SCOPE 3']
    ci = rows['CARBON INTENSITY']

    # Axis ceilings: count a missing scope as 0 and use Series.max(), which skips NaN.
    # The builtin max() over a Series containing NaN is order-dependent and can yield NaN.
    total_peak = (scope1.fillna(0) + scope2.fillna(0) + scope3.fillna(0)).max()
    ci_peak = ci.max()
    if pd.isna(ci_peak):
        ci_peak = 0  # no CI reported at all: fall back to the fixed headroom only

    # create stacked bar chart traces
    trace_scope1 = go.Bar(x=years, y=scope1, name='Scope 1', marker=dict(color='rgba(0, 0, 255, 0.7)'))
    trace_scope2 = go.Bar(x=years, y=scope2, name='Scope 2', marker=dict(color='rgba(0, 255, 0, 0.7)'))
    trace_scope3 = go.Bar(x=years, y=scope3, name='Scope 3', marker=dict(color='rgba(255, 0, 0, 0.7)'))

    # create line chart trace for CI
    trace_ci = go.Scatter(x=years, y=ci, mode='markers+lines', name='Carbon Intensity', yaxis='y2',
                          marker=dict(color='rgba(255, 165, 0, 0.7)'))

    # combine traces into data list
    data = [trace_scope1, trace_scope2, trace_scope3, trace_ci]

    # create layout
    layout = go.Layout(
        title=dict(text=co_name+' - Annual GHG Emissions', x=0.5),
        xaxis=dict(tickvals=years_arr, title='Year'),
        yaxis=dict(title='Carbon Emissions (metric ton CO2e)', range=[0, total_peak+9000000]),
        # NOTE(review): the source column header reads "g CO2e / $ Sales" — confirm the unit in this title
        yaxis2=dict(title='Carbon Intensity (g CO2e / $ million)', overlaying='y', side='right', range=[0, ci_peak+20]),
        barmode='stack',
        showlegend=True,
        height=650,
        width=800,
        # NOTE(review): Plotly margins are in pixels, so r=1.2 is a ~1 px right margin;
        # this may have been meant as a legend x-offset — confirm
        margin=dict(r=1.2)
    )

    # create figure
    fig = go.Figure(data=data, layout=layout)

    # show the figure
    fig.show()

---

## Visualizations

In [None]:
# Scope 1 against Scope 2 for every row, coloured by sector; hover shows the company
fig = px.scatter(
    sectors,
    x='SCOPE 1',
    y='SCOPE 2 (location-based)',
    color='SECTOR',
    hover_data=['COMPANY NAME'],
).update_layout(title='Scope 1 vs. Scope 2 Across All Sectors')
fig.show()

In [None]:
# Scope 2 against Scope 3 for every row, coloured by sector; hover shows the company
fig = px.scatter(
    sectors,
    x='SCOPE 2 (location-based)',
    y='SCOPE 3',
    color='SECTOR',
    hover_data=['COMPANY NAME'],
).update_layout(title='Scope 2 vs. Scope 3 Across All Sectors')
fig.show()

In [None]:
# The range of carbon intensity values computed from the adjusted (USD) revenue is far too wide.
# This could be because the revenue values are not standardized; for example, the auto
# revenues may not be expressed in millions of dollars.

sectors_nona['ADJUSTED CI'].sort_values()

### Food & Agriculture Emissions Charts

In [None]:
# graph charts for every company

# Candidate x-axis tick sets (reporting-year windows); each is a flat list of years
yrs1 = [2017, 2018, 2019, 2020]
yrs2 = [2018, 2019, 2020, 2021]
yrs3 = [2018, 2019, 2020, 2021, 2022]
yrs4 = list(range(2017, 2023))  # flat list like yrs1-yrs3, not a list wrapping one array

for company in foodag['COMPANY NAME'].unique():
    num_years = foodag.loc[foodag['COMPANY NAME']==company, 'YEAR'].shape[0]
    # companies with exactly four rows get the four-year ticks, everyone else the five-year ticks
    graphCoEmissions(foodag, company, yrs2 if num_years == 4 else yrs3)

In [None]:
# Same food & agriculture charts, drawn with the interactive Plotly helper

for company in foodag['COMPANY NAME'].unique():
    # number of rows reported for this company
    num_years = (foodag['COMPANY NAME'] == company).sum()
    tick_years = yrs2 if num_years == 4 else yrs3
    graphCoEmissionsPlotly(foodag, company, tick_years)


### Energy Emissions Charts

In [None]:
# One interactive emissions chart per energy company

for company in energy['COMPANY NAME'].unique():
    # number of rows reported for this company
    num_years = (energy['COMPANY NAME'] == company).sum()
    tick_years = yrs2 if num_years == 4 else yrs3
    graphCoEmissionsPlotly(energy, company, tick_years)

### Tech Emissions Charts

In [None]:
# One interactive emissions chart per tech company

for company in tech['COMPANY NAME'].unique():
    # number of rows reported for this company
    num_years = (tech['COMPANY NAME'] == company).sum()
    tick_years = yrs2 if num_years == 4 else yrs3
    graphCoEmissionsPlotly(tech, company, tick_years)

### Auto Emissions Charts

In [None]:
# One interactive emissions chart per auto company
for company in auto['COMPANY NAME'].unique():
    # number of rows reported for this company
    num_years = (auto['COMPANY NAME'] == company).sum()
    tick_years = yrs2 if num_years == 4 else yrs3
    graphCoEmissionsPlotly(auto, company, tick_years)