## A) Read Data & Preprocessing

### A-1) Pulling in All the Data Across 9 Notebooks

In [38]:
# Import packages that will be used
import pandas as pd
import os 
from nltk import tokenize  
import json
import re

In [39]:
%store -r HC_reltext
%store -r HC_alltext
%store -r HC_stat

%store -r IND_reltext
%store -r IND_alltext
%store -r IND_stat

%store -r Energy_reltext
%store -r Energy_alltext
%store -r Energy_stat

%store -r CONSTA_reltext
%store -r CONSTA_alltext
%store -r CONSTA_stat

%store -r CONDIS_reltext
%store -r CONDIS_alltext
%store -r CONDIS_stat

%store -r IT_reltext
%store -r IT_alltext
%store -r IT_stat

%store -r Real_Estate_reltext
%store -r Real_Estate_alltext
%store -r Real_Estate_stat

%store -r Materials_reltext
%store -r Materials_alltext
%store -r Materials_stat

%store -r Utilities_reltext
%store -r Utilities_alltext
%store -r Utilities_stat

### A-2) Read  <ins>total_relevant</ins>, <ins>total_all</ins>, (both relevant and irrelevant)

In [40]:
total_relevant = pd.concat([HC_reltext, IND_reltext, Energy_reltext,
                 CONSTA_reltext, CONDIS_reltext, IT_reltext,
                 Real_Estate_reltext, Materials_reltext, Utilities_reltext])

In [41]:
total_all = pd.concat([HC_alltext, IND_alltext, Energy_alltext,
                 CONSTA_alltext, CONDIS_alltext, IT_alltext,
                 Real_Estate_alltext, Materials_alltext, Utilities_alltext])

### A-3) Removing Duplicates of Sentences

In [42]:
total_all = total_all.drop_duplicates('all_sentences')

In [43]:
total_relevant = total_relevant.drop_duplicates('relevant_sentences')

In [44]:
# Check to see company labels are properly populated
unique_comp = total_all.company_label.unique()
len(unique_comp)

72

In [45]:
# keep track of original relevant and all sentences to compare after deleting short sentences (length <=4)
original_all = total_all
original_relevant = total_relevant

### A-4) Create Company Labels using Dictionary

In [46]:
# Create a Company Label Dictionary 
comp_dict = dict()

count = 1
for i in unique_comp:
    comp_dict[i] = str("%04d" % count)
    count += 1

In [47]:
company_list = list(comp_dict.keys())

In [48]:
comp_dict.items()

dict_items([('EliLilly', '0001'), ('UnitedHealthGroup', '0002'), ('Merck', '0003'), ('BristolMyersSquibb', '0004'), ('Danaher', '0005'), ('johnsonandjohnson', '0006'), ('Pfizer', '0007'), ('Abbott', '0008'), ('ThermoFisherScientifiic', '0009'), ('Amgen', '0010'), ('Caterpillar', '0011'), ('Lockheed', '0012'), ('Boeing', '0013'), ('UPS', '0014'), ('Raytheon', '0015'), ('Delta', '0016'), ('Deere', '0017'), ('Honeywell', '0018'), ('3M', '0019'), ('UnionPacific', '0020'), ('Total', '0021'), ('BP', '0022'), ('Shell', '0023'), ('Mondelez_Intl', '0024'), ('Hershey', '0025'), ('Philip_Morris_Intl', '0026'), ('PepsiCo', '0027'), ('Altria_Environmental', '0028'), ('PandG', '0029'), ('Altria_TCFD', '0030'), ('Costco', '0031'), ('CocaCola', '0032'), ('Altria_2021', '0033'), ('Walmart', '0034'), ('EsteeLauder', '0035'), ('McDonalds', '0036'), ('TJX', '0037'), ('HomeDepot', '0038'), ('Lowes', '0039'), ('Target', '0040'), ('BookingHoldings', '0041'), ('Tesla', '0042'), ('Amazon', '0043'), ('Nike', '0

### A-5) Apply Company Label and Sentence Length Indexing

In [49]:
# Company Label Indexing
total_all['company_index'] = total_all.apply(lambda x: comp_dict[x.company_label], axis = 1)

In [50]:
# Sentence Length Indexing
cur_comp_index = ""
sent_index = [] 
for i in total_all.company_index:
    if i != cur_comp_index:
        cur_comp_index = i
        sent_val = 0
        sent_val += 1
        sent_index.append(str("%04d" % sent_val))
    else:
        sent_val += 1
        sent_index.append(str("%04d" % sent_val))

In [51]:
# Quick Stats of Sentences Corresponding to Company Labels
total_all.groupby('company_label', sort = False).count()

Unnamed: 0_level_0,all_sentences,company_index
company_label,Unnamed: 1_level_1,Unnamed: 2_level_1
EliLilly,217,217
UnitedHealthGroup,1554,1554
Merck,4593,4593
BristolMyersSquibb,3103,3103
Danaher,1736,1736
...,...,...
Dow,4846,4846
Dominion_Energy,671,671
Duke_Energy,1016,1016
AEP,1575,1575


### [Display] total_all DF with company_label, company_index, sent_index

In [52]:
total_all['sent_index'] = sent_index

In [53]:
total_all[total_all.sent_index == '0001']

Unnamed: 0,all_sentences,company_label,company_index,sent_index
0,"7/7/22, 10:29 AM",EliLilly,0001,0001
263,Our Mission in Action,UnitedHealthGroup,0002,0001
2813,"Environmental, Social & Governance (ESG) Progr...",Merck,0003,0001
9847,"Environmental, Social and Governance Report Ou...",BristolMyersSquibb,0004,0001
13336,TABLE OF CONTENTS,Danaher,0005,0001
...,...,...,...,...
32934,INtersections,Dow,0068,0001
0,REPORT 2021 A report based on the recommendati...,Dominion_Energy,0069,0001
677,1 2021 DUKE ENERGY ESG REPORT D UK E E NE ...,Duke_Energy,0070,0001
1752,2022 Corporate Sustainability Report 2022 CORP...,AEP,0071,0001


### [Display] total_relevant DF with company_label, company_index, sent_index

In [54]:
total_relevant['company_index'] = total_relevant.apply(lambda x: comp_dict[x.company_label], axis = 1)

In [55]:
total_relevant

Unnamed: 0,relevant_sentences,company_label,company_index
0,"In 2021, 9.6% of our purchased electricity cam...",EliLilly,0001
1,A large portion of this renewable electricity ...,EliLilly,0001
2,"From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001
3,"In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001
4,This reduction was partially driven by energy ...,EliLilly,0001
...,...,...,...
67,2018 Retired and demolished 636 MW of coal and...,NextEraEnergyZeroCarbonBlueprint,0072
68,"2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072
69,2020 Retired 615 MW of nuclear and 330 MW of c...,NextEraEnergyZeroCarbonBlueprint,0072
70,"2021 Added 2,008 MW of wind, 1,547 MW of solar...",NextEraEnergyZeroCarbonBlueprint,0072


## B) Extracting Irrelevant Sentences from All Sentences

### This method below doesn't correctly extract irrelevant sentences. Another method is needed

In [56]:
rel_test = []
for i in total_relevant['relevant_sentences'].to_list():
    if i in total_all['all_sentences'].to_list():
        rel_test.append(i)

In [57]:
print("This method to extract relevant sentences: ", len(rel_test), "vs.", "original_relevant_sentences: ", len(total_relevant['relevant_sentences'].to_list()))

This method to extract relevant sentences:  558 vs. original_relevant_sentences:  999


### Use rapidfuzz to conduct string comparison

In [58]:
from rapidfuzz import process, fuzz

### B-1) Eliminating Short Sentences from Relevant and All Sentences

### [Display] Checking the number of letters in each sentence to make sure sentences that have less than 4 letters are eliminated

In [59]:
import numpy as np
np.unique([len(i.split(" ")) for i in total_relevant['relevant_sentences']])[0:50]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [60]:
np.unique([len(i.split(" ")) for i in total_all['all_sentences']])[0:50]

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50])

In [61]:
total_relevant['sent_count'] = total_relevant['relevant_sentences'].str.split().str.len()

In [62]:
total_all['sent_count'] = total_all['all_sentences'].str.split().str.len()

In [63]:
total_relevant = total_relevant[total_relevant['sent_count'] >= 4]

In [64]:
total_all = total_all[total_all['sent_count'] >= 4]

In [65]:
# Check the length of new sentences after dropping duplicates

print("relevant_sentences:", len(original_relevant), "->", len(total_relevant))
print("all_sentences:", len(original_all), "->", len(total_all))

relevant_sentences: 999 -> 970
all_sentences: 129779 -> 101380


In [66]:
# rel_var = total_relevant['relevant_sentences'].to_list()
# rel_lab = total_relevant['company_label'].to_list()
# rel_comp_index = total_relevant['company_index'].to_list()

# all_var = total_all['all_sentences'].to_list()
# all_lab = total_all['company_label'].to_list()
# all_comp_index = total_all['company_index'].to_list()

### =============================== PAUSE RUNNING HERE ====================================

### B-2) Delete Dots that Affected the Performance of String Matching

In [67]:
# Check total_relevant and total_all sentences 

# for i in total_relevant.relevant_sentences:
#     print(tokenize.sent_tokenize(i))
#     print()

In [68]:
# for i in total_all.all_sentences:
#     print(i)
#     print()

In [69]:
# Example list of companies 
company_list[0:5]

['EliLilly', 'UnitedHealthGroup', 'Merck', 'BristolMyersSquibb', 'Danaher']

In [70]:
# Reset Index to easily iterate through the dataframe 
total_all = total_all.reset_index(drop = True)

In [71]:
for idx in total_all.index:
    each_row = total_all.at[idx, 'all_sentences']
    if '. . .' in each_row:
        print(each_row)
        total_all.at[idx, 'all_sentences'] = ''

. . . . .10
. . . . .14
. . . . .25
. . . . . . 53
. . . . . . 44
. . . . . . 66
Introduction  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 2 CEO’s Letter  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 4 We Rise to the Challenge, Together . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5


In [72]:
for idx in total_all.index:
    each_row = total_all.at[idx, 'all_sentences']
    if '.  .  .  ' in each_row:
        print(each_row)
        total_all.at[idx, 'all_sentences'] = ''

.  .  .  .  .  .  .  . 2 About Amgen .
.  .  .  .  .  .  .  .  .  .  .  . 3 Awards and Rankings .  .  .  .  .  . 5 Leadership Message .
.  .  .  .  .  . 6 Our Approach to ESG .  .  .  .  .  . 7
COVID-19 Response .  .  .  .  .  . 11
Access to Medicines .  .  .  .  .  . 16 Health Equity .
.  .  .  .  .  .  .  .  .  .  .  . 20
.  .  .  .  .  .  .  .  .  .  .  .  .  . 46
Diversity, Inclusion and Belonging .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 27 Human Capital Management .
.  .  .  .  .  .  .  .  .  .  .  . 33 Community Investment.
.  .  .  . 42
Corporate Governance.  .  .  .  .  . 55 Business Ethics .
.  .  .  .  .  .  .  .  .  .  .  . 56 Ethical Research .  .  .  .  .  .  .  .  .  .  .  . 58 Patient Safety and Product Quality .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 60 Cybersecurity and Data Privacy .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 63 Government Affairs and Public Policy. .  .  .  .  .  .  .  .  .  .  . 65
.  .  .  .  .  .  .  .  .  .  .  .  .  .  

### B-3) String Matching Algorithm (TODO: Write Clean and Modular Code)

In [73]:
total_all

Unnamed: 0,all_sentences,company_label,company_index,sent_index,sent_count
0,Environmental | 2021 ESG Report | Eli Lilly an...,EliLilly,0001,0002,10
1,"Our Environmental Approach Our purpose, to mak...",EliLilly,0001,0004,18
2,Making medicines requires the use of valuable ...,EliLilly,0001,0005,14
3,We’re committed to reducing our environmental ...,EliLilly,0001,0006,18
4,"To track our progress, we measure and manage e...",EliLilly,0001,0007,27
...,...,...,...,...,...
101375,→ FPL’s four nuclear units continue to operat...,NextEraEnergyZeroCarbonBlueprint,0072,0216,10
101376,Technology We assume that: → FPL’s gas plants...,NextEraEnergyZeroCarbonBlueprint,0072,0217,28
101377,→ NextEra Energy Resources would invest in el...,NextEraEnergyZeroCarbonBlueprint,0072,0218,26
101378,→ All non-FPL fossil generation assets would ...,NextEraEnergyZeroCarbonBlueprint,0072,0219,14


In [74]:
# Revised String Matching Algorithm: 
# 1) use process.extract with scorer = fuzz.WRatio w/ cutoff = 90
# 2) use process.extract with scorer = fuzz.partial_ratio w/ cutoff = 90
# Test Case 1: if 1) and 2) only has one extracted value compare their lengths and append the longer version
# Test Case 2: if 1) and 2) have different length of extracted sentences --> check if they have the same scorer
# value or not 
    # If they do have the same scorer value: join all the sentences into one
    # If they don't have the same scorer value: only get the max ratio 
    # Compare these two values' length and append the one with greater length 

matrel = [] # with compound match
mat_exact_rel = [] # without compounded match
matlabel = []
orgrel = []
track = 0

# loop over the company_list to ensure we're only comparing rel and irr sentences within the same company
for comp in company_list: 
    # selecting rel sentences based on company label     
    total_relevant_comp = total_relevant[total_relevant.company_label == comp]
    for i, j in zip(total_relevant_comp.relevant_sentences, total_relevant_comp.company_index):
        # all sentences based on company label
        total_all_comp = total_all[total_all.company_label == comp]

#         val = process.extractOne(i, total_all_comp.all_sentences, scorer = fuzz.WRatio, score_cutoff = 90)
#         val_2 = process.extractOne(i, total_all_comp.all_sentences, scorer = fuzz.partial_ratio, score_cutoff = 90)
        extract_val_wratio = process.extract(i, total_all_comp.all_sentences, scorer = fuzz.WRatio, score_cutoff = 90)
        extract_val_partial = process.extract(i, total_all_comp.all_sentences, scorer = fuzz.partial_ratio, score_cutoff = 90)
        print()
        print('Original Rel Sentence:', i)
        print(extract_val_wratio)
        print(extract_val_partial)
        print()
        if (len(extract_val_wratio) >= 1) or (len(extract_val_partial) >= 1): 
            # Test Case #1
            if len(extract_val_wratio) == 1 and len(extract_val_partial) == 1:
                if len(extract_val_wratio[0]) >= len(extract_val_partial[0]):
                    matrel.append(extract_val_wratio[0][0])
                    mat_exact_rel.append(extract_val_wratio[0][0])
                    print(extract_val_wratio[0][0])
                    
                    matlabel.append(j)
                    orgrel.append(i)
                    
                elif len(extract_val_wratio[0]) < len(extract_val_partial[0]):
                    matrel.append(extract_val_partial[0][0])
                    mat_exact_rel.append(extract_val_partial[0][0])
                    print(extract_val_partial[0][0])
                
                    matlabel.append(j)
                    orgrel.append(i)
                
            # Test Case #2        
            else: 
#                 print(i)
                wratio_scorer = [score[1] for score in extract_val_wratio]
#                 print(wratio_scorer)
                
                if len(set(wratio_scorer)) == 1:
                    match_only_wratio = [extract[0] for extract in extract_val_wratio]
                    
                    if len(match_only_wratio[0]) == len(i):
                        wratio_val = (match_only_wratio[0], [match_only_wratio[0]])
                    else:
                        if len(match_only_wratio) > 1:
                            if fuzz.partial_ratio(match_only_wratio[0], i) > 90 and fuzz.partial_ratio(match_only_wratio[1], i) < 90: 
                                wratio_val = (match_only_wratio[0], [match_only_wratio[0]])
                            elif fuzz.partial_ratio(match_only_wratio[0], i) > 90 and fuzz.partial_ratio(match_only_wratio[1], i) > 90:
                                wratio_val = (' '.join(match_only_wratio) , match_only_wratio)
                            else:
                                wratio_val = (match_only_wratio[0], [match_only_wratio[0]])
                        else:
                            wratio_val = (match_only_wratio[0], [match_only_wratio[0]])
                            
                        
                elif len(set(wratio_scorer)) > 1:
                    wratio_val = (extract_val_wratio[0][0], [extract_val_wratio[0][0]]) 
                
                partial_scorer = [score[1] for score in extract_val_partial]
#                 print(partial_scorer)
            
                if len(set(partial_scorer)) == 1:
                    match_only_partial = [extract[0] for extract in extract_val_partial]
                                                             
                    if len(match_only_partial[0]) == len(i):
                        partial_val = (match_only_partial[0], [match_only_partial[0]])
                        
                    else:
                        if len(match_only_partial) > 1:
                            if fuzz.partial_ratio(match_only_partial[0], i) > 90 and fuzz.partial_ratio(match_only_partial[1], i) < 90: 
                                partial_val = (match_only_partial[0], [match_only_partial[0]])
                            elif fuzz.partial_ratio(match_only_partial[0], i) > 90 and fuzz.partial_ratio(match_only_partial[1], i) > 90:
                                partial_val = (' '.join(match_only_partial) , match_only_partial)
                            else:
                                partial_val = (match_only_partial[0], [match_only_partial[0]])
                            
                        else:
                            partial_val = (match_only_partial[0], [match_only_partial[0]])
                                                               
                elif len(set(partial_scorer)) > 1:
                    partial_val = (extract_val_partial[0][0], [extract_val_partial[0][0]])

                    
                if len(wratio_val[0]) >= len(partial_val[0]):
                    if len(wratio_val[1]) == 1:
                        matrel.append(wratio_val[0])
                        mat_exact_rel.append(wratio_val[0])
                        print(wratio_val[0])

                        matlabel.append(j)
                        orgrel.append(i)
                    elif len(wratio_val[1]) > 1:
                        for k in range(len(wratio_val[1])):
                            matrel.append(wratio_val[0])
                            mat_exact_rel.append(wratio_val[1][k])
                            print(wratio_val[0])

                            matlabel.append(j)
                            orgrel.append(i)
                
                elif len(wratio_val[0]) < len(partial_val[0]):
                    if len(partial_val[1]) == 1:
                        matrel.append(partial_val[0])
                        mat_exact_rel.append(partial_val[0])
                        print(partial_val[0])
                        
                        matlabel.append(j)
                        orgrel.append(i)
                        
                    elif len(partial_val[1]) > 1:
                        for k in range(len(partial_val[1])):
                            matrel.append(partial_val[0])
                            mat_exact_rel.append(partial_val[1][k])
                            print(partial_val[0])
                            
                            matlabel.append(j)
                            orgrel.append(i)
                    
            print()
            print(track, end = " ")
            print()
            track +=1
        
        # if extracted values are both None    
        else: 
            matrel.append(None)
            mat_exact_rel.append(None)
            matlabel.append(j)
            orgrel.append(i)
            print()
            print(track, end = " ")
            print()
            track +=1


Original Rel Sentence: In 2021, 9.6% of our purchased electricity came from renewable sources.
[('Enhancing the Use of Solar In 2021, 9.6% of our purchased electricity was secured from renewable sources.', 91.37404580152672, 68), ('Looking toward the future, we have set climate goals for 2030 as we work toward contributing to a low-carbon economy: Secure 100% of our purchased electricity from renewable sources In 2021, 9.6% of our purchased electricity came from renewable sources.', 90.0, 29)]
[('Looking toward the future, we have set climate goals for 2030 as we work toward contributing to a low-carbon economy: Secure 100% of our purchased electricity from renewable sources In 2021, 9.6% of our purchased electricity came from renewable sources.', 100.0, 29)]

Looking toward the future, we have set climate goals for 2030 as we work toward contributing to a low-carbon economy: Secure 100% of our purchased electricity from renewable sources In 2021, 9.6% of our purchased electricity cam


Original Rel Sentence: These projects will address approximately 35 percent of our company’s Scope 2 emissions by collectively adding 145 megawatts (MW) of solar and wind energy to the grid.
[('These projects will address approximately 35 percent of our company’s Scope 2 emissions by collectively adding 145 megawatts (MW) of solar and wind energy to the grid.', 100.0, 2670)]
[('These projects will address approximately 35 percent of our company’s Scope 2 emissions by collectively adding 145 megawatts (MW) of solar and wind energy to the grid.', 100.0, 2670)]

These projects will address approximately 35 percent of our company’s Scope 2 emissions by collectively adding 145 megawatts (MW) of solar and wind energy to the grid.

16 

Original Rel Sentence: These agreements follow a 2018 U.S. wind VPPA, which has added 60 MW of new renewable energy capacity, while providing our company with the associated renewable energy credits.
[('These agreements follow a 2018 U.S. wind VPPA, which has


Original Rel Sentence: Between 2019 and 2020, the total contribution made equates to 6,723 metric tonnes of CO2 sequestered—equivalent
[('Between 2019 and 2020, the total contribution made equates to', 90.0, 5789)]
[('Between 2019 and 2020, the total contribution made equates to', 100.0, 5789)]

Between 2019 and 2020, the total contribution made equates to

30 

Original Rel Sentence: As part of our Sustainability 2020 Goals, we transitioned Orencia’s packaging to a high product density lightweight packaging, leading to: 320,000 tons in annual CO2e emissions reduction.
[('As part of our Sustainability 2020 Goals, we transitioned Orencia’s packaging to a high product density lightweight packaging, leading to:', 95.0, 5947), ('in annual CO2e emissions reduction.', 90.0, 5948)]
[('As part of our Sustainability 2020 Goals, we transitioned Orencia’s packaging to a high product density lightweight packaging, leading to:', 100.0, 5947), ('in annual CO2e emissions reduction.', 100.0, 5948)]




Original Rel Sentence: As part of our ongoing efforts to leverage data science across our operations, in 2021, we automated a manual freight load planning process to produce a 3D loading plan that optimizes space utilization of shipping containers, resulting in lower GHG emissions from shipping, a faster loading process and cost savings.
[('Reducing carbon emissions by land, air and sea: As part of our ongoing efforts to leverage data science across our operations, in 2021, we automated a manual freight load planning process to produce a 3D loading plan that optimizes space utilization of shipping containers, resulting in lower GHG emissions from shipping, a faster loading process and cost savings.', 95.0, 9176)]
[('Reducing carbon emissions by land, air and sea: As part of our ongoing efforts to leverage data science across our operations, in 2021, we automated a manual freight load planning process to produce a 3D loading plan that optimizes space utilization of shipping containers,


Original Rel Sentence: Generated Electricity (Cogeneration) 196 Generated Electricity (Renewables) 7 Generated On-Site from Renewables 7 Purchased Renewables from Third Party 678 Renewable Energy 678 % of Renewable Energy Purchased 15% Energy Consumption by Source(o) 1,000 Gigajoules Electricity Consumption by Type Purchased Energy Consumed (1,000
[]
[('Energy Consumption by Source(o)', 100.0, 12446), ('Generated On-Site from Renewables', 100.0, 12449), ('Purchased Renewables from Third Party', 100.0, 12450), ('Electricity Consumption by Type', 100.0, 12451), ('% of Renewable Energy Purchased', 100.0, 12455)]

Energy Consumption by Source(o) Generated On-Site from Renewables Purchased Renewables from Third Party Electricity Consumption by Type % of Renewable Energy Purchased
Energy Consumption by Source(o) Generated On-Site from Renewables Purchased Renewables from Third Party Electricity Consumption by Type % of Renewable Energy Purchased
Energy Consumption by Source(o) Generated On-


Original Rel Sentence: ENERGY In 2021, we increased the amount of renewable energy in our purchased electricity to 79% compared to 41% in 2020 We purchased in-country renewable energy certificates to make our facilities in Thousand Oaks, CA; West Greenwich, RI; Louisville, KY; Cambridge, MA; Breda, Netherlands; and Singapore use 100% renewable electricity This 2021 Energy Use by Source
[('In 2021, we increased the amount of renewable energy in our purchased electricity to 79% compared to 41% in 2020.', 90.0, 15541), ('We purchased in-country renewable energy certificates to make our facilities in Thousand Oaks, CA; West Greenwich, RI; Louisville, KY; Cambridge, MA; Breda, Netherlands; and Singapore use 100% renewable electricity.', 90.0, 15542)]
[('In 2021, we increased the amount of renewable energy in our purchased electricity to 79% compared to 41% in 2020.', 100.0, 15541), ('We purchased in-country renewable energy certificates to make our facilities in Thousand Oaks, CA; West Gre


Original Rel Sentence: In 2021, we reduced our energy intensity by 12% from our 2018 baseline.
[('In 2021, we reduced our energy intensity by 12% from our 2018 baseline.', 100.0, 16751)]
[('In 2021, we reduced our energy intensity by 12% from our 2018 baseline.', 100.0, 16751)]

In 2021, we reduced our energy intensity by 12% from our 2018 baseline.

85 

Original Rel Sentence: Our absolute GHG emissions decreased 32% from 2018 to 2021.
[('Our absolute GHG emissions decreased 32% from 2018 to 2021.', 100.0, 16752)]
[('Our absolute GHG emissions decreased 32% from 2018 to 2021.', 100.0, 16752)]

Our absolute GHG emissions decreased 32% from 2018 to 2021.

86 

Original Rel Sentence: In 2021, we reduced our GHG emissions intensity by 27% from our 2018 baseline.
[('In 2021, we reduced our GHG emissions intensity by 27% from our 2018 baseline.', 100.0, 16753)]
[('In 2021, we reduced our GHG emissions intensity by 27% from our 2018 baseline.', 100.0, 16753)]

In 2021, we reduced our GHG em


Original Rel Sentence: Remote working conditions; reduced production activities; and conservation gains contributed.
[('Remote working conditions; reduced production activities; and conservation gains contributed.', 100.0, 19205)]
[('Remote working conditions; reduced production activities; and conservation gains contributed.', 100.0, 19205)]

Remote working conditions; reduced production activities; and conservation gains contributed.

100 

Original Rel Sentence: Percentage of total energy that is renewable 12%
[('Percentage of total energy that is renewable', 96.7032967032967, 19472)]
[('Percentage of total energy that is renewable', 100.0, 19472)]

Percentage of total energy that is renewable

101 

Original Rel Sentence: In 2020 (in ‘000 Metric Tonnes): Scope 1 CO2e emissions were 15,751 Scope 2 CO2e emissions were 749 Scope 3 CO2e emissions were 21,437
[('In 2020 (in ‘000 Metric Tonnes): •', 90.0, 20037), ('Scope 1 CO2e emissions were 15,751', 90.0, 20038), ('Scope 2 CO2e emissi


Original Rel Sentence: The GTF engine builds on a long track record of Since entering into service in 2016, this technology has saved operators 600 million gallons of fuel and avoided nearly 6 million metric tons of CO2.
[('Since entering into service in 2016, this technology has saved operators 600 million gallons of fuel and avoided nearly 6 million metric tons of CO2.', 95.0, 20695)]
[('Since entering into service in 2016, this technology has saved operators 600 million gallons of fuel and avoided nearly 6 million metric tons of CO2.', 100.0, 20695)]

Since entering into service in 2016, this technology has saved operators 600 million gallons of fuel and avoided nearly 6 million metric tons of CO2.

115 

Original Rel Sentence: Our engineers are determined to drive additional
[('Our engineers are determined to drive additional incremental emissions reductions.', 90.0, 20696)]
[('Our engineers are determined to drive additional incremental emissions reductions.', 100.0, 20696)]

Our


Original Rel Sentence: Our ability to achieve our ambitious climate goals is dependent on the actions of governments and third parties and will require, among other things, significant capital investment, including from third parties, research and development from manufacturers and other stakeholders, along with government policies and incentives to reduce the cost, and incent production, of SAF and other technologies that are not presently in existence or available at scale.
[('Our ability to achieve our ambitious climate goals is dependent on the actions of governments and third parties and will require, among other things, significant capital investment, including from third parties, research and development from manufacturers and other stakeholders, along with government policies and incentives to reduce the cost, and incent production, of SAF and other technologies that are not presently in existence or available at scale.', 100.0, 22233)]
[('Our ability to achieve our ambitious 


Original Rel Sentence: For Delta to reach net zero, out-of-sector solutions will likely be a necessary complement to fleet, SAF, operational initiatives and other in-sector technology improvements, and there will be a need to transition from avoidance (preventing deforestation) and reduction (solar, wind) type projects to removal projects that are absorbing incremental and additional CO2 from the atmosphere.
[('For Delta to reach net zero, out-of-sector solutions will likely be a necessary complement to fleet, SAF, operational initiatives and other in-sector technology improvements, and there will be a need to transition from avoidance (preventing deforestation) and reduction (solar, wind) type projects to removal projects that are absorbing incremental and additional CO2 from the atmosphere.', 100.0, 22316)]
[('For Delta to reach net zero, out-of-sector solutions will likely be a necessary complement to fleet, SAF, operational initiatives and other in-sector technology improvements, 


Original Rel Sentence: Progress Towards Our Goals Improve fuel efficiency on an ASM basis ASM: Fuel efficiency on a capacity basis improved by 0.8% compared to 2020 partially as a result of our fleet retirements.
[('Progress Towards Our Goals', 90.0, 22348), ('ASM: Fuel efficiency on a capacity basis improved by 0.8% compared to 2020 partially as a result of our fleet retirements.', 90.0, 22357), ('Fuel efficiency on a capacity basis', 90.0, 22843)]
[('Progress Towards Our Goals', 100.0, 22348), ('ASM: Fuel efficiency on a capacity basis improved by 0.8% compared to 2020 partially as a result of our fleet retirements.', 100.0, 22357), ('Fuel efficiency on a capacity basis', 100.0, 22843)]

Progress Towards Our Goals ASM: Fuel efficiency on a capacity basis improved by 0.8% compared to 2020 partially as a result of our fleet retirements. Fuel efficiency on a capacity basis
Progress Towards Our Goals ASM: Fuel efficiency on a capacity basis improved by 0.8% compared to 2020 partially as


Original Rel Sentence: This system takes advantage of the FAA’s NextGen airspace modernization initiative and can shorten flight times by 3-5 minutes.
[('This system takes advantage of the FAA’s NextGen airspace modernization initiative and can shorten flight times by 3-5 minutes.', 100.0, 22415)]
[('This system takes advantage of the FAA’s NextGen airspace modernization initiative and can shorten flight times by 3-5 minutes.', 100.0, 22415)]

This system takes advantage of the FAA’s NextGen airspace modernization initiative and can shorten flight times by 3-5 minutes.

166 

Original Rel Sentence: By reducing overall flight time, this system drives both fuel savings and can help customers reach their destination more quickly.
[('By reducing overall flight time, this system drives both fuel savings and can help customers reach their destination more quickly.', 100.0, 22416)]
[('By reducing overall flight time, this system drives both fuel savings and can help customers reach their des


Original Rel Sentence: In 2021, John Deere launched the C770 Cotton Harvesters, which are the culmination of more than a decade-long journey that started in 2009 to revolutionize cotton harvesting.
[('In 2021, John Deere launched the C770 Cotton Harvesters, which are the culmination of more than a decade-long journey that started in 2009 to revolutionize cotton harvesting.', 100.0, 23033)]
[('In 2021, John Deere launched the C770 Cotton Harvesters, which are the culmination of more than a decade-long journey that started in 2009 to revolutionize cotton harvesting.', 100.0, 23033)]

In 2021, John Deere launched the C770 Cotton Harvesters, which are the culmination of more than a decade-long journey that started in 2009 to revolutionize cotton harvesting.

180 

Original Rel Sentence: These machines represent most signiﬁ cant advancement in The beneﬁ ts of the C770 include an impressive 20-percent fuel savings through efﬁ cient engine technology and machine design — reducing fuel costs 


Original Rel Sentence: 2050 3M’s goal is to achieve carbon neutrality by
[('3M’s goal is to achieve carbon neutrality by', 95.0, 27146)]
[('3M’s goal is to achieve carbon neutrality by', 100.0, 27146)]

3M’s goal is to achieve carbon neutrality by

197 

Original Rel Sentence: Goal: improve energy efficiency, indexed to net sales, by 30% by 2025 Goal: increase renewable energy to 50% of total electricity use by 2025 In 2019, we achieved our 2025 goal by surpassing 25% renewable electricity.
[('Goal: Increase renewable energy to', 90.0, 24785), ('In 2019, we achieved our 2025 goal by surpassing 25% renewable electricity.', 90.0, 27269), ('In 2019, we achieved our 2025 goal by surpassing 25% renewable electricity', 90.0, 27290)]
[('Goal: Increase renewable energy to', 100.0, 24785), ('of total electricity use', 100.0, 25297), ('In 2019, we achieved our 2025 goal by surpassing 25% renewable electricity.', 100.0, 27269), ('In 2019, we achieved our 2025 goal by surpassing 25% renewable ele


Original Rel Sentence: We affirm our ambitious target of a more than 30% reduction in greenhouse gas emissions related to sales of petroleum prod- ucts (Scope 3 Oil) by 2030 compared to 2015.
[('We affirm our ambitious target of a more than 30% reduction in greenhouse gas emissions related to sales of petroleum products (Scope 3 Oil) by 2030 compared to 2015.', 99.3975903614458, 29154)]
[('We affirm our ambitious target of a more than 30% reduction in greenhouse gas emissions related to sales of petroleum products (Scope 3 Oil) by 2030 compared to 2015.', 98.7878787878788, 29154)]

We affirm our ambitious target of a more than 30% reduction in greenhouse gas emissions related to sales of petroleum products (Scope 3 Oil) by 2030 compared to 2015.

222 

Original Rel Sentence: To that, we add phased targets for reducing methane emissions (50% from 2020 levels by 2025 and 80% from 2020 levels by 2030) to move towards zero methane and an objective of less than 0.1 million cubic meters per


Original Rel Sentence: Decarbonize our electricity purchases in Europe and the United States (Scope 2) by 2025.
[('•D  ecarbonize our electricity purchases in Europe and the United States (Scope 2) by 2025.', 98.86363636363636, 29660)]
[('•D  ecarbonize our electricity purchases in Europe and the United States (Scope 2) by 2025.', 99.42196531791907, 29660)]

•D  ecarbonize our electricity purchases in Europe and the United States (Scope 2) by 2025.

239 

Original Rel Sentence: Develop a carbon storage offer for our customers with capacity exceeding 10 Mt/year by 20305.
[('•D  evelop a carbon storage offer for our customers with capacity exceeding 10 Mt/year by 20305.', 98.9247311827957, 29673)]
[('•D  evelop a carbon storage offer for our customers with capacity exceeding 10 Mt/year by 20305.', 99.4535519125683, 29673)]

•D  evelop a carbon storage offer for our customers with capacity exceeding 10 Mt/year by 20305.

240 

Original Rel Sentence: Develop a CCS capacity of more than 10


Original Rel Sentence: — In 2020, TotalEnergies decided to aim for net zero emissions for all electricity purchases at its operated sites in Europe by 2025.
[('In 2020, TotalEnergies decided to aim for net zero emissions for all electricity purchases at its operated sites in Europe by 2025.', 100.0, 29755)]
[('In 2020, TotalEnergies decided to aim for net zero emissions for all electricity purchases at its operated sites in Europe by 2025.', 100.0, 29755)]

In 2020, TotalEnergies decided to aim for net zero emissions for all electricity purchases at its operated sites in Europe by 2025.

253 

Original Rel Sentence: All In Europe, electricity will be provided by solar farms acquired in Spain in 2020, offering capacity of 5 GW and production of 10 TWh/year by 2025.
[('• In Europe, electricity will be provided by solar farms acquired in Spain in 2020, offering capacity of 5 GW and production of 10 TWh/year by 2025.', 98.63945578231292, 29758)]
[('• In Europe, electricity will be provide


Original Rel Sentence: In all three cases, the CO2 would be stored in depeleted reservoirs in the North Sea.
[('In all three cases, the CO2 would be stored in depeleted reservoirs in the North Sea.', 100.0, 29844)]
[('In all three cases, the CO2 would be stored in depeleted reservoirs in the North Sea.', 100.0, 29844)]

In all three cases, the CO2 would be stored in depeleted reservoirs in the North Sea.

270 

Original Rel Sentence: Developing transport and storage projects — • In Norway, the Company, together with Equinor and Shell, launched Northern Lights, the first large-scale carbon transport and storage project.
[('• In Norway, the Company, together with Equinor and Shell, launched Northern Lights, the first large-scale carbon transport and storage project.', 95.0, 29846), ('Developing transport and storage projects —', 90.0, 29845)]
[('Developing transport and storage projects —', 100.0, 29845), ('• In Norway, the Company, together with Equinor and Shell, launched Northern Lig


Original Rel Sentence: Our objectives for 2030 TotalEnergies has set a target for 2030 of reducing its global Scope 3 emissions – i.e., those from the energy products used by our customers – to below 2015 levels, even though over the same period the Company plans to produce and sell 30% more energy products due, in particular, to growth in sales of elec- tricity and LNG.
[('TotalEnergies has set a target for 2030 of reducing its global Scope 3 emissions – i.e., those from the energy products used by our customers – to below 2015 levels, even though over the same period the Company plans to produce and sell 30% more energy products due, in particular, to growth in sales of electricity and LNG.', 96.13095238095238, 29911)]
[('Our objectives for 2030', 100.0, 29910), ('TotalEnergies has set a target for 2030 of reducing its global Scope 3 emissions – i.e., those from the energy products used by our customers – to below 2015 levels, even though over the same period the Company plans to pr


Original Rel Sentence: In support of those commitments by the European Commis- sion, it has set a target in Europe of reducing Scope 1+2+3 emissions by 30% between 2015 and 2030.
[('In support of those commitments by the European Commission, it has set a target in Europe of reducing Scope 1+2+3 emissions by 30% between 2015 and 2030.', 99.34640522875817, 30024)]
[('In support of those commitments by the European Commission, it has set a target in Europe of reducing Scope 1+2+3 emissions by 30% between 2015 and 2030.', 98.68421052631578, 30024)]

In support of those commitments by the European Commission, it has set a target in Europe of reducing Scope 1+2+3 emissions by 30% between 2015 and 2030.

294 

Original Rel Sentence: Emissions from operated facilities have declined by approxi- mately 20% since 2015.
[]
[('Our Progress in 2021 and our Objectives for 2030 The credibility of the Company’s ambition for 2050 hinges on its ability to show the progress it has made so far, and it is 


Original Rel Sentence: Scope 1 (direct) emissions, covered by aim 1, were 33.2MtCO2e in 2021, a decrease of 20% from 41.7MtCO2e in 2020.
[('Scope 1 (direct) emissions, covered by aim 1, were 33.2MtCO2e in 2021, a decrease of 20% from 41.7MtCO2e in 2020.', 100.0, 31385)]
[('Scope 1 (direct) emissions, covered by aim 1, were 33.2MtCO2e in 2021, a decrease of 20% from 41.7MtCO2e in 2020.', 100.0, 31385)]

Scope 1 (direct) emissions, covered by aim 1, were 33.2MtCO2e in 2021, a decrease of 20% from 41.7MtCO2e in 2020.

309 

Original Rel Sentence: Of those Scope 1 emissions 32.0MtCO2e were from CO2 and 1.1MtCO2e from methanec.
[('Of those Scope 1 emissions 32.0MtCO2e were from CO2 and', 95.0, 31386)]
[('Of those Scope 1 emissions 32.0MtCO2e were from CO2 and', 100.0, 31386)]

Of those Scope 1 emissions 32.0MtCO2e were from CO2 and

310 

Original Rel Sentence: Emissions decreased due to divestments, delivery of SERs and other permanent operational changes.
[('Emissions decreased due to di


Original Rel Sentence: The estimated Scope 3 emissions from the carbon in our upstream oil and gas production were 304MtCO2 in 2021, a reduction of approximately 7% from 328MtCO2 in 2020, mainly associated with portfolio changes, including divestments and existing field decline.
[('The estimated Scope 3 emissions from the carbon in our upstream oil and gas production were 304MtCO2 in 2021, a reduction of approximately 7% from 328MtCO2 in 2020, mainly associated with portfolio changes, including divestments and existing field decline.', 100.0, 31423)]
[('The estimated Scope 3 emissions from the carbon in our upstream oil and gas production were 304MtCO2 in 2021, a reduction of approximately 7% from 328MtCO2 in 2020, mainly associated with portfolio changes, including divestments and existing field decline.', 100.0, 31423)]

The estimated Scope 3 emissions from the carbon in our upstream oil and gas production were 304MtCO2 in 2021, a reduction of approximately 7% from 328MtCO2 in 2020,


Original Rel Sentence: Together with H2Teesside, which has the capacity to deliver a planned 1GW CCS-enabled blue hydrogen, bp projects could deliver 30% of the UK government’s 5GW hydrogen target.
[('Together with H2Teesside, which has the capacity to deliver a planned 1GW CCS-enabled blue hydrogen, bp projects could deliver 30% of the UK government’s 5GW hydrogen target.', 100.0, 31537)]
[('Together with H2Teesside, which has the capacity to deliver a planned 1GW CCS-enabled blue hydrogen, bp projects could deliver 30% of the UK government’s 5GW hydrogen target.', 100.0, 31537)]

Together with H2Teesside, which has the capacity to deliver a planned 1GW CCS-enabled blue hydrogen, bp projects could deliver 30% of the UK government’s 5GW hydrogen target.

337 

Original Rel Sentence: The This aligns with bp’s aim to accelerate our EV charging ambition across key growth markets, through a focus on ‘on-the-go’ charging and fleets and growing our network of around 13,100 charge points tod


Original Rel Sentence: This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbon-intensity targets.
[('This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbon-intensity targets.', 100.0, 32645), ('This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbonintensity targets.', 99.63636363636364, 32705)]
[('This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbon-intensity targets.', 100.0, 32645), ('This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbonintensity targets.', 99.27007299270073, 32705)]

This new target covers all Scope 1 and 2 emissions under Shell’s operational control and complements our existing carbon-intensity targets.

351 

Original Rel Sentence: We ai


Original Rel Sentence: In the meantime, we continue to deliver against our 2025 public goals, including reducing our absolute end-to-end greenhouse gas emissions by 10% by 2025 vs a 2018 baseline, which is equivalent to a 23% reduction vs business-as-usual.
[('In the meantime, we continue to deliver against our 2025 public goals, including reducing our absolute end-to-end greenhouse gas emissions by 10% by 2025 vs a 2018 baseline, which is equivalent to a 23% reduction vs business-as-usual.', 100.0, 34728)]
[('In the meantime, we continue to deliver against our 2025 public goals, including reducing our absolute end-to-end greenhouse gas emissions by 10% by 2025 vs a 2018 baseline, which is equivalent to a 23% reduction vs business-as-usual.', 100.0, 34728)]

In the meantime, we continue to deliver against our 2025 public goals, including reducing our absolute end-to-end greenhouse gas emissions by 10% by 2025 vs a 2018 baseline, which is equivalent to a 23% reduction vs business-as-us


Original Rel Sentence: Our East Suzhou plant launched its solar generation installation, generating 1,317 MWh of power and reducing CO2e by 800 metric tonnes annually.
[('Our East Suzhou plant launched its solar generation installation, generating 1,317 MWh of power and reducing CO2e by 800 metric tonnes annually.', 100.0, 34774)]
[('Our East Suzhou plant launched its solar generation installation, generating 1,317 MWh of power and reducing CO2e by 800 metric tonnes annually.', 100.0, 34774)]

Our East Suzhou plant launched its solar generation installation, generating 1,317 MWh of power and reducing CO2e by 800 metric tonnes annually.

373 

Original Rel Sentence: Our Beijing plant’s new solar panels are generating 187 Megawatt-hour (MWh) of electricity and saving 117 metric tonnes of CO2e annually.
[('Our Beijing plant’s new solar panels are generating 187 Megawatt-hour (MWh) of electricity and saving 117 metric tonnes of CO2e annually.', 100.0, 34775)]
[('Our Beijing plant’s new so


Original Rel Sentence: • Reduced coal usage (already phased out in 2022) within our India plant, where we’ve transitioned to using rice husks as biofuel.
[('• Reduced coal usage (already phased out in 2022) within our India plant, where we’ve transitioned to using rice husks as biofuel.', 100.0, 36387)]
[('• Reduced coal usage (already phased out in 2022) within our India plant, where we’ve transitioned to using rice husks as biofuel.', 100.0, 36387)]

• Reduced coal usage (already phased out in 2022) within our India plant, where we’ve transitioned to using rice husks as biofuel.

384 

Original Rel Sentence: Scope 3 (Value Chain Emissions) • Strengthened on-farm sustainability.
[('Scope 3 (Value Chain Emissions)', 90.0, 36388), ('• Strengthened on-farm sustainability.', 90.0, 36390)]
[('Scope 3 (Value Chain Emissions)', 100.0, 36388), ('• Strengthened on-farm sustainability.', 100.0, 36390)]

Scope 3 (Value Chain Emissions) • Strengthened on-farm sustainability.
Scope 3 (Value Chain


Original Rel Sentence: We prioritize insetting projects in our supply chain when possible and purchase certified carbon credits when needed.
[('We prioritize insetting projects in our supply chain when possible and purchase certified carbon credits when needed.', 100.0, 40062)]
[('We prioritize insetting projects in our supply chain when possible and purchase certified carbon credits when needed.', 100.0, 40062)]

We prioritize insetting projects in our supply chain when possible and purchase certified carbon credits when needed.

395 

Original Rel Sentence: Overall, in 2021, our emissions decreased in absolute terms by 1.7 percent across our value chain versus 2020, amounting to a total reduction of 84,000 tons of CO2e.
[('Overall, in 2021, our emissions decreased in absolute terms by 1.7 percent across our value chain versus 2020, amounting to a total reduction of 84,000 tons of CO2e.', 100.0, 40117)]
[('Overall, in 2021, our emissions decreased in absolute terms by 1.7 percent acr


Original Rel Sentence: We have reduced absolute scope 1 & 2 greenhouse gas emissions by 25%.
[('We have reduced absolute scope 1 &', 90.0, 42576), ('2 greenhouse gas emissions by 25%.', 90.0, 42578)]
[('We have reduced absolute scope 1 &', 100.0, 42576), ('2 greenhouse gas emissions by 25%.', 100.0, 42578)]

We have reduced absolute scope 1 & 2 greenhouse gas emissions by 25%.
We have reduced absolute scope 1 & 2 greenhouse gas emissions by 25%.

414 

Original Rel Sentence: The We have two key strategies to reduce greenhouse gas emissions by 2020: • Improve operating efficiency by 20%.
[]
[('Targets Initiative recently updated their guidance and validated our target as “well below 2’C.” We have two key strategies to reduce greenhouse gas emissions by 2020: • Improve operating efficiency by 20%.', 98.63013698630137, 42579), ('2 greenhouse gas emissions by 25%.', 93.75, 42578)]

Targets Initiative recently updated their guidance and validated our target as “well below 2’C.” We have two


Original Rel Sentence: In India, SLMG Beverages Private Limited—our largest franchise bottling partner in India—is working toward maximizing use of solar energy in its operations through investments and installation of solar panels across its facilities.
[('In India, SLMG Beverages Private Limited—our largest franchise bottling partner in India—is working toward maximizing use of solar energy in its operations through investments and installation of solar panels across its facilities.', 100.0, 45646)]
[('In India, SLMG Beverages Private Limited—our largest franchise bottling partner in India—is working toward maximizing use of solar energy in its operations through investments and installation of solar panels across its facilities.', 100.0, 45646)]

In India, SLMG Beverages Private Limited—our largest franchise bottling partner in India—is working toward maximizing use of solar energy in its operations through investments and installation of solar panels across its facilities.

427 




Original Rel Sentence: For transportation and distribution-related emissions (Scope 3), we are focusing on improving data quality to enable better decision making and maximize efficiency.
[('For transportation and distribution-related emissions (Scope 3), we are focusing on improving data quality to enable better decision making and maximize efficiency.', 100.0, 48265)]
[('For transportation and distribution-related emissions (Scope 3), we are focusing on improving data quality to enable better decision making and maximize efficiency.', 100.0, 48265)]

For transportation and distribution-related emissions (Scope 3), we are focusing on improving data quality to enable better decision making and maximize efficiency.

439 

Original Rel Sentence: We are party to a Virtual Power Purchase Agreement (VPPA) for 22 megawatts (MW) of wind power from the Ponderosa wind farm in Oklahoma, United States.
[('INVESTING IN WIND ENERGY We are party to a Virtual Power Purchase Agreement (VPPA) for 22 m


Original Rel Sentence: CLIMATE AND ENERGY: Our global approach to reducing our climate impact includes a balanced portfolio of emissions-reduction activities focused on reducing our energy consumption and expense, investing in energy efficiency projects, and sourcing low-carbon and renewable energy sources for our direct operations.
[('CLIMATE AND ENERGY: Our global approach to reducing our climate impact includes a balanced portfolio of emissions-reduction activities focused on reducing our energy consumption and expense, investing in energy efficiency projects, and sourcing low-carbon and renewable energy sources for our direct operations.', 100.0, 50311), ('Our global approach to reducing our climate impact includes a balanced portfolio of emissions-reduction activities focused on reducing our energy consumption and expense, investing in energy efficiency projects, and sourcing low-carbon and renewable energy sources for our direct operations.', 96.66666666666667, 51160)]
[('CLIMAT


Original Rel Sentence: Through its regional energy and climate strategy, TJX Canada avoided or offset over 35,500 metric tons of CO2e calculated for its fiscal 2021 GHG inventory.
[('Through its regional energy and climate strategy, TJX Canada avoided or offset over 35,500 metric tons of CO2e calculated for its fiscal 2021 GHG inventory.', 100.0, 50383)]
[('Through its regional energy and climate strategy, TJX Canada avoided or offset over 35,500 metric tons of CO2e calculated for its fiscal 2021 GHG inventory.', 100.0, 50383)]

Through its regional energy and climate strategy, TJX Canada avoided or offset over 35,500 metric tons of CO2e calculated for its fiscal 2021 GHG inventory.

462 

Original Rel Sentence: Conserving Energy: In fiscal 2021, TJX Canada continued to implement technologies, like LED lighting and HVAC replacements, which reduced our GHG inventory by over 739,000 kilowatt hours.
[('Here’s how TJX Canada achieved these results: / Conserving Energy: In fiscal 2021, TJX


Original Rel Sentence: Under our science-based target goal linked to our annual CDP reporting, we have committed to reducing our Scope 1 and Scope 2 carbon dioxide emissions 2.1% per year to achieve a 40% reduction by 2030 and a 50% reduction by 2035.
[('Under our science-based target goal linked to our annual CDP reporting, we have committed to reducing our Scope 1 and Scope 2 carbon dioxide emissions 2.1% per year to achieve a 40% reduction by 2030 and a 50% reduction by 2035.', 100.0, 52077)]
[('Under our science-based target goal linked to our annual CDP reporting, we have committed to reducing our Scope 1 and Scope 2 carbon dioxide emissions 2.1% per year to achieve a 40% reduction by 2030 and a 50% reduction by 2035.', 100.0, 52077)]

Under our science-based target goal linked to our annual CDP reporting, we have committed to reducing our Scope 1 and Scope 2 carbon dioxide emissions 2.1% per year to achieve a 40% reduction by 2030 and a 50% reduction by 2035.

474 

Original Rel


Original Rel Sentence: The Home Depot provided over 3.4 million pounds of shredded paper for recycling in 2020.
[('The Home Depot provided over 3.4 million pounds of shredded paper for recycling in 2020.', 100.0, 52517)]
[('The Home Depot provided over 3.4 million pounds of shredded paper for recycling in 2020.', 100.0, 52517), ('The Home Depot FC1', 90.9090909090909, 51197), ('The Home Depot 103', 90.9090909090909, 52665), ('The Home Depot 105', 90.9090909090909, 52706), ('The Home Depot 107', 90.9090909090909, 52730)]

The Home Depot provided over 3.4 million pounds of shredded paper for recycling in 2020.

488 

Original Rel Sentence: Here’s how that helped the planet: Avoided 2.5 million pounds of CO2 emissions
[('Here’s how that helped the planet:', 90.0, 52518)]
[('Here’s how that helped the planet:', 100.0, 52518), ('Avoided 2.5 million pounds of CO2 emissions 2014', 94.5054945054945, 52513)]

Here’s how that helped the planet:

489 

Original Rel Sentence: In 2021, the lightin


Original Rel Sentence: Our reduction thus far has been primarily due to our 2021 transition to 100% renewable electricity across our enterprise through the purchase of unbundled energy attribute certificates (EACs) that matched our total electricity consumption for the year of which 92% is compliant with the RE100 market boundary criteria.
[('Our reduction thus far has been primarily due to our 2021 transition to 100% renewable electricity across our enterprise through the purchase of unbundled energy attribute certificates (EACs) that matched our total electricity consumption for the year of which 92% is compliant with the RE100 market boundary criteria.', 100.0, 56989)]
[('Our reduction thus far has been primarily due to our 2021 transition to 100% renewable electricity across our enterprise through the purchase of unbundled energy attribute certificates (EACs) that matched our total electricity consumption for the year of which 92% is compliant with the RE100 market boundary criter


Original Rel Sentence: On a path to powering our operations with 100% renewable energy by 2025 Making 50% of all shipments net-zero carbon by 2030 Reaching net-zero carbon emissions across our operations by 2040
[]
[('Net-Zero Carbon by 2040', 95.65217391304348, 59222), ('Net-Zero Carbon Reaching net-zero carbon emissions across our operations by 2040', 90.0, 59329)]

The 6.8 million metric tons of vehicle CO2e savings estimate is based on the net CO2e savings during the use-phase of a Tesla vehicle compared to an ICE vehicle with a real-world fuel economy of ~24 mpg (of which 0.9 million metric tons was avoided through annual renewables matching for the global Supercharger network and home charging in California).

519 

Original Rel Sentence: Deploying 100,000 custom electric delivery vehicles by 2030
[('Electric Delivery Vehicles Deploying 100,000 custom electric delivery vehicles by 2030', 95.0, 59330)]
[('Electric Delivery Vehicles Deploying 100,000 custom electric delivery vehic


Original Rel Sentence: A solar farm in Pittsylvania County, Virginia, will power Amazon’s new headquarters in Arlington and other Amazon operations in the region, including Whole Foods Market stores and Amazon fulfillment centers.
[('A solar farm in Pittsylvania County, Virginia, will power Amazon’s new headquarters in Arlington and other Amazon operations in the region, including Whole Foods Market stores and Amazon fulfillment centers.', 100.0, 59490)]
[('A solar farm in Pittsylvania County, Virginia, will power Amazon’s new headquarters in Arlington and other Amazon operations in the region, including Whole Foods Market stores and Amazon fulfillment centers.', 100.0, 59490)]

A solar farm in Pittsylvania County, Virginia, will power Amazon’s new headquarters in Arlington and other Amazon operations in the region, including Whole Foods Market stores and Amazon fulfillment centers.

534 

Original Rel Sentence: Amazon is sharing the project with Arlington County through a public- pri


Original Rel Sentence: This project, along with PPAs delivering power across the U.S. and Canada and various onsite solar projects globally, brings NIKE to 78% of our 100% renewable energy target in owned or operated facilities.
[('This project, along with PPAs delivering power across the U.S. and Canada and various onsite solar projects globally, brings NIKE to 78% of our 100% renewable energy target in owned or operated facilities.', 100.0, 61973)]
[('This project, along with PPAs delivering power across the U.S. and Canada and various onsite solar projects globally, brings NIKE to 78% of our 100% renewable energy target in owned or operated facilities.', 100.0, 61973)]

This project, along with PPAs delivering power across the U.S. and Canada and various onsite solar projects globally, brings NIKE to 78% of our 100% renewable energy target in owned or operated facilities.

552 

Original Rel Sentence: At our Tepana distribution center in Mexico, a rooftop solar array went live.
[('


Original Rel Sentence: Because we do not own our office buildings, our plans to increase renewable electricity are not heavily focused on on-site renewable generation.
[('Because we do not own our office buildings, our plans to increase renewable electricity are not', 90.0, 64616), ('heavily focused on on-site renewable generation.', 90.0, 64618)]
[('Because we do not own our office buildings, our plans to increase renewable electricity are not', 100.0, 64616), ('heavily focused on on-site renewable generation.', 100.0, 64618)]

Because we do not own our office buildings, our plans to increase renewable electricity are not heavily focused on on-site renewable generation.
Because we do not own our office buildings, our plans to increase renewable electricity are not heavily focused on on-site renewable generation.

570 

Original Rel Sentence: Rather, we are engaging market based renewable electricity purchase opportunities.
[('Rather, we are engaging market based renewable', 90.0, 646


Original Rel Sentence: In 2021, we moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.
[('We moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.', 95.85253456221197, 66922), ('Our approach In 2021, we moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.', 95.0, 66919)]
[('Our approach In 2021, we moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.', 100.0, 66919), ('We moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.', 100.0, 66922), ('We accelerated our commitment to reach net-zero emissions by a decade, from 2050 to 2040.', 91.86046511627907, 67608)]

Our approach In 2021, we moved up the target date of our commitment to reach net-zero emissions by a decade, from 2050 to 2040.

590 

Original Rel Sentence: We To further our effort


Original Rel Sentence: We are continuing efforts with our suppliers, who account for more than 70 percent of our Scope 3 emissions, through engagement, interventions and target-setting.
[('We are continuing efforts with our suppliers, who account for more than 70 percent of our Scope 3 emissions, through engagement, interventions and target-setting.', 100.0, 67007)]
[('We are continuing efforts with our suppliers, who account for more than 70 percent of our Scope 3 emissions, through engagement, interventions and target-setting.', 100.0, 67007)]

We are continuing efforts with our suppliers, who account for more than 70 percent of our Scope 3 emissions, through engagement, interventions and target-setting.

602 

Original Rel Sentence: help us achieve our 2040 net-zero goal.
[('We have developed a supplier engagement program that will help us achieve our 2040 net-zero goal.', 90.0, 67003)]
[('We have developed a supplier engagement program that will help us achieve our 2040 net-zero g


Original Rel Sentence: We sourced 38% of our FY22 electricity use from renewable sources, a 13 percentage point increase from the previous year.
[('We sourced 38% of our FY22 electricity use from renewable sources, a 13 percentage point increase from the previous year.', 100.0, 68652)]
[('We sourced 38% of our FY22 electricity use from renewable sources, a 13 percentage point increase from the previous year.', 100.0, 68652)]

We sourced 38% of our FY22 electricity use from renewable sources, a 13 percentage point increase from the previous year.

618 

Original Rel Sentence: In addition to similarly advanced levels of energy- and water-efficient design, this building features biophilic elements and a shade-providing external trellis that will house 390 kW of solar panels to harness Santa Clara's sunny weather.
[("In addition to similarly advanced levels of energy- and water-efficient design, this building features biophilic elements and a shade-providing external trellis that will hou


Original Rel Sentence: In September 2021, Cisco committed to net zero greenhouse gas emissions by 2040, 10 years ahead of the time by which climate scientists say the planet must reach net zero to avoid the worst effects of climate change.
[('Energy and emissions goals In September 2021, Cisco committed to net zero greenhouse gas emissions by 2040, 10 years ahead of the time by which climate scientists say the planet must reach net zero to avoid the worst effects of climate change.', 95.0, 71245)]
[('Energy and emissions goals In September 2021, Cisco committed to net zero greenhouse gas emissions by 2040, 10 years ahead of the time by which climate scientists say the planet must reach net zero to avoid the worst effects of climate change.', 100.0, 71245)]

Energy and emissions goals In September 2021, Cisco committed to net zero greenhouse gas emissions by 2040, 10 years ahead of the time by which climate scientists say the planet must reach net zero to avoid the worst effects of cli


Original Rel Sentence: The Cisco 8201 consumes 96 percent less energy per year than the NCS 6008, while supplying 35 percent more bandwidth, as well as being five times more power-efficient than its closest competitor.
[('The Cisco 8201 consumes 96 percent less energy per year than the NCS 6008, while supplying 35 percent more bandwidth, as well as being five times more power-efficient than its closest competitor.', 100.0, 71299)]
[('The Cisco 8201 consumes 96 percent less energy per year than the NCS 6008, while supplying 35 percent more bandwidth, as well as being five times more power-efficient than its closest competitor.', 100.0, 71299)]

The Cisco 8201 consumes 96 percent less energy per year than the NCS 6008, while supplying 35 percent more bandwidth, as well as being five times more power-efficient than its closest competitor.

635 

Original Rel Sentence: These technology improvements, along with the use of 80 Plus Titanium-rated power supplies, can reduce the energy consump


Original Rel Sentence: Reduced Scope 1 and 2 by 16.9% We reduced our Scope 1 and 2 (market- based) emissions by 58,654 metric tons of carbon dioxide equivalents (mtCO2) in FY21.
[('Reduced Scope 1 87% supplier and 2 by 16.9% reporting We reduced our Scope 1 and 2 (marketbased) emissions by 58,654 metric tons of carbon dioxide equivalents (mtCO2) in FY21.', 92.35474006116208, 71831)]
[('Reduced Scope 1 87% supplier and 2 by 16.9% reporting We reduced our Scope 1 and 2 (marketbased) emissions by 58,654 metric tons of carbon dioxide equivalents (mtCO2) in FY21.', 91.33333333333333, 71831)]

Reduced Scope 1 87% supplier and 2 by 16.9% reporting We reduced our Scope 1 and 2 (marketbased) emissions by 58,654 metric tons of carbon dioxide equivalents (mtCO2) in FY21.

645 

Original Rel Sentence: Scope 3 emissions increased by 22.7 percent.
[('Scope 3 emissions increased by 22.7 percent.', 100.0, 71832)]
[('Scope 3 emissions increased by 22.7 percent.', 100.0, 71832)]

Scope 3 emissions incr


Original Rel Sentence: At the Bengaluru Cosmo office site, we installed an uninterruptible power supply (UPS) with flywheel to store kinetic energy to provide backup power without the need for lead acid batteries typically used in conventional UPS.
[('Avoiding emissions with kinetic energy for backup power At the Bengaluru Cosmo office site, we installed an uninterruptible power supply (UPS) with flywheel to store kinetic energy to provide backup power without the need for lead acid batteries typically used in conventional UPS.', 95.0, 71910)]
[('Avoiding emissions with kinetic energy for backup power At the Bengaluru Cosmo office site, we installed an uninterruptible power supply (UPS) with flywheel to store kinetic energy to provide backup power without the need for lead acid batteries typically used in conventional UPS.', 100.0, 71910)]

Avoiding emissions with kinetic energy for backup power At the Bengaluru Cosmo office site, we installed an uninterruptible power supply (UPS) wit


Original Rel Sentence: The datacenter’s generators run on Preem Evolution Diesel Plus, the world’s first Nordic Eco-labelled fuel, which contains at least 50 percent renewable raw material, and nearly an equivalent reduction in net carbon dioxide emissions compared with standard fossil diesel blends.
[('The datacenter’s generators run on Preem Evolution Diesel Plus, the world’s first Nordic Eco-labelled fuel, which contains at least 50 percent renewable raw material, and nearly an equivalent reduction in net carbon dioxide emissions compared with standard fossil diesel blends.', 100.0, 71934)]
[('The datacenter’s generators run on Preem Evolution Diesel Plus, the world’s first Nordic Eco-labelled fuel, which contains at least 50 percent renewable raw material, and nearly an equivalent reduction in net carbon dioxide emissions compared with standard fossil diesel blends.', 100.0, 71934)]

The datacenter’s generators run on Preem Evolution Diesel Plus, the world’s first Nordic Eco-label


Original Rel Sentence: This latest procurement brings our operating and contracted renewable energy projects to 7.8 GW globally and we are positioned to continue to grow our renewable resource portfolio.
[('This latest procurement brings our operating and contracted renewable energy projects to 7.8 GW globally and we are positioned to continue to grow our renewable resource portfolio.', 100.0, 71988)]
[('This latest procurement brings our operating and contracted renewable energy projects to 7.8 GW globally and we are positioned to continue to grow our renewable resource portfolio.', 100.0, 71988)]

This latest procurement brings our operating and contracted renewable energy projects to 7.8 GW globally and we are positioned to continue to grow our renewable resource portfolio.

691 

Original Rel Sentence: In addition to these PPAs, Microsoft partnered with Volt Energy, the only national African American owned solar development company, for a 250-MW portfolio of solar projects with th


Original Rel Sentence: We are also specifying the use of cement replacement products such as fly ash and slag that reduce the carbon footprint of concrete.
[('We are also specifying the use of cement replacement products such as fly ash and slag that reduce the carbon footprint of concrete.', 100.0, 72028)]
[('We are also specifying the use of cement replacement products such as fly ash and slag that reduce the carbon footprint of concrete.', 100.0, 72028)]

We are also specifying the use of cement replacement products such as fly ash and slag that reduce the carbon footprint of concrete.

703 

Original Rel Sentence: We designed our Surface Laptop Studio to allow for “stamping,” a lower-waste manufacturing technique that reduced our aluminum scrap rate for the product’s base by at least 25 percent, a key contributor to an overall product carbon reduction of 30 percent versus its predecessor, the Surface Book 3 13”.
[('Reduced waste We designed our Surface Laptop Studio to allow for “


Original Rel Sentence: Biochar We invested in a portfolio from Puro.earth Oy, including from Carbon Cycle, Carbofex, and ECHO2, small operations in Germany, Finland, and Australia that use biomass residue (for example, wood chips and forest waste) to sequester carbon dioxide in biochar for use in soil amendment and other products.
[('Biochar We invested in a portfolio from Puro.earth Oy, including from Carbon Cycle, Carbofex, and ECHO2, small operations in Germany, Finland, and Australia that use biomass residue (for example, wood chips and forest waste) to sequester carbon dioxide in biochar for use in soil amendment and other products.', 100.0, 72123)]
[('Biochar We invested in a portfolio from Puro.earth Oy, including from Carbon Cycle, Carbofex, and ECHO2, small operations in Germany, Finland, and Australia that use biomass residue (for example, wood chips and forest waste) to sequester carbon dioxide in biochar for use in soil amendment and other products.', 100.0, 72123)]

Bioch


Original Rel Sentence: In carbon intensity, our Scope 1 and 2 emissions saw significant improvements in 2020 vs 2019: - Decreased 77 percent per employee - Decreased 78 percent per square foot of space - Decreased 76 percent per dollar revenue - Decreased 77 percent per transaction processed In 2020, we achieved carbon neutrality across our operations for the first time.
[('\x83 In carbon intensity, our Scope 1 and 2 emissions saw significant improvements in 2020 vs 2019: - Decreased 77 percent per employee - Decreased 78 percent per square foot of space - Decreased 76 percent per dollar revenue - Decreased 77 percent per transaction processed In 2020, we achieved carbon neutrality across our operations for the first time.', 100.0, 74231)]
[('\x83 In carbon intensity, our Scope 1 and 2 emissions saw significant improvements in 2020 vs 2019: - Decreased 77 percent per employee - Decreased 78 percent per square foot of space - Decreased 76 percent per dollar revenue - Decreased 77 perce


Original Rel Sentence: efforts that support this goal require innovations at scale — like designing and Achieve carbon neutrality for our entire carbon footprint, including products, by 2030.
[('Achieve carbon neutrality for our entire carbon footprint, including products, by 2030.', 90.0, 75192)]
[('Achieve carbon neutrality for our entire carbon footprint, including products, by 2030.', 100.0, 75192)]

Achieve carbon neutrality for our entire carbon footprint, including products, by 2030.

741 

Original Rel Sentence: And reduce related emissions by 75% compared with fiscal year 2015 40% emissions reduction since 2015 across our value chain 23M metric tons of emissions avoided in fiscal year 2021 alone due to carbon reduction initiatives across our value chain Achieved since April 2020 by implementing energy efficiency initiatives, sourcing 100% renewable electricity for Apple facilities, and securing carbon offsets for the remaining corporate emissions Transition our entire manufac


Original Rel Sentence: Our plan to become carbon neutral by 2030 centers around a 75 percent emissions reduction target, compared with our fiscal year 2015 footprint.
[('Progress toward net zero Our plan to become carbon neutral by 2030 centers around a 75 percent emissions reduction target, compared with our fiscal year 2015 footprint.', 95.0, 75273)]
[('Progress toward net zero Our plan to become carbon neutral by 2030 centers around a 75 percent emissions reduction target, compared with our fiscal year 2015 footprint.', 100.0, 75273)]

Progress toward net zero Our plan to become carbon neutral by 2030 centers around a 75 percent emissions reduction target, compared with our fiscal year 2015 footprint.

748 

Original Rel Sentence: We plan to address residual emissions through carbon removals.
[('We plan to address residual emissions through carbon removals.', 100.0, 75274)]
[('We plan to address residual emissions through carbon removals.', 100.0, 75274)]

We plan to address residu


Original Rel Sentence: Apple has generated or sourced 100 percent renewable electricity for its corporate operations since 2018 and we are now committed to transitioning our entire supply chain to 100 percent renewable electricity as well.
[('Apple has generated or sourced 100 percent renewable electricity for its corporate operations since 2018 and we are now committed to transitioning our entire supply chain to 100 percent renewable electricity as well.', 100.0, 75421)]
[('Apple has generated or sourced 100 percent renewable electricity for its corporate operations since 2018 and we are now committed to transitioning our entire supply chain to 100 percent renewable electricity as well.', 100.0, 75421)]

Apple has generated or sourced 100 percent renewable electricity for its corporate operations since 2018 and we are now committed to transitioning our entire supply chain to 100 percent renewable electricity as well.

760 

Original Rel Sentence: We sign long-term contracts for renew


Original Rel Sentence: At 99% of the approximately 1,000 newly built sites and nearly 75% of the approximately 2,100 legacy sites, we have installed solar arrays and lithium-ion batteries (LIBs).
[('At 99% of the approximately 1,000 newly built sites and nearly 75% of the approximately 2,100 legacy sites, we have installed solar arrays and lithium-ion batteries (LIBs).', 100.0, 78416)]
[('At 99% of the approximately 1,000 newly built sites and nearly 75% of the approximately 2,100 legacy sites, we have installed solar arrays and lithium-ion batteries (LIBs).', 100.0, 78416)]

At 99% of the approximately 1,000 newly built sites and nearly 75% of the approximately 2,100 legacy sites, we have installed solar arrays and lithium-ion batteries (LIBs).

777 

Original Rel Sentence: Also, where possible, we have accelerated grid connections at newly built sites to utilize Kenyan grid power, which is increasingly powered by renewables.
[('Also, where possible, we have accelerated grid connecti


Original Rel Sentence: Scope 3 emissions (emissions not directly associated with our operations) represent 99.9% of our total emissions footprint—and our best opportunity to make an impact.
[('Scope 3 emissions (emissions not directly associated with our operations) represent 99.9% of our total emissions footprint—and our best opportunity to make an impact.', 100.0, 79558)]
[('Scope 3 emissions (emissions not directly associated with our operations) represent 99.9% of our total emissions footprint—and our best opportunity to make an impact.', 100.0, 79558)]

Scope 3 emissions (emissions not directly associated with our operations) represent 99.9% of our total emissions footprint—and our best opportunity to make an impact.

792 

Original Rel Sentence: 2021 Scope 1 and 2 Sources 2021 Scope 3 Sources Our 2018 Science Based Target was to reduce emissions 21% (Scopes 1 and 2) and 15% (Scope 3) by 2025, and 56% (Scopes 1 and 2) and 40% (Scope 3) by 2040, from Our updated (2022) Science Bas


Original Rel Sentence: In 2022, we plan to expand EV charging as a service to customers and expect to manage approximately 10 MWs of EV charging capacity.
[('In 2022, we plan to expand EV charging as a service to customers and expect to manage approximately 10 MWs of EV charging capacity.', 100.0, 79623)]
[('In 2022, we plan to expand EV charging as a service to customers and expect to manage approximately 10 MWs of EV charging capacity.', 100.0, 79623), ('\x13\x12\x02\x01 )\x1f + )\x1f )/\x01\x02..0-\x1c)\x1e \x01\x14/\x1c/ ( )/\x01', 100.0, 80024), ('\x01 \x15#$.\x01\x02..0-\x1c)\x1e \x01\x14/\x1c/ ( )/\x01#\x1c.\x01\x1d )\x01+- +\x1c- \x1f\x01!', 100.0, 80028), ('*-(\x1c/$*)\x012$/#$)\x01/# \x01- +*-/\x01\x1c)\x1f\x01!', 100.0, 80041), ('*-\x01/# \x01 )\x1f\x010.', 100.0, 80055)]

In 2022, we plan to expand EV charging as a service to customers and expect to manage approximately 10 MWs of EV charging capacity.

800 

Original Rel Sentence: We have committed to achieving carbon neut


Original Rel Sentence: In fact, we calculate that in 2020, Linde gases, principally oxygen and hydrogen, enabled our customers to avoid 85 million metric tons of CO₂e, which is more than twice as much GHG avoided than emitted from all our operations.
[('In fact, we calculate that in 2020, Linde gases, principally oxygen and hydrogen, enabled our customers to avoid 85 million metric tons of CO₂e, which is more than twice as much GHG avoided than emitted from all our operations.', 100.0, 80734)]
[('In fact, we calculate that in 2020, Linde gases, principally oxygen and hydrogen, enabled our customers to avoid 85 million metric tons of CO₂e, which is more than twice as much GHG avoided than emitted from all our operations.', 100.0, 80734)]

In fact, we calculate that in 2020, Linde gases, principally oxygen and hydrogen, enabled our customers to avoid 85 million metric tons of CO₂e, which is more than twice as much GHG avoided than emitted from all our operations.

819 

Original Rel Sen


Original Rel Sentence: In 2020, the company sourced 16 million MWh low-carbon energy, or 39 percent of all its purchased electricity.
[('In 2020, the company sourced 16 million MWh low-carbon energy, or 39 percent of all its purchased electricity.', 100.0, 80939)]
[('In 2020, the company sourced 16 million MWh low-carbon energy, or 39 percent of all its purchased electricity.', 100.0, 80939)]

In 2020, the company sourced 16 million MWh low-carbon energy, or 39 percent of all its purchased electricity.

834 

Original Rel Sentence: Low-carbon electricity is defined as electricity produced from non-fossil Linde actively sourced 2.4 TWh renewable energy in 2020. electricity use in the UK is almost 100 percent renewable using
[('Linde actively sourced 2.4 TWh renewable energy in 2020.', 90.0, 80944)]
[('Linde actively sourced 2.4 TWh renewable energy in 2020.', 100.0, 80944), ('Linde electricity use in the UK is almost 100 percent renewable using wind.', 92.7536231884058, 80945)]

Linde 


Original Rel Sentence: Linde is at the forefront in the transition to clean hydrogen and has installed nearly 200 hydrogen fueling stations and 80 hydrogen electrolysis plants worldwide and includes decarbonization investments within its SD 2028 targets.
[('Linde is at the forefront in the transition to clean hydrogen and has installed nearly 200 hydrogen fueling stations and 80 hydrogen electrolysis plants worldwide and includes decarbonization investments within its SD 2028 targets.', 100.0, 83152)]
[('Linde is at the forefront in the transition to clean hydrogen and has installed nearly 200 hydrogen fueling stations and 80 hydrogen electrolysis plants worldwide and includes decarbonization investments within its SD 2028 targets.', 100.0, 83152)]

Linde is at the forefront in the transition to clean hydrogen and has installed nearly 200 hydrogen fueling stations and 80 hydrogen electrolysis plants worldwide and includes decarbonization investments within its SD 2028 targets.

847 




Original Rel Sentence: To underscore our commitment to supporting the health of the planet and leading in science-based climate action, we have identified the following high-level environmental goals: • Reduce absolute direct and indirect (Scope 1 and 2 under our operational control) GHG emissions by 50% below 2021 levels by 2030 • Achieve net zero (Scope 1 and 2) GHG emissions by 2040; and be net positive across our entire value chain (cradle-to-gate) by 205010
[('To underscore our commitment to supporting the health of the planet and leading on science-based climate action, we have identified the following high-level environmental goals: • Reduce absolute direct and indirect (Scope 1 and 2 under our operational control) GHG emissions by 50% below 2021 levels by 2030 • Achieve net zero (Scope 1 and 2) GHG emissions by 2040; and be net positive across our entire value chain (cradle-to-gate) by 2050.', 99.5475113122172, 89392), ('To underscore our commitment to supporting the health of


Original Rel Sentence: Finally, from a carbon perspective, this results in an approximately 80,000 metric ton net reduction in CO2 equivalent per year (excluding Scope 3 and at full capacity) versus a comparable fleet of diesel trucks designed to do the same task.
[('Finally, from a carbon perspective, this results in an approximately 80,000 metric ton net reduction in CO2 equivalent per year (excluding Scope 3 and at full capacity) versus a comparable fleet of diesel trucks designed to do the same task.', 100.0, 90012)]
[('Finally, from a carbon perspective, this results in an approximately 80,000 metric ton net reduction in CO2 equivalent per year (excluding Scope 3 and at full capacity) versus a comparable fleet of diesel trucks designed to do the same task.', 100.0, 90012)]

Finally, from a carbon perspective, this results in an approximately 80,000 metric ton net reduction in CO2 equivalent per year (excluding Scope 3 and at full capacity) versus a comparable fleet of diesel truc


Original Rel Sentence: The energy efficiency projects we are evaluating include fleet management and monitoring, haul road optimization and fuel station optimization; fixed plant energy monitoring systems; variable frequency drives for pumps, ball mills and flow control; and waste or electric heat recovery for mine ventilation.
[('The energy efficiency projects we are evaluating include fleet management and monitoring, haul road optimization and fuel station optimization; fixed plant energy monitoring systems; variable frequency drives for pumps, ball mills and flow control; and waste or electric heat recovery for mine ventilation.', 100.0, 93723)]
[('The energy efficiency projects we are evaluating include fleet management and monitoring, haul road optimization and fuel station optimization; fixed plant energy monitoring systems; variable frequency drives for pumps, ball mills and flow control; and waste or electric heat recovery for mine ventilation.', 100.0, 93723)]

The energy eff


Original Rel Sentence: By 2050, Dow intends to be carbon neutral (Scopes 1 + 2 + 3 plus product benefits).
[('By 2050, Dow intends to be carbon neutral (Scopes 1 + 2 + 3 plus product benefits).', 100.0, 94370), ('By 2050, Dow intends to be carbon neutral (Scope 1 + 2 + 3 plus product benefits).', 99.37888198757764, 95977), ('• By 2050, Dow intends to be carbon neutral (Scope 1+2+3 plus product benefits).', 96.81528662420382, 96257)]
[('By 2050, Dow intends to be carbon neutral (Scopes 1 + 2 + 3 plus product benefits).', 100.0, 94370), ('By 2050, Dow intends to be carbon neutral (Scope 1 + 2 + 3 plus product benefits).', 98.75, 95977), ('• By 2050, Dow intends to be carbon neutral (Scope 1+2+3 plus product benefits).', 93.42105263157895, 96257)]

By 2050, Dow intends to be carbon neutral (Scopes 1 + 2 + 3 plus product benefits).

894 

Original Rel Sentence: In 2021, we expanded access to renewable power to more than 900 megawatts (MW), such that more than 25% of our purchased electric


Original Rel Sentence: Moving Toward a Cleaner Generation Fleet and Increased Fuel Diversity (Represents total company view) Transforming the way we produce power (Generation (MWh) by fuel type) 2005 1 2021 1, 2 2030E 3 1% Hydro, wind and solar 33% Nuclear 6% Natural gas 60% Coal/oil 7% Hydro, wind and solar 35% Nuclear 36% Natural gas 22% Coal/oil 25% Hydro, wind and solar 30% Nuclear 40% Natural gas 5% Coal/oil  1 2005 and 2021 data based on Duke Energy’s ownership share of U.S. generation
[('After gathering feedback  from stakeholders in both North Carolina and South  Carolina, we’re incorporating this public input into  Moving Toward a Cleaner Generation  Fleet and Increased Fuel Diversity  (Represents total company view)  Transforming the way we produce power (Generation (MWh) by fuel type) 2005 1   1%  Hydro, wind and solar  33% Nuclear   6% Natural gas 60% Coal/oil 2021 1, 2   7%  Hydro, wind and solar  35% Nuclear  36% Natural gas 22% Coal/oil 2030E 3  25%  Hydro, wind and sol


Original Rel Sentence: Cross-compression technology is being used in certain operational activities such as decommissioning a pipe or in-line inspections to eliminate the release of natural gas to the atmosphere or flaring the natural gas.
[('Cross-compression technology is being used in certain  operational activities such as decommissioning a pipe  or in-line inspections to eliminate the release of natural  gas to the atmosphere or flaring the natural gas.', 99.30715935334872, 98963)]
[('Cross-compression technology is being used in certain  operational activities such as decommissioning a pipe  or in-line inspections to eliminate the release of natural  gas to the atmosphere or flaring the natural gas.', 98.6046511627907, 98963)]

Cross-compression technology is being used in certain  operational activities such as decommissioning a pipe  or in-line inspections to eliminate the release of natural  gas to the atmosphere or flaring the natural gas.

914 

Original Rel Sentence: This 


Original Rel Sentence: Electricity Generated (net megawatt-hours)  MWh (thousands) Percent Nuclear 75,328 34.9% Wind 2 7,387 3.4% Conventional Hydro2 2,870 1.3% Solar2 4,325 2.0% Natural Gas 77,679 36.0% Fuel Cell 374 0.2% Coal 48,181 22.3% Oil 214 0.1% Pumped-Storage Hydro 3 (614) -0.3% Generation Capacity (megawatts)  MW Percent 8,907 16.5% 2,987 5.5% 1,339 2.5% 1,973 3.7% 19,788 36.7% 44 0.1% 15,652 29.0% 995 1.8% 2,300 4.3% Total Carbon-Free 89,910 15,206 41.7% 28.2% 78,053 48,395 22.4% 19,832 36.7% 30.8% 16,647 215,745 100% 53,985 100% 36.2% 2021 19.9 27.3 631
[]
[]


927 

Original Rel Sentence: Emissions From Electric Generation 1
[]
[('Scope 1 Emissions Emissions From Electric Generation 1 2005 2019 2020 2021 CO2 emissions (thousand metric/short tons)  139,000/ 153,000 84,000/  93,000 74,000/  82,000 77,000/  85,000 CO2 emissions intensity (pounds per net kWh) 1.29 0.86 0.78 0.79 SO2 emissions (metric/short tons)  1,004,000/  1,107,000 28,000/  31,000 24,000/  27,000 23,000/  


Original Rel Sentence: The increased stakeholder demand for clean energy combined with approximately 8 GW of planned retirements and expiring purchase power agreements (PPAs) between 2022-2030, is creating economic energy opportunities and driving renewable energy growth.
[('The increased stakeholder demand for clean energy combined with approximately 8 GW of planned retirements and expiring purchase power agreements (PPAs) between 2022-2030, is creating economic energy opportunities and driving renewable energy growth.', 100.0, 100120)]
[('The increased stakeholder demand for clean energy combined with approximately 8 GW of planned retirements and expiring purchase power agreements (PPAs) between 2022-2030, is creating economic energy opportunities and driving renewable energy growth.', 100.0, 100120)]

The increased stakeholder demand for clean energy combined with approximately 8 GW of planned retirements and expiring purchase power agreements (PPAs) between 2022-2030, is creating 


Original Rel Sentence: Technologies of interest include: Renewable Energy Energy Storage Small Modular Nuclear Reactors (SMRs) Carbon Capture with Utilization or Storage Hydrogen and Other Chemical Energy Carriers Other Technologies (as they are identified) In addition, AEP continues to operate the 636 MW Smith Mountain hydroelectric facility located near Roanoke, Virginia.
[('In addition, AEP continues to operate the 636 MW Smith Mountain hydroelectric facility located near Roanoke, Virginia.', 90.0, 100168)]
[('In addition, AEP continues to operate the 636 MW Smith Mountain hydroelectric facility located near Roanoke, Virginia.', 100.0, 100168)]

In addition, AEP continues to operate the 636 MW Smith Mountain hydroelectric facility located near Roanoke, Virginia.

948 

Original Rel Sentence: AEP’s Donald C. Cook Nuclear Plant in Bridgman, Michigan, can provide 2,296 MW of carbon- free electricity when operating at full power – enough to power 1.5 million homes.
[('AEP’s Donald C. C

### B-4) Check the Outcome of Matched Sentences and Save the Files

In [75]:
# Save a copy of string_matched DataFrame
string_matched_old = pd.DataFrame([matrel, mat_exact_rel, orgrel, matlabel], index = ['matched_rel', 'matched_rel_from_all', 'original_rel', 'company_label']).transpose()

In [76]:
string_matched = pd.DataFrame([matrel, mat_exact_rel, orgrel, matlabel], index = ['matched_rel', 'matched_rel_from_all', 'original_rel', 'company_label']).transpose()

In [77]:
# In V5: extracting sentences portion from the tuple with (sentence, ratio) format
# --> In V6, matched_rel_only is the same thing as matched_rel
# string_matched['matched_rel_only'] = [i if i is not None else '' for i in string_matched.matched_rel]

In [78]:
string_matched.matched_rel

0       Looking toward the future, we have set climate...
1       A large portion of this renewable electricity ...
2       From 2012 to 2020, we achieved a 26% reduction...
3       In 2021, we achieved a 9% absolute emissions r...
4       use of renewable electricity including the sta...
                              ...                        
1120    2018  Retired and demolished 636 MW of coal an...
1121    2019 Aquired Gulf Power, which added 1,750 MW ...
1122    2020  Retired 615 MW of nuclear and 330 MW of ...
1123    2021 Added 2,008 MW of wind, 1,547 MW of solar...
1124                                                 None
Name: matched_rel, Length: 1125, dtype: object

In [79]:
# for i, j in zip(string_matched.matched_rel, string_matched.company_label):
#     if 'org to develop' in str(i):
#         print(i, j)

In [80]:
string_matched.to_csv("string_matched.csv", encoding = 'utf-8-sig')

### =============================== RESUME RUNNING HERE ==================================

### B-5) Keep Track of Sentences that Didn't Get Matched

In [81]:
string_matched = pd.read_csv('string_matched.csv', index_col = 0)

In [82]:
string_matched.head()

Unnamed: 0,matched_rel,matched_rel_from_all,original_rel,company_label
0,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate...","In 2021, 9.6% of our purchased electricity cam...",1
1,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,1
2,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...",1
3,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...",1
4,use of renewable electricity including the sta...,use of renewable electricity including the sta...,This reduction was partially driven by energy ...,1


In [83]:
still_not_matched = string_matched[string_matched.matched_rel.isnull()]

In [84]:
still_not_matched.head()

Unnamed: 0,matched_rel,matched_rel_from_all,original_rel,company_label
157,,,One of our largest renewable energy contracts ...,15
302,,,PRODUCE MORE ENERGY FOR OUR CUSTOMERS WHILE RE...,21
457,,,"GHG EMISSIONS1,2,3 EMISSIONS INTENSITY4",25
508,,,The above are supportive of our progress towar...,33
692,,,FUEL AND ENERGY CONSUMPTION GREENHOUSE GAS EMI...,47


In [85]:
print("String Matched:", len(string_matched), " ", "String Not Matched:", len(still_not_matched))

String Matched: 1125   String Not Matched: 25


### B-6) Trying to Understand why String Matching Failed for Some Sentences

In [86]:
for i,j in zip(still_not_matched.original_rel, still_not_matched.company_label):
    print(i, "-->", j)
    print()

One of our largest renewable energy contracts began participation in an off-site community solar program. --> 15

PRODUCE MORE ENERGY FOR OUR CUSTOMERS WHILE REDUCING OUR PRODUCTS’ CARBON FOOTPRINT NET ZERO BY 2050, TOGETHER WITH SOCIETY In line with the objectives of the Paris Agreement --> 21

GHG EMISSIONS1,2,3 EMISSIONS INTENSITY4 --> 25

The above are supportive of our progress toward long- term, 2030 targets to: – Reduce absolute Scope 1 & Scope 2 emissions by 55%1 – Reduce absolute Scope 3 emissions by 18%1 – Achieve 100% renewable electricity – Reduce waste sent to landfill by 25% – Achieve 100% water neutrality each year --> 33

FUEL AND ENERGY CONSUMPTION GREENHOUSE GAS EMISSIONS --> 47

SCOPE 1 AND 2 EMISSIONS SCOPE 3 EMISSIONS been, and continues to be, on decarbonization. --> 48

Scope 1 Scope 2 Scope 3 FY18 FY17 FY20 FY21 FY19 10,662,000 139,066 107,452 11,745,000 11,466,000 11,239,000 --> 52

In addition to these purchases, we’re using our campuses have installed on-site

In [87]:
# Calculate Sentence Length for this Analysis
still_not_matched['sent_count'] = still_not_matched['original_rel'].str.split().str.len()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  still_not_matched['sent_count'] = still_not_matched['original_rel'].str.split().str.len()


In [88]:
# Reverse Dictionary to apply company label for easy analysis on the excelsheet
reversedDict = dict()
for key in comp_dict:
    val = int(comp_dict[key])
    reversedDict[val] = key

In [89]:
company_name = []
for i in still_not_matched.company_label:
    company_name.append(reversedDict[i])

In [90]:
still_not_matched['company_name'] = company_name

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  still_not_matched['company_name'] = company_name


In [91]:
# Reorder Columns to easily view the spreadsheet
still_not_matched = still_not_matched[['matched_rel', 'original_rel', 'company_label', 'company_name', 'matched_rel_from_all', 'sent_count']]

In [92]:
still_not_matched.to_csv('sentenences_not_matched.csv', encoding = 'utf-8-sig')

### B-7) Keep Track of Sentences that Got Matched (only use this version from now on: disregard sentences not matched and short sentences)

In [93]:
sentences_matched = string_matched[string_matched.matched_rel.notnull()]

In [94]:
sentences_matched = sentences_matched.drop_duplicates(subset = ['matched_rel_from_all', 'company_label'])

In [95]:
sentences_matched.to_csv('sentences_matched.csv', encoding = 'utf-8-sig')

In [96]:
len(sentences_matched) # increased because there are compounded sentences

1086

In [97]:
len(sentences_matched.matched_rel.unique()) # unique relevant sentences

936

In [98]:
# Acquired more sentences from previous versions 881 --> 913 --> 936

### B-8) Separate Relevant and Irrelevant Sentences: aligning matched sentences with total_all sentences

In [99]:
sentences_matched = pd.read_csv('sentences_matched.csv', index_col = 0)

In [100]:
sentences_matched.columns

Index(['matched_rel', 'matched_rel_from_all', 'original_rel', 'company_label'], dtype='object')

In [101]:
# Remove any duplicates
sentences_matched[sentences_matched.matched_rel_from_all.duplicated()]

Unnamed: 0,matched_rel,matched_rel_from_all,original_rel,company_label
472,We now aim to achieve carbon neutrality in our...,We now aim to achieve carbon neutrality in our...,"At PepsiCo, we’re striving to achieve net-zero...",27
606,The 6.8 million metric tons of vehicle CO2e sa...,The 6.8 million metric tons of vehicle CO2e sa...,On a path to powering our operations with 100%...,43


In [102]:
sentences_matched = sentences_matched.drop_duplicates('matched_rel_from_all')

In [103]:
original_merge_match = sentences_matched.rename(columns = {'original_rel': 'relevant_sentences'})

In [104]:
# Create this original_merge_match dataframe to get started on merging with other dataframes
original_merge_match

Unnamed: 0,matched_rel,matched_rel_from_all,relevant_sentences,company_label
0,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate...","In 2021, 9.6% of our purchased electricity cam...",1
1,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,1
2,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...",1
3,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...",1
4,use of renewable electricity including the sta...,use of renewable electricity including the sta...,This reduction was partially driven by energy ...,1
...,...,...,...,...
1119,"2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re...",72
1120,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal and...,72
1121,"2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ...",72
1122,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of c...,72


### Merge total_relevant and original_merge_match --> merge_rel_matched

In [106]:
total_relevant.columns

Index(['relevant_sentences', 'company_label', 'company_index', 'sent_count'], dtype='object')

In [107]:
merge_rel_matched = total_relevant.merge(original_merge_match, how = 'left', on = 'relevant_sentences')

In [108]:
merge_rel_matched = merge_rel_matched[merge_rel_matched.matched_rel.notnull()].drop(['company_label_y'], axis = 1)

In [109]:
merge_rel_matched.columns

Index(['relevant_sentences', 'company_label_x', 'company_index', 'sent_count',
       'matched_rel', 'matched_rel_from_all'],
      dtype='object')

In [117]:
# Remove any duplicates here
# merge_rel_matched[merge_rel_matched.relevant_sentences.duplicated()]

### Merge total_all and merge_rel_matched --> merge_rel_all

In [118]:
merge_rel_all = merge_rel_matched.rename(columns = {'matched_rel_from_all': 'all_sentences'})

In [119]:
merge_rel_all.columns

Index(['relevant_sentences', 'company_label_x', 'company_index', 'sent_count',
       'matched_rel', 'all_sentences'],
      dtype='object')

In [120]:
merge_rel_all

Unnamed: 0,relevant_sentences,company_label_x,company_index,sent_count,matched_rel,all_sentences
0,"In 2021, 9.6% of our purchased electricity cam...",EliLilly,0001,11,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate..."
1,A large portion of this renewable electricity ...,EliLilly,0001,24,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...
2,"From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001,12,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction..."
3,"In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001,11,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r..."
4,This reduction was partially driven by energy ...,EliLilly,0001,27,use of renewable electricity including the sta...,use of renewable electricity including the sta...
...,...,...,...,...,...,...
1113,"2017 Retired and demolished 250 MW of coal, re...",NextEraEnergyZeroCarbonBlueprint,0072,23,"2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re..."
1114,2018 Retired and demolished 636 MW of coal and...,NextEraEnergyZeroCarbonBlueprint,0072,38,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal an...
1115,"2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072,27,"2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ..."
1116,2020 Retired 615 MW of nuclear and 330 MW of c...,NextEraEnergyZeroCarbonBlueprint,0072,45,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of ...


In [121]:
total_all

Unnamed: 0,all_sentences,company_label,company_index,sent_index,sent_count
0,Environmental | 2021 ESG Report | Eli Lilly an...,EliLilly,0001,0002,10
1,"Our Environmental Approach Our purpose, to mak...",EliLilly,0001,0004,18
2,Making medicines requires the use of valuable ...,EliLilly,0001,0005,14
3,We’re committed to reducing our environmental ...,EliLilly,0001,0006,18
4,"To track our progress, we measure and manage e...",EliLilly,0001,0007,27
...,...,...,...,...,...
101375,→ FPL’s four nuclear units continue to operat...,NextEraEnergyZeroCarbonBlueprint,0072,0216,10
101376,Technology We assume that: → FPL’s gas plants...,NextEraEnergyZeroCarbonBlueprint,0072,0217,28
101377,→ NextEra Energy Resources would invest in el...,NextEraEnergyZeroCarbonBlueprint,0072,0218,26
101378,→ All non-FPL fossil generation assets would ...,NextEraEnergyZeroCarbonBlueprint,0072,0219,14


## NUMBERS DON'T ADD UP BECAUSE OF THIS: Incorrect Merge as we have 121 Compounded Sentences. Find a way to track this

In [122]:
test_merge = merge_rel_all.merge(total_all, how = 'left', on = 'all_sentences')

In [123]:
test_merge.columns

Index(['relevant_sentences', 'company_label_x', 'company_index_x',
       'sent_count_x', 'matched_rel', 'all_sentences', 'company_label',
       'company_index_y', 'sent_index', 'sent_count_y'],
      dtype='object')

In [124]:
test_merge

Unnamed: 0,relevant_sentences,company_label_x,company_index_x,sent_count_x,matched_rel,all_sentences,company_label,company_index_y,sent_index,sent_count_y
0,"In 2021, 9.6% of our purchased electricity cam...",EliLilly,0001,11,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate...",EliLilly,0001,0041,40
1,A large portion of this renewable electricity ...,EliLilly,0001,24,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,EliLilly,0001,0042,24
2,"From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001,12,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001,0046,12
3,"In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001,11,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001,0047,11
4,This reduction was partially driven by energy ...,EliLilly,0001,27,use of renewable electricity including the sta...,use of renewable electricity including the sta...,EliLilly,0001,0049,13
...,...,...,...,...,...,...,...,...,...,...
1079,"2017 Retired and demolished 250 MW of coal, re...",NextEraEnergyZeroCarbonBlueprint,0072,23,"2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re...",NextEraEnergyZeroCarbonBlueprint,0072,0052,23
1080,2018 Retired and demolished 636 MW of coal and...,NextEraEnergyZeroCarbonBlueprint,0072,38,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal an...,NextEraEnergyZeroCarbonBlueprint,0072,0053,38
1081,"2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072,27,"2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072,0054,27
1082,2020 Retired 615 MW of nuclear and 330 MW of c...,NextEraEnergyZeroCarbonBlueprint,0072,45,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of ...,NextEraEnergyZeroCarbonBlueprint,0072,0055,45


### test_merge used to output final dataframes

In [125]:
# Drop irrelevant columns
test_merge = test_merge.drop(['company_label_x', 'company_index_x'], axis = 1)

In [127]:
# test_merge_clean = test_merge.drop(['matched_rel'], axis = 1)
test_merge_clean = test_merge
test_merge_clean.rename(columns = {'company_index_y': 'company_index'})

Unnamed: 0,relevant_sentences,sent_count_x,matched_rel,all_sentences,company_label,company_index,sent_index,sent_count_y
0,"In 2021, 9.6% of our purchased electricity cam...",11,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate...",EliLilly,0001,0041,40
1,A large portion of this renewable electricity ...,24,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,EliLilly,0001,0042,24
2,"From 2012 to 2020, we achieved a 26% reduction...",12,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001,0046,12
3,"In 2021, we achieved a 9% absolute emissions r...",11,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001,0047,11
4,This reduction was partially driven by energy ...,27,use of renewable electricity including the sta...,use of renewable electricity including the sta...,EliLilly,0001,0049,13
...,...,...,...,...,...,...,...,...
1079,"2017 Retired and demolished 250 MW of coal, re...",23,"2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re...",NextEraEnergyZeroCarbonBlueprint,0072,0052,23
1080,2018 Retired and demolished 636 MW of coal and...,38,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal an...,NextEraEnergyZeroCarbonBlueprint,0072,0053,38
1081,"2019 Aquired Gulf Power, which added 1,750 MW ...",27,"2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072,0054,27
1082,2020 Retired 615 MW of nuclear and 330 MW of c...,45,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of ...,NextEraEnergyZeroCarbonBlueprint,0072,0055,45


### test_merge_clean --> test_rel

In [144]:
test_merge_clean['label'] = 'rel'
test_merge_clean[test_merge_clean.relevant_sentences.duplicated()]
test_rel = test_merge_clean

In [145]:
test_rel = test_rel.rename(columns = {'all_sentences': 'rel_match_all', 'company_index_y': 'company_index',
                                      'sent_count_x': 'rel_sent_count', 'sent_count_y': 'rel_match_sent_count'})

In [146]:
test_rel

Unnamed: 0,relevant_sentences,rel_sent_count,matched_rel,rel_match_all,company_label,company_index,sent_index,rel_match_sent_count,label
0,"In 2021, 9.6% of our purchased electricity cam...",11,"Looking toward the future, we have set climate...","Looking toward the future, we have set climate...",EliLilly,0001,0041,40,rel
1,A large portion of this renewable electricity ...,24,A large portion of this renewable electricity ...,A large portion of this renewable electricity ...,EliLilly,0001,0042,24,rel
2,"From 2012 to 2020, we achieved a 26% reduction...",12,"From 2012 to 2020, we achieved a 26% reduction...","From 2012 to 2020, we achieved a 26% reduction...",EliLilly,0001,0046,12,rel
3,"In 2021, we achieved a 9% absolute emissions r...",11,"In 2021, we achieved a 9% absolute emissions r...","In 2021, we achieved a 9% absolute emissions r...",EliLilly,0001,0047,11,rel
4,This reduction was partially driven by energy ...,27,use of renewable electricity including the sta...,use of renewable electricity including the sta...,EliLilly,0001,0049,13,rel
...,...,...,...,...,...,...,...,...,...
1079,"2017 Retired and demolished 250 MW of coal, re...",23,"2017 Retired and demolished 250 MW of coal, re...","2017 Retired and demolished 250 MW of coal, re...",NextEraEnergyZeroCarbonBlueprint,0072,0052,23,rel
1080,2018 Retired and demolished 636 MW of coal and...,38,2018 Retired and demolished 636 MW of coal an...,2018 Retired and demolished 636 MW of coal an...,NextEraEnergyZeroCarbonBlueprint,0072,0053,38,rel
1081,"2019 Aquired Gulf Power, which added 1,750 MW ...",27,"2019 Aquired Gulf Power, which added 1,750 MW ...","2019 Aquired Gulf Power, which added 1,750 MW ...",NextEraEnergyZeroCarbonBlueprint,0072,0054,27,rel
1082,2020 Retired 615 MW of nuclear and 330 MW of c...,45,2020 Retired 615 MW of nuclear and 330 MW of ...,2020 Retired 615 MW of nuclear and 330 MW of ...,NextEraEnergyZeroCarbonBlueprint,0072,0055,45,rel


### total_all

In [131]:
total_all = total_all.reset_index()
total_all = total_all.drop(columns = ['index'])

### total_all --> test_irr

In [132]:
test_irr = total_all.merge(test_merge_clean, how = 'outer', on = 'all_sentences', indicator = True)

In [133]:
test_irr = test_irr[test_irr.relevant_sentences.isnull()]

In [134]:
test_irr['label'] = 'irr'

In [135]:
test_irr = test_irr.drop(['relevant_sentences', 'sent_count_x', 'company_label_y', 'company_index_y', '_merge', 'sent_index_y', 'sent_count_y'], axis = 1)

In [136]:
test_irr = test_irr.reset_index().drop('index', axis = 1 )

In [137]:
test_irr = test_irr.rename(columns = {'company_label_x': 'company_label', 'sent_index_x': 'sent_index', 
                                      'all_sentences' : 'irrelevant_sentences'})

In [138]:
test_irr

Unnamed: 0,irrelevant_sentences,company_label,company_index,sent_index,sent_count,matched_rel,label
0,Environmental | 2021 ESG Report | Eli Lilly an...,EliLilly,0001,0002,10,,irr
1,"Our Environmental Approach Our purpose, to mak...",EliLilly,0001,0004,18,,irr
2,Making medicines requires the use of valuable ...,EliLilly,0001,0005,14,,irr
3,We’re committed to reducing our environmental ...,EliLilly,0001,0006,18,,irr
4,"To track our progress, we measure and manage e...",EliLilly,0001,0007,27,,irr
...,...,...,...,...,...,...,...
100291,→ FPL’s four nuclear units continue to operat...,NextEraEnergyZeroCarbonBlueprint,0072,0216,10,,irr
100292,Technology We assume that: → FPL’s gas plants...,NextEraEnergyZeroCarbonBlueprint,0072,0217,28,,irr
100293,→ NextEra Energy Resources would invest in el...,NextEraEnergyZeroCarbonBlueprint,0072,0218,26,,irr
100294,→ All non-FPL fossil generation assets would ...,NextEraEnergyZeroCarbonBlueprint,0072,0219,14,,irr


In [139]:
total_all.to_csv('all_with_index.csv', encoding = 'utf-8-sig')

In [147]:
test_rel.columns

Index(['relevant_sentences', 'rel_sent_count', 'matched_rel', 'rel_match_all',
       'company_label', 'company_index', 'sent_index', 'rel_match_sent_count',
       'label'],
      dtype='object')

In [148]:
test_rel = test_rel[['relevant_sentences', 'rel_sent_count', 'matched_rel', 'rel_match_all', 'rel_match_sent_count', 'company_label', 'company_index', 'sent_index', 'label']]

In [149]:
test_rel.to_csv('rel_with_index.csv', encoding = 'utf-8-sig')

In [142]:
test_irr.to_csv('irr_with_index.csv', encoding = 'utf-8-sig')

In [100]:
len(test_rel.rel_match_all.unique())

1084

## CHECK THE STATS: numbers are not adding up

In [101]:
print(' Irrelevant Sentences:', len(test_irr), '\n', 'Relevant Sentences from Total Sentences (935 Unique Relevant Sentences):', len(test_rel), '\n', 'Total Sentences:', len(total_all))

 Irrelevant Sentences: 100296 
 Relevant Sentences from Total Sentences (935 Unique Relevant Sentences): 1084 
 Total Sentences: 101380


In [102]:
test_rel.head()

Unnamed: 0,relevant_sentences,rel_sent_count,rel_match_all,rel_match_sent_count,company_label,company_index,sent_index,label
0,"In 2021, 9.6% of our purchased electricity cam...",11,"Looking toward the future, we have set climate...",40,EliLilly,1,41,rel
1,A large portion of this renewable electricity ...,24,A large portion of this renewable electricity ...,24,EliLilly,1,42,rel
2,"From 2012 to 2020, we achieved a 26% reduction...",12,"From 2012 to 2020, we achieved a 26% reduction...",12,EliLilly,1,46,rel
3,"In 2021, we achieved a 9% absolute emissions r...",11,"In 2021, we achieved a 9% absolute emissions r...",11,EliLilly,1,47,rel
4,This reduction was partially driven by energy ...,27,use of renewable electricity including the sta...,13,EliLilly,1,49,rel


In [103]:
total_all.head()

Unnamed: 0,all_sentences,company_label,company_index,sent_index,sent_count
0,Environmental | 2021 ESG Report | Eli Lilly an...,EliLilly,1,2,10
1,"Our Environmental Approach Our purpose, to mak...",EliLilly,1,4,18
2,Making medicines requires the use of valuable ...,EliLilly,1,5,14
3,We’re committed to reducing our environmental ...,EliLilly,1,6,18
4,"To track our progress, we measure and manage e...",EliLilly,1,7,27


In [104]:
test_irr.head()

Unnamed: 0,irrelevant_sentences,company_label,company_index,sent_index,sent_count,label
0,Environmental | 2021 ESG Report | Eli Lilly an...,EliLilly,1,2,10,irr
1,"Our Environmental Approach Our purpose, to mak...",EliLilly,1,4,18,irr
2,Making medicines requires the use of valuable ...,EliLilly,1,5,14,irr
3,We’re committed to reducing our environmental ...,EliLilly,1,6,18,irr
4,"To track our progress, we measure and manage e...",EliLilly,1,7,27,irr


## C) TODO: Update Sentences Statistics

In [105]:
# rel_from_all = rel.rename(columns = {'sentences': 'relevant_sentences'})

In [106]:
# irr_from_all = irr.rename(columns = {'sentences': 'irrelevant_sentences'})

In [107]:
total_relevant_stat = test_rel.groupby('company_label', sort = False).count()[['relevant_sentences']]

In [108]:
total_irrelevant_stat = test_irr.groupby('company_label', sort = False).count()[['irrelevant_sentences']]

In [109]:
total_stat_final = total_irrelevant_stat.merge(total_relevant_stat, how = 'left', on = 'company_label', sort = False)

In [110]:
total_stat_final['relevant_sentences'] = total_stat_final['relevant_sentences'].fillna(0)

In [111]:
total_stat_final['relevant_sentences'] = total_stat_final['relevant_sentences'].astype(int)

In [112]:
total_stat_final['rel/total percentages'] = round((total_stat_final['relevant_sentences'] / (total_stat_final['relevant_sentences'] + total_stat_final['irrelevant_sentences'])) * 100, 2)

In [113]:
total_stat_final.to_csv('total_stat_final.csv')

In [114]:
total_stat_final.sum()

irrelevant_sentences     100296.00
relevant_sentences         1084.00
rel/total percentages       113.75
dtype: float64

### TODO: Save the sentence dictionary into json file

In [383]:
# for key, sent in zip(total_all.key, total_all.all_sentences):
#     print(key, sent)

In [382]:
# sent_dict = {}

# for key, sent in zip(all_sent.key, all_sent.sentences):
#     sent_dict[key] = sent

In [376]:
# import json

# with open('sentence_dict.json', 'w') as fp:
#     json.dump(sent_dict, fp, sort_keys=True, indent=4)

In [377]:
# with open('sentence_dict.json', 'r') as fp:
#     data = json.load(fp)

In [None]:
# import os
# import shutil
# import glob

In [None]:
# path = '/Users/tylerryoo/t3/extracted_sentences/notebooks/final_extracted_statistics_notebooks'
# files = glob.glob(path + "/*.csv")

In [None]:
# files.append('/Users/tylerryoo/t3/extracted_sentences/notebooks/final_extracted_statistics_notebooks/sentence_dict.json')

In [None]:
# for file in files:
    
#     if file == '/Users/tylerryoo/t3/extracted_sentences/notebooks/final_extracted_statistics_notebooks/string_matched.csv':
#         continue

#     filename = file.split('/')[-1]
    
#     target = (r'/Users/tylerryoo/t3/relevant_irrelevant_sentences_labeled_final/' + filename)

#     shutil.move(file, target)

### Archive

In [None]:
# Original String Matching Algorithm:
# string comparison to separate relevant and irrelevant sentences from all sentences
# matrel = []
# matlabel = []
# orgrel = []
# track = 0
# for comp in company_list:
#     total_relevant_comp = total_relevant[total_relevant.company_label == comp]
#     for i, j in zip(total_relevant_comp.relevant_sentences, total_relevant_comp.company_index):
#         total_all_comp = total_all[total_all.company_label == comp]
#         # extract relevant sentences from all
#         val = process.extractOne(i, total_all_comp.all_sentences, scorer = fuzz.WRatio, score_cutoff = 90)
#         val_2 = process.extractOne(i, total_all_comp.all_sentences, scorer = fuzz.partial_ratio, score_cutoff = 90)
#         test_val = process.extract(i, total_all_comp.all_sentences, scorer = fuzz.WRatio, score_cutoff = 90)
#         test_val_2 = process.extract(i, total_all_comp.all_sentences, scorer = fuzz.partial_ratio, score_cutoff = 90)
#         print()
#         print('TEST VAL:', test_val)
#         print('TEST VAL_2:', test_val_2)
#         print()
#         if (val is not None) and (val_2 is not None): 
#             print()
#             print("PRINT ORG: ", i)
#             print('PRINT BOTH:', val[0])
#             print('PRINT BOTH:', val_2[0])
#             print()
#             if len(val[0]) >= len(val_2[0]):
#                 matrel.append(val)
#                 print('choose value 1:', val)
#             elif len(val[0]) < len(val_2[0]):
#                 matrel.append(val_2)
#                 print('choose value 2:', val_2)
#             else:
#                 continue
#         else: 
#             matrel.append(None)
#         matlabel.append(j)
#         orgrel.append(i)
#         print(track, end = " ")
#         print()
#         track +=1