<div class="alert alert-block alert-info">
This script maps <b>LCIA results exported from SP910</b> against previously mapped <b>DB processes/activities SP910 and Ecoinvent v3.5 APOS</b>  
    <hr> 
    Note: <br>
    <i><b>Input file(s)' name(s) and metadata</b></i> (if available) are <b>printed out (below 👇🏼) in 'read data to df' section.</b>
</div>

# Imports

In [1]:
%config IPCompleter.use_jedi = False # disable jedi autocompleter (https://stackoverflow.com/a/65734178/14485040)

import project_path  # makes possible the access to `src` directory using relative path
from src.utils import explore_dir, make_readme_info
from src.utils import read_excel_to_pandas as r_excel
from src.utils import set_outputs_dir
from src.utils import write_pandas_to_excel as w_excel

%run init_nb.ipynb

# INPUTS: Identify file(s) and read data to df

In [2]:
# Explore the directory to find the file(s)

# --------------- LCIAresults file ----------------
inputs_dir_1, files_list_1 = explore_dir(
    path_to_dir=r"..\data\lcaforsac", file_extension="xlsx", print_files_list=True,
)
print("--> nº of found files in", inputs_dir_1, ": ", len(files_list_1), "\n")

# --------------- master mapped db file ----------------
inputs_dir_2, files_list_2 = explore_dir(
    path_to_dir=r"..\..\18-BW2Hub\02.Code\Outputs",
    file_extension="xlsx",
    print_files_list=True,
)
print("--> nº of found files in", inputs_dir_2, ": ", len(files_list_2))

['lcia-results-from-sp910-combined.xlsx']
--> nº of found files in C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\data\lcaforsac :  1 

['ecoinvent_3.5_apos_activities_db-extracted.xlsx',
 'mapped_processes_SP910-EI35APOS.xlsx',
 'RybergMethod_CFs_PB-LCIA_ecoinvent_linkage_V2.037_V2037-extracted.xlsx',
 'SP910_Impact_CFs_PBs-LCIA_(baseline)_V072-extracted.xlsx',
 'SP910_Impact_CFs_ReCiPe_Midpoint_(H)_V113-extracted.xlsx',
 'SP910_processes_db-extracted.xlsx',
 'SP910_substances-extracted.xlsx',
 'subcompartments_mapped.xlsx']
--> nº of found files in C:\Users\ViteksPC\Documents\00-ETH_projects\18-BW2Hub\02.Code\Outputs :  8


In [3]:
%%time
# Process raw data

# LCIAs results from SimaPro
df_lcias_raw = r_excel(
    inputs_dir_1, "lcia-results-from-sp910-combined.xlsx", sheets="Sheet1"
)

print(
    "df of LCIAs from SimaPro (raw) ".ljust(40, "."),
    f" {df_lcias_raw.shape}".rjust(13, "."),
)

# master mapped db file
df_master_mapped_db_raw = r_excel(
    inputs_dir_2, "mapped_processes_SP910-EI35APOS.xlsx", sheets="Sheet1"
)
print(
    "df of master mapped db (raw) ".ljust(40, "."),
    f" {df_master_mapped_db_raw.shape}".rjust(13, "."),
)


===> Trying to load 'readme' data... ===
File: lcia-results-from-sp910-combined.xlsx from
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\notebooks\0.01-vt-parse-lcia-results-from-sp910.ipynb
Generated on 2021-12-03 (Friday), 16:30:50 by Tulus, V.
Includes:
<<<
Sheet1: Multiple LCIA methods results (per category) for ALL chemical markets from SimaPro910. 
df_lcia_labels: unique names of the LCIA methods used in Sheet1.
>>>

df of LCIAs from SimaPro (raw) ......... ... (947, 64)

===> Trying to load 'readme' data... ===
File: mapped_processes_SP910-EI35APOS.xlsx from
C:\Users\ViteksPC\Documents\00-ETH_projects\18-BW2Hub\02.Code\05.map_processes_SP910-EI35APOS.ipynb
Generated on 2021-05-12 (Wednesday), 16:44:58 by Tulus, V.
Includes:
<<<
Mapped SimaPro910 (with EI35-APOS) processes against Ecoinvent v3.5 APOS activities. Note: for now only 18 activities from Ecoinvent remain unmapped after automatic mapping. Manual refinement is needed.
>>>

df of master mapped d

In [4]:
# Get unique names of the LCIA methods from 'LCIAresultsSP910-combined.xlsx'
# This df is copied to the new Excel file degerated below.

df_lcia_labels = r_excel(
    inputs_dir_1, "lcia-results-from-sp910-combined.xlsx", sheets="df_lcia_labels", show_readme=False,
)
df_lcia_labels

Unnamed: 0,Method
0,ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H
1,PBs-LCIA (baseline) V0.72
2,PBs - Alternative: EF - LANCA V0.70
3,ReCiPe 2016 Endpoint (H) V1.03 / World (2010) H/A
4,Cumulative Energy Demand V1.11 / Cumulative en...
5,IPCC 2013 GWP 100a V1.03
6,PBs-LCIA V0.71 V0.71


# Operations

## Filter/adjust data

In [6]:
df_lcias = df_lcias_raw.copy() # make a deepcopy of the original df

# Substitute "APOS, U - copy" in the original df with "APOS, S", according to the last added markets
lst = []
aposRegex = re.compile((r'APOS, (.*)')) # complied regular expression of "APOS, U - copy" to be replaced with "APOS, S"
for item in df_lcias.Activity:
    lst.append(aposRegex.sub(r'APOS, S', item))
df_lcias.Activity = lst

shape_lcias = df_lcias.shape
print('df of LCIAs from SimaPro '.ljust(40,'.'), f' {shape_lcias}'.rjust(13, '.'))
df_lcias.sample(2)

df of LCIAs from SimaPro ............... ... (947, 64)


Unnamed: 0,wkbName,Activity,"('PBs-LCIA V0.71 V0.71', 'Climate change - CO2 concentration', 'ppm')","('PBs-LCIA V0.71 V0.71', 'Climate change - Energy imbalance', 'Wm-2')","('PBs-LCIA V0.71 V0.71', 'Stratospheric ozone depletion', 'DU')","('PBs-LCIA V0.71 V0.71', 'Ocean acidification', 'Omega Aragon')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - P', 'Tg P')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - N', 'Tg N')","('PBs-LCIA V0.71 V0.71', 'Land-system change - Global', '%')","('PBs-LCIA V0.71 V0.71', 'Freshwater use - Global', 'km3')",...,"('ReCiPe 2016 Endpoint (H) V1.03 / World (2010) H/A', 'Human health', 'DALY')","('ReCiPe 2016 Endpoint (H) V1.03 / World (2010) H/A', 'Ecosystems', 'species.yr')","('ReCiPe 2016 Endpoint (H) V1.03 / World (2010) H/A', 'Resources', 'USD2013')","('IPCC 2013 GWP 100a V1.03', 'IPCC GWP 100a', 'kg CO2 eq')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Non renewable, fossil', 'MJ')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Non-renewable, nuclear', 'MJ')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Non-renewable, biomass', 'MJ')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Renewable, biomass', 'MJ')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Renewable, wind, solar, geothe', 'MJ')","('Cumulative Energy Demand V1.11 / Cumulative energy demand', 'Renewable, water', 'MJ')"
537,Chemicals-Organic-Market,"Monoethanolamine {GLO}| market for | APOS, S",6.773613e-11,9.084333e-13,7.900254e-15,2.069843e-13,8.271784e-16,1.414589e-10,3.623812e-16,4.987075e-12,...,6e-06,1.214697e-08,0.541646,2.816783,66.056701,4.179401,0.000727,0.493512,0.271767,1.087382
80,Chemicals-Gases-Liquified-Market,"Nitrogen, liquid {RoW}| market for | APOS, S",1.121245e-11,1.539825e-13,2.254442e-15,3.42624e-14,2.7203700000000003e-17,1.756538e-14,4.5464950000000006e-17,9.750602e-13,...,1e-06,2.105013e-09,0.021529,0.458012,5.346593,0.690546,6.4e-05,0.044886,0.073176,0.38804


In [7]:
df_master_mapped_db = df_master_mapped_db_raw.copy() # make a deepcopy of the original df

# Substitute "APOS, U" in the original df with "APOS, S", since the LCIAs are calculated with S-system instead of U-unit 
lst = []
aposRegex = re.compile((r'APOS, (\w+)')) # complied regular expression of "APOS, U" to be replaced with "APOS, S"
for item in df_master_mapped_db.fullName_SimaPro:
    lst.append(aposRegex.sub(r'APOS, S', item))
df_master_mapped_db.fullName_SimaPro = lst

shape_master_mapped = df_master_mapped_db.shape
print('df of master mapped db'.ljust(40,'.'), f' {shape_master_mapped}'.rjust(13, '.'))

df_master_mapped_db.sample(2)

df of master mapped db.................. . (16027, 29)


Unnamed: 0,activity_comment,type,referenceProduct,shortName_geo,activityName_SP,fullName_SimaPro,unit,amount,allocation_percentage,wasteType,...,referenceProductUnit,referenceProduct_prodVolume,referenceProduct_prodVolumeComment,referenceProduct_price,referenceProduct_priceUnit,referenceProduct_priceComment,referenceProduct_casNumber,referenceProduct_CPCclass,activity_generalComment,sourceFilename
14162,Production volume: 1846800000 kgIncluded ac...,Waste treatment,Waste gypsum,Europe without Switzerland,"treatment of waste gypsum, inert material land...",Waste gypsum {Europe without Switzerland}| tre...,kg,1,(blank),All waste types,...,kg,1846800000.0,Estimated value. EU production of gypsum in 20...,0.0,EUR2005,0.0,13397-24-5,39: Wastes or scraps,,25264_6e4b1d54-7056-413d-a7f2-372927e05b2a_27a...
7167,"Rentz O., Karl U., Peter H. (2002). Ermittlung...",Products,"Heat, district or industrial, other than natur...",CZ,"treatment of coal gas, in power plant","Heat, district or industrial, other than natur...",MJ,1,100,not defined,...,MJ,416562900.0,,,,,,17300: Steam and hot water,"Rentz O., Karl U., Peter H. (2002). Ermittlung...",32300_84e48164-c191-4864-bbd1-a77dbe58daff_71e...


In [8]:
df_master_mapped_db.columns

Index(['activity_comment', 'type', 'referenceProduct', 'shortName_geo',
       'activityName_SP', 'fullName_SimaPro', 'unit', 'amount',
       'allocation_percentage', 'wasteType', 'category', 'inline_comment',
       'activityName_EI', 'geo', 'activity_ISICclass',
       'activity_ecoSpold01class', 'technologyLevel', 'referenceProductName',
       'referenceProductAmount', 'referenceProductUnit',
       'referenceProduct_prodVolume', 'referenceProduct_prodVolumeComment',
       'referenceProduct_price', 'referenceProduct_priceUnit',
       'referenceProduct_priceComment', 'referenceProduct_casNumber',
       'referenceProduct_CPCclass', 'activity_generalComment',
       'sourceFilename'],
      dtype='object')

## Mapping

In [9]:
# Merge df_lcias with df_master_mapped_db on "Activity" and "fullName_SimaPro"
df_merged = df_lcias.merge(df_master_mapped_db,
                                 how='left',
                                 left_on=['Activity'], 
                                 right_on=['fullName_SimaPro'],
                                 suffixes=('_LCIArslts', '_SimaPro')
                                )

shape_merged = df_merged.shape
# ----------------------------------

print(
    "|".rjust(8, " ")
    + "LCIAs results".center(15, " ")
    + "|"
    + "Master mapped db".center(18, " ")
)
print("".center(41, "-"))
print(
    "Total"
    + "|".rjust(3, " ")
    + f"{shape_lcias[0]}".center(15, " ")
    + "|"
    + f"{shape_master_mapped[0]}".center(18, " ")
)
print("".center(41, "-"))
print("Mapped |", f"{shape_merged[0]} items".center(30, " "))
print("".center(41, "-"))

print('Merged df '.ljust(15,'.'), f' {shape_merged}'.rjust(13, '.'))
df_merged.sample(3)

       | LCIAs results | Master mapped db 
-----------------------------------------
Total  |      947      |      16027       
-----------------------------------------
Mapped |           947 items           
-----------------------------------------
Merged df ..... ... (947, 93)


Unnamed: 0,wkbName,Activity,"('PBs-LCIA V0.71 V0.71', 'Climate change - CO2 concentration', 'ppm')","('PBs-LCIA V0.71 V0.71', 'Climate change - Energy imbalance', 'Wm-2')","('PBs-LCIA V0.71 V0.71', 'Stratospheric ozone depletion', 'DU')","('PBs-LCIA V0.71 V0.71', 'Ocean acidification', 'Omega Aragon')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - P', 'Tg P')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - N', 'Tg N')","('PBs-LCIA V0.71 V0.71', 'Land-system change - Global', '%')","('PBs-LCIA V0.71 V0.71', 'Freshwater use - Global', 'km3')",...,referenceProductUnit,referenceProduct_prodVolume,referenceProduct_prodVolumeComment,referenceProduct_price,referenceProduct_priceUnit,referenceProduct_priceComment,referenceProduct_casNumber,referenceProduct_CPCclass,activity_generalComment,sourceFilename
331,Chemicals-Organic-Market,"2-butanol {RER}| market for 2-butanol | APOS, S",9.510684e-11,1.264309e-12,7.606072e-15,2.906214e-13,6.67313e-16,1.378311e-13,2.928519e-16,1.794665e-11,...,kg,218750000.0,,0.546,EUR2005,Temporary price data. Calculated as 90% of pur...,78-92-2,"34139: Other alcohols, phenols, phenol-alcohol...",A regional market for Europe [RER] is motivate...,17071_ab64c2c0-344b-5338-a650-5ac7da58bc7a_dab...
594,Chemicals-Organic-Market,Sodium chloroacetate {GLO}| market for sodium ...,7.888528e-11,1.064015e-12,1.690498e-14,2.410521e-13,1.835611e-14,7.303715e-13,7.682507e-16,8.207565e-12,...,kg,347701100.0,,1.07,EUR2005,Calculated based on inputs: The price of the p...,,"34140: Carboxylic acids and their anhydrides, ...",The transport amounts are based on the default...,17251_4edccd1a-561a-4c16-8620-85521c086931_2ee...
724,Chemicals-Pesticides-Market,[sulfonyl]urea-compound {GLO}| market for | AP...,2.639814e-10,3.621065e-12,2.951666e-13,8.066384e-13,2.268922e-14,4.926419e-11,1.530126e-15,2.136596e-11,...,kg,24811470.0,,5.13,EUR2005,Calculated value based on data from United Nat...,,"34663: Herbicides, anti-sprouting products and...","In this market, expert judgement was used to d...",20598_273cd9e8-46e7-491f-a677-1f083bef8200_4c2...


### > Find unmatched activities and drop (if any)

    (if there are any NaNs in columns "on the right" in merge function
        e.g. 'fullName_SimaPro', or 'shortName_geo', 
    this means the activity was not matched, i.e. DOESN'T EXIST IN DF ON THE RIGHT)

In [10]:
print('CAUTION: These columns have at least one NaN entry:\n')

dict_nans = dict(zip(
                    list(df_merged.columns[df_merged.isnull().any()]),      # .isnull() exactly the same as .isna()
                    [col for col in df_merged.isnull().sum() if col != 0]
                    )
                )

pprint.pprint(dict_nans)

CAUTION: These columns have at least one NaN entry:

{'activityName_EI': 1,
 'activityName_SP': 1,
 'activity_ISICclass': 1,
 'activity_comment': 1,
 'activity_ecoSpold01class': 408,
 'activity_generalComment': 400,
 'allocation_percentage': 1,
 'amount': 1,
 'category': 1,
 'fullName_SimaPro': 1,
 'geo': 1,
 'inline_comment': 1,
 'referenceProduct': 1,
 'referenceProductAmount': 1,
 'referenceProductName': 1,
 'referenceProductUnit': 1,
 'referenceProduct_CPCclass': 2,
 'referenceProduct_casNumber': 517,
 'referenceProduct_price': 7,
 'referenceProduct_priceComment': 7,
 'referenceProduct_priceUnit': 7,
 'referenceProduct_prodVolume': 1,
 'referenceProduct_prodVolumeComment': 935,
 'shortName_geo': 1,
 'sourceFilename': 1,
 'technologyLevel': 265,
 'type': 1,
 'unit': 1,
 'wasteType': 1}


> <font color=red>'activityName_EI'</font> is a **required** field. MUST not have empty entries!

In [11]:
print('This is a list of activities from SimaPro not matched in Ecoinvent db:\n')
list(df_merged[df_merged.activityName_EI.isnull()].Activity)

This is a list of activities from SimaPro not matched in Ecoinvent db:



['Deinking emulsion, in paper production {GLO}| market for | APOS, S']

> (!) <span style='background:red'> WARNING:</span> Deinking emulsion, in paper production {GLO} is not in Ecoinvent database, only RoW and RER...<br>
> Drop it from df.<br>

In [12]:
df_merged = df_merged.dropna(
    axis=0, how="any", subset=["activityName_EI"], inplace=False
)

### > Check duplicates (if any)

In [13]:
# Unique items per column
print('nº of unique items per column:'.center(35))
print(''.center(35,'-'))
df_merged.nunique()

   nº of unique items per column:  
-----------------------------------


wkbName                                                                   18
Activity                                                                 946
('PBs-LCIA V0.71 V0.71', 'Climate change - CO2 concentration', 'ppm')    938
('PBs-LCIA V0.71 V0.71', 'Climate change - Energy imbalance', 'Wm-2')    943
('PBs-LCIA V0.71 V0.71', 'Stratospheric ozone depletion', 'DU')          939
                                                                        ... 
referenceProduct_priceComment                                            361
referenceProduct_casNumber                                               275
referenceProduct_CPCclass                                                116
activity_generalComment                                                  312
sourceFilename                                                           946
Length: 93, dtype: int64

In [14]:
# Check for duplicates

print('These are the "duplicated" items in column Activity:\n')
df_merged[df_merged.Activity.duplicated(False)]
# df_merged.loc[85,'activityName_Ecoinvent']

These are the "duplicated" items in column Activity:



Unnamed: 0,wkbName,Activity,"('PBs-LCIA V0.71 V0.71', 'Climate change - CO2 concentration', 'ppm')","('PBs-LCIA V0.71 V0.71', 'Climate change - Energy imbalance', 'Wm-2')","('PBs-LCIA V0.71 V0.71', 'Stratospheric ozone depletion', 'DU')","('PBs-LCIA V0.71 V0.71', 'Ocean acidification', 'Omega Aragon')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - P', 'Tg P')","('PBs-LCIA V0.71 V0.71', 'Biogeochemical flows - N', 'Tg N')","('PBs-LCIA V0.71 V0.71', 'Land-system change - Global', '%')","('PBs-LCIA V0.71 V0.71', 'Freshwater use - Global', 'km3')",...,referenceProductUnit,referenceProduct_prodVolume,referenceProduct_prodVolumeComment,referenceProduct_price,referenceProduct_priceUnit,referenceProduct_priceComment,referenceProduct_casNumber,referenceProduct_CPCclass,activity_generalComment,sourceFilename


## TEMPORTAL (inactive)

# OUTPUTS: Export data to excel

In [15]:
%%time

# Set output directory
outputs_dir = set_outputs_dir(use_default=False, rel_path_output=r"..\data\lcaforsac")  # default `..\data\interim`

## Export dataframe to excel
excelName = "mapped-lcia-results.xlsx"

df_readme = make_readme_info(
    excelName,
    "Sheet1: LCIA method results (per category) for ALL chemical markets from SimaPro910 "
    "mapped against metadata from Ecoinvent v3.5 APOS. "
    "\ndf_lcia_labels: unique names of the LCIA methods used in Sheet1.",
)

w_excel(
    path_to_file=outputs_dir,
    filename=excelName,
    dict_data_to_write={"Sheet1": df_merged, "df_lcia_labels": df_lcia_labels},
    readme_info=("readme", df_readme),
    ####         ExcelWriter_kwargs={"engine": "openpyxl", "encoding": "UTF-8"}
    #     startrow=0
)

File: mapped-lcia-results.xlsx successfully created in 
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\data\lcaforsac
Wall time: 2.54 s
