<div class="alert alert-block alert-info">
    This notebook <b>creates the <code>HIGHLIGHTED_PRODUCTS</code> list, which is used later for plotting</b>.
    <hr>
    Note: <br>
    <i><b>The names and metadata of the input file(s)</b></i> (if available) are <b>printed out (below 👇🏼) in the "INPUTS: Identify file(s) and read data to df" section.</b>
</div>

In [1]:
# %env
# %who_ls
# %who
# %who int
# %pinfo <var name>

# Imports

In [2]:
%config IPCompleter.use_jedi = False # disable jedi autocompleter (https://stackoverflow.com/a/65734178/14485040)

import os

import project_path  # makes possible the access to `src` directory using relative path
from src.data import filter_dataframe  # , internal_funcs
from src.utils import explore_dir, make_readme_info
from src.utils import read_excel_to_pandas as r_excel
from src.utils import set_outputs_dir
from src.utils import write_pandas_to_excel as w_excel

%run init_nb.ipynb

In [3]:
# Show every column when a dataframe is displayed (None = no column limit).
pd.set_option("display.max_columns", None)

# INPUTS: Identify file(s) and read data to df

In [4]:
# Explore the interim-data directory to find the input file(s).
# The relative path is assembled with os.path.join so that it uses the
# platform's own separator: on Windows it is identical to r"..\data\interim",
# and on POSIX systems it resolves instead of being read as one odd file name.
# `explore_dir` is a project helper (src.utils); it is called here with the
# directory, the wanted extension and a flag to print the files it found.
inputs_dir, files_list = explore_dir(
    path_to_dir=os.path.join(os.pardir, "data", "interim"),
    file_extension="xlsx",
    print_files_list=True,
)

['CPC33to36_other_chemicals.xlsx',
 'data-extended-added-transgression-pbs.xlsx',
 'data-extended-glo-markets-and-fu-kg.xlsx',
 'data-filtered-cpc-33to36-divisions.xlsx',
 'data-full-only-in-pubchem.xlsx',
 'data-regrouped-cpc-divisions-into-3-categories.xlsx',
 'df_chemproperties.xlsx',
 'extended-data-chosen-methods-metadata-pubchem-properties.xlsx',
 'highlighted-products.xlsx',
 'lcia-results-from-sp910-combined.xlsx',
 'list-chemicals-not-shown-on-fig-prices-vs-impacts.xlsx',
 'list-chemicals-not-shown-on-fig3.xlsx',
 'list-outliers.xlsx',
 'mapped-lcia-results.xlsx',
 'pubchem-properties.xlsx',
 'raw-data-chosen-lcia-methods-and-metadata.xlsx',
 'temp-df_base_full_wCAS.xlsx',
 'temp-df_base_full_wCAS_woOutliersRMDk9a5.xlsx',
 'temp-dict_fullMethods.xlsx',
 'to-check-regrouped-data-other-chemicals-category.xlsx']


In [5]:
# Workbook that holds the main table and the three helper sheets read below.
_SOURCE_XLSX = "data-extended-added-transgression-pbs.xlsx"

# Main dataframe (this first read is the one that prints the workbook's readme).
df_base_full = r_excel(inputs_dir, _SOURCE_XLSX)
print("df_base_full".ljust(40, "."), f"{df_base_full.shape}\n".rjust(13, "."))

# Lists of LCIA methods, transgression-level (TL) PB methods and metadata labels.
# Each list is read from the sheet of the same name, taking the column that
# carries that name as well; the readme is not printed again for these reads.
METHODS, TL_METHODS, METADATA = (
    r_excel(inputs_dir, _SOURCE_XLSX, sheets=sheet_name, show_readme=False)[
        sheet_name
    ].to_list()
    for sheet_name in ("METHODS", "TL_METHODS", "METADATA")
)


===> Trying to load 'readme' data... ===
File: data-extended-added-transgression-pbs.xlsx from
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\notebooks\0.50-vt-calculate-transgression-levels.ipynb
Generated on 2021-08-04 (Wednesday), 17:15:02 by Tulus, V.
Includes:
<<<
Sheet1: Extended dataframe, which also includes 9 transgression level (TL) PBs.
METADATA: list of relevant metadata used in Sheet1.
METHODS: list of LCIA methods used in Sheet1.
TL_METHODS: list of TL in PBs used in Sheet1.
[METHODS + METADATA + TL_METHODS have to be the only column labels in Sheet1]
>>>

df_base_full............................ ...(668, 48)



# Operations
<div class="alert alert-block alert-info">
created: <code>HIGHLIGHTED_PRODUCTS</code> list
</div>

## Research possible highlighted products

In [6]:
# Search terms for well-known chemicals; each one is looked up in the
# `referenceProduct` column by the loop below.
lst_known_chemicals = [
    "Toluene",
    "Xylene",
    # Javier's list below
    "Liquefied petroleum gas",  # ok
    "Petrol",  # "Gasoline", # ok
    "Diesel",  # ok
    "Kerosene",  # ok
    "Ethylene",  # ok
    "Propylene",  # ok
    "Benzene",  # ok
    "Synthetic gas",  # FU 1m3
    "Ammonia, liquid",  # ok
    "Methanol",  # ok
    "Sulfuric acid",  # ok
    "Chlorine",  # ok
    "Acetic acid",  # ok
    "Formaldehyde",  # ok
    "Urea",  # ok
    "Ethylene oxide",  # ok
    "Acrylonitrile",  # ok
    "Acetaldehyde",  # ok
    "Polyethylene",  # ok
    "Polypropylene",  # ok
    "Polyvinylchloride",  # ok
    "Hydrogen",  # ok
]

# One lookup per search term. Only the list printed by `print_unique=True`
# is of interest here, so the dataframe returned by the helper is discarded.
for item in lst_known_chemicals:
    print(f"Looking for {item}")
    filter_dataframe(
        df_in=df_base_full, col_name="referenceProduct", filter_in=[item], print_unique=True
    )

Looking for Toluene
List of unique items matching your request:
		 ['Toluene, liquid', 'Toluene diisocyanate'] 

Looking for Xylene
List of unique items matching your request:
		 ['Xylene'] 

Looking for Liquefied petroleum gas
List of unique items matching your request:
		 ['Liquefied petroleum gas'] 

Looking for Petrol
List of unique items matching your request:
		 ['Petrol, unleaded', 'Petrol, 5% ethanol by volume from biomass', 'Petrol, 4% ETBE additive by volume, with ethanol from biomass', 'Petrol, low-sulfur', 'Petroleum coke', 'Petrol, two-stroke blend', 'Petrol, 15% ETBE additive by volume, with ethanol from biomass'] 

Looking for Diesel
List of unique items matching your request:
		 ['Diesel', 'Diesel, low-sulfur'] 

Looking for Kerosene
List of unique items matching your request:
		 ['Kerosene'] 

Looking for Ethylene
List of unique items matching your request:
		 ['Ethylene, pipeline system', 'Ethylene vinyl acetate copolymer', 'Ethylene glycol', 'Ethylene bromide', 'Ethy

## Selected products

In [7]:
# Products to highlight in later plots.
# Exact names only! The filter below runs with `exact_match=True`, so every
# entry has to be spelled exactly like its `referenceProduct` value.
HIGHLIGHTED_PRODUCTS = [
    "Sulfuric acid",
    "Kerosene",
    "Diesel, low-sulfur",  # or "Diesel",
    "Liquefied petroleum gas",
    "Methanol",
    "Petrol, low-sulfur",
    "Formaldehyde",
    "Chlorine, liquid",
    "Ethylene, average",
    "Propylene",
    "Toluene, liquid",
    "Acetic acid, without water, in 98% solution state",
    "Acetaldehyde",
    "Polyethylene, high density, granulate",
    "Benzene",
    "Ammonia, liquid",
    "Polypropylene, granulate",
    "Ethylene oxide",
    "Polyvinylchloride, bulk polymerised",
    "Hydrogen, liquid",
    "Acrylonitrile",
    "Urea, as N",
    "1-propanol",
    "Acetylene",
    "Chlorotoluron",
    "Methylene diphenyl diisocyanate",
    "Ammonium nitrate, as N",
    "Pyridine",
    "Nylon 6-6",
    "Glyphosate",
    "Para-phenylene diamine",
    "Fluorine, liquid",
    "Adipic acid",
    "Xylene",
]

# Columns shown in the preview table at the end of this cell.
# Other columns that can be added here when needed: "category",
# "referenceProduct_prodVolume", "complexity".
_preview_columns = [
    "Activity",
    "referenceProduct",
    "geo",
    "category_regrouped",
    "referenceProduct_CPCclass",
    *METHODS[:1],  # first LCIA method only
    *TL_METHODS[:1],  # first transgression-level (TL) method only
    "MF",
]

# Rows whose `referenceProduct` matches one of the highlighted names
# (replaced internal_funcs.find_chemicals(df_base_full,HIGHLIGHTED_PRODUCTS,"referenceProduct")).
selected = filter_dataframe(
    df_in=df_base_full,
    col_name="referenceProduct",
    filter_in=HIGHLIGHTED_PRODUCTS,
    exact_match=True,
)[_preview_columns]

# A misspelled name yields no rows and would otherwise go unnoticed, while the
# name would still be exported with the list. Report such names explicitly.
_unmatched = sorted(set(HIGHLIGHTED_PRODUCTS) - set(selected["referenceProduct"]))
if _unmatched:
    print(f"WARNING: no rows found for highlighted product(s): {_unmatched}")

# Preview: global ("GLO") entries only, ordered by the first TL method
# (the column taken from TL_METHODS, not the LCIA/GWP one), highest value first.
selected.loc[selected["geo"] == "GLO"].sort_values(by=TL_METHODS[0], ascending=False)

Unnamed: 0,Activity,referenceProduct,geo,category_regrouped,referenceProduct_CPCclass,"('IPCC 2013 GWP 100a V1.03', 'IPCC GWP 100a', 'kg CO2 eq')","(epc) TL in ('PBs-LCIA (baseline) V0.72', 'Climate change - CO2 concentration', 'ppm')",MF
82,"Ammonium nitrate, as N {GLO}| market for | APO...","Ammonium nitrate, as N",GLO,Other chemical,34613: Ammonium nitrate,7.993117,133.582556,H4N2O3
413,"Urea, as N {GLO}| market for | APOS, S","Urea, as N",GLO,Other chemical,34611: Urea,3.354824,131.555473,CH4N2O
516,"Chlorine, liquid, combined to GLO market","Chlorine, liquid",GLO,Inorganic chemical,34231: Chemical elements n.e.c.; inorganic aci...,1.015311,120.745094,Cl2
493,"Ammonia, liquid, combined to GLO market","Ammonia, liquid",GLO,Other chemical,3465: Other fertilizers,2.109609,109.192345,H3N
267,"Nylon 6-6 {GLO}| market for | APOS, S",Nylon 6-6,GLO,Organic chemical,347: Plastics in primary forms,8.336883,101.197719,
43,"1-propanol {GLO}| market for | APOS, S",1-propanol,GLO,Organic chemical,"34139: Other alcohols, phenols, phenol-alcohol...",4.525119,93.998992,C3H8O
487,"Acetylene, combined to GLO market",Acetylene,GLO,Organic chemical,341: Basic organic chemicals,5.670185,90.54059,C2H2
623,"Sulfuric acid, combined to GLO market",Sulfuric acid,GLO,Inorganic chemical,34231: Chemical elements n.e.c.; inorganic aci...,0.150888,81.342372,H2O4S
72,"Adipic acid {GLO}| market for | APOS, S",Adipic acid,GLO,Organic chemical,"34140: Carboxylic acids and their anhydrides, ...",13.733554,77.62022,C6H10O4
69,"Acrylonitrile {GLO}| market for | APOS, S",Acrylonitrile,GLO,Organic chemical,34150: Amine-function compounds; oxygen-functi...,3.018312,71.86261,C3H3N


In [8]:
# One-column dataframe holding the alphabetically sorted HIGHLIGHTED_PRODUCTS;
# it is the table written to Excel in the export step.
df_highlighted_products = pd.DataFrame(
    {"HIGHLIGHTED_PRODUCTS": sorted(HIGHLIGHTED_PRODUCTS)}
)

# OUTPUTS: Export data to excel

In [9]:
%%time

# Destination folder for the export (project default: `..\data\interim`).
outputs_dir = set_outputs_dir(use_default=True)

# Name of the workbook to create.
excelName = "highlighted-products.xlsx"

# Readme content stored next to the data: the file name plus a one-line
# description of the exported sheet.
df_readme = make_readme_info(
    excelName,
    "HIGHLIGHTED_PRODUCTS: Dataframe with highlighted products used for insights and plotting (alphabetically sorted)",
)

# Write the workbook: the data under the "HIGHLIGHTED_PRODUCTS" key and the
# readme dataframe passed under the name "readme".
w_excel(
    path_to_file=outputs_dir,
    filename=excelName,
    dict_data_to_write={"HIGHLIGHTED_PRODUCTS": df_highlighted_products},
    readme_info=("readme", df_readme),
)

File: highlighted-products.xlsx successfully created in 
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\data\interim
Wall time: 192 ms
