<div class="alert alert-block alert-info">
    This notebook <b>creates a <code>HIGHLIGHTED_PRODUCTS</code> list, which is used later for plotting</b>.
    <hr> 
    Note: <br>
    The <i><b>names and metadata of the input file(s)</b></i> (if available) are <b>printed out below (👇🏼) in the 'read data to df' section.</b>
</div>

In [None]:
# Optional IPython introspection magics, left disabled on purpose.
# Uncomment a single line to use it while developing; nothing below runs as-is.
#
# List the environment variables visible to the kernel:
# %env
# Return a sorted list of all interactive variable names:
# %who_ls
# Print all interactive variables:
# %who
# Print only the interactive variables of type int:
# %who int
# Show detailed information about an object:
# %pinfo <var name>

# Imports

In [None]:
%config IPCompleter.use_jedi = False # disable jedi autocompleter (https://stackoverflow.com/a/65734178/14485040)

import os

import project_path  # makes possible the access to `src` directory using relative path
from src.data import filter_dataframe  # , internal_funcs
from src.utils import explore_dir, make_readme_info
from src.utils import read_excel_to_pandas as r_excel
from src.utils import set_outputs_dir
from src.utils import write_pandas_to_excel as w_excel

%run init_nb.ipynb

In [None]:
# Lift the cap on displayed columns so wide dataframes are rendered in full.
pd.set_option("display.max_columns", None)

# INPUTS: Identify file(s) and read data to df

In [None]:
# Explore the interim-data directory to find the Excel file(s) usable as inputs.
# The relative path is assembled with os.path.join so that its separator matches
# the host operating system: on Windows the resulting string is unchanged
# (..\data\interim), while on POSIX systems it becomes ../data/interim instead
# of a single name containing literal backslashes.
inputs_dir, files_list = explore_dir(
    path_to_dir=os.path.join(os.pardir, "data", "interim"),
    file_extension="xlsx",
    print_files_list=True,
)

In [None]:
# Read data
# Every read below targets the same workbook, so its name is written only once.
_workbook_name = "data-extended-added-transgression-pbs.xlsx"

# Main dataframe (read with the helper's default arguments)
df_base_full = r_excel(inputs_dir, _workbook_name)

# One-line report of the dataframe name and its shape, padded with dots
_name_column = "df_base_full".ljust(40, ".")
_shape_column = f"{df_base_full.shape}\n".rjust(13, ".")
print(_name_column, _shape_column)

# Get list of LCIA methods, TL PBs and list of metadata.
# Each list lives in a sheet of the workbook and in a column of the same name;
# the sheets are read one after another in the order METHODS, TL_METHODS, METADATA.
METHODS, TL_METHODS, METADATA = (
    r_excel(inputs_dir, _workbook_name, sheets=_sheet, show_readme=False)[_sheet].to_list()
    for _sheet in ("METHODS", "TL_METHODS", "METADATA")
)

# Operations
<div class="alert alert-block alert-info">
created: <code>HIGHLIGHTED_PRODUCTS</code> list
</div>

## Research possible highlighted products

In [None]:
# Candidate chemical names used as search terms in the `referenceProduct` column.
# The trailing remarks ("ok", "FU 1m3", ...) are the author's notes on each search.
lst_known_chemicals = [
    "Toluene",
    "Xylene",
    # Javier's list below
    "Liquefied petroleum gas",  # ok
    "Petrol",  # "Gasoline", # ok
    "Diesel",  # ok
    "Kerosene",  # ok
    "Ethylene",  # ok
    "Propylene",  # ok
    "Benzene",  # ok
    "Synthetic gas",  # FU 1m3
    "Ammonia, liquid",  # ok
    "Methanol",  # ok
    "Sulfuric acid",  # ok
    "Chlorine",  # ok
    "Acetic acid",  # ok
    "Formaldehyde",  # ok
    "Urea",  # ok
    "Ethylene oxide",  # ok
    "Acrylonitrile",  # ok
    "Acetaldehyde",  # ok
    "Polyethylene",  # ok
    "Polypropylene",  # ok
    "Polyvinylchloride",  # ok
    "Hydrogen",  # ok
]

# Search the candidates one at a time so each report follows its own heading.
# The returned dataframe is discarded on purpose: only the printed report matters
# here (print_unique=True presumably prints the distinct matches -- see the helper).
for item in lst_known_chemicals:
    print(f"Looking for {item}")
    filter_dataframe(
        df_in=df_base_full, col_name="referenceProduct", filter_in=[item], print_unique=True
    )

## Selected products

In [None]:
# Exact names only!
# Every entry is looked up below with exact_match=True, so it has to be spelled
# exactly like the corresponding value of the `referenceProduct` column.

HIGHLIGHTED_PRODUCTS = [
    "Sulfuric acid",
    "Kerosene",
    "Diesel, low-sulfur",  # or "Diesel",
    "Liquefied petroleum gas",
    "Methanol",
    "Petrol, low-sulfur",
    "Formaldehyde",
    "Chlorine, liquid",
    "Ethylene, average",
    "Propylene",
    "Toluene, liquid",
    "Acetic acid, without water, in 98% solution state",
    "Acetaldehyde",
    "Polyethylene, high density, granulate",
    "Benzene",
    "Ammonia, liquid",
    "Polypropylene, granulate",
    "Ethylene oxide",
    "Polyvinylchloride, bulk polymerised",
    "Hydrogen, liquid",
    "Acrylonitrile",
    "Urea, as N",
    "1-propanol",
    "Acetylene",
    "Chlorotoluron",
    "Methylene diphenyl diisocyanate",
    "Ammonium nitrate, as N",
    "Pyridine",
    "Nylon 6-6",
    "Glyphosate",
    "Para-phenylene diamine",
    "Fluorine, liquid",
    "Adipic acid",
    "Xylene",
]

# Columns kept in the overview table: identifying columns, the first entry of
# METHODS, the first entry of TL_METHODS, and "MF".
# Columns previously toggled here and currently left out: "category",
# "referenceProduct_prodVolume", "complexity".
_overview_columns = [
    "Activity",
    "referenceProduct",
    "geo",
    "category_regrouped",
    "referenceProduct_CPCclass",
    *METHODS[:1],
    *TL_METHODS[:1],
    "MF",
]

# replaced internal_funcs.find_chemicals(df_base_full,HIGHLIGHTED_PRODUCTS,"referenceProduct")
selected = filter_dataframe(
    df_in=df_base_full,
    col_name="referenceProduct",
    filter_in=HIGHLIGHTED_PRODUCTS,
    exact_match=True,
)[_overview_columns]

# Sanity checks on the hand-maintained list. They only print warnings, so the
# notebook keeps running, but a typo or a repeated entry no longer goes unnoticed
# before the list is exported.
_duplicated_names = sorted(
    {name for name in HIGHLIGHTED_PRODUCTS if HIGHLIGHTED_PRODUCTS.count(name) > 1}
)
if _duplicated_names:
    print(f"WARNING: duplicated entries in HIGHLIGHTED_PRODUCTS: {_duplicated_names}")

_matched_names = set(selected["referenceProduct"])
_unmatched_names = [name for name in HIGHLIGHTED_PRODUCTS if name not in _matched_names]
if _unmatched_names:
    print(f"WARNING: no exact `referenceProduct` match found for: {_unmatched_names}")

# Show the global ("GLO") rows, largest value first, ordered by the first
# TL_METHODS column.
# NOTE(review): an earlier comment here named the IPCC GWP column (METHODS[0]);
# the code sorts by TL_METHODS[0] -- confirm that this is the intended key.
selected[selected["geo"] == "GLO"].sort_values(by=TL_METHODS[0], ascending=False)

In [None]:
# Wrap the sorted HIGHLIGHTED_PRODUCTS names in a single-column dataframe
# (column "HIGHLIGHTED_PRODUCTS") so the list can be exported later.
df_highlighted_products = pd.DataFrame({"HIGHLIGHTED_PRODUCTS": sorted(HIGHLIGHTED_PRODUCTS)})

# OUTPUTS: Export data to excel

In [None]:
%%time

# Set output directory
outputs_dir = set_outputs_dir(use_default=True)  # default `..\data\interim`

## Export dataframe to excel
excelName = "highlighted-products.xlsx"

df_readme = make_readme_info(
    excelName,
    "HIGHLIGHTED_PRODUCTS: Dataframe with highlighted products used for insights and plotting (alphabetically sorted)",
)

w_excel(
    path_to_file=outputs_dir,
    filename=excelName,
    dict_data_to_write={"HIGHLIGHTED_PRODUCTS": df_highlighted_products},
    readme_info=("readme", df_readme),
    #     ExcelWriter_kwargs={"engine": "openpyxl", "encoding": "UTF-8"}
    #     startrow=0
)