<div class="alert alert-block alert-info">
This script <b>extends the data by adding properties extracted from PubChem</b>.
    <hr> 
    Note: <br>
    <i><b>Input file(s)' name(s) and metadata</b></i> (if available) are <b>printed out (below 👇🏼) in 'read data to df' section.</b>
</div>

In [1]:
# %env
# %who_ls
# %who
# %who int
# %pinfo <var name>

# Imports

In [1]:
%config IPCompleter.use_jedi = False # disable jedi autocompleter (https://stackoverflow.com/a/65734178/14485040)

import project_path  # makes possible the access to `src` directory using relative path
from src.data import filter_dataframe, internal_funcs
from src.utils import explore_dir, make_readme_info
from src.utils import read_excel_to_pandas as r_excel
from src.utils import set_outputs_dir
from src.utils import write_pandas_to_excel as w_excel

%run init_nb.ipynb

# INPUTS: Identify file(s) and read data to df

In [2]:
# Explore the directory to find the file(s)
inputs_dir, files_list = explore_dir(
    path_to_dir=r"..\data\lcaforsac", file_extension="xlsx", print_files_list=True
)

['lcia-results-from-sp910-combined.xlsx',
 'mapped-lcia-results.xlsx',
 'raw-data-chosen-lcia-methods-and-metadata.xlsx']


In [3]:
# Read data
df_raw_data = r_excel(inputs_dir, "raw-data-chosen-lcia-methods-and-metadata.xlsx")
print(
    "df of raw data".ljust(40, "."),
    f"{df_raw_data.shape}\n".rjust(13, "."),
)

# Get list of LCIA methods and list of metadata
METHODS = r_excel(
    inputs_dir, "raw-data-chosen-lcia-methods-and-metadata.xlsx", sheets="METHODS", show_readme=False,
)["METHODS"].to_list()

METADATA = r_excel(
    inputs_dir, "raw-data-chosen-lcia-methods-and-metadata.xlsx", sheets="METADATA", show_readme=False,
)["METADATA"].to_list()

# Read PubChem properties
df_pubchem_prop = r_excel(inputs_dir, "pubchem-properties.xlsx")
print(
    "df of PubChem chemical properties".ljust(40, "."),
    f"{df_pubchem_prop.shape}\n".rjust(13, "."),
)


===> Trying to load 'readme' data... ===
File: raw-data-chosen-lcia-methods-and-metadata.xlsx from
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\notebooks\0.10-vt-clean-raw-data-select-lcia-methods-and-metadata.ipynb
Generated on 2021-12-03 (Friday), 16:41:17 by Tulus, V.
Includes:
<<<
Sheet1: Raw data with chosen LCIA methods and important metadata (redundant columns and extra methods were dropped).
METADATA: list of relevant metadata used in Sheet1.
METHODS: list of LCIA methods used in Sheet1.
[METHODS + METADATA have to be the only column labels in Sheet1]
>>>

df of raw data.......................... ...(946, 40)


===> Trying to load 'readme' data... ===
File: pubchem-properties.xlsx from
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\notebooks\0.20-vt-query-pubchem.ipynb
Generated on 2021-07-29 (Thursday), 19:20:09 by Tulus, V.
Includes:
<<<
Dataframe with chemical properties for unique reference products from df_out. 
Chemical

# Operations 
- add data with chemical properties (from PubChem) to df_raw_data
<div class="alert alert-block alert-info">
    created: <strong>df_analysis</strong>
</div>

In [4]:
df_pubchem_prop.sort_values(by=["MW", "pubchem_match"], ascending=True, inplace=True)
df_pubchem_prop.sample(3)

Unnamed: 0,referenceProduct,referenceProduct_casNumber,pubchem_match,num_matches,MF,MW,iupac,complexity,synonyms
292,Hydrogen cyanide,74-90-8,by CAS,1,CHN,27.025,formonitrile,10.0,"['hydrogen cyanide', 'hydrocyanic acid', 'Form..."
675,Trimethylamine,000075-50-3,by NAME,1,C3H9N,59.11,"N,N-dimethylmethanamine",8.0,"['trimethylamine', 'N,N-dimethylmethanamine', ..."
205,Dodecanol,,by NAME,1,C12H26O,186.33,dodecan-1-ol,81.2,"['1-DODECANOL', 'Dodecan-1-ol', 'Dodecanol', '..."


In [5]:
percent_not_matched = (
    1
    - df_pubchem_prop[df_pubchem_prop.num_matches != 0].referenceProduct.count()
    / df_pubchem_prop.referenceProduct.count()
) * 100
print("{}% of referenceProducts had no match...".format(percent_not_matched.round(2)))

29.86% of referenceProducts had no match...


## Explore df_pubchem_prop

In [6]:
# df_pubchem_prop[df_pubchem_prop.num_matches == 0]

In [7]:
df_pubchem_prop[df_pubchem_prop.num_matches == 0]

Unnamed: 0,referenceProduct,referenceProduct_casNumber,pubchem_match,num_matches,MF,MW,iupac,complexity,synonyms
11,2-cyclopentone,,No match,0,,,,,
22,"Acetamide-anillide-compound, unspecified",,No match,0,,,,,
35,"Acrylic binder, without water, in 34% solution...",,No match,0,,,,,
36,"Acrylic dispersion, without water, in 65% solu...",,No match,0,,,,,
37,Acrylic filler,,No match,0,,,,,
...,...,...,...,...,...,...,...,...,...
710,Xylene,1330-20-7,No match,0,,,,,
713,Zinc monosulfate,,No match,0,,,,,
716,"Zircon, 50% zirconium",001490-68-2,No match,0,,,,,
718,[sulfonyl]urea-compound,,No match,0,,,,,


In [8]:
# Components not identified automatically by PubChem
# have to be added manually !

COMP_ADDED_MANUALLY = [
    # from Javier's list
    "Liquefied petroleum gas",
    "Petrol, low-sulfur",  # or 'Petrol, unleaded',
    "Diesel",
    "Diesel, low-sulfur",
    "Kerosene",
    # other
    "Xylene",  # o-, m- or p-
]
COMP_ADDED_MANUALLY

['Liquefied petroleum gas',
 'Petrol, low-sulfur',
 'Diesel',
 'Diesel, low-sulfur',
 'Kerosene',
 'Xylene']

In [9]:
# Properties added here, 
# if desired, specific MW and complexity and other can be added below

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Liquefied petroleum gas",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 44.097] # Propane MW used as proxy

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Petrol, low-sulfur",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 105] # MW taken as average...

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Diesel",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 200] # MW taken as average...

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Diesel, low-sulfur",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 200] # MW taken as average...

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Kerosene",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 170] # MW taken as average...

df_pubchem_prop.loc[
    df_pubchem_prop.referenceProduct == "Xylene",
    ["pubchem_match", "num_matches", "MW"],
] = ["manual match", 1, 106]

for item in COMP_ADDED_MANUALLY:
    if df_pubchem_prop.loc[df_pubchem_prop.referenceProduct == item, ["pubchem_match"]].values[0] == "No match":
        df_pubchem_prop.loc[
            df_pubchem_prop.referenceProduct == item, ["pubchem_match", "num_matches", "MW"],
        ] = ["manual match", 1, 100] # dummy MW

In [10]:
filter_dataframe(
    df_in=df_pubchem_prop,
    col_name="referenceProduct",
    filter_in=COMP_ADDED_MANUALLY,
    exact_match=True,
    print_unique=True,
)

List of unique items matching your request:
		 ['Diesel, low-sulfur', 'Xylene', 'Liquefied petroleum gas', 'Kerosene', 'Diesel', 'Petrol, low-sulfur'] 



Unnamed: 0,referenceProduct,referenceProduct_casNumber,pubchem_match,num_matches,MF,MW,iupac,complexity,synonyms
180,"Diesel, low-sulfur",,manual match,1,,200.0,,,
710,Xylene,1330-20-7,manual match,1,,106.0,,,
334,Liquefied petroleum gas,,manual match,1,,44.097,,,
323,Kerosene,,manual match,1,,170.0,,,
179,Diesel,,manual match,1,,200.0,,,
449,"Petrol, low-sulfur",,manual match,1,,105.0,,,


In [11]:
percent_not_matched = (
    1
    - df_pubchem_prop[df_pubchem_prop.num_matches != 0].referenceProduct.count()
    / df_pubchem_prop.referenceProduct.count()
) * 100
print("{}% of referenceProducts had no match...".format(percent_not_matched.round(2)))

29.03% of referenceProducts had no match...


<div class="alert alert-block alert-warning"> 
❗❗❗ <br>
    the cas number of "Praseodymium oxide" is not found in the PubChem database, <br>
    but could be found in Sigma-Aldrich (which references to a compound name in PubChem -> "Praseodymium (III, IV) oxide"). <br>
    <strong>Change the name to make it searchable in PubChem...</strong> 
</div>

In [13]:
NO_MATCH_PRODS = list(df_pubchem_prop[df_pubchem_prop.num_matches == 0].referenceProduct)
e, *_ = NO_MATCH_PRODS[0].split(", ")
print("{} not matched reference products".format(len(NO_MATCH_PRODS)))
NO_MATCH_PRODS

209 not matched reference products


['2-cyclopentone',
 'Acetamide-anillide-compound, unspecified',
 'Acrylic binder, without water, in 34% solution state',
 'Acrylic dispersion, without water, in 65% solution state',
 'Acrylic filler',
 'Acrylic varnish, without water, in 87.5% solution state',
 'Activated bentonite',
 'Adhesive, for metal',
 'Alkyd paint, white, without solvent, in 60% solution state',
 'Alkyd paint, white, without water, in 60% solution state',
 'Alkyd resin, long oil, without solvent, in 70% white spirit solution state',
 'Alkyl sulphate (C12-14)',
 'Alkylketene dimer sizing agent, for paper production',
 'Aluminium fluoride',
 'Amine oxide',
 'Ammonium nitrite',
 'Anhydrite rock',
 'Anhydrite',
 'Anhydrite, burned',
 'Anionic resin',
 'Benzimidazole-compound',
 'Benzo[thia]diazole-compound',
 'Benzoic-compound',
 'Bipyridylium-compound',
 'Bisphenol A epoxy based vinyl ester resin',
 'Calcareous marl',
 'Calcium borates',
 'Calcium carbide, technical grade',
 'Cationic resin',
 'Cerium concentrate, 

### Try cirpy module (should be used in combination with pubchempy?)

In [14]:
import cirpy as crp
import pubchempy as pcp
from cirpy import Molecule

In [15]:
crp.resolve("Anhydrite", "iupac_name")

'CALCIUM SULFATE'

In [16]:
crp.query("Krypton", "iupac_name")

[Result(input='Krypton', representation='iupac_name', resolver='name_by_opsin', input_format='IUPAC name (OPSIN)', notation='Krypton', value='KRYPTON'),
 Result(input='Krypton', representation='iupac_name', resolver='name_by_cir', input_format='chemical name (CIR)', notation='KRYPTON', value='KRYPTON')]

In [17]:
c = pcp.get_compounds("EINECS 222-037-3", namespace="name", searchtype=None, as_dataframe=False)
print(c[0].molecular_formula)
print(c[0].molecular_weight)

C12H26N2O4
262.35


In [18]:
mol = Molecule("Nylon 6/6")
print(mol.cas)
print(mol.formula)
print(mol.mw)
print(mol.image_url)
print(mol.names)

['94289-34-6', '52349-42-5', '3323-53-3', '160886-56-6']
C12H26N2O4
262.3484
https://cactus.nci.nih.gov/chemical/structure/Nylon%206/6/image
['hexane-1,6-diamine; hexanedioic acid', 'adipic acid; hexane-1,6-diamine', 'adipic acid; 6-aminohexylamine', '94289-34-6', '52349-42-5', '3323-53-3', '160886-56-6', 'Hexanedioic acid, compd. with 1,6-hexanediamine (1:1)', 'Nylon 66 salt', '181129_ALDRICH', 'Nylon 6/6', "Poly(N,N'-hexamethyleneadipinediamide", 'Poly(hexamethylene adipamide)', 'hexanedioic acid - hexane-1,6-diamine (1:1)', 'Adipan hexamethylendiaminu [Czech]', 'Hexamethylenediammonium adipate', 'Hexanedioic acid, compd. with 1,6-hexanediamine (1:1), homopolymer', '429201_ALDRICH', '429236_ALDRICH', 'Adipic acid, compd. with 1,6-hexanediamine (1:1)', 'Adipic acid, compound with hexane-1,6-diamine (1:1)', 'Adipic aicd, compd. with 1,6-hexanediamine', 'EINECS 222-037-3', 'Hexamethylenediamine adipate (1:1)', 'Hexamethylenediamine monoadipate', '429171_ALDRICH']


In [19]:
c = pcp.get_compounds("52349-42-5", namespace="name", searchtype=None, as_dataframe=False)
print(c[0].molecular_formula)
print(c[0].molecular_weight)

IndexError: list index out of range

In [20]:
indices_NO_MATCH_PRODS = [
    idx
    for idx in df_raw_data.index
    if df_raw_data.referenceProduct[idx] in NO_MATCH_PRODS
]
print("{} not matched reference products in df_analysis_prev".format(len(indices_NO_MATCH_PRODS)))
# indices_NO_MATCH_PRODS

268 not matched reference products in df_analysis_prev


In [21]:
df_raw_data.columns

Index(['Activity', 'activity_comment', 'type', 'referenceProduct', 'category',
       'inline_comment', 'geo', 'activity_ISICclass',
       'activity_ecoSpold01class', 'technologyLevel', 'referenceProductAmount',
       'referenceProductUnit', 'referenceProduct_prodVolume',
       'referenceProduct_prodVolumeComment', 'referenceProduct_price',
       'referenceProduct_priceUnit', 'referenceProduct_priceComment',
       'referenceProduct_casNumber', 'referenceProduct_CPCclass',
       'activity_generalComment', 'sourceFilename',
       '('IPCC 2013 GWP 100a V1.03', 'IPCC GWP 100a', 'kg CO2 eq')',
       '('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Global warming', 'kg CO2 eq')',
       '('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Stratospheric ozone depletion', 'kg CFC11 eq')',
       '('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Ionizing radiation', 'kBq Co-60 eq')',
       '('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Ozone formation, Human health', '

In [22]:
df_to_explore = df_raw_data.loc[indices_NO_MATCH_PRODS][
    METADATA
    #     [
    #         "Activity",
    #         "category",
    #         "referenceProduct_CPCclass",
    #         "referenceProduct",
    #         "referenceProduct_prodVolume",
    #         "wasteType",
    #         "geo",
    #         "referenceProductUnit",
    #         "referenceProduct_casNumber",
    #     ]
].sort_values(
    by=["referenceProduct_prodVolume", "category"]
)  # .category.unique()  # .sort_index()

# df_to_explore
# sorted(
#     filter_dataframe(
#         _filter_by_geo_and_FU(df_to_explore, geo="GLO", FU="kg"),
#         col_name="referenceProduct_CPCclass",
#         filter_in=["3"],
#     ).Activity,  # .referenceProduct_CPCclass
#     reverse=False,
# )


lst_temp = []

for idx in internal_funcs.filter_by_geo_and_fu(
    df_to_explore, geo="GLO", funit="kg"
).index:
    x = df_to_explore.referenceProduct_CPCclass[idx]
    if (
        str(x).startswith("33")
        or str(x).startswith("34")
        or str(x).startswith("35")
        or str(x).startswith("36")
    ):
        lst_temp.append(df_to_explore.Activity[idx])
sorted(lst_temp)

['2-cyclopentone {GLO}| market for | APOS, S',
 'Acetamide-anillide-compound, unspecified {GLO}| market for | APOS, S',
 'Alkyl sulphate (C12-14) {GLO}| market for alkyl sulphate (C12-14) | APOS, S',
 'Aluminium fluoride {GLO}| market for | APOS, S',
 'Amine oxide {GLO}| market for amine oxide | APOS, S',
 'Ammonium nitrite {GLO}| market for | APOS, S',
 'Benzimidazole-compound {GLO}| market for | APOS, S',
 'Benzo[thia]diazole-compound {GLO}| market for | APOS, S',
 'Benzoic-compound {GLO}| market for | APOS, S',
 'Bipyridylium-compound {GLO}| market for | APOS, S',
 'Bisphenol A epoxy based vinyl ester resin {GLO}| market for | APOS, S',
 'Calcium borates {GLO}| market for | APOS, S',
 'Cerium concentrate, 60% cerium oxide {GLO}| market for | APOS, S',
 'Chemical, inorganic {GLO}| market for chemicals, inorganic | APOS, S',
 'Chemical, organic {GLO}| market for | APOS, S',
 'Cocamide diethanolamine {GLO}| market for cocamide diethanolamine | APOS, S',
 'Cyclic N-compound {GLO}| marke

In [23]:
len(lst_temp)

130

In [24]:
# find = 'Stone meal'
# find = "Horn meal"
find = "Polyvinylfluoride, dispersion"

print(list(df_raw_data[df_raw_data.referenceProduct==find].activity_comment),"\n")
print(list(df_raw_data[df_raw_data.referenceProduct==find].inline_comment),"\n")
print(list(df_raw_data[df_raw_data.referenceProduct==find].referenceProduct_prodVolumeComment),"\n")
print(list(df_raw_data[df_raw_data.referenceProduct==find].referenceProduct_priceComment),"\n")
print(list(df_raw_data[df_raw_data.referenceProduct==find].activity_generalComment),"\n")

df_raw_data[df_raw_data.referenceProduct==find]

['This product is generally considered to be used at the production site.  Therefore, the market does not contain any transport.\x7f\x7fProduction volume: 4.65661287307739E-10 kg\x7fIncluded activities start: \x7fIncluded activities end: \x7fEnergy values: \x7fGeography: The inventory is modelled for Global\x7fTechnology level: 0\x7fTechnology: \x7fStart date: 01/01/2011\x7fEnd date: 31/12/2018\x7fIs data valid for entire period: True\x7fTime period: \x7fMacro-economic scenario name: Business-as-Usual\x7f\x7fVersion: 3.0.3.0\x7fCreated: 8/2/2011 10:00:32 AM\x7fLast edited: 8/2/2011 10:00:32 AM\x7fSource: 22801_af5ee4fd-4673-4261-9ef2-67d192452fcd_09e83e96-9e71-422d-812b-631387b22a20.spold\x7fUUID: af5ee4fd-4673-4261-9ef2-67d192452fcd\x7f'] 

['\x7fProduction Volume Amount: 4.65661287307739E-10\x7f'] 

[nan] 

['Calculated based on inputs: The price of the product has been calculated as a sum of the values of the material and energy inputs for this product. This value is therefore a rou

Unnamed: 0,Activity,activity_comment,type,referenceProduct,category,inline_comment,geo,activity_ISICclass,activity_ecoSpold01class,technologyLevel,...,"('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Marine eutrophication', 'kg N eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Terrestrial ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Freshwater ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Marine ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Human carcinogenic toxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Human non-carcinogenic toxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Land use', 'm2a crop eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Mineral resource scarcity', 'kg Cu eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Fossil resource scarcity', 'kg oil eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Water consumption', 'm3')"
682,"Polyvinylfluoride, dispersion {GLO}| market fo...",This product is generally considered to be use...,Products,"Polyvinylfluoride, dispersion",Chemicals\Organic\Market,Production Volume Amount: 4.65661287307739E-10,GLO,2013:Manufacture of plastics and synthetic rub...,chemicals/organics,0.0,...,0.000556,28.487349,0.429711,0.600931,0.564713,12.443209,0.193482,0.030967,3.96495,0.186447


## Created ``df_analysis``, ``df_metadata`` and ``df_methods``

In [25]:
df_analysis = df_raw_data.merge(
    df_pubchem_prop, how="left", on=["referenceProduct", "referenceProduct_casNumber"]
)


# (!) update the list of non-method column labels
cols_from_pubchem_prop = df_pubchem_prop.columns.difference(df_raw_data.columns)
METADATA = [i for i in METADATA if i not in cols_from_pubchem_prop]
METADATA = METADATA + cols_from_pubchem_prop.to_list()
# # ---------------
df_analysis = df_analysis.loc[:, list(METADATA + METHODS)]

pd.options.display.max_columns = None

print("Created **df_analysis** dataframe is of {} shape.\n".format(df_analysis.shape))
df_analysis.sample(3)

Created **df_analysis** dataframe is of (946, 47) shape.



Unnamed: 0,Activity,activity_comment,type,referenceProduct,category,inline_comment,geo,activity_ISICclass,activity_ecoSpold01class,technologyLevel,referenceProductAmount,referenceProductUnit,referenceProduct_prodVolume,referenceProduct_prodVolumeComment,referenceProduct_price,referenceProduct_priceUnit,referenceProduct_priceComment,referenceProduct_casNumber,referenceProduct_CPCclass,activity_generalComment,sourceFilename,MF,MW,complexity,iupac,num_matches,pubchem_match,synonyms,"('IPCC 2013 GWP 100a V1.03', 'IPCC GWP 100a', 'kg CO2 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Global warming', 'kg CO2 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Stratospheric ozone depletion', 'kg CFC11 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Ionizing radiation', 'kBq Co-60 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Ozone formation, Human health', 'kg NOx eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Fine particulate matter formation', 'kg PM2.5 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Ozone formation, Terrestrial ecosystems', 'kg NOx eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Terrestrial acidification', 'kg SO2 eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Freshwater eutrophication', 'kg P eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Marine eutrophication', 'kg N eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Terrestrial ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Freshwater ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Marine ecotoxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Human carcinogenic toxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Human non-carcinogenic toxicity', 'kg 1,4-DCB')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Land use', 'm2a crop eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Mineral resource scarcity', 'kg Cu eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Fossil resource scarcity', 'kg oil eq')","('ReCiPe 2016 Midpoint (H) V1.03 / World (2010) H', 'Water consumption', 'm3')"
373,"Heavy fuel oil {RoW}| market for | APOS, S",[This dataset was already contained in the eco...,Products,Heavy fuel oil,Fuels\Oil\Fuel oil\Market,Production Volume Amount: 492287461397.519,RoW,1920:Manufacture of refined petroleum products,oil/fuels,0.0,1,kg,492287500000.0,,0.138,EUR2005,Calculated based on inputs: The price of the p...,,33370: Fuel oils n.e.c.,[This dataset was already contained in the eco...,31699_506f0fb7-e4d6-4312-9514-e6e0e6e1b068_296...,,,,,0.0,No match,,0.48271,0.491834,8.917379e-07,0.03839,0.001887,0.0015,0.002008,0.00428,6.5e-05,7e-06,0.637345,0.00502,0.008716,0.011453,0.152652,0.005734,0.000879,1.218281,0.005924
804,"Sodium silicate, solid {RER}| market for sodiu...",A regional market for Europe [RER] is motivate...,Products,"Sodium silicate, solid",Chemicals\Inorganic\Market,Production Volume Amount: 1.32236940655721,RER,2011:Manufacture of basic chemicals,,,1,kg,1.322369,,0.22,EUR2005,Calculated value based on data from United Nat...,1344-09-8,"34270: Cyanides, cyanide oxides and complex cy...",A regional market for Europe [RER] is motivate...,17258_130ad717-c078-5cd5-977a-75bf1445bf24_77e...,Na2O3Si,122.063,18.8,disodium;dioxido(oxo)silane,1.0,by CAS,"['Sodium metasilicate', 'SODIUM SILICATE', '68...",0.77668,0.784764,2.186706e-07,0.031249,0.001564,0.001196,0.001588,0.003566,0.000298,1.8e-05,3.951537,0.030439,0.044293,0.032033,1.044557,0.045176,0.003774,0.166192,0.010558
169,"Carbon dioxide, liquid {RoW}| market for | APO...",Special transport modelling for liquid gases: ...,Products,"Carbon dioxide, liquid",Chemicals\Gases\Liquified\Market,Production Volume Amount: 22470000000,RoW,2011:Manufacture of basic chemicals,chemicals/inorganics,0.0,1,kg,22470000000.0,,0.11,EUR2005,Calculated value based on data from United Nat...,,"34210: Hydrogen, nitrogen, oxygen, carbon diox...",Special transport modelling for liquid gases: ...,26510_8ea6ae88-0105-4ab5-9f79-bc4ee0ca542c_ef6...,CO2,44.009,18.3,,1.0,by NAME,"['carbon dioxide', 'carbonic anhydride', 'Dry ...",0.902718,0.968019,2.29594e-07,0.037674,0.001238,0.001188,0.001258,0.002144,0.000295,5.4e-05,6.085744,0.025215,0.034885,0.027123,0.768201,0.012884,0.002578,0.169171,0.004826


In [26]:
# Make df of METADATA and METHODS for later export
df_metadata = pd.DataFrame(METADATA, columns=["METADATA"])
df_methods = pd.DataFrame(METHODS, columns=["METHODS"])

# OUTPUTS: Export data to excel

In [27]:
%%time

# Set output directory
outputs_dir = set_outputs_dir(use_default=False, rel_path_output=r"..\data\lcaforsac")  # default `..\data\interim`

## Export dataframe to excel
excelName = "extended-data-chosen-methods-metadata-pubchem-properties.xlsx"

df_readme = make_readme_info(
    excelName,
    "Sheet1: Extended data with chosen LCIA methods, important metadata and PubChem properties"
    "\nMETADATA: list of relevant metadata used in Sheet1."
    "\nMETHODS: list of LCIA methods used in Sheet1."
    "\n[METHODS + METADATA have to be the only column labels in Sheet1]",
)

w_excel(
    path_to_file=outputs_dir,
    filename=excelName,
    dict_data_to_write={
        "Sheet1": df_analysis,
        "METADATA": df_metadata,
        "METHODS": df_methods,
    },
    readme_info=("readme", df_readme),
    #     ExcelWriter_kwargs={"engine": "openpyxl", "encoding": "UTF-8"}
    #     startrow=0
)

File: extended-data-chosen-methods-metadata-pubchem-properties.xlsx successfully created in 
C:\Users\ViteksPC\Documents\00-ETH_projects\17-AESA_ecoinvent_chemicals\data\lcaforsac
Wall time: 1.83 s
