In [1]:
import pprint
import re
from functools import partial
from pathlib import Path

import bw2calc as bc
import bw2data as bd
import bw2io as bi
from project_path import ROOT_DIR

In [2]:
# bd.projects

In [3]:
# Select the Brightway project named "default" as the current project.
bd.projects.set_current("default")

In [4]:
# bd.databases

## Import Freshwater Use method
 > Global

In [5]:
# Identifier of the method being imported. It is interpolated into the SimaPro
# CSV file name, into the stored Excel file name, and passed to write_excel().
method_category = "FreshwaterUse_Global"

In [6]:
# Directory holding the SimaPro CSV exports. Built from path components so the
# separator is not hard-coded to the Windows backslash. Kept as a ``str``
# because a later cell concatenates onto it with ``+``.
SIMAPRO_DIR = str(Path(ROOT_DIR) / "data" / "external" / "from_simapro")

In [7]:
# Destination of the corrected Excel file for this method category.
# Joined component by component so the path is valid on any OS, not only
# where "\" is the path separator.
STORE_DATA_TO = (
    Path(ROOT_DIR) / "src" / "aesa_pbs" / "data" / f"aesa_{method_category}.xlsx"
)

In [8]:
# Parse the SimaPro LCIA CSV export of the selected method category.
# The file name is joined with ``/`` rather than concatenated as "\PBs...":
# ``\P`` is an invalid escape sequence in a non-raw f-string (a warning today,
# slated to become an error) and "\" is a path separator on Windows only.
si = bi.SimaProLCIACSVImporter(
    filepath=Path(SIMAPRO_DIR) / f"PBsLCIAv072_{method_category}.csv"
)

Extracted 1 methods in 0.03 seconds


In [9]:
# si.data[0].keys()

In [10]:
# Print the name and unit of every method the importer extracted.
for method in si.data:
    print(method["name"], method["unit"])

('PBs-LCIA (baseline)', 'Freshwater use - Global') km3


In [11]:
si.data[0]["exchanges"][0]  # display the first exchange of the first method

{'amount': -1e-09,
 'CAS number': '007732-18-5',
 'categories': ('Water', 'groundwater'),
 'name': 'Water, AD',
 'unit': 'm3'}

In [12]:
# Run the importer's strategies (no explicit list is passed), then report the
# counts of methods, CFs and unlinked CFs.
si.apply_strategies()
si.statistics()

Applying strategy: normalize_units
Applying strategy: set_biosphere_type
Applying strategy: normalize_simapro_biosphere_categories
Applying strategy: normalize_simapro_biosphere_names
Applying strategy: set_biosphere_type
Applying strategy: drop_unspecified_subcategories
Applying strategy: normalize_biosphere_categories
Applying strategy: normalize_biosphere_names
Applying strategy: link_iterable_by_fields
Applying strategy: match_subcategories
Applied 10 strategies in 0.45 seconds
1 methods
2168 cfs
2140 unlinked cfs


(1, 2168, 2140)

In [17]:
# NOTE(review): leftover introspection cell. The output recorded below is the
# help text of ``match_subcategories``, not of this function, and the cell's
# execution count is out of sequence. Consider deleting the cell.
bi.strategies.normalize_simapro_biosphere_categories

[1;31mSignature:[0m [0mbi[0m[1;33m.[0m[0mstrategies[0m[1;33m.[0m[0mmatch_subcategories[0m[1;33m([0m[0mdata[0m[1;33m,[0m [0mbiosphere_db_name[0m[1;33m,[0m [0mremove[0m[1;33m=[0m[1;32mTrue[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m Given a characterization with a top-level category, e.g. ``('air',)``, find all biosphere flows with the same top-level categories, and add CFs for these flows as well. Doesn't replace CFs for existing flows with multi-level categories. If ``remove``, also delete the top-level CF, but only if it is unlinked.
[1;31mFile:[0m      c:\users\vitekspc\virtualenvs\bw2\lib\site-packages\bw2io\strategies\lcia.py
[1;31mType:[0m      function


In [15]:
# Write the current matching state to "lcia-matching-temporal-fwu.xlsx" in the
# project's output directory (the full path is printed by the call).
si.write_excel("temporal-fwu")

Wrote matching file to:
C:\Users\ViteksPC\AppData\Local\pylca\Brightway3\default.c21f969b5f03d33d43e04f8f136e7682\output\lcia-matching-temporal-fwu.xlsx


In [13]:
# # remove ISO2 locations in the end of a flow
# def remove_water_ISO2_location(data):
#     for method in data:
#         for exc in method["exchanges"]:
#             exc['name'] = re.sub(", [A-Z]{2}$", "", exc["name"])
#     return data

# remove list of locations in the end of a flow
def remove_water_location(data):
    """Strip trailing location / quality qualifiers from exchange names.

    Every match of one of the suffix patterns below is removed from the
    ``name`` of each exchange of each method in ``data``. The exchanges are
    modified in place and ``data`` itself is returned.
    """
    suffix_patterns = (
        ", Europe.*$",
        ", [A-Z].*$",
        ", IAI Area.*$",
        ",  Europe.*$",
        ", quality .*$",
    )
    # A single alternation of all patterns, compiled once up front.
    suffix_re = re.compile("|".join(suffix_patterns))

    for method in data:
        for exchange in method["exchanges"]:
            exchange["name"] = suffix_re.sub("", exchange["name"])
    return data

# Pre-binds ``bi.strategies.link_iterable_by_fields`` with the biosphere
# database (``bd.config.biosphere``) as ``other``, ``kind="biosphere"`` and
# ("name", "categories") as the fields to compare. The resulting callable only
# needs the data argument, so it can be listed in ``apply_strategies``.
link_by_name_and_categories = partial(
    bi.strategies.link_iterable_by_fields,
    other=bd.Database(bd.config.biosphere),
    kind="biosphere",
    fields=("name", "categories"),
)

In [14]:
def drop_duplicates(data):
    """Keep one exchange per ``(name, categories)`` pair in every method.

    When several exchanges of a method share the same key, the last one is
    kept, at the position where that key first appeared. The ``exchanges``
    list of each method is replaced with the de-duplicated list.

    Args:
        data: iterable of method dicts, each with an ``"exchanges"`` list of
            dicts that have ``"name"`` and ``"categories"`` entries.

    Returns:
        The same ``data`` object, also when it contains no methods.
    """
    for method in data:
        unique_exchanges = {
            (exc["name"], exc["categories"]): exc for exc in method["exchanges"]
        }
        method["exchanges"] = list(unique_exchanges.values())
    # The return must sit outside the loop: returning from inside it processed
    # only the first method and produced None for an empty ``data``.
    return data

In [15]:
# Second pass with the strategies defined in this notebook: strip location
# suffixes from names, link on (name, categories), drop duplicate exchanges.
# Then report the counts again.
si.apply_strategies(
    [
        remove_water_location,
        # remove_water_ISO2_location,
        link_by_name_and_categories,
        drop_duplicates,
    ]
)
si.statistics()

Applying strategy: remove_water_location
Applying strategy: link_iterable_by_fields
Applying strategy: drop_duplicates
Applied 3 strategies in 0.21 seconds
1 methods
38 cfs
26 unlinked cfs


(1, 38, 26)

In [16]:
# list(si.unlinked)

In [17]:
# List the (name, categories) of every exchange that is still unlinked.
# NOTE(review): a previous note here referred to carbon dioxide / carbon
# monoxide flows. The recorded output contains only water flows, so that note
# was dropped as stale.
[(exc["name"], exc["categories"]) for exc in list(si.unlinked)]

[('Water, cooling, unspecified natural origin', ('natural resource',)),
 ('Water, extreme water stress', ('water', 'ground-')),
 ('Water, extreme water stress', ('water', 'surface water')),
 ('Water, fossil', ('natural resource',)),
 ('Water, fresh', ('natural resource',)),
 ('Water, high water stress', ('water', 'ground-')),
 ('Water, high water stress', ('water', 'surface water')),
 ('Water, low water stress', ('water', 'ground-')),
 ('Water, low water stress', ('water', 'surface water')),
 ('Water, medium water stress', ('water', 'ground-')),
 ('Water, medium water stress', ('water', 'surface water')),
 ('Water, moderate water stress', ('water', 'ground-')),
 ('Water, moderate water stress', ('water', 'surface water')),
 ('Water, process and cooling, unspecified natural origin',
  ('natural resource',)),
 ('Water, process, unspecified natural origin/m3', ('natural resource',)),
 ('Water, rain', ('natural resource',)),
 ('Water, turbine use, unspecified natural origin', ('natural res

In [18]:
def see_unlinked(imported_methods):
    """Print the exchanges whose name matches an unlinked exchange.

    For every method in ``imported_methods.data`` the second element of its
    name is printed, followed by the distinct ``(name, categories, amount)``
    tuples of the exchanges whose ``name`` equals the name of any exchange in
    ``imported_methods.unlinked``. Matching is by name only, so an exchange
    that shares its name with an unlinked one is listed too.

    The tuples are printed in sorted order so that the output is the same on
    every run (plain ``set`` iteration order varies between kernels).

    Args:
        imported_methods: object exposing ``data`` (list of method dicts) and
            ``unlinked`` (iterable of exchange dicts).
    """
    # A set gives constant-time membership tests inside the loop below.
    unlinked_names = {exc["name"] for exc in imported_methods.unlinked}

    for method in imported_methods.data:
        print(method["name"][1])
        matching = {
            (exc["name"], exc["categories"], exc["amount"])
            for exc in method["exchanges"]
            if exc["name"] in unlinked_names
        }
        for row in sorted(matching):
            print("\t", row)
        print("\n")

In [19]:
def check_equivalent_linked(imported_methods, list_names):
    """Print the exchanges whose name contains one of ``list_names``.

    For each method in ``imported_methods.data`` the second element of its
    name is printed, then one line (name, categories, amount) per
    exchange/fragment pair where the fragment is a substring of the exchange
    name. An exchange matching several fragments is printed once per fragment.
    """
    for method in imported_methods.data:
        print(method["name"][1])
        hits = (
            exchange
            for exchange in method["exchanges"]
            for fragment in list_names
            if fragment in exchange["name"]
        )
        for exchange in hits:
            print("\t", exchange["name"], exchange["categories"], exchange["amount"])
        print("\n")

In [20]:
def remove_duplicates_if_any(imported_methods):
    """Report and remove exchanges that share the same ``input``.

    For each method in ``imported_methods.data`` this prints the second
    element of its name with the number of exchanges and the number of
    distinct ``input`` values. If the two differ, the method's ``exchanges``
    list is replaced by one exchange per ``input`` (the last one seen for
    that ``input``); otherwise the list is left untouched.
    """
    for method in imported_methods.data:
        exchanges = method["exchanges"]
        # Keyed by input: a repeated key overwrites the earlier exchange.
        by_input = {tuple(exc["input"]): exc for exc in exchanges}
        n_before, n_unique = len(exchanges), len(by_input)
        print(method["name"][1], ": ", n_before, n_unique)

        if n_before == n_unique:
            print("No duplicates.\n")
            continue

        method["exchanges"] = list(by_input.values())
        print("Duplicates removed.\n")

In [21]:
# Print, per method, the exchanges whose name matches an unlinked exchange.
see_unlinked(si)

Freshwater use - Global
	 ('Water, moderate water stress', ('water', 'ground-'), -1e-09)
	 ('Water, unspecified natural origin, extreme water stress', ('natural resource',), 0.0)
	 ('Water, turbine use, unspecified natural origin', ('natural resource', 'in water'), 0.0)
	 ('Water, unspecified natural origin', ('natural resource', 'fossil well'), 0.0)
	 ('Water, cooling, unspecified natural origin', ('natural resource', 'in water'), 0.0)
	 ('Water, extreme water stress', ('water', 'ground-'), -1e-09)
	 ('Water, unspecified natural origin/m3', ('natural resource',), 0.0)
	 ('Water, unspecified natural origin', ('natural resource',), 0.0)
	 ('Water, medium water stress', ('water', 'ground-'), -1e-09)
	 ('Water, high water stress', ('water', 'surface water'), -1e-09)
	 ('Water, low water stress', ('water', 'ground-'), -1e-09)
	 ('Water, unspecified natural origin, very high water stress', ('natural resource',), 0.0)
	 ('Water, fresh', ('natural resource',), 0.0)
	 ('Water, extreme water st

In [22]:
# Distinct (name, categories) pairs of the "biosphere3" flows whose name
# contains "Water" and whose categories mention "natural resource".
b3 = bd.Database("biosphere3")
{
    (flow["name"], flow["categories"])
    for flow in b3
    if "Water" in flow["name"] and "natural resource" in str(flow["categories"])
}

{('Water, cooling, unspecified natural origin',
  ('natural resource', 'in water')),
 ('Water, in air', ('natural resource', 'in air')),
 ('Water, lake', ('natural resource', 'in water')),
 ('Water, river', ('natural resource', 'in water')),
 ('Water, salt, ocean', ('natural resource', 'in water')),
 ('Water, salt, sole', ('natural resource', 'in water')),
 ('Water, turbine use, unspecified natural origin',
  ('natural resource', 'in water')),
 ('Water, unspecified natural origin', ('natural resource', 'fossil well')),
 ('Water, unspecified natural origin', ('natural resource', 'in ground')),
 ('Water, unspecified natural origin', ('natural resource', 'in water')),
 ('Water, well, in ground', ('natural resource', 'in water'))}

In [23]:
# check_equivalent_linked(si, ["Water,"])

In [24]:
# Discard the CFs that are still unlinked, then report the counts again.
si.drop_unlinked()
si.statistics()

Applying strategy: drop_unlinked_cfs
Applied 1 strategies in 0.00 seconds
1 methods
12 cfs
0 unlinked cfs


(1, 12, 0)

In [25]:
# Check for exchanges sharing the same input: the two printed numbers are the
# exchange count and the distinct-input count, and are equal when there are no
# duplicates. Any duplicates found are removed.
remove_duplicates_if_any(si)

Freshwater use - Global :  12 12
No duplicates.



In [26]:
# Counts of methods, CFs and unlinked CFs after the duplicate check.
si.statistics()

1 methods
12 cfs
0 unlinked cfs


(1, 12, 0)

In [27]:
# Next, si.write_excel(method_category) will create an Excel file at
# ~\Local\pylca\Brightway3\default###\output\lcia-matching-{method_category}.xlsx

# Open the created Excel file and apply the manual corrections.
# TODO: automate these corrections so that no manual step is needed.

# Then store the corrected Excel file at STORE_DATA_TO.

In [None]:
# Write the lcia-matching Excel file for this method category.
si.write_excel(method_category)