## Setting Up the Database

### Loading the DataFrame and converting it to the input format required by the DB

In [None]:
## load the cleaned data
import pandas as pd

## location of the input file
PATH = "./data/clean_data.csv"
df = pd.read_csv(PATH)
## re-number the rows 0..n-1
dfa = df.reset_index(drop=True)

## number of material columns (their names contain "M-")
mats = [col for col in df.columns if "M-" in col]
n_mats = len(mats)
print(n_mats)

## wide layout:  Processes | M-<1> | M-<2> | M-<3> | ...
## long layout:  Processes | Material | Consumption
long_df = pd.wide_to_long(dfa, ["M-"], i="Processes", j="Material")
long_df = long_df.reset_index()
long_df = long_df.rename(columns={"M-": "Consumption"})

## flow direction taken from the sign of the consumption:
## x > 0 -> Input (1), x < 0 -> Output (-1), x = 0 -> None (0)
long_df['IO'] = long_df['Consumption'].map(lambda amount: 1 if amount > 0 else -1 if amount < 0 else 0)
## human-readable version of the flag
long_df['IO_txt'] = long_df['IO'].map({1: "Input", -1: "Output", 0: "None"})

## normalise the identifiers: "<prefix>-<number>" -> "P<number>", <number> -> "M<number>"
long_df['Processes'] = [f'P{int(name.split("-")[1])}' for name in long_df['Processes']]
long_df['Material'] = [f"M{number}" for number in long_df['Material']]

## order the rows per process, then per material
## (both columns hold strings at this point, so the order is lexicographic)
long_df = long_df.sort_values(by=['Processes','Material'])
## keep only the flows that are not zero
long_df_nz = long_df[long_df["Consumption"] != 0]


def _flows_per_process(flows, io_flag, materials_col, cost_col):
    """Collect the materials and consumptions of every process for one flow direction.

    Parameters
    ----------
    flows : pd.DataFrame
        Long-format frame with the columns "Processes", "Material",
        "Consumption" and "IO".
    io_flag : int
        Value of the "IO" column to keep (1 = input, -1 = output).
    materials_col, cost_col : str
        Names given to the two per-process list columns.

    Returns
    -------
    tuple
        ``(selected, materials, costs, merged)`` where ``selected`` are the rows
        of ``flows`` with the requested flag, ``materials`` / ``costs`` hold one
        row per process with the list of materials / consumptions, and
        ``merged`` is ``selected`` with both list columns left-joined on
        "Processes".
    """
    selected = flows[flows["IO"] == io_flag]
    per_process = selected.groupby("Processes")
    materials = (
        per_process["Material"].apply(list).reset_index()
        .rename(columns={"Material": materials_col})
    )
    costs = (
        per_process["Consumption"].apply(list).reset_index()
        .rename(columns={"Consumption": cost_col})
    )
    merged = (
        selected
        .merge(materials, on="Processes", how="left")
        .merge(costs, on="Processes", how="left")
    )
    return selected, materials, costs, merged


## Used Materials (IO == 1): lists "Inputs" and "InputCost" per process
(input_materials,
 used_materials_per_process,
 used_consumption_materials_per_process,
 used_materials_df) = _flows_per_process(long_df_nz, 1, "Inputs", "InputCost")

## Produced materials (IO == -1): lists "Outputs" and "OutputCost" per process
(output_materials,
 produced_materials_per_process,
 produced_consumption_materials_per_process,
 produced_materials_df) = _flows_per_process(long_df_nz, -1, "Outputs", "OutputCost")


## tmp1 / tmp2: keep the process id and its per-process list columns only
tmp1 = used_materials_df.drop(columns=["Material", "Consumption", "IO", "IO_txt"])
tmp2 = produced_materials_df.drop(columns=["Material", "Consumption", "IO", "IO_txt"])
## one row per process on each side *before* merging: joining the row-level
## frames would first create one row per (input, output) pair of a process
## and those copies would have to be dropped again afterwards
## (the result is therefore indexed 0..n-1)
## NOTE(review): the left merge keeps only processes that have at least one
## input; a process with outputs only does not appear in tmp3 — confirm intended
tmp3 = tmp1.drop_duplicates('Processes').merge(
    tmp2.drop_duplicates('Processes'), on="Processes", how="left"
)


def _sum_flows(values):
    """Sum one per-process list; a process without flows on that side holds NaN, counted as 0."""
    return sum(values) if isinstance(values, list) else 0


## totals per process
tmp3['TotalOutputCost'] = tmp3['OutputCost'].apply(_sum_flows)
tmp3['TotalInputCost'] = tmp3['InputCost'].apply(_sum_flows)

## export the non-zero flows (without the helper columns) for the database
to_use = long_df_nz.drop(columns=['IO','IO_txt'])
to_use.to_csv("./data/final_df_for_db.csv", index=False)

In [None]:
## display the per-process summary (one row per process: input/output lists and totals)
tmp3

In [None]:
## first rows of the per-process summary
tmp3.head()

In [None]:
## select the processes of tmp3 that use a given material as input

material = "M1"

## NOTE(review): `l` is not used by any visible cell — confirm whether it is still needed
l = ["M10","M2","M3","M4","M5","M6","M7","M8","M9","M1"]
## index labels of the rows whose input list contains the material
idxs = [label for label, inputs in tmp3['Inputs'].items() if material in inputs]
## keep only those rows
tmp3.loc[idxs]

### Start the Database

The workflow in this section is:

1) Set up the database
2) Connect to the DB and perform actions on it
3) Run queries against the system

In [None]:
## autoreload 
%load_ext autoreload
%autoreload 2


In [None]:
## importing the connectors 
from db.connector import DBConnector
## load the environment variables
from db.db_helpers import load_env_vars, clear_db

## import the materials 
from db.Materials import (Material, material_payload,
                          populate_materials_from_df,
                          add_material,remove_material,
                          update_material_name,update_material_quantity,
                          add_material_relationship)

## import the processes 
from db.Processes import (Process, process_payload,
                          populate_process_from_df,add_process)

In [None]:
## read the connection settings for the database
env_vars = load_env_vars()
## build the connector from those settings (passed as keyword arguments)
connector = DBConnector(**env_vars)
## open the connection
## NOTE(review): `_connect` is underscore-prefixed (private by convention) — check whether DBConnector exposes a public way to connect
connector._connect()
## to close the connection again:
#connector._close()
## to list all the Material nodes:
#Material.nodes.all()
## NOTE(review): presumably removes everything stored in the database so the notebook starts from an empty graph — confirm before running against real data
clear_db()

### Testing the db 

In [None]:
## payload of test material M99
## NOTE(review): the name "Hg" and the description "pure Silver" disagree
## (Hg is the symbol of mercury, silver is Ag) — confirm which one is meant
hg_dict = {
    "uuid": "M99",
    "name": "Hg",
    "quantity": 100,
    "unit": "ton",
    "cost": 1_000_000_000,
    "description": "Material 99 - is pure Silver",
}

## payload of test material M98
ur_dict = {
    "uuid": "M98",
    "name": "Ur",
    "quantity": 999,
    "unit": "ton",
    "cost": 99_999_999_999,
    "description": "Material 98 - is pure Uranium, used in Max's Laboratory",
}

## add both materials (M99 first, then M98), reporting what happens
hg, status = add_material(hg_dict, verbose=True)
ur, status2 = add_material(ur_dict, verbose=True)

## show every Material node now stored
print(Material.nodes.all())
## to remove the test nodes again:
#clear_db()

In [None]:
## start again from a cleared database
## NOTE(review): clear_db presumably deletes everything that is stored — confirm
clear_db()
## re-add test material M99 (hg_dict is defined in the previous cell)
hg,status = add_material(hg_dict,verbose=False)
## re-add test material M98 (ur_dict is defined in the previous cell)
ur,status2 = add_material(ur_dict,verbose=False)
## relate the two materials in both directions
rel_status1 = add_material_relationship(hg, ur) ## hg --> ur 
rel_status2 = add_material_relationship(ur, hg) ## ur --> hg 

## rename the material with uuid "M99" to "K"
old_uuid = "M99"
new_name = 'K'
upname1 = update_material_name(material_id=old_uuid, new_name=new_name)

### Populating the db

In [None]:
## populate the materials from the exported long-format frame `to_use`
## NOTE(review): presumably creates one Material node per material found in the frame — confirm against db.Materials
populate_materials_from_df(df=to_use, verbose=False)

In [None]:
## populate the processes from the same long-format frame `to_use`, with verbose output
populate_process_from_df(df=to_use, verbose=True)

In [None]:

## list all the Material nodes
Material.nodes.all()

In [None]:

## list all the Process nodes
Process.nodes.all()

In [224]:
## class for managing the queries to the database 
from db.Query import Query

In [226]:
## show the query string currently held by Q
## NOTE(review): Q is only created in the following cell (the execution counts are out of order), so this cell raises NameError on a fresh kernel
Q.query

'MATCH (n:Material {uuid: "M1"})-[r]-(m) RETURN r, n, m'

In [225]:
## instantiate the query helper
## NOTE(review): `db` is not defined in any visible cell — confirm where it comes from
Q = Query(db)
## material query: print the first entry of the first result row
Q.material_query("M1")
print(Q.run_query()[0][0])
## process query
Q.process_query("P1")
print(Q.run_query())
## subgraph query: keep the result, the parsing cell further down reads `subgraph_mat`
Q.subgraph_query("M1","Material")
subgraph_mat = Q.run_query()
print(subgraph_mat)


[<Node id=50 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>]
([[<Node id=70 labels=frozenset({'Process'}) properties={'name': 'P1__XX', 'uuid': 'P1'}>]], ['n'])
([[<Relationship id=287 nodes=(<Node id=99 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>, <Node id=50 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=50 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>, <Node id=99 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>], [<Relationship id=277 nodes=(<Node id=98 labels=frozenset({'Process'}) properties={'name': 'P8__XX', 'uuid': 'P8'}>, <Node id=50 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=50 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 

In [None]:
def parse_subgraph(subgraph: tuple) -> tuple:
    """Extract the relationships of a subgraph query result.

    Parameters
    ----------
    subgraph : tuple
        Query result whose first element is the list of result rows; every
        row is a sequence that mixes relationship and node objects.

    Returns
    -------
    tuple
        ``(data, relationships)``: ``data`` is a list of
        ``(relationship type, uuid of first node, uuid of second node)``
        tuples, ``relationships`` the list of relationship objects the
        tuples were built from, in row order.
    """
    ## relationships are recognised by the two attributes read below (`type`
    ## and `nodes`); the `neo4j` package is never imported in this notebook,
    ## so a check against its node class would raise NameError
    relationships = [
        item
        for row in subgraph[0]
        for item in row
        if hasattr(item, "type") and hasattr(item, "nodes")
    ]
    ## materialised as a list so the result can be consumed more than once
    ## (e.g. when the cell that builds the dataframe is re-run)
    data = [
        (rel.type, rel.nodes[0]["uuid"], rel.nodes[1]["uuid"])
        for rel in relationships
    ]
    return data, relationships

In [None]:
def parsed_subgraph_to_df(parsed_subgraph:list,verbose:bool=False) -> pd.DataFrame:
    """Tabulate parsed subgraph triples as Relationship | From | To.

    The rows are sorted by relationship type. With ``verbose=True`` each
    INPUT_MATERIAL / OUTPUT_MATERIAL row is additionally printed as a
    colored one-line summary (this needs the ``termcolor`` package).
    """
    edges = pd.DataFrame(parsed_subgraph, columns=['Relationship', 'From', 'To'])
    edges = edges.sort_values(by=['Relationship'], ascending=True)
    if not verbose:
        return edges

    from termcolor import colored
    for rel_type, source, target in edges.itertuples(index=False, name=None):
        if rel_type == 'INPUT_MATERIAL':
            ## e.g. "M1" is INPUT_MATERIAL for "P2"; "M1" --> "P2"
            label = colored("INPUT_MATERIAL ", "green")
            print(f"{label} for Process {target}; {source} --> {target}")
        elif rel_type == 'OUTPUT_MATERIAL':
            ## e.g. "M1" is OUTPUT_MATERIAL for "P2"; "P2" --> "M1"
            label = colored("OUTPUT_MATERIAL", "red")
            print(f"{label} for Process {source}; {source} --> {target}")
    return edges

In [None]:
## split the subgraph result into (relationship type, from uuid, to uuid) triples and the relationship objects
## NOTE(review): `subgraph_mat` is presumably the tuple returned by Q.run_query() after Q.subgraph_query(...) — make sure it is assigned before this cell runs
data,rels = parse_subgraph(subgraph_mat)

In [None]:
## tabulate the triples and print one colored line per relationship
## NOTE(review): this rebinds `df`, the name the first cell assigns to the raw CSV data
df = parsed_subgraph_to_df(data,verbose=True)