## Setting up the Database

### Loading the DataFrame and converting it to the input format required by the DB

In [10]:
## read the cleaned data set into a DataFrame
import pandas as pd

## relative location of the CSV file
PATH = "./data/clean_data.csv"
df = pd.read_csv(PATH)
## resetting the indices: dfa is a copy of df with a fresh 0..n-1 index
dfa = df.reset_index(drop=True)

## material columns are the ones whose name contains "M-"
mats = [column for column in df.columns if "M-" in column]
## number of materials
n_mats = len(mats)
print(n_mats)

20


In [72]:
## reshape from wide (Processes | M-1 | M-2 | M-3 | ...)
## to long (Processes | Material | Consumption): one row per process/material pair
long_df = pd.wide_to_long(dfa, ["M-"], i="Processes", j="Material")
long_df = long_df.reset_index()
long_df = long_df.rename(columns={"M-": "Consumption"})

## numeric flag from the sign of the consumption:
## positive -> 1 (Input), negative -> -1 (Output), zero -> 0 (None)
long_df['IO'] = long_df['Consumption'].apply(lambda amount: int(amount > 0) - int(amount < 0))
## human-readable version of the same flag
long_df['IO_txt'] = long_df['IO'].apply(lambda flag: {1: "Input", -1: "Output"}.get(flag, "None"))

## short labels: the number after the dash of the process id becomes P<number>,
## the numeric material suffix becomes M<number>
long_df['Processes'] = [f'P{int(label.split("-")[1])}' for label in long_df['Processes']]
long_df['Material'] = [f"M{number}" for number in long_df['Material']]


In [73]:
## order the rows per process, then per material
## (labels are strings, so the order is lexicographic: P1, P10, P11, ...)
long_df = long_df.sort_values(['Processes', 'Material'])
## keep only the rows where the process actually uses or produces the material
long_df_nz = long_df[long_df["Consumption"] != 0]

In [74]:
## display the long-format frame (sorted by process and material)
long_df

Unnamed: 0,Processes,Material,Consumption,IO,IO_txt
0,P1,M1,-73,-1,Output
270,P1,M10,0,0,
300,P1,M11,0,0,
330,P1,M12,-60,-1,Output
360,P1,M13,0,0,
...,...,...,...,...,...
128,P9,M5,67,1,Input
158,P9,M6,-5,-1,Output
188,P9,M7,-95,-1,Output
218,P9,M8,-34,-1,Output


In [75]:
## Per-process summary of the non-zero rows.
## IO == 1 (positive consumption) marks a material a process uses,
## IO == -1 (negative consumption) marks a material a process produces.

## Used Materials
input_materials = long_df_nz.query("IO == 1")
used_materials_per_process = (
    input_materials.groupby("Processes")['Material'].apply(list).reset_index()
    .rename(columns={"Material": "Inputs"})
)
## consumption values of the inputs, in the same row order as "Inputs"
used_consumption_materials_per_process = (
    input_materials.groupby("Processes")['Consumption'].apply(list).reset_index()
    .rename(columns={"Consumption": "InputCost"})
)
## row-level view: every input row also carries the per-process lists
used_materials_df = input_materials.merge(used_materials_per_process, on="Processes", how="left")
used_materials_df = used_materials_df.merge(used_consumption_materials_per_process, on="Processes", how="left")


## Produced materials
output_materials = long_df_nz.query("IO == -1")
produced_materials_per_process = (
    output_materials.groupby("Processes")['Material'].apply(list).reset_index()
    .rename(columns={"Material": "Outputs"})
)
## consumption values of the outputs, in the same row order as "Outputs"
produced_consumption_materials_per_process = (
    output_materials.groupby("Processes")['Consumption'].apply(list).reset_index()
    .rename(columns={"Consumption": "OutputCost"})
)
## row-level view: every output row also carries the per-process lists
produced_materials_df = output_materials.merge(produced_materials_per_process, on="Processes", how="left")
produced_materials_df = produced_materials_df.merge(produced_consumption_materials_per_process, on="Processes", how="left")


## tmp1: Processes | Inputs | InputCost, tmp2: Processes | Outputs | OutputCost.
## Both are built from the per-process tables so they hold ONE row per process.
## Merging the row-level frames instead repeats every process
## (number of inputs x number of outputs) times before the duplicates are dropped.
tmp1 = used_materials_per_process.merge(used_consumption_materials_per_process, on="Processes")
tmp2 = produced_materials_per_process.merge(produced_consumption_materials_per_process, on="Processes")
## outer join: a process that only consumes or only produces materials is kept
tmp3 = tmp1.merge(tmp2, on="Processes", how="outer")

## a side that is missing after the outer join shows up as NaN;
## replace it with an empty list so the sums below (and later loops) do not fail
for list_column in ["Inputs", "InputCost", "Outputs", "OutputCost"]:
    tmp3[list_column] = tmp3[list_column].apply(lambda cell: cell if isinstance(cell, list) else [])

## totals per process (an empty list sums to 0)
tmp3['TotalOutputCost'] = tmp3['OutputCost'].apply(lambda costs: sum(costs))
tmp3['TotalInputCost'] = tmp3['InputCost'].apply(lambda costs: sum(costs))
## tmp3 already has one row per process, so no drop_duplicates is needed;
## its index is now 0..n-1 instead of the gaps left by dropping duplicates
tmp3.head()


Unnamed: 0,Processes,Inputs,InputCost,Outputs,OutputCost,TotalOutputCost,TotalInputCost
0,P1,"[M3, M5]","[59, 48]","[M1, M12, M15, M18, M19]","[-73, -60, -26, -9, -15]",-183,107
10,P10,"[M10, M15, M2, M3, M8]","[71, 64, 22, 15, 64]","[M12, M18, M20, M6]","[-87, -16, -70, -15]",-188,236
30,P11,"[M1, M11, M16, M19, M20, M3, M8]","[63, 79, 82, 26, 42, 98, 92]","[M10, M15, M18, M4, M6]","[-31, -93, -13, -69, -43]",-249,482
65,P12,"[M11, M17, M18, M4, M7]","[23, 51, 85, 71, 42]","[M1, M15, M16, M3, M6]","[-13, -79, -47, -9, -23]",-171,272
90,P13,"[M10, M12, M15, M5, M6, M8, M9]","[49, 66, 52, 54, 67, 39, 83]","[M1, M16]","[-80, -59]",-139,410


In [16]:
## frame used to fill the database: the non-zero rows without the IO helper columns
to_use = long_df_nz.drop(columns=['IO','IO_txt'])

### Start the Database

This section covers three steps:

1) Set up the database
2) Connect to the DB and perform actions on it
3) Run queries against the system

In [1]:
## autoreload: mode 2 re-imports changed modules before each cell runs,
## so edits to the imported project modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2


In [2]:
from db.Materials import Material 
from db.Processes import Process

## helpers 
from db.db_helpers import (add_material, remove_material,update_material_name, clear_db,add_material_relationship)

## connectors & environment variables
from db.db_helpers import load_env_vars
from db.connector import DBConnector

In [3]:
## loading environment variables to connect to the database
env_vars = load_env_vars()
## instantiate the connector; env_vars is unpacked into keyword arguments
connector = DBConnector(**env_vars)
## start the connection (prints "Connected to the database" in the recorded output)
## NOTE(review): _connect is underscore-prefixed, i.e. private by convention —
## check whether DBConnector offers a public way to connect
connector._connect()
## close the connection (left disabled so the following cells can use it)
#connector._close()
## getting all the Material nodes currently stored in the database
Material.nodes.all()

Loaded environment variables
Connected to the database


[<Material: {'uuid': 'M99', 'name': 'K', 'quantity': 100, 'unit': 'ton', 'cost': 1000000000, 'description': 'Material 99 - is pure Silver', 'id': 6}>,
 <Material: {'uuid': 'M98', 'name': 'Ur', 'quantity': 999, 'unit': 'ton', 'cost': 99999999999, 'description': "Material 98 - is pure Uranium, used in Max's Laboratory", 'id': 7}>]

In [4]:
## demo: add two example materials, list every Material node, then clear the database
## NOTE(review): "Hg" is the chemical symbol for mercury, while the description
## says silver — confirm which one is intended
hg_dict = {"uuid": "M99",
                 "name": "Hg",
                 "quantity": 100, "unit": "ton",
                 "cost": 1000000000,
                 "description":"Material 99 - is pure Silver"} ## the dictionary for the first material

## add_material's return value is unpacked into two names (hg, status)
hg,status = add_material(hg_dict)


ur_dict = {"uuid": "M98",
                 "name": "Ur",
                 "quantity": 999, "unit": "ton",
                 "cost": 99999999999,
                 "description":"Material 98 - is pure Uranium, used in Max's Laboratory"} ## the dictionary for the second material

ur,status2 = add_material(ur_dict)

## NOTE(review): the recorded output lists M99 and M98 twice — the nodes that were
## already stored are kept and new ones are added next to them, so add_material
## does not de-duplicate by uuid; clear the database first to avoid duplicates
print(Material.nodes.all(),end='\n')
## empty the database again (prints "Database cleared")
clear_db()

Material: M99-Hg added
Material: M98-Ur added
[<Material: {'uuid': 'M99', 'name': 'K', 'quantity': 100, 'unit': 'ton', 'cost': 1000000000, 'description': 'Material 99 - is pure Silver', 'id': 6}>, <Material: {'uuid': 'M98', 'name': 'Ur', 'quantity': 999, 'unit': 'ton', 'cost': 99999999999, 'description': "Material 98 - is pure Uranium, used in Max's Laboratory", 'id': 7}>, <Material: {'uuid': 'M99', 'name': 'Hg', 'quantity': 100, 'unit': 'ton', 'cost': 1000000000, 'description': 'Material 99 - is pure Silver', 'id': 9}>, <Material: {'uuid': 'M98', 'name': 'Ur', 'quantity': 999, 'unit': 'ton', 'cost': 99999999999, 'description': "Material 98 - is pure Uranium, used in Max's Laboratory", 'id': 10}>]
Database cleared


([], [])

In [5]:
## start from an empty database so the demo materials are not stored twice
clear_db()
## first demo material (uuid M99)
hg,status = add_material(hg_dict)
## second demo material (uuid M98)
ur,status2 = add_material(ur_dict)
## create a relationship in both directions
rel_status1 = add_material_relationship(hg, ur) ## hg --> ur 
rel_status2 = add_material_relationship(ur, hg) ## ur --> hg 

## update the material name: the material is looked up by its uuid (M99)
## and renamed to 'K' (see "Updated Material: M99 with new name: K" in the output)
old_uuid = "M99"
new_name = 'K'
upname1 = update_material_name(material_id=old_uuid, new_name=new_name)

Database cleared
Material: M99-Hg added
Material: M98-Ur added
Added relationship between M99 and M98
Added relationship between M98 and M99
Updated Material: M99 with new name: K


In [6]:
## add processes
## demo process P1: materials are referenced by their uuid strings
## NOTE(review): the description says the process produces K, but K is the name
## given to M99 (listed here as the material used); the output is M98 — confirm the wording
## NOTE(review): the keys "materials_used" / "output_of_process" differ from the
## "input_materials" / "output_materials" keys built by process_payload —
## confirm which names the Process model expects
process_dict = {"uuid": "P1",
                "name": "Process 1",
                "materials_used": "M99",
                "output_of_process": "M98",
                "description": "Process 1 - is used to produce K",
                "total_input_cost": 1000000000,
                "total_output_cost": -99999
                }

## build the Process object from the dictionary and save it
p1 = Process(**process_dict).save()

In [7]:
## connect material hg to process p1 through the process's input_material relationship
## (recorded output of the call: True)
p1.input_material.connect(hg)

True

In [8]:
## connect material ur to process p1 through the process's output_material relationship
## (recorded output of the call: True)
p1.output_material.connect(ur)

True

In [9]:
## link in the other direction: attach process p1 to material hg
## through the material's associated_processes relationship
hg.associated_processes.connect(p1)

True

In [29]:
## add the dataframe to the database

## normalise the labels: the number after the dash of a process id becomes P<number>,
## a bare material number becomes M<number>.
## Values that are already normalised are left untouched, so the cell is safe to
## re-run and also works when to_use is derived from a long_df whose labels were
## converted earlier (an unconditional split("-")[1] raises IndexError on "P1"
## and the material prefix would be doubled to "MM1").
to_use['Processes'] = [f'P{int(p.split("-")[1])}' if "-" in p else p for p in to_use['Processes']]
to_use['Material'] = [m if str(m).startswith("M") else f"M{m}" for m in to_use['Material']]



Unnamed: 0,Processes,Material,Consumption
0,P1,M1,-73
60,P1,M3,59
120,P1,M5,48
330,P1,M12,-60
420,P1,M15,-26
...,...,...,...
329,P30,M11,-4
359,P30,M12,43
479,P30,M16,-98
569,P30,M19,33


In [39]:
## empty the database (prints "Database cleared") before loading the dataframe
clear_db()

Database cleared


([], [])

In [50]:
## first need to create all the materials
def material_payload(uuid:str, name:str, quantity:int, unit:str=None, cost:int=None, description:str=None):
    """Assemble the property dictionary for a single material.

    Parameters
    ----------
    uuid : identifier of the material.
    name : base name; the stored name is this value followed by "__".
    quantity, unit, cost, description : copied into the dictionary unchanged
        (unit, cost and description default to None).

    Returns
    -------
    dict with the keys uuid, name, quantity, unit, cost, description (in that order).
    """
    payload = dict(uuid=uuid, name=name+"__", quantity=quantity)
    payload.update(unit=unit, cost=cost, description=description)
    return payload
    
## create the process payload 
def process_payload(uuid:str, name:str, output_materials:str, input_materials:str, description:str=None, total_input_cost:int=None, total_output_cost:int=None):
    """Assemble the property dictionary for a single process.

    Every argument is stored unchanged under a key of the same name.
    description, total_input_cost and total_output_cost default to None;
    uuid, name, output_materials and input_materials are required.

    Returns
    -------
    dict with the keys uuid, name, output_materials, input_materials,
    description, total_input_cost, total_output_cost (in that order).
    """
    keys = ("uuid", "name", "output_materials", "input_materials",
            "description", "total_input_cost", "total_output_cost")
    values = (uuid, name, output_materials, input_materials,
              description, total_input_cost, total_output_cost)
    return dict(zip(keys, values))

In [76]:
## empty the database (prints "Database cleared") before creating the materials
clear_db()

Database cleared


([], [])

In [79]:
from db.db_helpers import add_process

In [69]:
## create every material exactly once.
## to_use holds one row per (process, material) pair, so looping over all of its rows
## calls add_material again and again with the same uuid; the recorded output of the
## demo cell (M98 / M99 listed twice) shows that add_material does not de-duplicate.
for _, row in to_use.drop_duplicates(subset='Material').iterrows():
    ## get the row information
    material = row['Material']
    ## quantity is the consumption of the first row in which the material appears,
    ## i.e. the value the first node of that material received in the per-row loop
    ## NOTE(review): confirm what quantity should mean for a material shared by several processes
    consumption = row['Consumption']
    ## now create the material
    mat_payload = material_payload(uuid=material, name=material, quantity=consumption)
    ## add the material to the database
    #mat = Material.get_or_create(**mat_payload)
    mat = add_material(mat_payload, verbose=False)
    

In [77]:
## preview of the per-process summary (first five rows)
tmp3.head()

Unnamed: 0,Processes,Inputs,InputCost,Outputs,OutputCost,TotalOutputCost,TotalInputCost
0,P1,"[M3, M5]","[59, 48]","[M1, M12, M15, M18, M19]","[-73, -60, -26, -9, -15]",-183,107
10,P10,"[M10, M15, M2, M3, M8]","[71, 64, 22, 15, 64]","[M12, M18, M20, M6]","[-87, -16, -70, -15]",-188,236
30,P11,"[M1, M11, M16, M19, M20, M3, M8]","[63, 79, 82, 26, 42, 98, 92]","[M10, M15, M18, M4, M6]","[-31, -93, -13, -69, -43]",-249,482
65,P12,"[M11, M17, M18, M4, M7]","[23, 51, 85, 71, 42]","[M1, M15, M16, M3, M6]","[-13, -79, -47, -9, -23]",-171,272
90,P13,"[M10, M12, M15, M5, M6, M8, M9]","[49, 66, 52, 54, 67, 39, 83]","[M1, M16]","[-80, -59]",-139,410


In [None]:
## first need to create all the materials
## NOTE(review): material_payload is already defined in an earlier cell; this second
## definition replaces it when the cell runs — keep a single copy
def material_payload(uuid:str, name:str, quantity:int, unit:str=None, cost:int=None, description:str=None):
    """Assemble the property dictionary for a single material.

    Parameters
    ----------
    uuid : identifier of the material.
    name : base name; the stored name is this value followed by "__".
    quantity, unit, cost, description : copied into the dictionary unchanged
        (unit, cost and description default to None).

    Returns
    -------
    dict with the keys uuid, name, quantity, unit, cost, description (in that order).
    """
    payload = dict(uuid=uuid, name=name+"__", quantity=quantity)
    payload.update(unit=unit, cost=cost, description=description)
    return payload
    
## create the process payload
## NOTE(review): process_payload is already defined in an earlier cell; this second
## definition replaces it when the cell runs — keep a single copy
def process_payload(uuid:str, name:str, output_materials:str, input_materials:str, description:str=None, total_input_cost:int=None, total_output_cost:int=None):
    """Assemble the property dictionary for a single process.

    Every argument is stored unchanged under a key of the same name.
    description, total_input_cost and total_output_cost default to None;
    uuid, name, output_materials and input_materials are required.

    Returns
    -------
    dict with the keys uuid, name, output_materials, input_materials,
    description, total_input_cost, total_output_cost (in that order).
    """
    keys = ("uuid", "name", "output_materials", "input_materials",
            "description", "total_input_cost", "total_output_cost")
    values = (uuid, name, output_materials, input_materials,
              description, total_input_cost, total_output_cost)
    return dict(zip(keys, values))

In [78]:
## preview of the per-process summary (first five rows)
tmp3.head()

Unnamed: 0,Processes,Inputs,InputCost,Outputs,OutputCost,TotalOutputCost,TotalInputCost
0,P1,"[M3, M5]","[59, 48]","[M1, M12, M15, M18, M19]","[-73, -60, -26, -9, -15]",-183,107
10,P10,"[M10, M15, M2, M3, M8]","[71, 64, 22, 15, 64]","[M12, M18, M20, M6]","[-87, -16, -70, -15]",-188,236
30,P11,"[M1, M11, M16, M19, M20, M3, M8]","[63, 79, 82, 26, 42, 98, 92]","[M10, M15, M18, M4, M6]","[-31, -93, -13, -69, -43]",-249,482
65,P12,"[M11, M17, M18, M4, M7]","[23, 51, 85, 71, 42]","[M1, M15, M16, M3, M6]","[-13, -79, -47, -9, -23]",-171,272
90,P13,"[M10, M12, M15, M5, M6, M8, M9]","[49, 66, 52, 54, 67, 39, 83]","[M1, M16]","[-80, -59]",-139,410


In [48]:
## create one process per row of the per-process summary table
for _, row in tmp3.iterrows():
    ## get the data of the row
    processes = row['Processes']
    inputs = row['Inputs'] ## list of the material labels the process uses
    outputs = row['Outputs'] ## list of the material labels the process produces
    totalinputcost = row['TotalInputCost'] #int
    totaloutputcost = row['TotalOutputCost'] #int
    ## create the process payload.
    ## The result is stored under its own name: assigning it to `process_payload`
    ## would replace the function with a dict and break the next iteration.
    ## output_materials / input_materials are required arguments of process_payload
    ## and are annotated as str, so the lists are joined into comma-separated strings.
    ## NOTE(review): confirm the format add_process expects for these two fields
    proc_payload = process_payload(uuid=processes,
                                   name=processes,
                                   output_materials=",".join(outputs),
                                   input_materials=",".join(inputs),
                                   total_input_cost=totalinputcost,
                                   total_output_cost=totaloutputcost)
    ## add the process to the database
    process = add_process(proc_payload, verbose=False)
    
    

Processes       P1
Material        M1
Consumption    -73
Name: 0, dtype: object

In [None]:
## now add the processes with the corresponding materials

In [None]:
#driver = GraphDatabase.driver(URL, auth=(username, password))
# neo_url = "http://localhost:7474"
# driver = GraphDatabase.driver(neo_url, auth=(username, password))

# with driver.session() as sess:
    
    
    

In [None]:
# from neo4j import GraphDatabase

# driver = GraphDatabase.driver(URL, auth=(username, password))

# def create_friend_of(tx, name, friend):
#     tx.run("MATCH (a:Person) WHERE a.name = $name "
#            "CREATE (a)-[:KNOWS]->(:Person {name: $friend})",
#            name=name, friend=friend)

# with driver.session() as session:
#     session.write_transaction(create_friend_of, "Alice", "Bob")

# with driver.session() as session:
#     session.write_transaction(create_friend_of, "Alice", "Carl")

# driver.close()