## Setting Up the Database

In [33]:
## autoreload 
%load_ext autoreload
%autoreload 2

## defining the paths: 
PATH = "./data/clean_data.csv"
SAVE_DIR = "./data/final_df_for_db.csv"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Loading DataFrame

In [34]:
## loading helpers 
from utils.helpers import prepare_dataframe

## `to_use` is the frame later passed as `df=` to the populate_* helpers
## (presumably also written to SAVE_DIR - confirm in utils.helpers)
to_use = prepare_dataframe(path = PATH, save_dir = SAVE_DIR)

### Start the Database

This section does three things:

1) Set up the database
2) Connect to the database and perform actions on it
3) Run queries against the system

In [35]:
## importing the connectors 
from db.connector import DBConnector
## load the environment variables
from db.db_helpers import load_env_vars, clear_db

## import the materials 
from db.Materials import (Material,populate_materials_from_df,
                          add_material,update_material_name,
                          add_material_relationship)

## import the processes 
from db.Processes import (Process,populate_process_from_df)

In [36]:
## load the environment variables used to connect to the database
env_vars = load_env_vars()
## instantiate the connector from those settings
connector = DBConnector(**env_vars)
## open the connection
## NOTE(review): `_connect` is a private method; prefer a public entry point if DBConnector exposes one
connector._connect()
## to close the connection once finished: connector._close()
## to list all Material nodes: Material.nodes.all()
## clear the database; as the last expression, its return value is displayed as the cell output
clear_db()

Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687)))


Loaded environment variables
Connected to the database
Database cleared


([], [])

### Testing the db 

In [37]:
## first test material (uuid M99)
## NOTE(review): "Hg" is the chemical symbol for mercury, while the description says silver ("Ag") - confirm which is intended
hg_dict = {
    "uuid": "M99",
    "name": "Hg",
    "quantity": 100,
    "unit": "ton",
    "cost": 1000000000,
    "description": "Material 99 - is pure Silver",
}
hg, status = add_material(hg_dict, verbose=True)

## second test material (uuid M98)
## NOTE(review): the usual chemical symbol for uranium is "U", not "Ur" - confirm
ur_dict = {
    "uuid": "M98",
    "name": "Ur",
    "quantity": 999,
    "unit": "ton",
    "cost": 99999999999,
    "description": "Material 98 - is pure Uranium, used in Max's Laboratory",
}
ur, status2 = add_material(ur_dict, verbose=True)

## list every Material node currently stored
print(Material.nodes.all())

[32mMaterial: M99-Hg added[0m
[32mMaterial: M98-Ur added[0m
[<Material: {'uuid': 'M99', 'name': 'Hg', 'quantity': 100, 'unit': 'ton', 'cost': 1000000000, 'description': 'Material 99 - is pure Silver', 'id': 0}>, <Material: {'uuid': 'M98', 'name': 'Ur', 'quantity': 999, 'unit': 'ton', 'cost': 99999999999, 'description': "Material 98 - is pure Uranium, used in Max's Laboratory", 'id': 1}>]


In [38]:
## start from an empty database, then re-add the two test materials quietly
clear_db()
## material M99 (hg_dict)
hg,status = add_material(hg_dict,verbose=False)
## material M98 (ur_dict)
ur,status2 = add_material(ur_dict,verbose=False)
## create a relationship in each direction between the two materials
rel_status1 = add_material_relationship(hg, ur) ## hg --> ur 
rel_status2 = add_material_relationship(ur, hg) ## ur --> hg 

## rename the material identified by uuid M99; the uuid itself is only used for the lookup
old_uuid = "M99"
new_name = 'K'
upname1 = update_material_name(material_id=old_uuid, new_name=new_name)

Database cleared
Added relationship between M99 and M98
Added relationship between M98 and M99
Updated Material: M99 with new name: K


### Populating the db

In [39]:
## drop the test nodes, then load the materials from the prepared frame;
## with verbose=True each material is reported as "added" or "already exists"
clear_db()
populate_materials_from_df(df=to_use, verbose=True)

Database cleared
[32mMaterial: M1-M1__ added[0m
[32mMaterial: M12-M12__ added[0m
[32mMaterial: M15-M15__ added[0m
[32mMaterial: M18-M18__ added[0m
[32mMaterial: M19-M19__ added[0m
[32mMaterial: M3-M3__ added[0m
[32mMaterial: M5-M5__ added[0m
[32mMaterial: M10-M10__ added[0m
[31mMaterial: M12 already exists[0m
[31mMaterial: M15 already exists[0m
[31mMaterial: M18 already exists[0m
[32mMaterial: M2-M2__ added[0m
[32mMaterial: M20-M20__ added[0m
[31mMaterial: M3 already exists[0m
[32mMaterial: M6-M6__ added[0m
[32mMaterial: M8-M8__ added[0m
[31mMaterial: M1 already exists[0m
[31mMaterial: M10 already exists[0m
[32mMaterial: M11-M11__ added[0m
[31mMaterial: M15 already exists[0m
[32mMaterial: M16-M16__ added[0m
[31mMaterial: M18 already exists[0m
[31mMaterial: M19 already exists[0m
[31mMaterial: M20 already exists[0m
[31mMaterial: M3 already exists[0m
[32mMaterial: M4-M4__ added[0m
[31mMaterial: M6 already exists[0m
[31mMaterial: M8 al

In [40]:
## populate the processes from the same frame; the database is not cleared here,
## so the materials loaded in the previous cell are kept
populate_process_from_df(df=to_use, verbose=True)

[32mProcess P1 added[0m
[31mProcess P1 already exists[0m
[31mProcess P1 already exists[0m
[31mProcess P1 already exists[0m
[31mProcess P1 already exists[0m
[31mProcess P1 already exists[0m
[31mProcess P1 already exists[0m
[32mProcess P10 added[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[31mProcess P10 already exists[0m
[32mProcess P11 added[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[31mProcess P11 already exists[0m
[32mProcess P12 added[0m

In [41]:
## display every Material node now stored in the database
Material.nodes.all()

[<Material: {'uuid': 'M1', 'name': 'M1__', 'quantity': -73, 'unit': None, 'cost': None, 'description': None, 'id': 4}>,
 <Material: {'uuid': 'M12', 'name': 'M12__', 'quantity': -60, 'unit': None, 'cost': None, 'description': None, 'id': 5}>,
 <Material: {'uuid': 'M15', 'name': 'M15__', 'quantity': -26, 'unit': None, 'cost': None, 'description': None, 'id': 6}>,
 <Material: {'uuid': 'M18', 'name': 'M18__', 'quantity': -9, 'unit': None, 'cost': None, 'description': None, 'id': 7}>,
 <Material: {'uuid': 'M19', 'name': 'M19__', 'quantity': -15, 'unit': None, 'cost': None, 'description': None, 'id': 8}>,
 <Material: {'uuid': 'M3', 'name': 'M3__', 'quantity': 59, 'unit': None, 'cost': None, 'description': None, 'id': 9}>,
 <Material: {'uuid': 'M5', 'name': 'M5__', 'quantity': 48, 'unit': None, 'cost': None, 'description': None, 'id': 10}>,
 <Material: {'uuid': 'M10', 'name': 'M10__', 'quantity': 71, 'unit': None, 'cost': None, 'description': None, 'id': 11}>,
 <Material: {'uuid': 'M2', 'name

In [42]:
## display every Process node now stored in the database
Process.nodes.all()

[<Process: {'uuid': 'P6', 'name': 'P6__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 0}>,
 <Process: {'uuid': 'P7', 'name': 'P7__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 1}>,
 <Process: {'uuid': 'P8', 'name': 'P8__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 2}>,
 <Process: {'uuid': 'P9', 'name': 'P9__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 3}>,
 <Process: {'uuid': 'P1', 'name': 'P1__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 24}>,
 <Process: {'uuid': 'P10', 'name': 'P10__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 25}>,
 <Process: {'uuid': 'P11', 'name': 'P11__XX', 'description': None, 'total_input_cost': None, 'total_output_cost': None, 'id': 26}>,
 <Process: {'uuid': 'P12', 'name': 'P12__XX', 'description': None, 'total_input_cost': Non

### Queries

In [43]:
## class for managing the queries to the database 
from db.Query import Query
from neomodel import db


In [44]:
## `Q` has to exist before it can be inspected, so build it in this cell;
## this keeps the notebook runnable top to bottom on a fresh kernel
Q = Query(db)
## select the subgraph query around Material M1
## (presumably this stores the Cypher string on Q.query - matches the recorded output)
Q.subgraph_query("M1","Material")
## show the Cypher string that run_query() would execute
Q.query

'MATCH (n:Material {uuid: "M1"})-[r]-(m) RETURN r, n, m'

In [45]:
## instantiate the query helper on the neomodel `db` handle
Q = Query(db)
## material query for M1; run_query() returns (rows, column names), so [0][0] is the first result row
Q.material_query("M1")
print(Q.run_query()[0][0])
## process query for P1; print the full (rows, column names) result
Q.process_query("P1")
print(Q.run_query())
## subgraph query around Material M1; each row holds a relationship and its two nodes (r, n, m)
Q.subgraph_query("M1","Material")
print(Q.run_query())

[<Node id=4 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>]
([[<Node id=24 labels=frozenset({'Process'}) properties={'name': 'P1__XX', 'uuid': 'P1'}>]], ['n'])
([[<Relationship id=289 nodes=(<Node id=3 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>, <Node id=4 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=4 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>, <Node id=3 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>], [<Relationship id=279 nodes=(<Node id=2 labels=frozenset({'Process'}) properties={'name': 'P8__XX', 'uuid': 'P8'}>, <Node id=4 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=4 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 

### Parsing the Query output

In [46]:
from utils.helpers import (parse_subgraph, parsed_subgraph_to_df)

In [47]:
## select the subgraph query explicitly so this cell does not depend on
## whichever query an earlier cell happened to leave set on Q
Q.subgraph_query("M1","Material")
subgraph_mat = Q.run_query()
## parse the raw query result; `data` is what parsed_subgraph_to_df consumes
data,rels = parse_subgraph(subgraph_mat)

In [48]:
## turn the parsed subgraph into a table; verbose=True also prints one line per relationship
df = parsed_subgraph_to_df(data,verbose=True)

[32mINPUT_MATERIAL [0m for Process P26; M1 --> P26
[32mINPUT_MATERIAL [0m for Process P15; M1 --> P15
[32mINPUT_MATERIAL [0m for Process P13; M1 --> P13
[32mINPUT_MATERIAL [0m for Process P12; M1 --> P12
[32mINPUT_MATERIAL [0m for Process P1; M1 --> P1
[31mOUTPUT_MATERIAL[0m for Process P9; P9 --> M1
[31mOUTPUT_MATERIAL[0m for Process P8; P8 --> M1
[31mOUTPUT_MATERIAL[0m for Process P7; P7 --> M1
[31mOUTPUT_MATERIAL[0m for Process P28; P28 --> M1
[31mOUTPUT_MATERIAL[0m for Process P24; P24 --> M1
[31mOUTPUT_MATERIAL[0m for Process P21; P21 --> M1
[31mOUTPUT_MATERIAL[0m for Process P20; P20 --> M1
[31mOUTPUT_MATERIAL[0m for Process P19; P19 --> M1
[31mOUTPUT_MATERIAL[0m for Process P18; P18 --> M1
[31mOUTPUT_MATERIAL[0m for Process P11; P11 --> M1


In [49]:
## display the relationship table (columns: Relationship, From, To)
df

Unnamed: 0,Relationship,From,To
4,INPUT_MATERIAL,M1,P26
10,INPUT_MATERIAL,M1,P15
11,INPUT_MATERIAL,M1,P13
12,INPUT_MATERIAL,M1,P12
14,INPUT_MATERIAL,M1,P1
0,OUTPUT_MATERIAL,P9,M1
1,OUTPUT_MATERIAL,P8,M1
2,OUTPUT_MATERIAL,P7,M1
3,OUTPUT_MATERIAL,P28,M1
5,OUTPUT_MATERIAL,P24,M1


In [50]:

## show the query string currently held by Q
Q.query


'MATCH (n:Material {uuid: "M1"})-[r]-(m) RETURN r, n, m'