## Setting the Database

In [1]:
## autoreload: pick up edits to the imported project modules without restarting the kernel
%load_ext autoreload
%autoreload 2

## defining the paths: 
## PATH     - passed to prepare_dataframe as `path` (presumably the input CSV - confirm)
## SAVE_DIR - passed to prepare_dataframe as `save_dir`;
##            NOTE(review): despite the name, the value is a .csv file path, not a directory
PATH = "./data/clean_data.csv"
SAVE_DIR = "./data/final_df_for_db.csv"

### Loading DataFrame

In [2]:
## loading helpers 
from utils.helpers import prepare_dataframe

## build the DataFrame that is later used to populate the materials and processes
## (presumably reads PATH and writes the result to SAVE_DIR - confirm in utils.helpers)
to_use = prepare_dataframe(path = PATH, save_dir = SAVE_DIR)

### Start the Database

This section covers three steps:

1) Start the database 
2) Connect to the database and act on it (add, update and clear nodes) 
3) Run queries against it

In [7]:
## import the connector
## (an earlier version imported DBConnector from db.connector instead)
from db.graphdb import DBConnector

## import the materials 
from db.Materials import (Material,populate_materials_from_df,
                          add_material,update_material_name,
                          add_material_relationship)

## import the processes 
from db.Processes import (Process,populate_process_from_df)

In [9]:
## NOTE(review): _connect/_get_status are underscore-prefixed (private by convention);
## a public wrapper on DBConnector would be cleaner for notebook use.
## NOTE(review): the printed status contains the database host address - consider
## clearing this cell's output before sharing the notebook.
connector = DBConnector() ## instantiate the object and load the environment variables
connector._connect() ## connect to the database
print(connector._get_status()) ## to check that we are connected to neo4j

Loaded environment variables
Connected to the database
{IPv4Address(('bad3976d.databases.neo4j.io', 7687)): [{'servers': [{'addresses': ['bad3976d.databases.neo4j.io:7687'], 'role': 'WRITE'}, {'addresses': ['bad3976d.databases.neo4j.io:7687'], 'role': 'READ'}, {'addresses': ['bad3976d.databases.neo4j.io:7687'], 'role': 'ROUTE'}], 'ttl': 10}]}


### Testing the db 

#### Adding Materials

In [None]:
## Two throwaway test materials used to check that add_material works.
## The variable names (hg_dict, ur_dict, hg, ur) are reused by the next cell.
hg_dict = {
    "uuid": "M99",
    "name": "Hg",
    "quantity": 100, "unit": "ton",
    "cost": 1000000000,
    "description": "Material 99 - is pure Mercury",  ## Hg is mercury (silver would be Ag)
}  ## the dictionary for the first material

hg, status = add_material(hg_dict, verbose=True)


ur_dict = {
    "uuid": "M98",
    "name": "U",  ## chemical symbol of uranium
    "quantity": 999, "unit": "ton",
    "cost": 99999999999,
    "description": "Material 98 - is pure Uranium, used in Max's Laboratory",
}  ## the dictionary for the second material

ur, status2 = add_material(ur_dict, verbose=True)

## show what is stored, then remove the test nodes again
print(Material.nodes.all())
connector._clear_database() ## clear the database

#### Adding Materials & Relationships

In [None]:
## No clear is needed here: the previous cell already ends with connector._clear_database().
## This cell reuses hg_dict and ur_dict defined in the previous cell.
## Mercury (symbol Hg)
hg,status = add_material(hg_dict,verbose=False)
## Uranium 
ur,status2 = add_material(ur_dict,verbose=False)
## create a relationship in both directions
rel_status1 = add_material_relationship(hg, ur) ## hg --> ur 
rel_status2 = add_material_relationship(ur, hg) ## ur --> hg 

## update the material name 
## (the material is looked up by its uuid; presumably only its name changes - confirm in db.Materials)
old_uuid = "M99"
new_name = 'K'
upname1 = update_material_name(material_id=old_uuid, new_name=new_name)

### Populating the db

In [None]:
## start from an empty database (this also removes the test nodes created above),
## then create the Material nodes from the prepared DataFrame
connector._clear_database()
populate_materials_from_df(df=to_use, verbose=True)

In [None]:
## populate the processes from the same prepared DataFrame used for the materials
populate_process_from_df(df=to_use, verbose=True)

In [None]:
## sanity check: list all Material nodes now in the database
Material.nodes.all()

In [None]:
## sanity check: list all Process nodes now in the database
Process.nodes.all()

### Queries: Materials, Processes & Sub-graphs

In [11]:
## class for managing the queries to the database 
from db.Query import Query
from neomodel import db

In [12]:
## instantiate the query manager; the same object is reused for every query below
Q = Query(db) ## pass in the `db` object imported from neomodel

#### Querying Materials

In [13]:
## make a material query for the material with uuid "M1"
Q.material_query("M1")
## run_query() returns (rows, column names); [0][0] is the first row, a list holding the Material node
print(Q.run_query()[0][0]) ## the Node shows its id, labels and properties (Material)

[<Node id=0 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>]


#### Querying Processes

In [14]:
## make a process query for the process with uuid "P1"
Q.process_query("P1")
## prints the full result (rows, column names); the Process node sits in the first row
print(Q.run_query()) ## the Node shows its id, labels and properties (Process)

([[<Node id=20 labels=frozenset({'Process'}) properties={'name': 'P1__XX', 'uuid': 'P1'}>]], ['n'])


#### Querying Subgraphs

In [15]:
## make a subgraph query
## returns the nodes connected to the given node together with the relationships linking them,
## i.e. the processes associated to a material or the materials associated to a process 
Q.subgraph_query("M1","Material")
## for this query each row is [Relationship, Material node, Process node]
print(Q.run_query())

([[<Relationship id=287 nodes=(<Node id=49 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>, <Node id=0 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=0 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>, <Node id=49 labels=frozenset({'Process'}) properties={'name': 'P9__XX', 'uuid': 'P9'}>], [<Relationship id=277 nodes=(<Node id=48 labels=frozenset({'Process'}) properties={'name': 'P8__XX', 'uuid': 'P8'}>, <Node id=0 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>) type='OUTPUT_MATERIAL' properties={}>, <Node id=0 labels=frozenset({'Material'}) properties={'quantity': -73, 'name': 'M1__', 'uuid': 'M1'}>, <Node id=48 labels=frozenset({'Process'}) properties={'name': 'P8__XX', 'uuid': 'P8'}>], [<Relationship id=269 nodes=(<Node id=47 labels=frozenset({'Process'}) properties={'name': 'P7__

### Parsing the subgraph Query into human-readable form

In [17]:
from utils.helpers import (parse_subgraph, parsed_subgraph_to_df)

In [18]:
## set the subgraph query explicitly so this cell does not depend on which
## query happened to be prepared last on Q in an earlier cell
Q.subgraph_query("M1","Material")
subgraph_mat = Q.run_query()
## parse the raw result, then turn the parsed data into a DataFrame;
## verbose=True also prints one line per relationship
data,rels = parse_subgraph(subgraph_mat)
df = parsed_subgraph_to_df(data,verbose=True)

[32mINPUT_MATERIAL [0m for Process P26; M1 --> P26
[32mINPUT_MATERIAL [0m for Process P15; M1 --> P15
[32mINPUT_MATERIAL [0m for Process P13; M1 --> P13
[32mINPUT_MATERIAL [0m for Process P12; M1 --> P12
[32mINPUT_MATERIAL [0m for Process P1; M1 --> P1
[31mOUTPUT_MATERIAL[0m for Process P9; P9 --> M1
[31mOUTPUT_MATERIAL[0m for Process P8; P8 --> M1
[31mOUTPUT_MATERIAL[0m for Process P7; P7 --> M1
[31mOUTPUT_MATERIAL[0m for Process P28; P28 --> M1
[31mOUTPUT_MATERIAL[0m for Process P24; P24 --> M1
[31mOUTPUT_MATERIAL[0m for Process P21; P21 --> M1
[31mOUTPUT_MATERIAL[0m for Process P20; P20 --> M1
[31mOUTPUT_MATERIAL[0m for Process P19; P19 --> M1
[31mOUTPUT_MATERIAL[0m for Process P18; P18 --> M1
[31mOUTPUT_MATERIAL[0m for Process P11; P11 --> M1


In [19]:
## look at the first two rows of the parsed relationships (columns: Relationship, From, To)
df.head(2)

Unnamed: 0,Relationship,From,To
4,INPUT_MATERIAL,M1,P26
10,INPUT_MATERIAL,M1,P15
