In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os
import inspect
import getpass
import json

import pandas as pd
import matplotlib.pyplot as plt


# Resolve the repository root as the parent of the notebook's working directory
# and put it first on sys.path so the `database_storage` package can be imported.
#
# inspect.getfile(inspect.currentframe()) is not used here: inside a notebook the
# cell's frame reports a synthetic file name rather than the notebook's location,
# so the resulting directory depends on how the kernel names compiled cells.
project_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.insert(0, project_path)
print(project_path)

from database_storage.helper import *

from database_storage.database.database import *
from database_storage.main import *

from database_storage.objects.human import *
from database_storage.objects.issue import *
from database_storage.objects.kpi import *
from database_storage.objects.machine import *
from database_storage.objects.tag import *
from database_storage.objects.maintenanceworkorder import *


/Users/saschamoccozet/Documents/Git/Nestor


# Set Up Dataset Information

For each dataset, define the following settings:

    user : Username used to connect to the database -STRING
    password : Password to connect to the database -STRING
    
    csv_file : Path of the raw dataset (the readable one created by Nestor) -CSV
    csvSchemas_file : Path of the file used to link the headers of the CSV with the properties in the database -YAML
    
    bincsv_file : Path of the 1GramTag binary CSV (created by Nestor) -CSV
    binNcsv_file : Path of the NGramTag binary CSV (created by Nestor) -CSV
    
    vocabcsv_file : Path of the vocabulary 1Gram file (created by Nestor) -CSV
    vocabNcsv_file : Path of the vocabulary NGram file (created by Nestor) -CSV
    
    tree_path : Path of the item-item hierarchy (created by <insert a name here>) -JSON

### Greystone

In [None]:
# Neo4j credentials for the Greystone database.
# The password is taken from the NEO4J_PASSWORD environment variable, falling back
# to an interactive prompt, so that it is never stored in the notebook.
user = "neo4j"
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password (Greystone): ")

# Readable raw dataset and the YAML file linking its CSV headers to database properties.
csv_file = os.path.join(project_path, 'data', 'gs_data', 'readable.csv')
csvSchemas_file = os.path.join(project_path, 'data', 'gs_data', 'gs_header.yaml')

# Binary tag CSVs (1-gram and N-gram).
bincsv_file = os.path.join(project_path, 'data', 'gs_data', 'bin_1g.csv')
binNcsv_file = os.path.join(project_path, 'data', 'gs_data', 'bin_Ng.csv')

# Vocabulary CSVs, required by the file-loading cell. Setting them here prevents a
# NameError (or silently reusing another dataset's vocabulary left in the kernel).
# NOTE(review): file names mirror the Mine dataset layout — confirm they exist for Greystone.
vocabcsv_file = os.path.join(project_path, 'data', 'gs_data', 'vocab_1g.csv')
vocabNcsv_file = os.path.join(project_path, 'data', 'gs_data', 'vocab_2g.csv')


### Mine

In [10]:
# Neo4j credentials for the Mine database.
# The password is taken from the NEO4J_PASSWORD environment variable, falling back
# to an interactive prompt, so that it is never stored in the notebook.
user = "neo4j"
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password (Mine): ")

# Readable raw dataset and the YAML file linking its CSV headers to database properties.
csv_file = os.path.join(project_path, 'data', 'mine_data', 'readable.csv')
csvSchemas_file = os.path.join(project_path, 'data', 'mine_data', 'mine_header.yaml')

# Binary tag CSVs (1-gram and N-gram).
bincsv_file = os.path.join(project_path, 'data', 'mine_data', 'bin_1g.csv')
binNcsv_file = os.path.join(project_path, 'data', 'mine_data', 'bin_Ng.csv')

# Vocabulary CSVs (1-gram and N-gram).
vocabcsv_file = os.path.join(project_path, 'data', 'mine_data', 'vocab_1g.csv')
vocabNcsv_file = os.path.join(project_path, 'data', 'mine_data', 'vocab_2g.csv')


In [11]:
# JSON file holding the item-item hierarchy for the Mine dataset.
tree_path = os.path.join(
    project_path,
    'data',
    'mine_data',
    'tree_test.json',
)

### HVAC

In [None]:
# Neo4j credentials for the HVAC database.
# The password is taken from the NEO4J_PASSWORD environment variable, falling back
# to an interactive prompt, so that it is never stored in the notebook.
user = "neo4j"
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password (HVAC): ")

# Readable raw dataset and the YAML file linking its CSV headers to database properties.
csv_file = os.path.join(project_path, 'data', 'hvac_data', 'readable.csv')
csvSchemas_file = os.path.join(project_path, 'data', 'hvac_data', 'hvac_header.yaml')

# Binary tag CSVs (1-gram and N-gram).
bincsv_file = os.path.join(project_path, 'data', 'hvac_data', 'bin_1g.csv')
binNcsv_file = os.path.join(project_path, 'data', 'hvac_data', 'bin_Ng.csv')

# Vocabulary CSVs, required by the file-loading cell. Setting them here prevents a
# NameError (or silently reusing another dataset's vocabulary left in the kernel).
# NOTE(review): file names mirror the Mine dataset layout — confirm they exist for HVAC.
vocabcsv_file = os.path.join(project_path, 'data', 'hvac_data', 'vocab_1g.csv')
vocabNcsv_file = os.path.join(project_path, 'data', 'hvac_data', 'vocab_2g.csv')


### Others

In [None]:
# Generic setup: log in as the current OS user and ask for the password interactively.
user = getpass.getuser()
password = getpass.getpass("Password:")

# Raw HVAC CSV and the YAML file linking its headers to database properties.
csv_file = os.path.join(
    project_path, 'data', 'hvac_data', 'hvac_raw.csv',
)
csvSchemas_file = os.path.join(
    project_path, 'data', 'hvac_data', 'hvac_header.yaml',
)

# Load Files from Path

For each file path, load the file into memory in the appropriate format:
    
    Dataframe for : csv_file, bincsv_file, binNcsv_file, vocabcsv_file, vocabNcsv_file
   
    Dictionary for : csvSchemas_file, tree_path

In [12]:
# Raw dataset: first column is the index, first row the header; empty cells become "".
dataframe = pd.read_csv(csv_file, header=0, index_col=0).fillna("")

# Binary tag tables are read with a two-row column header.
dataframe_bincsv = pd.read_csv(bincsv_file, header=[0, 1], index_col=0)
dataframe_binNcsv = pd.read_csv(binNcsv_file, header=[0, 1], index_col=0)

# Vocabulary tables (1-gram and N-gram), indexed by their first column.
dataframe_vocab1g = pd.read_csv(vocabcsv_file, index_col=0)
dataframe_vocabNg = pd.read_csv(vocabNcsv_file, index_col=0)

# Mapping between the CSV headers and the database properties.
csvSchemas_dict = openYAMLFile(csvSchemas_file)

yaml file open


In [13]:
# Load the item-item hierarchy as a dictionary.
# The encoding is set explicitly: JSON is UTF-8, while open() would otherwise use
# the platform's default encoding.
with open(tree_path, encoding="utf-8") as tree_file:
    tree_dict = json.load(tree_file)

# Connect to the Graph Database

In [15]:
# Read the database schema from the package's YAML file, then connect to the
# local Neo4j instance over Bolt using the credentials chosen for the dataset.
databaseSchema_file = os.path.join(
    project_path, 'database_storage', 'database', 'DatabaseSchema.yaml',
)
databaseSchema_dict = openYAMLFile(databaseSchema_file)
database = DatabaseNeo4J(
    "bolt://127.0.0.1:7687",
    user,
    password,
    databaseSchema_dict,
)

yaml file open


## Clean the database

In [None]:
# WARNING: destructive cell — it starts by calling deleteData() on the connected database.
# NOTE(review): presumably removes every node and relationship; confirm in DatabaseNeo4J.
database.deleteData()

# Drop the existing constraints and indexes ...
database.dropConstraints()
database.dropIndexes()

# ... then create them again (indexes first, constraints second).
# NOTE(review): presumably rebuilt from the schema passed to DatabaseNeo4J — confirm.
database.createIndexes()
database.createConstraints()

## Create Cypher Queries

### Create historical_data

In [None]:
# Build the queries for the historical maintenance work orders from the raw
# dataframe and its header schema, then run them against the database.
database.runQueries(
    cypherCreate_historicalMaintenanceWorkOrder(
        database,
        dataframe,
        csvSchemas_dict,
    )
)

### Create tag_data

In [None]:
# Build and run the tag queries for the 1-gram binary table.
# NOTE(review): the meaning of the trailing True flag is defined by cypherCreate_tag — confirm.
database.runQueries(
    cypherCreate_tag(
        database,
        dataframe_bincsv,
        dataframe_vocab1g,
        dataframe_vocabNg,
        True,
    )
)

In [None]:
# Build and run the tag queries for the N-gram binary table.
# NOTE(review): the meaning of the trailing True flag is defined by cypherCreate_tag — confirm.
database.runQueries(
    cypherCreate_tag(
        database,
        dataframe_binNcsv,
        dataframe_vocab1g,
        dataframe_vocabNg,
        True,
    )
)

### Update 1gram-->Ngram

In [None]:
# Build and run the queries produced by cypherLink_Ngram1gram (1gram --> Ngram update).
database.runQueries(
    cypherLink_Ngram1gram(database)
)

### Update issue-->item

In [None]:
# Build and run the queries produced by cypherLink_itemIssue (issue --> item update).
database.runQueries(
    cypherLink_itemIssue(database)
)

### Update item-->item

In [None]:
# Build the item --> item queries from the loaded hierarchy dictionary and run them.
database.runQueries(
    cypherCreate_itemsTree(
        database,
        tree_dict,
    )
)

------------------

In [16]:
# Preview only: generate the item --> item queries without running them, so the
# cell displays the returned value.
cypherCreate_itemsTree(
    database,
    tree_dict,
)


MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"engine"})
MERGE (parent)-[:PARENT_OF]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"hose"})
MERGE (parent)-[:PARENT_OF{approved:True}]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"tooth"})
MERGE (parent)-[:PARENT_OF]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"pump"})
MERGE (parent)-[:PARENT_OF]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"valve"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"oil"})
MERGE (parent)-[:PARENT_OF]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"bucket"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"lube"})
MERGE (parent)-[:PARENT_OF]->(child)

MATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"bucket"})
MATCH (child:TAG:ONE_GRAM:ITEM{keyword:"line"})
MERGE (parent)-[:PARENT_OF{approved:True}]->(child)

MATCH (parent:TAG:ONE

['\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"hyd"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"valve"})\nMERGE (parent)-[:PARENT_OF{approved:True}]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"valve"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"oil"})\nMERGE (parent)-[:PARENT_OF]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"engine"})\nMERGE (parent)-[:PARENT_OF]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"hose"})\nMERGE (parent)-[:PARENT_OF{approved:True}]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"tooth"})\nMERGE (parent)-[:PARENT_OF]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"oil"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"pump"})\nMERGE (parent)-[:PARENT_OF]->(child)',
 '\nMATCH (parent:TAG:ONE_GRAM:ITEM{keyword:"valve"})\nMATCH (child:TAG:ONE_GRAM:ITEM{keyword:"bucket"})\nMERGE (parent