In [36]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
from neo4j import GraphDatabase

# Shared Neo4j driver used by every cell below.
# The URI and credentials are read from the environment when NEO4J_URI,
# NEO4J_USER or NEO4J_PASSWORD are set, so real secrets need not be stored in
# the notebook. The literals are the notebook's original values, kept only as
# fallbacks so an unconfigured local run behaves exactly as before.
# NOTE(review): consider removing the password fallback once NEO4J_PASSWORD
# is configured wherever this notebook runs.
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j123456"),
    ),
)



In [37]:
# insert account data into neo4j
# account = pd.read_csv('./data/account.csv')
# def create_account(tx):
#     for line in account.values.tolist():
#         date = datetime.strptime(str(line[3]), '%y%m%d' )
#         date = str(date).replace(" ", "T")
#         tx.run("CREATE (ac: account {{account_id : {}, district_id : {},  frequency : \"{}\", date: datetime(\"{}\") }})".format(line[0], line[1], line[2], date))

In [38]:
# with driver.session() as session:
#     session.write_transaction(create_account)

In [39]:
# insert client data into neo4j
# client = pd.read_csv('./data/client.csv')
# def create_client(tx):
#     for line in client.values.tolist():
#         # process the birth_number with the rules:
#         # the number is in the form YYMMDD for men,
#         # the number is in the form YYMM+50DD for women
#         birth = str(line[1])
#         gender = 'M'
#         month = birth[2:4]
#         ### if the client is female
#         if int(month) > 50 :
#             gender = 'F'
#             month = str(int(month) - 50)
#             month = month.zfill(2)
#         #print(birth)
#         birth = "".join((birth[:2], month, birth[4:]))
#         birth = "19" + birth
#         #print(birth)
#         tx.run("CREATE (cl: client {{client_id: {}, birthdate: date(\"{}\"), gender: \"{}\", district_id: {} }})".format(line[0], birth, gender, line[2]))
# 
# # with driver.session() as session:
# #     session.write_transaction(create_client)  

In [40]:
def create_trans(tx):
    """Load ``file:///transaction.csv`` into ``trans`` nodes.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx``. Each
    CSV row becomes a new ``trans`` node holding the row's ``trans_id``,
    ``date``, ``type``, ``amount`` and ``balance`` columns; the query applies
    no type conversion to them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Note:
        The statement uses ``create``, so running it again adds the nodes a
        second time.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///transaction.csv" AS line '
        "create (p:trans {"
        "trans_id:line.trans_id, "
        "date:line.date, "
        "type:line.type, "
        "amount: line.amount, "
        "balance: line.balance"
        "})"
    )
    tx.run(cypher)

# Run the transaction.csv node load: open a session on the module-level
# driver and hand create_trans to write_transaction, which calls it with the
# transaction object it expects.
with driver.session() as session:
    session.write_transaction(create_trans)
    

In [41]:
def create_account(tx):
    """Load ``file:///account.csv`` into ``account`` nodes.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx``. Each
    CSV row becomes a new ``account`` node holding the row's ``account_id``,
    ``date`` and ``frequency`` columns; the query applies no type conversion
    to them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Note:
        The statement uses ``create``, so running it again adds the nodes a
        second time.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///account.csv" AS line '
        "create (ac:account {"
        "account_id:line.account_id, "
        "date:line.date, "
        "frequency:line.frequency"
        "})"
    )
    tx.run(cypher)

# Run the account.csv node load: open a session on the module-level driver
# and hand create_account to write_transaction, which calls it with the
# transaction object it expects.
with driver.session() as session:
    session.write_transaction(create_account)

In [42]:
def create_client(tx):
    """Load ``file:///client.csv`` into ``client`` nodes.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx``. Each
    CSV row becomes a new ``client`` node with ``client_id``, ``gender`` and
    ``age`` taken from the columns of the same name and ``birthdate`` taken
    from the ``birth_number`` column; the query applies no type conversion.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Note:
        The statement uses ``create``, so running it again adds the nodes a
        second time.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///client.csv" AS line '
        "create (cl:client {"
        "client_id:line.client_id, "
        "birthdate:line.birth_number, "
        "gender:line.gender, "
        "age:line.age"
        "})"
    )
    tx.run(cypher)

# Run the client.csv node load: open a session on the module-level driver
# and hand create_client to write_transaction, which calls it with the
# transaction object it expects.
with driver.session() as session:
    session.write_transaction(create_client)


In [43]:
def create_district(tx):
    """Load ``file:///district.csv`` into ``district`` nodes.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx``. Each
    CSV row becomes a new ``district`` node whose properties are copied from
    the CSV columns of the same name (listed one per line in the query
    below); the query applies no type conversion to them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Note:
        The statement uses ``create``, so running it again adds the nodes a
        second time.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///district.csv" AS line '
        "create (dist:district {"
        "district_id:line.district_id, "
        "district_name:line.district_name, "
        "no_of_inhabitants:line.no_of_inhabitants, "
        "A5: line.A5, "
        "A6:line.A6, "
        "A7:line.A7, "
        "A8:line.A8, "
        "no_of_cities: line.no_of_cities, "
        "ratio_of_urban_inhabitants:line.ratio_of_urban_inhabitants, "
        "average_salary:line.average_salary, "
        "unemploymant_rate_95:line.unemploymant_rate_95, "
        "unemploymant_rate_96:line.unemploymant_rate_96, "
        "no_of_enterpreneurs_per1000inhabitants:line.no_of_enterpreneurs_per1000inhabitants, "
        "no_of_commited_crimes95:line.no_of_commited_crimes95, "
        "no_of_commited_crimes96:line.no_of_commited_crimes96"
        "})"
    )
    tx.run(cypher)

# Run the district.csv node load: open a session on the module-level driver
# and hand create_district to write_transaction, which calls it with the
# transaction object it expects.
with driver.session() as session:
    session.write_transaction(create_district)

In [44]:
def create_region(tx):
    """Derive ``region`` nodes from ``file:///district.csv``.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx`` that
    MERGEs a ``region`` node keyed on each row's ``region`` column, so a
    region value appearing on several rows ends up as one node.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///district.csv" AS line '
        "MERGE (reg:region {region:line.region})"
    )
    tx.run(cypher)

# Build the region nodes: open a session on the module-level driver and hand
# create_region to write_transaction, which calls it with the transaction
# object it expects.
with driver.session() as session:
    session.write_transaction(create_region)

In [45]:
def create_loan(tx):
    """Load ``file:///loan.csv`` into ``loan`` nodes.

    Sends a single ``LOAD CSV WITH HEADERS`` statement through ``tx``. Each
    CSV row becomes a new ``loan`` node holding the row's ``loan_id``,
    ``date``, ``amount``, ``duration``, ``payments`` and ``status`` columns;
    the query applies no type conversion to them.

    Args:
        tx: transaction-like object exposing ``run(query)``.

    Note:
        The statement uses ``create``, so running it again adds the nodes a
        second time.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///loan.csv" AS line '
        "create (loan:loan {"
        "loan_id:line.loan_id, "
        "date:line.date, "
        "amount:line.amount, "
        "duration:line.duration, "
        "payments: line.payments, "
        "status:line.status"
        "})"
    )
    tx.run(cypher)

# Run the loan.csv node load: open a session on the module-level driver and
# hand create_loan to write_transaction, which calls it with the transaction
# object it expects.
with driver.session() as session:
    session.write_transaction(create_loan)
    

### Create indexes on the properties that are used to MATCH or MERGE nodes
##### CREATE INDEX ON :account(account_id)
##### CREATE INDEX ON :district(district_id)
##### CREATE INDEX ON :client(client_id)
##### CREATE INDEX ON :trans(trans_id)
##### CREATE INDEX ON :loan(loan_id)

In [46]:
def create_index_account_id(tx):
    """Create an index on the ``account_id`` property of ``account`` nodes.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    # NOTE(review): this is the `CREATE INDEX ON :Label(prop)` form; confirm
    # the target Neo4j server version still accepts it.
    index_statement = "CREATE INDEX ON :account(account_id)"
    tx.run(index_statement)
# Create the account_id index: open a session on the module-level driver and
# hand create_index_account_id to write_transaction.
with driver.session() as session:
    session.write_transaction(create_index_account_id)

In [47]:
def create_index_dist_id(tx):
    """Create an index on the ``district_id`` property of ``district`` nodes.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    # NOTE(review): this is the `CREATE INDEX ON :Label(prop)` form; confirm
    # the target Neo4j server version still accepts it.
    index_statement = "CREATE INDEX ON :district(district_id)"
    tx.run(index_statement)
# Create the district_id index: open a session on the module-level driver
# and hand create_index_dist_id to write_transaction.
with driver.session() as session:
    session.write_transaction(create_index_dist_id)

In [48]:
def create_index_client_id(tx):
    """Create an index on the ``client_id`` property of ``client`` nodes.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    # NOTE(review): this is the `CREATE INDEX ON :Label(prop)` form; confirm
    # the target Neo4j server version still accepts it.
    index_statement = "CREATE INDEX ON :client(client_id)"
    tx.run(index_statement)
# Create the client_id index: open a session on the module-level driver and
# hand create_index_client_id to write_transaction.
with driver.session() as session:
    session.write_transaction(create_index_client_id)

In [49]:
def create_index_trans_id(tx):
    """Create an index on the ``trans_id`` property of ``trans`` nodes.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    # NOTE(review): this is the `CREATE INDEX ON :Label(prop)` form; confirm
    # the target Neo4j server version still accepts it.
    index_statement = "CREATE INDEX ON :trans(trans_id)"
    tx.run(index_statement)
# Create the trans_id index: open a session on the module-level driver and
# hand create_index_trans_id to write_transaction.
with driver.session() as session:
    session.write_transaction(create_index_trans_id)

In [50]:
def create_index_loan_id(tx):
    """Create an index on the ``loan_id`` property of ``loan`` nodes.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    # NOTE(review): this is the `CREATE INDEX ON :Label(prop)` form; confirm
    # the target Neo4j server version still accepts it.
    index_statement = "CREATE INDEX ON :loan(loan_id)"
    tx.run(index_statement)
# Create the loan_id index: open a session on the module-level driver and
# hand create_index_loan_id to write_transaction.
with driver.session() as session:
    session.write_transaction(create_index_loan_id)

### relationships
#### 1. account_id - [: BelongTo] - district_id (load from account.csv)

In [51]:
def create_rel_acc_dist(tx):
    """Link ``account`` nodes to ``district`` nodes with ``BelongTo``.

    For every row of ``file:///account.csv`` the query matches the account
    by ``account_id`` and the district by ``district_id``, then MERGEs
    ``(acc)-[:BelongTo]->(dist)``. A row whose account or district is not
    found produces no relationship.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///account.csv" AS row '
        "MATCH (acc:account {account_id: row.account_id}) "
        "MATCH (dist:district {district_id: row.district_id}) "
        "MERGE (acc)-[:BelongTo]->(dist)"
    )
    tx.run(cypher)
# Create the account -> district relationships: open a session on the
# module-level driver and hand create_rel_acc_dist to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_acc_dist)

### relationships
#### 2. client_id -  [:BelongTo] - district_id

In [52]:
def create_rel_cli_dist(tx):
    """Link ``client`` nodes to ``district`` nodes with ``BelongTo``.

    For every row of ``file:///client.csv`` the query matches the client by
    ``client_id`` and the district by ``district_id``, then MERGEs
    ``(cl)-[:BelongTo]->(dist)``. A row whose client or district is not
    found produces no relationship.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///client.csv" AS row '
        "MATCH (cl:client {client_id: row.client_id}) "
        "MATCH (dist:district {district_id: row.district_id}) "
        "MERGE (cl)-[:BelongTo]->(dist)"
    )
    tx.run(cypher)
# Create the client -> district relationships: open a session on the
# module-level driver and hand create_rel_cli_dist to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_cli_dist)

### relationships
#### 3. account_id - [: made] - trans_id

In [53]:
def create_rel_acc_trans(tx):
    """Link ``account`` nodes to ``trans`` nodes with ``made``.

    For every row of ``file:///transaction.csv`` the query matches the
    account by ``account_id`` and the transaction by ``trans_id``, then
    MERGEs ``(acc)-[:made]->(trans)``. A row whose account or transaction is
    not found produces no relationship.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///transaction.csv" AS row '
        "MATCH (acc:account {account_id: row.account_id}) "
        "MATCH (trans:trans {trans_id: row.trans_id}) "
        "MERGE (acc)-[:made]->(trans)"
    )
    tx.run(cypher)
# Create the account -> transaction relationships: open a session on the
# module-level driver and hand create_rel_acc_trans to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_acc_trans)

### relationships
### 4. loan_id -> account_id: account_id - [: made] - loan_id

In [54]:
def create_rel_acc_loan(tx):
    """Link ``account`` nodes to ``loan`` nodes with ``made``.

    For every row of ``file:///loan.csv`` the query matches the account by
    ``account_id`` and the loan by ``loan_id``, then MERGEs
    ``(acc)-[:made]->(loan)``. A row whose account or loan is not found
    produces no relationship.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///loan.csv" AS row '
        "MATCH (acc:account {account_id: row.account_id}) "
        "MATCH (loan:loan {loan_id: row.loan_id}) "
        "MERGE (acc)-[:made]->(loan)"
    )
    tx.run(cypher)
# Create the account -> loan relationships: open a session on the
# module-level driver and hand create_rel_acc_loan to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_acc_loan)

### relationships
### 5. district_id -> region: district_id - [: BelongTo] - region

In [55]:
def create_rel_dist_region(tx):
    """Link ``district`` nodes to ``region`` nodes with ``BelongTo``.

    For every row of ``file:///district.csv`` the query matches the district
    by ``district_id`` and the region by its ``region`` value, then MERGEs
    ``(dist)-[:BelongTo]->(reg)``. A row whose district or region is not
    found produces no relationship.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///district.csv" AS row '
        "MATCH (dist:district {district_id: row.district_id}) "
        "MATCH (reg:region {region: row.region}) "
        "MERGE (dist)-[:BelongTo]->(reg)"
    )
    tx.run(cypher)
# Create the district -> region relationships: open a session on the
# module-level driver and hand create_rel_dist_region to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_dist_region)

### relationships
### 6. client_id - [: hold] - account_id (load from disp.csv)

In [56]:
def create_rel_client_acc(tx):
    """Link ``client`` nodes to ``account`` nodes with ``hold``.

    For every row of ``file:///disp.csv`` the query matches the client by
    ``client_id`` and the account by ``account_id``, then MERGEs
    ``(client)-[rel:hold]->(acc)``. ``rel.type`` is set from the row's
    ``type`` column only when the relationship is created by this MERGE
    (``ON CREATE``); an already existing relationship is left as it is.

    Args:
        tx: transaction-like object exposing ``run(query)``.
    """
    cypher = (
        'LOAD CSV WITH HEADERS FROM "file:///disp.csv" AS row '
        "MATCH (client:client {client_id: row.client_id}) "
        "MATCH (acc:account {account_id: row.account_id}) "
        "MERGE (client)-[rel:hold]->(acc) "
        "ON CREATE SET rel.type = row.type"
    )
    tx.run(cypher)
# Create the client -> account relationships: open a session on the
# module-level driver and hand create_rel_client_acc to write_transaction.
with driver.session() as session:
    session.write_transaction(create_rel_client_acc)