In [None]:
!pip install neo4j

## Datasets

In [None]:
import pandas as pd

### `customers`

In [None]:
customers = pd.read_csv('datasets/customers.csv')

In [None]:
customers.count()

In [None]:
customers.head()

In [None]:
customers.dtypes

### `purchases`

In [None]:
purchases = pd.read_csv('datasets/purchases.csv')

In [None]:
purchases.count()

In [None]:
purchases.head()

Check whether any transaction ID appears more than once.

In [None]:
# Number of rows per TransactionID, largest first: a count above 1 marks a duplicate.
rows_per_purchase_id = purchases.groupby('TransactionID')['TransactionID'].size()
rows_per_purchase_id.sort_values(ascending=False)

Inspect the rows that share a duplicated transaction ID.

In [None]:
# Show every row whose TransactionID occurs more than once, grouped by ID,
# rather than looking up a single hard-coded ID.
purchases[purchases.duplicated('TransactionID', keep=False)].sort_values('TransactionID')

For each duplicated transaction ID, keep only the row with the highest purchase amount and store the result in a new DataFrame called `purchases_cleaned`.

In [None]:
# Keep exactly one row per TransactionID: the one with the highest Amount.
# - The raw `purchases` frame is left untouched (no helper column is added).
# - When several rows of a transaction tie on Amount, the first one in file
#   order is kept, so TransactionID is guaranteed unique afterwards.
# - Rows with a missing Amount sort last, so they are only kept when the
#   transaction has no other row.
purchases_cleaned = (
    purchases
    .sort_values('Amount', ascending=False, kind='stable')
    .drop_duplicates(subset='TransactionID', keep='first')
    .sort_index()  # restore the original row order
)

assert purchases_cleaned['TransactionID'].is_unique

In [None]:
purchases_cleaned.count()

In [None]:
# Parse the purchase timestamps from text into datetime64 values.
# The trailing `Z` in the format is matched as a literal character.
purchase_datetime_format = '%Y-%m-%d %H:%M:%SZ'
purchases_cleaned['PurchaseDatetime'] = pd.to_datetime(
    purchases_cleaned['PurchaseDatetime'],
    format=purchase_datetime_format,
)

In [None]:
purchases_cleaned.head()

In [None]:
purchases_cleaned.dtypes

### `transfers`

In [None]:
transfers = pd.read_csv('datasets/transfers.csv')

In [None]:
transfers.count()

In [None]:
transfers.head()

Check whether any transfer ID appears more than once.

In [None]:
# Number of rows per TransactionID, largest first: a count above 1 marks a duplicate.
rows_per_transfer_id = transfers.groupby('TransactionID')['TransactionID'].size()
rows_per_transfer_id.sort_values(ascending=False)

In [None]:
# Show every row whose TransactionID occurs more than once, grouped by ID,
# rather than looking up a single hard-coded ID.
transfers[transfers.duplicated('TransactionID', keep=False)].sort_values('TransactionID')

Remove the duplicated transfer row with the lowest transaction amount and store the result in a new DataFrame called `transfers_cleaned`.

In [None]:
# Keep exactly one row per TransactionID: the one with the highest Amount.
# The row to drop is derived from the data instead of a hard-coded index
# label, so the cell keeps working when the CSV changes or is re-ordered.
transfers_cleaned = (
    transfers
    .sort_values('Amount', ascending=False, kind='stable')
    .drop_duplicates(subset='TransactionID', keep='first')
    .sort_index()  # restore the original row order
)

assert transfers_cleaned['TransactionID'].is_unique

# Spot-check the transfer ID that was duplicated in the raw data.
transfers_cleaned[transfers_cleaned['TransactionID'] == 835422]

In [None]:
transfers_cleaned.count()

In [None]:
# Parse the transfer timestamps from text into datetime64 values.
# The trailing `Z` in the format is matched as a literal character.
transfer_datetime_format = '%Y-%m-%d %H:%M:%SZ'
transfers_cleaned['TransferDatetime'] = pd.to_datetime(
    transfers_cleaned['TransferDatetime'],
    format=transfer_datetime_format,
)

In [None]:
transfers_cleaned.head()

In [None]:
transfers_cleaned.dtypes

## Data Model

![title](./images/Bank_transaction_purchase.png)

## Load data into Neo4j

Update the following variables based on your configuration.

In [None]:
import os

# Connection settings. Each value can be overridden with an environment
# variable so real credentials never have to be saved in the notebook; the
# fallbacks are the local demo defaults.
uri = os.environ.get('NEO4J_URI', 'neo4j://localhost:7687')
username = os.environ.get('NEO4J_USERNAME', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD', 'neo4jneo4j')
db = os.environ.get('NEO4J_DATABASE', 'transaction2')

In [None]:
from neo4j import GraphDatabase

# NOTE(review): no cell visible in this notebook references `driver` again;
# every query goes through `Neo4jConnection`, which opens its own driver.
# This one is never closed - consider removing it.
driver = GraphDatabase.driver(uri=uri, auth=(username,password))

In [None]:
class Neo4jConnection:
    """Small wrapper around a Neo4j driver that runs one query per session.

    Failures are reported with ``print`` instead of being raised, so callers
    have to check for a ``None`` result from :meth:`query`.
    """

    def __init__(self, uri, user, password):
        """Create the driver for ``uri`` using basic ``user``/``password`` auth."""
        # Define the attribute before the attempt so close() and query() can
        # still test it when driver creation fails.
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(uri, auth=(user, password))
            print('Driver is successfully created.')
        except Exception as e:
            print(f'Failed to create the driver: {e}')

    def close(self):
        """Close the driver; does nothing when no driver was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: Optional mapping of query parameters.
            db: Database name; when ``None`` the session is opened without an
                explicit database.

        Returns:
            A list of the records produced by the query, or ``None`` when the
            driver is missing or the query fails (the error is printed).
        """
        if self.__driver is None:
            print('Query failed: the driver was not created.')
            return None

        session_options = {} if db is None else {'database': db}
        try:
            # The context manager closes the session on success and on error.
            with self.__driver.session(**session_options) as session:
                return list(session.run(query, parameters=parameters))
        except Exception as e:
            print(f'Query failed: {e}')
            return None
        

Create connection.

In [None]:
conn = Neo4jConnection(uri, username, password)

Create the database if it does not exist yet.

In [None]:
# Database administration commands are executed against the `system` database.
# WAIT blocks until the database is available, so the schema cell that follows
# does not race the database start-up. Backticks keep the statement valid for
# names that contain special characters.
conn.query(f'CREATE DATABASE `{db}` IF NOT EXISTS WAIT', db='system')

Create constraints and indexes

In [None]:
# One uniqueness constraint per node label, on the property the load cells
# MERGE/MATCH on, plus an index on each datetime property.
#
# Account is constrained on `account_number` only: no cell sets a `cif`
# property on Account nodes, and a second constraint that reuses the name
# `accounts` would be skipped by IF NOT EXISTS because the name is taken.
schema_statements = [
    'CREATE CONSTRAINT accounts IF NOT EXISTS FOR (a:Account) REQUIRE a.account_number IS UNIQUE',
    'CREATE CONSTRAINT customers IF NOT EXISTS FOR (c:Customer) REQUIRE c.cif IS UNIQUE',
    'CREATE CONSTRAINT credit_cards IF NOT EXISTS FOR (cc:Credit_card) REQUIRE cc.card_number IS UNIQUE',
    'CREATE CONSTRAINT merchants IF NOT EXISTS FOR (m:Merchant) REQUIRE m.name IS UNIQUE',
    'CREATE CONSTRAINT countries IF NOT EXISTS FOR (co:Country) REQUIRE co.name IS UNIQUE',
    'CREATE CONSTRAINT addresses IF NOT EXISTS FOR (a:Address) REQUIRE a.address_line IS UNIQUE',
    'CREATE CONSTRAINT transfers IF NOT EXISTS FOR (t:Transfer) REQUIRE t.id IS UNIQUE',
    'CREATE CONSTRAINT purchases IF NOT EXISTS FOR (p:Purchase) REQUIRE p.id IS UNIQUE',
    'CREATE INDEX transfer_datetime IF NOT EXISTS FOR (t:Transfer) ON (t.datetime)',
    'CREATE INDEX purchase_datetime IF NOT EXISTS FOR (p:Purchase) ON (p.datetime)',
]

for statement in schema_statements:
    conn.query(statement, db=db)

### Insert country nodes

In [None]:
# One Country node per distinct country name found in the customers data.
query = '''
UNWIND $rows AS row
MERGE (co:Country {name: row})

RETURN COUNT(*) AS total
'''

# Missing values are dropped because MERGE cannot match on a null property,
# and the numpy array is converted to a plain list of Python strings so the
# parameter does not depend on numpy support in the installed driver.
country_names = customers['Country'].dropna().unique().tolist()

conn.query(query, parameters={'rows': country_names}, db=db)

### Insert `(Customer)-[:LIVES_AT]->(Address)-[:LOCATED_IN]->(Country)`

In [None]:
# Customer nodes are merged on their unique key (`cif`) only; the remaining
# attributes are written with SET, so re-running the cell updates an existing
# customer instead of trying to create a second node with the same `cif`.
# The two relationships are merged separately: merging the whole path in one
# MERGE would re-create LOCATED_IN every time a new customer shares an
# existing address.
query = '''
UNWIND $rows AS row
MATCH (co:Country {name: row.Country})
MERGE (c:Customer {cif: row.CIF})
SET c.age = row.Age,
    c.email_address = row.EmailAddress,
    c.first_name = row.FirstName,
    c.last_name = row.LastName,
    c.phone_number = row.PhoneNumber,
    c.gender = row.Gender,
    c.job_title = row.JobTitle
MERGE (a:Address {address_line: row.Address})
MERGE (c)-[:LIVES_AT]->(a)
MERGE (a)-[:LOCATED_IN]->(co)

RETURN count(*) AS total
'''

conn.query(query, parameters={'rows': customers.to_dict('records')}, db=db)

### Insert merchant nodes

In [None]:
# One Merchant node per distinct merchant name found in the purchases data.
query = '''
UNWIND $rows AS row
MERGE (m:Merchant {name: row})

RETURN COUNT(*) AS total
'''

# Missing values are dropped because MERGE cannot match on a null property,
# and the numpy array is converted to a plain list of Python strings so the
# parameter does not depend on numpy support in the installed driver.
merchant_names = purchases['Merchant'].dropna().unique().tolist()

conn.query(query, parameters={'rows': merchant_names}, db=db)

### Create `(Customer)-[:HAS]->(Credit_card)`

In [None]:
# Create (Customer)-[:HAS]->(Credit_card).
# The card node and the relationship are merged separately: a combined
# MERGE of relationship + node creates a new Credit_card whenever the exact
# pattern is missing, even if a card with that number already exists.
query = '''
UNWIND $rows AS row
MATCH (c:Customer {cif: row.CIF})
MERGE (cc:Credit_card {card_number: row.CardNumber})
MERGE (c)-[:HAS]->(cc)

RETURN COUNT(*) AS total
'''

conn.query(query, parameters={'rows': customers.to_dict('records')}, db=db)

### Create `(Credit_card)-[:BUY]->(Purchase)<-[:SELL]-(Merchant)`

In [None]:
# Create (Credit_card)-[:BUY]->(Purchase)<-[:SELL]-(Merchant).
# Purchase nodes are merged on their unique `id` only and the other
# properties are written with SET, so re-running the cell is idempotent.
# The two relationships are merged one at a time so an existing one is
# never duplicated.
query = '''
UNWIND $rows AS row
MATCH (cc:Credit_card {card_number: row.CardNumber})
MATCH (m:Merchant {name: row.Merchant})
MERGE (p:Purchase {id: row.TransactionID})
SET p.amount = row.Amount,
    p.datetime = row.PurchaseDatetime
MERGE (cc)-[:BUY]->(p)
MERGE (m)-[:SELL]->(p)

RETURN COUNT(*) AS total
'''

conn.query(query, parameters={'rows': purchases_cleaned.to_dict('records')}, db=db)

### Add the `issuer` property to `Credit_card` nodes

In [None]:
# Copy the card issuer from the purchases data onto each Credit_card node.
query = '''
UNWIND $rows AS row
MATCH (cc:Credit_card {card_number: row.CardNumber})
SET cc.issuer = row.CardIssuer

RETURN COUNT(*) AS total
'''

# Send one row per card rather than one per purchase. keep='last' selects the
# same issuer that the last purchase row of each card would have written.
card_issuers = (
    purchases_cleaned[['CardNumber', 'CardIssuer']]
    .drop_duplicates(subset='CardNumber', keep='last')
)

conn.query(query, parameters={'rows': card_issuers.to_dict('records')}, db=db)

### Create `(Customer)-[:HAS]->(Account)`

In [None]:
# Create (Customer)-[:HAS]->(Account).
# The account node and the relationship are merged separately: a combined
# MERGE of relationship + node creates a new Account whenever the exact
# pattern is missing, even if an account with that number already exists.
query = '''
UNWIND $rows AS row
MATCH (c:Customer {cif: row.CIF})
MERGE (a:Account {account_number: row.AccountNumber})
MERGE (c)-[:HAS]->(a)

RETURN COUNT(*) AS total
'''

conn.query(query, parameters={'rows': customers.to_dict('records')}, db=db)

### Create `(Account)-[:TRANSFER_IN]->(Transfer)<-[:TRANSFER_OUT]-(Account)`

In [None]:
# Create (sender:Account)-[:TRANSFER_IN]->(Transfer)<-[:TRANSFER_OUT]-(receiver:Account).
# NOTE(review): the sender is attached with TRANSFER_IN and the receiver with
# TRANSFER_OUT - confirm these directions against the data model diagram.
# Transfer nodes are merged on their unique `id` only and the other
# properties are written with SET, so re-running the cell is idempotent.
# The two relationships are merged one at a time so an existing one is
# never duplicated.
query = '''
UNWIND $rows AS row
MATCH (a:Account {account_number: row.SenderAccountNumber})
MATCH (b:Account {account_number: row.ReceiverAccountNumber})
MERGE (t:Transfer {id: row.TransactionID})
SET t.amount = row.Amount,
    t.datetime = row.TransferDatetime
MERGE (a)-[:TRANSFER_IN]->(t)
MERGE (b)-[:TRANSFER_OUT]->(t)

RETURN COUNT(*) AS total
'''

conn.query(query, parameters={'rows': transfers_cleaned.to_dict('records')}, db=db)

Done