## TP3 - Neo4j

TAFARO Ugo

Pour lancer le TP, il faut d'abord lancer le serveur neo4j. 
Faire "run all" pour initialiser le notebook

In [36]:
from neo4j import GraphDatabase

### Exercice 1

**Connexion à la base de données neo4j**

In [37]:
# Database Credentials

uri             = "bolt://localhost:7687"
userName        = "neo4j"
password        = "universite"

# Connect to the neo4j database server
graphDB_Driver  = GraphDatabase.driver(uri, auth=(userName, password))

**Fonctions utiles**

In [38]:
def run_graphdb(query):
    """
    Run a query on the graph database and return the result
    
    """
    with graphDB_Driver.session() as graphDB_Session:
        res =  graphDB_Session.run(query)
        return list(res)
        
def clear_db(node=""):
    """
    Delete all the nodes or one node in the database
    """
    if node == "":
        req = f"MATCH (n) DETACH DELETE n"
    else:
        req = f"MATCH (n:{node}) DETACH DELETE n"
    with graphDB_Driver.session() as graphDB_Session:
            graphDB_Session.run(req)

In [39]:
#Create the database
cqlCreate = """
    CREATE (:Acteur {nom: "Tom Hanks"})
    CREATE (:Acteur {nom: "Julia Roberts"})
    CREATE (:Acteur {nom: "Brad Pitt"})

    
    CREATE (:Film {titre: "Forrest Gump", année: 1994})
    CREATE (:Film {titre: "Pretty Woman", année: 1990})
    CREATE (:Film {titre: "Fight Club", année: 1999})

    
    
    CREATE (a)-[:A_JOUE_DANS]->(f)

    
    CREATE (a)-[:A_JOUE_DANS]->(f)
    CREATE (a)-[:A_JOUE_DANS]->(f)
    CREATE (a1)-[:AMI]->(a2)
    CREATE (a1)-[:A_COLLABORE_AVEC]->(a2)

"""



**Récupérer tous les acteurs**

In [40]:
# Recup all the movies in the database
cqlNodeQuery          = "MATCH (f:Film) RETURN f"

clear_db("Film")
run_graphdb(cqlCreate)
nodes = run_graphdb(cqlNodeQuery)
print("List of movies present in the graph  ")
for node in nodes:

    print(node)

List of movies present in the graph  
<Record f=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2436' labels=frozenset({'Film'}) properties={'titre': 'Forrest Gump', 'année': 1994}>>
<Record f=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2437' labels=frozenset({'Film'}) properties={'titre': 'Pretty Woman', 'année': 1990}>>
<Record f=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2438' labels=frozenset({'Film'}) properties={'titre': 'Fight Club', 'année': 1999}>>


**Récupérer tous les acteurs**

In [41]:
# Retrieve all actors

cqlNodeQuery = "MATCH (a:Acteur) RETURN a"
nodes = run_graphdb(cqlNodeQuery)
print("List of actors present in the graph:")
for node in nodes:
    print(node)


List of actors present in the graph:
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2415' labels=frozenset({'Acteur'}) properties={'nom': 'Tom Hanks'}>>
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2434' labels=frozenset({'Acteur'}) properties={'nom': 'Julia Roberts'}>>
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2435' labels=frozenset({'Acteur'}) properties={'nom': 'Brad Pitt'}>>
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2667' labels=frozenset({'Acteur'}) properties={'nom': 'Tom Hanks'}>>
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2668' labels=frozenset({'Acteur'}) properties={'nom': 'Julia Roberts'}>>
<Record a=<Node element_id='4:ee2b87e8-5438-4cb0-bb01-4a64a65a7411:2669' labels=frozenset({'Acteur'}) properties={'nom': 'Brad Pitt'}>>


### Exercice 2

**Charger le dataset**

In [42]:
import pandas as pd

df = pd.read_csv('communes-departement-region.csv')

**On supprime les valeurs null**

In [43]:
df.dropna(subset=['nom_region'], inplace=True)



**Création des noeuds régions**

In [44]:
import re

clear_db("Region")
regions = df['nom_region'].unique()
departements = df['nom_departement'].unique()

for region in regions:
    if type(region) == str:
        region = re.sub(r"\'", "", region) # Remove the apostrophe in the region name
        cqlCreate = f"CREATE (:Region {{nom: '{region}'}})"
        run_graphdb(cqlCreate)
        
            




**Création noeud département**

In [45]:

clear_db("Departement")
for departement in departements:
    try: 
        if type(departement) == str:
            region = df[df['nom_departement'] == departement]['nom_region'].values[0]
            region = re.sub(r"\'", "", region) # Remove the apostrophe in the departement name
            cqlCreate = f"CREATE (:Departement {{nom: '{departement}'}})"
            cqlLink = f"MATCH (r:Region {{nom: '{region}'}}) \
                        MATCH (d:Departement {{nom: '{departement}'}}) \
                        CREATE (d)-[:EST_DANS]->(r)"
            
            run_graphdb(cqlCreate)
            run_graphdb(cqlLink)
            
    except Exception:
        pass

**Création noeud communes**

In [46]:
clear_db('Commune')
communes = df["nom_commune"].sample(n=3000) # We take a sample of 3000 communes in the dataframe

for commune in communes:
    
    departement = df[df['nom_commune'] == commune]['nom_departement'].values[0]
    code_postal = df[df['nom_commune'] == commune]['code_postal'].values[0]
    longitude = df[df['nom_commune'] == commune]['longitude'].values[0]
    latitude = df[df['nom_commune'] == commune]['latitude'].values[0]
    code_commune = df[df['nom_commune'] == commune]['code_commune'].values[0]
    cqlCreate = f'CREATE (:Commune {{nom: "{commune}", code_postal : {code_postal}, longitude: {longitude}, latitude: {latitude}, code_commune: {code_commune}}})'
    cqlLink = f'MATCH (d:Departement {{nom: "{departement}"}}) \
                        MATCH (c:Commune {{nom: "{commune}"}}) \
                        CREATE (d)-[:HAS_MUNICIPALITY]->(c)'
    
    run_graphdb(cqlCreate)
    run_graphdb(cqlLink)
   
