In [None]:
import requests
from neo4j import GraphDatabase
import networkx as nx
import matplotlib.pyplot as plt
import tkinter as tk
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import time
from typing import Optional, List, Set, Tuple
import itertools
import asyncio
import aiohttp
import plotly.graph_objects as go
import networkx as nx
from plotly.colors import sample_colorscale

%matplotlib inline

# === Configuration ===
NEO4J_URI = "neo4j+s://2ca821c3.databases.neo4j.io"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "ezlLiixUK9RhSHNdiiwwk7JY9WXb3uK6ph8GjdLYPl8"

driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
    max_connection_lifetime=3600,  # 1 hour, adjust as needed
    max_connection_pool_size=20,   # adjust as needed for your workload
    keep_alive=True
)

In [3]:
# === Neo4j insertion ===
def insert_relations(tx, doi: str, citations_out: List[str], citations_in: List[str]):
    # Assurer le noeud de base
    tx.run("MERGE (p:Paper {doi: $doi})", doi=doi)

    # Références citées
    for ref in citations_out:
        tx.run(
            """
            MERGE (cited:Paper {doi: $cited_doi})
            MERGE (p:Paper {doi: $doi})
            MERGE (p)-[:CITES]->(cited)
            """,
            doi=doi, cited_doi=ref
        )

    # Entrants (papiers citant ce DOI)
    for citer in citations_in:
        tx.run(
            """
            MERGE (citer:Paper {doi: $citer_doi})
            MERGE (p:Paper {doi: $doi})
            MERGE (citer)-[:CITES]->(p)
            """,
            doi=doi, citer_doi=citer
        )

# === OpenCitations API (async) ===
async def get_citations_out(session: aiohttp.ClientSession, doi: str) -> List[str]:
    """DOIs cités par le DOI donné"""
    url = f"https://opencitations.net/index/coci/api/v1/references/{doi}"
    try:
        timeout = aiohttp.ClientTimeout(total=15)
        async with session.get(url, timeout=timeout) as r:
            r.raise_for_status()
            data = await r.json()
            return [item["cited"] for item in data if "cited" in item][:MAX_CITED]
    except Exception as e:
        print(f"[OpenCitations] Erreur (out) {doi}: {e}")
        return []

async def get_citations_in(session: aiohttp.ClientSession, doi: str) -> List[str]:
    """DOIs citant le DOI donné"""
    url = f"https://opencitations.net/index/coci/api/v1/citations/{doi}"
    try:
        timeout = aiohttp.ClientTimeout(total=15)
        async with session.get(url, timeout=timeout) as r:
            r.raise_for_status()
            data = await r.json()
            return [item["citing"] for item in data if "citing" in item][:MAX_CITING]
    except Exception as e:
        print(f"[OpenCitations] Erreur (in) {doi}: {e}")
        return []

# === Graphe récursif avec batching séquentiel, collecte OpenCitations en parallèle ===

async def build_citation_graph(seed_doi: str, depth: int = DEPTH, pause: float = PAUSE):
    import time as _time
    seen: Set[str] = set()
    frontier: List[Tuple[str, int]] = [(seed_doi, 0)]
    start_time = _time.time()

    async with aiohttp.ClientSession() as aio_session:
        with driver.session() as session:
            while frontier:
                # Prendre un batch de BATCH_SIZE DOIs à traiter
                batch = []
                next_frontier = []
                for _ in range(BATCH_SIZE):
                    if not frontier:
                        break
                    doi, level = frontier.pop(0)
                    if doi not in seen and level <= depth:
                        batch.append((doi, level))

                if not batch:
                    break

                dois_to_insert = []
                out_refs_batch = []
                in_refs_batch = []
                levels_batch = []

                # Asynchronous fetching
                sem = asyncio.Semaphore(MAX_THREADS)
                async def fetch_citations(doi, level):
                    async with sem:
                        out_refs = await get_citations_out(aio_session, doi)
                        in_refs = await get_citations_in(aio_session, doi)
                        await asyncio.sleep(pause)
                        return (doi, level, out_refs, in_refs)

                tasks = [fetch_citations(doi, level) for doi, level in batch]
                results = await asyncio.gather(*tasks, return_exceptions=True)
                for result in results:
                    if isinstance(result, Exception):
                        print(f"[OpenCitations] Erreur lors de la récupération : {result}")
                        continue
                    doi_fetched, level_fetched, out_refs, in_refs = result
                    seen.add(doi_fetched)
                    print(f"[Niveau {level_fetched}] {doi_fetched} → {len(out_refs)} cités, {len(in_refs)} citants")
                    dois_to_insert.append(doi_fetched)
                    out_refs_batch.append(out_refs)
                    in_refs_batch.append(in_refs)
                    levels_batch.append(level_fetched)
                    # Ajouter les nouveaux DOIs à la prochaine frontière
                    for ref in out_refs + in_refs:
                        if ref not in seen and level_fetched < depth:
                            next_frontier.append((ref, level_fetched + 1))

                # Insérer en batch dans une seule transaction (séquentiel)
                def batch_insert(tx):
                    for doi, citations_out, citations_in in zip(dois_to_insert, out_refs_batch, in_refs_batch):
                        insert_relations(tx, doi, citations_out, citations_in)
                session.execute_write(batch_insert)

                frontier = next_frontier

    elapsed = _time.time() - start_time
    print(f"[Graphe OpenCitations] Construction terminée. Temps total : {elapsed:.2f} secondes.")


In [6]:
PATIENT_ZERO = "10.1002/cssc.201900519"
MAX_CITED = 10
MAX_CITING = 10
DEPTH = 2
PAUSE = 1
BATCH_SIZE = 10
MAX_THREADS = 50  # For OpenCitations API concurrency

In [7]:
# === Main ===
if __name__ == "__main__":
    

    print(f"[INFO] Exploration OpenCitations profondeur : {DEPTH}")
    #Version python : asyncio.run(build_citation_graph(seed_doi=PATIENT_ZERO, depth=DEPTH))
    await build_citation_graph(seed_doi=PATIENT_ZERO, depth=DEPTH)

    # Récupération des relations depuis Neo4j
    with driver.session() as session:
        query = """
        MATCH (p:Paper)-[r:CITES]->(q:Paper)
        RETURN p.doi AS from, q.doi AS to
        """
        result = list(session.run(query))

    # Construire le graphe NetworkX
    G = nx.DiGraph()
    node_labels = {}
    edge_colors = []
    edge_list = []

    for record in result:
        f = record["from"]
        t = record["to"]
        node_labels[f] = f"Paper: {f}"  # ou remplacer par titre si dispo
        node_labels[t] = f"Paper: {t}"
        G.add_node(f)
        G.add_node(t)
        # edges
        G.add_edge(f, t)
        edge_list.append((f, t))
        edge_colors.append('green')
        G.add_edge(t, f)
        edge_list.append((t, f))
        edge_colors.append('red')

[INFO] Exploration OpenCitations profondeur : 2
[Niveau 0] 10.1002/cssc.201900519 → 10 cités, 10 citants
[OpenCitations] Erreur (out) : 200, message='Attempt to decode JSON with unexpected mimetype: text/html', url='https://api.opencitations.net/index/v1'
[OpenCitations] Erreur (in) : 200, message='Attempt to decode JSON with unexpected mimetype: text/html', url='https://api.opencitations.net/index/v1'
[Niveau 1] 10.1021/ol048619j → 10 cités, 10 citants
[Niveau 1] 10.1002/aoc.1701 → 10 cités, 10 citants
[Niveau 1] 10.1002/cctc.201701617 → 10 cités, 10 citants
[Niveau 1] 10.1002/ange.200905025 → 10 cités, 10 citants
[Niveau 1]  → 0 cités, 0 citants
[Niveau 1] 10.1021/cr020095i → 10 cités, 10 citants
[Niveau 1] 10.1021/acs.organomet.6b00478 → 10 cités, 10 citants
[Niveau 1] 10.1039/c7gc00999b → 10 cités, 10 citants
[Niveau 1] 10.1016/j.tetlet.2008.07.107 → 10 cités, 10 citants
[Niveau 1] 10.1002/chem.201603148 → 10 cités, 10 citants
[Niveau 2] 10.1021/jo0262560 → 4 cités, 10 citants
[Niv

In [22]:
# === Graph ===


#pos = nx.spring_layout(G, k=0.5, iterations=50)
pos = nx.kamada_kawai_layout(G)
"""
    •   nx.circular_layout(G) → nodes disposés en cercle
	•	nx.kamada_kawai_layout(G) → layout force-directed alternatif
	•	nx.shell_layout(G) → concentrique en cercles
	•	nx.random_layout(G) → positions aléatoires
"""

## Colors

in_citation_count = {node: G.in_degree(node) for node in G.nodes()}
max_in_cite = max(in_citation_count.values())
min_in_cite = min(in_citation_count.values())
in_colors = []
for node in G.nodes():
    # normaliser entre 0 et 1
    norm_val = (in_citation_count[node] - min_in_cite) / (max_in_cite - min_in_cite + 1e-5)
    in_color = sample_colorscale('Viridis', [norm_val])[0]
    in_colors.append(in_color)
    
out_citation_count = {node: G.out_degree(node) for node in G.nodes()}
max_out_cite = max(out_citation_count.values())
min_out_cite = min(out_citation_count.values())
out_colors = []
for node in G.nodes():
    # normaliser entre 0 et 1
    norm_val = (out_citation_count[node] - min_out_cite) / (max_out_cite - min_out_cite + 1e-5)
    out_color = sample_colorscale('Viridis', [norm_val])[0]
    out_colors.append(out_color)

## arêtes

edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color="white"),
    hoverinfo='none',
    mode='lines'
)

"""
	•	line.width : épaisseur des arêtes (edges)
	•	line.color : couleur des arêtes, peut être un code hex ou nom ('#ff0000' ou 'red')
	•	mode : 'lines' ou 'lines+markers' si tu veux mettre des marqueurs aux extrémités
	•	Tu peux ajouter des flèches en utilisant des annotations Plotly ou arrowhead (plus complexe à gérer pour beaucoup d’arêtes)
 """

## noeuds

node_x = []
node_y = []
node_text = []

for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_text.append(node_labels[node])  # texte qui apparaît au hover

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=[ "" for _ in node_x],  # ne pas afficher le texte directement
    hovertext=node_text,          # texte qui apparaît au hover
    hoverinfo='text',
    marker=dict(
        size=15,
        color=in_colors, 
        line=dict(width=1, color='white')
    )
)

"""
	•	marker.size : taille des nodes
	•	marker.color : couleur des nodes (hex, nom ou liste pour colorier individuellement)
	•	marker.symbol : forme du node ('circle', 'square', 'diamond', 'triangle-up', etc.)
	•	text : texte affiché directement sur le graphe (ici vide pour ne pas afficher)
	•	hovertext : texte affiché au hover (ex: DOI ou titre)
	•	hoverinfo : quel type d’info montrer au hover ('text', 'all', 'none')
 """

## figure

fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    plot_bgcolor="black",
                    margin=dict(b=5,l=5,r=5,t=5),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                ))


"""
	•	showlegend : afficher la légende (True/False)
	•	hovermode : 'closest' → tooltip pour le point le plus proche, 'x'/'y' pour coordonnées, 'all' pour tout
	•	margin : marges autour de la figure (b=bottom, l=left, r=right, t=top)
	•	Tu peux aussi ajouter :
	•	title="Titre du graphe"
	•	plot_bgcolor="white" ou autre couleur de fond
	•	xaxis/yaxis pour masquer les axes : dict(showgrid=False, zeroline=False, showticklabels=False)
"""

fig.show()