In [1]:
import os
import socket
from neo4j import GraphDatabase

# 1. Environment detection: the presence of /.dockerenv is treated as the
#    signal that this kernel is running inside a Docker container.
in_docker = os.path.exists('/.dockerenv')
print(f"üê≥ Running inside Docker? {'YES' if in_docker else 'NO'}")

# 2. Hostname resolution test: try to resolve every name the Neo4j server
#    might be reachable under and report the outcome per host.
print("\n--- DNS Resolution ---")
for host in ('neo4j', 'graphrag-neo4j', 'localhost'):
    try:
        ip = socket.gethostbyname(host)
    except Exception as e:
        # A failed lookup is reported and the loop moves on to the next host.
        print(f"‚ùå {host} -> FAILED ({e})")
    else:
        print(f"‚úÖ {host} -> {ip}")

# 3. Connection Test
print("\n--- Neo4j Connection ---")
# In Docker the database must be addressed by its service name ('neo4j' or
# 'graphrag-neo4j'); when running locally it must be addressed as localhost.
target = 'neo4j' if in_docker else 'localhost'

uri = f"bolt://{target}:7687"
print(f"Attempting connection to {uri}...")

try:
    # Credentials come from the environment, falling back to the defaults.
    auth = (os.getenv('NEO4J_USER', 'neo4j'), os.getenv('NEO4J_PASSWORD', 'password'))
    # The context manager closes the driver on every path, including when
    # verify_connectivity() raises, so a failed attempt leaks nothing.
    with GraphDatabase.driver(uri, auth=auth) as driver:
        driver.verify_connectivity()
    print("üéâ SUCCESS! Connected.")
except Exception as e:
    # Deliberately broad: this is a diagnostic cell, so any failure is
    # reported together with environment-specific troubleshooting hints.
    print(f"‚ùå CONNECTION FAILED: {e}")

    print("\n--- TROUBLESHOOTING GUIDE ---")
    if in_docker:
        print("1. If 'neo4j' DNS failed: The containers might not be on the same network named 'graphrag-net'.")
        print("2. If DNS works but Connection Refused: Neo4j container is running but not listening (starting up or crashed).")
    else:
        print("1. You are running LOCALLY. Ensure 'docker-compose up' is running successfully in another terminal.")
        print("2. Check if Neo4j is actually running: 'docker ps | grep neo4j'")
        print("3. Check Neo4j logs: 'docker logs graphrag-neo4j' (Look for 'Started' message)")
        print("4. If logs say 'Plugin failure', the open-gds.jar might be missing/corrupt in data/neo4j/plugins.")

üê≥ Running inside Docker? YES

--- DNS Resolution ---
‚úÖ neo4j -> 172.18.0.5
‚úÖ graphrag-neo4j -> 172.18.0.5
‚úÖ localhost -> 127.0.0.1

--- Neo4j Connection ---
Attempting connection to bolt://neo4j:7687...
üéâ SUCCESS! Connected.


In [4]:
import pandas as pd
# Load the extraction results from kgresults.csv into a DataFrame.
kgcandid = pd.read_csv('kgresults.csv')

In [7]:
# Show the column names of the loaded frame.
kgcandid.columns

Index(['Unnamed: 0', 'doc_id', 'category', 'original_text', 'experiment_mode',
       'extracted_entities', 'linked_relationships'],
      dtype='object')

In [8]:
# Preview the first two rows of the loaded frame.
kgcandid.head(2)

Unnamed: 0.1,Unnamed: 0,doc_id,category,original_text,experiment_mode,extracted_entities,linked_relationships
0,0,0,Financials,['PTC Inc.\n\nCONSOLIDATED BALANCE SHEETS\n\n(...,BASELINE,"{'extracted_entities': [{'text': 'PTC Inc.', '...",{'entity_relationships': [{'source_entity': 'P...
1,1,0,Financials,['PTC Inc.\n\nCONSOLIDATED BALANCE SHEETS\n\n(...,FIBO,"{'extracted_entities': [{'text': 'PTC Inc.', '...",{'entity_relationships': [{'source_entity': 'P...


In [11]:
# Inspect the raw 'extracted_entities' value stored in the first row.
kgcandid.iloc[0]['extracted_entities']

"{'extracted_entities': [{'text': 'PTC Inc.', 'type': 'COMPANY_NAME'}, {'text': 'Assets', 'type': 'FINANCIAL_TERM'}, {'text': 'Debt', 'type': 'FINANCIAL_TERM'}, {'text': 'Revenue', 'type': 'FINANCIAL_TERM'}, {'text': 'Net Income', 'type': 'FINANCIAL_TERM'}]}"

In [9]:
# Inspect the raw 'linked_relationships' value stored in the first row.
kgcandid.iloc[0]['linked_relationships']

"{'entity_relationships': [{'source_entity': 'PTC Inc.', 'target_entity': 'Assets', 'relation_type': 'OWNS'}, {'source_entity': 'PTC Inc.', 'target_entity': 'Debt', 'relation_type': 'OWNS'}, {'source_entity': 'PTC Inc.', 'target_entity': 'Revenue', 'relation_type': 'OWNS'}, {'source_entity': 'PTC Inc.', 'target_entity': 'Net Income', 'relation_type': 'OWNS'}, {'source_entity': 'Revenue', 'target_entity': 'Net Income', 'relation_type': 'RELATED_TO'}]}"

In [None]:
import ast  # Used to safely convert dict-formatted strings into Python objects
from neo4j import GraphDatabase

# ==========================================
# 1. Neo4j connection settings
# ==========================================
# Each value can be overridden through an environment variable of the same
# name; the fallbacks are the values that used to be hard-coded here, so the
# behaviour is unchanged when nothing is set.  NEO4J_USER / NEO4J_PASSWORD are
# the same variables the connectivity-check cell reads.  Relies on the
# `import os` from the first cell.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://neo4j:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")

# ==========================================
# 2. Data parsing helper
# ==========================================
def parse_record(record_str):
    """
    Convert a stringified dictionary such as "{'extracted_entities': [...]}"
    into a real Python dictionary.

    Args:
        record_str: The cell value to parse. A dict is passed through
            unchanged; a str is parsed with ast.literal_eval (literals only,
            no code execution).

    Returns:
        dict: The parsed dictionary, or {} when the value is neither a dict
        nor a str (e.g. None or a float NaN), when the string is not a valid
        Python literal, or when it parses to something other than a dict.
        Callers can therefore always call ``.get`` on the result.
    """
    if isinstance(record_str, dict):
        return record_str
    if not isinstance(record_str, str):
        # Anything else (None, NaN, numbers, lists) cannot be a record.
        return {}
    try:
        parsed = ast.literal_eval(record_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval documents all of these for malformed input.
        return {}
    return parsed if isinstance(parsed, dict) else {}

# ==========================================
# 3. Neo4j ingestion function
# ==========================================
def _sanitize_rel_type(raw_type):
    """Turn a relation label into a name that is safe to embed in Cypher.

    The label is upper-cased and every character that is not alphanumeric or
    an underscore is replaced by "_" (this generalises the previous
    space-to-underscore rule).  Returns "" when the input is not a string or
    when nothing but underscores would remain.
    """
    if not isinstance(raw_type, str):
        return ""
    cleaned = "".join(
        ch if (ch.isalnum() or ch == "_") else "_" for ch in raw_type.upper()
    )
    return cleaned if cleaned.strip("_") else ""


def _collect_entities(df):
    """Return (entities, skipped) gathered from the 'extracted_entities' column."""
    entities, skipped = [], 0
    for raw in df['extracted_entities']:
        parsed = parse_record(raw)
        if not isinstance(parsed, dict):
            continue  # e.g. an empty cell that could not be parsed
        for entity in parsed.get('extracted_entities') or []:
            # MERGE cannot match on a null property, so an entity needs a text.
            if isinstance(entity, dict) and entity.get('text') is not None:
                entities.append(entity)
            else:
                skipped += 1
    return entities, skipped


def _group_relationships(df):
    """Return ({rel_type: [{'source', 'target'}, ...]}, skipped) from 'linked_relationships'."""
    rels_by_type, skipped = {}, 0
    for raw in df['linked_relationships']:
        parsed = parse_record(raw)
        if not isinstance(parsed, dict):
            continue
        for rel in parsed.get('entity_relationships') or []:
            if not isinstance(rel, dict):
                skipped += 1
                continue
            r_type = _sanitize_rel_type(rel.get('relation_type'))
            source = rel.get('source_entity')
            target = rel.get('target_entity')
            if not r_type or source is None or target is None:
                skipped += 1
                continue
            rels_by_type.setdefault(r_type, []).append(
                {"source": source, "target": target}
            )
    return rels_by_type, skipped


def ingest_dataframe(df, batch_size=1000):
    """Load the extracted entities and relationships of ``df`` into Neo4j.

    Nodes are written first, then relationships grouped by type, each in
    batches of at most ``batch_size`` rows sent through UNWIND.

    Args:
        df: Frame with an 'extracted_entities' and a 'linked_relationships'
            column; each cell is handed to parse_record and is expected to
            yield {'extracted_entities': [{'text', 'type'}, ...]} and
            {'entity_relationships': [{'source_entity', 'target_entity',
            'relation_type'}, ...]} respectively.
        batch_size: Maximum number of rows per query (default 1000).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Malformed entities/relationships (missing keys, null text, unusable
    relation type) are skipped and reported instead of aborting the run.
    The driver is closed on every path, including when a query fails.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    # Node query: every node gets the common Entity label and is told apart
    # by its `type` property, because labels that change dynamically
    # (company name, financial term, ...) are hard to manage.
    node_query = """
    UNWIND $batch AS row
    MERGE (e:Entity {name: row.text})
    ON CREATE SET e.type = row.type
    """

    with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
        with driver.session() as session:
            print("üöÄ Í∑∏ÎûòÌîÑ Ï†ÅÏû¨ ÏãúÏûë...")

            # 1. Create the nodes (entities) first so relationships can MATCH them.
            entities, skipped_entities = _collect_entities(df)
            for start in range(0, len(entities), batch_size):
                chunk = entities[start:start + batch_size]
                # consume() forces execution so errors surface right here.
                session.run(node_query, batch=chunk).consume()
            print("‚úÖ ÎÖ∏Îìú ÏÉùÏÑ± ÏôÑÎ£å")

            # 2. Create the relationships, grouped by type: a relationship
            #    type cannot be a query parameter, so without APOC one query
            #    per type is generated in Python.
            rels_by_type, skipped_rels = _group_relationships(df)
            for r_type, batch_data in rels_by_type.items():
                # r_type holds only alphanumerics/underscores at this point
                # and is additionally backtick-quoted, so it cannot alter the
                # structure of the query (injection protection).
                rel_query = f"""
                UNWIND $batch AS row
                MATCH (source:Entity {{name: row.source}})
                MATCH (target:Entity {{name: row.target}})
                MERGE (source)-[:`{r_type}`]->(target)
                """

                for start in range(0, len(batch_data), batch_size):
                    chunk = batch_data[start:start + batch_size]
                    session.run(rel_query, batch=chunk).consume()
                    print(f"   üîó Í¥ÄÍ≥Ñ Ïó∞Í≤∞ Ï§ë: {r_type} ({len(chunk)}Í±¥)")

    if skipped_entities or skipped_rels:
        print(
            f"WARNING: skipped {skipped_entities} malformed entities and "
            f"{skipped_rels} malformed relationships"
        )
    print("üéâ Î™®Îì† Îç∞Ïù¥ÌÑ∞ Ï†ÅÏû¨ ÏôÑÎ£å!")