In [None]:
# Install the Gremlin / JanusGraph client libraries and the event-loop patch used by this notebook.
# NOTE(review): only gremlinpython is version-pinned; consider pinning the other two for reproducibility.
# NOTE(review): yfinance and python-dotenv are imported below but not installed here — presumably preinstalled; confirm.
%pip install gremlinpython==3.7.3 
%pip install janusgraphpython
%pip install nest-asyncio

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [24]:
import os
import yfinance as yf
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.process.anonymous_traversal import traversal
from dotenv import load_dotenv

# ──── CRITICAL: Fix for Jupyter event loop conflict ────
# nest_asyncio patches asyncio so nested event-loop use is allowed inside the
# notebook kernel (presumably required by the Gremlin driver — confirm).
import nest_asyncio
nest_asyncio.apply()

# Load environment variables (e.g. from a local .env file)
load_dotenv()

# Connection Config
# Can be overridden with the GREMLIN_SERVER_URL environment variable / .env entry;
# the default targets a local server — confirm this matches your JanusGraph setup.
GREMLIN_SERVER_URL = os.getenv("GREMLIN_SERVER_URL", "ws://localhost:8182/gremlin")

graph = Graph()  # not used by the remote traversal source below; kept so `graph` stays defined
connection = DriverRemoteConnection(GREMLIN_SERVER_URL, 'g')
# with_remote is the snake_case replacement for the deprecated withRemote
g = traversal().with_remote(connection)

def ingest_financial_data(tickers):
    """Upsert Company and Sector vertices (plus the edge between them) for each ticker.

    For every symbol, company metadata is read from ``yf.Ticker(symbol).info``
    and written through the module-level traversal source ``g``:

    1. a ``Company`` vertex keyed by ``ticker`` (``name`` and ``summary`` are
       set on creation and refreshed on every run),
    2. a ``Sector`` vertex keyed by ``name``,
    3. an ``IN_SECTOR`` edge Company -> Sector, created only if missing.

    Every write is a single get-or-create traversal, so re-running the
    function does not add duplicate vertices or edges.

    Args:
        tickers: Iterable of ticker symbols (strings), e.g. ``["AAPL", "MSFT"]``.

    Returns:
        None. One "Ingested: ..." line is printed per ticker.
    """
    for symbol in tickers:
        info = yf.Ticker(symbol).info or {}

        # Use `or` rather than a .get() default: a key that is present but
        # holds None / "" must fall back too, otherwise None would be sent
        # as a property value and printed as "None".
        name = info.get('longName') or symbol
        sector = info.get('sector') or 'Unknown'
        summary = info.get('longBusinessSummary') or 'No summary available.'

        # 1. Upsert Company (label-scoped lookup, consistent with the Sector
        #    lookup and with graph_rag_traversal). The mutable properties are
        #    applied after coalesce so both branches share one code path.
        (
            g.V().has('Company', 'ticker', symbol)
             .fold()
             .coalesce(
                 __.unfold(),
                 __.add_v('Company').property('ticker', symbol),
             )
             .property('name', name)
             .property('summary', summary)
             .iterate()
        )

        # 2. Upsert Sector
        (
            g.V().has('Sector', 'name', sector)
             .fold()
             .coalesce(
                 __.unfold(),
                 __.add_v('Sector').property('name', sector),
             )
             .iterate()
        )

        # 3. Upsert the IN_SECTOR edge in one traversal: reuse an incoming
        #    edge whose source is the company labelled 'c', otherwise add it.
        #    This replaces a separate existence query followed by a write
        #    (two round trips with a check-then-act gap between them).
        (
            g.V().has('Company', 'ticker', symbol).as_('c')
             .V().has('Sector', 'name', sector)
             .coalesce(
                 __.in_e('IN_SECTOR').where(__.out_v().as_('c')),
                 __.add_e('IN_SECTOR').from_('c'),
             )
             .iterate()
        )

        print(f"Ingested: {name} in {sector} sector.")


def graph_rag_traversal(target_sector, max_chars=200):
    """Collect short text snippets for every company in a sector.

    Starts at the ``Sector`` vertex whose ``name`` equals ``target_sector``,
    follows incoming ``IN_SECTOR`` edges to ``Company`` vertices and projects
    each company's ``name`` and ``summary``.

    Args:
        target_sector: Sector name to look up, e.g. ``"Technology"``.
        max_chars: Maximum number of summary characters kept per company
            (defaults to 200, the previously hard-coded value).

    Returns:
        A list of strings of the form ``"Data for <name>: <summary prefix>..."``,
        one per company; an empty list when nothing matches.
    """
    rows = (
        g.V().has_label('Sector').has('name', target_sector)
         .in_('IN_SECTOR').has_label('Company')
         .project('company', 'context')
           .by('name')
           .by('summary')
         .to_list()
    )

    # `or ''` covers both a missing 'context' key and an explicit None value,
    # so slicing never raises TypeError.
    return [
        f"Data for {row['company']}: {(row.get('context') or '')[:max_chars]}..."
        for row in rows
    ]

# Example usage
# Demo run: ingest a few tickers, then read each one back to verify the write.
try:
    tickers = ["AAPL", "MSFT", "GOOGL"]
    ingest_financial_data(tickers)

    for symbol in tickers:
        # Count the vertices that carry this ticker
        count = g.V().has('ticker', symbol).count().next()
        print(f"{symbol}: {count} vertex found")

        if count <= 0:
            # Nothing stored for this ticker, so there are no properties to show
            continue

        # Fetch the property map of the first matching vertex
        props = g.V().has('ticker', symbol).valueMap(True).next()
        print(f"Properties of {symbol}:", props)

finally:
    # Release the connection whether or not the steps above succeeded
    connection.close()

Ingested: Apple Inc. in Technology sector.
Ingested: Microsoft Corporation in Technology sector.
Ingested: Alphabet Inc. in Communication Services sector.
AAPL: 1 vertex found
Properties of AAPL: {'name': ['Apple Inc.'], 'summary': ['Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple Vision Pro, Apple TV, Apple Watch, Beats products, and HomePod, as well as Apple branded and third-party accessories. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to discover and download applications and digital content, such as books, music, video, games, and podcasts, as well as advertising services include third-party licensing arrangements and its own adverti

In [26]:

# Open a fresh connection: the previous cell closes its connection in `finally`.
graph = Graph()  # not used by the remote traversal source below; kept so `graph` stays defined
connection = DriverRemoteConnection(GREMLIN_SERVER_URL, 'g')
# with_remote is the snake_case replacement for the deprecated withRemote
g = traversal().with_remote(connection)

try:
    # 1. See what sectors were actually written
    print("All sectors in graph:")
    print(g.V().has_label('Sector').values('name').to_list())

    # 2. See companies + their outgoing sector names
    results = (
        g.V().has_label('Company')
         .project('company', 'ticker', 'sectors')
           .by('name')
           .by('ticker')
           .by(__.out('IN_SECTOR').values('name').fold())   # out = follows the Company -> Sector direction the edges were created in
         .to_list()
    )

    for r in results:
        print(r)
finally:
    # Close the connection so re-running this cell does not leak connections;
    # any later cell that queries the graph must open its own connection.
    connection.close()

All sectors in graph:
['Technology', 'Communication Services']
{'company': 'Alphabet Inc.', 'ticker': 'GOOGL', 'sectors': ['Communication Services']}
{'company': 'Microsoft Corporation', 'ticker': 'MSFT', 'sectors': ['Technology']}
{'company': 'Apple Inc.', 'ticker': 'AAPL', 'sectors': ['Technology']}
