In [1]:
%pip install -U langchain-postgres psycopg[binary] pypdf sentence-transformers

Collecting pypdf
  Downloading pypdf-6.7.4-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.7.4-py3-none-any.whl (331 kB)
Installing collected packages: pypdf
  Attempting uninstall: pypdf
    Found existing installation: pypdf 6.7.3
    Uninstalling pypdf-6.7.3:
      Successfully uninstalled pypdf-6.7.3
Successfully installed pypdf-6.7.4
Note: you may need to restart the kernel to use updated packages.


In [None]:

import yfinance as yf
import psycopg2
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

from transformers import logging as transformers_logging
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD") 
print(POSTGRES_PASSWORD)
# Set transformers logging to error only
transformers_logging.set_verbosity_error()

# 1. Database & Extension Initialization
conn = psycopg2.connect(f"postgres://postgres:{POSTGRES_PASSWORD}@localhost:5432/postgres")
cur = conn.cursor()
cur.execute("CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;")

# 2. Optimized 2026 Trading Schema
# Using pgvectorscale's DiskANN for low-latency searches on SSD
cur.execute("""
    CREATE TABLE IF NOT EXISTS trading_signals (
        id SERIAL PRIMARY KEY,
        ticker TEXT,
        data_source TEXT,
        raw_text TEXT,
        embedding vector(384)
    );
""")

cur.execute("""
    CREATE INDEX IF NOT EXISTS idx_diskann_signals 
    ON trading_signals USING diskann (embedding vector_cosine_ops);
""")

# 3. Fetching Actual 2026 Market & Alternative Data
# A. Market Price Regime (Yahoo Finance)
ticker = "NVDA"
# NVIDIA remains a core AI-driven asset in Feb 2026 market rallies
hist = yf.Ticker(ticker).history(period="1d", interval="1m")
price_signal = f"Market Regime for {ticker} at {hist.index[-1]}: Price {hist['Close'].iloc[-1]:.2f}"

# B. Corporate Filings (SEC/Financial Reports in Parquet)
# We use a dataset known to have a valid Parquet export to avoid Script errors
sec_ds = load_dataset("HuggingFaceFW/finepdfs_100BT", split="train", streaming=True)
filing_sample = next(iter(sec_ds))['text'][:500]

# C. Social Alternative Data (Financial Sentiment)
# Many sentiment datasets have been converted to script-less Parquet
twit_ds = load_dataset("zeroshot/twitter-financial-news-sentiment", split="train", streaming=True)
tweet_sample = next(iter(twit_ds))['text']

# 4. Vectorization and Storage
model = SentenceTransformer('all-MiniLM-L6-v2')
sources = [("market", price_signal), ("filing", filing_sample), ("social", tweet_sample)]

for src, txt in sources:
    vec = model.encode(txt).tolist()
    cur.execute(
        "INSERT INTO trading_signals (ticker, data_source, raw_text, embedding) VALUES (%s, %s, %s, %s)",
        (ticker, src, txt, vec)
    )

# 5. Hedge Fund "Regime Detection" Query
# Identifying current market conditions that match historical risk factors
# 1. Prepare the query
query_text = "increased short interest in software stocks and AI margin compression"
query_vec = model.encode(query_text).tolist()

# 2. Execute with explicit casting (Solves UndefinedFunction error)
try:
    cur.execute("""
        SELECT 
            data_source, 
            LEFT(raw_text, 100) AS snippet, 
            1 - (embedding <=> %s::vector) AS similarity
        FROM trading_signals
        ORDER BY embedding <=> %s::vector 
        LIMIT 3;
    """, (query_vec, query_vec))

    results = cur.fetchall()
    
    print(f"--- Alpha Signals for: '{query_text}' ---")
    for row in results:
        print(f"[{row[0].upper()}] Score: {row[2]:.4f} | {row[1]}...")
        
except Exception as e:
    print(f"Query failed: {e}")


print("Top Signals Found:")
for row in cur.fetchall():
    print(f"[{row[0].upper()}] Sim: {row[2]:.4f} | {row[1][:80]}...")

conn.commit()
cur.close()
conn.close()


quant_secure_pass
postgres://postgres:quant_secure_pass@localhost:5432/postgres


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 2702.55it/s, Materializing param=pooler.dense.weight]                             


--- Alpha Signals for: 'increased short interest in software stocks and AI margin compression' ---
[MARKET] Score: 0.1951 | Market Regime for NVDA at 2026-02-27 15:59:00-05:00: Price 177.16...
[MARKET] Score: 0.1951 | Market Regime for NVDA at 2026-02-27 15:59:00-05:00: Price 177.16...
[SOCIAL] Score: 0.1215 | $BYND - JPMorgan reels in expectations on Beyond Meat https://t.co/bd0xbFGjkT...
Top Signals Found:
