In [13]:
from qdrant_client import QdrantClient

# Open a client against the Qdrant server listening on this machine
client = QdrantClient(url="http://localhost:6333")

# List every collection the server currently exposes
collections = client.get_collections()
print(collections)

# Fetch and display the metadata of the `financial_news` collection
collection_name = "financial_news"
collection_info = client.get_collection(collection_name=collection_name)
print(collection_info)


collections=[CollectionDescription(name='earnings_calls'), CollectionDescription(name='aapl_10k_10q_forms'), CollectionDescription(name='financial_news')]
status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=0 points_count=96 segments_count=4 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=1024, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_thre

In [15]:
from qdrant_client.http.models import PointRequest

# Ask for the first 5 points. scroll() returns a (records, next_page_offset)
# pair, so `points` holds that whole tuple and it is printed raw.
points = client.scroll(
    collection_name="financial_news",
    limit=5,
)

print(points)




([Record(id=5813337049586389, payload={'document': 'Apple (NASDAQ: AAPL) stock has experienced many struggles since its 1980 IPO. After its board fired Steve Jobs in 1985, the company spent years in the wilderness. It suspended its dividend payout in 1996, and was close to bankruptcy when it brought Jobs back in 1997.', 'file_name': "If You'd Invested $1,000 in Apple Stock 27 Years Ago, Here's How Much You'd Have Today.txt", 'chunk_index': 0, 'title': "If You'd Invested $1,000 in Apple Stock 27 Years Ago, Here's How Much You'd Have Today", 'source': 'finance.yahoo.com', 'summary': 'Apple (NASDAQ: AAPL) stock has experienced many struggles since its 1980 IPO. After its board fired Steve Jobs in 1985, the company spent years in the wilderness. It suspended its dividend payout in 1...', 'publish_date': '2024-06-28T22:32:07.000Z', 'url': 'https://finance.yahoo.com/news/youd-invested-1-000-apple-223207115.html'}, vector=None, shard_key=None, order_value=None), Record(id=10631151568077030, p

In [16]:
# Pull a sample of five records; the next-page offset is not needed here
records, _ = client.scroll(
    collection_name="financial_news",
    limit=5,
)

# Show the headline metadata of each record, followed by a ruler line
for record in records:
    payload = record.payload
    print(f"Title: {payload.get('title')}")
    print(f"Source: {payload.get('source')}")
    print(f"Publish Date: {payload.get('publish_date')}")
    print(f"URL: {payload.get('url')}")
    print(f"Summary: {payload.get('summary')}\n")
    print("=" * 80)


Title: If You'd Invested $1,000 in Apple Stock 27 Years Ago, Here's How Much You'd Have Today
Source: finance.yahoo.com
Publish Date: 2024-06-28T22:32:07.000Z
URL: https://finance.yahoo.com/news/youd-invested-1-000-apple-223207115.html
Summary: Apple (NASDAQ: AAPL) stock has experienced many struggles since its 1980 IPO. After its board fired Steve Jobs in 1985, the company spent years in the wilderness. It suspended its dividend payout in 1...

Title: Jeff Bezos is betting millions on a Nvidia rival
Source: www.yahoo.com
Publish Date: 2024-12-02T13:37:00.000Z
URL: https://www.yahoo.com/tech/jeff-bezos-betting-millions-nvidia-133700975.html
Summary: Billionaire Jeff Bezos is throwing his weight behind a computing company that’s coming for Nvidia’s (NVDA) dominance of the artificial intelligence chip market.

Title: Stock market today: Dow, S&P 500, Nasdaq futures hit pause in countdown to consumer inflation data
Source: finance.yahoo.com
Publish Date: 2024-12-10T12:13:38.000Z
URL: http

In [25]:
from transformers import pipeline
import torch

# Build the FinBERT text-classification pipeline; the same checkpoint id
# supplies both the model weights and the tokenizer.
finbert_checkpoint = "ProsusAI/finbert"
sentiment_pipeline = pipeline(
    "text-classification",
    model=finbert_checkpoint,
    tokenizer=finbert_checkpoint,
)

# Classify one piece of text and report the winning label with its score
def analyze_sentiment(text):
    """Run `sentiment_pipeline` on `text` and return `(label, score)`.

    The input is truncated to at most 512 tokens so it fits the model limit.
    Only the first prediction returned by the pipeline is used.
    """
    predictions = sentiment_pipeline(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return top_prediction["label"], top_prediction["score"]

# Page through the whole `financial_news` collection, accumulating every record
all_records = []
offset = None
batch_size = 100  # page size requested from each scroll call

while True:
    batch_records, next_offset = client.scroll(
        collection_name="financial_news",
        limit=batch_size,
        offset=offset,
        with_payload=True,  # the payload carries the text that gets scored later
    )
    all_records += batch_records  # adds nothing when the page is empty

    # Finished once a page comes back empty or no further page is reported
    if not batch_records or next_offset is None:
        break

    offset = next_offset

print(f"Retrieved {len(all_records)} records in total")

# Walk the retrieved records in slices of 20, reporting progress after each slice
batch_size = 20
total_batches = (len(all_records) + batch_size - 1) // batch_size

for batch_number, start in enumerate(range(0, len(all_records), batch_size), start=1):
    batch = all_records[start:start + batch_size]

    for record in batch:
        text = record.payload.get("document", "")
        if not text:
            # Nothing to score for this record; leave its payload untouched
            continue

        sentiment, confidence = analyze_sentiment(text)

        # set_payload is given only the two sentiment keys, one point at a time
        client.set_payload(
            collection_name="financial_news",
            payload={
                "sentiment": sentiment,
                "confidence_score": float(confidence),
            },
            points=[record.id],
        )

    print(f"Processed batch {batch_number}/{total_batches}")

# Verify the write-back: re-read the collection and print each record's sentiment.
# Qdrant's scroll endpoint requires a page size of at least 1, so the limit is
# clamped. If nothing was retrieved earlier, this asks for a single point
# instead of sending limit=0 and failing the cell.
verification_limit = max(len(all_records), 1)
updated_records, _ = client.scroll(
    collection_name="financial_news",
    limit=verification_limit,
    with_payload=True
)

for record in updated_records:
    # `or 0` covers both a missing key and a stored null, either of which
    # would otherwise break the :.4f format below.
    confidence_score = record.payload.get('confidence_score') or 0
    print(f"Title: {record.payload.get('title')}")
    print(f"Sentiment: {record.payload.get('sentiment')} (Confidence: {confidence_score:.4f})")
    print(f"Summary: {record.payload.get('summary')}\n")
    print("="*80)

# Note: this is the number of records fetched for verification, including any
# whose `document` text was empty and therefore were not scored.
print(f"Updated sentiment analysis for {len(updated_records)} records")

Device set to use cpu


Retrieved 96 records in total
Processed batch 1/5
Processed batch 2/5
Processed batch 3/5
Processed batch 4/5
Processed batch 5/5
Title: If You'd Invested $1,000 in Apple Stock 27 Years Ago, Here's How Much You'd Have Today
Sentiment: negative (Confidence: 0.9497)
Summary: Apple (NASDAQ: AAPL) stock has experienced many struggles since its 1980 IPO. After its board fired Steve Jobs in 1985, the company spent years in the wilderness. It suspended its dividend payout in 1...

Title: Jeff Bezos is betting millions on a Nvidia rival
Sentiment: neutral (Confidence: 0.9108)
Summary: Billionaire Jeff Bezos is throwing his weight behind a computing company that’s coming for Nvidia’s (NVDA) dominance of the artificial intelligence chip market.

Title: Stock market today: Dow, S&P 500, Nasdaq futures hit pause in countdown to consumer inflation data
Sentiment: negative (Confidence: 0.9630)
Summary: US stock futures stalled on Tuesday as investors trod carefully on the eve of a closely watched co