### Install Required Packages

In [50]:
pip install langchain langchain-community langchain-google-vertexai yfinance mlflow pydantic --q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [51]:
pip install serpapi tqdm --q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Imports and Environment Setup


In [52]:
#Imports and environment

import os
import json
import time
from typing import List
from pydantic import BaseModel, Field
from dotenv import load_dotenv

import mlflow
import yfinance as yf
import google.generativeai as genai

# Load API key from .env
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

### Config and Environment Variables
### Set these before running

In [53]:
os.environ["VERTEX_MODEL_NAME"] = "gemini-2.0-flash"

mlflow.set_tracking_uri("http://20.75.92.162:5000")
print("MLflow Tracking URI:", mlflow.get_tracking_uri())

experiment_name = "market_sentiment_vishalverma"
mlflow.set_experiment(experiment_name)

MLflow Tracking URI: http://20.75.92.162:5000


<Experiment: artifact_location='mlflow-artifacts:/155267235024431033', creation_time=1758471929121, experiment_id='155267235024431033', last_update_time=1758471929121, lifecycle_stage='active', name='market_sentiment_vishalverma', tags={}>

### Define output schema for structured sentiment

In [54]:
class SentimentProfile(BaseModel):
    company_name: str
    stock_code: str
    newsdesc: str
    sentiment: str
    people_names: List[str]
    places_names: List[str]
    other_companies_referred: List[str]
    related_industries: List[str]
    market_implications: str
    confidence_score: float

#### Step 1: Resolve stock ticker from company name

In [55]:
STATIC_TICKERS = {
    "Apple Inc": "AAPL",
    "Google": "GOOGL",
    "Microsoft": "MSFT",
    "Amazon": "AMZN",
    "Tesla": "TSLA"
}

def get_stock_code(company_name: str) -> str:
    try:
        ticker = yf.Ticker(company_name)
        if ticker.ticker:
            return ticker.ticker
    except Exception:
        pass
    return STATIC_TICKERS.get(company_name, "UNKNOWN")

### News Fetcher
#### Step 2: Fetch company news (SerpAPI or fallback to yfinance)

In [56]:
def fetch_company_news(stock_code: str, top_n: int = 5) -> str:
    try:
        tk = yf.Ticker(stock_code)
        news = tk.news if hasattr(tk, "news") else []
        summaries = [n.get("title", "") for n in news[:top_n]]
        return " | ".join(summaries) if summaries else "No recent news found."
    except Exception:
        return "No news available."

### Sentiment Analyzer with Gemini
#### Step 3: Sentiment analysis with Gemini LLM

In [57]:
# Analyze sentiment using Google Gemini API

from langchain.output_parsers import PydanticOutputParser

parser = PydanticOutputParser(pydantic_object=SentimentProfile)

def analyze_sentiment(company_name: str, stock_code: str, newsdesc: str) -> dict:
    prompt = f"""
    You are a market sentiment analyzer.
    Analyze the following news about {company_name} ({stock_code}).

    News: {newsdesc}

    Provide a structured JSON with the following fields:
    {parser.get_format_instructions()}
    """
    
    client = genai.GenerativeModel("gemini-2.0-flash")
    response = client.generate_content(prompt)
    output_text = response.text
    
    return parser.parse(output_text).dict()

In [58]:
# Check the analyze_sentiment function
company = "Google"
stock_code = "GOOGL"
news = "Google announced a new AI product today. Market is reacting positively."

result = analyze_sentiment(company, stock_code, news)
print(result)

{'company_name': 'Google', 'stock_code': 'GOOGL', 'newsdesc': 'Google announced a new AI product today. Market is reacting positively.', 'sentiment': 'positive', 'people_names': [], 'places_names': [], 'other_companies_referred': [], 'related_industries': ['Artificial Intelligence'], 'market_implications': "Positive market reaction suggests investor confidence in Google's AI strategy and its potential to generate future revenue.", 'confidence_score': 0.95}


/tmp/ipykernel_6058/3591070709.py:22: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return parser.parse(output_text).dict()


### MLflow Integration
#### Step 4: MLflow logging

In [59]:
# MLflow logging

def log_to_mlflow(company_name: str, stock_code: str, newsdesc: str, sentiment_result: dict):
    run_name = f"vishalverma_sentiment_{company_name}_{int(time.time())}"
    with mlflow.start_run(run_name=f"sentiment_{company_name}_{int(time.time())}"):
        mlflow.log_param("company_name", company_name)
        mlflow.log_param("stock_code", stock_code)
        mlflow.log_text(newsdesc, "news_input.txt")
        mlflow.log_dict(sentiment_result, "sentiment_profile.json")
        mlflow.log_metric("confidence_score", sentiment_result.get("confidence_score", 0.0))

### Full Pipeline
#### Step 5: Full pipeline

In [60]:
# Full pipeline

def run_pipeline(company_name: str) -> dict:
    stock_code = get_stock_code(company_name)
    newsdesc = fetch_company_news(stock_code)
    sentiment_result = analyze_sentiment(company_name, stock_code, newsdesc)
    log_to_mlflow(company_name, stock_code, newsdesc, sentiment_result)
    return sentiment_result

### Execute the pipeline

In [61]:
company_name = "Google"
newsdesc = """
Google has announced the launch of its new AI-powered search feature, which integrates advanced generative AI
capabilities to provide more contextual and personalized search results. Analysts predict this could significantly
boost Google's advertising revenue as businesses leverage the new feature for targeted campaigns. Some competitors
have expressed concern over privacy implications, while investors are optimistic about the growth potential in the
AI-driven search market. Overall, the market reacted positively to the announcement, with Google's stock price
rising 3% in after-hours trading.
"""
stock_code = "GOOGL"
profile = analyze_sentiment(company_name, stock_code, newsdesc)
print("Final Sentiment Profile:\n", json.dumps(profile, indent=2))

Final Sentiment Profile:
 {
  "company_name": "Google",
  "stock_code": "GOOGL",
  "newsdesc": "Google has announced the launch of its new AI-powered search feature, which integrates advanced generative AI capabilities to provide more contextual and personalized search results. Analysts predict this could significantly boost Google's advertising revenue as businesses leverage the new feature for targeted campaigns. Some competitors have expressed concern over privacy implications, while investors are optimistic about the growth potential in the AI-driven search market. Overall, the market reacted positively to the announcement, with Google's stock price rising 3% in after-hours trading.",
  "sentiment": "Positive",
  "people_names": [],
  "places_names": [],
  "other_companies_referred": [],
  "related_industries": [
    "Artificial Intelligence",
    "Search Engines",
    "Advertising",
    "Technology"
  ],
  "market_implications": "Potential increase in Google's advertising revenue,

/tmp/ipykernel_6058/3591070709.py:22: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return parser.parse(output_text).dict()


In [62]:
# Execute the pipeline

company = "Google"
profile = run_pipeline(company)
print("Final Sentiment Profile:\n", json.dumps(profile, indent=2))

/tmp/ipykernel_6058/3591070709.py:22: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  return parser.parse(output_text).dict()


🏃 View run sentiment_Google_1758472002 at: http://20.75.92.162:5000/#/experiments/155267235024431033/runs/730991e20daf48c1a00df8271304fb39
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/155267235024431033
Final Sentiment Profile:
 {
  "company_name": "Google",
  "stock_code": "GOOGL",
  "newsdesc": "No recent news found.",
  "sentiment": "Neutral",
  "people_names": [],
  "places_names": [],
  "other_companies_referred": [],
  "related_industries": [],
  "market_implications": "Market sentiment cannot be determined due to the absence of news.",
  "confidence_score": 0.5
}


In [63]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.
