In [1]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import os
from dotenv import load_dotenv

### FEtch Data from API

In [2]:
def get_alpha_vantage_news(api_key, ticker, topic=None):
    """
    Fetches news articles about `ticker` from the Alpha Vantage News API.
    Optionally filters by `topic` (e.g., "technology", "finance", etc.).
    Returns a list of dicts with article info (title, summary, sentiment, etc.).
    """

    # Alpha Vantage 'NEWS_SENTIMENT' endpoint
    url = "https://www.alphavantage.co/query"

    # Base params
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": ticker,
        "apikey": api_key,
        "sort": "RELEVANCE"
    }

    # If a topic was provided, include it in the params
    # NOTE: The Alpha Vantage parameter for filtering by topic is "topics".
    # E.g., topics=technology,ipo
    if topic:
        params["topics"] = topic

    response = requests.get(url, params=params)
    print("Request URL:", response.url)  # Debugging: see the final request
    data = response.json()

    # Check for errors
    if "feed" not in data:
        print(f"Error fetching news from Alpha Vantage: {data}")
        return []

    articles = data["feed"]
    return articles

In [7]:
def get_company_overview(api_key, ticker):
    """
    Fetches the company overview from Alpha Vantage using function=OVERVIEW.
    Returns a Pandas DataFrame.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "OVERVIEW",
        "symbol": ticker,
        "apikey": api_key
    }

    response = requests.get(url, params=params)
    data = response.json()

    if "Symbol" not in data:
        print(f"Error fetching company overview: {data}")
        return None

    # Convert JSON dictionary to a Pandas DataFrame
    overview_df = pd.DataFrame.from_dict(data, orient='index', columns=['Value'])
    overview_df.reset_index(inplace=True)
    overview_df.columns = ['Field', 'Value']

    return overview_df

In [4]:
load_dotenv()
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

In [5]:
articles = get_alpha_vantage_news(ALPHA_VANTAGE_API_KEY, "TSLA")
articles = articles[:5]

Request URL: https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=TSLA&apikey=4RZANL9BTFIUH6KF&sort=RELEVANCE


In [14]:
overview_data = get_company_overview(ALPHA_VANTAGE_API_KEY, "TSLA")

### Unstructured Loader

In [16]:
from langchain.document_loaders import UnstructuredURLLoader

In [None]:
def parse_article(articles):
    """
    Parses the article and returns the article text.
    """

    urls = []
    for i in range(len(articles)):
        urls.append(articles[i]['url'])

    loader = UnstructuredURLLoader(urls=urls)
    parsed_docs = loader.load()

    return parsed_docs

#### RecursiveTextSplitter

In [32]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
def split_text(docs):
    """
    Splits the text into smaller chunks.
    """

    text_splitter = RecursiveCharacterTextSplitter(
    separators=['\n\n', '\n', '.', ','],
    chunk_size=1000
    )

    chunks = text_splitter.split_documents(docs)
    texts = [chunk.page_content for chunk in chunks]

    return texts

### FAISS Database

In [None]:
from sentence_transformers import SentenceTransformer
import faiss

In [None]:
def buildIndex(texts):
    """
    Build a FAISS index from a list of text documents.
    Each document is encoded into a vector using SentenceTransformer.
    The vectors are then normalized and added to the FAISS index.
    Args:
        text (list): List of text documents to be indexed.
    Returns:
        index (faiss.Index): A FAISS index containing the normalized vectors.
    """
    
    encoder = SentenceTransformer("BAAI/bge-base-en")
    vectors = encoder.encode(texts)
    faiss.normalize_L2(vectors)
    
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)

    return index


### Retrival (Ignore)

In [58]:
from langchain.chains import RetrievalQAWithSourcesChain

In [67]:
from langchain import OpenAI

In [61]:
LLM_API_KEY = os.getenv("OPEN_ROUTER_KEY")

In [68]:
llm = OpenAI(temperature=0.9, max_tokens=500)

  llm = OpenAI(temperature=0.9, max_tokens=500)


ValidationError: 1 validation error for OpenAI
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'temperature': 0.9, 'max...ne, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error

In [65]:
llm = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=LLM_API_KEY
)

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [59]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=index.as_retriever())

NameError: name 'llm' is not defined