In [11]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import os
from dotenv import load_dotenv

### Fetch Data from API

In [6]:
def get_alpha_vantage_news(api_key, ticker, topic=None, timeout=30):
    """
    Fetches news articles about `ticker` from the Alpha Vantage News API.
    Optionally filters by `topic` (e.g., "technology", "finance", etc.).

    Args:
        api_key: Alpha Vantage API key, sent as the `apikey` query parameter.
        ticker: Symbol sent as the `tickers` query parameter.
        topic: Optional value for the `topics` query parameter
            (e.g., "technology,ipo"). Omitted from the request when falsy.
        timeout: Seconds passed to `requests.get` as its timeout, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The list stored under the "feed" key of the JSON response, or an
        empty list when the body is not JSON or has no "feed" key.
    """

    # Alpha Vantage 'NEWS_SENTIMENT' endpoint
    url = "https://www.alphavantage.co/query"

    # Base params
    params = {
        "function": "NEWS_SENTIMENT",
        "tickers": ticker,
        "apikey": api_key,
        "sort": "RELEVANCE"
    }

    # The Alpha Vantage parameter for filtering by topic is "topics".
    if topic:
        params["topics"] = topic

    response = requests.get(url, params=params, timeout=timeout)

    # Debugging: show the request that was sent, but rebuild the URL with the
    # key masked so the secret never ends up in saved notebook output.
    redacted_params = {**params, "apikey": "REDACTED"}
    redacted_url = requests.Request("GET", url, params=redacted_params).prepare().url
    print("Request URL:", redacted_url)

    # A non-JSON body (e.g. an HTML error page) is reported the same way as
    # any other failed fetch instead of raising out of the function.
    try:
        data = response.json()
    except ValueError:
        print(
            "Error fetching news from Alpha Vantage: "
            f"HTTP {response.status_code}, response body is not JSON"
        )
        return []

    # Responses without a "feed" key are treated as errors and echoed back.
    if "feed" not in data:
        print(f"Error fetching news from Alpha Vantage: {data}")
        return []

    return data["feed"]

In [7]:
def get_company_overview(api_key, ticker, timeout=30):
    """
    Fetches the company overview from Alpha Vantage using function=OVERVIEW.

    Args:
        api_key: Alpha Vantage API key, sent as the `apikey` query parameter.
        ticker: Symbol sent as the `symbol` query parameter.
        timeout: Seconds passed to `requests.get` as its timeout, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A Pandas DataFrame with one row per key of the JSON response and the
        columns 'Field' and 'Value', or None when the body is not JSON or has
        no "Symbol" key.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "OVERVIEW",
        "symbol": ticker,
        "apikey": api_key
    }

    response = requests.get(url, params=params, timeout=timeout)

    # A non-JSON body (e.g. an HTML error page) is reported the same way as
    # any other failed fetch instead of raising out of the function.
    try:
        data = response.json()
    except ValueError:
        print(
            "Error fetching company overview: "
            f"HTTP {response.status_code}, response body is not JSON"
        )
        return None

    # Responses without a "Symbol" key are treated as errors and echoed back.
    if "Symbol" not in data:
        print(f"Error fetching company overview: {data}")
        return None

    # Convert the JSON dictionary to a two-column frame: the keys become the
    # index, which reset_index() then turns into the first column.
    overview_df = pd.DataFrame.from_dict(
        data, orient='index', columns=['Value']
    ).reset_index()
    overview_df.columns = ['Field', 'Value']

    return overview_df

In [8]:
# Load variables from a .env file (if present) into the environment, then read
# the Alpha Vantage key from it; the value is None when the variable is unset.
load_dotenv()
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY")

In [9]:
# Fetch the TSLA news feed and keep only its first five entries.
articles = get_alpha_vantage_news(ALPHA_VANTAGE_API_KEY, "TSLA")[:5]

Request URL: https://www.alphavantage.co/query?function=NEWS_SENTIMENT&tickers=TSLA&apikey=REDACTED&sort=RELEVANCE


In [14]:
overview_data = get_company_overview(ALPHA_VANTAGE_API_KEY, "TSLA")

### Unstructured Loader

In [16]:
from langchain.document_loaders import UnstructuredURLLoader

In [17]:
# Collect the 'url' field of every fetched article.
urls = [article['url'] for article in articles]

In [18]:
loader = UnstructuredURLLoader(urls=urls)

In [19]:
loader

<langchain_community.document_loaders.url.UnstructuredURLLoader at 0x1577e6fd0>

In [16]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/vincy/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/vincy/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [20]:
data = loader.load()
len(data)

5

In [21]:
data

[Document(metadata={'source': 'https://www.benzinga.com/news/22/04/26466681/nio-said-to-be-in-discussions-with-rivals-to-license-its-flagship-ev-battery-swap-tech'}, page_content='España\n\nIndia\n\nItalia\n\n대한민국\n\n日本\n\nBenzinga Edge\n\nBenzinga Research\n\nBenzinga Pro\n\nGet Benzinga Pro\n\nData & APIs\n\nEvents\n\nPremarket\n\nAdvertise\n\nContribute\n\nEspaña\n\nIndia\n\nItalia\n\n대한민국\n\n日本\n\nOur Services\n\nNews\n\nEarnings\n\nGuidance\n\nDividends\n\nM&A\n\nBuybacks\n\nLegal\n\nInterviews\n\nManagement\n\nOfferings\n\nIPOs\n\nInsider Trades\n\nBiotech/FDA\n\nPolitics\n\nHealthcare\n\nMarkets\n\nPre-Market\n\nAfter Hours\n\nMovers\n\nETFs\n\nForex\n\nCannabis\n\nCommodities\n\nBinary Options\n\nBonds\n\nFutures\n\nCME Group\n\nGlobal Economics\n\nMining\n\nPreviews\n\nSmall-Cap\n\nReal Estate\n\nPenny Stocks\n\nDigital Securities\n\nVolatility\n\nOptions\n\nRatings\n\nAnalyst Color\n\nDowngrades\n\nUpgrades\n\nInitiations\n\nPrice Target\n\nIdeas\n\nTrade Ideas\n\nLong Ideas\

In [22]:
# Pull the text content out of each loaded document.
pages = [loaded_doc.page_content for loaded_doc in data]


In [23]:
len(pages)

5

#### RecursiveCharacterTextSplitter

In [32]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [33]:
# Separators are listed from coarsest (blank line) to finest (comma);
# chunk_size sets the target chunk length to 1000.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    separators=['\n\n', '\n', '.', ','],
)

In [34]:
docs = text_splitter.split_documents(data)

In [38]:
docs

[Document(metadata={'source': 'https://www.benzinga.com/news/22/04/26466681/nio-said-to-be-in-discussions-with-rivals-to-license-its-flagship-ev-battery-swap-tech'}, page_content='España\n\nIndia\n\nItalia\n\n대한민국\n\n日本\n\nBenzinga Edge\n\nBenzinga Research\n\nBenzinga Pro\n\nGet Benzinga Pro\n\nData & APIs\n\nEvents\n\nPremarket\n\nAdvertise\n\nContribute\n\nEspaña\n\nIndia\n\nItalia\n\n대한민국\n\n日本\n\nOur Services\n\nNews\n\nEarnings\n\nGuidance\n\nDividends\n\nM&A\n\nBuybacks\n\nLegal\n\nInterviews\n\nManagement\n\nOfferings\n\nIPOs\n\nInsider Trades\n\nBiotech/FDA\n\nPolitics\n\nHealthcare\n\nMarkets\n\nPre-Market\n\nAfter Hours\n\nMovers\n\nETFs\n\nForex\n\nCannabis\n\nCommodities\n\nBinary Options\n\nBonds\n\nFutures\n\nCME Group\n\nGlobal Economics\n\nMining\n\nPreviews\n\nSmall-Cap\n\nReal Estate\n\nPenny Stocks\n\nDigital Securities\n\nVolatility\n\nOptions\n\nRatings\n\nAnalyst Color\n\nDowngrades\n\nUpgrades\n\nInitiations\n\nPrice Target\n\nIdeas\n\nTrade Ideas\n\nLong Ideas\

In [44]:
texts = [doc.page_content for doc in docs]

In [46]:
len(texts)

19

### FAISS Database

In [29]:
%pip install faiss-cpu

Note: you may need to restart the kernel to use updated packages.


In [30]:
!pip install sentence-transformers



In [40]:
from sentence_transformers import SentenceTransformer

# Use a high-quality model
encoder = SentenceTransformer("BAAI/bge-base-en")  # Or "mxbai-embed-large"

In [47]:
vectors = encoder.encode(texts)

In [48]:
vectors.shape

(19, 768)

In [50]:
dim = vectors.shape[1]

In [49]:
import faiss

In [51]:
faiss.normalize_L2(vectors)

In [52]:
index = faiss.IndexFlatL2(dim)

In [53]:
index.add(vectors)

In [54]:
index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x2cb969a10> >

In [None]:
def buildIndex(text, encoder=None, model_name="BAAI/bge-base-en"):
    """
    Build a FAISS index from a list of text documents.
    Each document is encoded into a vector using SentenceTransformer.
    The vectors are then normalized and added to the FAISS index.
    Args:
        text (list): List of text documents to be indexed. Must not be empty.
        encoder: Optional, already-loaded encoder exposing `encode(text)`.
            Pass one to avoid reloading the model on every call.
        model_name (str): Model to load with SentenceTransformer when no
            `encoder` is supplied.
    Returns:
        index (faiss.Index): A FAISS index containing the normalized vectors.
    Raises:
        ValueError: If `text` is None or empty.
    """
    # Fail early with a clear message: with no documents there are no vectors
    # to take the embedding dimension from.
    if text is None or len(text) == 0:
        raise ValueError("buildIndex requires at least one text document")

    # Only load the model when the caller did not hand one in.
    if encoder is None:
        encoder = SentenceTransformer(model_name)

    vectors = encoder.encode(text)
    # normalize_L2 works on the array it is given; its return value is unused.
    faiss.normalize_L2(vectors)

    # The index dimension is the width of the embedding matrix.
    dim = vectors.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(vectors)

    return index
    
    

In [56]:
database = buildIndex(texts)

In [57]:
database

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x15fc173f0> >