# Database Queries and Analysis
This notebook contains queries for various database tables, identifies potential data issues, and provides insights for further analysis.

In [2]:
# Import required libraries
import pandas as pd
import os
from utils import connect_to_db


In [2]:
# Query the `stock_metadata` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.stock_metadata;"
    df = pd.read_sql_query(query, conn)
    conn.close()
    
    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,name,sector,industry,country,market_cap,employees,description,website,exchange,currency,last_updated
0,AAPL,Apple Inc.,Technology,Consumer Electronics,United States,3791126003712,150000,"Apple Inc. designs, manufactures, and markets ...",https://www.apple.com,NASDAQ,USD,2025-09-29 01:49:13.523554
1,TSLA,"Tesla, Inc.",Consumer Cyclical,Auto Manufacturers,United States,1464396414976,125665,"Tesla, Inc. designs, develops, manufactures, l...",https://www.tesla.com,NASDAQ,USD,2025-09-29 01:49:13.523554
2,NVDA,NVIDIA Corporation,Technology,Semiconductors,United States,4338392236032,36000,"NVIDIA Corporation, a computing infrastructure...",https://www.nvidia.com,NASDAQ,USD,2025-09-29 01:49:13.523554


In [2]:
# Query the `stock_ohlcv` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.stock_ohlcv;"
    df = pd.read_sql_query(query, conn)
    conn.close()
    
    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,date,tic,open,high,low,close,volume,last_updated
0,1980-12-12,AAPL,0.098485,0.098913,0.098485,0.098485,469033600,2025-09-29 01:49:20.541342
1,1980-12-15,AAPL,0.093775,0.093775,0.093347,0.093347,175884800,2025-09-29 01:49:20.541342
2,1980-12-16,AAPL,0.086924,0.086924,0.086495,0.086495,105728000,2025-09-29 01:49:20.541342
3,1980-12-17,AAPL,0.088636,0.089064,0.088636,0.088636,86441600,2025-09-29 01:49:20.541342
4,1980-12-18,AAPL,0.091206,0.091634,0.091206,0.091206,73449600,2025-09-29 01:49:20.541342
...,...,...,...,...,...,...,...,...
21832,2025-09-22,NVDA,175.300003,184.550003,174.710007,183.610001,269637000,2025-09-29 01:49:22.284170
21833,2025-09-23,NVDA,181.970001,182.419998,176.210007,178.429993,192559600,2025-09-29 01:49:22.284170
21834,2025-09-24,NVDA,179.770004,179.779999,175.399994,176.970001,143564100,2025-09-29 01:49:22.284170
21835,2025-09-25,NVDA,174.479996,180.259995,173.130005,177.690002,191586700,2025-09-29 01:49:22.284170


In [2]:
# Query the `earnings` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.earnings;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,fiscal_date,earnings_date,eps,eps_estimated,session,revenue,revenue_estimated,price_before,price_after,last_updated
0,AAPL,1985,3,1985-09-30,1985-09-30,0.00112,,amc,1.918300e+09,,,,2023-05-17
1,AAPL,1989,4,1989-12-31,1989-12-31,0.00857,,amc,1.493400e+09,,,,2023-05-17
2,AAPL,1990,1,1990-03-31,1990-03-31,0.00929,,amc,1.346200e+09,,,,2023-05-17
3,AAPL,1990,2,1990-06-30,1990-06-30,0.00857,,amc,1.364800e+09,,,,2023-05-17
4,AAPL,1990,3,1990-09-30,1990-09-30,0.00748,,amc,1.354100e+09,,,,2023-05-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
309,NVDA,2024,4,2025-01-26,2025-02-26,0.89000,0.848,amc,3.933100e+10,3.810135e+10,126.640,120.964,2025-05-25
310,NVDA,2025,1,2025-04-27,2025-05-28,0.81000,0.737,amc,4.406200e+10,4.333416e+10,135.167,139.020,2025-08-27
311,NVDA,2025,2,2025-07-27,2025-08-27,1.05000,1.010,amc,4.674300e+10,4.604892e+10,181.770,180.170,2025-09-28
312,NVDA,2025,3,2025-10-27,2025-11-19,,1.230,bmo,,5.459076e+10,,,2025-09-28


In [32]:
df[df['tic']=='NVDA'].iloc[-10:]

Unnamed: 0,tic,fiscal_year,fiscal_quarter,fiscal_date,earnings_date,eps,eps_estimated,session,revenue,revenue_estimated,price_before,price_after,last_updated
304,NVDA,2023,2,2023-07-30,2023-08-23,0.27,0.21,amc,13507000000.0,11224000000.0,45.7,47.9393,2024-09-08
305,NVDA,2023,3,2023-10-29,2023-11-21,0.4,0.34,amc,18120000000.0,15194600000.0,50.409,48.716,2024-11-25
306,NVDA,2023,4,2024-01-28,2024-02-21,0.52,0.46,amc,22103000000.0,20238800000.0,69.452,78.538,2024-11-25
307,NVDA,2024,2,2024-07-28,2024-08-28,0.68,0.64,amc,30040000000.0,28779510000.0,128.27,118.23,2025-04-24
308,NVDA,2024,3,2024-10-27,2024-11-20,0.81,0.75,amc,35082000000.0,33171590000.0,146.38,146.69,2025-04-24
309,NVDA,2024,4,2025-01-26,2025-02-26,0.89,0.848,amc,39331000000.0,38101350000.0,126.64,120.964,2025-05-25
310,NVDA,2025,1,2025-04-27,2025-05-28,0.81,0.737,amc,44062000000.0,43334160000.0,135.167,139.02,2025-08-27
311,NVDA,2025,2,2025-07-27,2025-08-27,1.05,1.01,amc,46743000000.0,46048920000.0,181.77,180.17,2025-09-28
312,NVDA,2025,3,2025-10-27,2025-11-19,,1.23,bmo,,54590760000.0,,,2025-09-28
313,NVDA,2025,4,2026-01-27,2026-02-24,,,amc,,,,,2025-09-28


In [14]:
# check if any duplication from ticker, fiscal_year, fiscal_quarter
df_duplicates = df[df.duplicated(subset=['tic', 'fiscal_year', 'fiscal_quarter'], keep=False)]
if not df_duplicates.empty:
    print("Duplicate entries found:")
    display(df_duplicates)
else:
    print("No duplicate entries found.")

No duplicate entries found.


In [15]:
df[(df['tic']=='NVDA' ) & (df['fiscal_year']==2010) ]

Unnamed: 0,tic,fiscal_year,fiscal_quarter,fiscal_date,earnings_date,eps,eps_estimated,session,revenue,revenue_estimated,price_before,price_after,last_updated
258,NVDA,2010,1,2010-05-01,2010-05-13,0.06,0.05,amc,1001813000.0,914687000.0,14.68,12.96,2023-05-17
259,NVDA,2010,2,2010-07-31,2010-08-12,-0.06153,-0.03577,amc,811208000.0,2974400000.0,8.88,9.39,2023-05-17
260,NVDA,2010,3,2010-10-30,2010-11-11,0.04,0.03773,amc,843912000.0,787640000.0,12.74,13.26,2023-05-17
261,NVDA,2010,4,2011-01-29,2011-02-16,0.06,0.04128,amc,886376000.0,616626100.0,22.55,25.68,2023-05-17


In [16]:
df[(df['tic']=='TSLA' )]

Unnamed: 0,tic,fiscal_year,fiscal_quarter,fiscal_date,earnings_date,eps,eps_estimated,session,revenue,revenue_estimated,price_before,price_after,last_updated
144,TSLA,2007,4,2008-01-30,2008-01-30,-0.01012,,amc,3.685500e+06,,,,2023-05-17
145,TSLA,2008,1,2008-03-30,2008-03-30,-0.01012,,amc,3.685500e+06,,,,2023-05-17
146,TSLA,2008,4,2008-12-31,2008-12-31,-0.02180,,amc,1.416200e+07,,,,2023-05-17
147,TSLA,2009,1,2009-03-31,2009-03-31,-0.00763,,amc,2.088600e+07,,,,2023-05-17
148,TSLA,2009,2,2009-06-30,2009-06-30,-0.00518,,amc,2.694500e+07,,,,2023-05-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,TSLA,2024,4,2024-12-31,2025-01-29,0.73000,0.7740,amc,2.570700e+10,2.725892e+10,397.85,400.07,2025-04-28
210,TSLA,2025,1,2025-03-31,2025-04-22,0.27000,0.4136,amc,1.933500e+10,2.126950e+10,227.17,252.11,2025-07-21
211,TSLA,2025,2,2025-06-30,2025-07-23,0.40000,0.3972,amc,2.249600e+10,2.227968e+10,332.11,305.30,2025-09-22
212,TSLA,2025,3,2025-09-30,2025-10-22,,0.4800,bmo,,2.498468e+10,,,2025-09-22


In [7]:
# Query the `earnings_transcripts` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.earnings_transcripts;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,earnings_date,transcript,transcript_hash,raw_json,source,last_updated
0,AAPL,2024,4,2025-01-30,"Suhasini Chandramouli: Good afternoon, and wel...",abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,"{'cik': '320193', 'date': '2025-01-30', 'year'...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:42.373967
1,AAPL,2025,1,2025-05-01,"Suhasini Chandramouli: Good afternoon, and wel...",8dee5e77fbc239c31063645f176cb6a03541331e0373e2...,"{'cik': '320193', 'date': '2025-05-01', 'year'...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:43.054296
2,AAPL,2025,2,2025-07-31,"Suhasini Chandramouli: Good afternoon, and wel...",3dbd9b4e2d9b271c169d08896e655157680e8276bd4014...,"{'date': '2025-07-31', 'year': '2025', 'ticker...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:43.372375
3,TSLA,2025,1,2025-04-22,"Operator: Good afternoon, everyone, and welcom...",c9a7c36c936de533c2abf98f88539e675ac91a65c710cc...,"{'cik': '1318605', 'date': '2025-04-22', 'year...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:46.314028
4,TSLA,2025,2,2025-07-23,"Travis Axelrod: Good afternoon, everyone, and ...",9b659eb82fbd6032afe1c94c99373719ea1955cef27327...,"{'date': '2025-07-23', 'year': '2025', 'ticker...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:46.649935
5,NVDA,2024,2,2024-08-28,Operator: Good afternoon. My name is Abby and ...,a2761d5d8ffcf351fbb8aa69ed77cf7a758c0fff7f4bee...,"{'cik': '1045810', 'date': '2024-08-28', 'year...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:50.191682
6,NVDA,2024,3,2024-11-20,"Operator: Good afternoon. My name is Joel, and...",a8c6d85f9197127b9b61dab448eadae8d9e2ce6f9734fc...,"{'cik': '1045810', 'date': '2024-11-20', 'year...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:50.927847
7,NVDA,2024,4,2025-02-26,"Christa: Good afternoon. My name is Christa, a...",2b1dbb5ba288230b9398f67a47e3a56fab3a01a3bb0d26...,"{'cik': '1045810', 'date': '2025-02-26', 'year...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:51.317138
8,NVDA,2025,1,2025-05-28,"Sarah: Good afternoon. My name is Sarah, and I...",a1fadba73eb8d252dd28b16b3b4e6d1f4f14403d59da2d...,"{'date': '2025-05-28', 'transcript': 'Sarah: G...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:51.997235
9,NVDA,2025,2,2025-08-27,"Sarah: Good afternoon. My name is Sarah, and I...",389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,"{'date': '2025-08-27', 'year': '2026', 'ticker...",https://api.api-ninjas.com/v1/earningstranscri...,2025-10-07 06:57:52.297982


In [82]:
text = df[df['tic']=='NVDA']['transcript'].iloc[-1][:]  # Display the first 500 characters of the transcript for NVDA

In [100]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_text(text, max_tokens=512, overlap_tokens=50):
    text_splitter = RecursiveCharacterTextSplitter(
        separators=[
            r"(?<=\n)\s*",     # split after newlines
            r"(?<=\.)\s+",     # split after a period
            r" ",              # fallback word-level
            r""                # fallback char-level
        ],
        is_separator_regex=True,
        chunk_size=overlap_tokens,
        chunk_overlap=0,
        keep_separator=True,
    )

    chunks = []
    dialogues = text.split('\n')
    for dialogue in dialogues:
        if dialogue.strip():
            if ": " not in dialogue:
                speaker, content = "Unknown", dialogue
            else:
                speaker, content = dialogue.split(": ", 1)

            splits = text_splitter.split_text(content)
            for i in range(len(splits)//10):
                prefix = f"{speaker}: "
                content = " ".join(splits[i*9:(i+1)*10])
                if i > 0:
                    prefix += "(contd) "
                chunks.append(prefix + content.strip())
            if len(splits) % 10 != 0:
                prefix = f"{speaker}: "
                if len(splits) > 10:
                    prefix += "(contd) "
                content = " ".join(splits[(len(splits)//10)*9:])
                chunks.append(prefix + content.strip())

    return chunks

chunks = chunk_text(text, max_tokens=1024, overlap_tokens=50)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 133


In [101]:
chunks

["Sarah: Good afternoon. My name is Sarah, and I will be your conference operator today. At this time, I would like to welcome everyone to NVIDIA Corporation's Second Quarter Fiscal 2026 Financial Results Conference Call. All lines have been placed on mute to prevent any background noise. After the speakers' remarks, there will be a question and answer session.",
 'Sarah: (contd) question and answer session. If you would like to ask a question during this time, simply press star followed by the number one on your telephone keypad. If you would like to withdraw your question, press star 1 again. Thank you. Toshiya Hari, you may begin your conference. Thank you.',
 "Toshiya Hari: Good afternoon, everyone, and welcome to NVIDIA Corporation's conference call for 2026. With me today from NVIDIA Corporation are Jensen Huang, president and chief executive officer, and Colette Kress, executive vice president and chief financial officer. I would like to remind you that our call is being webcast

In [114]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
def tok_len(s: str) -> int:
    return len(enc.encode(s))

def chunk_text(text, max_tokens=512, overlap_tokens=50):
    text_splitter = RecursiveCharacterTextSplitter(
        separators=[
            r"(?<=\n)\s*",     # split after newlines
            r"(?<=\.)\s+",     # split after a period
            r" ",              # fallback word-level
            r""                # fallback char-level
        ],
        is_separator_regex=True,
        chunk_size=max_tokens,
        chunk_overlap=overlap_tokens,
        length_function=tok_len,
        keep_separator=True,
    )

    chunks = []
    dialogues = text.split('\n')
    for dialogue in dialogues:
        if dialogue.strip():
            if ": " not in dialogue:
                speaker, content = "Unknown", dialogue
            else:
                speaker, content = dialogue.split(": ", 1)

            splits = text_splitter.split_text(content)
            for i, chunk in enumerate(splits):
                prefix = f"{speaker}: "
                if i > 0:
                    prefix += "(contd) "
                chunks.append(prefix + chunk.strip())

    return chunks

chunks = chunk_text(text, max_tokens=512, overlap_tokens=50)
print(f"Number of chunks: {len(chunks)}")

Number of chunks: 39


In [115]:
chunks

["Sarah: Good afternoon. My name is Sarah, and I will be your conference operator today. At this time, I would like to welcome everyone to NVIDIA Corporation's Second Quarter Fiscal 2026 Financial Results Conference Call. All lines have been placed on mute to prevent any background noise. After the speakers' remarks, there will be a question and answer session. If you would like to ask a question during this time, simply press star followed by the number one on your telephone keypad. If you would like to withdraw your question, press star 1 again. Thank you. Toshiya Hari, you may begin your conference. Thank you.",
 "Toshiya Hari: Good afternoon, everyone, and welcome to NVIDIA Corporation's conference call for 2026. With me today from NVIDIA Corporation are Jensen Huang, president and chief executive officer, and Colette Kress, executive vice president and chief financial officer. I would like to remind you that our call is being webcast live on NVIDIA Corporation's 2026. The content 

In [116]:
for chunk in chunks:
    print(f"Token count: {len(chunk)}, Chunk: {chunk}\n")

Token count: 618, Chunk: Sarah: Good afternoon. My name is Sarah, and I will be your conference operator today. At this time, I would like to welcome everyone to NVIDIA Corporation's Second Quarter Fiscal 2026 Financial Results Conference Call. All lines have been placed on mute to prevent any background noise. After the speakers' remarks, there will be a question and answer session. If you would like to ask a question during this time, simply press star followed by the number one on your telephone keypad. If you would like to withdraw your question, press star 1 again. Thank you. Toshiya Hari, you may begin your conference. Thank you.

Token count: 1422, Chunk: Toshiya Hari: Good afternoon, everyone, and welcome to NVIDIA Corporation's conference call for 2026. With me today from NVIDIA Corporation are Jensen Huang, president and chief executive officer, and Colette Kress, executive vice president and chief financial officer. I would like to remind you that our call is being webcast l

In [18]:
from dotenv import load_dotenv
import requests

load_dotenv()
NINJA_API_KEY = os.getenv("NINJA_API_KEY")
ticker = "AAPL"
year = 2025
quarter = 1
url = f"https://api.api-ninjas.com/v1/earningstranscript?ticker={ticker}&year={year}&quarter={quarter}"
headers = {"X-Api-Key": NINJA_API_KEY}
response = requests.get(url, headers=headers)

In [19]:
# Get the response content
print(response.status_code)
print(response.text)
print(response.json())

200
{"date": "2025-01-30", "transcript": "Suhasini Chandramouli: Good afternoon, and welcome to the Apple Q1 Fiscal Year 2025 Earnings Conference Call. My name is Suhasini Chandramouli, Director of Investor Relations. Today's call is being recorded. Speaking first today are Apple CEO, Tim Cook, and he will be followed by CFO, Kevan Parekh. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including, without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation, and future business outlook, including the potential impact of macroeconomic conditions on the company's business and results of operations. These statements involve risks and uncertainties that may cause actual results or trends to differ materially from our forecast. For more information, please refer to the risk factors discu

In [20]:
def fetch_earnings_transcript(ticker, year, quarter):
    url = f"https://api.api-ninjas.com/v1/earningstranscript?ticker={ticker}&year={year}&quarter={quarter}"
    headers = {"X-Api-Key": NINJA_API_KEY}
    response = requests.get(url, headers=headers)
    print(ticker, year, quarter)
    print(response)
    print(response.json())
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch data for {ticker}: {response.status_code}")
        return None


In [22]:
fetch_earnings_transcript("AAPL", 2025, 2)

AAPL 2025 2
<Response [200]>
{'date': '2025-05-01', 'transcript': "Suhasini Chandramouli: Good afternoon, and welcome to the Apple Q2 Fiscal Year 2025 Earnings Conference Call. My name is Suhasini Chandramouli, Director of Investor Relations. Today's call is being recorded. Speaking first today is Apple's CEO, Tim Cook; and he'll be followed by CFO, Kevan Parekh. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including without limitation those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of tariffs and other trade measures and macroeconomic conditions on the company's business and results of operations. These statements involve risks and uncertainties that may cause actual results or trends to differ materially from our forecast. Fo

{'date': '2025-05-01',
 'transcript': "Suhasini Chandramouli: Good afternoon, and welcome to the Apple Q2 Fiscal Year 2025 Earnings Conference Call. My name is Suhasini Chandramouli, Director of Investor Relations. Today's call is being recorded. Speaking first today is Apple's CEO, Tim Cook; and he'll be followed by CFO, Kevan Parekh. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including without limitation those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of tariffs and other trade measures and macroeconomic conditions on the company's business and results of operations. These statements involve risks and uncertainties that may cause actual results or trends to differ materially from our forecast. For more information, please r

In [None]:
# Query the `income_statements` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.income_statements;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,fiscal_date,raw_json,source,last_updated
0,AAPL,2025,1,2024-12-28,"{'cik': '0000320193', 'eps': 2.41, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:49.008637
1,AAPL,2024,1,2023-12-30,"{'cik': '0000320193', 'eps': 2.19, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:49.008637
2,AAPL,2023,1,2022-12-31,"{'cik': '0000320193', 'eps': 1.89, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:49.008637
3,AAPL,2022,1,2021-12-25,"{'cik': '0000320193', 'eps': 2.11, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:49.008637
4,AAPL,2021,1,2020-12-26,"{'cik': '0000320193', 'eps': 1.7, 'date': '202...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:49.008637
...,...,...,...,...,...,...,...
70,NVDA,2025,0,2025-01-26,"{'cik': '0001045810', 'eps': 2.97, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:51.190977
71,NVDA,2024,0,2024-01-28,"{'cik': '0001045810', 'eps': 1.21, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:51.190977
72,NVDA,2023,0,2023-01-29,"{'cik': '0001045810', 'eps': 0.18, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:51.190977
73,NVDA,2022,0,2022-01-30,"{'cik': '0001045810', 'eps': 0.39, 'date': '20...",https://financialmodelingprep.com/stable/incom...,2025-09-29 02:05:51.190977


In [3]:
# Query the `news` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM raw.news;"
    df = pd.read_sql_query(query, conn)
    conn.close()
    
    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,published_date,publisher,title,site,content,url,raw_json,source,last_updated
0,AAPL,2025-10-03 12:15:05,Schwab Network,Daily Trader: Apple v. Tesla — Which is Better?,youtube.com,Tom White turns to stocks on Friday's Daily Tr...,https://www.youtube.com/shorts/mrPeqeW3P6k,{'url': 'https://www.youtube.com/shorts/mrPeqe...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:32.509823
1,AAPL,2025-10-03 12:02:00,Barrons,OpenAI's Sora AI-Video App Is Now No. 1 on App...,barrons.com,The app was released on Tuesday and is a near ...,https://www.barrons.com/articles/sora-ai-video...,{'url': 'https://www.barrons.com/articles/sora...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:32.509823
2,AAPL,2025-10-03 11:50:07,CNBC Television,Jefferies' Edison Lee: Expectations for Apple ...,youtube.com,"Edison Lee, Jefferies analyst, joins CNBC's 'S...",https://www.youtube.com/watch?v=9nVGZdycOus,{'url': 'https://www.youtube.com/watch?v=9nVGZ...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:32.509823
3,AAPL,2025-10-03 11:17:43,Benzinga,"Apple App Store Revenue Jumps 10%, Analyst See...",benzinga.com,Apple's (NASDAQ:AAPL) App Store delivered stro...,https://www.benzinga.com/analyst-stock-ratings...,{'url': 'https://www.benzinga.com/analyst-stoc...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:32.509823
4,AAPL,2025-10-03 11:11:00,Market Watch,Can Apple's stock make a true rebound? It migh...,marketwatch.com,A Jefferies analyst thinks Wall Street is read...,https://www.marketwatch.com/story/can-apples-s...,{'url': 'https://www.marketwatch.com/story/can...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:32.509823
...,...,...,...,...,...,...,...,...,...,...
295,NVDA,2025-09-28 08:22:00,The Motley Fool,Could Nvidia's $100 Billion Data Center Gamble...,fool.com,Few companies in history have reshaped industr...,https://www.fool.com/investing/2025/09/28/coul...,{'url': 'https://www.fool.com/investing/2025/0...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:33.151290
296,NVDA,2025-09-28 06:30:00,The Motley Fool,What's Next for These 3 Artificial Intelligenc...,fool.com,"As tech investors have observed, artificial in...",https://www.fool.com/investing/2025/09/28/what...,{'url': 'https://www.fool.com/investing/2025/0...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:33.151290
297,NVDA,2025-09-27 13:10:00,The Motley Fool,"Prediction: These 2 Things, Worth More Than $1...",fool.com,Nvidia (NVDA 0.27%) has seen earnings climb in...,https://www.fool.com/investing/2025/09/27/pred...,{'url': 'https://www.fool.com/investing/2025/0...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:33.151290
298,NVDA,2025-09-27 10:35:52,Seeking Alpha,CoreWeave: The Nvidia Put Is A Game Changer,seekingalpha.com,"CoreWeave, Inc. is initiated with a buy rating...",https://seekingalpha.com/article/4826397-corew...,{'url': 'https://seekingalpha.com/article/4826...,https://financialmodelingprep.com/stable/news/...,2025-10-04 06:35:33.151290


In [35]:
# Query the `news_analysis` table
# Display all records without truncation
# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_rows', None)
conn = connect_to_db()
if conn:
    query = "SELECT tic, published_date, title, content, impact_magnitude FROM core.news_analysis WHERE impact_magnitude = 'major';"
    df = pd.read_sql_query(query, conn)
    conn.close()
    
    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,published_date,title,content,impact_magnitude
0,AAPL,2025-10-03 11:17:43,"Apple App Store Revenue Jumps 10%, Analyst Sees AI, App Diversification Driving Long-Term Growth","Apple's (NASDAQ:AAPL) App Store delivered strong results in the September quarter, with revenue climbing 10% year-over-year to $8.7 billion and downloads rising 2.5% to 9 billion.",major
1,AAPL,2025-09-25 09:30:06,EU rejects Apple demand to scrap landmark tech rules,"The European Union rejected a call by Apple to scrap its landmark digital competition law on Thursday, dismissing the US giant's claims that the rules put users' security at risk.",major
2,AAPL,2025-09-30 18:24:47,"Apple, Google, Meta must face lawsuits over casino-style gambling apps","A federal judge on Tuesday denied Apple , Google and Meta Platforms' requests to dismiss lawsuits claiming they promoted illegal gambling by hosting and accepting commissions from casino-style apps that addict users.",major
3,TSLA,2025-10-03 15:41:05,Tesla is being sued by the family of a teenager killed in a Cybertruck crash after its doors failed to open,"Tesla's Cybertrucks might be the most polarizing vehicle on the market. And due to their electronic door-open mechanism, they're far from the safest.",major
4,TSLA,2025-10-03 14:15:06,"Tesla, GM lead record U.S. EV market share as federal purchase incentives end","Tesla and General Motors are leading the U.S. automotive industry this year in record domestic sales of all-electric vehicles, as consumers hurried to buy EVs before up to $7,500 in federal incentives for each purchase ended in September. EVs also set a new quarterly record of more than 438,000 units sold during the third quarter — achieving market share of 10.5% for the period.",major
5,TSLA,2025-10-03 10:57:01,"Tesla, GM lead record U.S. EV sales this year as federal incentives end","New data provided to CNBC from Motor Intelligence shows U.S. sales of electric vehicles topped 1 million units through the first nine months of the year. EVs also set a new quarterly record of more than 438,000 units sold during the third quarter — achieving market share of 10.5% for the period.",major
6,TSLA,2025-10-03 10:51:04,Tesla Q3 Deliveries Reach Record Levels: Is TSLA Stock a Buy?,"TSLA's record Q3 deliveries, rising energy storage gains and AI ambitions fuel optimism. However, questions on demand and valuation remain.",major
7,TSLA,2025-10-03 10:00:00,"Pomerantz Law Firm Announces the Filing of a Class Action Against Tesla, Inc. and Certain Officers - TSLA","NEW YORK , Oct. 3, 2025 /PRNewswire/ -- Pomerantz LLP announces that a class action lawsuit has been filed against Tesla, Inc. (""Tesla"" or the ""Company"") (NASDAQ: TSLA) and certain officers. The class action, filed in the United States District Court for the Western District of Texas, and docketed under 25-cv-01213, is on behalf of a class consisting of all persons and entities other than Defendants that purchased or otherwise acquired Tesla securities between April 19, 2023 and June 22, 2025, both dates inclusive (the ""Class Period""), seeking to recover damages caused by Defendants' violations of the federal securities laws and to pursue remedies under Sections 10(b) and 20(a) of the Securities Exchange Act of 1934 and Rule 10b-5 promulgated thereunder, against the Company and certain of its top officials.",major
8,TSLA,2025-10-03 09:32:05,Tesla shares in green after beating Q3 delivery estimates,"Tesla Inc. shares traded in green on Thursday after the company reported third-quarter 2025 vehicle deliveries of 497,099, surpassing Wall Street estimates and posting year-over-year growth, though production fell short compared with the same period last year. The results highlight a mixed performance for the electric-vehicle maker as it navigates shifting demand dynamics in key",major
9,TSLA,2025-10-03 06:44:28,"Why Tesla's record Q3 is a one-time high, and what comes next?","Tesla just wrapped up a blockbuster quarter, delivering 497,099 cars and building 447,450 units, its biggest numbers yet. The energy side of the business also hit a new milestone, rolling out 12.5 GWh of storage products, nearly double what it managed a year ago.",major


In [117]:
# Query the `earnings_transcripts_chunks` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM core.earnings_transcript_chunks;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,earnings_date,chunk_id,chunk,token_count,chunk_hash,transcript_hash,last_updated
0,AAPL,2024,4,2025-01-30,0,"Suhasini Chandramouli: Good afternoon, and wel...",256,4ba6d30e3d28cb1de70acd90415ecf7bd811f12a62c302...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
1,AAPL,2024,4,2025-01-30,1,"Tim Cook: Thank you, Suhasini. Good afternoon,...",489,2de23ff5c887b957ec6e815286aac7e99e3e417cc57fd4...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
2,AAPL,2024,4,2025-01-30,2,Tim Cook: (contd) And we were excited to recen...,511,a3b9984d9b4f1617e2c12bb6f5ef1e5ee3e5ae8321bc5d...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
3,AAPL,2024,4,2025-01-30,3,Tim Cook: (contd) All of this is enabled by th...,496,3e6d28e7c895d95777c8f0f4854a8f415d562a53036ed6...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
4,AAPL,2024,4,2025-01-30,4,Tim Cook: (contd) We have so much in store for...,496,0ea41e118ddda93364048c132a691f33a6d920035b0932...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
...,...,...,...,...,...,...,...,...,...,...
692,NVDA,2025,2,2025-08-27,34,Sarah: Your final question comes from Timothy ...,19,991dce35427015fade249bb0ae4696f2dcedd2151f4d73...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
693,NVDA,2025,2,2025-08-27,35,"Timothy Arcuri: Thanks a lot. Jensen, I wanted...",103,f1346975e7f14bb72900bc10981bb79b7bed5beedd53d5...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
694,NVDA,2025,2,2025-08-27,36,"Jensen Huang: Well, I think the best way to lo...",512,e9f62058552f2bfc8db7bf7cb4d138994d4311301d7010...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
695,NVDA,2025,2,2025-08-27,37,"Jensen Huang: (contd) Our next platform, Rubin...",339,95e4af641dacec7be23358159a768bcde65165e12848c1...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171


In [118]:
df['token_count'].describe()

count    697.000000
mean     155.595409
std      168.150187
min        5.000000
25%       24.000000
50%       92.000000
75%      206.000000
max      521.000000
Name: token_count, dtype: float64

In [4]:
# Query the `earnings_transcripts_chunks` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM core.earnings_transcript_chunks;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,earnings_date,chunk_id,chunk,token_count,chunk_hash,transcript_hash,last_updated
0,AAPL,2024,4,2025-01-30,0,"Suhasini Chandramouli: Good afternoon, and wel...",256,4ba6d30e3d28cb1de70acd90415ecf7bd811f12a62c302...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
1,AAPL,2024,4,2025-01-30,1,"Tim Cook: Thank you, Suhasini. Good afternoon,...",489,2de23ff5c887b957ec6e815286aac7e99e3e417cc57fd4...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
2,AAPL,2024,4,2025-01-30,2,Tim Cook: (contd) And we were excited to recen...,511,a3b9984d9b4f1617e2c12bb6f5ef1e5ee3e5ae8321bc5d...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
3,AAPL,2024,4,2025-01-30,3,Tim Cook: (contd) All of this is enabled by th...,496,3e6d28e7c895d95777c8f0f4854a8f415d562a53036ed6...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
4,AAPL,2024,4,2025-01-30,4,Tim Cook: (contd) We have so much in store for...,496,0ea41e118ddda93364048c132a691f33a6d920035b0932...,abba2130960470fb89ec6dd28ea23fbeb60cff51be7654...,2025-10-07 09:16:42.127171
...,...,...,...,...,...,...,...,...,...,...
692,NVDA,2025,2,2025-08-27,34,Sarah: Your final question comes from Timothy ...,19,991dce35427015fade249bb0ae4696f2dcedd2151f4d73...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
693,NVDA,2025,2,2025-08-27,35,"Timothy Arcuri: Thanks a lot. Jensen, I wanted...",103,f1346975e7f14bb72900bc10981bb79b7bed5beedd53d5...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
694,NVDA,2025,2,2025-08-27,36,"Jensen Huang: Well, I think the best way to lo...",512,e9f62058552f2bfc8db7bf7cb4d138994d4311301d7010...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171
695,NVDA,2025,2,2025-08-27,37,"Jensen Huang: (contd) Our next platform, Rubin...",339,95e4af641dacec7be23358159a768bcde65165e12848c1...,389f300f6683c7c63d35c577bdf7d4167259fd665c1f04...,2025-10-07 09:16:42.127171


In [7]:
# Query the `earnings_transcripts_chunks` table
conn = connect_to_db()
if conn:
    query = "SELECT * FROM core.earnings_transcript_embeddings;"
    df = pd.read_sql_query(query, conn)
    conn.close()

    # Replace NaN with None for consistency
    df = df.where(pd.notnull(df), None)

    # Display the data
    display(df)

  df = pd.read_sql_query(query, conn)


Unnamed: 0,tic,fiscal_year,fiscal_quarter,earnings_date,chunk_id,embedding,embedding_model,last_updated
0,AAPL,2024,4,2025-01-30,12,"[0.035682842,-0.00039636696,0.0022631593,0.036...",text-embedding-3-small,2025-10-07 19:12:22.993442
1,AAPL,2024,4,2025-01-30,10,"[0.06074509,-0.023144782,0.027064249,-0.002893...",text-embedding-3-small,2025-10-07 19:12:22.993442
2,AAPL,2025,1,2025-05-01,55,"[0.017443154,0.026426641,0.01774435,0.01972176...",text-embedding-3-small,2025-10-07 19:12:22.993442
3,NVDA,2024,3,2024-11-20,13,"[0.0280212,0.0048746895,0.059862804,0.01138039...",text-embedding-3-small,2025-10-07 19:12:22.993442
4,AAPL,2025,1,2025-05-01,68,"[0.019060645,0.0038684208,0.038854394,0.010021...",text-embedding-3-small,2025-10-07 19:12:22.993442
...,...,...,...,...,...,...,...,...
692,TSLA,2025,2,2025-07-23,73,"[0.031511176,-0.00019379305,0.02731332,0.04437...",text-embedding-3-small,2025-10-07 19:12:22.993442
693,AAPL,2025,2,2025-07-31,5,"[0.018208748,0.013937241,0.016918262,0.0470769...",text-embedding-3-small,2025-10-07 19:12:22.993442
694,AAPL,2024,4,2025-01-30,23,"[0.067168735,-0.0017938287,0.013038943,0.03223...",text-embedding-3-small,2025-10-07 19:12:22.993442
695,NVDA,2024,4,2025-02-26,9,"[0.06750801,-0.02043978,0.037897386,0.02805356...",text-embedding-3-small,2025-10-07 19:12:22.993442
