## Step 1: Initialize the Database


In [91]:
# Run init_db.py to create the SQLite schema

!python ../init_db.py  # Adjust the path if needed


✅ Database initialized at: /Users/zacseidel/Documents/GitHub/momentum-screener/data/market_data.sqlite


## Step 2: Connect and Inspect Tables

In [92]:
import sqlite3
import pandas as pd

conn = sqlite3.connect("../data/market_data.sqlite")

# Check that all tables exist
pd.read_sql("SELECT name FROM sqlite_master WHERE type='table'", conn)


Unnamed: 0,name
0,index_constituents
1,index_allocations
2,daily_prices
3,top10_picks


## Step 3 Preview the Table Schemas

In [95]:
# Show column structure for each table
def describe_table(table):
    return pd.read_sql(f"PRAGMA table_info({table})", conn)

#describe_table("index_constituents")
describe_table("top10_picks")


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,ticker,TEXT,1,,1
1,1,date,DATE,1,,2
2,2,current_return,TEXT,0,,0
3,3,last_month_return,TEXT,0,,0
4,4,current_rank,REAL,0,,0
5,5,last_month_rank,REAL,0,,0
6,6,rank_change,REAL,0,,0


## Step 4: Scrape and Insert Initial Index Constituents

In [96]:
import pandas as pd
import requests
from io import StringIO

def get_index_constituents(index="sp500"):
    if index == "sp500":
        url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    elif index == "sp400":
        url = "https://en.wikipedia.org/wiki/List_of_S%26P_400_companies"
    else:
        raise ValueError("Index must be 'sp500' or 'sp400'")

    response = requests.get(url)
    response.raise_for_status()
    html = response.text

    df = pd.read_html(StringIO(html))[0]  # <- Wrapped in StringIO now
    df.head()
    df.columns = [col.lower().strip() for col in df.columns]

    rename_map = {
        "symbol": "ticker",
        "security": "company",
        "gics sector": "gics_sector",
        "gics sub-industry": "gics_sub_industry",
        "headquarters location": "headquarters",
        "date added": "date_added",
        "founded": "founded"
    }

    available_renames = {k: v for k, v in rename_map.items() if k in df.columns}
    df = df.rename(columns=available_renames)

    for col in ["date_added", "founded"]:
        if col not in df.columns:
            df[col] = None

    df["index_type"] = index
    df["date_added"] = pd.Timestamp.today().date()

    df.head()

    return df.loc[:, [
        "ticker", "company", "index_type", "gics_sector",
        "gics_sub_industry", "headquarters", "date_added",
        "founded"
    ]]


## Step 5: Load into SQLite

In [99]:
df_sp500 = get_index_constituents("sp500")
df_sp500.head()

#df_sp400 = get_index_constituents("sp400")
#df_sp400.head()


Unnamed: 0,ticker,company,index_type,gics_sector,gics_sub_industry,headquarters,date_added,founded
0,MMM,3M,sp500,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",2025-05-09,1902
1,AOS,A. O. Smith,sp500,Industrials,Building Products,"Milwaukee, Wisconsin",2025-05-09,1916
2,ABT,Abbott Laboratories,sp500,Health Care,Health Care Equipment,"North Chicago, Illinois",2025-05-09,1888
3,ABBV,AbbVie,sp500,Health Care,Biotechnology,"North Chicago, Illinois",2025-05-09,2013 (1888)
4,ACN,Accenture,sp500,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2025-05-09,1989


In [102]:
df_sp500.to_sql("index_constituents", conn, if_exists="replace", index=False)


503

In [101]:
# Load and insert S&P 500 and S&P 400
df_sp500 = get_index_constituents("sp500")
df_sp400 = get_index_constituents("sp400")

df_sp500.to_sql("index_constituents", conn, if_exists="replace", index=False)
df_sp400.to_sql("index_constituents", conn, if_exists="replace", index=False)


401

## Confirm it worked

In [103]:
pd.read_sql("SELECT * FROM index_constituents WHERE index_type = 'sp500' ORDER BY date_added DESC LIMIT 10", conn)


Unnamed: 0,ticker,company,index_type,gics_sector,gics_sub_industry,headquarters,date_added,founded
0,MMM,3M,sp500,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",2025-05-09,1902
1,AOS,A. O. Smith,sp500,Industrials,Building Products,"Milwaukee, Wisconsin",2025-05-09,1916
2,ABT,Abbott Laboratories,sp500,Health Care,Health Care Equipment,"North Chicago, Illinois",2025-05-09,1888
3,ABBV,AbbVie,sp500,Health Care,Biotechnology,"North Chicago, Illinois",2025-05-09,2013 (1888)
4,ACN,Accenture,sp500,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2025-05-09,1989
5,ADBE,Adobe Inc.,sp500,Information Technology,Application Software,"San Jose, California",2025-05-09,1982
6,AMD,Advanced Micro Devices,sp500,Information Technology,Semiconductors,"Santa Clara, California",2025-05-09,1969
7,AES,AES Corporation,sp500,Utilities,Independent Power Producers & Energy Traders,"Arlington, Virginia",2025-05-09,1981
8,AFL,Aflac,sp500,Financials,Life & Health Insurance,"Columbus, Georgia",2025-05-09,1955
9,A,Agilent Technologies,sp500,Health Care,Life Sciences Tools & Services,"Santa Clara, California",2025-05-09,1999


## Adding Allocations

In [104]:
import sys
import os
import importlib

# Ensure project root is in the path
project_root = os.path.abspath("..")
if project_root not in sys.path:
    sys.path.append(project_root)

# Import and reload your module
import src.allocations
importlib.reload(src.allocations)

# Now you can call the updated function
from src.allocations import update_index_allocations
update_index_allocations()


Saved file: spy_holdings.xlsx
Attempting to connect to database at: /Users/zacseidel/Documents/GitHub/momentum-screener/data/market_data.sqlite
Columns in spy_holdings.xlsx: ['Name', 'Ticker', 'Identifier', 'SEDOL', 'Weight', 'Sector', 'Shares Held', 'Local Currency']
Stored 504 rows for sp500
Saved file: mdy_holdings.xlsx
Attempting to connect to database at: /Users/zacseidel/Documents/GitHub/momentum-screener/data/market_data.sqlite
Columns in mdy_holdings.xlsx: ['Name', 'Ticker', 'Identifier', 'SEDOL', 'Weight', 'Sector', 'Shares Held', 'Local Currency']
Stored 402 rows for sp400


## Test Prices

In [48]:

!pip install python-dotenv



Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [105]:
import sys, os
sys.path.append("..")

from src import prices
import importlib
importlib.reload(prices)

prices.download_all_required_price_data()


Fetching grouped prices for 2025-05-08...
Stored 11028 rows for 2025-05-08
Fetching grouped prices for 2024-05-08...
Stored 10453 rows for 2024-05-08
Fetching grouped prices for 2025-04-08...
Stored 11029 rows for 2025-04-08
Fetching grouped prices for 2024-04-08...
Stored 10499 rows for 2024-04-08


In [106]:
import sqlite3
conn = sqlite3.connect("../data/market_data.sqlite")
pd.read_sql("SELECT * FROM daily_prices ORDER BY date DESC LIMIT 10", conn)


Unnamed: 0,ticker,date,close
0,THO,2025-05-08,76.26
1,ARES,2025-05-08,165.85
2,GNTY,2025-05-08,40.84
3,NESR,2025-05-08,6.22
4,AMTD,2025-05-08,1.0
5,ULTA,2025-05-08,388.24
6,GURU,2025-05-08,49.78
7,DOCN,2025-05-08,29.89
8,GHY,2025-05-08,12.7
9,PSMT,2025-05-08,105.8


## Test Ranking

In [117]:
import sys, os, importlib
sys.path.append("..")  # if not already in sys.path

from src import ranking
importlib.reload(ranking)  # ← this forces Python to reload the module

# Now import your functions again
from src.ranking import get_price_snapshots, compute_returns_and_ranks, store_top10_picks


dates = get_target_dates()
print(dates)
df, resolved_dates = get_price_snapshots(dates)

ranks = compute_returns_and_ranks(df, resolved_dates)

ranks.head(20)          # See values






{'yesterday': '2025-05-08', 'one_year_ago': '2024-05-08', 'one_month_ago': '2025-04-08', 'one_year_plus_month_ago': '2024-04-08'}


Unnamed: 0_level_0,current_return,last_month_return,current_rank,last_month_rank,rank_change
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PLTR,452.6%,236.2%,1.0,1.0,0.0
GEV,140.2%,120.8%,2.0,2.0,0.0
TPL,128.5%,93.0%,3.0,3.0,0.0
AXON,120.2%,62.0%,4.0,6.0,2.0
TPR,98.9%,40.5%,5.0,18.0,13.0
NFLX,87.8%,38.5%,7.0,20.0,13.0
TKO,71.9%,44.9%,9.0,16.0,7.0
VRSN,69.2%,25.8%,10.0,46.0,36.0
FICO,68.3%,35.4%,11.0,26.0,15.0
RCL,64.3%,30.3%,12.0,31.0,19.0


In [118]:
ranks

Unnamed: 0_level_0,current_return,last_month_return,current_rank,last_month_rank,rank_change
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PLTR,452.6%,236.2%,1.0,1.0,0.0
GEV,140.2%,120.8%,2.0,2.0,0.0
TPL,128.5%,93.0%,3.0,3.0,0.0
AXON,120.2%,62.0%,4.0,6.0,2.0
TPR,98.9%,40.5%,5.0,18.0,13.0
...,...,...,...,...,...
APA,-47.0%,-60.0%,495.0,496.0,1.0
EL,-52.7%,-65.6%,497.0,499.0,2.0
ALB,-56.0%,-61.5%,498.0,498.0,0.0
SMCI,-61.0%,-65.8%,500.0,500.0,0.0


In [121]:
import sys, os, importlib
importlib.reload(ranking)  # ← this forces Python to reload the module



from src.ranking import (
    get_price_snapshots,
    compute_returns_and_ranks,
    store_top10_picks
)
from src.prices import get_target_dates

# Step 1: Get target dates
dates = get_target_dates()
print("📅 Raw target dates:")
for k, v in dates.items():
    print(f"{k}: {v}")

# Step 2: Fetch prices and resolve dates
df, resolved = get_price_snapshots(dates)
print("\n📅 Resolved trading dates used:")
for k, v in resolved.items():
    print(f"{k}: {v}")

print("\n🧾 Price snapshot sample:")
display(df.head(10))
print(f"\nShape: {df.shape} — Missing values: {df.isna().sum().sum()}")

# Step 3: Compute returns and ranks
ranks = compute_returns_and_ranks(df, resolved)
print("\n📈 Top-ranked momentum results:")
display(ranks.head(10))
print(f"\nFull result shape: {ranks.shape} — NaNs dropped? {df.shape[0] - ranks.shape[0]}")

# Step 4: Store top 10
top10 = store_top10_picks(ranks)
print("\n✅ Final top 10 (if any):")
display(top10)


📅 Raw target dates:
yesterday: 2025-05-08
one_year_ago: 2024-05-08
one_month_ago: 2025-04-08
one_year_plus_month_ago: 2024-04-08

📅 Resolved trading dates used:
yesterday: 2025-05-08
one_year_ago: 2024-05-08
one_month_ago: 2025-04-08
one_year_plus_month_ago: 2024-04-08

🧾 Price snapshot sample:


Unnamed: 0_level_0,2024-04-08,2024-05-08,2025-04-08,2025-05-08
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MMM,91.93,96.35,127.16,141.12
AOS,86.97,85.58,59.63,68.01
ABT,110.52,104.94,123.95,134.0
ABBV,169.8,160.45,175.67,185.58
ACN,331.8,311.99,281.39,308.88
ADBE,484.28,488.1,340.0,383.99
AMD,169.9,153.62,78.21,101.7
AES,18.3,19.37,10.23,10.82
AFL,85.14,84.83,99.77,106.3
A,144.46,142.8,99.29,108.7



Shape: (503, 4) — Missing values: 4

📈 Top-ranked momentum results:


Unnamed: 0_level_0,current_return,last_month_return,current_rank,last_month_rank,rank_change
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
PLTR,452.6%,236.2%,1.0,1.0,0.0
GEV,140.2%,120.8%,2.0,2.0,0.0
TPL,128.5%,93.0%,3.0,3.0,0.0
AXON,120.2%,62.0%,4.0,6.0,2.0
TPR,98.9%,40.5%,5.0,18.0,13.0
NFLX,87.8%,38.5%,7.0,20.0,13.0
TKO,71.9%,44.9%,9.0,16.0,7.0
VRSN,69.2%,25.8%,10.0,46.0,36.0
FICO,68.3%,35.4%,11.0,26.0,15.0
RCL,64.3%,30.3%,12.0,31.0,19.0



Full result shape: (252, 5) — NaNs dropped? 251
Stored top 10 picks for 2025-05-09

✅ Final top 10 (if any):


Unnamed: 0,ticker,current_return,last_month_return,current_rank,last_month_rank,rank_change,date
0,PLTR,452.6%,236.2%,1.0,1.0,0.0,2025-05-09
1,GEV,140.2%,120.8%,2.0,2.0,0.0,2025-05-09
2,TPL,128.5%,93.0%,3.0,3.0,0.0,2025-05-09
3,AXON,120.2%,62.0%,4.0,6.0,2.0,2025-05-09
4,TPR,98.9%,40.5%,5.0,18.0,13.0,2025-05-09
5,NFLX,87.8%,38.5%,7.0,20.0,13.0,2025-05-09
6,TKO,71.9%,44.9%,9.0,16.0,7.0,2025-05-09
7,VRSN,69.2%,25.8%,10.0,46.0,36.0,2025-05-09
8,FICO,68.3%,35.4%,11.0,26.0,15.0,2025-05-09
9,RCL,64.3%,30.3%,12.0,31.0,19.0,2025-05-09


python run_report.py


In [86]:
import sqlite3
import pandas as pd

# Path to your local SQLite database
db_path = "../data/market_data.sqlite"

# Connect and pull tickers from index_constituents
with sqlite3.connect(db_path) as conn:
    tickers = pd.read_sql(
        "SELECT DISTINCT ticker FROM index_constituents",
        conn
    )

print(f"Retrieved {len(tickers)} tickers.")
display(tickers.head(10))


Retrieved 503 tickers.


Unnamed: 0,ticker
0,MMM
1,AOS
2,ABT
3,ABBV
4,ACN
5,ADBE
6,AMD
7,AES
8,AFL
9,A


In [140]:
import sys
import os
import importlib
import src.emailer

importlib.reload(src.emailer)

importlib.reload(run_report)  # ← this forces Python to reload the module



from src.run_report import main
main()


🚀 Starting Momentum Screener Pipeline
Skipping 2025-05-08 — already in DB
Skipping 2024-05-08 — already in DB
Skipping 2025-04-08 — already in DB
Skipping 2024-04-08 — already in DB
Stored top 10 picks for 2025-05-09
🔍 Fetching info for PLTR (1/10)
✅ Already cached. Skipping.
🔍 Fetching info for GEV (2/10)
✅ Already cached. Skipping.
🔍 Fetching info for TPL (3/10)
✅ Already cached. Skipping.
🔍 Fetching info for AXON (4/10)
✅ Already cached. Skipping.
🔍 Fetching info for TPR (5/10)
✅ Already cached. Skipping.
🔍 Fetching info for NFLX (6/10)
✅ Already cached. Skipping.
🔍 Fetching info for TKO (7/10)
✅ Already cached. Skipping.
🔍 Fetching info for VRSN (8/10)
✅ Already cached. Skipping.
🔍 Fetching info for FICO (9/10)
✅ Already cached. Skipping.
🔍 Fetching info for RCL (10/10)
✅ Already cached. Skipping.
✉️ Email sent. Status code: 202
✅ Report sent successfully.


In [131]:
!pip install sendgrid

Collecting sendgrid
  Downloading sendgrid-6.12.0-py3-none-any.whl.metadata (12 kB)
Collecting python-http-client>=3.2.1 (from sendgrid)
  Using cached python_http_client-3.3.7-py3-none-any.whl.metadata (6.9 kB)
Collecting starkbank-ecdsa>=2.0.1 (from sendgrid)
  Using cached starkbank-ecdsa-2.2.0.tar.gz (14 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting werkzeug>=3.0.0 (from sendgrid)
  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Downloading sendgrid-6.12.0-py3-none-any.whl (102 kB)
Using cached python_http_client-3.3.7-py3-none-any.whl (8.4 kB)
Downloading werkzeug-3.1.3-py3-none-any.whl (224 kB)
Building wheels for collected packages: starkbank-ecdsa
  Building wheel for starkbank-ecdsa (pyproject.toml) ... [?25ldone
[?25h  Created wheel for starkbank-ecdsa: filename=starkbank_ecdsa-2.2.0-py3-none-any.whl size=16046 sha256=042