# Dependencies

In [189]:
! pip install requests pandas numpy matplotlib



In [190]:
import time
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta

API_KEY = "dYELOfhzHjpfSx6oHPlBTPF44OVPvt41"
TICKER = "META"
START = "2024-01-01"
END   = "2025-01-01"
OUT_CSV = "meta_stock_1yr.csv"
INTERVAL_MINUTES = 5

def ema(series: pd.Series, span: int) -> pd.Series:
    """Exponential Moving Average"""
    return series.ewm(span=span, adjust=False).mean()

def rsi(series: pd.Series, length: int = 14) -> pd.Series:
    """Relative Strength Index"""
    delta = series.diff()
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)
    roll_up = pd.Series(gain).rolling(length).mean()
    roll_down = pd.Series(loss).rolling(length).mean()
    rs = roll_up / (roll_down + 1e-9)
    return 100 - (100 / (1 + rs))

def macd(series: pd.Series, fast=12, slow=26, signal=9):
    """MACD, Signal, Histogram"""
    ema_fast = ema(series, fast)
    ema_slow = ema(series, slow)
    macd_line = ema_fast - ema_slow
    signal_line = ema(macd_line, signal)
    hist = macd_line - signal_line
    return macd_line, signal_line, hist

def fetch_polygon_bars(ticker: str, start: str, end: str, interval_minutes: int = 5) -> pd.DataFrame:
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{interval_minutes}/minute/{start}/{end}"
    params = {
        "adjusted": "true",
        "sort": "asc",
        "limit": 50000,
        "apiKey": API_KEY
    }
    print(f"Fetching {ticker} {interval_minutes}-min bars {start} → {end} …")
    r = requests.get(url, params=params)
    if r.status_code != 200:
        raise RuntimeError(f"Polygon/Massive error: {r.status_code} {r.text}")
    data = r.json()
    if "results" not in data or not data["results"]:
        print("⚠️ No data returned")
        return pd.DataFrame()
    df = pd.DataFrame(data["results"])
    df.rename(columns={
        "t": "timestamp", "o": "open", "h": "high",
        "l": "low", "c": "close", "v": "volume"
    }, inplace=True)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    df.set_index("timestamp", inplace=True)
    df["ticker"] = ticker
    return df

df = fetch_polygon_bars(TICKER, START, END, INTERVAL_MINUTES)

if df.empty:
    print("❌ No data returned from Polygon/Massive.")

def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()

    out["return_5m"] = out["close"].pct_change()
    out["return_30m"] = out["close"].pct_change(6)
    out["return_1h"] = out["close"].pct_change(12)
    out["volatility_1h"] = out["return_5m"].rolling(12, min_periods=8).std()

    # RSI
    out["rsi_14"] = rsi(out["close"], 14)

    # MACD
    out["macd"], out["macd_signal"], out["macd_hist"] = macd(out["close"])

    # EMA
    out["ema_12"] = ema(out["close"], 12)
    out["ema_26"] = ema(out["close"], 26)

    # Relative Volume
    window = 5 * 78
    out["rel_volume"] = out["volume"] / (
        out["volume"].rolling(window, min_periods=78).median()
    )

    return out.dropna(subset=["close"]).copy()

def add_sentiment_placeholders(df: pd.DataFrame) -> pd.DataFrame:
    df["sentiment_score"] = 0.0
    df["reputable_mentions"] = 0
    df["politician_trades"] = 0.0
    df["insider_activity_score"] = 0.0
    df["earnings_sentiment"] = 0.0
    df["google_trend_score"] = np.nan
    return df


def add_labels(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df["target_5m_return"] = df["close"].pct_change(periods=-1)
    df["target_up_bin"] = (df["target_5m_return"] > 0.0005).astype(int)
    return df

df_feat = compute_features(df)
df_feat = add_sentiment_placeholders(df_feat)
df_feat = add_labels(df_feat)
df_feat = df_feat.reset_index().rename(columns={"index": "timestamp"})

preferred = [
        "timestamp", "ticker",
        "open", "high", "low", "close", "volume",
        "return_5m", "return_30m", "return_1h", "volatility_1h",
        "rsi_14", "macd", "macd_signal", "macd_hist",
        "ema_12", "ema_26", "rel_volume",
        "sentiment_score", "reputable_mentions", "politician_trades",
        "insider_activity_score", "earnings_sentiment", "google_trend_score",
        "target_5m_return", "target_up_bin"
    ]

cols = [c for c in preferred if c in df_feat.columns]
df_feat = df_feat[cols].sort_values("timestamp").reset_index(drop=True)
df_feat.to_csv(OUT_CSV, index=False)
print(f"✅ Wrote {len(df_feat):,} rows to {OUT_CSV}")


Fetching META 5-min bars 2024-01-01 → 2025-01-01 …
✅ Wrote 13,251 rows to meta_stock_1yr.csv


# Hyperparameters

In [177]:
API_KEY = "dYELOfhzHjpfSx6oHPlBTPF44OVPvt41"
TICKER = "META"
START = "2025-07-15"
END   = "2025-08-15"
OUT_CSV = "meta_stock.csv"
INTERVAL_MINUTES = 5

# Functions

In [178]:
def ema(series: pd.Series, span: int) -> pd.Series:
    """Exponential Moving Average"""
    return series.ewm(span=span, adjust=False).mean()

def rsi(series: pd.Series, length: int = 14) -> pd.Series:
    """Relative Strength Index"""
    delta = series.diff()
    gain = np.where(delta > 0, delta, 0)
    loss = np.where(delta < 0, -delta, 0)
    roll_up = pd.Series(gain).rolling(length).mean()
    roll_down = pd.Series(loss).rolling(length).mean()
    rs = roll_up / (roll_down + 1e-9)
    return 100 - (100 / (1 + rs))

def macd(series: pd.Series, fast=12, slow=26, signal=9):
    """MACD, Signal, Histogram"""
    ema_fast = ema(series, fast)
    ema_slow = ema(series, slow)
    macd_line = ema_fast - ema_slow
    signal_line = ema(macd_line, signal)
    hist = macd_line - signal_line
    return macd_line, signal_line, hist

# Data Fetching

In [179]:
def fetch_polygon_bars(ticker: str, start: str, end: str, interval_minutes: int = 5) -> pd.DataFrame:
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{interval_minutes}/minute/{start}/{end}"
    params = {
        "adjusted": "true",
        "sort": "asc",
        "limit": 50000,
        "apiKey": API_KEY
    }
    print(f"Fetching {ticker} {interval_minutes}-min bars {start} → {end} …")
    r = requests.get(url, params=params)
    if r.status_code != 200:
        raise RuntimeError(f"Polygon/Massive error: {r.status_code} {r.text}")
    data = r.json()
    if "results" not in data or not data["results"]:
        print("⚠️ No data returned")
        return pd.DataFrame()
    df = pd.DataFrame(data["results"])
    df.rename(columns={
        "t": "timestamp", "o": "open", "h": "high",
        "l": "low", "c": "close", "v": "volume"
    }, inplace=True)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    df.set_index("timestamp", inplace=True)
    df["ticker"] = ticker
    return df

In [180]:
df = fetch_polygon_bars(TICKER, START, END, INTERVAL_MINUTES)

if df.empty:
    print("❌ No data returned from Polygon/Massive.")

Fetching META 5-min bars 2025-07-15 → 2025-08-15 …


In [181]:
df.head()

Unnamed: 0_level_0,volume,vw,open,close,high,low,n,ticker
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-07-15 08:00:00+00:00,2796.0,723.0951,723.4,723.0,723.4,723.0,132,META
2025-07-15 08:05:00+00:00,421.0,722.7523,722.75,722.75,722.75,722.75,14,META
2025-07-15 08:10:00+00:00,1889.0,722.8511,723.07,722.97,723.07,722.64,67,META
2025-07-15 08:15:00+00:00,2429.0,723.2891,723.18,723.31,723.31,723.18,72,META
2025-07-15 08:20:00+00:00,456.0,723.9136,723.88,723.88,723.88,723.88,23,META


# Feature Engineering

In [182]:
def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()

    out["return_5m"] = out["close"].pct_change()
    out["return_30m"] = out["close"].pct_change(6)
    out["return_1h"] = out["close"].pct_change(12)
    out["volatility_1h"] = out["return_5m"].rolling(12, min_periods=8).std()

    # RSI
    out["rsi_14"] = rsi(out["close"], 14)

    # MACD
    out["macd"], out["macd_signal"], out["macd_hist"] = macd(out["close"])

    # EMA
    out["ema_12"] = ema(out["close"], 12)
    out["ema_26"] = ema(out["close"], 26)

    # Relative Volume
    window = 5 * 78
    out["rel_volume"] = out["volume"] / (
        out["volume"].rolling(window, min_periods=78).median()
    )

    return out.dropna(subset=["close"]).copy()

def add_sentiment_placeholders(df: pd.DataFrame) -> pd.DataFrame:
    df["sentiment_score"] = 0.0
    df["reputable_mentions"] = 0
    df["politician_trades"] = 0.0
    df["insider_activity_score"] = 0.0
    df["earnings_sentiment"] = 0.0
    df["google_trend_score"] = np.nan
    return df


def add_labels(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df["target_5m_return"] = df["close"].pct_change(periods=-1)
    df["target_up_bin"] = (df["target_5m_return"] > 0.002).astype(int)
    return df

In [183]:
df_feat = compute_features(df)
df_feat = add_sentiment_placeholders(df_feat)
df_feat = add_labels(df_feat)
df_feat = df_feat.reset_index().rename(columns={"index": "timestamp"})

# Output

In [184]:
preferred = [
        "timestamp", "ticker",
        "open", "high", "low", "close", "volume",
        "return_5m", "return_30m", "return_1h", "volatility_1h",
        "rsi_14", "macd", "macd_signal", "macd_hist",
        "ema_12", "ema_26", "rel_volume",
        "sentiment_score", "reputable_mentions", "politician_trades",
        "insider_activity_score", "earnings_sentiment", "google_trend_score",
        "target_5m_return", "target_up_bin"
    ]

In [185]:
cols = [c for c in preferred if c in df_feat.columns]
df_feat = df_feat[cols].sort_values("timestamp").reset_index(drop=True)
df_feat.to_csv(OUT_CSV, index=False)
print(f"✅ Wrote {len(df_feat):,} rows to {OUT_CSV}")

✅ Wrote 3,808 rows to meta_stock.csv


In [186]:
df_feat["target_5m_return"]

0       0.000346
1      -0.000304
2      -0.000470
3      -0.000787
4       0.000111
          ...   
3803    0.000204
3804    0.000000
3805   -0.000191
3806   -0.001018
3807         NaN
Name: target_5m_return, Length: 3808, dtype: float64

In [187]:
df.head(130)

Unnamed: 0_level_0,volume,vw,open,close,high,low,n,ticker
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-07-15 08:00:00+00:00,2796.0,723.0951,723.4000,723.000,723.4000,723.00,132,META
2025-07-15 08:05:00+00:00,421.0,722.7523,722.7500,722.750,722.7500,722.75,14,META
2025-07-15 08:10:00+00:00,1889.0,722.8511,723.0700,722.970,723.0700,722.64,67,META
2025-07-15 08:15:00+00:00,2429.0,723.2891,723.1800,723.310,723.3100,723.18,72,META
2025-07-15 08:20:00+00:00,456.0,723.9136,723.8800,723.880,723.8800,723.88,23,META
...,...,...,...,...,...,...,...,...
2025-07-15 19:45:00+00:00,121187.0,712.8841,712.9300,712.420,713.2700,712.23,4505,META
2025-07-15 19:50:00+00:00,222268.0,712.1906,712.4300,711.788,712.8400,711.69,8450,META
2025-07-15 19:55:00+00:00,791383.0,710.7835,711.9000,710.160,712.0000,709.82,20017,META
2025-07-15 20:00:00+00:00,140764.0,710.3804,710.3900,709.990,710.9799,709.68,716,META
