# 📈 Stock Price Direction Classifier — Upgraded Version
A machine learning model that predicts whether the next day's market return will be positive or negative. It includes momentum indicators, baseline comparisons, and a backtest with a Sharpe ratio.

In [None]:
# Install required packages (if running locally)
# !pip install yfinance scikit-learn matplotlib pandas seaborn joblib

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

sns.set()


## 🧩 Step 1: Load Market Data

In [None]:
# Download daily bars for one ticker and derive the daily return and its sign.
ticker = 'SPY'
data = yf.download(ticker, start='2018-01-01', end='2024-12-31')

# Fail fast with a clear message: an empty frame would otherwise only surface
# later as a much less obvious error in the modelling cells.
if data.empty:
    raise ValueError(f"No price data returned for {ticker!r}; check the ticker, dates and network access.")

# NOTE(review): some yfinance versions return MultiIndex columns (field, ticker)
# even for a single ticker. Collapse to the field level so that lookups such as
# data['Close'] yield a plain Series -- confirm against the installed version.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Prefer the adjusted close when the download provides it.
price_col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
data['Return'] = data[price_col].pct_change()
# 1 when the day's return is strictly positive, otherwise 0.
data['Direction'] = np.where(data['Return'] > 0, 1, 0)
# Removes the first row, whose return is undefined (NaN).
data.dropna(inplace=True)
data.head()

## 🔧 Step 2: Feature Engineering

In [None]:
# Rolling indicators; each value is computed from prices up to and including
# that row's own close.
close = data[price_col]
data['SMA_5'] = close.rolling(5).mean()
data['SMA_10'] = close.rolling(10).mean()
data['Volatility'] = data['Return'].rolling(10).std()
data['Momentum_5'] = close.pct_change(5)
data.dropna(inplace=True)

features = ['SMA_5', 'SMA_10', 'Volatility', 'Momentum_5']

# Avoid look-ahead: 'Direction' on row t is the sign of the return that ends at
# row t's close, so the inputs for row t must come from row t-1. Without the
# lag, Momentum_5 and the SMAs already contain the close being predicted.
X = data[features].shift(1).dropna()
# Keep the target aligned with the rows that survived the lag.
y = data.loc[X.index, 'Direction']

## 🤖 Step 3: Train Models

In [None]:
# Chronological split: shuffle=False keeps the last 20% of rows as the test
# set, so the models are never trained on rows that come after the test period.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Logistic Regression
# The features are on very different scales (price-level SMAs vs. small
# volatility/momentum values), so allow more solver iterations than the default.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
log_preds = log_reg.predict(X_test)

# Random Forest
# Fixed seed so that predictions, metrics and the saved model are reproducible
# from run to run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)


## 📊 Step 4: Evaluate Performance

In [None]:
# Per-class precision/recall/F1 for each model on the held-out rows.
for report_title, model_preds in (
    ("Logistic Regression Report:", log_preds),
    ("Random Forest Report:", rf_preds),
):
    print(report_title)
    print(classification_report(y_test, model_preds))

# Naive benchmark: predict "up" (1) for every test row, then score it the same
# way as the random forest.
baseline_preds = np.ones_like(y_test)
baseline_acc, rf_acc = (
    accuracy_score(y_test, candidate) for candidate in (baseline_preds, rf_preds)
)

print(f"Baseline Accuracy (always UP): {baseline_acc:.2f}")
print(f"Random Forest Accuracy: {rf_acc:.2f}")

# Confusion matrix for the random forest (true labels vs. predictions).
conf_matrix = confusion_matrix(y_test, rf_preds)
print("Confusion Matrix (RF):\n", conf_matrix)

## 📈 Step 5: Backtest Simple Strategy

In [None]:
# Long-or-flat backtest on the test rows: hold the market on days where the
# random forest predicts 1, stay in cash (zero return) otherwise.
# NOTE(review): this is only a fair backtest if each prediction was produced
# from information available before that day's return -- confirm upstream.
test_data = data.loc[X_test.index].copy()
test_data['Strategy'] = rf_preds
test_data['Market Return'] = test_data['Return']
test_data['Strategy Return'] = test_data['Market Return'] * test_data['Strategy']

# Simple returns compound multiplicatively, so build the equity curve with a
# cumulative product; summing them misstates growth over long horizons.
cumulative = (1 + test_data[['Market Return', 'Strategy Return']]).cumprod() - 1
cumulative.plot(figsize=(10, 5))
plt.title("Cumulative Returns: Strategy vs Market")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.show()

## 📐 Step 6: Sharpe Ratio

In [None]:
def sharpe_ratio(returns, risk_free_rate=0.00, *, periods_per_year=252, annualize=False):
    """Return the Sharpe ratio of a series of periodic returns.

    Args:
        returns: Periodic (e.g. daily) simple returns; any array-like,
            including a list or a pandas Series. NaN entries are ignored.
        risk_free_rate: Annual risk-free rate; it is divided by
            ``periods_per_year`` to get the per-period rate.
        periods_per_year: Number of return periods in a year (252 for
            daily trading data).
        annualize: When True, scale the per-period ratio by
            ``sqrt(periods_per_year)``. Off by default, so the result is the
            per-period ratio.

    Returns:
        Mean excess return divided by its population standard deviation
        (ddof=0), as a float. NaN when there are no usable observations or
        when the excess returns have zero volatility.
    """
    excess = np.asarray(returns, dtype=float) - risk_free_rate / periods_per_year
    excess = excess[~np.isnan(excess)]
    if excess.size == 0:
        return float("nan")

    spread = excess.std()
    # A constant series has no risk to normalise by; report NaN explicitly
    # instead of dividing by zero.
    if spread == 0:
        return float("nan")

    ratio = excess.mean() / spread
    if annualize:
        ratio *= np.sqrt(periods_per_year)
    return float(ratio)

# Score the strategy and the buy-and-hold market returns with the same function
# so that the two numbers are directly comparable.
strategy_sr, market_sr = (
    sharpe_ratio(test_data[column]) for column in ('Strategy Return', 'Market Return')
)

print(f"Strategy Sharpe Ratio: {strategy_sr:.2f}")
print(f"Market Sharpe Ratio: {market_sr:.2f}")

## 💾 Step 7: Save the Trained Model

In [None]:
# Persist the fitted random forest to the working directory for later reuse.
joblib.dump(value=rf, filename='random_forest_model.pkl')

## ✅ Next Steps
- Add more features (e.g., RSI, MACD, volume)
- Try other tickers or sectors
- Test more advanced models (XGBoost, LSTM)
- Publish to GitHub and write a short blog post
- Connect it to a real-time Streamlit dashboard!