In [None]:
!pip install yfinance pandas matplotlib scikit-learn
!pip install requests 
!pip install vaderSentiment
!pip install xgboost

In [9]:


# ─────────────────────────────────────────────────────────────────────────────
# TESLA STOCK MOVEMENT PREDICTION PROJECT
# Final Jupyter Notebook — Cleaned, Documented, and Ready for Presentation
# Model 1: Daily Price Direction (Up/Down)
# Model 2: Weekly Trend Prediction (Up/Down)
# ─────────────────────────────────────────────────────────────────────────────

import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# ─── 1. DOWNLOAD TESLA STOCK DATA ─────────────────────────────────────────────
ticker = "TSLA"
df = yf.download(ticker, start="2022-01-01", end=None)
df.reset_index(inplace=True)

# ─── 2. CREATE TECHNICAL FEATURES ─────────────────────────────────────────────
df["lag_1"] = df["Close"].shift(1)
df["lag_2"] = df["Close"].shift(2)
df["lag_3"] = df["Close"].shift(3)
df["lag_4"] = df["Close"].shift(4)
df["lag_5"] = df["Close"].shift(5)

df["price_change"] = df["Close"] - df["Open"]
df["ma_3"] = df["Close"].rolling(window=3).mean()
df["ma_5"] = df["Close"].rolling(window=5).mean()
df["volatility"] = df["Close"].rolling(window=5).std()
df["daily_return"] = df["Close"].pct_change()
df["rsi"] = df["daily_return"].rolling(14).apply(lambda x: np.mean(x[x>0]) / (np.mean(np.abs(x)) + 1e-6))
df["close_to_open"] = (df["Close"] - df["Open"]) / df["Open"]
df["volume_change"] = df["Volume"].pct_change()
df["momentum_3"] = df["Close"] - df["Close"].shift(3)
df["return_std"] = df["daily_return"].rolling(5).std()

# MACD (Moving Average Convergence Divergence)
ema_12 = df["Close"].ewm(span=12, adjust=False).mean()
ema_26 = df["Close"].ewm(span=26, adjust=False).mean()
df["macd"] = ema_12 - ema_26
df["macd_signal"] = df["macd"].ewm(span=9, adjust=False).mean()

# Bollinger Band Width
ma20 = df["Close"].rolling(window=20).mean()
std20 = df["Close"].rolling(window=20).std()
df["bollinger_width"] = (2 * std20) / ma20

# Stochastic Oscillator (%K)
low_14 = df["Close"].rolling(window=14).min()
high_14 = df["Close"].rolling(window=14).max()
df["stochastic"] = (df["Close"] - low_14) / (high_14 - low_14)

# ─── 3. DEFINE DAILY TARGET ────────────────────────────────────────────────────
df["Target"] = (df["Close"].shift(-1) > df["Close"]).astype(int)

# ─── 4. DROP MISSING VALUES ────────────────────────────────────────────────────
df.dropna(inplace=True)

# ─── 5. SELECT FEATURES ────────────────────────────────────────────────────────
feature_cols = [
    "lag_1", "lag_2", "lag_3", "lag_4", "lag_5",
    "price_change", "ma_3", "ma_5", "volatility",
    "daily_return", "rsi", "close_to_open",
    "volume_change", "momentum_3", "return_std",
    "macd", "macd_signal", "bollinger_width", "stochastic"
]

X = df[feature_cols]
y = df["Target"]

# ─── 6. SCALE FEATURES ────────────────────────────────────────────────────────
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ─── 7. SPLIT DATA ─────────────────────────────────────────────────────────────
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, shuffle=False)

# ─── 8. GRID SEARCH (XGBoost) ─────────────────────────────────────────────────
params = {
    'max_depth': [3, 5],
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1]
}

grid = GridSearchCV(
    estimator=XGBClassifier(random_state=42, eval_metric="logloss"),
    param_grid=params,
    scoring='accuracy',
    cv=3,
    verbose=1
)

grid.fit(X_train, y_train)
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

print("\n📊 Daily Prediction Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=["Down", "Up"]))

# ─── 9. WEEKLY MODEL   ──────────────────────────────────────
# 9A  Resample to weeks
df_week = (
    df.set_index("Date")[feature_cols + ["Close"]]
      .resample("W-MON")             # Monday-anchored weeks
      .mean()
      .reset_index()
)
df_week["Target"] = (df_week["Close"].shift(-1) > df_week["Close"]).astype(int)
df_week.dropna(inplace=True)

# 9B  Features / scale / split
Xw = df_week[feature_cols]
yw = df_week["Target"]
Xw_scaled  = scaler.fit_transform(Xw)
Xw_train, Xw_test, yw_train, yw_test = train_test_split(
    Xw_scaled, yw, test_size=0.2, shuffle=False
)

# 9C  Train using same best params
weekly_model = XGBClassifier(**grid.best_params_, random_state=42, eval_metric="logloss")
weekly_model.fit(Xw_train, yw_train)
yw_pred = weekly_model.predict(Xw_test)

print("\n📅 Weekly Prediction Accuracy:", accuracy_score(yw_test, yw_pred))
print(classification_report(yw_test, yw_pred, target_names=["Down", "Up"]))



[*********************100%***********************]  1 of 1 completed


Fitting 3 folds for each of 8 candidates, totalling 24 fits

📊 Daily Prediction Accuracy: 0.5555555555555556
              precision    recall  f1-score   support

        Down       0.59      0.49      0.54        89
          Up       0.53      0.62      0.57        82

    accuracy                           0.56       171
   macro avg       0.56      0.56      0.55       171
weighted avg       0.56      0.56      0.55       171


📅 Weekly Prediction Accuracy: 0.6111111111111112
              precision    recall  f1-score   support

        Down       0.56      0.82      0.67        17
          Up       0.73      0.42      0.53        19

    accuracy                           0.61        36
   macro avg       0.64      0.62      0.60        36
weighted avg       0.65      0.61      0.60        36

