# Stock Trading ML Analysis

This notebook provides an interactive environment for analyzing stock data, training ML models, and making predictions.

## Setup

First, let's install the required dependencies and import necessary libraries.

In [None]:
# Install dependencies (run this only once)
# !pip install -r requirements.txt

# Import libraries (standard library first, then third-party)
import os
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings('ignore')

# Import custom modules
from data_fetcher import StockDataFetcher
from data_preprocessing import StockDataPreprocessor
from model_training import StockPredictor, ModelComparison

# Set plotting style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only know 'seaborn'. Pick whichever name this installation
# provides so the setup cell does not fail on the style lookup.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette("husl")

# Initialize fetcher
fetcher = StockDataFetcher()

print("Setup complete!")

## 1. Data Fetching

Let's fetch historical stock data for analysis.

In [None]:
# Configure stock and date range
STOCK_SYMBOL = 'AAPL'  # Change this to any stock symbol
START_DATE = '2020-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

print(f"Fetching data for {STOCK_SYMBOL} from {START_DATE} to {END_DATE}...")

# Fetch data
stock_data = fetcher.fetch_data(STOCK_SYMBOL, START_DATE, END_DATE)

# Fail fast with a clear message: every later cell indexes into this frame,
# so an empty or missing result would otherwise surface as an unrelated error
# further down the notebook.
if stock_data is None or stock_data.empty:
    raise ValueError(
        f"No data returned for {STOCK_SYMBOL} between {START_DATE} and {END_DATE}; "
        "check the symbol and the date range."
    )

# Display first few rows
print("\nFirst 5 rows of data:")
display(stock_data.head())

# Basic info
print(f"\nData shape: {stock_data.shape}")
print(f"Date range: {stock_data['Date'].min()} to {stock_data['Date'].max()}")

## 2. Exploratory Data Analysis

Let's explore the stock data with visualizations.

In [None]:
# Plot price history
# The indicator columns are added by the preprocessor in section 3, so the
# freshly fetched frame may not carry 'SMA_20' yet. Use the column when it is
# present and otherwise derive the 20-day simple moving average here, without
# modifying stock_data.
if 'SMA_20' in stock_data.columns:
    sma_20 = stock_data['SMA_20']
else:
    sma_20 = stock_data['Close'].rolling(window=20).mean()

plt.figure(figsize=(14, 7))
plt.plot(stock_data['Date'], stock_data['Close'], linewidth=2, label='Close Price')
plt.plot(stock_data['Date'], sma_20, alpha=0.7, label='20-day SMA', linewidth=1.5)
plt.title(f'{STOCK_SYMBOL} Price History', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price ($)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()

# Basic statistics
print("\nBasic Statistics:")
stats = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].describe()
display(stats)

# Price volatility: standard deviation of daily returns scaled by sqrt(252)
volatility = stock_data['Close'].pct_change().std() * np.sqrt(252)  # Annualized volatility
print(f"\nAnnualized Volatility: {volatility:.2%}")

In [None]:
# Volume analysis: closing price in the upper panel, traded volume in the lower one
price_dates = stock_data['Date']
fig, (price_ax, volume_ax) = plt.subplots(2, 1, figsize=(14, 10))

# Upper panel: closing price
price_ax.plot(price_dates, stock_data['Close'], color='blue', linewidth=2)
price_ax.set_title(f'{STOCK_SYMBOL} Price and Volume', fontsize=16)
price_ax.set_ylabel('Price ($)', fontsize=12)

# Lower panel: volume bars
volume_ax.bar(price_dates, stock_data['Volume'], color='orange', alpha=0.7)
volume_ax.set_ylabel('Volume', fontsize=12)
volume_ax.set_xlabel('Date', fontsize=12)

for panel in (price_ax, volume_ax):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Daily returns distribution (first row has no previous close, hence dropna)
daily_returns = stock_data['Close'].pct_change().dropna()
mean_return = daily_returns.mean()

hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(daily_returns, bins=50, alpha=0.7, edgecolor='black')
hist_ax.set_title(f'{STOCK_SYMBOL} Daily Returns Distribution', fontsize=16)
hist_ax.set_xlabel('Daily Return', fontsize=12)
hist_ax.set_ylabel('Frequency', fontsize=12)
# Mark the average daily return on the histogram
hist_ax.axvline(mean_return, color='red', linestyle='--', label=f'Mean: {mean_return:.4f}')
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)
plt.show()

## 3. Technical Analysis

Let's add technical indicators and analyze them.

In [None]:
# Create the preprocessor, clean the fetched frame, then add the indicator columns
preprocessor = StockDataPreprocessor()
cleaned_data = preprocessor.clean_data(stock_data)
data_with_indicators = preprocessor.add_technical_indicators(cleaned_data)

# Any column that is not one of the raw Date/OHLCV columns is reported as an indicator
raw_columns = {'Date', 'Open', 'High', 'Low', 'Close', 'Volume'}
technical_cols = [name for name in data_with_indicators.columns if name not in raw_columns]

print("Technical indicators added:")
print(technical_cols)

# Show the last ten rows for a selection of the indicators
preview_columns = ['Date', 'Close', 'SMA_5', 'SMA_20', 'RSI', 'MACD', 'BB_Upper', 'BB_Lower']
display(data_with_indicators[preview_columns].tail(10))

In [None]:
# Plot technical indicators in three stacked panels: Bollinger Bands, RSI, MACD
fig, (bb_ax, rsi_ax, macd_ax) = plt.subplots(3, 1, figsize=(14, 15))
indicator_dates = data_with_indicators['Date']

# Panel 1: closing price with the Bollinger band envelope shaded
bb_ax.plot(indicator_dates, data_with_indicators['Close'], label='Close Price', linewidth=2)
for band_column, band_label in (('BB_Upper', 'BB Upper'), ('BB_Lower', 'BB Lower')):
    bb_ax.plot(indicator_dates, data_with_indicators[band_column],
               label=band_label, alpha=0.7, linestyle='--')
bb_ax.fill_between(
    indicator_dates,
    data_with_indicators['BB_Upper'],
    data_with_indicators['BB_Lower'],
    alpha=0.1,
)
bb_ax.set_title('Price with Bollinger Bands', fontsize=14)
bb_ax.set_ylabel('Price ($)', fontsize=12)

# Panel 2: RSI with reference lines at 70 and 30
rsi_ax.plot(indicator_dates, data_with_indicators['RSI'], color='purple', linewidth=2)
for level, line_color, level_label in (
    (70, 'red', 'Overbought (70)'),
    (30, 'green', 'Oversold (30)'),
):
    rsi_ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.7, label=level_label)
rsi_ax.set_title('Relative Strength Index (RSI)', fontsize=14)
rsi_ax.set_ylabel('RSI', fontsize=12)
rsi_ax.set_ylim(0, 100)

# Panel 3: MACD line against its signal line
macd_ax.plot(indicator_dates, data_with_indicators['MACD'], label='MACD', linewidth=2)
macd_ax.plot(indicator_dates, data_with_indicators['MACD_Signal'], label='Signal', linewidth=1.5)
macd_ax.set_title('MACD Indicator', fontsize=14)
macd_ax.set_ylabel('MACD', fontsize=12)
macd_ax.set_xlabel('Date', fontsize=12)

# Every panel gets a legend and a light grid
for panel in (bb_ax, rsi_ax, macd_ax):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Model Training and Evaluation

Let's train ML models to predict price movements.

In [None]:
# Run the preprocessing pipeline: it returns the processed frame followed by
# the train/validation/test feature sets and their matching targets
(processed_data,
 X_train, X_val, X_test,
 y_train, y_val, y_test) = preprocessor.preprocess_pipeline(stock_data, prediction_days=1)

# Report the size of each split
for split_name, split_features in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_test),
):
    print(f"{split_name} data shape: {split_features.shape}")

# Relative frequency of each target value in the training split
print(f"Target distribution: {y_train.value_counts(normalize=True)}")

In [None]:
# Train a single model
MODEL_TYPE = 'random_forest'  # Options: 'random_forest', 'gradient_boosting', 'logistic', 'svm'

# Fit on the training split; the validation split is handed to train() as well
predictor = StockPredictor(MODEL_TYPE)
predictor.train(X_train, y_train, X_val, y_val)

# Score the fitted model on the test split
test_metrics = predictor.evaluate(X_test, y_test)

print("\nTest Metrics:")
for metric_name in test_metrics:
    print(f"{metric_name}: {test_metrics[metric_name]:.4f}")

In [None]:
# Display feature importance (if available)
feature_importance = predictor.get_feature_importance(15)
if feature_importance is not None:
    # Create the axes explicitly and hand them to pandas. Without `ax`, a
    # DataFrame's .plot() opens its own figure and the sized one stays empty;
    # passing the axes works the same for a Series or a DataFrame.
    # NOTE(review): the return type of get_feature_importance is not visible here.
    fig, ax = plt.subplots(figsize=(12, 8))
    feature_importance.plot(kind='barh', ax=ax)
    ax.set_title(f'Feature Importance - {MODEL_TYPE}', fontsize=16)
    ax.set_xlabel('Importance', fontsize=12)
    fig.tight_layout()
    plt.show()

## 5. Model Comparison

Let's compare different ML models.

In [None]:
# Compare multiple models
comparison = ModelComparison()

# Register each candidate, using its type string as its name as well
models_to_compare = ['random_forest', 'gradient_boosting', 'logistic', 'svm']
for candidate in models_to_compare:
    comparison.add_model(candidate, candidate)

# Fit every registered model, then score them all on the test split
comparison.train_all(X_train, y_train, X_val, y_val)
results = comparison.evaluate_all(X_test, y_test)

# Show the evaluation results as a transposed table
results_df = pd.DataFrame(results).transpose()
display(results_df)

# Visual comparison provided by ModelComparison
comparison.plot_comparison()

# Pick the winner by accuracy
best_model = comparison.get_best_model('accuracy')
print(f"\nBest model based on accuracy: {best_model}")

# From this point on `predictor` refers to the best model from the comparison
predictor = comparison.models[best_model]

## 6. Future Prediction

Let's make a prediction for the next trading day.

In [None]:
# Predict next day movement
prediction_result = predictor.predict_future(processed_data)

print("Next Day Prediction:")
print(f"Direction: {prediction_result['direction']}")
up_probability = prediction_result['probability']
print(f"Probability: {up_probability:.4f}")
print(f"Confidence: {prediction_result['confidence']}")

# Bar heights: the reported probability is drawn as 'Up', its complement as 'Down'
bar_heights = [1 - up_probability, up_probability]

# Visualize prediction confidence
plt.figure(figsize=(8, 6))
plt.bar(['Down', 'Up'], bar_heights, color=['red', 'green'], alpha=0.7)
plt.title('Next Day Prediction Confidence', fontsize=16)
plt.ylabel('Probability', fontsize=12)
plt.ylim(0, 1)
# Annotate each bar with its value just above the bar top
for bar_index, height in enumerate(bar_heights):
    plt.text(bar_index, height + 0.01, f'{height:.3f}', ha='center', fontsize=12)
plt.grid(True, alpha=0.3)
plt.show()

## 7. Backtesting

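Let's simulate a simple trading strategy based on our model. Note that the backtest below generates signals for the full processed dataset rather than only the held-out test split, so to the extent the model was trained on those rows the reported return is in-sample and will look better than it would on unseen data.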

In [None]:
# Simple backtesting
def backtest_strategy(data, predictions, initial_capital=10000):
    """
    Simulate a long-only, all-in/all-out strategy driven by model predictions.

    For every row except the last, the signal at that row is acted on at the
    row's ``Close`` price: a 1 while flat buys as many whole shares as the
    available cash allows, a 0 while holding sells the entire position.
    A position still open after the loop is sold at the final ``Close``.
    No transaction costs or slippage are modelled.

    Args:
        data: DataFrame with ``Close`` and ``Date`` columns, row-aligned with
            ``predictions``.
        predictions: Sequence of 0/1 signals (list, array or Series). It is
            read positionally, so a Series with a non-default index is fine.
        initial_capital: Starting cash.

    Returns:
        Tuple ``(capital, trades)``: the final cash balance and a list of
        trade dicts with the keys ``type``, ``price``, ``shares`` and ``date``.
    """
    # Materialise once: positional access regardless of the input container,
    # and no per-iteration row lookup on the DataFrame.
    signals = list(predictions)
    closes = data['Close'].tolist()
    dates = data['Date'].tolist()

    capital = initial_capital
    position = 0  # number of shares currently held; 0 means no position
    trades = []

    for i in range(len(signals) - 1):
        signal = signals[i]
        price = closes[i]

        # Buy signal while flat
        if signal == 1 and position == 0:
            shares = capital // price if price > 0 else 0
            # Skip the trade when not even one share is affordable; otherwise
            # a zero-share BUY would be logged again on every following bar.
            if shares > 0:
                capital -= shares * price
                position = shares
                trades.append({'type': 'BUY', 'price': price, 'shares': shares, 'date': dates[i]})

        # Sell signal while holding
        elif signal == 0 and position > 0:
            capital += position * price
            trades.append({'type': 'SELL', 'price': price, 'shares': position, 'date': dates[i]})
            position = 0

    # Close any remaining position at the last available price
    if position > 0:
        final_price = closes[-1]
        capital += position * final_price
        trades.append({'type': 'SELL', 'price': final_price, 'shares': position, 'date': dates[-1]})

    return capital, trades

# Starting cash for the simulation; used for both the backtest call and the report
INITIAL_CAPITAL = 10000

# Get predictions for all data
# NOTE(review): this scores every row of processed_data, not only the held-out
# test split, so the result below is optimistic wherever the model was trained
# on those rows — confirm against preprocess_pipeline before trusting it.
backtest_features = processed_data.drop(['Date', 'Target'], axis=1, errors='ignore')
# predict() returns a pair; only the first element is used here
all_predictions, _ignored = predictor.predict(backtest_features)

# Run backtest
final_capital, trades = backtest_strategy(
    processed_data, all_predictions, initial_capital=INITIAL_CAPITAL
)

profit = final_capital - INITIAL_CAPITAL
print(f"Initial Capital: ${INITIAL_CAPITAL:,}")
print(f"Final Capital: ${final_capital:.2f}")
print(f"Return: ${profit:.2f} ({profit / INITIAL_CAPITAL * 100:.2f}%)")
print(f"Number of trades: {len(trades)}")

# Display trades
trades_df = pd.DataFrame(trades)
display(trades_df)

## 8. Save and Load Models

Let's save the trained model for future use.

In [None]:
# Save the model
# `predictor` was rebound to the winner of the model comparison, so the file is
# named after `best_model` rather than the MODEL_TYPE trained earlier; otherwise
# the filename could advertise a different algorithm than the one stored in it.
model_filename = f"models/{STOCK_SYMBOL}_{best_model}_{datetime.now().strftime('%Y%m%d')}.pkl"
# Make sure the target directory exists before writing into it
os.makedirs(os.path.dirname(model_filename), exist_ok=True)
predictor.save_model(model_filename)
print(f"Model saved as: {model_filename}")

# Example of loading a model (uncomment to test)
# new_predictor = StockPredictor()
# new_predictor.load_model(model_filename)
# print("Model loaded successfully!")

## Summary

This notebook demonstrated a complete stock trading ML pipeline:

1. **Data Fetching**: Retrieved historical stock data
2. **Exploratory Analysis**: Visualized price trends and distributions
3. **Technical Analysis**: Added and analyzed technical indicators
4. **Model Training**: Trained ML models to predict price movements
5. **Model Comparison**: Compared different algorithms
6. **Prediction**: Made future price movement predictions
7. **Backtesting**: Simulated trading strategy performance
8. **Model Persistence**: Saved the trained model (loading it back is shown as an optional, commented-out example)

### Next Steps:
- Experiment with different stocks and time periods
- Try different technical indicators and features
- Implement more sophisticated trading strategies
- Add risk management and position sizing
- Deploy the model for real-time predictions

### Important Disclaimer:
This is for educational purposes only. Past performance does not guarantee future results. Always do your own research and consider consulting financial advisors before making investment decisions.