In [7]:
# Install required dependencies for stock prediction and backtesting
import subprocess
import sys

packages = ['yfinance', 'scikit-learn', 'backtesting']
for package in packages:
    subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])

In [None]:
# 📈 Step 1: Load data
import yfinance as yahoo_finance
import pandas as dataframe_lib
import numpy as numerical_ops
from sklearn.linear_model import LinearRegression as MLRegressor
from backtesting import Backtest as BacktestEngine, Strategy as TradingStrategy

# Configuration parameters
TICKER_SYMBOL = "AAPL"
START_DATE = "2020-01-01"
END_DATE = "2023-01-01"
INITIAL_CAPITAL = 10000
TRADING_COMMISSION = 0.002

def fetch_stock_data(symbol, start, end):
    """Download and prepare stock market data"""
    stock_df = yahoo_finance.download(symbol, start=start, end=end)
    
    # Handle multi-level column structure
    if hasattr(stock_df.columns, 'nlevels') and stock_df.columns.nlevels > 1:
        stock_df.columns = [col[0] if isinstance(col, tuple) else col for col in stock_df.columns]
    
    # Standardize column names
    stock_df.columns.name = None
    required_cols = ["Open", "High", "Low", "Close", "Volume"]
    clean_data = stock_df[required_cols].copy()
    
    print(f"Data shape: {clean_data.shape}")
    print(f"Available columns: {list(clean_data.columns)}")
    
    return clean_data

def engineer_technical_features(price_data, lookback_periods=5):
    """Create predictive features from price data"""
    enhanced_df = price_data.copy()
    
    # Historical price features (different lag structure)
    for period in range(1, lookback_periods + 1):
        enhanced_df[f"price_lag_{period}d"] = enhanced_df["Close"].shift(period)
    
    # Technical indicators with different names
    enhanced_df["sma_short"] = enhanced_df["Close"].rolling(window=5).mean()
    enhanced_df["sma_long"] = enhanced_df["Close"].rolling(window=10).mean()
    enhanced_df["price_momentum"] = enhanced_df["Close"].pct_change()
    enhanced_df["volume_avg"] = enhanced_df["Volume"].rolling(window=5).mean()
    
    # Target variable (tomorrow's price)
    enhanced_df["next_close"] = enhanced_df["Close"].shift(-1)
    
    # Remove incomplete records
    enhanced_df.dropna(inplace=True)
    
    return enhanced_df

def build_prediction_model(dataset):
    """Train machine learning model for price forecasting"""
    # Select predictive variables
    predictor_cols = [col for col in dataset.columns 
                     if any(keyword in col for keyword in ["lag_", "sma_", "momentum", "volume_avg"])]
    
    feature_matrix = dataset[predictor_cols]
    target_vector = dataset["next_close"]
    
    # Initialize and train regression model
    price_predictor = MLRegressor()
    price_predictor.fit(feature_matrix, target_vector)
    
    print(f"Model trained with {len(predictor_cols)} features")
    print(f"Training samples: {len(feature_matrix)}")
    
    return price_predictor, predictor_cols

class PredictiveTrading(TradingStrategy):
    """Custom trading strategy based on ML predictions"""
    
    def init(self):
        """Initialize strategy with model predictions"""
        feature_data = self.data.df[predictor_variables]
        self.forecasts = trained_model.predict(feature_data)
        self.position_size = 10
    
    def next(self):
        """Execute trading logic for each time step"""
        current_index = len(self.data.Close) - 1
        predicted_price = self.forecasts[current_index]
        current_price = self.data.Close[-1]
        
        # Close existing positions
        if self.position:
            self.position.close()
        
        # Trading decision logic
        if predicted_price > current_price * 1.001:  # Small threshold to avoid noise
            self.buy(size=self.position_size)
        elif predicted_price < current_price * 0.999:
            self.sell(size=self.position_size)

# Main execution workflow
print("🔄 Fetching market data...")
market_data = fetch_stock_data(TICKER_SYMBOL, START_DATE, END_DATE)

print("🔧 Engineering features...")
processed_data = engineer_technical_features(market_data)

print("🤖 Training prediction model...")
trained_model, predictor_variables = build_prediction_model(processed_data)

print("📊 Preparing backtest data...")
# Use processed_data so all feature columns are available for the strategy
backtest_dataset = processed_data.copy()

print("🚀 Running strategy backtest...")
strategy_test = BacktestEngine(
    backtest_dataset, 
    PredictiveTrading, 
    cash=INITIAL_CAPITAL, 
    commission=TRADING_COMMISSION
)

performance_results = strategy_test.run()

print("📈 Generating visualization...")
strategy_test.plot()

print("📋 Performance Summary:")
print("=" * 50)
print(performance_results)


  stock_df = yahoo_finance.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

🔄 Fetching market data...
Data shape: (756, 5)
Available columns: ['Open', 'High', 'Low', 'Close', 'Volume']
🔧 Engineering features...
🤖 Training prediction model...
Model trained with 9 features
Training samples: 746
📊 Preparing backtest data...
🚀 Running strategy backtest...





Backtest.run:   0%|          | 0/745 [00:00<?, ?bar/s]

📈 Generating visualization...


📋 Performance Summary:
Start                     2020-01-15 00:00:00
End                       2022-12-29 00:00:00
Duration                   1079 days 00:00:00
Exposure Time [%]                     84.7185
Equity Final [$]                   9196.60805
Equity Peak [$]                    10188.7702
Commissions [$]                    2393.83292
Return [%]                           -8.03392
Buy & Hold Return [%]                69.97318
Return (Ann.) [%]                    -2.78946
Volatility (Ann.) [%]                 3.61358
CAGR [%]                             -1.93698
Sharpe Ratio                         -0.77194
Sortino Ratio                        -1.02969
Calmar Ratio                         -0.22822
Alpha [%]                            -9.01297
Beta                                  0.01399
Max. Drawdown [%]                   -12.22277
Avg. Drawdown [%]                    -1.97419
Max. Drawdown Duration      807 days 00:00:00
Avg. Drawdown Duration      107 days 00:00:00
# Trades   