# Import Libraries

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

# Download Historical Data

In [2]:
def download_data(tickers, start_date, end_date):
    """Fetch historical data for each ticker with ``yf.download``.

    Args:
        tickers: Iterable of ticker symbols; one download call is made per
            symbol.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        dict mapping each ticker symbol to whatever ``yf.download`` returned
        for it. An empty ``tickers`` yields an empty dict.
    """
    return {
        symbol: yf.download(symbol, start=start_date, end=end_date)
        for symbol in tickers
    }

# Example universe and date window used by the rest of the notebook.
tickers = ['AAPL', 'MSFT', 'GOOGL']  # Example tickers
start_date, end_date = '2010-01-01', '2020-01-01'
# One download per ticker; the result is keyed by ticker symbol.
data = download_data(tickers, start_date=start_date, end_date=end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


# Preprocess Data

In [3]:
def preprocess_data(data):
    """Compute period-over-period percentage returns for every ticker.

    Args:
        data: Mapping of ticker symbol to a price DataFrame. Each frame must
            provide an 'Adj Close' column or, failing that, a 'Close' column.
            Columns may be flat or a MultiIndex whose first level holds the
            field name, as long as the selected field has exactly one column.

    Returns:
        dict mapping each ticker to a Series named 'Return' containing the
        percentage change of the selected price column, with NaN entries
        (such as the first observation) dropped.

    Raises:
        KeyError: If a frame has neither 'Adj Close' nor 'Close'.
        ValueError: If selecting the price field yields more than one column.

    Note:
        As in the original implementation, a 'Return' column (NaNs included)
        is also written onto each input frame, so code that reads it from
        ``data`` afterwards keeps working.
    """
    returns = {}
    for ticker, df in data.items():
        # Frames whose prices were downloaded already adjusted have no
        # separate 'Adj Close' column; fall back to 'Close' in that case.
        price_col = 'Adj Close' if 'Adj Close' in df.columns else 'Close'
        prices = df[price_col]

        if isinstance(prices, pd.DataFrame):
            # Selecting a field from MultiIndex columns returns a DataFrame;
            # collapse it to a Series when it is unambiguous.
            if prices.shape[1] != 1:
                raise ValueError(
                    f"Expected a single '{price_col}' column for {ticker}, "
                    f"found {prices.shape[1]}"
                )
            prices = prices.iloc[:, 0]

        period_returns = prices.pct_change().rename('Return')
        df['Return'] = period_returns
        returns[ticker] = period_returns.dropna()
    return returns

# Per-ticker return Series derived from the downloaded price frames.
returns = preprocess_data(data)

# Feature Engineering

In [4]:
def create_features(df, lag=5):
    """Return a copy of ``df`` extended with lagged 'Return' columns.

    Args:
        df: DataFrame containing a 'Return' column.
        lag: Number of lag columns to create. Columns are named 'Lag_1'
            through 'Lag_<lag>', where 'Lag_i' is 'Return' shifted down by
            ``i`` rows. A value below 1 adds no columns.

    Returns:
        A new DataFrame with the lag columns appended and every row that
        contains a NaN removed (this drops at least the first ``lag`` rows).
        The input frame is left unmodified.

    Raises:
        KeyError: If ``df`` has no 'Return' column and ``lag`` is at least 1.
    """
    # Build all lag columns first and attach them with assign(), which works
    # on a copy, so the caller's frame is not left half-processed.
    lagged = {f'Lag_{i}': df['Return'].shift(i) for i in range(1, lag + 1)}
    return df.assign(**lagged).dropna()

# Lagged-feature frame per ticker, built with create_features' default lag.
features = {}
for ticker, return_series in returns.items():
    # create_features reads a 'Return' column, so the Series (named 'Return'
    # by preprocess_data) is promoted to a one-column DataFrame first.
    features[ticker] = create_features(return_series.to_frame())

# Split Data

In [5]:
def split_data(df, test_size=0.2):
    """Split a feature frame into train and test partitions without shuffling.

    Args:
        df: DataFrame with a 'Return' target column; all remaining columns
            are used as predictors.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    target = df['Return']
    predictors = df.drop(columns=['Return'])
    # shuffle=False keeps the rows in their existing order across the split.
    parts = train_test_split(
        predictors, target, test_size=test_size, shuffle=False
    )
    return tuple(parts)

# Train/test partitions per ticker; each value is the
# (X_train, X_test, y_train, y_test) tuple returned by split_data.
split_data_dict = {}
for ticker, df in features.items():
    split_data_dict[ticker] = split_data(df)

# Train Model