In [2]:
!pip install yfinance
!pip install pandas-ta

# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
import requests

# Libraries to Extract Stock Price Historical Data 
# from Yahoo! Finance using Tickers from Wikipedia
import yfinance as yf
from pandas.tseries.offsets import DateOffset
from datetime import datetime

# Library for Technical Analysis
import pandas_ta as ta

# Libraries for ML Analysis using Linear Models
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.linear_model import LassoLars
from sklearn.linear_model import BayesianRidge

scaler = StandardScaler()

Collecting yfinance
  Downloading yfinance-0.2.37-py2.py3-none-any.whl.metadata (11 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.0.tar.gz (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.6/314.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.1.tar.gz (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25

In [3]:
# Extracting Stock Price Historical Data from Yahoo! Finance using Tickers from Wikipedia

def extract_stock_data():
    ticker_list= ticker_df['Symbol'].to_list()
    stock_list = ticker_df['Security'].to_list()
    print(stock_list)
    stock_name = input('Enter Stock_Name from the above displayed list for Analysis:')
    start_date = input('Enter Start Date for Historical Data in yyyy-mm-dd:')
    today = datetime.today().strftime('%Y-%m-%d')    
    row_index = ticker_df.query(f"Security == '{stock_name}'").index[0]
    stock_ticker = ticker_list[row_index]
    OHLCV_df = yf.download(stock_ticker,start_date,today)
    OHLCV_df.index = pd.to_datetime(OHLCV_df.index)
    print(f"{stock_ticker}:{stock_name}")
    display(OHLCV_df.head(3))
    display(OHLCV_df.tail(3))
    return(OHLCV_df)

In [4]:
# Feature Engineering _ additionals features for ML Analysis

def technical_indicators():
    OHLCV_df = extract_stock_data()
    # Daily Returns
    OHLCV_df['daily_returns'] = OHLCV_df['Close'].pct_change()
    # 21Day Volatility
    OHLCV_df['volatility_21'] = OHLCV_df['Close'].rolling(window=21).std()
    # Simple Moving Avg 21Days & 50Days
    OHLCV_df['sma_21'] = OHLCV_df.ta.sma(length=21)
    OHLCV_df['sma_50'] = OHLCV_df.ta.sma(length=50)
    # Exponential Moving Avg 21Days & 50Days
    OHLCV_df['ema_21'] = OHLCV_df.ta.ema(length=21)
    OHLCV_df['ema_50'] = OHLCV_df.ta.ema(length=50)
    # VWAP modified to use Annual cummulative sum
    OHLCV_df['typical_price'] = (OHLCV_df['High']+OHLCV_df['Low']+OHLCV_df['Close'])/3
    OHLCV_df['vw_typical_price'] = OHLCV_df['typical_price']*OHLCV_df['Volume']
    OHLCV_df['vwap_annual'] = OHLCV_df.groupby(OHLCV_df.index.year)['vw_typical_price'].cumsum() / OHLCV_df.groupby(OHLCV_df.index.year)['Volume'].cumsum()
    # RSI Momemtum Indicator 14Days
    OHLCV_df['rsi_14'] = OHLCV_df.ta.rsi()
    # MACD Momentum Indicator
    OHLCV_df[['ema_12', 'ema_26', 'MACD_signal']] = OHLCV_df.ta.macd()
    # Bolinger Bands %B Indicator
    OHLCV_df[['BB_lower','BB_middle','BB_upper','BBP','BB%B']]= OHLCV_df.ta.bbands()
    # On-Balance Volume in Millions
    OHLCV_df['OBV_in_million'] = OHLCV_df.ta.obv()/1000000
    # Average Directional Index 14Days
    OHLCV_df[['adx_14','dmp_14','dmn_14']] = OHLCV_df.ta.adx()
    # Dropping trivial Columns
    OHLCV_append_df = OHLCV_df.drop(columns = ['Open','High','Low','Adj Close',
                                               'typical_price','vw_typical_price',
                                               'BBP','dmp_14','dmn_14']).dropna()
    return(OHLCV_append_df)

In [5]:
# Preparing Features & Target DataFrame for ML Analysis

def predict_Close():
    OHLCV_append_df = technical_indicators()
    # Next Day's "Close" as target
    predictors_df = OHLCV_append_df
    target_df = OHLCV_append_df[['Close']].shift(-1)
    return(predictors_df,target_df)

def predict_Volume():
    OHLCV_append_df = technical_indicators()
    # Next Day's "Close" as target
    predictors_df = OHLCV_append_df
    target_df = OHLCV_append_df[['Volume']].shift(-1)
    return(predictors_df,target_df)

In [6]:
# Data Normalizing & Train-Test Split

def train_test_split_scaled():
    predictors_df, target_df = predict_Close()
    # Training Data of 27months and Testing Data of 6months alternatively 
    # with latest 33months of testing data
    train_range_months = 27
    test_range_months = 6
    split_range = list(range(len(predictors_df)//21//(train_range_months+test_range_months)))
    predictors_train = pd.DataFrame()
    
    for i in split_range:
        train_df = predictors_df.iloc[21*33*i:(21*33*i)+(21*(33-test_range_months)),:]
        predictors_train = pd.concat([predictors_train,train_df],axis=0)
        
    predictors_test = predictors_df.drop(index = predictors_train.index)
    target_train = target_df.drop(index = predictors_test.index)
    target_test = target_df.drop(index = predictors_train.index)
    
    #Using StandardScaler to fit predictors train data and transform both train and test data 
    predictors_scaled = scaler.fit(predictors_train)
    predictors_train_scaled = predictors_scaled.transform(predictors_train)
    predictors_test_scaled = predictors_scaled.transform(predictors_test)
    
    return(predictors_train_scaled,predictors_test_scaled,target_train,target_test)

In [7]:
# Predicting Stock Price using Linear Models

def linear_models_prediction():
    predictors_train_scaled,predictors_test_scaled,target_train,target_test = train_test_split_scaled()
    list_of_models = [Ridge(),Lasso(),MultiTaskElasticNet(),
                  LassoLars(),BayesianRidge()]
    target_predictions_df = target_test.copy()
    for model in list_of_models:
        model_instance = model
        model_instance.fit(predictors_train_scaled,target_train)
        model_prediction = model_instance.predict(predictors_test_scaled)
        target_predictions_df[f'{model}_Close'] = model_prediction

    display(target_predictions_df.tail())

    display(target_predictions_df.hvplot())

    return(target_predictions_df)

In [8]:
# Calculating Root_Mean_Square_Error(RMSE) 
# and Mean_Absolute_Percentage_Error(MAPE)

def linear_model_evaluation():
    target_predictions_df = linear_models_prediction()
    column_range = list(target_predictions_df.columns)
    dict = {}
    for i in column_range[1:]:
        rmse = np.sqrt(np.mean(
            (target_predictions_df.loc[:,'Close']-target_predictions_df.loc[:,i])**2))
        mape = np.mean(np.abs(
            (target_predictions_df.loc[:,'Close']-target_predictions_df.loc[:,i])/target_predictions_df.loc[:,'Close']))*100
        dict[f'{i}'] = [rmse,mape]
    error_measures = pd.DataFrame.from_dict(dict, orient = 'index', columns = ['RMSE','MAPE'])
    error_measures = error_measures.sort_values(by=['MAPE'])
    display(error_measures)
    display(error_measures.hvplot.bar())
    return (error_measures)

In [9]:
# Scrapping for List of stocks in S&P500 for Linear Model ML Prediction Analysis
url = input('Enter Wikipedia url for list of S&P500 Companies_')
  
wiki_data = pd.read_html(url)
print(wiki_data)

ticker_df = wiki_data[0]
display(ticker_df.head(2))
display(ticker_df.tail(2))

output = linear_model_evaluation()

Enter Wikipedia url for list of S&P500 Companies_ https://en.wikipedia.org/wiki/List_of_S%26P_500_companies


[    Symbol            Security             GICS Sector  \
0      MMM                  3M             Industrials   
1      AOS         A. O. Smith             Industrials   
2      ABT              Abbott             Health Care   
3     ABBV              AbbVie             Health Care   
4      ACN           Accenture  Information Technology   
..     ...                 ...                     ...   
498    XYL          Xylem Inc.             Industrials   
499    YUM         Yum! Brands  Consumer Discretionary   
500   ZBRA  Zebra Technologies  Information Technology   
501    ZBH       Zimmer Biomet             Health Care   
502    ZTS              Zoetis             Health Care   

                                GICS Sub-Industry    Headquarters Location  \
0                        Industrial Conglomerates    Saint Paul, Minnesota   
1                               Building Products     Milwaukee, Wisconsin   
2                           Health Care Equipment  North Chicago, Il

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916


Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
501,ZBH,Zimmer Biomet,Health Care,Health Care Equipment,"Warsaw, Indiana",2001-08-07,1136869,1927
502,ZTS,Zoetis,Health Care,Pharmaceuticals,"Parsippany, New Jersey",2013-06-21,1555280,1952


['3M', 'A. O. Smith', 'Abbott', 'AbbVie', 'Accenture', 'Adobe Inc.', 'Advanced Micro Devices', 'AES Corporation', 'Aflac', 'Agilent Technologies', 'Air Products and Chemicals', 'Airbnb', 'Akamai', 'Albemarle Corporation', 'Alexandria Real Estate Equities', 'Align Technology', 'Allegion', 'Alliant Energy', 'Allstate', 'Alphabet Inc. (Class A)', 'Alphabet Inc. (Class C)', 'Altria', 'Amazon', 'Amcor', 'Ameren', 'American Airlines Group', 'American Electric Power', 'American Express', 'American International Group', 'American Tower', 'American Water Works', 'Ameriprise Financial', 'Ametek', 'Amgen', 'Amphenol', 'Analog Devices', 'Ansys', 'Aon', 'APA Corporation', 'Apple Inc.', 'Applied Materials', 'Aptiv', 'Arch Capital Group', 'Archer-Daniels-Midland', 'Arista Networks', 'Arthur J. Gallagher & Co.', 'Assurant', 'AT&T', 'Atmos Energy', 'Autodesk', 'Automated Data Processing', 'AutoZone', 'AvalonBay Communities', 'Avery Dennison', 'Axon Enterprise', 'Baker Hughes', 'Ball Corporation', 'Bank

Enter Stock_Name from the above displayed list for Analysis: United Rentals
Enter Start Date for Historical Data in yyyy-mm-dd: 2012-01-01


[*********************100%%**********************]  1 of 1 completed

URI:United Rentals





Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-01-03,30.25,30.42,27.879999,28.959999,28.485994,3900000
2012-01-04,29.190001,29.969999,28.51,29.780001,29.292574,3922600
2012-01-05,29.440001,30.01,28.82,29.93,29.440117,1979800


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-20,691.51001,715.369995,691.51001,709.330017,709.330017,1070600
2024-03-21,717.590027,729.909973,713.26001,719.049988,719.049988,591900
2024-03-22,724.280029,727.0,713.450012,714.27002,714.27002,445500


  y = column_or_1d(y, warn=True)


Unnamed: 0_level_0,Close,Ridge()_Close,Lasso()_Close,MultiTaskElasticNet()_Close,LassoLars()_Close,BayesianRidge()_Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-03-18,693.619995,678.688389,677.810864,621.176349,677.755518,682.713753
2024-03-19,709.330017,687.428141,687.710849,624.984913,687.621307,692.549527
2024-03-20,719.049988,701.414946,703.248141,631.953754,703.104929,708.090609
2024-03-21,714.27002,711.468625,712.861411,639.852665,712.693122,717.482149
2024-03-22,,710.208612,708.134405,643.354377,708.000742,712.458926


Unnamed: 0,RMSE,MAPE
BayesianRidge()_Close,8.040024,2.046717
Ridge()_Close,8.289088,2.075759
Lasso()_Close,8.516413,2.102716
LassoLars()_Close,8.530736,2.105033
MultiTaskElasticNet()_Close,23.518409,5.158886
