# Basic Backtester

In [None]:
!pip install yfinance

In [None]:
!pip install lxml

In [None]:
# Import all the important libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly 
import yfinance as yf
import time
# etc...

In [None]:
# Example: fetch AAPL price history for a fixed date range
apple_data = yf.download(
    "AAPL",
    start="2023-11-01",
    end="2024-05-01",
)

# Preview the first rows as plain text
print(apple_data.head())



[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open    Volume
Ticker            AAPL        AAPL        AAPL        AAPL      AAPL
Date                                                                
2023-11-01  172.478012  172.735777  168.661024  169.533482  56934900
2023-11-02  176.047150  176.255340  173.955245  174.014728  77334800
2023-11-03  175.135025  175.303580  171.863338  172.745705  79763700
2023-11-06  177.692917  177.891199  174.698827  174.867368  63841300
2023-11-07  180.260696  180.875374  177.435132  177.643323  70530000





In [None]:
# Load the list of S&P 500 companies from Wikipedia
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
table = pd.read_html(url)  # returns a list of DataFrames, one per HTML table found on the page
sp500_df = table[0]  # first table on the page; its 'Symbol' column holds the tickers
# Wikipedia writes share classes with a dot (e.g. BRK.B) while Yahoo Finance
# uses a dash (BRK-B), so normalise the symbols before handing them to yfinance.
tickers = sorted(sp500_df['Symbol'].str.replace('.', '-', regex=False))  # sorted list of S&P 500 tickers
print(tickers[:10])


['A', 'AAPL', 'ABBV', 'ABNB', 'ABT', 'ACGL', 'ACN', 'ADBE', 'ADI', 'ADM']


Now we can use these tickers to get the data.


Note: Yahoo Finance may block or throttle requests if you fetch too many tickers at once, so download them in small batches.

In [None]:
# Download price data for all tickers in batches (10 per batch by default).
# Returns a dict {ticker: DataFrame} plus the list of tickers that could not be downloaded.
def _extract_ticker_frame(data, ticker, single_ticker_batch=False):
    """Return ``ticker``'s frame from a download result, or None if it holds no data.

    Handles three result layouts:
      * a DataFrame whose columns are a MultiIndex with the ticker on level 0
        (what ``group_by='ticker'`` asks for),
      * a dict keyed by ticker,
      * a DataFrame with flat columns, accepted only when the batch contained a
        single ticker (NOTE(review): some yfinance versions appear to return
        this layout for one-ticker requests -- confirm against the installed version).
    """
    if isinstance(data, dict):
        frame = data.get(ticker)
    elif isinstance(data.columns, pd.MultiIndex):
        # get_level_values reflects labels actually present; .levels may keep unused ones
        if ticker not in data.columns.get_level_values(0):
            return None
        frame = data[ticker]
    elif single_ticker_batch:
        frame = data
    else:
        frame = None

    # A ticker whose download failed can still appear as columns filled with NaN;
    # treat "no row with any value" as a failure.
    if frame is None or frame.dropna(how="all").empty:
        return None
    return frame


def download_sp500_data(tickers, start="2020-01-01", end="2024-01-01", batch_size=10, pause=1):
    """Download price history for ``tickers`` from Yahoo Finance in batches.

    Args:
        tickers: Sequence of ticker symbols.
        start: Start date passed through to ``yf.download``.
        end: End date passed through to ``yf.download``.
        batch_size: Number of tickers requested per ``yf.download`` call.
        pause: Seconds to sleep after each batch (values <= 0 disable the sleep).

    Returns:
        ``(all_data, failed_tickers)`` where ``all_data`` maps each successfully
        downloaded ticker to its DataFrame and ``failed_tickers`` lists the
        tickers for which no usable data was obtained.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    all_data = {}
    failed_tickers = []
    for i in range(0, len(tickers), batch_size):
        batch = tickers[i:i + batch_size]
        print(f"Downloading batch {i // batch_size + 1}: {batch}")  # 1/10 = 0.1 while 1//10 = 0
        try:
            data = yf.download(batch, start=start, end=end, group_by='ticker', threads=True)
            for ticker in batch:
                frame = _extract_ticker_frame(data, ticker, single_ticker_batch=len(batch) == 1)
                if frame is None:
                    failed_tickers.append(ticker)
                else:
                    all_data[ticker] = frame
        except Exception as e:
            print(f"Batch error: {e}")
            # Only flag tickers from this batch that were not classified yet, so an
            # error part-way through cannot mark a stored ticker as failed or add duplicates.
            unresolved = [t for t in batch if t not in all_data and t not in failed_tickers]
            failed_tickers.extend(unresolved)
        if pause > 0:
            time.sleep(pause)  # be gentle with the remote service between batches
    return all_data, failed_tickers


In [None]:
# Store each ticker's data as {ticker}.csv, where the file name is the ticker symbol
def save_data_to_csv(data_dict, output_dir="."):
    """Write every DataFrame in ``data_dict`` to ``<output_dir>/<ticker>.csv``.

    Args:
        data_dict: Mapping of ticker symbol -> object exposing ``to_csv``
            (DataFrames in this notebook).
        output_dir: Directory that receives the files. It is created when
            missing. Defaults to the current working directory, which is where
            the files were written before this parameter existed.

    Returns:
        None. One "Saved ..." line is printed per file written.
    """
    from pathlib import Path  # local import so this cell does not depend on the import cell

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    for ticker, df in data_dict.items():
        filename = target_dir / f"{ticker}.csv"
        df.to_csv(filename)
        print(f"Saved {filename}")

In [None]:
# ---------------------------------------------------------------------------
# Download the data for every ticker and write one CSV per ticker.
# This requests the whole ticker list, so it may take a long time to finish.
# ---------------------------------------------------------------------------
sp500_data, failed = download_sp500_data(tickers)
save_data_to_csv(sp500_data)

# Keep only the tickers that are not in the failed list
tickers = list(filter(lambda t: t not in failed, tickers))




In [None]:
# Write the ticker list to Tickers.csv.
# The DataFrame's default integer index and column label are written as well.
t = pd.DataFrame(data=tickers)
t.to_csv(path_or_buf="Tickers.csv")
