In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import akshare as ak  # https://github.com/akfamily/akshare
# Set the pandas 'display.max_rows' option to 500 for this notebook session.
pd.set_option('display.max_rows', 500)

%matplotlib inline

In [2]:
YEAR_BUY = 2023
## Get fundamental data (seasonal) or price data (daily)
#
# For every symbol in the ranked stock list, download one dataset through
# akshare, tag the rows with the symbol, and accumulate everything in
# `df_stock_all`. The accumulated frame is written to CSV after every symbol,
# so an interrupted run can be resumed by setting `is_from_scratch = False`.

is_from_scratch = True  # True: start with an empty frame; False: resume from the existing CSV
to_download = 'fundamental'  # which dataset to download: 'fundamental' or 'price'

# Get stock list (ordered by capital)
df_sp500_list = pd.read_excel('sp500_fulllist_ranked.xlsx', engine='openpyxl', sheet_name=str(YEAR_BUY))
df_stocks = df_sp500_list.stock
stocks = list(df_stocks[df_stocks != 'GOOG'].values)  # There is GOOGL already
stocks += ['OHI']  # I like OHI

# Output CSV per dataset kind.
file_name = {
    'fundamental': f'sp500_history_raw2_{str(YEAR_BUY)}.csv',
    'price': f'sp500_history_price_raw2_{str(YEAR_BUY)}.csv',
}
# A symbol whose downloaded frame has fewer rows than this is recorded under anom['Short'].
min_row = {
    'fundamental': 3,
    'price': 190,
}
# Symbols that need a second look: 'Failed' = download raised IndexError,
# 'Short' = fewer rows than min_row[to_download].
anom = {'Failed': [], 'Short': []}

# Fail fast on a typo instead of printing a message and then carrying on with
# an undefined (or stale) df_stock inside the loop.
if to_download not in file_name:
    raise ValueError(
        f"to_download must be one of {sorted(file_name)}, got {to_download!r}"
    )

# Always start from explicitly defined state so the cell does not depend on
# whatever happens to be left over in the kernel from an earlier run.
df_stock_all = pd.DataFrame()
stock_downloaded = set()
if not is_from_scratch:
    try:
        df_stock_all = pd.read_csv(file_name[to_download])
    except FileNotFoundError:
        print(f'{file_name[to_download]} not found, starting from scratch')
    else:
        downloaded = df_stock_all.stock.unique()
        print(f'Downloaded {len(downloaded)}, {downloaded}')
        stock_downloaded = set(downloaded)

for stock_symbol in stocks:
    if stock_symbol in stock_downloaded:
        continue
    try:
        if to_download == 'fundamental':
            df_pe = ak.stock_us_fundamental(stock=stock_symbol, symbol="PE")
            df_pb = ak.stock_us_fundamental(stock=stock_symbol, symbol="PB")
            # Both frames carry 'stock_price'; keep only the PE copy and join on date.
            df_stock = pd.merge(df_pe, df_pb.drop('stock_price', axis=1), on='date')
        else:  # 'price' (to_download was validated above)
            df_stock = ak.stock_us_daily(symbol=stock_symbol)
    except IndexError:
        # Only IndexError is treated as a skippable per-symbol failure; any
        # other exception propagates and stops the run.
        # NOTE(review): presumably this is what akshare raises when it has no
        # data for a symbol -- confirm.
        print(f'Failed for {stock_symbol}')
        anom['Failed'].append(stock_symbol)
        continue
    df_stock = df_stock.assign(stock=stock_symbol)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # The empty-frame guard avoids concatenating with the column-less initial frame.
    df_stock_all = df_stock if df_stock_all.empty else pd.concat([df_stock_all, df_stock])
    stock_downloaded.add(stock_symbol)  # also prevents re-downloading a duplicated symbol
    print((stock_symbol, df_stock.date.min(), df_stock.date.max(), len(df_stock)))
    if len(df_stock) < min_row[to_download]:
        anom['Short'].append(stock_symbol)
    # Checkpoint after every symbol so an interrupted run loses at most one download.
    df_stock_all.to_csv(file_name[to_download], index=False)

KeyboardInterrupt: 