In [1]:
import pandas as pd
from ranker.ranker import Ranker
from ranker.ranker_factory import RankerFactory
from database.market import Market
from database.sec import SEC
from database.adatabase import ADatabase
from processor.processor import Processor as p
from datetime import datetime, timedelta
from tqdm import tqdm
import numpy as np
from modeler_strats.universal_modeler import UniversalModeler

In [2]:
## build the ranker through the factory, selecting the WEEKLY_STOCK_FASTSLOW_RANKER variant
ranker_class = RankerFactory.build(
    Ranker.WEEKLY_STOCK_FASTSLOW_RANKER
)

In [3]:
## backtest window boundaries, plus the calendar year at the moment this cell runs
backtest_start_date = datetime(year=2016, month=1, day=1)
backtest_end_date = datetime(year=2023, month=1, day=1)
current_year = datetime.today().year

In [4]:
## instantiate the Market handle (from database.market) and the UniversalModeler strategy
market, modeler_strat = Market(), UniversalModeler()

In [5]:
## load the "sp500" table; try/finally guarantees the connection is released
## even when the retrieval raises, so a failed cell does not leave it open
market.connect()
try:
    sp500 = market.retrieve("sp500")
finally:
    market.disconnect()
## the later per-ticker loop reads sp500["ticker"], so expose "Symbol" under that name
sp500 = sp500.rename(columns={"Symbol": "ticker"})

In [6]:
## build one training frame per S&P 500 ticker and collect them in training_sets
training_sets = []
market.connect()
try:
    for ticker in tqdm(sp500["ticker"].unique()):
        try:
            prices = market.retrieve_ticker_prices(ranker_class.asset_class.value, ticker)
            prices = p.column_date_processing(prices)
            training_sets.append(ranker_class.training_set(ticker, prices))
        except Exception as e:
            # Deliberately best-effort: a ticker that fails is skipped, but the
            # symbol is reported next to the error so the gap can be traced.
            print(f"{ticker}: {e}")
finally:
    # Runs on normal completion, on an unexpected error, and on a manual
    # kernel interrupt, so the market connection is never left open.
    market.disconnect()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 490/490 [00:35<00:00, 13.76it/s]


In [7]:
## stack the per-ticker frames row-wise into a single frame
data = pd.concat(objs=training_sets)

In [8]:
## drop rows containing any missing value, then order by year and week;
## sort_values already returns a new frame, so no explicit .copy() is needed
training_data = data.dropna().sort_values(["year", "week"])

In [9]:
## collapse to one row per (year, quarter, week, ticker) by averaging the remaining columns
training_data = (
    training_data
    .groupby(by=["year", "quarter", "week", "ticker"])
    .mean()
    .reset_index()
)

In [10]:
## display the aggregated frame (the saved output below shows an abbreviated preview of its rows)
training_data

Unnamed: 0,year,quarter,week,ticker,rank_prediction
0,2000,2,21,A,-0.092549
1,2000,2,21,AAPL,-0.040249
2,2000,2,21,ABC,0.111776
3,2000,2,21,ABMD,-0.238272
4,2000,2,21,ABT,0.110141
...,...,...,...,...,...
545616,2023,2,25,YUM,0.018754
545617,2023,2,25,ZBH,0.029700
545618,2023,2,25,ZBRA,-0.067279
545619,2023,2,25,ZION,-0.211614


In [11]:
## persist every non-factor column of the aggregated frame to the ranker's "sim" table
# Filter in frame order instead of using a set difference: set iteration order
# for strings can change between interpreter runs, which made the stored column
# order non-reproducible. dict.fromkeys keeps the first occurrence of each label,
# matching the de-duplication the set-based version performed.
factor_columns = set(ranker_class.factors)
relevant_columns = [
    column
    for column in dict.fromkeys(training_data.columns)
    if column not in factor_columns
]
ranker_class.db.connect()
try:
    ranker_class.db.store("sim", training_data[relevant_columns])
finally:
    # Release the connection even if the store call fails.
    ranker_class.db.disconnect()

In [12]:
# ranker_class.db.connect()
# ranker_class.db.drop("predictions")
# relevant_columns = list(set(list(training_data.columns)) - set(ranker_class.factors))
# predictions = ranker_class.sim_processor(training_data)
# ranker_class.db.store("predictions",predictions[predictions["year"]==current_year][relevant_columns])
# ranker_class.db.disconnect()

In [13]:
# predictions[predictions["year"]==current_year][relevant_columns]