In [1]:
from datetime import datetime, timedelta
import pandas as pd
from tqdm import tqdm
from modeler.modeler import Modeler as m
from processor.processor import Processor as p
from database.market import Market
from database.adatabase import ADatabase
import numpy as np

In [2]:
# Calendar window of the study (not referenced by the cells visible in this section).
start_date = datetime(year=2016, month=1, day=1)
end_date = datetime(year=2022, month=6, day=30)
# Simulation years; consumed as range(start_year, end_year), so end_year is exclusive.
start_year, end_year = 2016, 2023

In [3]:
# Handles for the two project data stores used below. Connections are opened
# and closed explicitly by the connect()/disconnect() calls in later cells.
market = Market()
# "speculation" is the store the feature table is read from and the simulation
# results are written to.
speculation_db = ADatabase("speculation")

In [4]:
# Fetch the "sp500" dataset from the market store.
# (`sp500` is not used by the cells visible in this section.)
market.connect()
try:
    sp500 = market.retrieve("sp500")
finally:
    # Release the connection even when retrieve() raises, so a failed cell
    # does not leave a dangling connection in the kernel.
    market.disconnect()

In [5]:
# Load the "pdr_data" table that every later cell works on.
speculation_db.connect()
try:
    data = speculation_db.retrieve("pdr_data")
finally:
    # Release the connection even when retrieve() raises, so a failed cell
    # does not leave a dangling connection in the kernel.
    speculation_db.disconnect()

In [6]:
# Treat +/-inf as missing, then discard every row that still has a missing value
# in any column.
data = (
    data
    .replace(to_replace=[np.inf, -np.inf], value=np.nan)
    .dropna()
)

In [8]:
# Build one labelled frame per ticker. `y_class` is 1 when `adjclose` is higher
# than on the previous row of the same ticker, else 0 (the first row of each
# ticker has no previous value, so it gets 0).
# NOTE(review): diff(1) looks backward, so this labels the move *into* the
# current row, not the upcoming move — confirm that is the intended target
# before using `y_class` for training.
# NOTE(review): assumes rows within each ticker are already in chronological
# order — TODO confirm against the table that produced `data`.
classify_data = []
# sort=False keeps tickers in order of first appearance, matching a walk over
# data["ticker"].unique(), and avoids rescanning the whole frame per ticker.
for ticker, ticker_data in data.groupby("ticker", sort=False):
    # Work on an explicit copy so the new column is never written onto a
    # slice of `data` (the chained-assignment / SettingWithCopy pattern).
    ticker_data = ticker_data.copy()
    # NaN > 0 evaluates to False, so the first row of each ticker becomes 0.
    ticker_data["y_class"] = (ticker_data["adjclose"].diff(1) > 0).astype("int64")
    classify_data.append(ticker_data)

In [9]:
# Stack the per-ticker frames back into a single table, row-wise, keeping each
# row's existing index label.
data = pd.concat(classify_data, axis=0, ignore_index=False)

In [10]:
# Eyeball the labelled table; the notebook renders a truncated view of the
# first and last rows.
data

Unnamed: 0,year,week,adjclose,quarter,0,1,2,3,4,5,...,10,11,12,13,d1,d2,d3,ticker,y,y_class
0,2012.0,14.0,66.156456,2.0,66.156456,66.647684,66.698636,66.652164,64.594408,65.712465,...,64.469431,63.409097,62.498404,64.042451,-0.007371,8.648332,-5.126827,MMM,64.267672,0
1,2012.0,15.0,64.267672,2.0,64.267672,66.156456,66.647684,66.698636,66.652164,64.594408,...,64.990468,64.469431,63.409097,62.498404,-0.028550,2.873578,-0.667730,MMM,65.234367,0
2,2012.0,16.0,65.234367,2.0,65.234367,64.267672,66.156456,66.647684,66.698636,66.652164,...,65.291187,64.990468,64.469431,63.409097,0.015042,-1.526850,-1.531341,MMM,66.409380,1
3,2012.0,17.0,66.409380,2.0,66.409380,65.234367,64.267672,66.156456,66.647684,66.698636,...,65.504095,65.291187,64.990468,64.469431,0.018012,0.197482,-1.129339,MMM,66.924948,1
4,2012.0,18.0,66.924948,2.0,66.924948,66.409380,65.234367,64.267672,66.156456,66.647684,...,65.843601,65.504095,65.291187,64.990468,0.007763,-0.568986,-3.881212,MMM,65.450182,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245155,2022.0,27.0,51.002501,3.0,51.002501,51.910001,52.437501,51.995999,55.674000,56.670000,...,57.088102,63.040614,63.157301,63.406067,-0.017482,0.737860,-1.337736,ZION,49.870000,0
245156,2022.0,28.0,49.870000,3.0,49.870000,51.002501,51.910001,52.437501,51.995999,55.674000,...,57.058308,57.088102,63.040614,63.157301,-0.022205,0.270139,-0.633889,ZION,52.509999,0
245157,2022.0,29.0,52.509999,3.0,52.509999,49.870000,51.002501,51.910001,52.437501,51.995999,...,53.534868,57.058308,57.088102,63.040614,0.052938,-3.384062,-13.527130,ZION,53.682000,1
245158,2022.0,30.0,53.682000,3.0,53.682000,52.509999,49.870000,51.002501,51.910001,52.437501,...,52.826729,53.534868,57.058308,57.088102,0.022320,-0.578380,-0.829087,ZION,53.684000,1


In [11]:
# Walk-forward regression simulation: for every year in [start_year, end_year),
# fit models on the preceding `training_year` years of `data`, predict that
# year's rows, and store the predictions in the "pdr_sim" table.
#
# NOTE(review): `factors` (the feature column list) is not defined in any cell
# visible in this section (execution count 7 is missing). On a fresh kernel
# every year would end up in `errors` with a NameError message — define
# `factors` in an earlier cell before running this one.
sim = []  # not used inside this cell
errors = []  # rows of [year, training row count, prediction row count, reason]
training_year = 4  # length of the look-back window, in years
min_training_rows = 100  # a year is skipped unless it has MORE rows than this
min_prediction_rows = 40  # same, for the prediction year
speculation_db.connect()
try:
    for year in tqdm(range(start_year,end_year)):
        # Reset per iteration so the handler below never reads an unbound name
        # (first year) or reports the previous year's sizes when slicing fails.
        training_size = None
        prediction_size = None
        try:
            # Chained calls return new frames, so nothing is mutated in place
            # on a slice of `data`.
            training_data = (
                data[(data["year"]<year) & (data["year"]>=year-training_year)]
                .dropna()
                .reset_index(drop=True)
            )
            training_size = training_data.index.size
            prediction_data = data[data["year"]==year].dropna().reset_index()
            prediction_size = prediction_data.index.size
            if training_size > min_training_rows and prediction_size > min_prediction_rows:
                refined = {"X":training_data[factors],"y":training_data["y"]}
                # refined_classification = {"X":training_data[factors],"y":training_data["y_class"]}
                models = m.regression(refined)
                # classification_models = m.classification(refined_classification,multioutput=False,xgb=True,sk=True)
                # Each row of `models` carries a fitted "model", its "api" name
                # and its "score".
                for _, model_row in models.iterrows():
                    try:
                        model = model_row["model"]
                        api = model_row["api"]
                        score = model_row["score"]
                        prediction_data[f"{api}_prediction"] = model.predict(prediction_data[factors])
                        prediction_data[f"{api}_score"] = score
                    except Exception as e:
                        # Best effort: one failing model must not discard the
                        # predictions of the others for this year.
                        print(str(e))
                # for _, model_row in classification_models.iterrows():
                #     try:
                #         model = model_row["model"]
                #         api = model_row["api"]
                #         score = model_row["score"]
                #         prediction_data[f"{api}_classification_prediction"] = model.predict(prediction_data[factors])
                #         prediction_data[f"{api}_classification_score"] = score
                #     except Exception as e:
                #         print(str(e))
                included_columns = ["year","week","ticker","training_year"]
                included_columns.extend([x for x in prediction_data.columns if "score" in x or "prediction" in x])
                prediction_data["training_year"] = training_year
                speculation_db.store("pdr_sim",prediction_data[included_columns])
            else:
                errors.append([year,training_size,prediction_size,"not_enough_stuff"])
        except Exception as e:
            # Sizes are None when the failure happened before they were computed.
            errors.append([year,training_size,prediction_size,str(e)])
finally:
    # Always release the connection, including on KeyboardInterrupt during
    # this long-running loop.
    speculation_db.disconnect()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [07:45<00:00, 66.44s/it]
