In [1]:
from modeler.modeler import Modeler as m
from database.market import Market
from database.strategy import Strategy
from xgboost import XGBRegressor
import pandas as pd
from datetime import datetime
from tqdm import tqdm
from sklearn.metrics import r2_score, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split, GridSearchCV

In [2]:
market = Market()
strategy = Strategy()

In [3]:
strategy.connect()
weekly_sim = strategy.retrieve("weekly_sim")
quarterly_sim = strategy.retrieve("quarterly_sim")
strategy.disconnect()
market.connect()
prices = market.retrieve("prices")
market.disconnect()

In [4]:
prices["date"] = pd.to_datetime(prices["date"])
prices["date"]
prices["year"] = [x.year for x in prices["date"]]
prices["quarter"] = [x.quarter for x in prices["date"]]
prices["week"] = [x.week for x in prices["date"]]

In [5]:
qp = prices.groupby(["year","quarter","ticker"]).mean().reset_index()[["year","quarter","ticker","adjClose"]]
wp = prices.groupby(["year","week","ticker"]).mean().reset_index()[["year","week","ticker","adjClose"]]
quarterly_set = quarterly_sim.merge(qp,on=["year","quarter","ticker"],how="left")
weekly_set = weekly_sim.merge(wp,on=["year","week","ticker"],how="left")
weekly_set.rename(columns={"score":"weekly_price_regression_score"},inplace=True)

In [6]:
##quarterly_analysis
qa = []
for cty in range(1,5):
    for mty in range(3,6):
        try:
            current_set = quarterly_set[(quarterly_set["category_training_year"]==cty) & (quarterly_set["model_training_year"]==mty)]
            current_set["actual"] = current_set["adjClose"].shift(-1)
            current_set.dropna(inplace=True)
            r2 = r2_score(current_set["quarterly_price_regression_prediction"],current_set["adjClose"])
            mape = mean_absolute_percentage_error(current_set["quarterly_price_regression_prediction"],current_set["adjClose"])
            qa.append({
                "cty":cty,
                "mty":mty,
                "r2":r2,
                "mape":mape
            })
        except Exception as e:
            print(str(e))
hello = pd.DataFrame(qa)
hello["score"] = hello["r2"] * 0.5 + (1-hello["mape"])*0.5
quarterly_analysis = hello.sort_values("score",ascending=False)

In [7]:
##quarterly_analysis
qa = []
for tw in range(14,68,14):
    for cty in range(1,5):
        for mty in range(1,6):
            try:
                current_set = weekly_set[(weekly_set["cty"]==cty) & (weekly_set["mty"]==mty) & (weekly_set["tw"]==tw)]
                if current_set.index.size > 0:
                    current_set["actual"] = current_set["adjClose"].shift(-1)
                    current_set.dropna(inplace=True)
                    r2 = r2_score(current_set["weekly_price_regression_prediction"],current_set["adjClose"])
                    mape = mean_absolute_percentage_error(current_set["weekly_price_regression_prediction"],current_set["adjClose"])
                    qa.append({
                        "cty":cty,
                        "mty":mty,
                        "tw":tw,
                        "r2":r2,
                        "mape":mape
                    })
            except Exception as e:
                print(str(e))
hello = pd.DataFrame(qa)
hello["score"] = hello["r2"] * 0.5 + (1-hello["mape"])*0.5
weekly_analysis = hello.sort_values("score",ascending=False)

In [8]:
rqa = quarterly_analysis[quarterly_analysis["score"]>0.7]
rwa = weekly_analysis[weekly_analysis["score"]>0.7]

In [9]:
rqa

Unnamed: 0,cty,mty,r2,mape,score
0,1,3,0.820909,0.368277,0.726316
6,3,3,0.845291,0.413604,0.715843
1,1,4,0.759846,0.343746,0.70805


In [10]:
quarterly_set.head(5)

Unnamed: 0,_id,year,quarter,ticker,quarterly_price_regression_prediction,score,model_training_year,category_training_year,adjClose
0,6157fe77b7e39a36b301ac1f,2018,1,AFL,46.382053,0.932499,3,1,40.543878
1,6157fe77b7e39a36b301ac20,2018,1,EIX,62.488144,0.932499,3,1,52.800642
2,6157fe77b7e39a36b301ac21,2018,1,COST,156.67421,0.932499,3,1,177.90101
3,6157fe77b7e39a36b301ac22,2018,1,SBUX,51.614201,0.932499,3,1,53.856281
4,6157fe77b7e39a36b301ac23,2018,1,PH,172.176758,0.932499,3,1,177.998981


In [11]:
sets = []
for row in list(rqa.iterrows())[0:2]:
    mty = row[1]["mty"]
    cty = row[1]["cty"]
    cqs = quarterly_set[(quarterly_set["model_training_year"]==mty) & (quarterly_set["category_training_year"]==cty)]
    cqs.rename(columns={"adjClose":"quarterlyAdjClose","score":"quarterly_price_regression_score"},inplace=True)
    full_set = weekly_set.merge(cqs,on=["year","quarter","ticker"],how="left")
    sets.append(full_set)
complete = pd.concat(sets)

In [15]:
columns = ["year","quarter","week","ticker","cty","mty","tw","adjClose"]
for year in tqdm(range(2019,2022)):
    for quarter in tqdm(range(1,5)):
        try:
            prediction_data = complete[(complete["year"]==year) & (complete["quarter"]==quarter)]
            training_data = complete[(complete["year"]==year-1) & (complete["quarter"]==quarter)]
            factors = ['cty',
                        'mty',
                        'tw',
                        'model_training_year',
                        'category_training_year',
                        'weekly_price_regression_prediction',
                        'quarterly_price_regression_prediction',
                        'weekly_price_regression_score',
                        'quarterly_price_regression_score']
            data = {"X":training_data[factors],"y":training_data["adjClose"]}
            xgb_models = m.xgb_regression(data)
            model = xgb_models["model"]
            sim = prediction_data[["year","quarter","week","ticker"]]
            sim["high_level_prediction"] = model.predict(prediction_data[factors])
            sim["high_level_score"] = xgb_models["score"]
            if sim.index.size > 1:
                strategy.connect()
                strategy.store("high_level_sim",sim)
                strategy.disconnect()
        except Exception as e:
            print(str(e))