In [3]:
from database.market import Market
from database.strategy import Strategy
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta

In [4]:
number_of_training_weeks = 14
start = datetime.now() - timedelta(days=number_of_training_weeks * 7)

In [5]:
market = Market()
strategy = Strategy()

In [6]:
market.connect()
sp500 = market.retrieve("sp500")
market.disconnect()

In [7]:
strategy.connect()
prices = strategy.retrieve("prices")
strategy.disconnect()

In [8]:
prices.columns

Index(['_id', 'date', 'close', 'high', 'low', 'open', 'volume', 'adjClose',
       'adjHigh', 'adjLow', 'adjOpen', 'adjVolume', 'divCash', 'splitFactor',
       'ticker'],
      dtype='object')

In [9]:
prices["date"] = pd.to_datetime(prices["date"])
prices["week"] = [x.week for x in prices["date"]]
prices["year"] = [x.year for x in prices["date"]]

In [10]:
aggregate = []
for ticker in tqdm(sp500["Symbol"].unique()):
    ticker_data = prices[prices["ticker"]== ticker]
    ticker_data = ticker_data.groupby(["year","week"]).mean()
    ticker_data["ticker"] = ticker
    aggregate.append(ticker_data)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 505/505 [00:19<00:00, 26.12it/s]


In [11]:
a = pd.concat(aggregate).reset_index()
final = a.pivot_table(values="adjClose",columns="ticker",index=["year","week"])

In [12]:
final.reset_index()

ticker,year,week,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,...,WRK,WST,WU,WY,WYNN,XEL,XLNX,XOM,XRAY,XYL
0,2018,1,66.181621,31.604058,154.966113,38.393785,79.900234,71.178207,325.0400,69.248122,...,34.580729,97.068655,15.492313,20.167953,94.924585,46.065957,82.726949,58.388243,36.493389,64.690331
1,2018,45,66.085982,35.964256,169.027253,50.257061,76.494138,84.778225,409.5100,69.769901,...,42.200212,110.277834,16.981218,24.633782,93.995920,46.239958,83.512332,69.254073,35.644389,69.053438
2,2018,46,63.026826,36.873697,174.663298,46.642097,77.636437,85.546815,324.3220,67.444397,...,41.625087,106.744824,16.890448,24.422427,98.861950,47.521334,83.286426,67.194001,35.916394,67.333404
3,2018,47,65.092686,35.843686,172.453248,43.319576,75.694616,84.854181,300.7825,66.117452,...,41.822443,104.341625,16.675655,24.287597,99.440345,47.562160,84.106217,66.197317,36.016194,66.164091
4,2018,48,69.288015,38.170930,175.371616,43.224285,77.489100,85.951512,324.9980,68.675602,...,42.658113,106.469551,16.795184,24.434416,106.464098,47.948150,87.956512,66.966237,36.493670,68.741233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,2021,27,149.332500,20.640000,209.780000,143.735000,115.077780,115.007500,327.7625,118.965604,...,52.027500,372.067784,23.110000,34.917500,113.835000,67.482500,137.742500,60.787500,62.762500,120.342500
141,2021,28,149.060000,20.346000,209.232000,146.832000,116.999900,113.872000,316.3480,117.578301,...,50.988000,372.969872,23.438000,34.722000,109.840000,68.272000,132.024000,59.570000,61.966000,120.558000
142,2021,29,149.688000,20.658000,210.108000,145.872000,116.838000,117.692000,322.6240,119.110000,...,48.274000,373.304170,22.996000,33.886000,106.374000,67.800000,134.018000,56.644000,62.784000,121.290000
143,2021,30,151.512000,21.344000,213.432000,146.448000,117.894000,121.684000,324.5420,120.384000,...,49.478000,391.310000,23.274000,34.488000,100.982000,68.702000,140.414000,58.206000,65.006000,124.374000


In [13]:
final.fillna(-99999,inplace=True)
final.reset_index(inplace=True)
final = final[final["year"] >= 2019]

missing_tickers = []
for col in final.columns:
    if col not in ["ticker","year","week"]:
        missing = final[final[col]==-99999]
        if missing.index.size > 1:
            missing_tickers.append(col)

for col in missing_tickers:
    final.drop(col,axis=1,inplace=True)

In [14]:
final.reset_index(inplace=True)
stuff = final.drop(["index","ticker"],axis=1,errors="ignore")

In [15]:
stuff

ticker,year,week,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,...,WRK,WST,WU,WY,WYNN,XEL,XLNX,XOM,XRAY,XYL
0,2019,1,71.826643,29.974005,157.337228,50.741894,78.297937,75.371279,252.7140,72.837271,...,37.870086,117.212271,19.372402,24.020246,115.694322,51.418360,88.594007,60.865462,44.836649,69.267641
1,2019,2,67.537432,32.013503,159.449954,36.903209,77.218692,72.552023,325.7100,66.113623,...,36.637623,101.340349,15.684832,22.014990,107.811526,45.656441,87.018221,61.607773,39.141391,65.621127
2,2019,3,69.287625,32.322555,160.483324,37.517062,75.916914,74.117178,325.8100,67.427717,...,37.161462,103.077148,16.189739,22.564858,106.876772,46.834502,88.976393,61.660861,39.447382,67.548651
3,2019,4,70.831798,32.733477,156.971343,37.585700,75.373221,75.921029,342.7450,68.151531,...,38.099703,104.451805,16.475339,22.998017,109.874416,47.615670,96.264660,61.288817,40.156458,66.937331
4,2019,5,73.453163,35.645361,155.011385,39.391235,69.238136,77.553631,341.8040,68.439614,...,37.364771,104.821677,16.589307,24.144342,116.011260,47.965816,107.930646,62.376693,40.628195,68.498838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,2021,27,149.332500,20.640000,209.780000,143.735000,115.077780,115.007500,327.7625,118.965604,...,52.027500,372.067784,23.110000,34.917500,113.835000,67.482500,137.742500,60.787500,62.762500,120.342500
132,2021,28,149.060000,20.346000,209.232000,146.832000,116.999900,113.872000,316.3480,117.578301,...,50.988000,372.969872,23.438000,34.722000,109.840000,68.272000,132.024000,59.570000,61.966000,120.558000
133,2021,29,149.688000,20.658000,210.108000,145.872000,116.838000,117.692000,322.6240,119.110000,...,48.274000,373.304170,22.996000,33.886000,106.374000,67.800000,134.018000,56.644000,62.784000,121.290000
134,2021,30,151.512000,21.344000,213.432000,146.448000,117.894000,121.684000,324.5420,120.384000,...,49.478000,391.310000,23.274000,34.488000,100.982000,68.702000,140.414000,58.206000,65.006000,124.374000


In [16]:
strategy.connect()
strategy.store("application_weekly_average",stuff)
strategy.disconnect()