In [1]:
import pandas as pd
import numpy as np
from function_wrappers import *
from functions import *
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from functions import hyperparameter_grid

## Trading Linear Regression Best Window

###### Assignment

Take a window of W days.  Train a linear model on "Adj Close" prices with W days lagged.  Determine if the next day will be higher or lower than today.  Enter or stay in a position that is either long, short, or neutral depending on whether the price is expected to be higher, lower or the same.  Optimize for W according to returns in year 2.


###### Questions


1. take W = 5, 6, . . . , 30 and consider your data for year 1.
For each W in the specified range, compute your average
P/L per trade and plot it: on x-axis you plot the values
of W and on the y axis you plot profit and loss per trade.
What is the optimal value W∗
of W?
2. use the value of W∗
from year 1 and consider year 2. For
every day in year 2, take the previous W∗ days, compute
linear regression and compute the value of r
2
for that day.
Plot the graph of r
2
for year 2. What is the average r
2
.
How well does it explain price movements?
3. take the optimal value of W∗
from year 1 and use it to
implement the above trading strategy for year 2. How many
”long position” and ’short position” transactions did you
have in year 2?
4. what is the average profit/loss per ”long position” trade
and per ”short position” trades in year 2?
5. what is the average number of days for long position and
short position transactions in year 2?
6. are these results very different from those in year 1 for this
value of W∗
?

In [2]:
gme = pd.read_csv('./gme.csv')
gme.drop(['Date', 'Month', 'Day','Week_Number','High', 'Low', 'Close', 'Volume', 
         'Short_MA', 'Long_MA'], inplace=True, axis=1)

In [3]:
gme[gme['Year']==2020].index[0]

754

In [4]:
X_transform(gme,y=2020,w=5,d=1,measure='Adj Close')

array([[1.  , 1.36, 1.35, 1.48, 1.53, 1.52],
       [1.  , 1.35, 1.48, 1.53, 1.52, 1.58],
       [1.  , 1.48, 1.53, 1.52, 1.58, 1.47],
       ...,
       [1.  , 3.88, 4.86, 5.14, 5.04, 5.25],
       [1.  , 4.86, 5.14, 5.04, 5.25, 4.84],
       [1.  , 5.14, 5.04, 5.25, 4.84, 4.82]])

In [5]:
from functions import polynomial_accuracy

In [6]:
results = polynomial_accuracy(gme,y=2020,w=5,d=1,test=0,
                                  measure='Adj Close',weekly=False)
predicted_labels = results[-1]['predicted_labels']

In [7]:
np.unique(results[-1]['true_labels'],return_counts=True)

(array([0, 1]), array([131, 122]))

In [8]:
np.unique(results[-1]['predicted_labels'], return_counts=True)

(array([0, 1]), array([ 55, 198]))

My hypothesis for why there would be more predicted up movements is that there the up movements are on average higher than down movements, and therefore rmse is reduced by biasing predictions upwards.

In [9]:
predicted_labels

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1])

Note: in the presence of short-selling, since one can lose more than principal, it makes sense to think of return as being on a scale from negative infinity to infinity, where positive returns indicate the proportion of the principal that is gained and negative returns indicate the proportion of the pricipal that is lost.  With only long positions, a range of (0,positive_infinity) to indicate the proportion of principal at the end of an investment period made sense.  For compatibility between long and short positions, I will proceed with the standard of negative_infinity, positive_infinity.

Data structure: list of two element lists, with each list being data for a trade, with the first element being an indicator of position, 0 for short and 1 for long, and the second element being the profit, potentially negative, of the trade. 

Idea: iterate through the days in the trading period.  Have a standing position that is updated for each day.  If the prediction changes, close the current position and record profit for the trade.

In [99]:
class position():
    def __init__(self, entry_price, long_short=1, size=100):
        self.long_short = long_short
        self.size = size 
        self.entry_price = entry_price
        self.current_price = entry_price
        self.shares = size/entry_price
        self.cumulative_return = 1
        self.profit = 0
        self.num_trades = 1
        self.info = (self.long_short,self.num_trades,self.profit)
    def __str__(self):
        return f'long_short:{long_short}, entry price:{entry_price}, \
        current price:{current_price}'
    def update(self,current_price):
        self.current_price = current_price
        self.cumulative_return = (current_price - self.entry_price)*(-1)**\
            (1-self.long_short)/self.entry_price
        self.profit = self.size*(self.cumulative_return)
        self.num_trades += 1
        self.info = (self.long_short,self.num_trades,self.profit)

In [103]:
p = position(20,0,100)
p.update(40)
p.update(40)
p.update(10)
p.profit
p.info
# p.update

(0, 4, 50.0)

In [72]:
class food():
    name = ''
    def __init__(self,category,name,calories):
        self.category = category
        self.name = name
        self.calories = calories
    def cook(self, method):
        name = f'{method} {self.name}'
        self.name=name
    def get_name(self,)
    
f = food('vegetable', 'broccoli', 50)
f.cook('boiled')
f.name

'boiled broccoli'

short position return per share:

(entry price - close price) / entry price

In [73]:
'203'[:-1]

'20'

In [45]:
class trading_booth():
    # window = 5
    """for caclulating returns with a given window for prices and labels"""
    def __init__(self, stock, window = 5, training_year = 2020, evaluation_year = 2021):
        # collect arguments
        self.stock = stock; self.window = window; self.training_year = training_year;
        self.evaluation_year = evaluation_year;
        # collect relevent indices
        self.train_indices = stock[stock['Year'] == training_year].index
        self.eval_indices = stock[stock['Year'] == eval_year].index
        # collect true closing prices
        self.train_true_close = stock['Adj Close'][train_indices]
        self.eval_true_close = stock['Adj Close'][eval_indices]
        stock['begin_prices'] = [stock['Open'].values[0]]+\
                                list(stock['Adj Close'].values[:-1])
        self.A = np.fromiter([np.insert([stock.loc[i-window:i,'Adj Close'].values,0,1) 
                              for i in 
                         self.train_indices])
        self.b = stock.query(f'Year == {training_year}')
        
        self.coeff = np.linalg.lstsq(self.A,self.b)[0]
        self.train_pred_close = self.A * self.coeff
        self.train_positions = np.where(self.train_preds > 
                                stock['Adj Close'][self.train_indices-1],
                                        ,1,0)
        current_position = position(stock['begin_prices'][self.train_indices[0]],
                      self.train_positions[0],size=100)
        for i,p in enumerate(self.train_positions[1:]):
            if self.train_positions[i] == p:
        self.total_profit = 0; 
        self.log = {'long_trades':[], 'short_trades':[]}; 
        self.cumulative_return = 1
    
    def training_trades(self):
        p1 = position(self.stock['begin_prices'][self.train_indices[0]],
                      position=self.train_positions[0],size = 100)
        p2 = position(self.stock)
        for i,p in enumerate(self.train_positions):
            if p == 
    def __str__(self):
        return f'profit: {self.total_profit}'
        
    # def X_w():
    #     pass
    def trade(self):
        for p, inx in zip(predictions,stock[stock['Year']==evaluation_year].index)
    def predict():
        pass
        
    def create_X(window, trading_year):
        self.X = np.fromiter([np.insert(stock.loc[i-window:i,'Adj Close'].values,0,1) for i in \
                         stock[stock['Year'] == trading_year].index])
        pass
    
    def fit_linear():
        self.X
        pass

SyntaxError: closing parenthesis ')' does not match opening parenthesis '[' (3519617228.py, line 18)

In [None]:
np.linalg.lstsq()

In [None]:
gme.Year[0:5]

In [None]:
gme_trading_booth = trading_booth(gme, window=5,training_year=2020,evaluation_year=2021)

In [None]:
gme_trading_booth.stock

In [None]:
gme_trading_booth.X_matrix

In [None]:
gme

In [None]:
gme_trading_booth.stock

In [None]:
dir(gme_trading_booth)

In [None]:
gme_trading_booth.stock

In [None]:
cumulative_return = 0
standing_trade = predicted_labels[0]
standing_trade
for p in predicted_labels[1:]:
    if p == 

In [None]:
predictions = results[-1]['predictions']

In [None]:
for p in predictions:

In [None]:
gme

In [None]:
gme.query('Year == 2020')

In [None]:
predictions

Long trade implementation:
Short trade implementation:

In [None]:
begin_prices = [gme.loc[0,'Open']]
begin_prices = begin_prices+list(gme.loc[:gme.index[-2],'Adj Close'].values)

In [None]:
len(begin_prices)

In [None]:
gme['Begin'] = begin_prices

In [None]:
gme['Begin']

In [None]:
gme['ReturnCalc'] = gme['Adj Close']/gme['Begin']