In [1]:
import pandas as pd
import numpy as np
from function_wrappers import *
from functions import *
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from functions import hyperparameter_grid

## Trading Linear Regression Best Window

###### Assignment

Take a window of W days.  Train a linear model on "Adj Close" prices with W days lagged.  Determine if the next day will be higher or lower than today.  Enter or stay in a position that is either long, short, or neutral depending on whether the price is expected to be higher, lower or the same.  Optimize for W according to returns in year 2.


###### Questions


1. take W = 5, 6, . . . , 30 and consider your data for year 1.
For each W in the specified range, compute your average
P/L per trade and plot it: on x-axis you plot the values
of W and on the y axis you plot profit and loss per trade.
What is the optimal value W∗
of W?
2. use the value of W∗
from year 1 and consider year 2. For
every day in year 2, take the previous W∗ days, compute
linear regression and compute the value of r
2
for that day.
Plot the graph of r
2
for year 2. What is the average r
2
.
How well does it explain price movements?
3. take the optimal value of W∗
from year 1 and use it to
implement the above trading strategy for year 2. How many
”long position” and ’short position” transactions did you
have in year 2?
4. what is the average profit/loss per ”long position” trade
and per ”short position” trades in year 2?
5. what is the average number of days for long position and
short position transactions in year 2?
6. are these results very different from those in year 1 for this
value of W∗
?

In [2]:
def trading(stock, w=5, year=2020):
    pass

In [3]:
gme = pd.read_csv('./gme.csv')
gme.drop(['Date', 'Month', 'Day','Week_Number','High', 'Low', 'Close', 'Volume', 
         'Short_MA', 'Long_MA'], inplace=True, axis=1)

In [4]:
gme

Unnamed: 0,Year,Weekday,Year_Week,Open,Adj Close,Return
0,2017,Tuesday,2017-01,6.36,5.16,0.000000
1,2017,Wednesday,2017-01,6.39,5.27,0.021679
2,2017,Thursday,2017-01,6.39,5.08,-0.035494
3,2017,Friday,2017-01,6.29,4.99,-0.019200
4,2017,Monday,2017-02,6.15,4.98,-0.001224
...,...,...,...,...,...,...
1253,2021,Thursday,2021-51,38.50,38.03,-0.012078
1254,2021,Monday,2021-52,38.00,37.08,-0.025174
1255,2021,Tuesday,2021-52,36.88,36.62,-0.012474
1256,2021,Wednesday,2021-52,36.96,38.48,0.051004


In [5]:
gme[gme['Year']==2020].index[0]

754

In [6]:
X_transform(gme,y=2020,w=5,d=1,measure='Adj Close')

array([[1.  , 1.36, 1.35, 1.48, 1.53, 1.52],
       [1.  , 1.35, 1.48, 1.53, 1.52, 1.58],
       [1.  , 1.48, 1.53, 1.52, 1.58, 1.47],
       ...,
       [1.  , 3.88, 4.86, 5.14, 5.04, 5.25],
       [1.  , 4.86, 5.14, 5.04, 5.25, 4.84],
       [1.  , 5.14, 5.04, 5.25, 4.84, 4.82]])

In [7]:
from functions import polynomial_accuracy

In [8]:
results = polynomial_accuracy(gme,y=2020,w=5,d=1,test=0,
                                  measure='Adj Close',weekly=False)
predicted_labels = results[-1]['predicted_labels']

In [9]:
np.unique(results[-1]['true_labels'],return_counts=True)

(array([0, 1]), array([131, 122]))

In [10]:
np.unique(results[-1]['predicted_labels'], return_counts=True)

(array([0, 1]), array([ 55, 198]))

My hypothesis for why there would be more predicted up movements is that there the up movements are on average higher than down movements, and therefore rmse is reduced by biasing predictions upwards.

In [11]:
predicted_labels

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1])

Note: in the presence of short-selling, since one can lose more than principal, it makes sense to think of return as being on a scale from negative infinity to infinity, where positive returns indicate the proportion of the principal that is gained and negative returns indicate the proportion of the pricipal that is lost.  With only long positions, a range of (0,positive_infinity) to indicate the proportion of principal at the end of an investment period made sense.  For compatibility between long and short positions, I will proceed with the standard of negative_infinity, positive_infinity.

Data structure: list of two element lists, with each list being data for a trade, with the first element being an indicator of position, 0 for short and 1 for long, and the second element being the profit, potentially negative, of the trade. 

Idea: iterate through the days in the trading period.  Have a standing position that is updated for each day.  If the prediction changes, close the current position and record profit for the trade.

short position return per share:

(entry price - close price) / entry price

structure: 

1. method for determining predictions, taking w and a year as arguments
2. method for determining profit, taking predictions and a year as arguments
3. method for determining the most profitable w, taking an array of Ws and a year as arguments.

In [12]:
class maths():
    def __init__(self, numbers):
        self.numbers = numbers
    def add(self,**numbers):
        if not numbers:
            numbers = self.numbers
        total = 0
        for n in numbers:
            total += n
        return total
    def euclidean(self,**kwargs):

        if not kwargs['numbers']:
            numbers = self.numbers
        else:
            numbers = kwargs['numbers']
            print(type([numbers]))
        return self.add(numbers = [n*n for n in numbers])**.5

In [13]:
class position():
    """take an entry price, bet direction(long:1, short:0) and track a trade's 
    length in days and profit in dollars."""
    def __init__(self, entry_price, long_short=1, size=100):
        self.long_short = long_short
        self.size = size 
        self.entry_price = entry_price
        self.current_price = entry_price
        self.shares = size/entry_price
        self.cumulative_return = 0
        self.profit = 0
        self.trade_length = 0
        self.info = (self.long_short,self.trade_length,self.profit)
    def __str__(self):
        return f'long_short:{long_short}, days:{entry_price}, \
        current price:{current_price}'
    def update(self,current_price):
        self.current_price = current_price
        self.cumulative_return = (current_price - self.entry_price)*(-1)**\
            (1-self.long_short)/self.entry_price
        self.profit = self.size*(self.cumulative_return)
        self.trade_length += 1
        self.info = {'direction':self.long_short,'length':self.trade_length,
                     'profit': self.profit}

trading booth schematic:

1) \_\_init__: takes stock, stores windows, trading_year, evaluation_year
2) Generate predictions
   requirements: 
   a) feature space (create_X)
   b) get least squares coefficients (linear_model)

3) Evaluate Ws:
   1) for each W:
      for each direction of bet:
       * find number of trades
       * find length of trades (total # of days)
       * find profit (total profit)
   

In [14]:
class trading_booth():
    """for caclulating returns with a given window for prices and labels"""
    def __init__(self, stock, windows = list(range(5,31)), training_year = 2020, 
                 evaluation_year = 2021):
        # collect arguments
        self.stock = stock
        self.windows = windows
        self.training_year = training_year
        self.evaluation_year = evaluation_year
    def linear_model(self, w):
        stock = self.stock
        y = self.training_year
        idx = stock[stock['Year'] == y].index
        X = np.array([np.insert(stock.loc[i-w:i-1,'Adj Close'].values,0,1) 
                      for i in idx])
        y = stock.loc[idx, 'Adj Close']
        coeff = np.linalg.lstsq(X,y)[0]
        return coeff
    def create_X(self, w, year):
        stock = self.stock
        idx = stock[stock['Year'] == year].index
        X = np.array([np.insert(stock.loc[i-w:i-1,'Adj Close'].values,0,1) 
                      for i in idx])
        return X
    def create_coeff(self, w, train_year):
        stock = self.stock
        idx = stock[stock['Year'] == train_year].index
        X = self.create_X(w,train_year)
        y = self.stock.loc[idx, 'Adj Close']
        coeff = np.linalg.lstsq(X,y)[0]
        return coeff
    def predict_positions(self, w, train_year, predict_year):
        X = self.create_X(w,predict_year)
        predictions =  X @ self.create_coeff(w,predict_year)
        positions = predictions > X[:,-1]
        return (predictions, positions)
    def trades(self, positions, trading_year):
        trade_log = {1:[], 0:[]}
        idx = self.stock[self.stock['Year']==trading_year].index
        prices = self.stock.loc[idx[0]-1:idx[-1],'Adj Close'].values
        current_position = position(prices[0], long_short = positions[0])
        for p, price in zip(positions[1:], prices[1:]):
            current_position.update(price)
            if current_position.long_short != p:
                trade_log[current_position.long_short].append(
                                    current_position.info[1:])
                current_position = position(price, long_short = p)
        return trade_log
    def trade_analysis(self,trade_log = trades)
    total_profit = sum([sum(trade_log[k][1]) for k in trade_log.keys()])
        stats = {}
        
        stats = {1:{'num_trades':len(trade_log[1]),
                    'total_profit':sum([trade_log[1][i][2] for i in 
                                        range(len(trade_log[1]))]),
                    'avg_profit': sum([trade_log[1][i][2] for i in 
                                        range(len(trade_log[1]))])/\
                                    len(trade_log[1])},
                 0:{'num_trades':len(trade_log[0]),
                    'total_profit':sum([trade_log[0][i][2] for i in 
                                        range(len(trade_log[1]))]),
                    'avg_profit': sum([trade_log[0][i][2] for i in 
                                        range(len(trade_log[1]))])/\
                                    len(trade_log[0])},
                'aggregate':{'num_trades':len(trade_log[0])+
                             len(trade_log[1]),
                    'total_profit':sum([trade_log[0][i][2] for i in 
                                        range(len(trade_log[1]))])+
                             sum([trade_log[1][i][2] for i in 
                                        range(len(trade_log[1]))])
                             ,
                    'avg_profit': (sum([trade_log[0][i][2] for i in 
                                        range(len(trade_log[1]))])+
                             sum([trade_log[1][i][2] for i in 
                                        range(len(trade_log[1]))]
                                  ))/\
                                    (len(trade_log[0])+len(trade_log[1]))}}
        
        return (trade_log, total_profit,stats)
    def windows_stats(self):
        windows = self.windows
        year = self.training_year
        windows_stats = {}
        for w in windows:
            predictions, positions = self.predict_positions(w,year,year)
            log, profit,stats = self.trades(positions,year)
            windows_stats[w] = (log, profit,stats)
        # best_w = max(profitability, key=profitability.get)
        return windows_stats

    def __str__(self):
        return f'profit: {self.total_profit}'

SyntaxError: invalid syntax (2374455561.py, line 49)

In [None]:
gme_trading = trading_booth(gme)

In [None]:
gme_trading.windows_stats()

In [None]:
gme_trading.create_coeff(5,2020)

In [None]:
gme_trading.create_X(5,2020)

In [None]:
stock = gme
trading_year = 2020
window = 5

In [None]:
gme[gme.Year == 2020]

In [None]:
stock.loc[i-window:i,'Adj Close'].values

In [None]:
np.array([np.insert(stock.loc[i-window:i-1,'Adj Close'].values,0,1) for i in \
                         stock[stock['Year'] == trading_year].index])

In [None]:
np.array([np.insert(stock.loc[i-window:i,'Adj Close'].values,0,1) for i in \
                         stock[stock['Year'] == trading_year].index], dtype = 'float')[0]

Class trading_booth scrap

In [None]:
 self.evaluation_year = evaluation_year;
        # collect relevent indices
        self.train_indices = stock[stock['Year'] == training_year].index
        self.eval_indices = stock[stock['Year'] == eval_year].index
        # collect true closing prices
        self.train_true_close = stock['Adj Close'][train_indices]
        self.eval_true_close = stock['Adj Close'][eval_indices]
        stock['begin_prices'] = [stock['Open'].values[0]]+\
                                list(stock['Adj Close'].values[:-1])
        self.A = np.fromiter([np.insert([stock.loc[i-window:i,'Adj Close'].values,0,1) 
                              for i in 
                         self.train_indices])
        self.b = stock.query(f'Year == {training_year}')
        
        self.coeff = np.linalg.lstsq(self.A,self.b)[0]
        self.train_pred_close = self.A * self.coeff
        self.train_positions = np.where(self.train_preds > 
                                stock['Adj Close'][self.train_indices-1],
                                        ,1,0)
        current_position = position(stock['begin_prices'][self.train_indices[0]],
                      self.train_positions[0],size=100)
        for i,p in enumerate(self.train_positions[1:]):
            if self.train_positions[i] == p:
        self.total_profit = 0; 
        self.log = {'long_trades':[], 'short_trades':[]}; 
        self.cumulative_return = 1

In [None]:
np.linalg.lstsq()

In [None]:
gme.Year[0:5]

In [None]:
gme_trading_booth = trading_booth(gme, window=5,training_year=2020,evaluation_year=2021)

In [None]:
gme_trading_booth.stock

In [None]:
gme_trading_booth.X_matrix

In [None]:
gme

In [None]:
gme_trading_booth.stock

In [None]:
dir(gme_trading_booth)

In [None]:
gme_trading_booth.stock

In [None]:
cumulative_return = 0
standing_trade = predicted_labels[0]
standing_trade
for p in predicted_labels[1:]:
    if p == 

In [None]:
predictions = results[-1]['predictions']

In [None]:
for p in predictions:

In [None]:
gme

In [None]:
gme.query('Year == 2020')

In [None]:
predictions

Long trade implementation:
Short trade implementation:

In [None]:
begin_prices = [gme.loc[0,'Open']]
begin_prices = begin_prices+list(gme.loc[:gme.index[-2],'Adj Close'].values)

In [None]:
len(begin_prices)

In [None]:
gme['Begin'] = begin_prices

In [None]:
gme['Begin']

In [None]:
gme['ReturnCalc'] = gme['Adj Close']/gme['Begin']