In [2]:

import datetime

import IPython
import IPython.display

import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")


In [3]:

# Date bounds, kept as plain `datetime.date` objects.
# NOTE(review): `date_from` is reassigned in the predictions cell before it is
# first used, and `date_to` is not referenced in the visible cells — confirm
# both are still needed.
date_from, date_to = datetime.date(2021, 8, 19), datetime.date(2021, 10, 28)

###  Import data


- predictions

In [4]:

# Load the model predictions; the first CSV column becomes the index.
# NOTE(review): the saved output of this cell shows a FileNotFoundError for
# this relative path — confirm where 'allpreds_24.csv' is expected to live.
predicts = pd.read_csv('allpreds_24.csv', parse_dates=['Date'], index_col=[0])

# Tag the rows so they can be told apart from the real prices after concatenation.
if 'Model' not in predicts.columns:
    predicts['Model'] = 'prediction'

# Keep only rows on/after the evaluation start date.
# The bound is converted to a pd.Timestamp because ordering a datetime64 index
# against a plain datetime.date is rejected by recent pandas versions.
date_from = datetime.date(2021, 9, 27)
predicts = predicts.loc[predicts.index >= pd.Timestamp(date_from)]

# Expose the date as a regular column when it only exists as the index level.
# A new frame is assigned instead of mutating the filtered frame in place.
if 'Date' not in predicts.columns:
    predicts = predicts.reset_index('Date')

# Columns shared with the real-price frame built later.
pred = predicts[['Date', 'Model', 'SecuritiesCode', 'ad_Close']]

date_pred = pred.Date

# Short summary of the covered date span.
print('Predictions: ')
print('------'*10)
print(f'from: {date_pred.min().strftime("%d-%m-%Y") }')
print(f'to  : {date_pred.max().strftime("%d-%m-%Y") }')

print(f'      {date_pred.nunique()} days')

pred.head(2)



FileNotFoundError: [Errno 2] No such file or directory: 'allpreds_24.csv'

- real data

In [None]:
# Load the real price data and drop rows containing any missing value.
data = pd.read_csv('../data/curr_price_financial_fill.csv', parse_dates=['Date'], index_col=[0])
data = data.dropna()

# Restrict the real prices to the date span covered by the predictions, tag
# them as 'real' and keep the same four columns as the prediction frame.
# `.assign` returns a new frame, so no column is written onto a filtered slice.
real = (
    data.query('Date >= @date_pred.min() and Date <= @date_pred.max()')
    .assign(Model='real')
    [['Date', 'Model', 'SecuritiesCode', 'ad_Close']]
)

# Summarise the span of the real data itself (previously the prediction
# dates were printed here by mistake).
date_real = real.Date
print('Real Data: ')
print('------'*10)
print(f'from: {date_real.min().strftime("%d-%m-%Y") }')
print(f'to  : {date_real.max().strftime("%d-%m-%Y") }')

print(f'      {date_real.nunique()} days')


real.head(2)

In [None]:
# Stack the real prices and the predictions row-wise (rows of `real` first)
# and check that no row was lost or duplicated by the concatenation.
data_pred = pd.concat((real, pred), axis=0)
assert len(data_pred) == len(real) + len(pred)

In [None]:
# Peek at the last five rows of the combined frame.
data_pred.tail(5)

- functions

In [None]:
# calculate target / rank:
def target_calculations(data: pd.DataFrame, periods: int = 1, shift: int = -2, dropna: bool = False) -> pd.DataFrame:
    """Compute a shifted percentage-change target per security and attach it to the prices.

    Args:
        data (pd.DataFrame): long frame with at least the columns
            'Date', 'SecuritiesCode' and 'ad_Close'.
        periods (int): forwarded to ``DataFrame.pct_change``.
        shift (int): forwarded to ``DataFrame.shift``; a negative value pulls
            later returns back onto earlier dates.
        dropna (bool): forwarded to ``pivot_table``.
    Returns:
        (pd.DataFrame): long frame with columns 'Date', 'SecuritiesCode',
            'Target' and 'ad_Close', ordered by date then security. Targets
            that cannot be computed stay NaN (rows are not dropped).
    """
    join_keys = ['SecuritiesCode', 'Date']

    # Wide layout: one row per date, one column per security.
    wide_close = data.pivot_table(index='Date', columns='SecuritiesCode', values='ad_Close', dropna=dropna)
    shifted_change = wide_close.pct_change(periods).shift(shift)

    # Back to long layout, deterministically ordered.
    long_target = (
        shifted_change
        .melt(ignore_index=False, value_name='Target')
        .reset_index()
        .sort_values(['Date', 'SecuritiesCode'])
        .reset_index(drop=True)
    )

    # Inner join: only (security, date) pairs present in `data` are kept.
    price_cols = ['ad_Close', 'SecuritiesCode', 'Date']
    return long_target.merge(data[price_cols], on=join_keys)


In [None]:
def target(df):
    """Return the wide table of relative price changes between the next two dates.

    `df` needs the columns 'Date', 'SecuritiesCode' and 'ad_Close'. The result
    has one row per date and one column per security; each cell holds
    (price two rows ahead - price one row ahead) / price one row ahead.
    """
    wide_close = df.pivot(index='Date', columns='SecuritiesCode', values='ad_Close')
    one_ahead = wide_close.shift(-1)
    two_ahead = wide_close.shift(-2)
    return (two_ahead - one_ahead) / one_ahead

In [None]:
def add_rank(df, col_name="Target"):
    """Add a zero-based, per-date descending rank of `col_name` as column 'Rank'.

    Args:
        df (pd.DataFrame): frame with a 'Date' column and the column `col_name`.
            It is modified in place (the 'Rank' column is added or overwritten).
        col_name (str): column whose values are ranked within each date;
            the largest value gets rank 0.
    Returns:
        (pd.DataFrame): the same `df` object, with 'Date' still a regular column.
    """
    # method="first" breaks ties by order of appearance, so ranks within a
    # date are unique; the result is float-typed.
    per_date_values = df.groupby("Date")[col_name]
    df["Rank"] = per_date_values.rank(ascending=False, method="first") - 1
    # The former `df.set_index('Date')` call discarded its result and therefore
    # had no effect; it was removed so the code matches what actually happens.
    return df

In [None]:
def calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
    """Weighted return of the best-ranked names minus that of the worst-ranked names.

    Args:
        df (pd.DataFrame): rows with the columns 'Rank' and 'Target'.
        portfolio_size (int): number of rows taken from each end of the ranking.
        toprank_weight_ratio (float): weight of the most extreme rank; weights
            fall linearly to 1 across the `portfolio_size` positions.
    Returns:
        (float): long leg minus short leg, each divided by the mean weight.
    """
    targets_by_rank = df.sort_values(by='Rank')['Target']
    weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
    scale = weights.mean()

    # Long leg: lowest rank numbers, heaviest weight on rank 0.
    long_leg = (targets_by_rank[:portfolio_size] * weights).sum() / scale
    # Short leg: highest rank numbers, heaviest weight on the very last row.
    short_leg = (targets_by_rank[-portfolio_size:] * weights[::-1]).sum() / scale
    return long_leg - short_leg

In [None]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2) -> float:
    """Sharpe ratio (mean / standard deviation) of the per-date spread returns.

    Args:
        df (pd.DataFrame): predicted results with the columns 'Date', 'Rank' and 'Target'
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): sharpe ratio
    """
    def _spread_for_one_day(day_df, size, top_ratio):
        """Spread return of a single date: weighted top-`size` minus weighted bottom-`size`."""
        ranks = day_df['Rank']
        # Ranks must span 0 .. n-1 within the date.
        assert ranks.min() == 0
        assert ranks.max() == len(ranks) - 1

        weights = np.linspace(start=top_ratio, stop=1, num=size)
        best_first = day_df.sort_values(by='Rank')['Target']
        worst_first = day_df.sort_values(by='Rank', ascending=False)['Target']

        long_leg = (best_first[:size] * weights).sum() / weights.mean()
        short_leg = (worst_first[:size] * weights).sum() / weights.mean()
        return long_leg - short_leg

    per_day = df.groupby('Date').apply(_spread_for_one_day, portfolio_size, toprank_weight_ratio)
    return per_day.mean() / per_day.std()

- evaluate

In [None]:
def _calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
        """
        Args:
            df (pd.DataFrame): predicted results
            portfolio_size (int): # of equities to buy/sell
            toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
        Returns:
            (float): spread return
        """
        assert df['Rank'].min() == 0
        assert df['Rank'].max() == len(df['Rank']) - 1
        weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
        purchase = (df.sort_values(by='Rank')['Target'][:portfolio_size] * weights).sum() / weights.mean()
        short = (df.sort_values(by='Rank', ascending=False)['Target'][:portfolio_size] * weights).sum() / weights.mean()
        return purchase - short

    

In [None]:

# Score every model label present in the combined frame.
for mod in data_pred.Model.unique():
    if mod == 'baseline':
        continue

    df = data_pred.query('Model ==@mod')
    # Expect exactly the four selected columns (Date, Model, SecuritiesCode, ad_Close).
    assert len(df.columns) == 4
    # NOTE(review): with shift=-2 the trailing dates have NaN targets, and the
    # NaN-skipping sums turn those days into a spread of 0 — confirm whether
    # they should be dropped before scoring.
    df_target = target_calculations(df)
    # NOTE(review): the rank is derived from the price level ('ad_Close'), not
    # from 'Target' — confirm this is the intended ranking signal.
    df_rank = add_rank(df_target, 'ad_Close')

    # The defaults are portfolio_size=200 and toprank_weight_ratio=2, so the
    # former second call with those explicit values returned the same number;
    # reuse the result instead of computing it twice.
    sharpe_ratio = calc_spread_return_sharpe(df_rank)
    srd = sharpe_ratio

    daily_spread_return = df_rank.groupby('Date').apply(_calc_spread_return_per_day, 200, 2)
    print (f'{mod} sum daily spread return:    \t {sum(daily_spread_return):.2f}    \t\t score: {sharpe_ratio:.2f} ')


In [None]:
# Per-date spread returns left over from the last model processed by the
# evaluation loop (the name is overwritten on every iteration).
daily_spread_return