In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

In [91]:
def calc_spread_return_sharpe(df: pd.DataFrame, portfolio_size: int = 200, toprank_weight_ratio: float = 2, rank_col='Rank') -> float:
    """
    Args:
        df (pd.DataFrame): predicted results
        portfolio_size (int): # of equities to buy/sell
        toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
    Returns:
        (float): sharpe ratio
    """

    def _calc_spread_return_per_day(df, portfolio_size, toprank_weight_ratio):
        """
        Args:
            df (pd.DataFrame): predicted results
            portfolio_size (int): # of equities to buy/sell
            toprank_weight_ratio (float): the relative weight of the most highly ranked stock compared to the least.
        Returns:
            (float): spread return
        """
        assert df[rank_col].min() == 0
        assert df[rank_col].max() == len(df[rank_col]) - 1
        weights = np.linspace(start=toprank_weight_ratio, stop=1, num=portfolio_size)
        purchase = (df.sort_values(by=rank_col)['Target'][:portfolio_size] * weights).sum() / weights.mean()
        short = (df.sort_values(by=rank_col, ascending=False)['Target'][:portfolio_size] * weights).sum() / weights.mean()
        return purchase - short

    buf = df.groupby('Date').apply(_calc_spread_return_per_day, portfolio_size, toprank_weight_ratio)
    sharpe_ratio = buf.mean() / buf.std()
    return sharpe_ratio, buf

In [92]:
def set_rank(df):
    """
    Args:
        df (pd.DataFrame): including predict column
    Returns:
        df (pd.DataFrame): df with Rank
    """
    # sort records to set Rank
    df = df.sort_values("predict", ascending=False)
    # set Rank starting from 0
    df.loc[:, "Rank"] = np.arange(len(df["predict"]))
    return df

In [4]:
train = pd.read_parquet('../Output/financials_train_agg_df.parquet')
val = pd.read_parquet('../Output/financials_val_agg_df.parquet')
df = pd.concat([train, val]).reset_index(drop=True)

In [37]:
df['PreviousTarget_1'] = df.groupby('SecuritiesCode')['Target'].shift(1)
df['PreviousTarget_2'] = df.groupby('SecuritiesCode')['Target'].shift(2)
df['PreviousTarget_3'] = df.groupby('SecuritiesCode')['Target'].shift(3)
df['PreviousTarget_4'] = df.groupby('SecuritiesCode')['Target'].shift(4)
df['PreviousTarget_5'] = df.groupby('SecuritiesCode')['Target'].shift(5)
df['PreviousTarget_6'] = df.groupby('SecuritiesCode')['Target'].shift(6)
df['PreviousTarget_7'] = df.groupby('SecuritiesCode')['Target'].shift(7)
df['PreviousTarget_8'] = df.groupby('SecuritiesCode')['Target'].shift(8)
df['PreviousTarget_9'] = df.groupby('SecuritiesCode')['Target'].shift(9)

In [64]:
df['ror_1_shift1'] = df.groupby('SecuritiesCode')['ror_1'].shift(1)
df['ror_1_shift2'] = df.groupby('SecuritiesCode')['ror_1'].shift(2)
df['ror_1_shift3'] = df.groupby('SecuritiesCode')['ror_1'].shift(3)
df['ror_1_shift4'] = df.groupby('SecuritiesCode')['ror_1'].shift(4)
df['ror_1_shift5'] = df.groupby('SecuritiesCode')['ror_1'].shift(5)
df['ror_1_shift6'] = df.groupby('SecuritiesCode')['ror_1'].shift(6)
df['ror_1_shift7'] = df.groupby('SecuritiesCode')['ror_1'].shift(7)
df['ror_1_shift8'] = df.groupby('SecuritiesCode')['ror_1'].shift(8)
df['ror_1_shift9'] = df.groupby('SecuritiesCode')['ror_1'].shift(9)

In [65]:
time_config = {'val_split_date': '2021-12-01',
               'test_split_date': '2022-02-01'}

train = df[(df.Date < time_config['val_split_date'])]
val = df[(df.Date >= time_config['val_split_date']) & (df.Date < time_config['test_split_date'])]
test = df[(df.Date >= time_config['test_split_date'])]

In [93]:
test['predict'] = test['ror_1_shift1']*-1
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [94]:
sharp_ratio

0.1618741738755916

In [95]:
buf

Date
2022-02-01    4.602343
2022-02-02   -1.052260
2022-02-03    2.880099
2022-02-04   -2.785277
2022-02-07    0.961984
2022-02-08    1.774932
2022-02-09   -3.853634
2022-02-10    1.245647
2022-02-14   -0.797264
2022-02-15   -3.886581
2022-02-16   -1.066982
2022-02-17    0.900650
2022-02-18   -0.206964
2022-02-21   -0.966089
2022-02-22    6.164871
2022-02-24   -0.258320
2022-02-25    6.077303
2022-02-28   -1.092726
dtype: float64

In [96]:
buf.mean()

0.48009618627785994

In [97]:
test['predict'] = test['ror_1_shift2']
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [98]:
sharp_ratio

0.09080821888220274

In [99]:
buf

Date
2022-02-01    1.451282
2022-02-02    1.651278
2022-02-03   -1.824541
2022-02-04   -1.242524
2022-02-07   -3.735034
2022-02-08   -0.077365
2022-02-09    4.909561
2022-02-10    2.309318
2022-02-14    0.922790
2022-02-15   -0.440765
2022-02-16   -0.043859
2022-02-17    2.010084
2022-02-18   -2.522773
2022-02-21    1.340138
2022-02-22    0.152550
2022-02-24   -1.139865
2022-02-25   -1.677605
2022-02-28    1.311355
dtype: float64

In [100]:
buf.mean()

0.18633471968731716

In [122]:
test['predict'] = test['ror_1_shift3'] * -1
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [123]:
sharp_ratio

0.13753589409622305

In [124]:
buf

Date
2022-02-01   -7.389684
2022-02-02   -0.349925
2022-02-03    4.150876
2022-02-04    1.586830
2022-02-07   -2.600059
2022-02-08    2.820603
2022-02-09   -0.096992
2022-02-10    0.423021
2022-02-14   -0.847528
2022-02-15    2.390828
2022-02-16   -0.973186
2022-02-17   -2.640752
2022-02-18    0.117383
2022-02-21    2.396205
2022-02-22    6.192549
2022-02-24    0.393667
2022-02-25    3.898203
2022-02-28   -1.802206
dtype: float64

In [125]:
sharp_ratio

0.13753589409622305

In [126]:
buf

Date
2022-02-01   -7.389684
2022-02-02   -0.349925
2022-02-03    4.150876
2022-02-04    1.586830
2022-02-07   -2.600059
2022-02-08    2.820603
2022-02-09   -0.096992
2022-02-10    0.423021
2022-02-14   -0.847528
2022-02-15    2.390828
2022-02-16   -0.973186
2022-02-17   -2.640752
2022-02-18    0.117383
2022-02-21    2.396205
2022-02-22    6.192549
2022-02-24    0.393667
2022-02-25    3.898203
2022-02-28   -1.802206
dtype: float64

In [127]:
test['predict'] = test['ror_1_shift4']
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [128]:
sharp_ratio

0.38237587210001767

In [131]:
test['predict'] = test['ror_1_shift5'] 
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [132]:
sharp_ratio

0.18648408573837735

In [133]:
test['predict'] = test['ror_1_shift6'] * -1
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [134]:
sharp_ratio

0.27446060732493655

In [135]:
test['predict'] = test['ror_1_shift7'] * -1
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [136]:
sharp_ratio

0.07261669565947101

In [137]:
test['predict'] = test['ror_1_shift8']
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [138]:
sharp_ratio

0.029572088679137847

In [139]:
buf

Date
2022-02-01   -2.386607
2022-02-02    1.474783
2022-02-03    1.623672
2022-02-04    3.661945
2022-02-07    2.773947
2022-02-08   -3.408623
2022-02-09    2.167447
2022-02-10   -0.906347
2022-02-14    0.040817
2022-02-15   -2.434984
2022-02-16   -0.610788
2022-02-17   -1.391921
2022-02-18    1.458307
2022-02-21    0.676896
2022-02-22    6.606867
2022-02-24   -0.141752
2022-02-25   -8.052926
2022-02-28    0.523680
dtype: float64

In [140]:
test['predict'] = test['ror_1_shift9']
test = test.sort_values(["Date", "predict"], ascending=[True, False])
ranking = test.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [141]:
sharp_ratio

0.2601512505336804

In [142]:
buf

Date
2022-02-01    4.644038
2022-02-02    0.237026
2022-02-03    2.351151
2022-02-04    0.380460
2022-02-07   -5.236397
2022-02-08    1.503130
2022-02-09    9.491185
2022-02-10   -0.641997
2022-02-14    1.187148
2022-02-15   -2.098938
2022-02-16   -0.037112
2022-02-17    2.127044
2022-02-18   -0.030081
2022-02-21    2.656741
2022-02-22   -6.503464
2022-02-24    0.639741
2022-02-25    5.106645
2022-02-28    1.031721
dtype: float64

In [143]:
val['predict'] = val['ror_1_shift1']*-1
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  val['predict'] = val['ror_1_shift1']*-1


In [144]:
sharp_ratio

0.1315713328198504

In [145]:
val['predict'] = val['ror_1_shift2']
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [146]:
sharp_ratio

0.04545988906951244

In [147]:
val['predict'] = val['ror_1_shift3'] * -1
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [148]:
sharp_ratio

0.05518078186649945

In [149]:
val['predict'] = val['ror_1_shift4']
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [150]:
sharp_ratio

0.2580647235510238

In [151]:
val['predict'] = val['ror_1_shift5']
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [152]:
sharp_ratio

-0.12376756489896905

In [156]:
val['predict'] = val['ror_1_shift6'] * -1
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [157]:
sharp_ratio

0.050314781200433874

In [158]:
val['predict'] = val['ror_1_shift7'] * -1
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [159]:
sharp_ratio

0.07286701346449133

In [160]:
val['predict'] = val['ror_1_shift8']
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [161]:
sharp_ratio

0.17820526621605412

In [162]:
val['predict'] = val['ror_1_shift9']
val = val.sort_values(["Date", "predict"], ascending=[True, False])
ranking = val.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [163]:
sharp_ratio

0.1425925030522431

In [164]:
train['predict'] = train['ror_1_shift1']*-1
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['predict'] = train['ror_1_shift1']*-1


In [165]:
sharp_ratio

0.04655484654438009

In [166]:
train['predict'] = train['ror_1_shift2']
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [167]:
sharp_ratio

-0.005339698247159365

In [168]:
train['predict'] = train['ror_1_shift3'] * -1
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [169]:
sharp_ratio

-0.0002541898277221027

In [170]:
train['predict'] = train['ror_1_shift4']
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [171]:
sharp_ratio

-0.01520364967717422

In [172]:
train['predict'] = train['ror_1_shift5']
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [173]:
sharp_ratio

-0.0294184702297882

In [174]:
train['predict'] = train['ror_1_shift6'] * -1
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [175]:
sharp_ratio

0.018835266939211195

In [176]:
train['predict'] = train['ror_1_shift7'] * -1
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [177]:
sharp_ratio

0.013072541812158408

In [178]:
train['predict'] = train['ror_1_shift8']
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [179]:
sharp_ratio

0.009384485735306945

In [180]:
train['predict'] = train['ror_1_shift9']
train = train.sort_values(["Date", "predict"], ascending=[True, False])
ranking = train.groupby("Date").apply(set_rank).reset_index(drop=True)

# calc spread return sharpe
sharp_ratio, buf = calc_spread_return_sharpe(ranking, portfolio_size=200)

In [181]:
sharp_ratio

0.011776102970169742