In [1]:
import random
import numpy as np
from datetime import datetime
import pandas as pd
import pandas_datareader.data as web
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
from pandas_datareader.famafrench import get_available_datasets
import matplotlib.pyplot as plt

In [2]:
# Hyperparameters
# Date window requested from the price data source.
start_date = '2015-01-01'
end_date = '2020-12-31'
# Number of tickers drawn at random; None keeps every ticker.
num_sample = 1000
# Pickle file the downloaded price frame is written to.
# NOTE(review): the name says "5y" but the window above covers 2015-2020 - confirm.
file_name = 'df_1000_5y.pkl'

In [3]:
# Fetch the Nasdaq symbol table; its index holds the ticker symbols
symbol_table = get_nasdaq_symbols()
tickers = symbol_table.index
print('Number of tickers in Nasdaq: ', len(tickers))

Number of tickers in Nasdaq:  10549


In [None]:
# Draw a reproducible random subset of tickers (skipped when num_sample is None).
if num_sample is not None:
    # list() accepts both the original Index and the list a previous run of this
    # cell left behind, so re-running the cell no longer raises AttributeError.
    ticker_pool = list(tickers)
    # A dedicated fixed-seed generator yields the same sample after every kernel
    # restart without touching the global `random` state.
    sampler = random.Random(42)
    # Cap the request at the pool size; random.sample raises ValueError otherwise.
    tickers = sampler.sample(ticker_pool, min(num_sample, len(ticker_pool)))

In [None]:
# Get data from yahoo finance, reusing the pickled copy when one already exists.
# Delete the file (or change `file_name`) after changing the tickers or the date
# window, otherwise the stale download is loaded.
try:
    # NOTE: only unpickle files written by this notebook; loading an untrusted
    # pickle can execute arbitrary code.
    df = pd.read_pickle(file_name)
except FileNotFoundError:
    start = datetime.now()
    df = web.DataReader(tickers, 'yahoo', start_date, end_date)
    df = df.sort_index(axis=0)
    df.to_pickle(file_name)
    total_time = datetime.now() - start

    # total_seconds() includes whole days, which `.seconds` silently drops.
    minutes, seconds = divmod(int(total_time.total_seconds()), 60)
    print('Time cost: ', minutes, 'minutes ', seconds, 'seconds')

In [6]:
# Functions related to (cumulative) prospect theory

class v_func:
    """Piecewise power value function for scalar outcomes.

    Non-negative outcomes map to ``x ** alpha``; negative outcomes map to
    ``-lambda_ * (-x) ** alpha``.
    """

    def __init__(self, alpha, lambda_):
        """Store the power exponent ``alpha`` and the loss multiplier ``lambda_``."""
        self.alpha = alpha
        self.lambda_ = lambda_

    def __call__(self, x):
        """Return the value assigned to the scalar outcome ``x``."""
        # Losses: raise the magnitude to alpha, then scale and flip the sign.
        if x < 0:
            return -self.lambda_ * (-x) ** self.alpha
        return x ** self.alpha

class w_func:
    """Probability weighting function ``p**c / (p**c + (1-p)**c) ** (1/c)``.

    The exponent ``c`` is ``gamma`` when weighting gains and ``delta`` when
    weighting losses.
    """

    def __init__(self, gamma, delta):
        """Store the gain exponent ``gamma`` and the loss exponent ``delta``."""
        self.gamma = gamma
        self.delta = delta

    def __call__(self, p, gain=True):
        """Return the weight of probability ``p`` for a gain (default) or a loss."""
        k = self.gamma if gain else self.delta
        weighted = p ** k
        return weighted / (weighted + (1 - p) ** k) ** (1 / k)


def calculate_tk(sorted_return, v, w):
    """Compute the cumulative-prospect-theory (TK) value of a return history.

    Every non-missing return is treated as one equally likely outcome. Negative
    returns receive decision weights built from the worst outcome upwards with
    ``w(..., gain=False)``; positive returns receive weights built from the best
    outcome downwards with ``w(..., gain=True)``. Zero returns count towards the
    number of observations but contribute nothing to the sum.

    Parameters
    ----------
    sorted_return : pandas.Series or array-like of float
        Return observations. NaN entries are ignored. The values are sorted
        internally, so pre-sorting is accepted but no longer required.
    v : callable
        Value function, called as ``v(x)`` on each scalar return.
    w : callable
        Probability weighting function, called as ``w(p, gain=bool)`` on
        scalar cumulative probabilities.

    Returns
    -------
    float
        The TK value, or NaN when there are no non-missing observations
        (previously 0, which is indistinguishable from a real TK value).
    """
    returns = np.asarray(sorted_return, dtype=float)
    # Drop missing values and enforce ascending order: the rank-based weights
    # below are only meaningful when paired with sorted outcomes.
    returns = np.sort(returns[~np.isnan(returns)])
    num_month = returns.size
    if num_month == 0:
        return np.nan

    neg = returns[returns < 0]
    pos = returns[returns > 0]
    num_neg = neg.size
    num_pos = pos.size

    v_neg = np.array([v(x) for x in neg], dtype=float)
    v_pos = np.array([v(x) for x in pos], dtype=float)

    # i-th worst loss: weight of "this outcome or worse" minus "strictly worse".
    pi_neg = np.array(
        [w((i + 1) / num_month, gain=False) - w(i / num_month, gain=False)
         for i in range(num_neg)],
        dtype=float,
    )
    # i-th smallest gain: weight of "this outcome or better" minus "strictly better".
    pi_pos = np.array(
        [w((num_pos - i) / num_month, gain=True) - w((num_pos - i - 1) / num_month, gain=True)
         for i in range(num_pos)],
        dtype=float,
    )

    return float(np.dot(v_neg, pi_neg) + np.dot(v_pos, pi_pos))

In [7]:
# Value function (alpha: power exponent, lambda_: loss multiplier) and
# probability weighting function (gamma: gain exponent, delta: loss exponent).
# NOTE(review): presumably the Tversky & Kahneman (1992) estimates - confirm.
v = v_func(
    alpha=0.88,
    lambda_=2.25,
)
w = w_func(
    gamma=0.61,
    delta=0.69,
)

In [8]:
# Currently uses raw returns instead of excess returns
close_prices = df['Adj Close'].dropna(axis=1, how='all')
month_end_prices = close_prices.resample('m').ffill()
# fill_method=None keeps a missing price as a missing return. The default pads
# the last known price forward and so reports spurious 0% returns, which would
# be counted as real observations in the TK calculation.
monthly_returns = month_end_prices.pct_change(fill_method=None).dropna(axis=0, how='all')

# Currently use months 1 .. T-1 to predict month T
# .iloc[-1] always yields a Series indexed by ticker, even for a single-ticker
# frame (where `.T.squeeze()` would collapse to a scalar).
target_month_returns = monthly_returns.iloc[-1]
monthly_returns = monthly_returns.iloc[:-1]

In [9]:
# Calculate TK values
# Sort every ticker's return history in ascending order (NaNs end up last),
# then reduce each sorted column to a single TK value.
sorted_returns = monthly_returns.apply(
    lambda column: column.sort_values().values
)
tks = sorted_returns.apply(
    calculate_tk,
    axis=0,
    args=(v, w),
)

In [10]:
# Simple decile portfolio analysis (equally-weighted)
# Bucket tickers into ten TK quantiles labelled 0 (lowest TK) to 9 (highest TK)
# and line them up with the TK value and the held-out month's return.
tk_decile = pd.qcut(tks, q=10, labels=range(10))
decile_analysis = pd.concat(
    [tks, tk_decile, target_month_returns],
    axis=1,
    keys=['TK', 'decile', 'NextMonthReturn'],
)

In [12]:
# Show each ticker's TK value, TK decile and next-month return
decile_analysis

Unnamed: 0_level_0,TK,decile,NextMonthReturn
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
QDEF,-0.023060,6,0.036094
IBND,-0.014700,7,0.026897
JRO,-0.034191,5,0.031746
CLAR,-0.035739,4,0.069444
FYT,-0.042232,4,0.086996
...,...,...,...
TSEM,-0.064524,2,0.055601
DHC,-0.099938,1,-0.065760
WK,-0.018787,7,0.221763
SCHV,-0.024664,6,0.035318


In [13]:
# Average TK value and next-month return within each TK decile
decile_analysis.groupby('decile').mean()

Unnamed: 0_level_0,TK,NextMonthReturn
decile,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-0.157304,0.110517
1,-0.085979,0.112594
2,-0.063106,0.099164
3,-0.04977,0.066822
4,-0.039221,0.063381
5,-0.030116,0.077472
6,-0.022583,0.047678
7,-0.014837,0.040396
8,-0.002568,0.033262
9,3.478535,0.029322


In [None]:
# Fama-MacBeth regression
# (get_available_datasets() lists the dataset names the reader accepts.)
# NOTE(review): the dates below are hard-coded and start a year later than
# `start_date` - confirm this window is intended.
ff_tables = web.DataReader('F-F_Research_Data_Factors', 'famafrench', '2016-01-01', '2020-12-31')
# Keep the first table, drop the RF column and shorten the market factor's name.
ff_data = (
    ff_tables[0]
    .drop(columns=['RF'])
    .rename(columns={'Mkt-RF': 'Mkt'})
)