In [1]:
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as st
import statsmodels.api as sm
import time

In [2]:
### SET PARAMETERS ###

# set start date
start = 8

# set holding period
H = 7

# set lag for momentum 
L = 7

# set number of quantiles
Q = 5

In [3]:
### READ IN DATA ###

stop = 730

rfrate_df = pd.read_csv(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\rfrate.csv")
rfrate = rfrate_df.iloc[:stop,1].to_numpy()

prices_df = pd.read_csv(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\prices.csv")
prices = prices_df.iloc[:stop,1:].to_numpy()

volume_df = pd.read_csv(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\volume.csv")
volume = volume_df.iloc[:stop,1:].to_numpy()

mktcap_df = pd.read_csv(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\mktcap.csv")
mktcap = mktcap_df.iloc[:stop,1:].to_numpy()

T = prices.shape[0]
N = prices.shape[1]
        
### COMPUTE RETURNS ###

# get risk-free rate
r_f = rfrate[start:]

# get returns
returns = np.zeros((T-start,N))
for t in range(start,T):
    for i in range(N):
        returns[t-start,i] = 100*(prices[t,i]/prices[t-1,i]-1)
        
### COMPUTE FACTORS ###
        
# get size
size = mktcap[start:,:]
size_0 = mktcap[start-1,:]
np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\size.npz",data=size)

# get value
value = np.zeros((T-start,N))
for t in range(start,T):
    for i in range(N):
        if volume[t,i] == 0:
            value[t-start,i] = value[t-start-1,i]
        else:
            value[t-start,i] = mktcap[t,i]/volume[t,i]
        
value_0 = np.zeros(N)
for i in range(N):
    value_0[i] = mktcap[start-1,i]/volume[start-1,i]
        
# get momentum
momentum = np.zeros((T-start,N))
for t in range(start,T):
    for i in range(N):
        prior_returns = np.zeros(L)
        for l in range(L):
            prior_returns[l] = 100*(prices[t-l,i]/prices[t-l-1,i]-1)
        momentum[t-start,i] = np.mean(prior_returns)

momentum_0 = np.zeros(N)
for i in range(N):
    prior_returns_0 = np.zeros(L)
    for l in range(L):
        prior_returns_0[l] = 100*(prices[start-l-1,i]/prices[start-l-2,i]-1)
    momentum_0[i] = np.mean(prior_returns_0)
    
np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\momentum.npz",data=momentum)

## COMPUTE INSTRUMENT ###

# get supply
supply = np.zeros((T-start,N))
for t in range(start,T):
    for i in range(N):
        supply[t-start,i] = mktcap[t,i]/prices[t,i]
        
np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\supply.npz",data=supply)

# get powind

coin_list = prices_df.columns[1:].values
coin_list_pow = ['bitcoin','ethereum','dogecoin','litecoin','bitcoin-cash','ethereum-classic',
                 'monero','zcash','bitcoin-cash-sv','kadena','ravencoin','siacoin','syscoin',
                 'digibyte','nervos-network']

powind = np.zeros(N)
for i in range(N):
    if coin_list[i] in coin_list_pow:
        powind[i] = 1
np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\powind.npz",data=powind)

# save rfrate

np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\rfrate.npz",data=rfrate[start:])
        
### UPDATE DIMENSIONS ###
T = returns.shape[0]
N = returns.shape[1]

In [4]:
### COMPUTE QUANTILE PORTFOLIOS RETURNS ###

def get_quantile_portfolio_returns(H,Q,returns,size,size_0,factor,factor_0,flip=0,verbose=0):
    
    # read dimensions
    T = returns.shape[0]
    N = returns.shape[1]
    
    # set parameters
    Q_break = int(N/Q)
    Q_portfolio_returns = np.zeros((T,Q))
    
    # initial objects
    T_hold = 0
    sorted_factor_idx = np.zeros(N)
    if flip == 0:
        sorted_factor_idx = np.flip(np.argsort(factor_0))
    else:
        sorted_factor_idx = np.argsort(factor_0)
        
    # compute returns
    for t in range(T):
        for q in range(Q):
            if T_hold == 0:
                if q == (Q-1):
                    Q_curr_idx = sorted_factor_idx[(q*Q_break):N]
                    Q_curr_size = np.sum(size_0[Q_curr_idx])
                    for i in range(len(Q_curr_idx)):
                        Q_portfolio_returns[t,q] += returns[t,Q_curr_idx[i]]*(size_0[Q_curr_idx[i]]/Q_curr_size)
                else:
                    Q_curr_idx = sorted_factor_idx[(q*Q_break):((q+1)*Q_break)]
                    Q_curr_size = np.sum(size_0[Q_curr_idx])
                    for i in range(len(Q_curr_idx)):
                        Q_portfolio_returns[t,q] += returns[t,Q_curr_idx[i]]*(size_0[Q_curr_idx[i]]/Q_curr_size)
            else:
                if q == (Q-1):
                    Q_curr_idx = sorted_factor_idx[(q*Q_break):N]
                    Q_curr_size = np.sum(size[T_hold,Q_curr_idx])
                    for i in range(len(Q_curr_idx)):
                        Q_portfolio_returns[t,q] += returns[t,Q_curr_idx[i]]*(size[T_hold,Q_curr_idx[i]]/Q_curr_size)
                else:
                    Q_curr_idx = sorted_factor_idx[(q*Q_break):((q+1)*Q_break)]
                    Q_curr_size = np.sum(size[T_hold,Q_curr_idx])
                    for i in range(len(Q_curr_idx)):
                        Q_portfolio_returns[t,q] += returns[t,Q_curr_idx[i]]*(size[T_hold,Q_curr_idx[i]]/Q_curr_size)
        if t%H == (H-1):
            T_hold = t
            if flip == 0:
                sorted_factor_idx = np.flip(np.argsort(factor[t,:]))
            else:
                sorted_factor_idx = np.argsort(factor[t,:])
                
    if verbose >= 1:
        print(np.mean(Q_portfolio_returns,axis=0))
        if verbose >= 2:
            print(st.ttest_rel(Q_portfolio_returns[:,(1-1)],Q_portfolio_returns[:,(Q-1)],alternative='greater'))
    
    return Q_portfolio_returns

In [29]:
np.mean(momentum_qpr[365:,0])

0.46794644075004377

In [5]:
### COMPUTE SIZE-FACTOR PORTFOLIO RETURNS ###

# for t in range(T):
#     print(np.sum(np.flip(np.sort(size[t,:]))[:20])/np.sum(np.flip(np.sort(size[t,:]))))

def get_size_factor_portfolio_returns(H,returns,size,size_0,factor,factor_0,verbose=0):
    
    # read dimensions
    T = returns.shape[0]
    N = returns.shape[1]

    # set parameters
    S_break = np.array([0,20,100])
    F_break = np.array([0,30,70,100])
    SF_portfolio_returns = np.zeros((T,2,3))

    # initial objects
    T_hold = 0
    S_sorted_idx = np.flip(np.argsort(size_0))
    F_sorted_idx = np.flip(np.argsort(factor_0))

    # compute returns
    for t in range(T):
        for s in range(2):
            for f in range(3):
                S_curr_idx = S_sorted_idx[S_break[s]:S_break[s+1]]
                F_curr_idx = F_sorted_idx[F_break[f]:F_break[f+1]]
                SF_curr_idx = np.intersect1d(S_curr_idx,F_curr_idx)
                if T_hold == 0:
                    SF_curr_size = np.sum(size_0[SF_curr_idx])
                    for i in range(len(SF_curr_idx)):
                        SF_portfolio_returns[t,s,f] += returns[t,SF_curr_idx[i]]*(size_0[SF_curr_idx[i]]/SF_curr_size)
                else:
                    SF_curr_size = np.sum(size[T_hold,SF_curr_idx])
                    for i in range(len(SF_curr_idx)):
                        SF_portfolio_returns[t,s,f] += returns[t,SF_curr_idx[i]]*(size[T_hold,SF_curr_idx[i]]/SF_curr_size)
        if t%H == (H-1):
            T_hold = t
            S_sorted_idx = np.flip(np.argsort(size[T_hold,:]))
            F_sorted_idx = np.flip(np.argsort(factor[T_hold,:]))
            
    if verbose >= 1:
        print(np.array([[np.mean(SF_portfolio_returns[:,0,0]),np.mean(SF_portfolio_returns[:,0,1]),np.mean(SF_portfolio_returns[:,0,2])],
                        [np.mean(SF_portfolio_returns[:,1,0]),np.mean(SF_portfolio_returns[:,1,1]),np.mean(SF_portfolio_returns[:,1,2])]]))
        if verbose >= 2:
            print("1:",st.ttest_rel(SF_portfolio_returns[:,0,0],SF_portfolio_returns[:,1,0],alternative='less'))
            print("2:",st.ttest_rel(SF_portfolio_returns[:,0,1],SF_portfolio_returns[:,1,1],alternative='less'))
            print("3:",st.ttest_rel(SF_portfolio_returns[:,0,2],SF_portfolio_returns[:,1,2],alternative='less'))
            print("B:",st.ttest_rel(SF_portfolio_returns[:,0,0],SF_portfolio_returns[:,0,2],alternative='greater'))
            print("S:",st.ttest_rel(SF_portfolio_returns[:,1,0],SF_portfolio_returns[:,1,2],alternative='greater'))
            
    return SF_portfolio_returns

In [6]:
### COMPUTE FACTORS ###

def get_market(r_f,returns,size,size_0):
    r_MKT = -r_f
    total_size_0 = np.sum(size_0)
    for i in range(N):
        r_MKT[0] += returns[0,i]*(size_0[i]/total_size_0)
    for t in range(1,T):
        total_size = np.sum(size[t,:])
        for i in range(N):
            r_MKT[t] += returns[t,i]*(size[t,i]/total_size)
    return r_MKT

def get_factor(H,r_f,returns,size,size_0,factor,factor_0,verbose=0):
    SF_portfolio_returns = get_size_factor_portfolio_returns(H,returns,size,size_0,factor,factor_0,verbose)
    r_B1 = SF_portfolio_returns[:,0,0]
    r_B2 = SF_portfolio_returns[:,0,1]
    r_B3 = SF_portfolio_returns[:,0,2]
    r_S1 = SF_portfolio_returns[:,1,0]
    r_S2 = SF_portfolio_returns[:,1,1]
    r_S3 = SF_portfolio_returns[:,1,2]
    r_SMB = -r_f
    r_1M3 = -r_f
    for t in range(T):
        r_SMB[t] += ((r_S1[t]+r_S2[t]+r_S3[t])/3)-((r_B1[t]+r_B2[t]+r_B3[t])/3)
        r_1M3[t] += ((r_B1[t]+r_S1[t])/2)-((r_B3[t]+r_S3[t])/2)
    return r_SMB, r_1M3

In [7]:
size_qpr = get_quantile_portfolio_returns(H,Q,returns,size,size_0,size,size_0,flip=1,verbose=2)
momentum_qpr = get_quantile_portfolio_returns(H,Q,returns,size,size_0,momentum,momentum_0,verbose=2)
value_qpr = get_quantile_portfolio_returns(H,Q,returns,size,size_0,value,value_0,verbose=2)

r_MKT = get_market(r_f,returns,size,size_0)
r_SMB, r_WML = get_factor(H,r_f,returns,size,size_0,momentum,momentum_0,verbose=2)

r = np.zeros((T,N))
for t in range(T):
    r[t,:] = returns[t,:]-r_f[t]
F = np.column_stack((r_MKT,r_SMB,r_WML))

np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\r.npz",r=r)
np.savez(r"C:\Users\zhubr\OneDrive\Desktop\ECON 492\data\old-i\F.npz",F=F)

[1.18859584 0.84850247 0.69217609 0.68150765 0.48281357]
Ttest_relResult(statistic=4.588825022940104, pvalue=2.9550062585051545e-06)
[0.90473138 0.63600739 0.5493092  0.44984996 0.32878139]
Ttest_relResult(statistic=2.459993116880661, pvalue=0.007149841789788734)
[0.61469556 0.32430608 0.73414268 0.77471655 0.42219522]
Ttest_relResult(statistic=1.0487578591027995, pvalue=0.14744924243344804)
[[0.68983887 0.49447034 0.18466893]
 [0.81999903 0.66640631 0.71468489]]
1: Ttest_relResult(statistic=-0.7095531090637937, pvalue=0.2391894039644924)
2: Ttest_relResult(statistic=-1.3700170117228978, pvalue=0.08571026660549969)
3: Ttest_relResult(statistic=-3.150414270757705, pvalue=0.0008740778451537784)
B: Ttest_relResult(statistic=2.1013935756123368, pvalue=0.018103692558876897)
S: Ttest_relResult(statistic=0.7136519996235604, pvalue=0.2379212869662432)


# Compare factor model with benchmark CAPM

In [8]:
# fit CAPM
α_CAPM = np.zeros(N)
ρ_CAPM = np.zeros(N)
σ_CAPM = np.zeros(N)
for i in range(N):
    y = r[:,i]
    X = np.column_stack((np.ones(T),r_MKT))
    mod = sm.OLS(y,X)
    res = mod.fit()
    α_CAPM[i] = np.abs(res.params[0])
    ρ_CAPM[i] = res.rsquared_adj
    σ_CAPM[i] = la.norm(y-mod.predict(res.params,X))/np.sqrt(T)

# show average alpha and R2adj
print("Average |α|:",np.mean(α_CAPM))
print("Average R^2:",np.mean(ρ_CAPM))
print("Average σ^2:",np.mean(σ_CAPM))

Average |α|: 0.32269160254366364
Average R^2: 0.3771075115155573
Average σ^2: 6.637511939281218


In [9]:
# fit factor model
α_FM = np.zeros(N)
ρ_FM = np.zeros(N)
σ_FM = np.zeros(N)
for i in range(N):
    y = r[:,i]
    X = np.column_stack((np.ones(T),r_MKT,r_SMB,r_WML))
    mod = sm.OLS(y,X)
    res = mod.fit()
    α_FM[i] = np.abs(res.params[0])
    ρ_FM[i] = res.rsquared_adj
    σ_FM[i] = la.norm(y-mod.predict(res.params,X))/np.sqrt(T)

# show average alpha and R2adj
print("Average |α|:",np.mean(α_FM))
print("Average R^2:",np.mean(ρ_FM))
print("Average σ^2:",np.mean(σ_FM))

Average |α|: 0.29196907508632175
Average R^2: 0.4109619010717526
Average σ^2: 6.434323642920367


In [10]:
print(st.ttest_rel(α_CAPM,α_FM,alternative='greater'))
print(st.ttest_rel(ρ_CAPM,ρ_FM,alternative='less'))
print(st.ttest_rel(σ_CAPM,σ_FM))

Ttest_relResult(statistic=2.8602173941539806, pvalue=0.002582080291672419)
Ttest_relResult(statistic=-13.245040033598384, pvalue=6.0014815095439506e-24)
Ttest_relResult(statistic=13.108212823942855, pvalue=2.318529297652082e-23)


# Summary statistics

In [11]:
i = 0
print(np.mean(r[:,i]))
print(np.std(r[:,i]))
print(st.skew(r[:,i]))
print(st.kurtosis(r[:,i]))
print(np.percentile(r[:,i],5))
print(np.percentile(r[:,i],95))

0.36659695044241963
4.11502648108964
0.10495659800470612
1.551271030028106
-6.146063788153283
7.603922046849376


In [12]:
i = 1
print(np.mean(r[:,i]))
print(np.std(r[:,i]))
print(st.skew(r[:,i]))
print(st.kurtosis(r[:,i]))
print(np.percentile(r[:,i],5))
print(np.percentile(r[:,i],95))

0.6680698997397091
5.394957251017586
-0.08100999822315703
2.9631691644373195
-7.502506872492214
8.982019360189478


In [14]:
i = 7
print(np.mean(r[:,i]))
print(np.std(r[:,i]))
print(st.skew(r[:,i]))
print(st.kurtosis(r[:,i]))
print(np.percentile(r[:,i],5))
print(np.percentile(r[:,i],95))

1.972988619722073
20.151848959663553
11.832996885301188
187.43986819237395
-10.386442832379657
18.78044277607217
