In [None]:
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
import pandas as pd
import scipy.optimize as opt
import time

# Data processing

In [None]:
# Dataset tag; it is reused further down when naming result and figure files.
data_str = "oilgas-10y"

# Read the CSV, discard the 'Date' column, reverse the row order and cast
# every remaining column to float, all in a single chain.
S_df = (
    pd.read_csv("C:\\Users\\zhubr\\Desktop\\STAT 491\\data-"+data_str+".csv")
    .drop(columns=['Date'])
    .iloc[::-1]
    .astype('float')
)
S_df.head()

In [None]:
# Column-wise z-score: remove each column's mean, then scale by its standard deviation.
S_norm_df = S_df.sub(S_df.mean()).div(S_df.std())
S_norm_df

# AR(p) model

In [None]:
class arfit_output():
    """Bundle of the parameters produced by one AR(p) portfolio fit.

    Stored as given:
        c : intercept of the AR recursion
        ϕ : lag coefficients
        σ : scale parameter
        w : portfolio weights
        l : objective value supplied by the fitting routine
    Derived:
        μ : c/(1-sum(ϕ)); when sum(ϕ) is exactly 1 the denominator is
            replaced by 1e-8 so the division stays defined.
    """
    def __init__(self,c,ϕ,σ,w,l):
        ϕ_total = np.sum(ϕ)
        # Guard the unit-sum case, which would otherwise divide by zero.
        denom = 1e-8 if (ϕ_total==1) else 1-ϕ_total
        self.c, self.ϕ, self.σ, self.w, self.l = c, ϕ, σ, w, l
        self.μ = c/denom
        
class arfit_eval():
    """Holds the log-likelihood, AIC and BIC computed for a fitted model."""
    def __init__(self,loglik,aic,bic):
        self.loglik, self.aic, self.bic = loglik, aic, bic
        
def likelihood(p,S,arfit):
    """Gaussian AR(p) log-likelihood, AIC and BIC of the series S@arfit.w.

    The first p observations are used only as lags, so T-p residuals enter
    the likelihood. Both information criteria count p+2 parameters.

    Returns an arfit_eval with fields loglik, aic and bic.
    """
    v = S@arfit.w
    n_obs = len(v)-p
    σ2 = arfit.σ*arfit.σ
    # Sum of squared one-step residuals, accumulated in time order.
    sse = 0
    for t in range(p,len(v)):
        lags = v[(t-p):t][::-1]          # v[t-1], v[t-2], ..., v[t-p]
        resid = v[t]-arfit.c-arfit.ϕ@lags
        sse = sse+resid**2
    loglik = -sse/(2*σ2)-0.5*n_obs*(np.log(2*np.pi)+np.log(σ2))
    n_par = p+2
    aic = -2*loglik+2*n_par
    bic = -2*loglik+np.log(n_obs)*n_par
    return arfit_eval(loglik,aic,bic)
    
def portmanteau(p,S,arfit):
    """Average of squared lagged-product ratios for lags 1..p.

    The series is S@arfit.w centred at arfit.μ. For each lag the mean
    product v[t]*v[t+lag] (t starting at p) is divided by the mean square
    of v[p:], squared, and the p squared ratios are averaged.
    """
    v_cent = S@arfit.w-arfit.μ
    T = len(v_cent)
    υ = np.sum(v_cent[p:]**2)/(T-p)
    total = 0
    for lag in range(1,p+1):
        γ_k = 0
        # Pair each v[t] with v[t+lag], t = p, ..., T-lag-1.
        for lead,trail in zip(v_cent[p:T-lag],v_cent[p+lag:]):
            γ_k = γ_k+lead*trail
        γ_k = γ_k/(T-p-lag)
        total = total+(γ_k/υ)**2
    return total/p

def crossingrate(p,S,arfit):
    """Fraction of consecutive pairs (from index p on) whose product is <= 0.

    The series is S@arfit.w centred at arfit.μ; a non-positive product of
    neighbouring values is counted as one crossing of zero.
    """
    v_cent = S@arfit.w-arfit.μ
    T = len(v_cent)
    # Products v[t]*v[t+1] for t = p, ..., T-2.
    neighbour_products = v_cent[p:T-1]*v_cent[p+1:]
    n_cross = int(np.count_nonzero(neighbour_products<=0))
    return n_cross/(T-p-1)

def get_windows(p,S_df,window_IS,window_OS):
    """Count the rolling windows that fit into S_df.

    Starting at row index window_IS, the index advances by window_OS+p
    until it reaches the number of rows of S_df; the number of steps taken
    is returned.

    Parameters
    ----------
    p : lag order, added to the out-of-sample length to form the stride
    S_df : any object exposing .shape[0] as its number of rows
    window_IS : in-sample window length (initial index)
    window_OS : out-of-sample window length

    Raises
    ------
    ValueError
        If window_OS+p is not positive, since the index would never advance.
    """
    stride = window_OS+p
    if stride <= 0:
        raise ValueError("window_OS+p must be positive, got "+str(stride))
    n_rows = S_df.shape[0]
    windows = 0
    idxcurr = window_IS
    while idxcurr < n_rows:
        idxcurr = idxcurr+stride
        windows = windows+1
    return windows

def get_D_matrix(p,S,ϕ):
    """Apply the AR(p) filter with coefficients ϕ to every column of S.

    Row t-p of the result holds S[t,:] - sum_k ϕ[k]*S[t-1-k,:] for
    t = p, ..., T-1, so ϕ[0] multiplies the most recent lag.

    Parameters
    ----------
    p : number of lags
    S : array of shape (T, N)
    ϕ : sequence of p lag coefficients

    Returns
    -------
    D : float array of shape (T-p, N)

    Raises
    ------
    ValueError
        If ϕ does not contain exactly p coefficients or S has fewer than p rows.
    """
    S = np.asarray(S)
    ϕ = np.ravel(ϕ)
    T, N = S.shape
    if ϕ.shape[0] != p:
        raise ValueError("expected "+str(p)+" lag coefficients, got "+str(ϕ.shape[0]))
    if T < p:
        raise ValueError("S has fewer rows than the lag order p")
    # Start from the current observations, then subtract one lag at a time;
    # each slice is the whole block of rows shifted back by k+1 steps.
    D = np.array(S[p:],dtype=float)
    for k in range(p):
        D -= ϕ[k]*S[(p-1-k):(T-1-k)]
    return D

def standardize(array):
    """Z-score each row of a 2-D array; rows with zero spread stay all-zero."""
    n_rows = np.shape(array)[0]
    out = np.zeros(np.shape(array))
    for r in range(n_rows):
        row = array[r,:]
        spread = np.std(row)
        if spread == 0:
            # Constant row: leave the zeros that are already there.
            continue
        out[r,:] = (row-np.mean(row))/spread
    return out

# Lasso

## Joint maximum likelihood

In [None]:
# def lsloss_obj_l1(params,D,η):
#     T = D.shape[0]
#     N = D.shape[1]
#     DT = np.transpose(D)
#     eT = np.ones(T)
#     w = params
#     return la.norm(D@w-c*eT,ord=2)**2+η*la.norm(w,ord=1)
#     return la.norm(D@w,ord=2)**2-(w@(DT@eT))**2/T+η*la.norm(w,ord=1)

def joint_loglik_obj_l1(params,p,S,η):
    """L1-penalised joint objective in (c, ϕ, σ, w).

    params is laid out as [c, ϕ_1..ϕ_p, σ, w_1..w_N]. With v = S@w and
    residuals r_t = v[t] - c - sum_k ϕ_k v[t-k] for t = p..T-1, the value is

        0.5*log(σ²) + (sum_t r_t² + η*||w||_1) / (2*(T-p)*σ²)

    The residuals are computed with array slices rather than a Python loop
    over t, because this function is evaluated many times by the optimiser.
    """
    T = S.shape[0]
    N = S.shape[1]
    c = params[0]
    ϕ = params[1:(p+1)]
    σ = params[p+1]
    w = params[(p+2):(N+p+2)]
    v = S@w
    # Residuals for t = p..T-1; lag k+1 is the same block shifted back k+1 steps.
    resid = v[p:]-c
    for k in range(p):
        resid = resid-ϕ[k]*v[(p-1-k):(T-1-k)]
    sse = resid@resid
    return 0.5*np.log(σ*σ)+(sse+η*la.norm(w,ord=1))/(2*(T-p)*(σ*σ))

def get_constraints(p,N,ϕ_min,ϕ_max):
    """Linear constraints for the joint parameter vector [c, ϕ, σ, w].

    Rows 0..p+1 pick out c, ϕ and σ individually; the last row sums the N
    weights. Bounds: c free, ϕ_min <= ϕ <= ϕ_max, σ >= 0, sum(w) == 1.
    """
    n_lead = p+2                              # entries before the weights: c, ϕ, σ
    A = np.zeros((n_lead+1,N+n_lead))
    A[:n_lead,:n_lead] = np.eye(n_lead)
    A[n_lead,n_lead:(N+n_lead)] = 1
    lb = np.concatenate(([-np.inf],np.ravel(ϕ_min),[0,1]))
    ub = np.concatenate(([np.inf],np.ravel(ϕ_max),[np.inf,1]))
    return opt.LinearConstraint(A,lb,ub)

def mrpo_arfit_jml_l1(p,S,ϕ_min,ϕ_max,η,ϵ=1e-6):
    """Joint fit of (c, ϕ, σ, w) by constrained minimisation of the L1 objective.

    Starts from c=1, ϕ_i=1/(2p), σ=1, w_i=1/N and runs 'trust-constr' under
    the constraints from get_constraints. Weights smaller than ϵ in absolute
    value are set to zero, the rest are rescaled to sum to one, and the
    objective is re-evaluated at the cleaned parameters.

    Returns an arfit_output.
    """
    N = S.shape[1]
    params_0 = np.concatenate(([1],np.ones(p)/(2*p),[1],np.ones(N)/N))
    fit = opt.minimize(
        joint_loglik_obj_l1,
        params_0,
        args=(p,S,η),
        method='trust-constr',
        constraints=get_constraints(p,N,ϕ_min,ϕ_max),
    )
    w_slice = slice(p+2,N+p+2)
    c_star, ϕ_star, σ_star = fit.x[0], fit.x[1:(p+1)], fit.x[p+1]
    w_raw = fit.x[w_slice]
    # Threshold tiny weights to exact zeros, then renormalise.
    w_star = np.where(np.abs(w_raw)<ϵ,0,w_raw)
    w_star = w_star/np.sum(w_star)
    # Write the cleaned weights back so the reported objective matches them.
    fit.x[w_slice] = w_star
    l_star = joint_loglik_obj_l1(fit.x,p,S,η)
    return arfit_output(c_star,ϕ_star,σ_star,w_star,l_star)

## Projected subgradient descent

In [None]:
def project(w):
    """Shift every entry of w by the same amount so the entries sum to one."""
    excess_per_entry = (np.sum(w)-1)/len(w)
    return w-excess_per_entry

def subgrad(w,D,η):
    T = D.shape[0]
    N = D.shape[1]
    DT = np.transpose(D)
    eT = np.ones(T)    
    subgrad_l1 = η*np.sign(w)
    return 2*(DT@D@w)-(2*(w@(DT@eT))/T)*(DT@eT)+subgrad_l1

def ls_loss_l1_reg(params,D,η):
    """L1-regularised least-squares loss in the weights alone.

    Value: ||D w||² - (w·Dᵀe)²/T + η*||w||_1, where w = params, e is the
    all-ones vector and T = D.shape[0].
    """
    n_obs = D.shape[0]
    col_sums = np.transpose(D)@np.ones(n_obs)
    sq_norm = la.norm(D@params,ord=2)**2
    mean_term = (params@col_sums)**2/n_obs
    penalty = η*la.norm(params,ord=1)
    return sq_norm-mean_term+penalty

def projected_subgradient_descent(D,η,n_iter,ζ_init=1e-3):
    """Projected subgradient descent on ls_loss_l1_reg over weights summing to one.

    One initial step of size ζ_init is taken from the equal-weight vector.
    Every later step size is |dw·dg| / ||dg||², built from the last change
    in the iterate (dw) and in the subgradient (dg). When ||dg||² is zero
    or not finite that ratio is undefined (it would turn every later
    iterate into NaN), so ζ_init is used for that step instead.

    Parameters
    ----------
    D : array of shape (T, N)
    η : L1 penalty weight
    n_iter : number of iterations recorded
    ζ_init : initial (and fallback) step size

    Returns
    -------
    w_hist : (n_iter, N) array of iterates
    l_hist : (n_iter,) array of loss values at those iterates
    """
    N = D.shape[1]
    w_hist = np.zeros((n_iter,N))
    l_hist = np.zeros(n_iter)
    # Bootstrap: two iterates are needed before a step size can be formed.
    w_prev = np.ones(N)/N
    g_prev = subgrad(w_prev,D,η)
    w_curr = project(w_prev-ζ_init*g_prev)
    g_curr = subgrad(w_curr,D,η)
    for t in range(n_iter):
        dw = w_curr-w_prev
        dg = g_curr-g_prev
        dg_sq = la.norm(dg,ord=2)**2
        if dg_sq > 0 and np.isfinite(dg_sq):
            ζ_curr = (np.abs(dw@dg))/dg_sq
        else:
            # Subgradient did not change (e.g. the iterate is stuck): avoid 0/0.
            ζ_curr = ζ_init
        w_next = project(w_curr-ζ_curr*g_curr)
        w_prev, g_prev = w_curr, g_curr
        w_curr = w_next
        g_curr = subgrad(w_next,D,η)
        w_hist[t] = w_curr
        l_hist[t] = ls_loss_l1_reg(w_curr,D,η)
    return w_hist, l_hist

def mrpo_arfit_l1(p,S,ϕ,η,n_iter):
    """Fit sparse weights for fixed lag coefficients ϕ.

    Runs projected subgradient descent on the filtered data D, takes the
    last iterate, zeroes entries below 1e-3 in absolute value and rescales
    the rest to sum to one. c and σ are then computed from those weights
    and the joint L1 objective is evaluated at the resulting parameters.

    Returns an arfit_output.
    """
    D = get_D_matrix(p,S,ϕ)
    T = D.shape[0]
    ones_T = np.ones(T)
    w_hist, _ = projected_subgradient_descent(D,η,n_iter)
    w_last = w_hist[-1]
    w_sparse = np.where(abs(w_last)<1e-3,0,w_last)
    w_star = w_sparse/np.sum(w_sparse)
    # Intercept: average of the filtered portfolio series D@w.
    c_star = (w_star@(np.transpose(D)@ones_T))/T
    resid = D@w_star-c_star*ones_T
    σ_star = np.sqrt((la.norm(resid,ord=2)**2+η*la.norm(w_star,ord=1))/T)
    # Same layout as the joint objective expects: [c, ϕ, σ, w].
    θ_star = np.concatenate(([c_star],ϕ,[σ_star],w_star))
    l_star = joint_loglik_obj_l1(θ_star,p,S,η)
    return arfit_output(c_star,ϕ,σ_star,w_star,l_star)

## Simulated annealing via likelihood

In [None]:
def loglik_obj_l1(params,p,S,η,n_iter):
    """Objective in ϕ alone: the remaining parameters come from mrpo_arfit_l1.

    params holds the p lag coefficients. The returned value is
    0.5*log(σ²) + (||D w - c e||² + η*||w||_1) / (2*T*σ²), with D the
    filtered data for these coefficients and T = D.shape[0].
    """
    D = get_D_matrix(p,S,params)
    T = D.shape[0]
    inner = mrpo_arfit_l1(p,S,params,η,n_iter)
    σ = inner.σ
    resid = D@inner.w-inner.c*np.ones(T)
    penalised_sse = la.norm(resid,ord=2)**2+η*la.norm(inner.w,ord=1)
    return 0.5*np.log(σ*σ)+penalised_sse/(2*T*σ*σ)

def mrpo_arfit_sa_l1(p,S,η,n_iter,n_sims):
    """Simulated annealing over the lag coefficients ϕ.

    Starting from ϕ_i = 1/(2p), each of the n_sims rounds proposes
    ϕ + Normal(0, 0.05) noise. A proposal with any |ϕ_i| >= 1 is rejected
    outright. Otherwise it is accepted if it lowers loglik_obj_l1, or with
    probability exp(-Δ/α_t) where α_t = 0.01/(t+1). The final ϕ is refitted
    with mrpo_arfit_l1.

    The range check comes before the objective evaluation so that no inner
    fit is spent on a proposal that is rejected anyway; random numbers are
    drawn in the same order as before. The current state is tracked
    directly, so n_sims == 0 simply refits the starting point.

    Returns an arfit_output.
    """
    ϕ_curr = np.ones(p)/(2*p)
    l_curr = loglik_obj_l1(ϕ_curr,p,S,η,n_iter)
    for t in range(n_sims):
        α_curr = 0.01/(t+1)
        ϕ_prop = ϕ_curr+np.random.normal(0,0.05,size=p)
        if (np.max(np.abs(ϕ_prop)) >= 1):
            continue
        l_prop = loglik_obj_l1(ϕ_prop,p,S,η,n_iter)
        l_diff = l_prop-l_curr
        # Short-circuit: the uniform draw happens only for non-improving moves.
        if (l_diff < 0) or (np.random.uniform(0,1) < np.exp(-l_diff/α_curr)):
            ϕ_curr = ϕ_prop
            l_curr = l_prop
    return mrpo_arfit_l1(p,S,ϕ_curr,η,n_iter)

## Base functions

In [None]:
def joint_loglik_obj(params,p,S):
    """Unpenalised joint objective in (c, ϕ, σ, w).

    params is laid out as [c, ϕ_1..ϕ_p, σ, w_1..w_N]. With v = S@w and
    residuals r_t = v[t] - c - sum_k ϕ_k v[t-k] for t = p..T-1, the value is

        0.5*log(σ²) + sum_t r_t² / (2*(T-p)*σ²)

    Residuals are computed with array slices instead of a Python loop over t.
    """
    T = S.shape[0]
    N = S.shape[1]
    c = params[0]
    ϕ = params[1:(p+1)]
    σ = params[p+1]
    w = params[(p+2):(N+p+2)]
    v = S@w
    # Residuals for t = p..T-1; lag k+1 is the same block shifted back k+1 steps.
    resid = v[p:]-c
    for k in range(p):
        resid = resid-ϕ[k]*v[(p-1-k):(T-1-k)]
    sse = resid@resid
    return 0.5*np.log(σ*σ)+sse/(2*(T-p)*(σ*σ))

def mrpo_arfit_cf(p,S,ϕ):
    """Closed-form c, w and σ for fixed lag coefficients ϕ.

    D is the AR-filtered data from get_D_matrix. With G = DᵀD,
    x = G⁻¹Dᵀe_T and y = G⁻¹e_N (e_T, e_N all-ones vectors), the intercept
    and weights are

        c = (y·z) / (β γ - β (x·z) + α (y·z)),   z = Dᵀe_T,
        w = c x + ((1 - α c)/β) y,

    where α = e_N·x, β = e_N·y and γ = e_T·e_T. σ is the root mean square
    of D w - c e_T. The two linear systems are solved directly rather than
    through an explicit inverse of G.

    Returns an arfit_output whose l field is joint_loglik_obj at the solution.
    """
    D = get_D_matrix(p,S,ϕ)
    n_obs, N = D.shape
    DT = np.transpose(D)
    G = DT@D
    eT = np.ones(n_obs)
    eN = np.ones(N)
    z = DT@eT
    x = la.solve(G,z)
    y = la.solve(G,eN)
    α = eN@x
    β = eN@y
    γ = eT@eT
    ϕ_star = ϕ
    c_star = (y@z)/(β*γ-β*(x@z)+α*(y@z))
    w_star = c_star*x+((1-α*c_star)/β)*y
    σ_star = np.sqrt(la.norm(D@w_star-c_star*eT,ord=2)**2/n_obs)
    θ_star_cϕ = np.append(np.array([c_star]),ϕ_star)
    θ_star_σw = np.append(np.array([σ_star]),w_star)
    θ_star = np.append(θ_star_cϕ,θ_star_σw)
    l_star = joint_loglik_obj(θ_star,p,S)
    return arfit_output(c_star,ϕ_star,σ_star,w_star,l_star)

# Here the only free parameters are ϕ_1,...,ϕ_p; c, w and σ are obtained from mrpo_arfit_cf.

def loglik_obj(params,p,S):
    """Objective in ϕ alone, with c, w and σ taken from mrpo_arfit_cf.

    params holds the p lag coefficients. The value returned is
    0.5*log(σ²) + ||D w - c e||² / (2*(T-p)*σ²), where D is the filtered
    data built by get_D_matrix (the same construction mrpo_arfit_cf uses)
    and T = S.shape[0].
    """
    T = S.shape[0]
    ϕ = params
    arfit_cf = mrpo_arfit_cf(p,S,ϕ)
    c = arfit_cf.c
    w = arfit_cf.w
    σ = arfit_cf.σ
    D = get_D_matrix(p,S,ϕ)
    eT = np.ones(T-p)
    l = 0.5*np.log(σ*σ)+(la.norm(D@w-c*eT,ord=2)**2)/(2*(T-p)*σ*σ)
    return l

def mrpo_arfit_ml(p,S,ϕ_min,ϕ_max):
    """Fit by minimising loglik_obj over ϕ within [ϕ_min, ϕ_max].

    L-BFGS-B is started from ϕ_i = 1/(2p); c, w and σ are then recovered in
    closed form at the optimal ϕ.

    Returns an arfit_output.
    """
    start = np.ones(p)/(2*p)
    fit = opt.minimize(
        loglik_obj,
        start,
        args=(p,S),
        method='L-BFGS-B',
        bounds=opt.Bounds(ϕ_min,ϕ_max),
    )
    ϕ_star = fit.x
    closed_form = mrpo_arfit_cf(p,S,ϕ_star)
    l_star = loglik_obj(ϕ_star,p,S)
    return arfit_output(closed_form.c,ϕ_star,closed_form.σ,closed_form.w,l_star)

## Run functions

In [None]:
# Choose parameters

p = 1
η = 0.1

# In-sample window: the first window_IS rows from idx_start.
# Out-of-sample window: starts p rows before the end of the in-sample window
# and extends window_OS rows past it.
idx_start = 0
window_IS = 210+p
window_OS = 42
S_IS = S_norm_df.to_numpy()[idx_start:(idx_start+window_IS),:]
S_OS = S_norm_df.to_numpy()[(idx_start+window_IS-p):(idx_start+window_IS+window_OS),:]
S = S_IS

# Box bounds applied to every lag coefficient.
ϕ_min = -np.ones(p)
ϕ_max = np.ones(p)

# Joint maximum likelihood

time_0 = time.time()
arfit_jml_l1 = mrpo_arfit_jml_l1(p,S,ϕ_min,ϕ_max,η)
time_1 = time.time()
print("Joint maximum likelihood\n","Elapsed time:",time_1-time_0)

print("c:",arfit_jml_l1.c)
print("ϕ:",arfit_jml_l1.ϕ)
print("σ:",arfit_jml_l1.σ)
print("w:",arfit_jml_l1.w)
print("μ:",arfit_jml_l1.μ)
print("l:",arfit_jml_l1.l)

# In-sample portfolio series (first p rows dropped) under the fitted weights.
v_IS_l1_jml = S_IS[p:,]@arfit_jml_l1.w
plt.plot(v_IS_l1_jml)
plt.grid()
plt.show()

# Verify with projected subgradient descent

# Reuse the lag coefficients found by the joint fit.
ϕ = arfit_jml_l1.ϕ
# At least 200 iterations; with η = 0.1 the second argument is below 200.
n_iter = int(max(200,np.power(η,2/3)*200))

time_0 = time.time()
arfit_l1 = mrpo_arfit_l1(p,S,ϕ,η,n_iter)
time_1 = time.time()
print("Projected subgradient descent\n","Elapsed time:",time_1-time_0)

print("c:",arfit_l1.c)
print("ϕ:",arfit_l1.ϕ)
print("σ:",arfit_l1.σ)
print("w:",arfit_l1.w)
print("μ:",arfit_l1.μ)
print("l:",arfit_l1.l)

v_IS_l1 = S_IS[p:,]@arfit_l1.w
plt.plot(v_IS_l1)
plt.grid()
plt.show()

# Simulated annealing via likelihood

n_sims = int(np.power(p,2/3)*200)

time_0 = time.time()
arfit_sa_l1 = mrpo_arfit_sa_l1(p,S,η,n_iter,n_sims)
time_1 = time.time()
print("Simulated annealing\n","Elapsed time:",time_1-time_0)

print("c:",arfit_sa_l1.c)
print("ϕ:",arfit_sa_l1.ϕ)
print("σ:",arfit_sa_l1.σ)
print("w:",arfit_sa_l1.w)
print("μ:",arfit_sa_l1.μ)
print("l:",arfit_sa_l1.l)

v_IS_sa_l1 = S_IS[p:,]@arfit_sa_l1.w
plt.plot(v_IS_sa_l1)
plt.grid()
plt.show()

# True when every root of the polynomial with coefficients [1, -ϕ_1, ..., -ϕ_p] has modulus below 1.
print("Stationarity condition:",(np.max(np.abs(np.roots(np.append(np.array([1]),-arfit_sa_l1.ϕ))))<1))

## Compare least-squares solutions

In [None]:
# Joint Optimization

time_0 = time.time()
arfit_jml_l1 = mrpo_arfit_jml_l1(p,S,ϕ_min,ϕ_max,η)
time_1 = time.time()

# All three weight vectors below are scored on the same filtered data D,
# built from the lag coefficients of the simulated-annealing fit.
ϕ = arfit_sa_l1.ϕ
D = get_D_matrix(p,S,ϕ)

print("\nJoint Optimization\nElapsed time:",time_1-time_0)
print(arfit_jml_l1.w)
print(ls_loss_l1_reg(arfit_jml_l1.w,D,η))

# Regularized least-squares optimization

T = D.shape[0]
N = D.shape[1]
params_0 = np.ones(N)/N
# Single equality constraint: the weights sum to one.
constraints = opt.LinearConstraint(np.ones(N),1,1)

time_0 = time.time()
fit = opt.minimize(ls_loss_l1_reg,params_0,args=(D,η),method='trust-constr',constraints=constraints)
time_1 = time.time()

w_inner = np.where(abs(fit.x)<1e-6,0,fit.x)
w_inner = w_inner/np.sum(w_inner)

print("\nRegularized least-squares Optimization\nElapsed time:",time_1-time_0)
print(w_inner)
print(ls_loss_l1_reg(w_inner,D,η))

# Projected subgradient descent optimization

n_iter = int(max(200,np.power(100*η,2/3)*200))

time_0 = time.time()
# The solver defined in this notebook is projected_subgradient_descent;
# the former name projected_gradient_descent does not exist here.
w_hist, l_hist = projected_subgradient_descent(D,η,n_iter)
time_1 = time.time()

w_pgd = w_hist[-1]
w_pgd = np.where(abs(w_pgd)<1e-3,0,w_pgd)
w_pgd = w_pgd/np.sum(w_pgd)

print("\nProjected subgradient descent optimization\nElapsed time:",time_1-time_0)
print(w_pgd)
print(ls_loss_l1_reg(w_pgd,D,η))

# Loss trajectory over the iterations.
plt.plot(l_hist)
plt.grid()
plt.show()

## Fix model order, vary tuning parameter

### Set parameters

In [None]:
# Model order used for the fits, and the largest order used to size the windows.
p = 1
p_max = 10

# Number of tuning-parameter grid points (the fitting loop uses η = j*0.01 for j = 0..η_max-1).
η_max = 10+1

# Window lengths; the in-sample window leaves room for p_max lags.
window_OS = 42
window_IS = 210+p_max
n_win = get_windows(p_max,S_norm_df,window_IS,window_OS)


### Fit portfolios

In [None]:
# Result containers: one row per rolling window, one column per η grid point.
frequency = np.zeros(S_df.shape[1])
cardinality = np.zeros((n_win,η_max))

jywealth_IS = np.zeros((n_win,η_max))
jywealth_OS = np.zeros((n_win,η_max))
jysharpe_IS = np.zeros((n_win,η_max))
jysharpe_OS = np.zeros((n_win,η_max))

portmanteau_IS = np.zeros((n_win,η_max))
portmanteau_OS = np.zeros((n_win,η_max))
crossingrate_IS = np.zeros((n_win,η_max))
crossingrate_OS = np.zeros((n_win,η_max))

loglik_IS_jml = np.zeros((n_win,η_max))
loglik_IS = np.zeros((n_win,η_max))
loglik_OS = np.zeros((n_win,η_max))
aic_IS = np.zeros((n_win,η_max))
aic_OS = np.zeros((n_win,η_max))
bic_IS = np.zeros((n_win,η_max))
bic_OS = np.zeros((n_win,η_max))

# Loop invariants. ϕ_min/ϕ_max are set here so the η > 0 fits do not rely on
# values left behind by the η == 0 branch or by an earlier cell.
S_all = S_norm_df.to_numpy()
ϕ_min = -np.ones(p)
ϕ_max = np.ones(p)
n_sims = int(np.power(p,2/3)*250)
n_iter = 250
# n_iter = int(max(250,np.power(η,2/3)*250))

# NOTE(review): jywealth and jysharpe are not defined anywhere in the visible
# notebook; they must be defined or imported before this cell is run.

np.random.seed(0)
for i in range(n_win):
    # The data slices depend only on the window index.
    idx_start = i*window_OS
    S_IS = S_all[(idx_start+p_max-p):(idx_start+window_IS),:]
    S_OS = S_all[(idx_start+window_IS-p):(idx_start+window_IS+window_OS),:]
    for j in range(η_max):
        η = j*0.01
        time_0 = time.time()
        if j == 0:
            # η == 0: unpenalised maximum-likelihood fit.
            arfit_ml = mrpo_arfit_ml(p,S_IS,ϕ_min,ϕ_max)
            arfit_curr = arfit_ml
        else:
            # η > 0: the joint fit is kept for comparison only; the statistics
            # below use the simulated-annealing fit.
            arfit_jml_l1 = mrpo_arfit_jml_l1(p,S_IS,ϕ_min,ϕ_max,η)
            # mrpo_arfit_sa_l1 takes (p,S,η,n_iter,n_sims); the stray ϵ argument is dropped.
            arfit_sa_l1 = mrpo_arfit_sa_l1(p,S_IS,η,n_iter,n_sims)
            arfit_curr = arfit_sa_l1
            arfit_eval_IS_jml = likelihood(p,S_IS,arfit_jml_l1)
            loglik_IS_jml[i,j] = arfit_eval_IS_jml.loglik
        frequency = frequency+np.abs(np.sign(arfit_curr.w))
        cardinality[i,j] = np.count_nonzero(arfit_curr.w)
        jywealth_IS[i,j] = jywealth(S_IS,S_IS,arfit_curr)
        jywealth_OS[i,j] = jywealth(S_IS,S_OS,arfit_curr)
        jysharpe_IS[i,j] = jysharpe(S_IS,S_IS,arfit_curr)
        jysharpe_OS[i,j] = jysharpe(S_IS,S_OS,arfit_curr)
        portmanteau_IS[i,j] = portmanteau(p,S_IS,arfit_curr)
        portmanteau_OS[i,j] = portmanteau(p,S_OS,arfit_curr)
        crossingrate_IS[i,j] = crossingrate(p,S_IS,arfit_curr)
        crossingrate_OS[i,j] = crossingrate(p,S_OS,arfit_curr)
        arfit_eval_IS = likelihood(p,S_IS,arfit_curr)
        arfit_eval_OS = likelihood(p,S_OS,arfit_curr)
        loglik_IS[i,j] = arfit_eval_IS.loglik
        loglik_OS[i,j] = arfit_eval_OS.loglik
        aic_IS[i,j] = arfit_eval_IS.aic
        aic_OS[i,j] = arfit_eval_OS.aic
        bic_IS[i,j] = arfit_eval_IS.bic
        bic_OS[i,j] = arfit_eval_OS.bic
        time_1 = time.time()
        print(i,j,"- elasped time -",time_1-time_0)

# Share of all (window, η) fits in which each asset received a nonzero weight.
frequency = frequency/(n_win*η_max)

# NOTE(review): these file names have no '-' between data_str and "l1", unlike
# the figure names in the plotting cell; left unchanged in case other code
# loads them by these names.
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-frequency.npz",frequency)  
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-cardinality.npz",cardinality)        
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-portmanteau-IS.npz",portmanteau_IS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-portmanteau-OS.npz",portmanteau_OS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-crossingrate-IS.npz",crossingrate_IS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-crossingrate-OS.npz",crossingrate_OS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-loglik-IS-jml.npz",loglik_IS_jml)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-loglik-IS.npz",loglik_IS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-loglik-OS.npz",loglik_OS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-aic-IS.npz",aic_IS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-aic-OS.npz",aic_OS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-bic-IS.npz",bic_IS)
np.savez("C:\\Users\\zhubr\\Desktop\\STAT 491\\results\\"+data_str+"l1-p"+str(p)+"-bic-OS.npz",bic_OS)

### Plot figures

In [None]:
# Selection frequency of each asset across all windows and tuning values.
plt.figure(figsize=(10,6))
plt.title(data_str+"-l1-p"+str(p))
plt.bar(S_df.columns,frequency,color='C4')
plt.grid()
plt.ylabel("Frequency")
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-freq.jpg")
plt.show()

# Mean number of nonzero weights per tuning value.
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.mean(cardinality,axis=0),marker='o',color='C4')
plt.grid()
plt.ylabel("Cardinality")
plt.xlabel("Tuning Parameter (x0.01)")
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-card.jpg")
plt.show()

# In-sample log-likelihood, joint fit vs simulated annealing (column 0, η = 0, is skipped).
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.arange(1,η_max),np.mean(loglik_IS_jml[:,1:],axis=0),color='C7',marker='o',label='JML')
plt.plot(np.arange(1,η_max),np.mean(loglik_IS[:,1:],axis=0),color='C8',marker='o',label='SA')
plt.xlabel("Tuning Parameter (x0.01)")
plt.ylabel("Log-Likelihood")
plt.legend()
plt.grid()
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-perf.jpg")
plt.show()

# Portmanteau and crossing statistics, raw values.
plt.figure(figsize=(6,6))
plt.subplot(2,1,1)
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.mean(portmanteau_IS,axis=0),marker='o',label="IS Port.")
plt.plot(np.mean(portmanteau_OS,axis=0),marker='o',label="OS Port.")
plt.ylabel("Port. Stat")
plt.legend()
plt.grid()
plt.subplot(2,1,2)
plt.plot(np.mean(crossingrate_IS,axis=0),marker='o',label="IS X-ing")
plt.plot(np.mean(crossingrate_OS,axis=0),marker='o',label="OS X-ing")
plt.ylabel("X-ing Stat")
plt.xlabel("Tuning Parameter (x0.01)")
plt.legend()
plt.grid()
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-stats.jpg")
plt.show()

# Same statistics after standardising each window's row across tuning values.
portmanteau_IS_z = standardize(portmanteau_IS)
portmanteau_OS_z = standardize(portmanteau_OS)
crossingrate_IS_z = standardize(crossingrate_IS)
crossingrate_OS_z = standardize(crossingrate_OS)
plt.figure(figsize=(6,6))
plt.subplot(2,1,1)
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.mean(portmanteau_IS_z,axis=0),marker='o',label="IS Port.")
plt.plot(np.mean(portmanteau_OS_z,axis=0),marker='o',label="OS Port.")
plt.ylabel("Z-Score")
plt.legend()
plt.grid()
plt.subplot(2,1,2)
plt.plot(np.mean(crossingrate_IS_z,axis=0),marker='o',label="IS X-ing")
plt.plot(np.mean(crossingrate_OS_z,axis=0),marker='o',label="OS X-ing")
plt.ylabel("Z-Score")
plt.xlabel("Tuning Parameter (x0.01)")
plt.legend()
plt.grid()
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-stats-z.jpg")
plt.show()

# In-sample and out-of-sample log-likelihood.
plt.figure(figsize=(6,4))
plt.subplot(2,1,1)
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.mean(loglik_IS,axis=0),marker='o',label="IS Loglik")
plt.ylabel("Value")
plt.legend()
plt.grid()
plt.subplot(2,1,2)
plt.plot(np.mean(loglik_OS,axis=0),marker='o',label="OS Loglik",color='C1')
plt.ylabel("Value")
plt.xlabel("Tuning Parameter (x0.01)")
plt.legend()
plt.grid()
# Saved under the "-l1-" tag like every other figure in this cell (was "-l2-").
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-loglik.jpg")
plt.show()

# Information criteria, in-sample and out-of-sample.
plt.figure(figsize=(6,6))
plt.subplot(2,1,1)
plt.title(data_str+"-l1-p"+str(p))
plt.plot(np.mean(aic_IS,axis=0),marker='o',label="IS AIC",color='C2')
plt.plot(np.mean(bic_IS,axis=0),marker='o',label="IS BIC",color='C3')
plt.ylabel("Value")
plt.legend()
plt.grid()
plt.subplot(2,1,2)
plt.plot(np.mean(aic_OS,axis=0),marker='o',label="OS AIC",color='C2')
plt.plot(np.mean(bic_OS,axis=0),marker='o',label="OS BIC",color='C3')
plt.ylabel("Value")
plt.xlabel("Tuning Parameter (x0.01)")
plt.legend()
plt.grid()
plt.savefig("C:\\Users\\zhubr\\Desktop\\STAT 491\\figures\\"+data_str+"-l1-p"+str(p)+"-aicbic.jpg")
plt.show()