In [None]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump, load

In [None]:
# Directories holding the train / test splits.
path_train = "data/train_data/train/"
path_test = "data/train_data/test/"

# Read the E and phi tables of each split from comma-separated text files.
E_train, phi_train = (
    np.loadtxt(path_train + csv_name, delimiter=",")
    for csv_name in ("E_train.csv", "phi_train.csv")
)
E_test, phi_test = (
    np.loadtxt(path_test + csv_name, delimiter=",")
    for csv_name in ("E_test.csv", "phi_test.csv")
)

In [None]:
# Number of trailing columns of E that get their own Ridge model (one model per
# column, counted from the right). The same count of trailing columns is left
# out when the first row of E is saved as the "left" part.
feature_out = 15

In [None]:
# Store the left (upper) part of the first row of E: every column except the
# last `feature_out` ones.
E = np.loadtxt("data/train_data/E.csv", delimiter=",")
n_left = E.shape[1] - feature_out
E_left = E[0, :n_left]
np.savetxt("model/left.csv", E_left, delimiter=",")

In [None]:
L = 401
r2_score_list = np.zeros(L)

# Sweep over the number of input features: iteration `idx` trains on the last
# idx + 1 columns of phi and stores the R^2 (averaged over the `feature_out`
# target columns) in r2_score_list[idx].
# NOTE(review): assumes phi_train / phi_test have at least L columns; with
# fewer columns the start index goes negative and the slice wraps - confirm.
progress_step = max(1, L // 10)  # print roughly ten progress marks
for idx in range(L):
    if idx % progress_step == 0:
        print(idx)

    # The inputs do not depend on the target column, so slice them once per idx.
    x_train = phi_train[:, phi_train.shape[1] - 1 - idx:]
    x_test = phi_test[:, phi_test.shape[1] - 1 - idx:]

    r2_score_avg = np.zeros(feature_out)
    for idy in range(feature_out):
        # regression training on the idy-th column of E counted from the right
        y = E_train[:, E_train.shape[1] - 1 - idy]
        reg = Ridge(alpha=0.05, solver="cholesky")
        reg.fit(x_train, y)

        # evaluate on the held-out split
        y_pred = reg.predict(x_test)
        y_true = E_test[:, E_test.shape[1] - 1 - idy]
        r2_score_avg[idy] = r2_score(y_true, y_pred)

    r2_score_list[idx] = np.mean(r2_score_avg)

In [None]:
# Persist the mean R^2 scores held in r2_score_list.
# The array (r2_score_n) must be written here, not `r2_score`, which is the
# scikit-learn metric function and cannot be saved by np.savetxt.
r2_score_n = np.array(r2_score_list)
np.savetxt("model/Ridge/r2_score.csv", r2_score_n, delimiter=",")

In [None]:
import matplotlib.pyplot as plt

L = r2_score_list.shape[0]
# One x value per entry of r2_score_list: 0.0, 1.0, ..., L - 1.
x = np.linspace(0, L - 1, L)

# Figure 1: mean R^2 for each entry of r2_score_list (index 0 is skipped).
# Raw strings keep the LaTeX backslashes without invalid-escape warnings.
fig, ax = plt.subplots()
ax.set_xlim(1, L - 1)
ax.set_ylim(0.998, 1)
ax.set_xlabel(r"$N_\phi$")
ax.set_ylabel(r"$R^2$")  # this panel shows R^2 itself, not its increment
ax.plot(x[1:], r2_score_list[1:], "g")
fig.tight_layout()
plt.show()

# Figure 2: change of the mean R^2 between consecutive entries.
# y_var[0] has no predecessor and keeps its placeholder value of 1.
y_var = np.ones(L)
y_var[1:] = np.diff(r2_score_list)

fig, ax = plt.subplots()
ax.set_xlim(1, L - 1)
ax.set_ylim(0, 5e-7)
ax.set_xlabel(r"$N_\phi$")
ax.set_ylabel(r"$\Delta R^2$")
ax.plot(x[20:], y_var[20:], "g")  # the first 20 entries are not drawn
fig.tight_layout()
plt.show()

In [None]:
# Number of trailing phi columns used as inputs of the final models.
feature_in = 200
path_model = "model/Ridge/"

# The inputs are the same for every target column, so slice them once.
x_train = phi_train[:, phi_train.shape[1] - feature_in:]
x_test = phi_test[:, phi_test.shape[1] - feature_in:]

# Train, save and evaluate one Ridge model per target column of E
# (columns are counted from the right: idy = 0 is the last column).
for idy in range(feature_out):

    print("model", idy)

    # regression training
    y = E_train[:, E_train.shape[1] - 1 - idy]
    reg = Ridge(alpha=0.05, solver="cholesky")
    reg.fit(x_train, y)

    dump(reg, path_model + "Ridge" + str(idy) + ".model")

    # test
    y_pred = reg.predict(x_test)
    y_true = E_test[:, E_test.shape[1] - 1 - idy]
    print(idy,
          'Mean squared error: %.12f' % mean_squared_error(y_true, y_pred),
          'Coefficient of determination: %.12f' % r2_score(y_true, y_pred))
    

In [None]:
# Assemble the saved Ridge models into matrix form (A, r) plus the vector b_tilda.
# joblib.load unpickles the files, so only load model files you trust.
model = [load("model/Ridge/Ridge" + str(i) + ".model") for i in range(feature_out)]
print(len(model[0].coef_))

L = 401
A = np.zeros((L, L))
r = np.zeros(L)
# Padding placed in front of each coefficient vector.
zero_left = np.zeros(L - feature_in)

# Model k fills row L - 1 - k: model 0 goes to the last row, model 1 to the
# row above it, and so on.
for k, fitted in enumerate(model):
    row = L - 1 - k
    A[row] = np.concatenate([zero_left, fitted.coef_])
    r[row] = fitted.intercept_

# b carries the saved "left" values in its leading entries and zeros elsewhere.
b = np.zeros(L)
left = np.loadtxt("model/left.csv", delimiter=",")
b[:left.shape[0]] = left
b_tilda = b + r

In [None]:
# Write each matrix-form array to its own comma-separated file.
matrix_form_outputs = {
    "model/Matrix_form/A.csv": A,
    "model/Matrix_form/b_tilda.csv": b_tilda,
    "model/Matrix_form/r.csv": r,
}
for csv_path, values in matrix_form_outputs.items():
    np.savetxt(csv_path, values, delimiter=",")