In [1]:
import numpy as np
from proj1_helpers import *
from tools import *
from implementations import *

# Enable IPython's autoreload extension in mode 2 so edits to the imported
# project modules are picked up without restarting the kernel
# (see the IPython autoreload documentation for the exact reload semantics).
%load_ext autoreload
%autoreload 2
# Seed handed to build_k_indices in the grid-search cell when building the k-fold split.
seed = 10

In [3]:
# Generate Test parameters

def gen_para(lambdas, degree):
    """Build the list of (lambda, degree) pairs to try in the grid search.

    Args:
        lambdas: iterable of penalty values.
        degree: highest degree to include; degrees run from 1 to ``degree``
            inclusive.

    Returns:
        A list of ``(lambda, d)`` tuples, ordered lambda-major: every degree
        for the first lambda, then every degree for the second, and so on.
        Empty when ``lambdas`` is empty or ``degree`` is less than 1.
    """
    return [(penalty, power)
            for penalty in lambdas
            for power in range(1, degree + 1)]


# Parameter grid consumed by the grid-search cell: 3 lambdas x degrees 1..15.
test_para = gen_para([0.0001, 0.001, 0.01], 15)

In [6]:
# Cross validation

def cross_validation(y, x, k_indices, k, lambda_):
    """Evaluate ridge regression on one fold of a k-fold split.

    Row ``k`` of ``k_indices`` is held out for evaluation; every other row is
    flattened into a single index array used for fitting.

    Args:
        y: labels, indexable by an integer index array.
        x: feature matrix, indexable by the same index arrays along axis 0.
        k_indices: 2-D array of sample indices, one row per fold.
        k: row of ``k_indices`` to hold out.
        lambda_: penalty forwarded to ``ridge_regression``.

    Returns:
        Tuple ``(accuracy_train, accuracy_test)`` as computed by
        ``compute_accuracy`` on the fitting rows and on the held-out rows.
    """
    held_out_rows = k_indices[k]
    # True for every fold except the held-out one.
    keep_fold = np.arange(k_indices.shape[0]) != k
    fit_rows = k_indices[keep_fold].reshape(-1)

    ty_test, ty_train = y[held_out_rows], y[fit_rows]
    tx_test, tx_train = x[held_out_rows], x[fit_rows]

    ################# replace regression method here ##################
    # The training loss returned alongside the weights is not used here.
    weight, _ = ridge_regression(ty_train, tx_train, lambda_)

    predicted_train = predict_labels(weight, tx_train)
    predicted_test = predict_labels(weight, tx_test)

    return (compute_accuracy(predicted_train, ty_train),
            compute_accuracy(predicted_test, ty_test))

In [None]:
# Display the best parameters 
def find_max(resault):
    """Print the best and second-best accuracy (with parameters) per jet group.

    Args:
        resault: iterable of entries where ``entry[0]`` is the jet number,
            ``entry[1]`` the accuracy and ``entry[2]`` the parameters that
            produced it (the grid search stores ``(lambda_, degree)`` there).

    Prints one line per jet number, in order of first appearance. Entries
    with equal accuracy are all kept and ranked in the order they were given.
    When a jet has only one entry, the ``2nd_acc`` part is omitted instead of
    raising ``IndexError``. Nothing is printed for an empty input.
    """
    # Group (accuracy, params) pairs per jet. A list is used rather than a
    # dict keyed by accuracy so that equal accuracies do not overwrite each other.
    per_jet = {}
    for entry in resault:
        per_jet.setdefault(entry[0], []).append((entry[1], entry[2]))

    for no, scored in per_jet.items():
        # Stable sort: among equal accuracies the first-seen entry ranks higher.
        ranked = sorted(scored, key=lambda pair: pair[0], reverse=True)

        best_acc, best_para = ranked[0]
        line = "Jet_no:{}, Max_acc: {:.5f} at {}".format(no, best_acc, best_para)
        if len(ranked) > 1:
            second_acc, second_para = ranked[1]
            line += ", 2nd_acc: {:.5f} at {}".format(second_acc, second_para)
        print(line)

In [None]:
# Load data
DATA_TRAIN_PATH = 'data/train.csv'
DATA_TEST_PATH = 'data/test.csv'

y, tx_train, ids_train = load_csv_data(DATA_TRAIN_PATH)
_, tx_test, ids_test = load_csv_data(DATA_TEST_PATH)

# Split data by jet_no
dict_jets_train = group_features_by_jet(tx_train)
dict_jets_test = group_features_by_jet(tx_test)


# Grid Search
# For each of the three jet groups, score every (lambda_, degree) pair from
# test_para with k-fold cross validation and record the result.

# Set k_fold_no
k_fold = 5
resaults = []

for index in range(3):
    for lambda_, degree in test_para:
        #data processing
        # NOTE(review): the selection and process_data call below do not depend
        # on (lambda_, degree); they could be hoisted to the outer loop if
        # process_data is confirmed to be pure and not to mutate its inputs.
        x_train = tx_train[dict_jets_train[index]]
        y_train = y[dict_jets_train[index]]
        x_test = tx_test[dict_jets_test[index]]

        # Only the processed training features are needed during the search.
        x_train, _ = process_data(x_train, x_test)

        # Build Poly matrix
        x_train = build_polynomial_features(x_train, degree)
        # Prepend a column of ones (bias term).
        x_train = np.hstack((np.ones((x_train.shape[0], 1)), x_train))

        # Split data in k-fold
        k_indices = build_k_indices(y_train, k_fold, seed)
        list_accuracy_train = []
        list_accuracy_test = []

        for k in range(k_fold):
            a_train, a_test = cross_validation(y_train, x_train, k_indices, k, lambda_)
            list_accuracy_train.append(a_train)
            list_accuracy_test.append(a_test)

        mean_acc= np.mean(list_accuracy_train)
        var_acc = np.var(list_accuracy_train)
        tmean_acc= np.mean(list_accuracy_test)
        tvar_acc = np.var(list_accuracy_test)

        # Selection score: plain average of the mean training accuracy and the
        # mean held-out accuracy across the folds.
        total_acc = (mean_acc + tmean_acc) /2

        resaults.append((index, total_acc , (lambda_, degree)))

find_max(resaults)