In [1]:
import scripts.proj1_helpers as helper

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

#import implementations as imp
#import cross_validation as cv
#from preprocessor import Preprocessor
#from model_ensembler import Model_Ensembler

import run
from least_squares import LeastSquares
from logistic import LogisticRegression
%matplotlib inline
%load_ext autoreload
%autoreload 2

### Import data

In [2]:
# Each call returns a 3-tuple that is unpacked as (y, x, ids). The CSV paths are
# relative, so they resolve against the notebook's current working directory.
y_train, x_train, ids_train = helper.load_csv_data('train.csv')
y_test, x_test, ids_test = helper.load_csv_data('test.csv')

#### Preprocess option 1

In [3]:
# Overwrite every negative entry of y_train with 0, in place.
# Presumably remaps -1/+1 labels to 0/1 for logistic regression — TODO confirm.
y_train[y_train < 0] = 0

In [None]:
# Logistic-regression model with degree=4 and gamma=0.1; the meaning of both
# hyperparameters is defined by the LogisticRegression class in logistic.py.
lr = LogisticRegression(degree=4, gamma=0.1)

In [None]:
# Cross-validate `lr` on the training data with a fixed seed of 1. The literal 5
# is the third positional argument (presumably the number of folds — TODO confirm
# against run.best_cross_validation).
accuracy = run.best_cross_validation(y_train, x_train, 5, lr, seed=1)
# Bare expression so the notebook displays the returned value.
accuracy 

In [None]:
# Arithmetic mean of the entries in `accuracy`; requires `accuracy` to be a
# non-empty sized collection of numbers.
sum(accuracy) / len(accuracy)

In [6]:
# Grid search: cross-validate one LogisticRegression per (degree, gamma) pair
# and print the mean accuracy of each configuration.
degrees = range(4, 5)
gammas = np.linspace(0.01, 0.05, 20)
# Derive the progress denominator from the grids so it cannot drift out of sync
# with the loop bounds (it used to be a hard-coded, incorrect '120').
n_total = len(degrees) * len(gammas)

n = 1
for deg in degrees:
    for gamma in gammas:
        lr = LogisticRegression(degree=deg, gamma=gamma)
        # `accuracy` keeps holding the per-fold result; the mean gets its own
        # name so that other cells computing sum(accuracy) / len(accuracy)
        # still work after this cell has run.
        accuracy = run.best_cross_validation(y_train, x_train, 3, lr, seed=1)
        mean_accuracy = sum(accuracy) / len(accuracy)
        print(n, '/', n_total, '(', deg, ',', gamma, ')', mean_accuracy)
        n += 1

Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
1 / 120 ( 4 , 0.01 ) 0.799875199501
Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
2 / 120 ( 4 , 0.0121052631579 ) 0.801279205117
Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
3 / 120 ( 4 , 0.0142105263158 ) 0.802755211021
Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
4 / 120 ( 4 , 0.0163157894737 ) 0.804107216429
Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
5 / 120 ( 4 , 0.0184210526316 ) 0.804903219613
Executed step 1 / 3 of cross validation
Executed step 2 / 3 of cross validation
Executed step 3 / 3 of cross validation
6 / 120 ( 4 , 0.0205263157895 ) 0.805995223981
Executed 

#### Preprocess option 2

In [None]:
def preprocess(x_train, x_test, deg=1):
    """Clean and standardize the train and test feature matrices jointly.

    Both matrices are stacked row-wise so that constant-column removal and the
    `run` helpers operate on the combined data; the result is then split back
    at the original train/test boundary.

    Args:
        x_train: Training feature array (rows are samples).
        x_test: Test feature array with the same number of columns as x_train.
        deg: Currently unused — it only feeds the polynomial expansion that is
            commented out below.

    Returns:
        A pair (train_part, test_part): the first x_train.shape[0] rows of the
        processed array and the remaining rows.
    """
    n_train_rows = x_train.shape[0]

    # np.vstack allocates a new array, so the caller's inputs are not modified
    # by the column selection below.
    combined = np.vstack((x_train, x_test))

    # Keep only the columns whose standard deviation over all rows is non-zero.
    has_spread = np.std(combined, axis=0) != 0
    combined = combined[:, has_spread]

    # The return value is discarded; presumably mean_spec edits the array in
    # place — TODO confirm against run.py.
    run.mean_spec(combined)
    combined = run.standardize(combined)
    # Polynomial expansion is intentionally disabled:
    #combined = run.polynomial_enhancement(combined, deg)
    return combined[:n_train_rows], combined[n_train_rows:]
    

In [None]:
# Rebinds x_train / x_test to the preprocessed arrays. The cell is therefore not
# idempotent: running it twice preprocesses already-processed data, so reload
# the raw data before re-running.
x_train, x_test = preprocess(x_train, x_test)

### Test

In [None]:
# Least-squares model configured with degree=8 (semantics defined in least_squares.py).
least_square_model_1 = LeastSquares(degree=8)

In [None]:
# NOTE(review): `cv` and `imp` are only brought in by imports that are commented
# out in the first cell (`import cross_validation as cv`,
# `import implementations as imp`), so on a fresh kernel this cell raises
# NameError until those imports are restored or the call is ported to `run`.
accuracy = cv.cross_validation(y_train, x_train, 5, least_square_model_1, seed=1, compute_loss=imp.rmse)
# Bare expression so the notebook displays the returned value.
accuracy 

### Garbage (kept just in case)

In [None]:
def preprocess_train(x_tr, deg=4):
    """Apply run.polynomial_enhancement to a copy of the training features.

    Args:
        x_tr: Training features; must provide a ``copy()`` method.
        deg: Degree forwarded to run.polynomial_enhancement (default 4).

    Returns:
        A pair (enhanced_features, None). The second slot is always None.
    """
    # Work on a copy so the helper never receives the caller's own object.
    enhanced = run.polynomial_enhancement(x_tr.copy(), deg)
    return enhanced, None

def preprocess_test(x_te, dependency, deg=4):
    """Apply run.polynomial_enhancement to a copy of the test features.

    Args:
        x_te: Test features; must provide a ``copy()`` method.
        dependency: Accepted but never read by this function.
        deg: Degree forwarded to run.polynomial_enhancement (default 4).

    Returns:
        Whatever run.polynomial_enhancement returns for the copied input.
    """
    # Work on a copy so the helper never receives the caller's own object.
    return run.polynomial_enhancement(x_te.copy(), deg)

def preprocess_train_model_1(x_tr):
    """Train-side preprocessing for model 1: preprocess_train with deg fixed to 4."""
    return preprocess_train(x_tr, deg=4)


def preprocess_test_model_1(x_te, dependency):
    """Test-side preprocessing for model 1: preprocess_test with deg fixed to 4."""
    return preprocess_test(x_te, dependency, deg=4)

def preprocess_train_meta_model(x_tr, deg=4):
    """Build element-wise power features for the meta-model's training data.

    The columns of ``x_tr`` are repeated side by side ``deg`` times and the
    k-th repetition (k = 1..deg) is raised element-wise to the power k, so an
    (n, d) input produces an (n, d * deg) output.

    Args:
        x_tr: 2-D feature array (``x_tr.shape[1]`` is read).
        deg: Highest power to generate (default 4).

    Returns:
        A pair (power_features, None). The second slot is always None.
    """
    n_features = x_tr.shape[1]
    # One exponent per output column: 1 repeated d times, then 2 repeated d times, ...
    exponents = np.repeat(np.array(range(1, deg + 1)), n_features)
    # np.tile allocates a new array, so the caller's x_tr is left untouched.
    return np.tile(x_tr, deg) ** exponents, None
    
def preprocess_test_meta_model(x_te, dependency, deg=4):
    """Build element-wise power features for the meta-model's test data.

    The columns of ``x_te`` are repeated side by side ``deg`` times and the
    k-th repetition (k = 1..deg) is raised element-wise to the power k, so an
    (n, d) input produces an (n, d * deg) output.

    Args:
        x_te: 2-D feature array (``x_te.shape[1]`` is read).
        dependency: Accepted but never read by this function.
        deg: Highest power to generate (default 4).

    Returns:
        The (n, d * deg) array of power features.
    """
    n_features = x_te.shape[1]
    # One exponent per output column: 1 repeated d times, then 2 repeated d times, ...
    exponents = np.repeat(np.array(range(1, deg + 1)), n_features)
    # np.tile allocates a new array, so the caller's x_te is left untouched.
    return np.tile(x_te, deg) ** exponents
    

#preprocess_train_meta_model = lambda x_tr: run.polynomial_enhancement(x_tr, 4), None
#preprocess_test_meta_model = lambda x_te: run.polynomial_enhancement(x_te, 4)

In [None]:
# Rebinds least_square_model_1 to a fresh least-squares model with degree=4.
least_square_model_1 = LeastSquares(degree=4)

In [None]:
# NOTE(review): `Preprocessor` is only brought in by an import that is commented
# out in the first cell (`from preprocessor import Preprocessor`), so this cell
# raises NameError on a fresh kernel.
# Each Preprocessor pairs a train-side function with its test-side counterpart.
preprocessor_1 = Preprocessor(preprocess_train, preprocess_test)
preprocessor_2 = Preprocessor(preprocess_train_model_1, preprocess_test_model_1)
preprocessor_meta = Preprocessor(preprocess_train_meta_model, preprocess_test_meta_model)

In [None]:
# NOTE(review): here LeastSquares receives a preprocessor as its first positional
# argument, whereas other cells in this notebook construct it with `degree=...` —
# confirm which constructor signature the current least_squares.py supports.
least_square_model_1 = LeastSquares(preprocessor_1)
least_square_model_2 = LeastSquares(preprocessor_2)

# Model built on the meta-model preprocessor; passed as the second argument of
# Model_Ensembler in a later cell.
least_square_meta_model = LeastSquares(preprocessor_meta)

In [None]:
# NOTE(review): `Model_Ensembler` is only brought in by an import that is
# commented out in the first cell (`from model_ensembler import Model_Ensembler`),
# so this cell raises NameError on a fresh kernel.
# Arguments: the list of two least-squares models, then the meta model.
model_ensembler = Model_Ensembler([least_square_model_1, least_square_model_2], least_square_meta_model)

In [None]:
# NOTE(review): `half_index` is not defined anywhere in the visible notebook, so
# this cell raises NameError as written. The slicing below suggests it is meant
# to be a row index splitting the training set in two — confirm and define it
# explicitly before use.

# Train both models on the rows before `half_index`.
least_square_model_1 = LeastSquares(preprocessor_1)
least_square_model_1.train(y_train[:half_index], x_train[:half_index])

least_square_model_2 = LeastSquares(preprocessor_2)
least_square_model_2.train(y_train[:half_index], x_train[:half_index])

models = [least_square_model_1, least_square_model_2]

# Predict on the remaining rows with every model and concatenate the
# predictions horizontally with np.hstack.
stage_0_results = np.hstack([model.predict(x_train[half_index:]) for model in models])



In [None]:
# NOTE(review): LogisticRegression receives a preprocessor as its first positional
# argument here, whereas other cells construct it with `degree=...` — confirm the
# constructor signature in logistic.py.
logistic_regression = LogisticRegression(preprocessor_1, gamma=0.1)

In [None]:
# NOTE(review): `cv` and `imp` are only brought in by imports that are commented
# out in the first cell, so this cell raises NameError on a fresh kernel.
accuracy = cv.cross_validation(y_train, x_train, 5, least_square_model_1, seed=1, compute_loss=imp.rmse)
# Bare expression so the notebook displays the returned value.
accuracy 

In [None]:
# Arithmetic mean of the entries in `accuracy`; requires `accuracy` to be a
# non-empty sized collection of numbers.
sum(accuracy) / len(accuracy)