In [29]:
import pandas as pd
import numpy as np
import itertools
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
import statsmodels.api as sm

In [11]:
def reshape_x_y(x, y):
    if(isinstance(x, pd.Series)):
        x = np.asarray(x).reshape(-1, 1)
    else:
        x = np.asarray(x)
    
    y = np.asarray(y).reshape(-1, 1)
    return(x, y)

In [12]:
x = pd.read_csv('x_tasks.csv')
y = pd.read_csv('y_score.csv')

In [30]:
squared_model_prediction_errors = []
squared_average_prediction_errors = []

all_tasks = set(x["task_name"])

cur_task = "Wolf, goat and cabbage transfer"
training_tasks = [t for t in all_tasks if t != cur_task] # the 9 training tasks

x_train = x[x["task_name"].isin(training_tasks)].drop("task_name", axis = 1)
x_test = x[~x["task_name"].isin(training_tasks)].drop("task_name", axis = 1)

y_train = y[y["task_name"].isin(training_tasks)].drop("task_name", axis = 1)
y_test = y[~y["task_name"].isin(training_tasks)].drop("task_name", axis = 1)

# get evaluation score by training on the training tasks and evaluating on the holdout tasks
# some reshaping
x_train_array, y_train_array = reshape_x_y(x_train, y_train)
x_test_array, y_test_array = reshape_x_y(x_test, y_test)

# Fit the model and get the error
fitted_model = Lasso().fit(X=x_train_array, y=y_train_array)
prediction = fitted_model.predict(x_test_array)

# results = sm.OLS(y_train_array, x_train_array).fit()
# prediction = results.predict(x_test_array)

# save prediction error
# fitted_model = LinearRegression().fit(X=np.asarray(x_train_array), y=np.asarray(y_train_array))
# prediction = fitted_model.predict(np.asarray(x_test_array))

# flatten all arrays
y_test_array = np.asarray(y_test_array).flatten()
prediction = np.asarray(prediction).flatten()

print(y_test_array)
print(np.mean(y_train_array))
print(prediction)

squared_model_prediction_error = (y_test_array - prediction) ** 2

# save total error for this fold
squared_average_prediction_error = (y_test_array - np.mean(y_train_array)) ** 2

squared_model_prediction_errors.append(squared_model_prediction_error)
squared_average_prediction_errors.append(squared_average_prediction_error)

squared_model_prediction_error = np.asarray(squared_model_prediction_error).flatten()
squared_average_prediction_error = np.asarray(squared_average_prediction_error).flatten()

q2 = 1 - (np.sum(squared_model_prediction_error) / np.sum(squared_average_prediction_error))
print(q2)

[100. 100. 100. 100. 100. 100. 100.  60. 100. 100. 100. 100.   0.   0.
   0.  43. 100. 100. 100.  78.   0. 100.  43. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100. 100.   0. 100. 100. 100. 100.   0. 100.
   0. 100.   0. 100.  43. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.   0. 100. 100.
 100.   0. 100. 100. 100. 100. 100. 100. 100.  43. 100. 100. 100. 100.
  60. 100. 100. 100.   0. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100.   0. 100. 100.  33. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100.   0. 100.   0. 100.  60.   0. 100.  43. 100. 100. 100.   0.
  43. 100. 100.  33. 100.  43. 100. 100. 100. 100. 100.   0. 100. 100.
 100. 100. 100. 100. 100. 100. 100. 100.   0. 100. 100. 100. 100.   0.
 100. 100. 100.  78.   0. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100.  78. 100.   0. 100. 100. 100. 100. 100. 100. 100.   0.  60. 100.
 100.  43. 100.   0. 100.  60.   0. 100. 100. 100. 100. 100. 100.]
69.4802571

In [16]:
y_test_array

array([100., 100., 100., 100., 100., 100., 100.,  60., 100., 100., 100.,
       100.,   0.,   0.,   0.,  43., 100., 100., 100.,  78.,   0., 100.,
        43., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
       100., 100.,   0., 100., 100., 100., 100.,   0., 100.,   0., 100.,
         0., 100.,  43., 100., 100., 100., 100., 100., 100., 100., 100.,
       100., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
       100.,   0., 100., 100., 100.,   0., 100., 100., 100., 100., 100.,
       100., 100.,  43., 100., 100., 100., 100.,  60., 100., 100., 100.,
         0., 100., 100., 100., 100., 100., 100., 100., 100., 100., 100.,
         0., 100., 100.,  33., 100., 100., 100., 100., 100., 100., 100.,
       100., 100., 100., 100.,   0., 100.,   0., 100.,  60.,   0., 100.,
        43., 100., 100., 100.,   0.,  43., 100., 100.,  33., 100.,  43.,
       100., 100., 100., 100., 100.,   0., 100., 100., 100., 100., 100.,
       100., 100., 100., 100., 100.,   0., 100., 10

In [15]:
prediction

array([2.13910666e+13, 2.67522437e+13, 2.09025294e+13, 1.96015922e+13,
       2.03995244e+13, 2.31897681e+13, 2.46239661e+13, 2.16582759e+13,
       2.35789935e+13, 2.35073996e+13, 2.14488803e+13, 2.58212082e+13,
       2.54282870e+13, 2.99547445e+13, 2.62087433e+13, 2.38875433e+13,
       2.58598557e+13, 2.38875433e+13, 2.35789935e+13, 2.36542953e+13,
       2.46478307e+13, 2.05610332e+13, 2.75730378e+13, 2.14488803e+13,
       2.09613458e+13, 2.09613458e+13, 2.00699606e+13, 2.40784603e+13,
       2.00699606e+13, 2.29712907e+13, 2.62087433e+13, 2.10090751e+13,
       2.29749865e+13, 2.49930226e+13, 2.49930226e+13, 2.39187994e+13,
       2.27417262e+13, 2.26866055e+13, 2.46239661e+13, 2.10403311e+13,
       2.57808704e+13, 1.95740319e+13, 2.30134887e+13, 2.05418671e+13,
       2.29749865e+13, 2.11092197e+13, 1.95795755e+13, 2.20036132e+13,
       2.03995244e+13, 2.16582759e+13, 2.73858165e+13, 2.29712907e+13,
       2.10090751e+13, 2.05418671e+13, 1.99889698e+13, 2.40784603e+13,
      