In [1]:
import ast

import numpy as np
import pandas as pd

import low_rank_models as lrm

In [2]:
# Load the error matrix from disk; the first CSV column is used as the row index.
df = pd.read_csv('error_matrix.csv', index_col=0)

# Column labels identify the models, row labels identify the datasets.
models = np.array(df.columns.tolist())
dataset_IDs = df.index.values

# Raw values as a 2-D array: rows are treated as datasets, columns as models.
errorMtx = df.values
n_datasets = errorMtx.shape[0]
n_models = errorMtx.shape[1]

In [3]:
# Number of columns (models) whose entries are revealed for a new dataset.
n_samples = 8

# Keep the leading n_samples entries of the ordering returned by the pivoted QR
# helper; they are used below as column indices into the error matrix.
pivot_order = lrm.pivoted_qr(errorMtx)
sampled_cols = pivot_order[:n_samples]

# Show which model configurations were selected.
selected_models = models[sampled_cols]
print(selected_models)

[ "{'hyperparameters': {'learning_rate': 3.0, 'n_estimators': 50}, 'algorithm': 'ABT'}"
 "{'hyperparameters': {'learning_rate': 0.0001, 'solver': 'adam', 'alpha': 0.0001}, 'algorithm': 'MLP'}"
 "{'hyperparameters': {'learning_rate': 1.5, 'n_estimators': 100}, 'algorithm': 'ABT'}"
 "{'hyperparameters': {'min_samples_split': 32, 'criterion': 'entropy'}, 'algorithm': 'RF'}"
 "{'hyperparameters': {'learning_rate': 2.0, 'n_estimators': 100}, 'algorithm': 'ABT'}"
 "{'hyperparameters': {'learning_rate': 0.01, 'solver': 'sgd', 'alpha': 0.01}, 'algorithm': 'MLP'}"
 "{'hyperparameters': {'min_samples_split': 1024, 'criterion': 'gini'}, 'algorithm': 'ExtraTrees'}"
 "{'hyperparameters': {'kernel': 'poly', 'coef0': 0, 'C': 0.125}, 'algorithm': 'kSVM'}"]


In [4]:
# Leave-one-out evaluation using the QR-selected columns: each row is held out
# in turn, completed from its sampled entries, and the completed row is turned
# into a ranking of column indices.
loo_rankings = []
for row in range(n_datasets):
    held_out = errorMtx[row].reshape(1, -1)
    remaining = np.delete(errorMtx, row, axis=0)

    # Only the sampled entries of the held-out row are revealed; the rest start at zero.
    partial = np.zeros((1, n_models))
    partial[:, sampled_cols] = held_out[:, sampled_cols]

    completed = lrm.low_rank_approximation(remaining, partial, sampled_cols)

    # argsort yields column indices ordered by ascending completed value.
    loo_rankings.append(np.argsort(completed))

# One ranking per held-out row, stacked in row order.
QR_predictions = np.vstack(loo_rankings)

In [5]:
# Random baseline: same leave-one-out procedure as the QR experiment, but the
# revealed columns are a fresh uniform random subset for every held-out row.
#
# A dedicated, seeded generator keeps the baseline reproducible across kernel
# restarts without touching NumPy's global random state.
RND_SEED = 0
rnd_rng = np.random.RandomState(RND_SEED)

rnd_rankings = []
for i in range(n_datasets):
    held_out = errorMtx[i].reshape(1, -1)
    remaining = np.delete(errorMtx, i, axis=0)

    # The random subset lives in its own name so that the QR-selected
    # `sampled_cols` computed earlier is not overwritten by this cell.
    rnd_cols = rnd_rng.permutation(n_models)[:n_samples]

    # Only the sampled entries of the held-out row are revealed; the rest start at zero.
    partial = np.zeros((1, n_models))
    partial[:, rnd_cols] = held_out[:, rnd_cols]

    completed = lrm.low_rank_approximation(remaining, partial, rnd_cols)

    # argsort yields column indices ordered by ascending completed value.
    rnd_rankings.append(np.argsort(completed))

# One ranking per held-out row, stacked in row order.
RND_predictions = np.vstack(rnd_rankings)

In [6]:
# Compute boundaries between runs of consecutive columns that share the same
# algorithm: group g covers the half-open index range
# [breakpoints[g], breakpoints[g + 1]).
#
# NOTE: the labels come from the CSV header, i.e. external data, so they are
# parsed with ast.literal_eval instead of eval. literal_eval only accepts
# Python literals and cannot execute code embedded in the file.
# Assumes every label is a plain dict literal with an 'algorithm' key -- confirm.
algorithms = [ast.literal_eval(models[i])['algorithm'] for i in range(n_models)]

# A boundary is any position whose algorithm differs from its predecessor.
# Comparing against the actual previous column (rather than a hard-coded
# 'kNN') avoids a duplicate 0 boundary -- an empty first group -- when the
# first column belongs to a different algorithm.
breakpoints = (
    [0]
    + [i for i in range(1, n_models) if algorithms[i] != algorithms[i - 1]]
    + [n_models]
)

In [7]:
# Stratified random baseline: for every held-out row, draw one random column
# from each group delimited by `breakpoints`, then keep at most n_samples of
# those picks (chosen at random among the groups).
#
# A dedicated, seeded generator keeps the baseline reproducible across kernel
# restarts without touching NumPy's global random state.
SRND_SEED = 1
srnd_rng = np.random.RandomState(SRND_SEED)

n_groups = len(breakpoints) - 1

srnd_rankings = []
for i in range(n_datasets):
    held_out = errorMtx[i].reshape(1, -1)
    remaining = np.delete(errorMtx, i, axis=0)

    # One column index per group, drawn from [breakpoints[g], breakpoints[g + 1]).
    group_picks = np.array([
        srnd_rng.randint(breakpoints[g], breakpoints[g + 1])
        for g in range(n_groups)
    ])

    # Random subset of the per-group picks. It lives in its own name so that
    # the QR-selected `sampled_cols` computed earlier is not overwritten.
    srnd_cols = group_picks[srnd_rng.permutation(n_groups)[:n_samples]]

    # Only the sampled entries of the held-out row are revealed; the rest start at zero.
    partial = np.zeros((1, n_models))
    partial[:, srnd_cols] = held_out[:, srnd_cols]

    completed = lrm.low_rank_approximation(remaining, partial, srnd_cols)

    # argsort yields column indices ordered by ascending completed value.
    srnd_rankings.append(np.argsort(completed))

# One ranking per held-out row, stacked in row order.
SRND_predictions = np.vstack(srnd_rankings)

In [8]:
# Write each prediction matrix to its own CSV file (with pandas' default
# index and header).
exports = (
    (QR_predictions, 'predictions/rank8_QR.csv'),
    (RND_predictions, 'predictions/rank8_RND.csv'),
    (SRND_predictions, 'predictions/rank8_SRND.csv'),
)
for ranking, csv_path in exports:
    pd.DataFrame(ranking).to_csv(csv_path)

In [9]:
# Sweep over the number of QR-selected columns (1..MAX_RANK) and write the
# leave-one-out rankings for each setting to 'predictions/rank<r>_QR.csv'.
MAX_RANK = 30

# The ordering does not depend on the rank being evaluated, so it is computed
# once instead of once per rank.
# NOTE(review): assumes lrm.pivoted_qr is deterministic for a fixed input and
# does not modify errorMtx -- confirm against low_rank_models.
pivot_order = lrm.pivoted_qr(errorMtx)

for rank in range(1, MAX_RANK + 1):
    # Sweep-local names are used on purpose: reusing `sampled_cols` and
    # `QR_predictions` here would overwrite the rank-8 results from the
    # earlier cells and make re-running those cells produce wrong output.
    rank_cols = pivot_order[:rank]

    rank_rankings = []
    for i in range(n_datasets):
        held_out = errorMtx[i].reshape(1, -1)
        remaining = np.delete(errorMtx, i, axis=0)

        # Only the sampled entries of the held-out row are revealed; the rest start at zero.
        partial = np.zeros((1, n_models))
        partial[:, rank_cols] = held_out[:, rank_cols]

        completed = lrm.low_rank_approximation(remaining, partial, rank_cols)

        # argsort yields column indices ordered by ascending completed value.
        rank_rankings.append(np.argsort(completed))

    rank_predictions = np.vstack(rank_rankings)
    pd.DataFrame(rank_predictions).to_csv('predictions/rank' + str(rank) + '_QR.csv')