In [1]:
import numpy as np
import pandas as pd
import low_rank_models as lrm

In [2]:
# Load the training error matrix; the first CSV column is used as the row index.
df = pd.read_csv('error_matrix_small.csv', index_col=0)

# Row labels (dataset IDs), column labels (model names) and the raw 2-D values.
dataset_IDs = df.index.values
models = np.array(list(df.columns))
errorMtx = df.values

# One row per dataset ID, one column per model name.
n_datasets = errorMtx.shape[0]
n_models = errorMtx.shape[1]

In [None]:
# Load the test error matrix; the first CSV column is used as the row index.
df2 = pd.read_csv('error_matrix_test.csv', index_col=0)
dataset_test_IDs = df2.index.values
errorMtx_test = df2.values

# Do not rebind n_models from the test file.  The cells that use errorMtx_test
# index its rows with column positions taken from the training matrix, so a
# different column count must be reported here instead of silently changing
# n_models for every cell that runs afterwards.
n_datasets_test, n_models_test = errorMtx_test.shape
if n_models_test != n_models:
    raise ValueError(
        'error_matrix_test.csv has %d model columns but the training matrix has %d'
        % (n_models_test, n_models))

In [3]:
# Rank used by the fixed-rank experiments: it is passed to lrm.pca as `rank`
# and is also the number of columns kept when slicing a column ordering
# with [:RANK].
RANK = 9

In [9]:
# Leave-one-out evaluation of the "BC" column selection: for every dataset,
# observe the held-out row only on the RANK columns with the smallest mean
# value, complete the row with lrm.low_rank_approximation, and record the
# argsort of the completed row.
predicted = []
for i in range(n_datasets):
    a = errorMtx[i].reshape(1,-1)        # held-out row, shape (1, n_models)
    A = np.delete(errorMtx, i, axis=0)   # all remaining rows

    # Rank the columns using the remaining rows only.  Averaging over the full
    # errorMtx would let the held-out row influence which of its own entries
    # get observed, i.e. leak the evaluation target into the selection.
    sampled_cols = np.argsort(np.mean(A, axis=0))[:RANK]

    # Start from zeros and reveal the held-out row on the sampled columns only.
    a_hat = np.zeros((1, n_models))
    a_hat[:,sampled_cols] = a[:,sampled_cols]
    # presumably fills in the remaining entries from A -- confirm in low_rank_models
    a_hat = lrm.low_rank_approximation(A, a_hat, sampled_cols)

    predicted.append(np.argsort(a_hat))
BC_predictions = np.vstack(predicted)

In [10]:
# Save the BC predictions.  The rank in the file name is taken from RANK so the
# name always matches the rank that was actually used to produce the data.
pd.DataFrame(BC_predictions).to_csv('predictions/rank' + str(RANK) + '_BC.csv')

In [None]:
# Leave-one-out evaluation of the "QRA" column selection: the observed columns
# are the first RANK entries returned by lrm.pivoted_qr on the remaining rows.
# No PCA is computed here because this selection works on A directly.
predicted = []
for i in range(n_datasets):
    a = errorMtx[i].reshape(1,-1)        # held-out row, shape (1, n_models)
    A = np.delete(errorMtx, i, axis=0)   # all remaining rows

    sampled_cols = lrm.pivoted_qr(A)[:RANK]

    # Start from zeros and reveal the held-out row on the sampled columns only.
    a_hat = np.zeros((1, n_models))
    a_hat[:,sampled_cols] = a[:,sampled_cols]
    # presumably fills in the remaining entries from A -- confirm in low_rank_models
    a_hat = lrm.low_rank_approximation(A, a_hat, sampled_cols)

    predicted.append(np.argsort(a_hat))
QRA_predictions = np.vstack(predicted)

In [None]:
# Leave-one-out evaluation of the "QRY" column selection: the observed columns
# are the first RANK entries returned by lrm.pivoted_qr on Y, the second value
# returned by lrm.pca for the remaining rows.
rankings = []
for held_out in range(n_datasets):
    train_rows = np.delete(errorMtx, held_out, axis=0)
    target_row = errorMtx[held_out].reshape(1, -1)

    # Only the second of the three values returned by lrm.pca is used.
    _, Y, _ = lrm.pca(train_rows, rank=RANK)
    sampled_cols = lrm.pivoted_qr(Y)[:RANK]

    # Reveal the held-out row on the sampled columns; everything else is zero.
    observed = np.zeros((1, n_models))
    observed[:, sampled_cols] = target_row[:, sampled_cols]
    completed = lrm.low_rank_approximation(train_rows, observed, sampled_cols)

    rankings.append(np.argsort(completed))
QRY_predictions = np.vstack(rankings)

In [None]:
# Leave-one-out evaluation of the "QRV" column selection: the observed columns
# are the first RANK entries returned by lrm.pivoted_qr on Vt, the third value
# returned by lrm.pca for the remaining rows.
per_dataset_order = []
for idx, row in enumerate(errorMtx):
    rest = np.delete(errorMtx, idx, axis=0)
    _, _, Vt = lrm.pca(rest, rank=RANK)
    sampled_cols = lrm.pivoted_qr(Vt)[:RANK]

    # Partially observed copy of the held-out row: zeros except on sampled_cols.
    row_2d = row.reshape(1, -1)
    partial = np.zeros((1, n_models))
    partial[:, sampled_cols] = row_2d[:, sampled_cols]

    estimate = lrm.low_rank_approximation(rest, partial, sampled_cols)
    per_dataset_order.append(np.argsort(estimate))
QRV_predictions = np.vstack(per_dataset_order)

In [None]:
# Leave-one-out evaluation of the random baseline: the observed columns are
# RANK column indices drawn uniformly without replacement.
# The draws come from a locally seeded generator so that re-running the cell
# reproduces the predictions that get written to disk, without touching the
# global numpy random state.
RND_SEED = 0
rng = np.random.RandomState(RND_SEED)

predicted = []
for i in range(n_datasets):
    a = errorMtx[i].reshape(1,-1)        # held-out row, shape (1, n_models)
    A = np.delete(errorMtx, i, axis=0)   # all remaining rows

    sampled_cols = rng.permutation(n_models)[:RANK]

    # Start from zeros and reveal the held-out row on the sampled columns only.
    a_hat = np.zeros((1, n_models))
    a_hat[:,sampled_cols] = a[:,sampled_cols]
    # presumably fills in the remaining entries from A -- confirm in low_rank_models
    a_hat = lrm.low_rank_approximation(A, a_hat, sampled_cols)

    predicted.append(np.argsort(a_hat))
RND_predictions = np.vstack(predicted)

In [None]:
# Column boundaries between consecutive runs of models with the same algorithm
# type.  The algorithm type is the part of the model name before the first '.'.
# Group j covers the columns breakpoints[j] .. breakpoints[j+1]-1.
algtypes = [models[i].split('.')[0] for i in range(n_models)]

# A boundary is every column whose type differs from the column before it.
# Comparing with the previous column (rather than with a hard-coded first
# type) keeps index 0 from being listed twice when the first model is not
# 'kNN', which would create an empty group.
breakpoints = (
    [0]
    + [i for i in range(1, n_models) if algtypes[i] != algtypes[i-1]]
    + [n_models]
)

In [None]:
# Leave-one-out evaluation of the stratified random baseline: draw one random
# column from every group delimited by `breakpoints`, then keep at most RANK of
# those columns, chosen at random.
# The draws come from a locally seeded generator so that re-running the cell
# reproduces the predictions that get written to disk.
SRND_SEED = 0
rng = np.random.RandomState(SRND_SEED)

# Half-open column ranges [lo, hi) of the groups.  Empty ranges are skipped
# because randint(lo, hi) raises when lo == hi.
groups = [(lo, hi) for lo, hi in zip(breakpoints[:-1], breakpoints[1:]) if lo < hi]

predicted = []
for i in range(n_datasets):
    a = errorMtx[i].reshape(1,-1)        # held-out row, shape (1, n_models)
    A = np.delete(errorMtx, i, axis=0)   # all remaining rows

    # One column per group, then a random subset of at most RANK groups.
    group_picks = np.array([rng.randint(lo, hi) for lo, hi in groups], dtype=int)
    sampled_cols = group_picks[rng.permutation(len(groups))[:RANK]]

    # Start from zeros and reveal the held-out row on the sampled columns only.
    a_hat = np.zeros((1, n_models))
    a_hat[:,sampled_cols] = a[:,sampled_cols]
    # presumably fills in the remaining entries from A -- confirm in low_rank_models
    a_hat = lrm.low_rank_approximation(A, a_hat, sampled_cols)

    predicted.append(np.argsort(a_hat))
SRND_predictions = np.vstack(predicted)

In [None]:
# Save the fixed-rank predictions of each column-selection method.  The rank in
# the file names is taken from RANK so the names always match the rank that was
# actually used to produce the data.
prefix = 'predictions/rank' + str(RANK) + '_'
pd.DataFrame(QRA_predictions).to_csv(prefix + 'QRA.csv')
pd.DataFrame(QRY_predictions).to_csv(prefix + 'QRY.csv')
pd.DataFrame(QRV_predictions).to_csv(prefix + 'QRV.csv')
pd.DataFrame(RND_predictions).to_csv(prefix + 'RND.csv')
pd.DataFrame(SRND_predictions).to_csv(prefix + 'SRND.csv')

In [4]:
# Cross-validation on the training datasets: for every rank from 1 to 30, run
# the leave-one-out evaluation with columns chosen by pivoted QR on Vt (third
# value returned by lrm.pca) and write one CSV of predictions per rank.
for rank in range(1, 31):
    orderings = []
    for i in range(n_datasets):
        # Hold out row i; the factorization only sees the other rows.
        others = np.delete(errorMtx, i, axis=0)
        a = errorMtx[i].reshape(1,-1)

        _, _, Vt = lrm.pca(others, rank)
        sampled_cols = lrm.pivoted_qr(Vt)[:rank]

        # Zeros everywhere except the sampled columns of the held-out row.
        a_partial = np.zeros((1, n_models))
        a_partial[:, sampled_cols] = a[:, sampled_cols]
        a_completed = lrm.low_rank_approximation(others, a_partial, sampled_cols)

        orderings.append(np.argsort(a_completed))
    QR_predictions = np.vstack(orderings)
    out_path = 'predictions/rank' + str(rank) + '_QRV.csv'
    pd.DataFrame(QR_predictions).to_csv(out_path)

In [None]:
# Prediction on the test datasets: for every rank from 1 to 30, observe each
# test row on the first `rank` columns of the pivoted-QR ordering of the
# training matrix, complete it against the training matrix, and write one CSV
# of predictions per rank.

# The training matrix and its column ordering do not depend on the rank or on
# the test row, so they are computed once.
# assumes lrm.pivoted_qr is deterministic and returns the full ordering -- confirm
A = errorMtx
pivots = lrm.pivoted_qr(errorMtx)

for rank in range(1, 31):
    sampled_cols = pivots[:rank]
    predicted = []
    for i in range(n_datasets_test):
        # Row being predicted.  It must be read from the test matrix here;
        # otherwise `a` would be whatever value an earlier cell left behind.
        a = errorMtx_test[i].reshape(1,-1)

        # Start from zeros and reveal the test row on the sampled columns only.
        a_hat = np.zeros((1, n_models))
        a_hat[:,sampled_cols] = a[:,sampled_cols]
        a_hat = lrm.low_rank_approximation(A, a_hat, sampled_cols)

        predicted.append(np.argsort(a_hat))
    QR_predictions = np.vstack(predicted)
    pd.DataFrame(QR_predictions).to_csv('predictions/rank' + str(rank) + '_QR_test.csv')
        