In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import scipy.io
from sklearn import linear_model
from sklearn import model_selection
import eknn 
from eknn import exclusive_lasso, EkNN_C,EkNN_R
import poly_eknn_modified
from poly_eknn_modified import poly_regression, single_polynomial_EkNN_R, polynomial_EkNN_R_classifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Simulated regression problem: 100 observations of 10 candidate predictors, of
# which only two (indices 3 and 7) actually drive the response.
SEED = 0  # fixed so that Restart & Run All reproduces the same draws
np.random.seed(SEED)

X = np.random.randn(100, 10)  # matrix of possible predictors
alphas = np.array([0, 0, 0, 1, 0, 0, 0, -1, 0, 0])  # two nonzero predictors
y = X @ alphas + 2 * np.random.randn(100)  # simulated observations y
N_groups = 3
n = X.shape[1]  # number of predictors (columns of X)

# The group-membership matrix is built in the cell that one-hot encodes it, so
# no throw-away `groups` array is drawn here.

xL2 = np.linalg.pinv(X) @ y  # initial value for alphas / re-visit how the value is initiated

# The noiseless signal and the noisy response must have the same length.
assert (X @ alphas).shape == y.shape
X.shape

(100, 10)

In [3]:
# Starting point for the optimizer.  The pseudoinverse of X is computed once
# and reused for both halves of the initial vector.
X_pinv = np.linalg.pinv(X)
alphas_0 = X_pinv @ y
# NOTE(review): this squares the pseudoinverse element-wise; if a least-squares
# fit on the squared predictors was intended it would be pinv(X ** 2) @ y — confirm.
betas_0 = (X_pinv ** 2) @ y
x0 = np.concatenate((alphas_0, betas_0))  # layout: [alphas_0 | betas_0]
alphas_0, betas_0, x0


(array([-0.09340623,  0.06515274,  0.05534093,  1.167718  ,  0.29380863,
        -0.23674422,  0.06459577, -0.84877737,  0.00692189,  0.07382089]),
 array([ 0.00115334,  0.00216088,  0.00094117,  0.00106819,  0.00119795,
        -0.00045515, -0.00277232,  0.00048457,  0.00102785,  0.00461446]),
 array([-9.34062337e-02,  6.51527442e-02,  5.53409297e-02,  1.16771800e+00,
         2.93808631e-01, -2.36744221e-01,  6.45957711e-02, -8.48777369e-01,
         6.92188747e-03,  7.38208916e-02,  1.15333974e-03,  2.16087969e-03,
         9.41174952e-04,  1.06819074e-03,  1.19795349e-03, -4.55154490e-04,
        -2.77231580e-03,  4.84572160e-04,  1.02784508e-03,  4.61446446e-03]))

In [4]:
N_groups = 3

# Simulate a hard group assignment: draw one uniform score per (group, predictor)
# pair and hand every predictor (column) to the group (row) with the top score.
group_scores = np.random.rand(N_groups, 10)

# argmax returns the first maximum of each column, which is the same winner the
# former element-by-element loop selected.
winning_group = group_scores.argmax(axis=0)

# One-hot membership matrix: groups[i, j] == 1 iff predictor j belongs to group i.
# It is built from the untouched scores, so no comparison ever reads a column
# that has already been partially overwritten.
groups = np.zeros_like(group_scores)
groups[winning_group, np.arange(group_scores.shape[1])] = 1
groups

array([[1., 0., 0., 1., 0., 1., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 1., 0., 0., 1., 1., 1.]])

In [5]:
# Build the polynomial regression model from the simulated predictors X, the
# response y and the one-hot group-membership matrix `groups`.
# NOTE(review): 0.1 is presumably the regularization weight — confirm against
# the poly_regression signature.
reg = poly_eknn_modified.poly_regression(X, y,groups, 0.1)

In [6]:
# Run the model's lasso optimizer starting from x0 (alphas_0 followed by betas_0)
# and keep the solution vector.
# NOTE(review): the result exposes `.x`, which looks like a scipy OptimizeResult — confirm.
coefs = reg.lasso_optimize(x0)
coefficients = coefs.x

In [7]:
# simualate class labels 
# Simulate class labels: each of the 10 columns is given exactly one of N_classes classes.
N_classes = 4
class_scores = np.random.rand(N_classes, 10)

# Class index of every column; argmax keeps the first maximum, which is the same
# winner the former element-by-element loop selected.  Stored as floats, as before.
winning_class = class_scores.argmax(axis=0)
x_labels = winning_class.astype(float)

# One-hot encoding of the same assignment: labels[i, j] == 1 iff column j is in class i.
labels = np.zeros_like(class_scores)
labels[winning_class, np.arange(class_scores.shape[1])] = 1
labels

array([[0., 0., 0., 0., 0., 1., 1., 1., 0., 0.],
       [1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 1.]])

In [8]:
# Split the fitted vector the same way x0 was assembled: the first n entries,
# then the remainder.
fitted_alphas = coefficients[:n]
fitted_betas = coefficients[n:]

# NOTE(review): the trailing 3 is presumably k, the number of retained
# neighbours — confirm against the single_polynomial_EkNN_R signature.
clf = poly_eknn_modified.single_polynomial_EkNN_R(
    X, y, fitted_alphas, fitted_betas, labels, 3
)
clf.k_largest_sum_vect

array([2.07606862, 0.        , 0.        , 0.        , 0.        ,
       0.        , 2.72180739, 0.        , 0.        , 2.75971116])

In [9]:
# Show the classifier's alphas_diag_matrix and betas_diag_matrix attributes
# (the recorded output is a diagonal matrix).
clf.alphas_diag_matrix, clf.betas_diag_matrix

(array([[ 0.58517599,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.87643053,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        , -1.50469204,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.97526495,  0.        ,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.42256522,
          0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         -0.93042258,  0.        ,  0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  1.6924318

In [10]:
# Show the matrix returned by class_coefs_matrix(); in the recorded run it has
# 4 rows and 10 columns, matching the shape of `labels`.
clf.class_coefs_matrix()

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 2.72180739, 0.        , 0.        , 0.        ],
       [2.07606862, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 2.75971116]])

In [11]:
# Euclidean norm of y + y**2.
# NOTE(review): presumably a yardstick for the clf.distance() values — confirm.
poly_response = y + y ** 2
np.sum(poly_response ** 2) ** (1 / 2)

102.4823888366796

In [12]:
# Show the values returned by distance(); the recorded run yields 10 entries.
# NOTE(review): presumably one distance per column of X — confirm.
clf.distance()

array([ 93.94172818, 102.97580156, 101.33903148,  98.96011465,
       102.23056377, 102.09711763,  99.02797102, 100.28362541,
       100.3269401 ,  93.20271615])

In [13]:
# Show the values returned by weight(); in the recorded run the 10 entries lie in [0, 1].
clf.weight()

array([0.92438293, 0.        , 0.16747731, 0.41089244, 0.0762541 ,
       0.08990855, 0.40394925, 0.2754684 , 0.27103636, 1.        ])

In [14]:
# Show the retained indices next to k_largest_sum_vect; in the recorded run the
# indices [0, 6, 9] are exactly the nonzero positions of the vector.
clf.k_indices, clf.k_largest_sum_vect

([0, 6, 9],
 array([2.07606862, 0.        , 0.        , 0.        , 0.        ,
        0.        , 2.72180739, 0.        , 0.        , 2.75971116]))

In [15]:
# Show the result of classify(); the recorded run returns the single value 3.
clf.classify()

3

In [16]:
# generate more test samples
Y_t = np.zeros((10, y.shape[0]))
for i in range(10):
    Y_t[i,:] = X @ alphas + 2*np.random.randn(100) 
Y = np.transpose(Y_t)

In [17]:
# Create the multi-sample classifier with 0.1 and N_groups as positional
# arguments and k=3.
# NOTE(review): this rebinds `clf`, which until here referred to the
# single_polynomial_EkNN_R instance, so the earlier clf.* cells cannot be
# re-run after this one.
clf = poly_eknn_modified.polynomial_EkNN_R_classifier(0.1, N_groups, k=3)

In [18]:
# View of X with one row per original column, the layout passed to clf.fit.
X_transposed = X.T

In [19]:
# Fit on X_transposed (one row per column of X) together with x_labels, which
# holds the class index assigned to each of those columns.
clf.fit(X_transposed, x_labels)

<poly_eknn_modified.polynomial_EkNN_R_classifier at 0x104f87130>

In [20]:
# Predict for Y, which holds one simulated response vector per column.
# NOTE(review): this cell has no print call, so the "10 <class 'int'>" line in
# the recorded output is presumably a debug print left inside predict() — confirm.
preds = clf.predict(Y)

10 <class 'int'>


In [21]:
# Show the predictions; the recorded run yields 10 values, one per column of Y.
preds

array([1., 1., 1., 3., 0., 0., 0., 3., 1., 0.])

#### LSVT test set

In [2]:
# Read both sheets of the workbook in one pass: sheet 0 becomes the predictors
# frame, sheet 1 the response frame.
lsvt_sheets = pd.read_excel('lsvt/LSVT_voice_rehabilitation.xlsx', sheet_name=[0, 1])
lsvt_predictors = lsvt_sheets[0]
lsvt_response = lsvt_sheets[1]

# Hold out 33% of the rows; random_state pins the split.
train, test, train_labels, test_labels = train_test_split(
    lsvt_predictors,
    lsvt_response,
    test_size=0.33,
    random_state=42,
)

In [3]:
# Renumber every split 0..len-1 (the shuffled original row labels are dropped)
# so the integer lookups in the following cells address rows by position.
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)
train_labels = train_labels.reset_index(drop=True)
test_labels = test_labels.reset_index(drop=True)

In [4]:
predictors_lsvt = train.columns
train_cols, train_rows = train.shape[1], train.shape[0]

# Predictor matrix X_train: one row per feature (DataFrame column) and one
# column per training sample (DataFrame row), i.e. the transpose of `train`.
# Converting the whole frame at once replaces the element-by-element
# `train[col][j]` lookups, which were slow and only correct while the index
# labels happened to be 0..n-1.
X_train = train.to_numpy(dtype=float).T.copy()
assert X_train.shape == (train_cols, train_rows)

In [5]:
test_cols, test_rows = test.shape[1], test.shape[0]

# Predictor matrix X_test: one row per feature and one column per test sample.
# Columns are selected through predictors_lsvt so the feature order is the one
# taken from the training frame.  Converting in one step replaces the
# element-by-element `test[col][j]` lookups, which were slow and only correct
# while the index labels happened to be 0..n-1.
X_test = test[predictors_lsvt].to_numpy(dtype=float).T.copy()
assert X_test.shape == (test_cols, test_rows)

In [6]:
# try to generate a more generic way to generate the label matrix
y_train = np.zeros(train.shape[0])
for i in range(len(train_labels["Binary class 1=acceptable, 2=unacceptable"])):
    if train_labels["Binary class 1=acceptable, 2=unacceptable"][i] == 1:
        y_train[i] = 0
    else:
        y_train[i] = 1

In [7]:
# View of X_train with one row per training sample, the layout passed to clf.fit.
X_train_transposed = X_train.T

In [9]:
# Fit the classifier on the LSVT training split and predict the test split.
# fit() receives one sample per row (X_train_transposed) while predict()
# receives one sample per column (X_test) — the same orientation as the
# fit(X_transposed, ...) / predict(Y) calls on the simulated data.
# NOTE(review): the recorded run of this cell fails — it prints values of
# order 1e66, then nan, and ends in an IndexError. The LSVT features are passed
# in unscaled; standardizing them with training-split statistics before
# fit/predict is the likely remedy — confirm against poly_eknn_modified.
clf = poly_eknn_modified.polynomial_EkNN_R_classifier(0.0001, 2, 3)
clf.fit(X_train_transposed, y_train)

preds = clf.predict(X_test)

84 <class 'int'>
[3.84236000e+66 1.06539167e+67] (2,)


  return (np.sum(residual**2) + self.lambda_ * (np.sum(grouped_alphas ** 2)) + self.lambda_ * (np.sum(grouped_betas ** 2)) )
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  df = fun(x) - f0


[nan nan] (2,)


IndexError: index 0 is out of bounds for axis 0 with size 0