In [10]:
# Numerical core.
import numpy as np

# SciPy: report the installed release first, then pull in the two
# least-squares solvers and the sparse-matrix container used below.
import scipy
print(scipy.__version__)
from scipy.optimize import nnls, lsq_linear
from scipy.sparse import csr_matrix

# Project-local paths and dataset wrapper.
from utils.constants import TRAIN_DATA_PATH, TRAIN_LABELS_PATH
from utils.data_utils import NaiveDataset

0.19.0


In [35]:
# Load the training set and pull its label tensor onto the host as a NumPy array.
train_dataset = NaiveDataset(TRAIN_DATA_PATH, TRAIN_LABELS_PATH)

# Print arrays in full instead of eliding the middle with "...".
np.set_printoptions(threshold=np.inf)
M_cpu = train_dataset.labels_tensor.cpu().numpy()

# Solve M_cpu.T @ w = 1 in the least-squares sense: one equation per column of
# M_cpu, one unknown weight per row. The right-hand side length is taken from
# M_cpu itself rather than hard-coded, so the cell keeps working if the number
# of label columns changes.
# Only the first two elements of lstsq's return value (solution, residuals) are kept.
w_vec, res = np.linalg.lstsq(M_cpu.T, np.ones(M_cpu.shape[1]))[:2]

# Displayed result: column sums of the label matrix.
np.sum(M_cpu, axis=0)

40479


array([12315,   339,   862,   332,    98, 28431,  2089,   100,  4477,
        3660,  2697,  7261, 37513,  8071,   340,   209,  7411])

In [3]:
# Summary statistics of the least-squares weight vector w_vec.
# frac = 1/N, where N is the number of entries in w_vec.
frac = 1/w_vec.shape[0]
print("uniform random probability: ",frac)
print("\n")
# Vectorised NumPy reductions instead of the Python built-ins max/min/abs,
# which would iterate over the array element by element in the interpreter.
print("max: ",np.max(w_vec))
print("max_abs: ",np.max(np.abs(w_vec)))
print("min: ",np.min(w_vec))
print("min_abs: ",np.min(np.abs(w_vec)))
# NOTE(review): this ratio is negative whenever min is negative, so it is not
# a dynamic-range measure in that case.
print("max/min: ",np.max(w_vec)/np.min(w_vec))
print("sum of all weights: ",np.sum(w_vec))
print("\n")

# Keep mean and standard deviation in named variables and print those,
# rather than recomputing the same reductions a second time.
w_vec_mean = np.mean(w_vec)
w_vec_stdev = np.std(w_vec)

print("mean: ",w_vec_mean)
print("stdev: ",w_vec_stdev)

uniform random probability:  2.470416759307295e-05


max:  0.0151350798988
max_abs:  0.0151350798988
min:  -0.000422307913681
min_abs:  4.30991283322e-06
max/min:  -35.8389682231
sum of all weights:  3.99992225721


mean:  9.88147498014e-05
stdev:  0.000982349294861


In [4]:
# Indicator array: 1 where the least-squares weight is zero or negative, 0 elsewhere.
# The boolean mask is cast to w_vec's dtype so the result has the same dtype
# and shape as the original zeros_like-based array.
w_vec_thresh = (w_vec <= 0).astype(w_vec.dtype)
# NOTE(review): the label says "%" but the printed value is count * frac,
# i.e. a fraction between 0 and 1, not a percentage.
print("% of zeroed/negative training examples: ",w_vec_thresh.sum()*frac)

% of zeroed/negative training examples:  0.766520912078


In [34]:
# Weight every row of M_cpu equally (all-ones vector), sum each column, then
# divide by the number of entries in w_vec and scale by 100.
(
    M_cpu.T.dot(np.ones(w_vec.shape))
    / w_vec.shape[0]
    * 100
)

array([ 30.42318239,   0.83747128,   2.12949925,   0.82017836,
         0.24210084,  70.23641888,   5.16070061,   0.24704168,
        11.06005583,   9.04172534,   6.662714  ,  17.93769609,
        92.67274389,  19.93873366,   0.8399417 ,   0.5163171 ,  18.3082586 ])

In [36]:
# Non-negative least squares for M_cpu.T @ w = 1 with w >= 0.
# The right-hand side length comes from M_cpu instead of a hard-coded 17.
# nnls returns the pair (solution, residual norm).
w_nnls, res = nnls(M_cpu.T, np.ones(M_cpu.shape[1]))

# Indicator array: 1 where the NNLS weight is zero or negative, 0 elsewhere.
w_vec_thresh = (w_nnls <= 0).astype(w_nnls.dtype)

# The fraction is taken over w_nnls itself, so this cell no longer depends on
# `frac` / `w_vec` left behind by the unconstrained lstsq cells.
# NOTE(review): the label says "%" but the printed value is a fraction in [0, 1].
print("% of zeroed/negative training examples: ",w_vec_thresh.mean())

# Displayed result: M_cpu.T @ w_nnls, divided by the number of weights and scaled by 100.
np.dot(M_cpu.T, w_nnls)/w_nnls.shape[0]*100

% of zeroed/negative training examples:  0.999629437486


array([ 0.00249545,  0.00168172,  0.00247042,  0.0016692 ,  0.00159409,
        0.00328415,  0.00247042,  0.00169424,  0.00240782,  0.0024579 ,
        0.00247042,  0.00324659,  0.00337179,  0.0024579 ,  0.00163164,
        0.00163164,  0.00248294])

In [38]:
# Bounded least squares on a sparse (CSR) copy of the transposed label matrix.
M_sparse_T = csr_matrix(M_cpu.T)

# Every weight is constrained to be at least frac/4 (frac is 1/len(w_vec), set
# in an earlier cell), with no upper bound.
# The right-hand side length is taken from the matrix instead of a hard-coded 17.
# Despite its name, res_tuple holds whatever lsq_linear returns; it is indexed
# by key (res_tuple['x']) later in the notebook.
res_tuple = lsq_linear(
    M_sparse_T,
    np.ones(M_sparse_T.shape[0]),
    bounds=(frac/4, np.inf),
    verbose=1,
)

The first-order optimality measure is less than `tol`.
Number of iterations 34, initial cost 1.2921e+07, final cost 6.7095e-01, first-order optimality 1.74e-11.


In [39]:
# Rescale the bounded least-squares solution so that its entries sum to 100.
w_lsq_lin = res_tuple['x'] / res_tuple['x'].sum() * 100

# Displayed result: M_cpu.T applied to the rescaled weights.
M_cpu.T.dot(w_lsq_lin)

array([ 21.84286194,  13.93340993,  21.2627741 ,  13.643366  ,
        11.90310248,  29.17222612,  21.2627741 ,  14.22345385,
        19.8125545 ,  20.97273018,  21.2627741 ,  28.30209436,
        31.20253357,  20.97273018,  12.77323424,  12.77323424,  21.55281802])