In [42]:
# environment set up
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import pandas as pd
import time
import ast
from scipy import stats as st

# working folder
directory = os.environ['DATA_DIR']

In [43]:
spectrum_len = 500 # automate this
parent_dir = os.environ['PWD']
stddata_path = os.path.join(os.environ['DATA_DIR'], "StdData-" + str(spectrum_len))
os.chdir(os.path.join(parent_dir, "lab-notebook", "smunukutla"))

In [44]:
data = pd.read_csv("data.csv", sep=",")
record_nums = data.iloc[0, :].tolist()
spectrum_names = data.iloc[1, :].tolist()
y = data.iloc[2, :].astype(int).tolist()
y = np.reshape(y, (len(y), 1))
num_samples = len(y)

In [45]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,156,157,158,159,160,161,162,163,164,165
0,00108,00114,00120,00124,00130,00142,00148,00154,00158,00177,...,10896,10917,10938,10946,10967,10988,11009,11030,11051,11072
1,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,Actinolite,...,Topaz,Topaz,Topaz,Topaz,Topaz,Topaz,Topaz,Topaz,Topaz,Topaz
2,4,4,4,4,4,4,4,4,4,4,...,0,0,0,0,0,0,0,0,0,0


In [46]:
spectra = np.zeros((num_samples,spectrum_len))

In [47]:
for i in range(len(record_nums)):
    data = pd.read_csv(os.path.join(stddata_path,"{}.csv".format(record_nums[i])))
#     if i == 0:
#         wavelengths[i,:] = data.iloc[:, 0].to_numpy()
    spectra[i,:] = data.iloc[:, 1].to_numpy()

In [48]:
y_cat = to_categorical(y)

In [8]:
data.head(5)

Unnamed: 0,wavelength,reflectance
0,0.37,0.878464
1,0.374269,0.878143
2,0.378537,0.875394
3,0.382806,0.877019
4,0.387074,0.878757


In [9]:
spectra.shape

(166, 500)

In [10]:
spectra

array([[0.36625177, 0.37408954, 0.38041823, ..., 0.69747148, 0.69613203,
        0.69590056],
       [0.22933248, 0.23542408, 0.24072858, ..., 0.57995432, 0.57845447,
        0.58057582],
       [0.10552621, 0.10864627, 0.11163579, ..., 0.3127732 , 0.31293405,
        0.31438494],
       ...,
       [0.84827669, 0.8483359 , 0.84993451, ..., 0.73254605, 0.72484708,
        0.71382553],
       [0.88008486, 0.88482508, 0.88539167, ..., 0.75476558, 0.74785095,
        0.7386977 ],
       [0.87846376, 0.87814294, 0.87539409, ..., 0.7541015 , 0.74652251,
        0.73594507]])

In [11]:
y_cat = to_categorical(y)

In [12]:
from sklearn.decomposition import DictionaryLearning

In [13]:
model = DictionaryLearning(n_components=10, alpha=1, verbose=True)

In [14]:
results = model.fit_transform(spectra)

[dict_learning] .+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+


In [15]:
results.shape

(166, 10)

In [16]:
print(results)

[[-1.17730494e+01  2.41314393e-01 -2.61387673e+00 ... -1.64361723e+00
   4.00841548e-01  1.09387130e-02]
 [-7.28348954e+00  1.65441604e-01 -6.45127935e+00 ... -2.00583857e+00
   4.78828995e-01  1.20838851e-02]
 [ 6.77321659e-02  3.63662884e-01 -8.01376501e+00 ... -6.30032435e-01
  -2.42117114e-02  2.75775763e-04]
 ...
 [-1.29352545e+01 -7.56007780e-01 -1.04838684e+00 ...  1.01948293e+01
  -2.04866607e-01  7.01928640e-04]
 [-1.35164121e+01 -1.03617048e+00 -1.11039729e+00 ...  9.80369327e+00
  -7.75956522e-01 -1.89723032e-03]
 [-1.32554089e+01 -5.79421119e-01  2.26668644e-01 ...  9.79203188e+00
   1.22132417e-01  1.25189951e-03]]


In [17]:
model2 = DictionaryLearning(n_components=10, alpha=1, transform_algorithm='threshold', verbose=True)

In [18]:
results2 = model2.fit_transform(spectra)

[dict_learning] .+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+.+


In [19]:
results2.shape

(166, 10)

In [20]:
print(results2)

[[-13.96702235  11.76187847 -13.5262892  ...  13.69659925  12.32970097
   -0.        ]
 [-12.00684005   9.64230327 -12.25027701 ...  11.72398555  11.13384119
   -0.        ]
 [ -6.9408455    5.19291353  -7.63619502 ...   6.77768826   6.81191958
   -0.        ]
 ...
 [-19.67534109  17.7134846  -18.04453112 ...  19.60979497  16.09787098
   -0.        ]
 [-19.71066981  17.72580689 -18.03078169 ...  19.63619482  16.14339974
   -0.        ]
 [-20.3672688   18.42861286 -18.60715792 ...  20.31026363  16.59028884
   -0.        ]]


In [21]:
model.get_params()

{'alpha': 1,
 'code_init': None,
 'dict_init': None,
 'fit_algorithm': 'lars',
 'max_iter': 1000,
 'n_components': 10,
 'n_jobs': None,
 'positive_code': False,
 'positive_dict': False,
 'random_state': None,
 'split_sign': False,
 'tol': 1e-08,
 'transform_algorithm': 'omp',
 'transform_alpha': None,
 'transform_n_nonzero_coefs': None,
 'verbose': True}

In [22]:
print(model.components_)

[[-0.03576344 -0.03586424 -0.03608334 ... -0.04367278 -0.04356272
  -0.04343269]
 [ 0.02333185  0.02426826  0.02531973 ...  0.0048098   0.0045162
   0.00414729]
 [-0.01466709 -0.01506379 -0.01531206 ... -0.03734992 -0.03729115
  -0.03741644]
 ...
 [ 0.04298044  0.0432337   0.04325401 ...  0.02180551  0.02105525
   0.02026596]
 [ 0.00781883  0.00835778  0.00902266 ...  0.06802284  0.06813333
   0.06787213]
 [ 0.02988469 -0.02091497  0.03176279 ... -0.03926168  0.01859507
  -0.02305731]]


In [23]:
model.components_.shape

(10, 500)

In [24]:
print(model2.components_)

[[-0.03576344 -0.03586424 -0.03608334 ... -0.04367278 -0.04356272
  -0.04343269]
 [ 0.02333185  0.02426826  0.02531973 ...  0.0048098   0.0045162
   0.00414729]
 [-0.01466709 -0.01506379 -0.01531206 ... -0.03734992 -0.03729115
  -0.03741644]
 ...
 [ 0.04298044  0.0432337   0.04325401 ...  0.02180551  0.02105525
   0.02026596]
 [ 0.00781883  0.00835778  0.00902266 ...  0.06802284  0.06813333
   0.06787213]
 [-0.02538974  0.0450955  -0.01830563 ...  0.01424499 -0.03532233
   0.01567114]]


In [25]:
model2.components_.shape

(10, 500)

In [None]:
approximate with the training data
run transform on the training data to find the reconstructed spectra

166 x 10 X 10 x 500

approximation of an integral

thresholding is bad to get coefficients

In [26]:
results.dot(model.components_)

array([[0.40556046, 0.40736726, 0.41168076, ..., 0.69948446, 0.69958278,
        0.69910112],
       [0.27136858, 0.27375936, 0.27781342, ..., 0.60366502, 0.60449648,
        0.60535569],
       [0.11434744, 0.11813399, 0.12078916, ..., 0.32308395, 0.32272749,
        0.32419358],
       ...,
       [0.8312515 , 0.83263652, 0.83301124, ..., 0.72757095, 0.71798228,
        0.70785572],
       [0.85161972, 0.85325663, 0.85364088, ..., 0.74419782, 0.73483245,
        0.72529436],
       [0.84620295, 0.84840559, 0.8500328 , ..., 0.75217336, 0.74336572,
        0.73374242]])

In [27]:
dist = np.linalg.norm(results.dot(model.components_) - spectra)

In [28]:
# max(results.dot(model.components_) - spectra, key=max)

In [29]:
dist

3.757477255247361

In [30]:
dist2 = np.linalg.norm(results2.dot(model2.components_) - spectra)

In [31]:
dist2

1193.5553252799423

In [39]:
model.transform(spectra)

array([[-1.17730494e+01,  2.41314393e-01, -2.61387673e+00, ...,
        -1.64361723e+00,  4.00841548e-01,  1.09387130e-02],
       [-7.28348954e+00,  1.65441604e-01, -6.45127935e+00, ...,
        -2.00583857e+00,  4.78828995e-01,  1.20838851e-02],
       [ 6.77321659e-02,  3.63662884e-01, -8.01376501e+00, ...,
        -6.30032435e-01, -2.42117114e-02,  2.75775763e-04],
       ...,
       [-1.29352545e+01, -7.56007780e-01, -1.04838684e+00, ...,
         1.01948293e+01, -2.04866607e-01,  7.01928640e-04],
       [-1.35164121e+01, -1.03617048e+00, -1.11039729e+00, ...,
         9.80369327e+00, -7.75956522e-01, -1.89723032e-03],
       [-1.32554089e+01, -5.79421119e-01,  2.26668644e-01, ...,
         9.79203188e+00,  1.22132417e-01,  1.25189951e-03]])

In [40]:
results

array([[-1.17730494e+01,  2.41314393e-01, -2.61387673e+00, ...,
        -1.64361723e+00,  4.00841548e-01,  1.09387130e-02],
       [-7.28348954e+00,  1.65441604e-01, -6.45127935e+00, ...,
        -2.00583857e+00,  4.78828995e-01,  1.20838851e-02],
       [ 6.77321659e-02,  3.63662884e-01, -8.01376501e+00, ...,
        -6.30032435e-01, -2.42117114e-02,  2.75775763e-04],
       ...,
       [-1.29352545e+01, -7.56007780e-01, -1.04838684e+00, ...,
         1.01948293e+01, -2.04866607e-01,  7.01928640e-04],
       [-1.35164121e+01, -1.03617048e+00, -1.11039729e+00, ...,
         9.80369327e+00, -7.75956522e-01, -1.89723032e-03],
       [-1.32554089e+01, -5.79421119e-01,  2.26668644e-01, ...,
         9.79203188e+00,  1.22132417e-01,  1.25189951e-03]])

In [49]:
np.linalg.norm(model.transform(spectra) - results)

0.0