In [1]:
# environment set up
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Reshape, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras import metrics
from tensorflow.keras import regularizers
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.callbacks import EarlyStopping
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.metrics import accuracy_score
import os
import random
import pandas as pd
import ast
from scipy import stats as st
import time

In [2]:
# Length of every spectrum row used below. TODO: automate this (currently hard-coded).
spectrum_len = 500
# Base directory, taken from the PWD environment variable.
parent_dir = os.environ['PWD']
# Folder holding the per-record CSV files; its name embeds the spectrum length.
stddata_path = os.path.join(os.environ['DATA_DIR'], "StdData-{}".format(spectrum_len))
# Switch to the notebook folder so that relative paths such as "data.csv" resolve there.
os.chdir(os.path.join(parent_dir, "lab-notebook", "smunukutla"))

In [3]:
# Load the sample index table; every column describes one sample.
data = pd.read_csv("data.csv", sep=",")
# Row 0 -> record numbers, row 1 -> spectrum names.
record_nums, spectrum_names = (data.iloc[row, :].tolist() for row in (0, 1))
# Row 2 -> integer labels, kept as a column vector of shape (num_samples, 1).
y = np.reshape(data.iloc[2, :].astype(int).tolist(), (-1, 1))
num_samples = len(y)

In [4]:
# Preallocate one row per sample and one column per spectrum point; the rows are
# filled in by the loading loop that follows.
spectra = np.zeros((num_samples,spectrum_len))

In [5]:
# Fill `spectra` row by row from the per-record CSV files in `stddata_path`.
# NOTE: this rebinds `data` (previously the frame read from data.csv), so after
# the loop `data` holds the frame of the last record only.
for i, record_num in enumerate(record_nums):
    data = pd.read_csv(os.path.join(stddata_path, "{}.csv".format(record_num)))
    # Only column 1 is stored; column 0 (presumably the wavelength axis -- TODO confirm)
    # is not loaded.
    spectra[i, :] = data.iloc[:, 1].to_numpy()

In [6]:
# Sanity check: the shape should be (num_samples, spectrum_len).
spectra.shape

(166, 500)

In [7]:
# One-hot encode the integer label column with Keras' to_categorical.
y_cat = to_categorical(y)

In [8]:
from sklearn.decomposition import FastICA

In [16]:
# scikit-learn FastICA estimator extracting two components (no random_state is passed).
model = FastICA(n_components=2)

In [30]:
# NOTE(review): `data` is whatever it was last bound to. The loading loop earlier in
# this notebook rebinds it on every iteration, so here it is the CSV frame of the
# final record rather than the full `spectra` matrix. Presumably `spectra` (or its
# transpose) was the intended input -- confirm before relying on `results`.
results = model.fit_transform(data)

In [31]:
# Display the array returned by model.fit_transform.
results

array([[ 9.11357107e-02, -7.18504202e-02],
       [ 9.08594529e-02, -7.20683147e-02],
       [ 9.09266310e-02, -7.42379448e-02],
       [ 9.03752008e-02, -7.28920440e-02],
       [ 8.98076906e-02, -7.14547617e-02],
       [ 8.96508436e-02, -7.23512643e-02],
       [ 8.92580604e-02, -7.19069489e-02],
       [ 8.87461863e-02, -7.07858443e-02],
       [ 8.86645690e-02, -7.21098748e-02],
       [ 8.82914815e-02, -7.17774898e-02],
       [ 8.74900582e-02, -6.90108871e-02],
       [ 8.73265416e-02, -6.98694864e-02],
       [ 8.68624479e-02, -6.90199163e-02],
       [ 8.62766929e-02, -6.74789487e-02],
       [ 8.58067684e-02, -6.65962426e-02],
       [ 8.50378754e-02, -6.40145082e-02],
       [ 8.46867800e-02, -6.38071032e-02],
       [ 8.43286849e-02, -6.35599199e-02],
       [ 8.34180191e-02, -6.01724951e-02],
       [ 8.32568522e-02, -6.10444478e-02],
       [ 8.21909554e-02, -5.67748504e-02],
       [ 8.19091045e-02, -5.69609602e-02],
       [ 8.15737014e-02, -5.68427345e-02],
       [ 8.

In [19]:
def g(x):
    """Nonlinearity applied element-wise: the hyperbolic tangent of ``x``."""
    return np.tanh(x)


def g_der(x):
    """Derivative of ``g`` evaluated element-wise: ``1 - tanh(x)**2``."""
    activation = g(x)
    return 1 - activation * activation

In [20]:
def center(X):
    """Subtract each row's mean so that every row of the result averages to zero.

    ``X`` is converted with ``np.array`` (which copies, so the caller's data is
    left untouched) and needs at least two dimensions because the mean is taken
    along axis 1.
    """
    rows = np.array(X)
    return rows - rows.mean(axis=1, keepdims=True)

In [21]:
def whitening(X):
    """Whiten ``X`` so that its rows become uncorrelated with unit variance.

    The covariance of the rows of ``X`` (``np.cov`` convention: rows are
    variables, columns are observations) is eigendecomposed as ``E diag(d) E.T``
    and ``X`` is multiplied by ``E diag(d ** -0.5) E.T``.

    Parameters
    ----------
    X : ndarray of shape (n_signals, n_samples)

    Returns
    -------
    ndarray of shape (n_signals, n_samples)
        The transformed data; ``np.cov`` of the result is the identity matrix.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix has a zero or negative eigenvalue, i.e. the
        rows of ``X`` are linearly dependent and cannot be whitened.
    """
    # atleast_2d: np.cov returns a 0-d array when X holds a single signal.
    cov = np.atleast_2d(np.cov(X))
    eigenvalues, E = np.linalg.eigh(cov)
    if np.any(eigenvalues <= 0):
        # Without this check the square root below would silently yield NaN/inf.
        raise np.linalg.LinAlgError(
            "covariance matrix is singular or not positive definite; cannot whiten"
        )
    # Scale the eigenvector columns directly instead of building and inverting
    # a dense diagonal matrix: E * s == E @ diag(s).
    inv_sqrt = 1.0 / np.sqrt(eigenvalues)
    X_whiten = np.dot(E * inv_sqrt, np.dot(E.T, X))
    return X_whiten

In [22]:
def calculate_new_w(w, X):
    """Run one fixed-point update of the unmixing vector ``w`` and return it unit-length.

    With ``p = w.T @ X`` the update is
    ``mean(X * g(p), axis=1) - mean(g_der(p)) * w``, divided by its Euclidean norm.
    """
    projection = np.dot(w.T, X)
    updated = (X * g(projection)).mean(axis=1) - g_der(projection).mean() * w
    return updated / np.sqrt((updated ** 2).sum())

In [28]:
def ica(X, iterations, tolerance=1e-5, random_state=None):
    """Estimate independent components of ``X`` with a deflation-style FastICA loop.

    ``X`` is centered and whitened, then one unmixing vector per row of ``X`` is
    found in turn. Each vector is refined with ``calculate_new_w``, made
    orthogonal to the vectors already found, and rescaled to unit length.

    Parameters
    ----------
    X : array-like of shape (n_signals, n_samples)
        Observed mixtures, one signal per row.
    iterations : int
        Maximum number of update steps per component.
    tolerance : float, optional
        A component is considered converged once ``| |w . w_new| - 1 |`` drops
        below this value.
    random_state : int or None, optional
        Seed for the random initial vectors. ``None`` (the default) draws from
        NumPy's global random state, as before.

    Returns
    -------
    ndarray of shape (n_signals, n_samples)
        The estimated sources ``W @ X_whitened``.
    """
    X = center(X)
    X = whitening(X)

    components_nr = X.shape[0]
    W = np.zeros((components_nr, components_nr), dtype=X.dtype)

    # Both np.random and RandomState expose .rand, so the draw below is uniform
    # either way; only the source of randomness differs.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    for i in range(components_nr):
        w = rng.rand(components_nr)

        for _ in range(iterations):
            w_new = calculate_new_w(w, X)

            if i >= 1:
                # Gram-Schmidt deflation: remove the parts of w_new that lie
                # along the components already stored in W[:i].
                w_new -= np.dot(np.dot(w_new, W[:i].T), W[:i])
                # The subtraction shortens w_new; restore unit length so the
                # convergence test below compares unit vectors and W keeps
                # orthonormal rows.
                w_new /= np.sqrt((w_new ** 2).sum())

            # For unit vectors |w . w_new| equals 1 exactly when the direction
            # stopped changing (the sign is irrelevant).
            distance = np.abs(np.abs((w * w_new).sum()) - 1)

            w = w_new

            if distance < tolerance:
                break

        W[i, :] = w

    S = np.dot(W, X)

    return S

In [29]:
def plot_mixture_sources_predictions(X, original_sources, S):
    """Show three stacked panels: the mixtures, the real sources and the predicted sources.

    Each argument is iterated, and every item is drawn as one line in the
    corresponding panel. The figure is displayed with ``plt.show()``; nothing
    is returned.
    """
    fig = plt.figure()
    panels = (
        (X, "mixtures"),
        (original_sources, "real sources"),
        (S, "predicted sources"),
    )
    for position, (curves, heading) in enumerate(panels, start=1):
        plt.subplot(3, 1, position)
        for curve in curves:
            plt.plot(curve)
        plt.title(heading)

    fig.tight_layout()
    plt.show()