In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sys import path
%matplotlib inline

In [2]:
# Put ../src at the front of sys.path so the project-local modules imported
# below can be resolved. This must run before those imports.
path.insert(0, '../src')
# NOTE(review): these three modules are presumably located in ../src — confirm.
# DataGenerator and ModelTrainer are not referenced in the cells visible here.
from setup import data_setup
from data_generator import DataGenerator
from model_trainer import ModelTrainer

In [3]:
# Create the project's data helper object and fetch its baseline frame, which
# is the input handed to generate_data() further down.
# NOTE(review): presumably dg.df() returns a pandas DataFrame — confirm in ../src.
dg = data_setup()
norm_data = dg.df()

In [4]:
from data_transformation import generate_data

# Author's note: 10,000 examples (the actual row count depends on generate_data).
# Gather every batch in a list and concatenate once; calling pd.concat inside
# the loop would re-copy the accumulated frame on every iteration.
# First batch uses [0, 0, 0]; the ten following batches use [.3, .6, 1] plus an
# extra trailing True flag.
# NOTE(review): the meaning of these positional arguments lives in
# data_transformation.generate_data — confirm there.
erred_batches = [generate_data(norm_data, 2, 2, True, [0, 0, 0])]
erred_batches.extend(
    generate_data(norm_data, 2, 2, True, [.3, .6, 1], True) for _ in range(10)
)
erred = pd.concat(erred_batches, axis=0)

# Shift every positive target down by one; targets <= 0 are left unchanged.
erred['target'] = erred['target'].apply(lambda a: a - 1 if a > 0 else a)

# Hand the combined frame to the data helper and continue with its calibrated
# version.
dg.set_df(erred)
erred = dg.calibrated_df()

In [10]:
from data_transformation import get_isotope_data, get_isotope_mass_list
# Load the isotope table once and derive two reference mass lists from it.
isotope_data = get_isotope_data()
# The two calls differ only in the boolean flag; 2000 is the same length that
# get_spectra() uses for its output vectors, which it indexes by round(mass).
# NOTE(review): presumably the flag selects the lower vs. higher nominal mass
# per integer bin — confirm in data_transformation.get_isotope_mass_list.
nom_masses_low = get_isotope_mass_list(isotope_data, False, 2000)
nom_masses_high = get_isotope_mass_list(isotope_data, True, 2000)

In [11]:
def get_spectra(masses, intensities, nom_masses_low, nom_masses_high, n_bins=2000):
    """Bin a peak list into fixed-length mass-offset and intensity vectors.

    Every mass is assigned to bin ``round(mass)`` (Python's ``round``, so exact
    halves go to the nearest even integer). For that bin the function stores
    the peak's intensity and a mass offset: ``mass - nom_masses_low[bin]`` when
    that difference is negative, otherwise ``mass - nom_masses_high[bin]``.
    When several masses fall into the same bin, the last one wins.

    Args:
        masses: Sequence of peak masses.
        intensities: Sequence indexable by position, parallel to ``masses``.
        nom_masses_low: Reference masses indexed by bin.
        nom_masses_high: Reference masses indexed by bin.
        n_bins: Length of both returned lists. Defaults to 2000, the value
            that was previously hard-coded.

    Returns:
        A tuple ``(spectra, spectra_intensities)`` of two lists of length
        ``n_bins``; bins without a peak stay at 0.

    Raises:
        IndexError: If a mass rounds to ``n_bins`` or more, or beyond the end
            of a reference list.

    Note:
        A mass that rounds to a negative integer is not rejected; Python's
        negative indexing would address bins from the end. Inputs are assumed
        to be non-negative.
    """
    spectra = [0] * n_bins
    spectra_intensities = [0] * n_bins
    for i, mass in enumerate(masses):
        j = round(mass)
        offset_low = mass - nom_masses_low[j]
        spectra_intensities[j] = intensities[i]
        # A negative offset against the low reference is kept as-is; otherwise
        # the offset is measured against the high reference instead.
        spectra[j] = offset_low if offset_low < 0 else mass - nom_masses_high[j]
    return spectra, spectra_intensities

In [13]:
from sklearn.preprocessing import MinMaxScaler

# Bin each row's peak list into fixed-length (offset, intensity) vectors.
binned_rows = [
    get_spectra(rec.masses, rec.precise_intensities, nom_masses_low, nom_masses_high)
    for rec in erred.itertuples()
]
# One matrix per feature: a row per sample, a column per mass bin.
spectra = np.vstack([np.array(offsets) for offsets, _ in binned_rows])
intensities = np.vstack([np.array(levels) for _, levels in binned_rows])

# Min-max scale the intensity matrix with scikit-learn's default settings.
# NOTE(review): the scaler is fit on every sample before train_test_split runs,
# so test-set statistics leak into the scaling — consider fitting on the
# training portion only.
scl = MinMaxScaler().fit(intensities)
intensities = scl.transform(intensities)

In [15]:
# Stack the per-bin mass offsets and the scaled intensities depth-wise, so each
# sample becomes a (2000, 2) array — the input_shape that lenet() declares.
X = np.dstack([spectra, intensities])
# Labels are the 'target' column of the calibrated frame.
y = erred['target']

In [21]:
from tensorflow.keras.layers import Dense, Flatten, Input, Conv1D, AveragePooling1D
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import Model, Sequential

In [18]:
def lenet():
    """Build and compile a LeNet-style 1-D convolutional classifier.

    The network takes inputs of shape (2000, 2) and consists of two
    Conv1D + AveragePooling1D stages, a Flatten layer, two hidden Dense layers
    (500 sigmoid units, then 100 relu units) and a 2-unit softmax output.

    Returns:
        A compiled Sequential model using categorical cross-entropy loss, the
        adam optimizer, and the 'accuracy' and 'AUC' metrics.
    """
    layers = [
        # Convolutional feature extractor.
        Conv1D(filters=8, kernel_size=20, activation='relu', input_shape=(2000, 2)),
        AveragePooling1D(),
        Conv1D(filters=20, kernel_size=80, activation='relu'),
        AveragePooling1D(),
        # Dense classification head.
        Flatten(),
        Dense(units=500, activation='sigmoid'),
        Dense(units=100, activation='relu'),
        Dense(units=2, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy', 'AUC'],
    )
    return network

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation. The split is seeded so that a
# Restart & Run All reproduces the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
# Wrap the lenet() builder in the scikit-learn adapter, train it without
# progress output (verbose=0), then get class probabilities for the held-out
# samples.
estimator = KerasClassifier(
    build_fn=lenet,
    epochs=100,
    batch_size=10,
    verbose=0,
)
estimator.fit(X_train, y_train)
preds = estimator.predict_proba(X_test)

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score

# Keep both scores in names: a notebook renders only a cell's last expression,
# so a bare roc_auc_score(...) call in the middle of the cell is discarded.
test_auc = roc_auc_score(pd.get_dummies(y_test), preds)
# Predicted class = index of the highest-probability column (first on ties).
# NOTE(review): comparing column indices with y_test assumes the labels are
# 0..k-1 in column order — confirm against the values in erred['target'].
predictions = np.argmax(preds, axis=1)
test_accuracy = accuracy_score(y_test, predictions)
# Last expression of the cell, so both metrics are displayed.
{'roc_auc': test_auc, 'accuracy': test_accuracy}