In [1]:
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pyimzml.ImzMLParser import ImzMLParser
from tqdm import tqdm
import gc

In [2]:
# Folder that contains the preprocessed DHG dataset (the .imzML files are opened from here)
DHG_IN_PATH = "/sise/assafzar-group/assafzar/Leor/DHG/Preprocessed"
# Metadata CSV of the preprocessed dataset; it is read with pd.read_csv below
META_DATA_PATH = "/sise/assafzar-group/assafzar/Leor/DHG/Preprocessed/Metadata.csv"

In [3]:
# Load the metadata table (one row per spectrum to be read).
meta_data = pd.read_csv(META_DATA_PATH)
# Keep only the rows whose file name contains "s".
# NOTE(review): presumably this selects a specific subset of scans - confirm the intent.
# .copy() makes the filtered frame independent, so the column assignment below
# does not trigger SettingWithCopyWarning; reset_index renumbers the rows 0..n-1
# so the index can safely be used as an offset into arrays of len(meta_data).
meta_data = (
    meta_data[meta_data.file_name.str.contains('s')]
    .copy()
    .reset_index(drop=True)
)
# Binary target: 1 when who_grade > 2, otherwise 0.
meta_data["label"] = (meta_data.who_grade > 2).astype(int)

In [4]:
# Pre-allocate (uninitialised) output arrays with one slot per metadata row.
n_rows = len(meta_data)
# NOTE(review): 92000 is presumably the number of intensity values per spectrum - confirm.
intensities = np.empty(shape=(n_rows, 92000))
samples = np.empty(shape=n_rows)
labels = np.empty(shape=n_rows)

In [5]:
# Open one ImzMLParser per distinct file name, keyed by that file name,
# so every .imzML file is parsed only once.
unique_file_names = meta_data.file_name.unique()
parsers = dict(
    (file_name, ImzMLParser(os.path.join(DHG_IN_PATH, f"{file_name}.imzML")))
    for file_name in unique_file_names
)

In [None]:
# Fill the pre-allocated arrays, one row per metadata entry.
# enumerate() supplies the 0-based position. The label yielded by iterrows() is
# the DataFrame index, which keeps its original (non-contiguous) values after
# boolean filtering and therefore is not a valid offset into the arrays.
for idx, (_, row) in enumerate(meta_data.iterrows()):
  # getspectrum returns (m/z values, intensities) for spectrum number row.idx
  mzs, intensities[idx] = parsers[row.file_name].getspectrum(row.idx)
  # Sample id: drop the 'HG ' prefix, treat '_' as '-', keep the part before the first '-'
  samples[idx] = row.file_name.replace('HG ', "").replace("_", "-").split('-')[0]
  # Binary label: 0 for WHO grade <= 2, 1 otherwise
  labels[idx] = 0 if row.who_grade <= 2 else 1

In [None]:
import numpy as np
from tensorflow.keras.layers import Lambda, Input, Dense, ReLU, BatchNormalization, Dropout
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K


class MultiClass_Classifier:
    """Builder for a fully connected softmax classifier.

    The constructor only stores the hyper-parameters; call `fc()` to obtain
    the Keras model.

    Attributes:
        nSpecFeatures: length of the input feature vector.
        nClasses: number of units in the softmax output layer.
        nHidden: number of units in each of the two hidden Dense layers.
    """

    def __init__(self, nSpecFeatures, nClasses, nHidden):
        self.nSpecFeatures = nSpecFeatures
        self.nClasses = nClasses
        self.nHidden = nHidden

    def _hidden_block(self, tensor):
        """Apply Dense(ReLU, max-norm 3) -> Dropout(0.2) -> BatchNormalization."""
        dense_out = Dense(
            self.nHidden,
            activation='relu',
            kernel_constraint=max_norm(3),
        )(tensor)
        dropped = Dropout(0.2)(dense_out)
        return BatchNormalization()(dropped)

    def fc(self):
        """Assemble the network and return it as an uncompiled Keras `Model`.

        Layout: input -> two identical hidden blocks -> softmax over nClasses.
        """
        net_input = Input(shape=(self.nSpecFeatures,), name='encoder_input')

        hidden = self._hidden_block(net_input)
        hidden = self._hidden_block(hidden)

        probabilities = Dense(self.nClasses, activation='softmax')(hidden)
        return Model(net_input, probabilities)

In [None]:
# MultiClass_Classifier only stores the hyper-parameters; the Keras Model
# (the object that provides summary()) is created by its fc() method.
# NOTE(review): 92 input features here vs. 92000 columns in `intensities` - confirm which is intended.
DL_CLassifier = MultiClass_Classifier(92, 2 , 512)
DL_model = DL_CLassifier.fc()
DL_model.summary()

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

# Leave-one-sample-out evaluation: for every distinct sample id, fit a
# logistic regression on all other samples and score it on the held-out one.
train_rocs = []
train_aucs = []
test_rocs = []
test_aucs = []
# Per-fold accuracies. These lists are appended to in the loop and must exist
# beforehand (they were previously never created, which raised NameError).
train_scores = []
test_scores = []
for exclude_sample in tqdm(np.unique(samples)):
  # Build the fold mask once; boolean indexing copies the selected rows, so the
  # training subset is materialised a single time and reused for fit and score.
  held_out = samples == exclude_sample
  X_train = intensities[~held_out]
  y_train = labels[~held_out]
  clf = LogisticRegression(random_state=0, penalty="l2").fit(X_train, y_train)
  gc.collect()
  # `val` alias retained from the original cell.
  train_score = val = clf.score(X_train, y_train)
  # Release the training copy before the test subset is materialised.
  del X_train, y_train
  gc.collect()
  test_score = clf.score(intensities[held_out], labels[held_out])
  train_scores.append(train_score)
  test_scores.append(test_score)
  print(train_score, test_score)
  gc.collect()