# TensorFlow Training Loop

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Load MNIST and flatten every image into a 784-element vector.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))

# Reserve the last 10,000 training samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

# Prepare the training dataset (shuffled, then batched).
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Prepare the validation dataset (batched only).
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)

# Build the model: two 64-unit ReLU layers followed by a 10-unit output layer
# without activation, so the model emits raw logits.
inputs = keras.Input(shape=(784,), name="digits")
x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(10, name="predictions")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Instantiate the optimizer and the loss exactly once.  The loss is told that
# the model outputs logits (from_logits=True), matching the output layer above.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Separate accuracy trackers for the training and the validation passes.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

import time

# Custom training loop: one optimizer step per batch, accuracy reported per
# epoch, followed by a full pass over the validation set.
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))
    start_time = time.time()

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so gradients can be taken w.r.t. the weights.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Update training metric.
        train_acc_metric.update_state(y_batch_train, logits)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # Use the configured batch size rather than a hard-coded 64 so the
            # count stays correct when `batch_size` is changed.
            print("Seen so far: %d samples" % ((step + 1) * batch_size))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print("Training acc over epoch: %.4f" % (float(train_acc),))

    # Reset training metrics at the end of each epoch
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val, training=False)
        # Update val metrics
        val_acc_metric.update_state(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print("Validation acc: %.4f" % (float(val_acc),))
    print("Time taken: %.2fs" % (time.time() - start_time))

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Module-level optimizer and loss for the MNIST classifier defined below.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Load MNIST and flatten every image into a 784-element row.
batch_size = 64
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))

# Hold out the final 10,000 training samples as the validation split.
x_train, x_val = x_train[:-10000], x_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]

# Training pipeline: shuffle with a 1024-element buffer, then batch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(batch_size)
)

# Validation pipeline: batch only.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size)

# Two hidden ReLU layers feeding a 10-unit output layer without activation.
inputs = keras.Input(shape=(784,), name="digits")
x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(10, name="predictions")(x)
model = keras.Model(inputs=inputs, outputs=outputs)




def train_group_model(model, train_dataset, valid_dataset, epochs = 2):
    """Train ``model`` with a hand-written loop using Adam and binary cross-entropy.

    Args:
        model: Callable Keras model exposing ``trainable_weights``.  The loss is
            created with ``from_logits=False``, i.e. the model outputs are
            treated as probabilities.
        train_dataset: Sized iterable of ``(x, y)`` batches whose elements
            expose ``.numpy()`` (e.g. a torch ``DataLoader``).
        valid_dataset: Iterable of ``(x, y)`` batches with the same interface,
            evaluated once at the end of every epoch.
        epochs: Number of passes over ``train_dataset``.

    Returns:
        None.  Accuracy and timing are printed; the model is updated in place.
    """
    import time

    # Optimizer, loss and metrics are local so every call starts from a clean state.
    optimizer = keras.optimizers.Adam(learning_rate=1e-3)
    loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)
    train_acc_metric = keras.metrics.BinaryAccuracy(
        name='binary_accuracy', dtype=None, threshold=0.5)
    val_acc_metric = keras.metrics.BinaryAccuracy(
        name='binary_accuracy', dtype=None, threshold=0.5)

    for epoch in range(epochs):
        print("\nStart of epoch %d" % (epoch,))
        start_time = time.time()

        # Iterate over the batches of the dataset behind a progress bar.
        pbar = tqdm(train_dataset, dynamic_ncols=True, total=len(train_dataset))
        for step, (x_batch_train, y_batch_train) in enumerate(pbar):
            # Batches arrive as non-TensorFlow tensors; go through NumPy.
            x_batch_train = tf.convert_to_tensor(x_batch_train.numpy(), dtype=tf.float32)
            y_batch_train = tf.convert_to_tensor(y_batch_train.numpy(), dtype=tf.int64)
            with tf.GradientTape() as tape:
                preds = model(x_batch_train, training=True)
                loss_value = loss_fn(y_batch_train, preds)
            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            # Update training metric.
            train_acc_metric.update_state(y_batch_train, preds)

            # Refresh the progress-bar text every 200 batches.  The original
            # built this string into an unused variable, so nothing was shown.
            if step % 200 == 0:
                pbar.set_description(
                    f'epoch {epoch} step {step} loss: {float(loss_value):.4f}'
                )

        # Display metrics at the end of each epoch.
        train_acc = train_acc_metric.result()
        print("Training acc over epoch: %.4f" % (float(train_acc),))

        # Reset training metrics at the end of each epoch
        train_acc_metric.reset_states()

        # Run a validation loop at the end of each epoch.
        for x_batch_val, y_batch_val in valid_dataset:
            x_batch_val = tf.convert_to_tensor(x_batch_val.numpy(), dtype=tf.float32)
            y_batch_val = tf.convert_to_tensor(y_batch_val.numpy(), dtype=tf.int64)
            val_preds = model(x_batch_val, training=False)
            # Update val metrics
            val_acc_metric.update_state(y_batch_val, val_preds)
        val_acc = val_acc_metric.result()
        val_acc_metric.reset_states()
        print("Validation acc: %.4f" % (float(val_acc),))
        print("Time taken: %.2fs" % (time.time() - start_time))

In [None]:
batch_size = 256
winmin = 6
stridesec = 5
# Window length in samples; the factor 15 is presumably the sampling rate in
# Hz -- TODO confirm against the dataset code.
win_size = 15 * winmin * 60
model_1 = acti_model(input_shape=(win_size, 6))
model_1.compile(loss='binary_crossentropy',
                optimizer='adam', metrics=['accuracy'])

# Split the sample indices of the whole training dataset into train and test
# parts.  (A loader over the full dataset used to be created here and was
# overwritten below before ever being used; it has been dropped.)
labels = meal_data_train.labels
train_indices, test_indices = split_train_test_indices(X=list(range(len(labels))),
                                                       y=labels, test_size=0.2,
                                                       random_seed=random_seed)

# train_indices = train_indices[:1000]
# train_set_balanced = train_indices[:1000]
# test_indices = test_indices[:1000]
# Balance the training part by under-sampling.
trainset_labels = labels[train_indices]
train_indices_balanced = balance_data_indices(trainset_labels, data_indices=train_indices,
                                              mode="under", shuffle=True,
                                              random_state=random_seed, replace=False)

train_set_balanced = torch.utils.data.Subset(meal_data_train, train_indices_balanced)
test_set = torch.utils.data.Subset(meal_data_train, test_indices)

train_loader = torch.utils.data.DataLoader(train_set_balanced, batch_size=batch_size,
                                           shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# The test loader doubles as the validation set of the custom training loop.
train_group_model(model_1, train_loader, test_loader, epochs=2)

In [None]:
# Load the training files of one participant as NumPy-backed samples.
person = "wenkanw"
meal_data_train = Person_MealsDataset(
    person_name=person,
    file_name="train_files",
    winmin=6,
    stridesec=5,
    get_numpy_data=True,
)

# Shuffled mini-batches of 16 samples, fetched by two worker processes.
train_loader = torch.utils.data.DataLoader(
    meal_data_train,
    batch_size=16,
    shuffle=True,
    num_workers=2,
)

# TensorFlow Dataset Test

In [None]:

from dataset import *

class Person_MealsDataset2(torch.utils.data.Dataset):
    """Map-style dataset of windowed samples for one person.

    Unless a ``dataset`` is supplied, the constructor calls
    ``load_train_test_data`` and stores its four results as ``self.dataset``,
    ``self.data``, ``self.data_indices`` and ``self.labels``.  Every row of
    ``self.data_indices`` is read as ``(file, start, end[, time_offset])`` and
    selects the slice ``self.data[file][start:end]``.
    """

    def __init__(self, dataset = None,person_name= "wenkanw",
                 data_indices_file = "../data-file-indices/",
                 file_name = "all_files_list",
                 remove_trend = 0,
                 remove_walk = 0,
                 remove_rest = 0,
                 smooth_flag = 1,
                 normalize_flag = 1,
                 winmin = 6,
                 stridesec = 15,
                 gtperc = 0.5,
                 device = 'cpu',
                 ratio_dataset=1,
                load_splitted_dataset = False,
                 enable_time_feat = False,
                 debug_flag= False,
                 tf_data=True,
                 get_numpy_data= True,
                ):
        """Store the configuration and load the data unless ``dataset`` is given.

        ``file_name`` names the text file (without extension) that lists the
        data files; the shortcuts ``"train"`` and ``"test"`` map to
        ``train_files.txt`` and ``test_files.txt``.  The preprocessing options
        (``remove_*``, ``smooth_flag``, ``normalize_flag``, ``winmin``,
        ``stridesec``, ``gtperc``, ``ratio_dataset``, ``debug_flag``) are passed
        through to ``load_train_test_data`` unchanged.

        ``tf_data`` makes ``__getitem__`` return only the sample (no label);
        ``get_numpy_data`` keeps samples as NumPy arrays instead of converting
        them to torch tensors on ``device``.
        """
        if file_name == "train":
            file_name = data_indices_file + person_name +"/"+"train_files.txt"
        elif file_name == "test":
            file_name = data_indices_file + person_name +"/"+"test_files.txt"
        else:
            file_name = data_indices_file + person_name +"/"+ file_name+".txt"

        # Note: file_name is the name of file that contain the list of shm files' names
        self.tf_data = tf_data
        self.get_numpy_data = get_numpy_data
        self.file_name = file_name
        self.dataset = dataset
        self.person_name = person_name
        self.winmin = winmin
        self.stridesec = stridesec
        self.load_splitted_dataset = load_splitted_dataset
        self.remove_trend = remove_trend
        self.remove_walk = remove_walk
        self.remove_rest = remove_rest
        self.smooth_flag = smooth_flag
        self.normalize_flag = normalize_flag
        # A stray trailing comma used to turn this into the tuple (gtperc,).
        self.gtperc = gtperc
        self.ratio_dataset = ratio_dataset
        self.enable_time_feat = enable_time_feat
        self.device = device
        self.debug_flag = debug_flag
        # `is None` instead of truthiness: a pandas DataFrame has no
        # unambiguous truth value and an empty container is still "given".
        if self.dataset is None:
            self.get_data(person_name)

    def get_data(self, person_name):
        """Load data, window indices and labels via ``load_train_test_data``.

        ``person_name`` is accepted for interface compatibility; the file list
        resolved in ``__init__`` (``self.file_name``) decides what is loaded.
        """
        # Note: the data preprocessing in this function is for global time series dataset
        self.dataset, self.data, self.data_indices, self.labels = load_train_test_data(
            data_file_list=self.file_name,
            load_splitted_dataset=False,
            ratio_dataset=self.ratio_dataset,
            enabled_time_feat=self.enable_time_feat,
            winmin=self.winmin, stridesec=self.stridesec, gtperc=self.gtperc,
            removerest=self.remove_rest,
            removewalk=self.remove_walk, smooth_flag=self.smooth_flag,
            normalize_flag=self.normalize_flag,
            remove_trend=self.remove_trend,
            debug_flag=self.debug_flag)

        if self.load_splitted_dataset:
            self.dataset = self.get_dataset()

    def __getitem__(self, index):
        """Return sample ``index``.

        With ``tf_data`` set only the sample is returned, otherwise the pair
        ``(sample, label)``.  Exactly one sample is read per call.
        """
        item = self.get_item(index)
        if self.tf_data:
            return item['data']
        return item['data'], item['label']

    def __len__(self):
        """Return the number of samples (rows of the materialised table when
        ``load_splitted_dataset`` is set, otherwise rows of ``data_indices``)."""
        return len(self.dataset) if self.load_splitted_dataset else len(self.data_indices)

    def get_item(self, index, tensor_type=True):
        """Return one sample as ``{"data": ..., "label": ...}``.

        ``tensor_type`` is accepted for backward compatibility and ignored; the
        container type of ``"data"`` is controlled by ``self.get_numpy_data``.
        """
        f, start_time, end_time = (self.data_indices[index, 0],
                                   self.data_indices[index, 1],
                                   self.data_indices[index, 2])
        sample = self.data[f][start_time:end_time]
        # Optionally append a time column: position within the file scaled by
        # `freq`, shifted by the offset stored in the fourth index column.
        if self.enable_time_feat:
            time_offset = self.data_indices[index, 3]
            freq = 1.0/15.0
            time_feat = np.arange(len(sample), dtype=float).reshape(-1, 1)
            time_feat *= freq
            time_feat += float(start_time) * freq
            time_feat += time_offset
            sample = np.concatenate((sample, time_feat), axis=1)
        label = self.labels[index]
        if not self.get_numpy_data:
            return {"data": torch.tensor(sample, dtype=torch.float, device=self.device),
                    'label': label}
        return {"data": sample, 'label': label}

    def get_dataset(self, start_index = None, end_index = None):
        """Materialise samples ``[start_index, end_index)`` as a DataFrame.

        The frame has the columns ``data`` and ``label``; it is also stored in
        ``self.dataset``.  Missing bounds default to the full range.
        """
        start_i = 0 if start_index is None else start_index
        end_i = len(self) if end_index is None else end_index

        # Collect the rows first and build the frame once: DataFrame.append
        # copied the whole frame per row and no longer exists in pandas >= 2.0.
        records = [self.get_item(i) for i in tqdm(range(start_i, end_i))]
        self.dataset = pd.DataFrame(records, columns=['data', 'label'])
        return self.dataset

    def sample(self, num = 1000,random_state = None):
        """Draw ``num`` random samples (with replacement) for analysis.

        ``random_state`` seeds NumPy's global generator when it is not None.
        Returns a DataFrame with the columns ``data`` (NumPy arrays) and
        ``label``.
        """
        if random_state is not None:
            np.random.seed(random_state)

        indices = np.random.choice(len(self.labels), num)
        records = []
        for i in tqdm(indices):
            item = self.get_item(i)
            # Only torch tensors need converting; with get_numpy_data=True the
            # sample already is a NumPy array and has no .numpy() method.
            if torch.is_tensor(item["data"]):
                item["data"] = item["data"].detach().cpu().numpy()
            records.append(item)
        return pd.DataFrame(records, columns=['data', 'label'])

    def get_subset(self, indices_ls):
        """Return ``(samples, labels)`` as NumPy arrays for the given indices."""
        items = [self.get_item(i, tensor_type=False) for i in indices_ls]
        subsetData = np.array([item['data'] for item in items], copy=True)
        subsetLabels = np.array([item['label'] for item in items], copy=True)
        return subsetData, subsetLabels

    def get_mealdataset_info(self,person_name = None,file_ls = [], root_path = "../data/",print_file=False):
        """Summarise the events of a set of recording files.

        If ``file_ls`` is empty, the file names are read from
        ``../data-file-indices/<person_name>/all_files_list.txt`` (one per
        line, empty lines ignored); ``person_name`` defaults to the person of
        this dataset.  Every file is loaded from ``root_path + file_name`` with
        ``loadEvents``.  The default list is never mutated.

        Returns:
            ``(meal_counts, min_counts, hour_counts, day_counts, total_hours)``:
            number of events, summed event duration in minutes and in hours,
            number of files, and summed ``|EndTime - TimeOffset|`` in hours.
        """
        if person_name is None:
            person_name = self.person_name
        if len(file_ls) == 0:
            list_path = "../data-file-indices/" + person_name + "/all_files_list.txt"
            with open(list_path, "r") as fp:
                file_ls = [name for name in fp.read().split("\n") if name != '']

        meal_counts = 0
        sec_counts = 0
        total_sec = 0
        day_counts = len(file_ls)
        for file_name in file_ls:
            TotalEvents, EventStart, EventEnd, EventNames, TimeOffset, EndTime = loadEvents(
                root_path + file_name, debug_flag=False, print_file=print_file)
            meal_counts += TotalEvents
            total_sec += abs(EndTime - TimeOffset)
            # Event bounds are divided by 15 to obtain seconds (presumably
            # sample indices at 15 Hz -- TODO confirm).
            for i in range(len(EventStart)):
                sec_counts += (EventEnd[i] - EventStart[i]) // 15
        total_hours = total_sec // (60*60)
        min_counts = sec_counts // 60
        hour_counts = min_counts // 60

        return meal_counts, min_counts, hour_counts, day_counts, total_hours



class tf_dataset:
    """Generator-backed ``tf.data.Dataset`` over an indexable sample source.

    ``data`` must support ``len()`` and integer indexing, and every element
    must be indexable as ``element[0]`` (sample) and ``element[1]`` (label).
    The dataset is exposed as ``self.dataset`` and yields
    ``(float32 sample of shape `shape`, int32 scalar label)`` pairs.

    ``batch`` is currently unused: batching is left to the caller.
    """

    def __init__(self,data,shape=[5400,6],batch=16):
        self.data = data
        self.dataset = tf.data.Dataset.from_generator(
            self.gen,
            (tf.float32, tf.int32),
            (tf.TensorShape(shape), tf.TensorShape([])),
        )
        #self.dataset = self.dataset.batch(batch)

    def __len__(self):
        """Return the number of samples in the wrapped source."""
        return len(self.data)

    def gen(self):
        """Yield ``(sample, label)`` for every index of the wrapped source."""
        data = self.data
        for i in range(len(data)):
            # Fetch each element once; indexing may be expensive (it builds
            # the window), and the original indexed the source twice per item.
            item = data[i]
            yield item[0], item[1]
            
# Wrap the meal dataset in the generator-backed tf.data pipeline defined above
# (default sample shape and batch arguments).
ds = tf_dataset(meal_data_train)

class TF_DataSet:
    """``tf.data`` pipeline that slices windows out of a loaded meal dataset.

    ``data`` must expose ``data`` (per-file arrays), ``data_indices`` (rows
    starting with ``file, start, end``) and ``labels``.  The pipeline iterates
    over the index rows and labels, maps every row to its window through
    ``tf.py_function`` and batches the result; it is exposed as
    ``self.dataset``.

    Args:
        data: Source dataset object as described above.
        indices: Optional index collection selecting a subset of samples;
            ``None`` uses every sample.
        batch: Batch size of the resulting dataset.
    """

    def __init__(self,data, indices=None,batch=16):
        self.tf_data = data
        self.indices = indices
        # `is not None`: comparing an index array with `!=` is element-wise
        # and cannot be used as a condition.
        if indices is not None:
            windows = self.tf_data.data_indices[self.indices]
            labels = self.tf_data.labels[self.indices]
        else:
            windows = self.tf_data.data_indices
            labels = self.tf_data.labels
        self.dataset = tf.data.Dataset.from_tensor_slices((windows, labels))

        self.dataset = self.dataset.map(lambda x,y: tf.py_function(func=self.map_fun, inp=[x,y], Tout=[tf.float32,tf.int64]))
        self.dataset = self.dataset.batch(batch)

    def __len__(self):
        """Return the number of samples (not batches) in the pipeline."""
        if self.indices is None:
            # No subset was requested: every label corresponds to one sample.
            return len(self.tf_data.labels)
        return len(self.indices)

    def map_fun(self,x,y ):
        """Turn one index row ``x = (file, start, end, ...)`` into its window.

        Returns ``(window, y)`` where ``window`` is
        ``data[file][start:end]`` of the source dataset.
        """
        # Convert to plain ints so the lookup works for list and dict storage
        # alike, independent of the tensor type handed in by py_function.
        file_idx, start, end = int(x[0]), int(x[1]), int(x[2])
        return self.tf_data.data[file_idx][start:end], y



# Custom TensorFlow Model with TensorFlow Dataset

In [None]:

# loss_tracker = keras.metrics.Mean(name="loss")
# mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
# optimizer = keras.optimizers.Adam(learning_rate=1e-3)
# Instantiate a loss function.
# Loss minimised by the custom train step; model outputs are treated as
# probabilities (from_logits=False).
loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)

# Metric objects reported by the custom train step.  Note that `mae_metric`
# is a binary-accuracy metric despite its name.
loss_tracker = keras.metrics.BinaryCrossentropy(name="loss")
mae_metric = keras.metrics.BinaryAccuracy(
    name="binary_accuracy",
    dtype=None,
    threshold=0.5,
)

# val_acc_metric = keras.metrics.BinaryAccuracy(
#         name='binary_accuracy', dtype=None, threshold=0.5)


class CustomModel(keras.Model):
    """Keras model with a hand-written ``train_step``.

    The step minimises the module-level ``loss_fn`` and reports the
    module-level ``loss_tracker`` and ``mae_metric`` objects.
    """

    def train_step(self, data):
        """Run one optimisation step on a batch ``data = (x, y)``.

        Returns a dict with the running values of the two metrics under the
        keys ``"loss"`` and ``"mae"`` (the latter holds a binary accuracy; the
        key is kept for compatibility with existing logs).
        """
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute our own loss
            loss = loss_fn(y, y_pred)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Compute our own metrics
        loss_tracker.update_state(y, y_pred)
        mae_metric.update_state(y, y_pred)
        return {"loss": loss_tracker.result(), "mae": mae_metric.result()}

    @property
    def metrics(self):
        """Metric objects Keras should reset automatically.

        The metrics live at module level, so Keras cannot discover them on its
        own.  Listing them here lets `reset_states()` be called automatically
        at the start of each epoch or of `evaluate()`; without this property
        the reported values keep accumulating over all epochs.
        """
        return [loss_tracker, mae_metric]


# Small input/output pair kept from the original example; the CustomModel
# below is built from `model_1`, not from these two tensors.
inputs = keras.Input(shape=(32,))
outputs = keras.layers.Dense(1)(inputs)

# Window configuration for the activity model.
winmin = 6
stridesec = 5
win_size = 15 * winmin * 60
model_1 = acti_model(input_shape=(win_size, 6))

# Construct an instance of CustomModel on top of the graph of `model_1`.
model = CustomModel(model_1.input, model_1.output)

# We don't pass a loss here: CustomModel.train_step computes it itself.
model.compile(optimizer="adam", metrics=["accuracy"])

# Just use `fit` as usual -- you can use callbacks, etc.
# NOTE(review): `mcp_save` is created but never handed to fit(), and it
# monitors 'accuracy' while train_step reports 'loss' and 'mae' -- confirm
# the intended wiring before relying on checkpoints.
mcp_save = tf.keras.callbacks.ModelCheckpoint(
    "test.h5",
    save_best_only=True,
    monitor="accuracy",
)
model.fit(ds2.dataset, validation_data=None, epochs=5)

# Check the length of the TensorFlow dataset

In [None]:
# Build the tf.data pipeline over all samples of the meal dataset (no index
# subset) with batches of 32, then display the cardinality it reports.
ds2 = TF_DataSet(meal_data_train,batch=32)
ds2.dataset.cardinality()

In [None]:
# Print the built-in help for the dataset object held by `ds2`.
help(ds2.dataset)

In [None]:
# Window configuration for the activity model.
winmin = 6
stridesec = 5
win_size = 15 * winmin * 60

# Build and compile a fresh model with the stock Keras loss and metric.
model_1 = acti_model(input_shape=(win_size, 6))
model_1.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Feed the batches through a NumPy iterator instead of the dataset object.
# NOTE(review): the iterator is created once for both epochs; presumably it
# is exhausted after the first pass -- confirm that epoch 2 receives data.
H = model_1.fit(
    x=ds2.dataset.as_numpy_iterator(),
    y=None,
    validation_data=None,
    epochs=2,
    verbose=1,
    callbacks=[],
)

# Function to train group models (still needs modification)

In [None]:

from dataset import create_train_test_file_list,  balance_data_indices  #Person_MealsDataset,
from utils import *
from model import *
def _split_day_arrays(person, winmin, stridesec, random_seed, test_balanced):
    """Build (train, validation, test) NumPy arrays from separate train/test file lists."""
    create_train_test_file_list(file_name="all_files_list.txt", person_name=person,
                                out_path="../data-file-indices/", root_path="../",
                                test_ratio=0.2, print_flag=True, shuffle=True,
                                random_state=random_seed)

    meal_data_train = Person_MealsDataset(person_name=person, file_name="train_files",
                                          winmin=winmin, stridesec=stridesec)
    meal_data_test = Person_MealsDataset(person_name=person, file_name="test_files",
                                         winmin=winmin, stridesec=stridesec)

    train_indices, valid_indices = split_train_test_indices(
        X=list(range(len(meal_data_train.labels))),
        y=meal_data_train.labels, test_size=0.2,
        random_seed=random_seed)

    # Balance the training part by under-sampling.
    trainset_labels = meal_data_train.labels[train_indices]
    train_indices = balance_data_indices(trainset_labels, data_indices=train_indices,
                                         mode="under", shuffle=True,
                                         random_state=random_seed, replace=False)

    # The test set is balanced only on request.
    test_indices = list(range(len(meal_data_test)))
    if test_balanced:
        test_indices = balance_data_indices(meal_data_test.labels, data_indices=test_indices,
                                            mode="under", shuffle=True,
                                            random_state=random_seed, replace=False)

    train_arrays = meal_data_train.get_subset(train_indices)
    valid_arrays = meal_data_train.get_subset(valid_indices)
    test_arrays = meal_data_test.get_subset(test_indices)
    return train_arrays, valid_arrays, test_arrays


def _random_split_arrays(person, winmin, stridesec, random_seed, test_balanced):
    """Build (train, validation, test) NumPy arrays by splitting one dataset of all files."""
    meal_data = Person_MealsDataset(person_name=person, file_name="all_files_list",
                                    winmin=winmin, stridesec=stridesec)
    labels = meal_data.labels

    # Split train set and test set.
    train_indices, test_indices = split_train_test_indices(
        X=list(range(len(labels))),
        y=labels, test_size=0.2,
        random_seed=random_seed)

    # Balance the training part by under-sampling.
    train_indices_balanced = balance_data_indices(labels[train_indices],
                                                  data_indices=train_indices,
                                                  mode="under", shuffle=True,
                                                  random_state=random_seed, replace=False)

    # The test set is balanced only on request.
    if test_balanced:
        test_indices = balance_data_indices(labels[test_indices], data_indices=test_indices,
                                            mode="under", shuffle=True,
                                            random_state=random_seed, replace=False)

    print("Data Loader Created")

    # Carve the validation set out of the balanced training indices.
    train_indices_balanced, valid_indices = split_train_test_indices(
        X=train_indices_balanced,
        y=labels[train_indices_balanced], test_size=0.2,
        random_seed=random_seed)

    # Materialise the arrays that model.fit / model.predict consume.  These
    # were previously never created on this path, so training failed with a
    # NameError whenever split_day was False.
    train_arrays = meal_data.get_subset(train_indices_balanced)
    valid_arrays = meal_data.get_subset(valid_indices)
    test_arrays = meal_data.get_subset(test_indices)
    return train_arrays, valid_arrays, test_arrays


def train_group_models(model, win_ls = [],EPOCHS = 10,stridesec = 1,name = "wenkanw",model_name="v2" ,random_seed= 1000, split_day=False,test_balanced=False):
    """Train and evaluate ``model`` once per window size in ``win_ls``.

    Args:
        model: Compiled Keras model supporting ``fit`` and ``predict``.
            NOTE(review): the same instance is trained for every window size
            and appended to the result list each time -- confirm this is
            intended when ``win_ls`` has more than one entry.
        win_ls: Window sizes in minutes (only iterated, never mutated).
        EPOCHS: Number of training epochs per window size.
        stridesec: Stride in seconds passed to the dataset.
        name: Person whose files are used; also part of the model path.
        model_name: Model tag used in the checkpoint file name.
        random_seed: Seed for NumPy, ``random`` and TensorFlow.
        split_day: If true, train and test data come from separate file lists
            created by ``create_train_test_file_list``; otherwise one dataset
            of all files is split by sample index.
        test_balanced: If true, the test set is under-sampled as well.

    Returns:
        ``(perf_df, model_ls, hist_ls)``: a DataFrame with one row of test
        metrics per window size, the list of models and the list of Keras
        ``History`` objects.
    """
    from datetime import datetime
    from numpy.random import seed
    from sklearn.metrics import accuracy_score, recall_score, balanced_accuracy_score, f1_score

    seed(random_seed)
    random.seed(random_seed)
    batch_size = 128
    outfile = sys.stdout
    perf = {"model":[],"win(sec)":[], "wacc":[],"f1":[],"recall":[],"acc":[]}
    model_ls = []
    hist_ls = []
    for winsize in win_ls:
        tf.random.set_seed(random_seed)
        seed(random_seed)

        winmin = winsize
        winlength = int(winmin * 60 * 15)
        step = int(stridesec * 15)
        start_time = datetime.now()
        # Echo the PBS job environment into the log.
        arr = ["echo -n 'PBS: node is '; cat $PBS_NODEFILE",\
              "echo PBS: job identifier is $PBS_JOBID",\
              "echo PBS: job name is $PBS_JOBNAME"]
        for cmd in arr:
            os.system(cmd)
        print("*****************************************************************\n", file=outfile, flush=True)
        print("Execution Started at " + start_time.strftime("%m/%d/%Y, %H:%M:%S"), file=outfile, flush=True)
        print("WindowLength: {:.2f} min ({:d} datum)\tSlide: {:d} ({:d} datum)\tEpochs:{:d}\n".format(winmin, winlength, stridesec, step, EPOCHS), file=outfile, flush=True)

        # Resolve the checkpoint path before printing it, so the message shows
        # the file that is really written (also in split-day mode).
        path_tag = "_split_day_M_F_" if split_day else "_M_F_"
        pathtemp = "../models/" + name+"_models" +"/"+model_name+path_tag
        modelpath = pathtemp + "{:f}Min.h5".format(winmin)
        # Kept for the loss-history callback that still has to be re-added.
        jsonpath = pathtemp + "{:f}Min.json".format(winmin)
        print("Model to Save: ",modelpath)
        print()

        ########### Load the dataset################
        person = name
        if split_day:
            train_arrays, valid_arrays, test_arrays = _split_day_arrays(
                person, winmin, stridesec, random_seed, test_balanced)
        else:
            train_arrays, valid_arrays, test_arrays = _random_split_arrays(
                person, winmin, stridesec, random_seed, test_balanced)
        balancedData, balancedLabels = train_arrays
        valid_balancedData, valid_balancedLabels = valid_arrays
        test_Data, test_Labels = test_arrays

        #training settings
        mcp_save = tf.keras.callbacks.ModelCheckpoint(modelpath, save_best_only=True, monitor='accuracy')

        scheduler = tf.keras.callbacks.ReduceLROnPlateau( monitor='val_loss', factor=0.1, patience=3, verbose=0,
                                             mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.)

        ##########train model ###############
        H = model.fit(x=balancedData, y = balancedLabels,
                       validation_data=(valid_balancedData, valid_balancedLabels),
                    epochs = EPOCHS, batch_size=batch_size, verbose=1,
                    callbacks=[mcp_save,scheduler]) # removed addons.LossHistory(jsonpath) for compatibility with TensorFlow 2.2.0, needs to be re-added at some point

        print("Max value: ", max(H.history['accuracy']), " at epoch", np.argmax(H.history['accuracy']) + 1)

        # Testing: binarise the predictions once and score them.
        predictions = model.predict(x=test_Data)
        threshold = 0.5
        predicted_labels = predictions >= threshold
        wacc = balanced_accuracy_score(test_Labels, predicted_labels)
        f1 = f1_score(test_Labels, predicted_labels)
        acc = accuracy_score(test_Labels, predicted_labels)
        recall = recall_score(test_Labels, predicted_labels)

        print("Weighted Accuracy:", wacc)
        print("Test Accuracy:", acc)
        print("F1-score:", f1)
        print("Recall Accuracy:", recall)

        perf["model"].append("ActiModel")
        perf["win(sec)"].append(winmin*60)
        perf["wacc"].append(wacc)
        perf["f1"].append(f1)
        perf["acc"].append(acc)
        perf["recall"].append(recall)
        model_ls.append(model)
        hist_ls.append(H)
    perf_df = pd.DataFrame(perf)
    print(perf_df)
    return perf_df, model_ls, hist_ls

