In [None]:
import numpy as np
import tensorflow
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
from tensorflow.keras.optimizers import Adam,Nadam, SGD
from keras.regularizers import l2

In [None]:
# Length of every bag-of-words vector; token ids in the data files index into it.
VOCAB_SIZE = 8520


def _load_bag_of_words(path, vocab_size=VOCAB_SIZE):
    """Read a whitespace-tokenised data file into per-line term-count vectors.

    Each line of the file becomes one list of ``vocab_size`` integer counts.
    Tokens starting with '<' are skipped (presumably structural markers in the
    dataset format -- TODO confirm); every other token is parsed as an integer
    index whose count is incremented.

    Args:
        path: Path of the data file to read.
        vocab_size: Length of each count vector.

    Returns:
        A list with one count vector (list of ints) per line of the file.

    Raises:
        ValueError: If a counted token is not an integer.
        IndexError: If a token id falls outside the vector.
    """
    documents = []
    with open(path) as handle:
        for line in handle:
            bag = [0] * vocab_size
            # str.split() with no argument already ignores surrounding whitespace.
            for token in line.split():
                if not token.startswith('<'):
                    bag[int(token)] += 1
            documents.append(bag)
    return documents


bag_of_words = _load_bag_of_words('/content/drive/MyDrive/Data/train-data.dat')
test_data = _load_bag_of_words('/content/drive/MyDrive/Data/test-data.dat')



In [None]:
# Genre names: the first comma-separated field of every line in labels.txt.
with open('/content/drive/MyDrive/Data/labels.txt') as genre_file:
    genre = [row.strip().split(',')[0] for row in genre_file]

# Training targets: each line of train-label.dat is a whitespace-separated list of integers.
with open("/content/drive/MyDrive/Data/train-label.dat") as train_label_file:
    labels = [list(map(int, row.strip().split())) for row in train_label_file]

# Held-out targets, parsed exactly like the training ones.
with open("/content/drive/MyDrive/Data/test-label.dat") as test_label_file:
    test_labels = [list(map(int, row.strip().split())) for row in test_label_file]

In [None]:
def normalization(list_of_lists):
    """Min-max scale every inner list independently into the range [0, 1].

    Each value ``v`` of a row becomes ``(v - min(row)) / (max(row) - min(row))``.

    Args:
        list_of_lists: Iterable of numeric sequences (one row per sample).

    Returns:
        A new list of lists of floats with the same shape as the input.
        A row whose values are all equal (e.g. an all-zero bag) has no range
        to scale by and is mapped to all zeros; an empty row stays empty.
    """
    normalized = []
    for lista in list_of_lists:
        # min()/max() raise on an empty sequence, so pass empty rows through.
        if len(lista) == 0:
            normalized.append([])
            continue

        minimum_number = min(lista)
        value_range = max(lista) - minimum_number

        if value_range == 0:
            # Constant row: dividing by the range would be a division by zero.
            normalized.append([0.0] * len(lista))
        else:
            normalized.append([(i - minimum_number) / value_range for i in lista])
    return normalized

# Apply the same per-document min-max scaling to the training and the held-out bags.
normalized_data, normalized_test_data = map(normalization, (bag_of_words, test_data))

In [None]:
# Convert the training features/targets and the held-out features/targets to NumPy arrays.
X, Y = (np.array(rows) for rows in (normalized_data, labels))
xx_test, yy_test = (np.array(rows) for rows in (normalized_test_data, test_labels))

In [None]:
def get_model(n_inputs, n_outputs):
    """Build and compile the three-layer dense network used for the experiments.

    Architecture:
        1. Dense hidden layer with ``(n_inputs + n_outputs) // 2`` units,
           sigmoid activation, He-uniform initialisation and an L2 kernel
           penalty of 0.9.
        2. Dense layer with ``n_outputs`` units, sigmoid activation and
           He-uniform initialisation.
        3. Dense output layer with ``n_outputs`` units and linear activation.

    The model is compiled with MSE loss and SGD (learning rate 0.001,
    momentum 0.6), and reports binary accuracy, binary cross-entropy and MSE.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of target values per sample.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Dense expects an integer unit count; floor division keeps the size an
    # int (true division would produce a float such as 4260.5).
    hidden_units = (n_outputs + n_inputs) // 2

    model = Sequential()

    model.add(Dense(hidden_units, input_dim=n_inputs, kernel_regularizer=l2(0.9),
                    kernel_initializer='he_uniform', activation='sigmoid'))
    model.add(Dense(n_outputs, kernel_initializer='he_uniform', activation='sigmoid'))

    # NOTE(review): the output layer is linear (unbounded) while binary
    # accuracy / cross-entropy are reported as metrics -- confirm this is intended.
    model.add(Dense(n_outputs, activation='linear'))

    sgd = SGD(learning_rate=0.001, momentum=0.6)

    model.compile(loss='mse', optimizer=sgd,
                  metrics=['binary_accuracy', 'binary_crossentropy', 'mse'])
    return model


def _plot_loss_curves(history):
    """Plot the per-epoch training and validation loss of a Keras History object."""
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.grid()
    plt.show()


def evaluate_model(X, y):
    """Run 5-fold cross-validation and score the best fold's model on the test set.

    For each fold a fresh model from ``get_model`` is trained for up to 100
    epochs (batch size 150) with early stopping, its loss curves are plotted
    and it is evaluated on the fold's validation split. The model with the
    highest validation binary accuracy is then evaluated on the module-level
    ``xx_test`` / ``yy_test`` arrays and its metrics are printed.

    Args:
        X: 2-D feature array, one row per sample.
        y: 2-D target array, one row per sample.

    Returns:
        List with the validation binary accuracy of each fold, in fold order.

    Raises:
        RuntimeError: If no fold produced a comparable accuracy (e.g. all NaN).
    """
    results = []
    n_inputs, n_outputs = X.shape[1], y.shape[1]

    # Start below any real accuracy so the first valid fold is always kept.
    best_model = None
    best_history = None
    best_acc = float('-inf')

    # k-Fold
    cv = KFold(n_splits=5)
    for train_ix, test_ix in cv.split(X):

        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]

        model = get_model(n_inputs, n_outputs)

        # Early stopping.
        # NOTE(review): this monitors the training loss ("loss"), not
        # "val_loss", although validation data is supplied -- confirm intended.
        early_stopping = tensorflow.keras.callbacks.EarlyStopping(
            monitor="loss", restore_best_weights=True, mode='min',
            patience=10, min_delta=0.01, verbose=1)

        history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                            epochs=100, batch_size=150, verbose=0,
                            callbacks=[early_stopping])

        _plot_loss_curves(history)

        # evaluate() returns the loss followed by the compiled metrics in order.
        _, acc, _, _ = model.evaluate(X_test, y_test)
        results.append(acc)

        # Keep the best performing network.
        if acc > best_acc:
            best_model, best_history, best_acc = model, history, acc

    if best_model is None:
        raise RuntimeError("No cross-validation fold produced a valid accuracy.")

    # Loss curves of the best fold.
    _plot_loss_curves(best_history)

    # Best performing network on the test data.
    test_model = best_model.evaluate(xx_test, yy_test)
    print("--------------------------TEST--------------------------")
    print(f'Binary Accuracy: {test_model[1]:.4f} | Binary CE: {test_model[2]:.4f} | MSE: {test_model[3]:.4f}')

    return results


# Cross-validate on the training arrays, then score the best fold's model on the test arrays.
evaluate_model(X, Y)
