# II -  K-Folds (cross-validation)

## Importar librerías

In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

import tensorflow as tf

In [2]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

from tensorflow.keras.metrics import MSE
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.losses import binary_crossentropy

from sklearn.model_selection import KFold

## Parámetros de entrenamiento

In [3]:
# Seed NumPy's global random number generator before anything else runs.
np.random.seed(14)

# Optimisation hyper-parameters.
lr = 1e-3        # learning rate handed to the optimizers
batch_size = 16  # samples per gradient step
epochs = 300     # training epochs per fold

# Number of cross-validation folds.
k = 5

## Cargar y visualizar el dataset

In [4]:
# Read the comma-separated file into a single float array
# (np.loadtxt parses every field as a float by default).
dataset = np.loadtxt(fname='./src/pima-indians-diabetes.csv', delimiter=',')

FileNotFoundError: ./src/pima-indians-diabetes.csv not found.

In [None]:
# Number of model inputs: every column of `dataset` except one.
# (The training loop reads the target from column 8, so the excluded column
# is presumably the trailing label column -- confirm against the CSV.)
features = dataset.shape[1]-1

## Crear el modelo

In [None]:
# Model: fully connected binary classifier built with the Keras functional API.
#---------------------------------------------------------------------#
# Input shape is the per-sample shape, i.e. it excludes the number of examples.
input_1 = Input(shape=[features])
# Two hidden layers of 10 ReLU units each.
dense_1 = Dense(units=10, activation='relu')(input_1)
dense_2 = Dense(units=10, activation='relu')(dense_1)
# A single sigmoid unit produces an output in (0, 1).
output_1 = Dense(units=1, activation='sigmoid')(dense_2)
#---------------------------------------------------------------------#
model = Model(inputs=input_1, outputs=output_1)
#---------------------------------------------------------------------#

In [None]:
# Alternative optimizer, kept for experimentation (the model is compiled with
# Adam below). The legacy `lr=` / `decay=` keyword arguments are rejected by
# current Keras optimizers, so the learning rate is passed as `learning_rate`
# and the decay is expressed as a schedule. InverseTimeDecay with
# decay_steps=1 gives lr / (1 + 1e-6 * step), the same form as the legacy
# `decay=1e-6` rule.
sgd_learning_rate = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr, decay_steps=1, decay_rate=1e-6)
sgd_optimizer = SGD(learning_rate=sgd_learning_rate, momentum=0.9, nesterov=False)

adam_optimizer = Adam(learning_rate=lr)

# Binary cross-entropy matches the single sigmoid output. 'acc' and 'mse' are
# reported per epoch; the 'acc' name determines the 'val_acc' history key.
model.compile(optimizer=adam_optimizer, loss='binary_crossentropy', metrics=['acc', 'mse'])
model.summary()

## Folding & Entrenamiento

In [None]:
start_time = time.time()

# acc[i, e] holds the validation accuracy of fold i after epoch e.
acc = np.zeros((k, epochs))

kfolds = KFold(n_splits=k, shuffle=True, random_state=None)
for i, (train_index, test_index) in enumerate(kfolds.split(dataset)):
    # The first `features` columns are the inputs; the column after them is
    # the target (previously hard-coded as 0:8 and 8).
    x_train, x_test = dataset[train_index, :features], dataset[test_index, :features]
    y_train, y_test = dataset[train_index, features], dataset[test_index, features]

    # Every fold must start from untrained weights. Re-fitting the same model
    # would carry over weights already trained on samples that belong to this
    # fold's validation split, which inflates the reported accuracy.
    # clone_model rebuilds the architecture with newly created layers, and a
    # new optimizer avoids reusing optimizer state from earlier folds.
    fold_model = tf.keras.models.clone_model(model)
    fold_model.compile(optimizer=Adam(learning_rate=lr), loss='binary_crossentropy', metrics=['acc', 'mse'])

    history_fold = fold_model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test), shuffle=True, verbose=1)
    acc[i, :] = np.asarray(history_fold.history['val_acc'])

end_time = time.time()
print('\nElapsed Dense Model training time: {:.5f} seconds'.format(end_time-start_time))

## Gráficos

In [None]:
# Collapse the fold axis (axis 0) to get one mean and one standard deviation
# of the validation accuracy per epoch.
acc_mean = acc.mean(axis=0)
acc_std = acc.std(axis=0)

In [None]:
# Plot the mean validation accuracy per epoch with a +/- one standard
# deviation band across folds.
f = plt.figure(figsize=(10,10))
epoch_axis = range(epochs)
# The fold count in the legend and the x-axis limit follow `k` and `epochs`
# rather than the literals 5 and 300, so the figure stays correct when the
# training parameters change.
plt.plot(epoch_axis, acc_mean, linewidth=3, label='Mean Accuracy ({}-folds)'.format(k))
plt.fill_between(epoch_axis, acc_mean - acc_std, acc_mean + acc_std, alpha=0.5, edgecolor='#CC4F1B', facecolor='#FF9848', label='STD')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
# Accuracy is a fraction, so the y-axis is fixed to [0, 1].
plt.axis([0, epochs, 0, 1])
plt.legend(loc='best', fontsize=15)
plt.show()