# Train - AlexNet

In [None]:
import gc, datetime
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
from utilities import load_images, get_multi_hot_labels
from alexnet_model import alexnet_model_fn
tf.logging.set_verbosity(tf.logging.INFO)
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Training split: metadata table and its image-path column.
train_df = pd.read_csv('data/train_without_missing.csv')
train_path_list = train_df.loc[:, 'imagePath']

# Validation split: metadata table and its image-path column.
validation_df = pd.read_csv('data/validation.csv')
eval_path_list = validation_df.loc[:, 'imagePath']

# The validation images and labels are loaded once up front and reused
# by every later evaluation / prediction cell.
eval_data = load_images(eval_path_list)
eval_labels = get_multi_hot_labels(validation_df, list(range(len(validation_df))))

In [None]:
# Sanity check: shapes of the validation images and of their label matrix.
tuple(arr.shape for arr in (eval_data, eval_labels))

In [None]:
def main(train_iter_size=5000, num_iters=20, batch_size=1, steps=5000,
         eval_every_iters=1, model_dir="model/multilabel_alexnet_model"):
    """Train the multi-label AlexNet estimator in rounds and track validation metrics.

    Each round draws `train_iter_size` random row indices into the notebook-level
    `train_df` (sampled with replacement by `np.random.randint`), loads the
    matching images and labels, and trains for `steps` steps. Every
    `eval_every_iters` rounds the estimator is evaluated on the notebook-level
    `eval_data` / `eval_labels` and the resulting metrics are recorded.

    Args:
        train_iter_size: Number of training images loaded per round.
        num_iters: Number of load-and-train rounds.
        batch_size: Mini-batch size passed to the training input function.
        steps: Training steps run per round.
        eval_every_iters: Evaluate after every this-many rounds (must be >= 1).
        model_dir: Directory where the estimator reads/writes checkpoints.

    Returns:
        dict of lists with keys 'eval_steps', 'eval_losses', 'eval_precision',
        'eval_recall' and 'eval_meanfscore'; one entry per evaluation.

    Raises:
        ValueError: If `eval_every_iters` is smaller than 1.
    """
    if eval_every_iters < 1:
        raise ValueError(
            'eval_every_iters must be >= 1, got {}'.format(eval_every_iters))

    # Metric histories, one entry appended per evaluation round.
    eval_steps = []
    eval_losses = []
    eval_precision = []
    eval_recall = []
    eval_meanfscore = []

    # Create the Estimator
    multilabel_classifier = tf.estimator.Estimator(
        model_fn=alexnet_model_fn, model_dir=model_dir)

    # Logging hook; no tensors are currently selected for logging.
    # (Candidates used previously: {"probabilities": "sigmoid_tensor"},
    # {"meanfscore": "eval_tensor"}.)
    tensors_to_log = []
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=100)

    # The validation input never changes, so build its input function once.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        shuffle=False)

    for k in range(num_iters):
        print('Trained images so far: {}'.format(k * train_iter_size))

        # Randomly load training data and labels
        print('Loading train images..')
        random_indices = np.random.randint(0, train_df.shape[0], size=train_iter_size)
        train_paths = [train_path_list[i] for i in random_indices]
        train_data = load_images(train_paths)

        print('Loading train labels..')
        train_labels = get_multi_hot_labels(train_df, random_indices)

        # Train the model
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": train_data},
            y=train_labels,
            batch_size=batch_size,
            num_epochs=None,
            shuffle=True)
        multilabel_classifier.train(
            input_fn=train_input_fn,
            steps=steps,
            hooks=[logging_hook])

        if k % eval_every_iters == 0:
            # Evaluate the model and print results
            eval_results = multilabel_classifier.evaluate(input_fn=eval_input_fn)
            print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
            print(eval_results)

            eval_steps.append(eval_results['global_step'])
            eval_losses.append(eval_results['loss'])
            eval_precision.append(eval_results['precision_micro'])
            eval_recall.append(eval_results['recall_micro'])
            eval_meanfscore.append(eval_results['meanfscore'])

        # Garbage collection. The input function was built from the training
        # arrays and still refers to them, so it has to be dropped as well;
        # otherwise this round's data stays alive while the next round loads.
        del train_input_fn, train_data, train_labels
        gc.collect()

    eval_track = {'eval_steps': eval_steps,
                  'eval_losses': eval_losses,
                  'eval_precision': eval_precision,
                  'eval_recall': eval_recall,
                  'eval_meanfscore': eval_meanfscore}

    return eval_track

In [None]:
# Run the full train/evaluate loop; the returned dict of per-evaluation
# metric lists is used by the plotting and CSV-export cells below.
eval_track = main()

In [None]:
# Validation loss as a function of the global training step.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(eval_track['eval_steps'], eval_track['eval_losses'])
ax.set_xlabel("Step")
ax.set_ylabel("Validation loss")

In [None]:
# Validation mean F-score as a function of the global training step.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(eval_track['eval_steps'], eval_track['eval_meanfscore'])
ax.set_xlabel("Step")
ax.set_ylabel("Validation meanfscore")

In [None]:
# Persist the tracked validation metrics, one row per evaluation.
pd.DataFrame(data=eval_track).to_csv(
    path_or_buf="eval_track_bs1_ep1.csv",
    index=False,
)

In [None]:
# Evaluate the trained model on the validation set.
# model_dir is written with a forward slash so it names the same checkpoint
# directory that main() trains into on every OS; with a backslash the path is
# a single, different directory name on non-Windows systems.
multilabel_classifier = tf.estimator.Estimator(
    model_fn=alexnet_model_fn, model_dir="model/multilabel_alexnet_model")
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data},
    y=eval_labels,
    shuffle=False)
eval_results = multilabel_classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)

In [None]:
# Get predictions for validation set.
# model_dir is written with a forward slash so it names the same checkpoint
# directory that main() trains into on every OS; with a backslash the path is
# a single, different directory name on non-Windows systems.
multilabel_classifier = tf.estimator.Estimator(
    model_fn=alexnet_model_fn, model_dir="model/multilabel_alexnet_model")
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data},
    y=eval_labels,
    num_epochs=1,
    shuffle=False)
predictions = multilabel_classifier.predict(input_fn=eval_input_fn)
# predict() yields one dict per example; stack the 'classes' entries row-wise.
y_predicted = np.array([p['classes'] for p in predictions])

In [None]:
# For every predicted row, collect the positions whose value equals 1.
predicted_labels = [np.nonzero(row == 1) for row in y_predicted]
predictions_df = pd.DataFrame(data={'predicted_labels': predicted_labels})
# Widen the column so long label lists stay readable when rendered.
predictions_df.style.set_properties(subset=['predicted_labels'], width='700px')

In [None]:
# For every ground-truth row, collect the positions whose value equals 1.
true_labels = [np.nonzero(row == 1) for row in eval_labels]
groundtruth_df = pd.DataFrame(data={'true_labels': true_labels})
# Widen the column so long label lists stay readable when rendered.
groundtruth_df.style.set_properties(subset=['true_labels'], width='700px')

In [None]:
# Number of validation examples that received a prediction.
len(y_predicted)

In [None]:
# NOTE(review): `submission` is not defined in any cell visible in this
# notebook; on a fresh kernel this line would raise NameError unless the
# frame is created elsewhere — TODO confirm where it comes from.
submission.style.set_properties(subset=['label_id'], **{'width': '700px'})