In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import clear_output
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.datasets import make_classification
import numpy as np
import seaborn as sns
import matplotlib as mpl
import pandas as pd
mpl.rcParams['figure.dpi']=100

In [2]:
def build_classification_model(
    num_layers=1,
    architecture=[32],
    act_func="relu",
    input_shape=(28, 28),
    dropout = 0.1,
    output_bias = None,
    output_class=10):
    """
  Builds a densely connected neural network model from user input
  
  Arguments
          num_layers: Number of hidden layers
          architecture: Architecture of the hidden layers (densely connected)
          act_func: Activation function. Could be 'relu', 'sigmoid', or 'tanh'.
          input_shape: Dimension of the input vector
          output_class: Number of classes in the output vector
  Returns
          A neural net (Keras) model for classification
    """
    layers = [tf.keras.layers.Flatten(input_shape=input_shape)]
    if act_func == "relu":
        activation = tf.nn.relu
    elif act_func == "sigmoid":
        activation = tf.nn.sigmoid
    elif act_func == "tanh":
        activation = tf.nn.tanh

    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)

    for i in range(num_layers):
        layers.append(tf.keras.layers.Dense(architecture[i], activation=tf.nn.relu))
        layers.append(tf.keras.layers.Dropout(dropout))
    layers.append(tf.keras.layers.Dense(output_class, activation=tf.nn.sigmoid))

    model = tf.keras.models.Sequential(layers)
    return model

In [3]:
def compile_train_model(
    model,
    x_train,
    y_train,
    callbacks=None,
    learning_rate=0.001,
    metrics = None,
    class_weight = None,
    batch_size=1,
    epochs=10,
    verbose=0,
):
    """
  Compiles and trains a given Keras model with the given data. 
  Assumes Adam optimizer for this implementation.
  Assumes categorical cross-entropy loss.
  
  Arguments
          learning_rate: Learning rate for the optimizer Adam
          batch_size: Batch size for the mini-batch optimization
          epochs: Number of epochs to train
          verbose: Verbosity of the training process
  
  Returns
  A copy of the model
  """

    model_copy = model
    model_copy.compile(
        optimizer=tf.keras.optimizers.Adam(lr=learning_rate),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=metrics,
    )

    if callbacks != None:
        model_copy.fit(
            x_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            class_weight=class_weight,
            verbose=verbose,
        )
    else:
        model_copy.fit(
            x_train, y_train, 
            epochs=epochs, 
            batch_size=batch_size,
            class_weight=class_weight,
            verbose=verbose
        )
    return model_copy

In [4]:
class TrainingPlot(tf.keras.callbacks.Callback):
    
    # This function is called when the training begins
    def on_train_begin(self, logs={}):
        # Initialize the lists for holding the logs, losses and metrics
        self.losses = []
        self.acc = []
        self.f1score = []
        self.precision = []
        self.recall = []
        self.logs = []
    
    # This function is called at the end of each epoch
    def on_epoch_end(self, epoch, logs={}):
        """
        Calculates and plots Precision, Recall, F1 score
        """
        # Extract from the log
        tp = logs.get('tp')
        fp = logs.get('fp')
        fn = logs.get('fn')
        loss = logs.get('loss')
        
        m = self.model
        preds = m.predict(X_train)
        
        # Calculate
        precision = tp/(tp+fp)
        recall = tp/(tp+fn)
        f1score = 2*(precision*recall)/(precision+recall)    
        
        # Append the logs, losses and accuracies to the lists
        self.logs.append(logs)
        self.losses.append(loss)
        self.f1score.append(f1score)
        self.precision.append(precision)
        self.recall.append(recall)
        
        # Plots every 5th epoch
        if epoch > 0 and epoch%5==0:
            
            # Clear the previous plot
            clear_output(wait=True)
            N = np.arange(0, len(self.losses))
            
            # You can chose the style of your preference
            plt.style.use("seaborn")
            
            # Plot train loss, train acc, val loss and val acc against epochs passed
            plt.figure(figsize=(10,3))
            plt.title("Distribution of prediction probabilities at epoch no. {}".format(epoch), 
                      fontsize=16)
            plt.hist(preds, bins=50,edgecolor='k')
            
            plt.figure(figsize=(10,3))
            plt.title("Loss over epoch")
            plt.plot(N, self.losses)
            fig, ax = plt.subplots(1,3, figsize=(12,4))
            ax = ax.ravel()
            ax[0].plot(N, self.precision, label = "Precision", c='red')
            ax[1].plot(N, self.recall, label = "Recall", c='red')
            ax[2].plot(N, self.f1score, label = "F1 score", c='red')
            ax[0].set_title("Precision at Epoch No. {}".format(epoch))
            ax[1].set_title("Recall at Epoch No. {}".format(epoch))
            ax[2].set_title("F1-score at Epoch No. {}".format(epoch))
            ax[0].set_xlabel("Epoch #")
            ax[1].set_xlabel("Epoch #")
            ax[2].set_xlabel("Epoch #")
            ax[0].set_ylabel("Precision")
            ax[1].set_ylabel("Recall")
            ax[2].set_ylabel("F1 score")
            ax[0].set_ylim(0,1)
            ax[1].set_ylim(0,1)
            ax[2].set_ylim(0,1)
            
            plt.show()

In [5]:


n_features = 45
n_informative = n_features



In [6]:
d = make_classification(n_samples=42000,
                        n_features=n_features,
                        n_informative=n_informative,
                        n_redundant=0,
                        n_classes=10,
                        weights=[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1],
                        flip_y=0.05,
                        class_sep=0.7)

In [7]:


features, target = d



In [8]:
df=pd.read_csv('letters.csv')
features = df.loc[:, df.columns != 'label']
#X /= 255
#X
target = df['label']

In [9]:
df

Unnamed: 0,label,pixel43,pixel44,pixel92,pixel124,pixel125,pixel126,pixel127,pixel128,pixel129,...,pixel329,pixel351,pixel410,pixel411,pixel412,pixel413,pixel414,pixel415,pixel416,pixel417
0,1,0,0,0,0,0,0,0,0,0,...,0,254,0,0,0,0,0,0,0,0
1,0,0,0,0,137,137,192,86,72,1,...,254,0,0,75,254,254,254,17,0,0
2,1,0,0,0,3,141,139,3,0,0,...,0,184,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,94,255,69,0,0,0,0,0
4,0,0,0,0,155,254,254,254,157,30,...,253,0,0,0,223,253,253,253,129,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41995,2,0,0,1,248,253,176,43,0,0,...,0,0,0,0,0,0,0,0,0,0
41996,0,0,0,0,0,0,0,0,0,128,...,0,0,0,0,255,255,0,0,0,0
41997,2,0,0,0,255,255,191,0,0,0,...,0,0,0,0,0,0,0,0,0,0
41998,2,0,0,0,255,128,0,0,0,0,...,0,255,0,0,0,0,0,0,0,0


In [10]:


pos = np.bincount(target)
neg = np.bincount(target)



In [11]:
initial_bias = np.log([pos/neg])
initial_bias

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [29]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.5, random_state=43)

In [30]:
y_train = np.asarray(target).astype('float32').reshape((-1,1))
y_test = np.asarray(target).astype('float32').reshape((-1,1))


In [31]:
X_train = tf.keras.utils.normalize(X_train, axis=1)
X_test = tf.keras.utils.normalize(X_test, axis=1)

In [26]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [32]:
input_shape=(X_train.shape[1],)

In [18]:


metrics = [
    tf.keras.metrics.TruePositives(name="tp"),
    tf.keras.metrics.TrueNegatives(name="tn"),
    tf.keras.metrics.FalseNegatives(name="fn"),
    tf.keras.metrics.FalsePositives(name="fp"),
]



In [33]:


counts = np.bincount(y_train)

weight_for_0 = 1.0 / counts[0]
weight_for_1 = 1.0 / counts[1]
weight_for_2 = 1.0 / counts[2]
weight_for_3 = 1.0 / counts[3]
weight_for_4 = 1.0 / counts[4]
weight_for_5 = 1.0 / counts[5]
weight_for_6 = 1.0 / counts[6]
weight_for_7 = 1.0 / counts[7]
weight_for_8 = 1.0 / counts[8]
weight_for_9 = 1.0 / counts[9]
class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3:weight_for_3, 4:weight_for_4, 5:weight_for_5, 6:weight_for_6, 7:weight_for_7, 8:weight_for_8, 9:weight_for_9}



ValueError: object too deep for desired array

In [34]:


plot_metrics = TrainingPlot()



In [35]:


m = build_classification_model(num_layers=5,
                               architecture=[256,128,64,64,32],
                               input_shape=input_shape,
                               output_bias= initial_bias,
                               output_class=10)



In [36]:
m.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 45)                0         
                                                                 
 dense_6 (Dense)             (None, 256)               11776     
                                                                 
 dropout_5 (Dropout)         (None, 256)               0         
                                                                 
 dense_7 (Dense)             (None, 128)               32896     
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 64)                8256      
                                                                 
 dropout_7 (Dropout)         (None, 64)               

In [137]:
#np.asarray(target).astype('float32').reshape((-1,1))
#y_train.astype('float32').reshape((-1,1))
#y_test.astype('float32').reshape((-1,1))


array([[1.],
       [0.],
       [1.],
       ...,
       [2.],
       [2.],
       [2.]], dtype=float32)

In [37]:
m = compile_train_model(model=m, 
                        x_train=X_train, 
                        y_train=y_train, 
                        metrics=metrics,
                        class_weight=0.1,
                        callbacks = [plot_metrics],
                        batch_size=10,
                        learning_rate=1e-3,
                        epochs=100,
                        verbose=0)

ValueError: Data cardinality is ambiguous:
  x sizes: 21000
  y sizes: 42000
Make sure all arrays contain the same number of samples.