# Optimizasyon

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import time

In [None]:
def plot_result(history, name):
  """Draw every series stored in a training history on a single titled figure.

  Args:
    history: object whose ``history`` attribute can be turned into a
      ``pandas.DataFrame`` (one column per recorded series).
    name: text used as the figure title.
  """
  curves = pd.DataFrame(history.history)
  axes = curves.plot()  # one line per column, on a fresh figure
  axes.grid(True)
  # The y-axis is left auto-scaled (no fixed 0..1 range).
  axes.set_title(name)
  plt.show()

def tune_opt_model(optimizer, epochs):
  """Build, compile and train a dense softmax classifier with the given optimizer.

  The network is ``Flatten([28, 28])`` followed by five ReLU ``Dense`` layers
  (300, 100, 50, 50, 50 units, He-normal initialised) and a 10-unit softmax
  output. It is trained on the module-level ``X_train_full`` / ``y_train_full``
  arrays, holding out 10% of them for validation. The model summary and the
  elapsed training time are printed.

  Args:
    optimizer: optimizer passed to ``model.compile``.
    epochs: number of training epochs passed to ``model.fit``.

  Returns:
    The object returned by ``model.fit``.
  """
  model = keras.models.Sequential()

  model.add(keras.layers.Flatten(input_shape=[28, 28]))

  # Each entry is the number of units of one hidden layer.
  for units in (300, 100, 50, 50, 50):
    model.add(keras.layers.Dense(units, activation='relu', kernel_initializer="he_normal"))

  model.add(keras.layers.Dense(10, activation='softmax'))

  model.compile(loss="sparse_categorical_crossentropy",
                optimizer=optimizer, metrics=["accuracy"])

  model.summary()

  # perf_counter() is a monotonic, high-resolution clock; time.time() can jump
  # when the system clock is adjusted, which would distort the measurement.
  start_time = time.perf_counter()

  history = model.fit(X_train_full, y_train_full, epochs=epochs, validation_split=0.1)

  print("--- %s seconds ---" % (time.perf_counter() - start_time))

  return history
  

## Sınıflandırma görevi

ReLU aktivasyon fonksiyonunun kullanılması (%86,72 eğitim seti; %86,37 test seti; 99,76 saniye)

**Optimizasyon Ayarları**

In [None]:
# Load the Fashion-MNIST arrays shipped with Keras.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Divide both feature arrays by 255.0 (presumably to bring 8-bit pixel
# intensities into the 0..1 range).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# First 5000 samples form the validation arrays, the rest the training arrays.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

**Optimizer = SGD - learning_rate = 0.001**

In [None]:
# Baseline run: ReLU with He initialization trained with plain SGD. The runs
# below are compared against it for model performance and running time.
tf.random.set_seed(50)
np.random.seed(50)

EPOCHS = 15

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves for the comparison plots further down.
train_loss_SGD = history.history["loss"]
val_loss_SGD = history.history["val_loss"]
train_acc_SGD = history.history["accuracy"]
val_acc_SGD = history.history["val_accuracy"]

**Optimizer = SGD - learning_rate = 0.001, momentum=0.9**

In [None]:
# ReLU with He initialization, SGD(learning_rate=0.001, momentum=0.9) optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves for the comparison plots further down.
train_loss_mom = history.history["loss"]
val_loss_mom = history.history["val_loss"]
train_acc_mom = history.history["accuracy"]
val_acc_mom = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and SGD (lr=0.001, momentum=0.9) optimizer')

In [None]:
# Training loss and accuracy of plain SGD versus SGD with momentum.
epoch_no = list(range(1, EPOCHS + 1))
plt.figure(figsize=(10, 5))
plt.grid(True)
# Curves are drawn in the same order as the legend entries below.
plt.plot(epoch_no, train_loss_SGD, marker='x')
plt.plot(epoch_no, train_loss_mom)
# The third curve is the SGD *accuracy*; the loss had been plotted a second
# time here, so the "SGD Accuracy" legend entry pointed at the wrong data.
plt.plot(epoch_no, train_acc_SGD, marker='x')
plt.plot(epoch_no, train_acc_mom)
plt.legend(["SGD Loss", "Momentum Loss", "SGD Accuracy", "Momentum Accuracy"])
plt.xlabel('Number of epochs')
plt.title("Compare SGD and Momentum")
plt.show()

**Optimizer = SGD - learning_rate = 0.001, momentum=0.9, nesterov=True**

In [None]:
# ReLU with He initialization, SGD(learning_rate=0.001, momentum=0.9) with Nesterov momentum enabled
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves of this run.
train_loss_NAG = history.history["loss"]
val_loss_NAG = history.history["val_loss"]
train_acc_NAG = history.history["accuracy"]
val_acc_NAG = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and SGD (lr=0.001, momentum=0.9, nesterov=True) optimizer')

**Optimizer = Adagrad - learning_rate = 0.001**

In [None]:
# ReLU with He initialization, AdaGrad optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adagrad(learning_rate=0.001)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves for the comparison plots further down.
train_loss_adagrad = history.history["loss"]
val_loss_adagrad = history.history["val_loss"]
train_acc_adagrad = history.history["accuracy"]
val_acc_adagrad = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and AdaGrad optimizer')

**Optimizer = RMSprop - learning_rate = 0.001, rho=0.99**

In [None]:
# ReLU with He initialization, RMSProp optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.99)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves for the comparison plots further down.
train_loss_rmsprop = history.history["loss"]
val_loss_rmsprop = history.history["val_loss"]
train_acc_rmsprop = history.history["accuracy"]
val_acc_rmsprop = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and RMSProp optimizer')

In [None]:
# Training loss and accuracy of Adagrad versus RMSprop on one figure.
epoch_no = list(range(1, EPOCHS + 1))

plt.figure(figsize=(10, 5))
plt.grid(True)

# (series, extra line style) in the same order as the legend entries below;
# the Adagrad curves carry an 'x' marker, the RMSprop curves none.
for series, line_style in (
    (train_loss_adagrad, {"marker": 'x'}),
    (train_loss_rmsprop, {}),
    (train_acc_adagrad, {"marker": 'x'}),
    (train_acc_rmsprop, {}),
):
  plt.plot(epoch_no, series, **line_style)

plt.legend(["Adagrad Loss", "RMSprop Loss","Adagrad Accuracy", "RMSprop Accuracy"])
plt.xlabel('Number of epochs')
plt.title("Compare Adagrad and RMSprop")
plt.show()

**Optimizer = Adam - beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Adam optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves for the comparison plot further down.
train_loss_adam = history.history["loss"]
val_loss_adam = history.history["val_loss"]
train_acc_adam = history.history["accuracy"]
val_acc_adam = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and Adam optimizer')

In [None]:
# Training loss (with markers) and accuracy (plain lines) of Adagrad, RMSprop
# and Adam; each optimizer keeps one colour across both of its curves.
epoch_no = list(range(1, EPOCHS + 1))

adaptive_runs = (
    (train_loss_adagrad, train_acc_adagrad, 'x', "#070bf2"),
    (train_loss_rmsprop, train_acc_rmsprop, 'o', "#5fd406"),
    (train_loss_adam, train_acc_adam, 'v', "#fc0808"),
)

plt.figure(figsize=(10, 5))
plt.grid(True)

# All loss curves first, then all accuracy curves, matching the legend order.
for loss_curve, acc_curve, marker_shape, line_color in adaptive_runs:
  plt.plot(epoch_no, loss_curve, marker=marker_shape, color=line_color)
for loss_curve, acc_curve, marker_shape, line_color in adaptive_runs:
  plt.plot(epoch_no, acc_curve, color=line_color)

plt.legend(["Adagrad Loss", "RMSprop Loss","Adam Loss", "Adagrad Accuracy", "RMSprop Accuracy", "Adam Accuracy"])
plt.xlabel('Number of epochs')
plt.title("Compare Adagrad, RMSprop and Adam")
plt.show()

**Optimizer = Adamax - beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Adamax optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves of this run.
train_loss_adamax = history.history["loss"]
val_loss_adamax = history.history["val_loss"]
train_acc_adamax = history.history["accuracy"]
val_acc_adamax = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and Adamax optimizer')

**Optimizer = Nadam - beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Nadam optimizer
tf.random.set_seed(50)
np.random.seed(50)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_model(optimizer=optimizer, epochs=EPOCHS)

# Keep the per-epoch curves of this run.
train_loss_nadam = history.history["loss"]
val_loss_nadam = history.history["val_loss"]
train_acc_nadam = history.history["accuracy"]
val_acc_nadam = history.history["val_accuracy"]

plot_result(history, name='He Initialization with ReLU and Nadam optimizer')

## Regresyon Görevi

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fetch the California housing data and split it twice with a fixed seed:
# full-train/test first, then train/validation out of the full-train part.
housing = fetch_california_housing()

X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Fit the scaler on the training features only, then apply the same
# transformation to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train, X_valid, X_test = (
    scaler.transform(features) for features in (X_train, X_valid, X_test))

In [None]:
def tune_opt_reg(optimizer):
  """Build, compile and train a dense regression network with the given optimizer.

  The network has five ReLU ``Dense`` layers (100, 50, 10, 10, 10 units,
  He-normal initialised) followed by a single linear output unit and is
  trained with mean squared error. It reads the module-level ``X_train``,
  ``y_train``, ``X_valid``, ``y_valid`` and ``EPOCHS`` and prints the elapsed
  training time.

  Args:
    optimizer: optimizer passed to ``model.compile``.

  Returns:
    The object returned by ``model.fit``.
  """
  model_default = keras.models.Sequential()
  for position, units in enumerate((100, 50, 10, 10, 10)):
    # Only the first layer receives the raw features, so only it declares the
    # input shape; later layers take their input size from the previous layer.
    shape_kwargs = {"input_shape": X_train.shape[1:]} if position == 0 else {}
    model_default.add(keras.layers.Dense(units, activation="relu",
                                         kernel_initializer='he_normal',
                                         **shape_kwargs))
  model_default.add(keras.layers.Dense(1))

  model_default.compile(loss="mean_squared_error",
                        optimizer=optimizer)

  # perf_counter() is a monotonic, high-resolution clock; time.time() can jump
  # when the system clock is adjusted, which would distort the measurement.
  start_time = time.perf_counter()

  history = model_default.fit(X_train, y_train, epochs=EPOCHS, validation_data=(X_valid, y_valid))

  print("--- %s seconds ---" % (time.perf_counter() - start_time))
  return history

**Optimizer = SGD - learning_rate=0.001**

In [None]:
# Baseline regression run: ReLU with He initialization, plain SGD optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
history = tune_opt_reg(optimizer=optimizer)

# Keep the per-epoch losses for the comparison plots further down.
train_loss_SGD = history.history["loss"]
val_loss_SGD = history.history["val_loss"]

**Optimizer = SGD - learning_rate=0.001, momentum=0.9**

In [None]:
# ReLU with He initialization, SGD(learning_rate=0.001, momentum=0.9) optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
history = tune_opt_reg(optimizer=optimizer)
plot_result(history, name='He Initialization with ReLU and SGD(lr=0.001, momentum=0.9) optimizer')

# Keep the per-epoch losses for the comparison plots further down.
train_loss_momentum = history.history["loss"]
val_loss_momentum = history.history["val_loss"]

**Optimizer = SGD - learning_rate=0.001, momentum=0.9, nesterov=True**

In [None]:
# ReLU with He initialization, Nesterov SGD(learning_rate=0.001, momentum=0.9) optimizer
tf.random.set_seed(42)
np.random.seed(42)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
history = tune_opt_reg(optimizer=optimizer)

# Keep the per-epoch losses for the comparison plots further down.
train_loss_nesterov = history.history["loss"]
val_loss_nesterov = history.history["val_loss"]

plot_result(history, name='He Initialization with ReLU and Nesterov SGD(lr=0.001, momentum=0.9) optimizer')

**Optimizer = Adagrad - learning_rate = 0.001**

In [None]:
# ReLU with He initialization, Adagrad optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adagrad(learning_rate=0.001)
history = tune_opt_reg(optimizer=optimizer)
# Keep the per-epoch losses for the comparison plots further down.
train_loss_adagrad = history.history["loss"]
val_loss_adagrad = history.history["val_loss"]
plot_result(history, name='He Initialization with ReLU and Adagrad optimizer')

**Optimizer = RMSprop - learning_rate = 0.001, rho=0.9**

In [None]:
# ReLU with He initialization, RMSprop optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
history = tune_opt_reg(optimizer=optimizer)

# Keep the per-epoch losses for the comparison plots further down.
train_loss_rmsprop = history.history["loss"]
val_loss_rmsprop = history.history["val_loss"]

plot_result(history, name='He Initialization with ReLU and RMSprop optimizer')

**Optimizer = Adam - learning_rate=0.001, beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Adam optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_reg(optimizer=optimizer)
# Keep the per-epoch losses for the comparison plots further down.
train_loss_adam = history.history["loss"]
val_loss_adam = history.history["val_loss"]

plot_result(history, name='He Initialization with ReLU and Adam optimizer')

**Optimizer = Adamax - learning_rate=0.001, beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Adamax optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_reg(optimizer=optimizer)
# Keep the per-epoch losses for the comparison plots further down.
train_loss_adamax = history.history["loss"]
val_loss_adamax = history.history["val_loss"]
plot_result(history, name='He Initialization with ReLU and Adamax optimizer')

**Optimizer = Nadam - learning_rate=0.001, beta1 = 0.9, beta2=0.999**

In [None]:
# ReLU with He initialization, Nadam optimizer
tf.random.set_seed(42)
np.random.seed(42)

# `learning_rate` is the supported argument name; the `lr` alias is deprecated.
optimizer = keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
history = tune_opt_reg(optimizer=optimizer)
# Keep the per-epoch losses for the comparison plots further down.
train_loss_nadam = history.history["loss"]
val_loss_nadam = history.history["val_loss"]
plot_result(history, name='He Initialization with ReLU and Nadam optimizer')

In [None]:
# x-axis values (1..EPOCHS) shared by the loss plots.
epoch_no = list(range(1, EPOCHS + 1))

# Training-loss curves of every regression run, one entry per optimizer.
train_loss = [
    train_loss_SGD,
    train_loss_momentum,
    train_loss_nesterov,
    train_loss_adagrad,
    train_loss_rmsprop,
    train_loss_adam,
    train_loss_adamax,
    train_loss_nadam,
]

In [None]:
# Quick look at the training loss of plain SGD versus SGD with momentum.
for loss_curve in (train_loss_SGD, train_loss_momentum):
  plt.plot(epoch_no, loss_curve)
plt.legend(["train_loss_SGD", "train_loss_momentum"])
plt.show()

In [None]:
# Training loss of every optimizer on one wide figure.
plt.figure(figsize=(15, 5))

# Curves are drawn in list order, which is also the order of the legend labels.
optimizer_labels = ["SGD", "Momentum","Nesterov", "Adagrad", "RMSprop", "Adam", "Adamax", "Nadam"]
for loss_curve in train_loss:
  plt.plot(epoch_no, loss_curve)
plt.legend(optimizer_labels)

# NOTE(review): set_cmap changes the default image colormap; it does not appear
# to affect the colours of the lines drawn above. Confirm whether it is needed.
plt.set_cmap("jet")

plt.ylabel('Train loss')
plt.xlabel('Number of epochs')
plt.ylim((0.2, 1))  # y-axis limited to the 0.2..1 range
plt.title("Train loss by different optimizer")
plt.show()

In [None]:
# Validation-loss curves of every regression run, one entry per optimizer.
val_loss = [
    val_loss_SGD,
    val_loss_momentum,
    val_loss_nesterov,
    val_loss_adagrad,
    val_loss_rmsprop,
    val_loss_adam,
    val_loss_adamax,
    val_loss_nadam,
]

plt.figure(figsize=(15, 5))

# Curves are drawn in list order, which is also the order of the legend labels.
for loss_curve in val_loss:
  plt.plot(epoch_no, loss_curve)
plt.legend(["SGD", "Momentum","Nesterov", "Adagrad", "RMSprop", "Adam", "Adamax", "Nadam"])

plt.ylabel('Validation loss')
plt.xlabel('Number of epochs')
plt.ylim((0.2, 1))  # y-axis limited to the 0.2..1 range
plt.title("Validation set loss by different optimizer")
plt.show()