In [1]:
### WHAT IS GRADIENT DESCENT? ###
# Gradient descent is an optimization algorithm which is commonly-used to train machine learning models and neural networks.
# It trains machine learning models by minimizing errors between predicted and actual results.
# Training data helps these models learn over time, and the cost function within gradient descent specifically acts as a barometer,
# gauging its accuracy with each iteration of parameter updates. Until the function is close to or equal to zero,
# the model will continue to adjust its parameters to yield the smallest possible error.
# Once machine learning models are optimized for accuracy, they can be powerful tools for artificial intelligence (AI) and computer science applications.

### TYPES OF GRADIENT DESCENT###
# There are THREE TYPES of gradient descent learning algorithms: batch gradient descent, stochastic gradient descent and mini-batch gradient descent.
# 1. BATCH gradient descent.: Batch gradient descent sums the error for each point in a training set, updating the model only after all training examples have been evaluated. This process referred to as a training epoch.
# 2. STOCHASTIC gradient descent.:Stochastic gradient descent (SGD) runs a training epoch for each example within the dataset and it updates each training example's parameters one at a time. Since you only need to hold one training example, they are easier to store in memory. While these frequent updates can offer more detail and speed, it can result in losses in computational efficiency when compared to batch gradient descent. Its frequent updates can result in noisy gradients, but this can also be helpful in escaping the local minimum and finding the global one.
# 3. MINI-BATCH gradient descent.: Mini-batch gradient descent combines concepts from both batch gradient descent and stochastic gradient descent. It splits the training dataset into small batch sizes and performs updates on each of those batches. This approach strikes a balance between the computational efficiency of batch gradient descent and the speed of stochastic gradient descent.

### VALIDATION SET & VALIDATION LOSS ###
# Validation Set : A validation data set is a data set of examples used to tune the hyperparameters (i.e. the architecture) of a classifier.
#                  It is sometimes also called the development set or the "dev set".
#                  An example of a hyperparameter for artificial neural networks includes the number of hidden units in each layer.
# Validation Loss : The validation loss is a measure of how well the model generalizes to the validation set.
#                    It represents the error on unseen data.An increasing validation loss indicates that the model’s performance on the validation set is worsening,
#                    suggesting that it is becoming less effective at generalizing to new data.


In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
# Loading and Exploring the Tips Dataset
tips = sns.load_dataset("tips")
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# One-Hot Encoding Categorical Variables
tips = pd.get_dummies(tips, drop_first=True)
tips.head()

Unnamed: 0,total_bill,tip,size,sex_Female,smoker_No,day_Fri,day_Sat,day_Sun,time_Dinner
0,16.99,1.01,2,True,True,False,False,True,True
1,10.34,1.66,3,False,True,False,False,True,True
2,21.01,3.5,3,False,True,False,False,True,True
3,23.68,3.31,2,False,True,False,False,True,True
4,24.59,3.61,4,True,True,False,False,True,True


In [4]:
# Separating Target Variable from Features
x = tips.drop("tip", axis=1)
y = tips["tip"]
x.head()

Unnamed: 0,total_bill,size,sex_Female,smoker_No,day_Fri,day_Sat,day_Sun,time_Dinner
0,16.99,2,True,True,False,False,True,True
1,10.34,3,False,True,False,False,True,True
2,21.01,3,False,True,False,False,True,True
3,23.68,2,False,True,False,False,True,True
4,24.59,4,True,True,False,False,True,True


In [5]:
# Splitting Data into Training and Testing Sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [6]:
# Building and Training a Neural Network for Tip Prediction
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer="adam", loss="mean_squared_error", metrics =['mae'])
model.fit(x_train, y_train, epochs=500, batch_size=32, verbose=1)
loss,accuracy = model.evaluate(x_test, y_test)
print('Mean Squared Error: ', loss)
print('Test Accuracy: ', accuracy)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - loss: 9.9890 - mae: 2.6862 
Epoch 2/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 6.9478 - mae: 2.1673 
Epoch 3/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 5.8304 - mae: 1.9127  
Epoch 4/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3.6898 - mae: 1.4814 
Epoch 5/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 3.1658 - mae: 1.3474  
Epoch 6/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 2.6077 - mae: 1.2090  
Epoch 7/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 2.1225 - mae: 1.0943  
Epoch 8/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.7207 - mae: 0.9879  
Epoch 9/500
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: