In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Print NumPy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [None]:
import tensorflow as tf
from tensorflow import keras
# Import `layers` from the public Keras namespace. `tensorflow.python.*` is a
# private package; its layer classes are not the ones `tf.keras.Sequential`
# is built around, so mixing the two breaks or silently wraps the layers.
from tensorflow.keras import layers
print(tf.__version__)

# Dataset

In [None]:
# Column names handed to read_csv through `names=`.
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

raw_dataset = pd.read_csv(
    url,
    names=column_names,
    na_values="?",            # '?' entries are parsed as NaN
    comment='\t',             # text after a tab character is skipped
    sep=' ',
    skipinitialspace=True,    # ignore extra spaces following a separator
)

In [None]:
# Work on a copy so `raw_dataset` keeps the data exactly as it was loaded.
dataset = raw_dataset.copy()
# Show the last 15 rows.
dataset.tail(15)

## Cleanup

In [None]:
# Count the missing values in each column.
dataset.isna().sum()


In [None]:
# Drop every row that contains at least one missing value.
dataset = dataset.dropna()

In [None]:
# Replace the numeric 'Origin' codes with the corresponding region names.
origin_names = {1: 'USA', 2: 'Europe', 3: 'Japan'}
dataset['Origin'] = dataset['Origin'].map(origin_names)

In [None]:
# Preview the first rows after mapping 'Origin' to names.
dataset.head()

In [None]:
# One-hot encode 'Origin' into one indicator column per region.
# dtype=float keeps the indicator columns numeric: recent pandas versions
# default to bool, which turns the mixed feature table into an object array
# when it is converted to NumPy for the Keras layers.
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='', dtype=float)
dataset.head()

## Split data into training and test sets

In [None]:
# Reproducible 80/20 split: sample the training rows with a fixed seed,
# then keep every row that was not sampled as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(index=train_dataset.index)

In [None]:
# (rows, columns) of the training split. `.size` would report rows * columns,
# which is not the number of examples.
train_dataset.shape

In [None]:
# (rows, columns) of the test split. `.size` would report rows * columns,
# which is not the number of examples.
test_dataset.shape

In [None]:
# Pairwise scatter plots of the target and a few features, with KDE curves
# on the diagonal.
pairplot_columns = ['MPG', 'Cylinders', 'Displacement', 'Weight']
sns.pairplot(train_dataset[pairplot_columns], diag_kind='kde')

In [None]:
# Summary statistics, transposed so that each row describes one column.
train_dataset.describe().T

In [None]:
# Separate the target column ('MPG') from the inputs. The split frames are
# copied first so `train_dataset` / `test_dataset` keep the 'MPG' column.
train_features = train_dataset.copy()
train_labels = train_features.pop('MPG')

test_features = test_dataset.copy()
test_labels = test_features.pop('MPG')

## Normalization
It is good practice to normalize features that use different scales and ranges.

One reason this is important is that the features are multiplied by the model weights, so the scale of the outputs and the scale of the gradients are affected by the scale of the inputs.

In [None]:
# Mean and standard deviation of every column, one row per column.
train_dataset.describe().loc[['mean', 'std']].T

In [None]:
# Normalization layer for the full feature table; its statistics are filled in
# later by `adapt`.
normalizer = tf.keras.layers.Normalization(axis=-1)

In [None]:
# Fit the normalization statistics on the training features only.
train_array = train_features.to_numpy().astype('float32')
normalizer.adapt(train_array)

In [None]:
# Show the means the normalizer holds after `adapt`.
feature_means = normalizer.mean.numpy()
print(feature_means)

In [None]:
# Compare the first training row before and after normalization.
first = train_features.iloc[:1].to_numpy().astype('float32')
normalized_first = normalizer(first).numpy()

with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalized_first)

In [None]:
# The same first training row, shown as a DataFrame with column names.
train_features.head(1)

# Linear Regression

## One variable
Predicting 'MPG' from 'Horsepower'


In [None]:
# Single-feature input: the 'Horsepower' column as a float32 vector.
horsepower = train_features['Horsepower'].to_numpy(dtype='float32')

# Dedicated normalization layer for the one-column input, adapted on the
# training horsepower values.
horsepower_normalizer = tf.keras.layers.Normalization(axis=None, input_shape=[1,])
horsepower_normalizer.adapt(horsepower)

In [None]:
# First raw horsepower value.
horsepower[:1]

In [None]:
# First horsepower value after passing through the normalization layer.
horsepower_normalizer(horsepower[:1]).numpy()

In [None]:
# Single-input linear model: normalize horsepower, then one Dense unit.
# Dense is referenced through tf.keras.layers, the same public namespace the
# Normalization layers come from, so every layer handed to
# tf.keras.Sequential is a tf.keras layer.
horsepower_model = tf.keras.Sequential([
    horsepower_normalizer,
    tf.keras.layers.Dense(units=1),
])

horsepower_model.summary()

In [None]:
# First ten raw horsepower values.
horsepower[:10]

In [None]:
# Run the model on the first ten horsepower values. It has not been compiled
# or fitted at this point in the notebook.
horsepower_model.predict(horsepower[:10])

`model.compile` is used to configure the model for training. It is called after you have defined your model's architecture (layers, neurons, etc.) but before you start training it.

In the compile step, you specify the optimizer, loss function, and optional metrics that the model will use during training.
This step essentially prepares the model for the training process but doesn't actually start the training.

`optimizer=tf.keras.optimizers.Adam(learning_rate=0.1)`: Here we choose Adam as the optimizer for the model's training, with a learning rate of 0.1. Adam is a popular optimization algorithm in deep learning.



In [None]:
# Configure training: Adam with a learning rate of 0.1, minimizing the mean
# absolute error.
adam = tf.keras.optimizers.Adam(learning_rate=0.1)
horsepower_model.compile(optimizer=adam, loss='mean_absolute_error')

In [None]:
%%time
history = horsepower_model.fit(
    train_features['Horsepower'],
    train_labels, 
    epochs=100,
    verbose=0, # suppress logging
    validation_split = 0.2) # calculate validation results on 20% of the training data
    

In [None]:
# Tabulate the per-epoch stats stored in the history object and show the
# last few epochs.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
def plot_loss(history):
    """Plot training and validation loss per epoch on the current axes.

    Args:
        history: object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch values.
    """
    ax = plt.gca()
    for key in ('loss', 'val_loss'):
        ax.plot(history.history[key], label=key)
    ax.set_ylim([0, 10])
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Error [MPG]')
    ax.legend()
    ax.grid(True)
    

In [None]:
# Plot the training and validation loss curves of the horsepower model.
plot_loss(history)

**What's the difference between loss and val_loss?**

The loss is calculated on the training set, while the val_loss is calculated on the validation set. The training set is used to fit the model; the validation set is held out from fitting and is used to check how well the model generalizes, which makes overfitting visible (the training loss keeps falling while the validation loss stalls or rises).

In [None]:
# Evaluate on the held-out test set and start the results table that later
# models can be added to.
horsepower_test_loss = horsepower_model.evaluate(
    test_features['Horsepower'], test_labels, verbose=0)

test_results = {'horsepower_model': horsepower_test_loss}

In [None]:
# Test-set loss of the horsepower model (the value returned by `evaluate`):
test_results

In [None]:
# 251 evenly spaced values from 0 to 250, used as model inputs for plotting.
x = tf.linspace(0.0, 250, 251)

In [None]:
# Inspect the generated input tensor.
x

In [None]:
# Model predictions for every value in `x`.
y = horsepower_model.predict(x)

In [None]:
def plot_horsepower(x, y):
    """Overlay a prediction curve on the training data.

    The scatter shows ``train_features['Horsepower']`` against
    ``train_labels``; ``x`` and ``y`` are drawn on top as a black line.

    Args:
        x: values for the horizontal (horsepower) axis of the line.
        y: values for the vertical (MPG) axis of the line.
    """
    plt.scatter(train_features['Horsepower'], train_labels, label='Data')
    plt.plot(x, y, color='k', label='Predictions')
    ax = plt.gca()
    ax.set_xlabel('Horsepower')
    ax.set_ylabel('MPG')
    ax.legend()

In [None]:
# Plot the horsepower model's predictions over the training data.
plot_horsepower(x, y)

## Linear regression with multiple inputs

In [None]:
# Multi-input linear model: normalize all features, then one Dense unit.
# Dense is referenced through tf.keras.layers, the same public namespace the
# Normalization layers come from, so every layer handed to
# tf.keras.Sequential is a tf.keras layer.
linear_model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(units=1),
])