# Import

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing

In [16]:
# Random seed; passed as `random_state` when sampling the training split
SEED: int = 0

In [9]:
# Location of the raw Auto MPG data file
URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'

# Column labels for the frame; supplied explicitly via `names=` below,
# so every line of the file is read as data
column_names = [
    'MPG',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
    'Model Year',
    'Origin',
]

# Read the space-delimited file into a DataFrame:
#   - '?' is parsed as a missing value
#   - everything after a tab on a line is discarded (treated as a comment)
#   - columns are stored with pyarrow-backed dtypes
dataset = pd.read_csv(
    URL,
    names=column_names,
    sep=' ',
    skipinitialspace=True,
    comment='\t',
    na_values='?',
    dtype_backend='pyarrow',
)

In [3]:
# Display the last five rows of `dataset` as a quick sanity check
dataset.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
393,27.0,4,140.0,86.0,2790.0,15.6,82,1
394,44.0,4,97.0,52.0,2130.0,24.6,82,2
395,32.0,4,135.0,84.0,2295.0,11.6,82,1
396,28.0,4,120.0,79.0,2625.0,18.6,82,1
397,31.0,4,119.0,82.0,2720.0,19.4,82,1


In [14]:
# Cleaning data: discard every row that contains a missing value
dataset = dataset.dropna()

# Convert the categorical 'Origin' column into one-hot columns.
# The guard keeps this cell re-runnable: once 'Origin' has been replaced, a
# second execution would otherwise fail with KeyError on dataset['Origin'].
if 'Origin' in dataset.columns:
    # get_dummies emits one column per distinct 'Origin' value, in sorted
    # order; the three names are paired with those columns positionally.
    dataset[['USA', 'Europe', 'Japan']] = pd.get_dummies(dataset['Origin']).astype('int8[pyarrow]')
    # Rebind instead of mutating in place so the step is a plain transformation
    dataset = dataset.drop(columns=['Origin'])

In [15]:
# Display the last five rows again to inspect the result of the cleaning step
dataset.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
393,27.0,4,140.0,86.0,2790.0,15.6,82,1,0,0
394,44.0,4,97.0,52.0,2130.0,24.6,82,0,1,0
395,32.0,4,135.0,84.0,2295.0,11.6,82,1,0,0
396,28.0,4,120.0,79.0,2625.0,18.6,82,1,0,0
397,31.0,4,119.0,82.0,2720.0,19.4,82,1,0,0


In [17]:
# Train/test split: draw a reproducible 80% sample of the rows for training,
# then keep every row whose index label was not drawn as the test set.
train = dataset.sample(frac=0.8, random_state=SEED)
test = dataset[~dataset.index.isin(train.index)]

# Summary statistics of the training split, one row per column
train.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MPG,314.0,23.31051,7.728652,10.0,17.0,22.0,28.95,46.6
Cylinders,314.0,5.477707,1.699788,3.0,4.0,4.0,8.0,8.0
Displacement,314.0,195.318471,104.331589,68.0,105.5,151.0,265.75,455.0
Horsepower,314.0,104.869427,38.096214,46.0,76.25,94.5,128.0,225.0
Weight,314.0,2990.251592,843.898596,1649.0,2256.5,2822.5,3608.0,5140.0
Acceleration,314.0,15.559236,2.78923,8.0,13.8,15.5,17.2,24.8
Model Year,314.0,75.898089,3.675642,70.0,73.0,76.0,79.0,82.0
USA,314.0,0.624204,0.485101,0.0,0.0,1.0,1.0,1.0
Europe,314.0,0.178344,0.383413,0.0,0.0,0.0,0.0,1.0
Japan,314.0,0.197452,0.398712,0.0,0.0,0.0,0.0,1.0


In [18]:
# Separate the target from the inputs: 'MPG' is the label,
# every remaining column is a feature.
train_labels = train['MPG'].copy()
test_labels = test['MPG'].copy()

# Independent feature frames; `train` and `test` themselves are left untouched
train_features = train.drop(columns='MPG')
test_features = test.drop(columns='MPG')