# Importación de librerías

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split
import torch.nn.functional as F
import torch.nn as nn
from torchmetrics import MeanSquaredError, MeanAbsoluteError, R2Score
from sklearn.preprocessing import LabelEncoder
import intel_extension_for_pytorch as ipex

import warnings

warnings.filterwarnings("ignore")

# Visualizamos el contenido del dataset

In [2]:
automobileDataset = pd.read_csv("imports-85.data", header=None)
automobileDataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
1,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
2,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
3,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950
4,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845
201,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045
202,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485
203,-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.1,...,145,idi,3.01,3.40,23.0,106,4800,26,27,22470


In [3]:
# Eliminamos valores nulos
automobileDataset.replace('?', pd.NA, inplace=True)
automobileDataset.dropna(subset=automobileDataset.columns, inplace=True)

In [4]:
# Convertimos valores categóricos
columnas_categoricas = [2, 3, 4, 5, 6, 7, 8, 14, 15, 17]

class_label_encoder = LabelEncoder()

for i in columnas_categoricas:
    automobileDataset.iloc[:, i] = class_label_encoder.fit_transform(automobileDataset.iloc[:, i])

In [5]:
print(automobileDataset.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 159 entries, 3 to 204
Data columns (total 26 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       159 non-null    int64  
 1   1       159 non-null    object 
 2   2       159 non-null    int64  
 3   3       159 non-null    int64  
 4   4       159 non-null    int64  
 5   5       159 non-null    int64  
 6   6       159 non-null    int64  
 7   7       159 non-null    int64  
 8   8       159 non-null    int64  
 9   9       159 non-null    float64
 10  10      159 non-null    float64
 11  11      159 non-null    float64
 12  12      159 non-null    float64
 13  13      159 non-null    int64  
 14  14      159 non-null    int64  
 15  15      159 non-null    int64  
 16  16      159 non-null    int64  
 17  17      159 non-null    int64  
 18  18      159 non-null    object 
 19  19      159 non-null    object 
 20  20      159 non-null    float64
 21  21      159 non-null    object 
 22  22

In [6]:
# Pasamos las columnas de tipo object a tipo int
automobileDataset[[1, 18, 19, 21, 22, 25]] = automobileDataset[[1, 18, 19, 21, 22, 25]].astype(float)

automobileDataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 159 entries, 3 to 204
Data columns (total 26 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       159 non-null    int64  
 1   1       159 non-null    float64
 2   2       159 non-null    int64  
 3   3       159 non-null    int64  
 4   4       159 non-null    int64  
 5   5       159 non-null    int64  
 6   6       159 non-null    int64  
 7   7       159 non-null    int64  
 8   8       159 non-null    int64  
 9   9       159 non-null    float64
 10  10      159 non-null    float64
 11  11      159 non-null    float64
 12  12      159 non-null    float64
 13  13      159 non-null    int64  
 14  14      159 non-null    int64  
 15  15      159 non-null    int64  
 16  16      159 non-null    int64  
 17  17      159 non-null    int64  
 18  18      159 non-null    float64
 19  19      159 non-null    float64
 20  20      159 non-null    float64
 21  21      159 non-null    float64
 22  22

In [7]:
automobileDataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
3,2,164.0,0,1,0,0,3,1,0,99.8,...,109,4,3.19,3.40,10.0,102.0,5500.0,24,30,13950.0
4,2,164.0,0,1,0,0,3,0,0,99.4,...,136,4,3.19,3.40,8.0,115.0,5500.0,18,22,17450.0
6,1,158.0,0,1,0,0,3,1,0,105.8,...,136,4,3.19,3.40,8.5,110.0,5500.0,19,25,17710.0
8,1,158.0,0,1,1,0,3,1,0,105.8,...,131,4,3.13,3.40,8.3,140.0,5500.0,17,20,23875.0
10,2,192.0,1,1,0,1,3,2,0,101.2,...,108,4,3.50,2.80,8.8,101.0,5800.0,23,29,16430.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95.0,17,1,0,0,3,2,0,109.1,...,141,4,3.78,3.15,9.5,114.0,5400.0,23,28,16845.0
201,-1,95.0,17,1,1,0,3,2,0,109.1,...,141,4,3.78,3.15,8.7,160.0,5300.0,19,25,19045.0
202,-1,95.0,17,1,0,0,3,2,0,109.1,...,173,4,3.58,2.87,8.8,134.0,5500.0,18,23,21485.0
203,-1,95.0,17,0,1,0,3,2,0,109.1,...,145,2,3.01,3.40,23.0,106.0,4800.0,26,27,22470.0


# StandardScaler

In [8]:
class StandardScaler:

    def __init__(self, mean=None, std=None, epsilon=1e-7):
        """Standard Scaler.
        The class can be used to normalize PyTorch Tensors using native functions. The module does not expect the
        tensors to be of any specific shape; as long as the features are the last dimension in the tensor, the module
        will work fine.
        :param mean: The mean of the features. The property will be set after a call to fit.
        :param std: The standard deviation of the features. The property will be set after a call to fit.
        :param epsilon: Used to avoid a Division-By-Zero exception.
        """
        self.mean = mean
        self.std = std
        self.epsilon = epsilon

    def fit(self, values):
        dims = list(range(values.dim() - 1))
        self.mean = torch.mean(values, dim=dims)
        self.std = torch.std(values, dim=dims)

    def transform(self, values):
        return (values - self.mean) / (self.std + self.epsilon)

    def fit_transform(self, values):
        self.fit(values)
        return self.transform(values)

    def __repr__(self):
        return f"mean: {self.mean}, std:{self.std}, epsilon:{self.epsilon}"

# Dataset y Dataloader

In [9]:
class AutomobileDataset(Dataset):
  def __init__(self, src_file, root_dir, transform=None):
    X = automobileDataset.iloc[:, :25]
    Y = automobileDataset.iloc[:, 25]
    
    x1=X.iloc[:,0:25].values
    x_tensor = torch.tensor(x1)

    y_tensor = torch.tensor(Y.values).type(torch.float32).unsqueeze(1)

    scaler = StandardScaler()
    
    XScalada = scaler.fit_transform(x_tensor).type(torch.float32)

    self.data = torch.cat((XScalada,y_tensor),1)
    self.root_dir = root_dir
    self.transform = transform

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      idx = idx.tolist()

    preds = self.data[idx, 0:25]
    spcs = self.data[idx, 25]
    sample = (preds, spcs)
    
    if self.transform:
      sample = self.transform(sample)
    return sample

In [10]:
automobileDataset = AutomobileDataset("imports-85.data",".")
display(automobileDataset[0])

(tensor([ 1.0596,  1.2024, -2.0262,  0.3217, -0.4508, -0.8182,  0.4534, -0.4482,
          0.0000,  0.2972,  0.3633,  0.3042,  0.1766, -0.2576, -0.0345, -0.1033,
         -0.3357,  1.0407, -0.4119,  0.5549, -0.0414,  0.2006,  0.8291, -0.4136,
         -0.3223]),
 tensor(13950.))

# División en train y test

In [11]:
lonxitudeDataset = len(automobileDataset)

tamTrain =int(lonxitudeDataset*0.8)
tamVal = lonxitudeDataset - tamTrain

print(f"Tam dataset: {lonxitudeDataset} train: {tamTrain} tamVal: {tamVal}")
train_set, val_set = random_split(automobileDataset,[tamTrain,tamVal])
train_ldr = torch.utils.data.DataLoader(train_set, batch_size=2,
    shuffle=True, drop_last=False)
validation_loader =torch.utils.data.DataLoader(val_set, batch_size=4, shuffle=False, drop_last=True)

Tam dataset: 159 train: 127 tamVal: 32


# Creación do modelo

In [12]:
class Model(nn.Module):
    def __init__(self, entradas):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(entradas, 100)
        self.layer2 = nn.Linear(100, 50)
        self.layer3 = nn.Linear(in_features=50, out_features=1)
        
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        return x

# Instanciación del modelo

In [13]:
model     = Model(25)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn   = nn.MSELoss(reduction='sum')
model, optimizer = ipex.optimize(model, optimizer=optimizer)

In [14]:
entradaProba,dest = next(iter(train_ldr))

print("Entrada:")
display(entradaProba)

print("Desexada:")
display(dest)

saida = model(entradaProba) # esta é a proba de verdade
print("Saída:")
display(saida)

loss_fn(saida, dest)

Entrada:


tensor([[ 1.0596,  0.4451, -1.2248,  0.3217, -0.4508,  1.2145, -0.8573, -0.4482,
          0.0000, -2.2573, -2.4137, -0.8766, -1.3661, -1.5523, -0.0345, -0.1033,
         -0.8938, -1.5148, -1.4593,  0.5889, -0.1443, -1.2317, -0.6738,  3.6866,
          3.3933],
        [-0.6168,  1.8756, -1.8259,  0.3217, -0.4508,  1.2145,  0.4534,  1.4273,
          0.0000,  0.5681,  0.3806, -0.4146,  0.1766,  0.5164, -0.0345,  2.2433,
          1.4699,  1.0407,  0.0369, -0.1572, -0.2985,  0.8192, -1.8547, -0.9057,
         -0.6319]])

Desexada:


tensor([ 6479., 20970.])

Saída:


tensor([[0.0000],
        [0.0221]], grad_fn=<ReluBackward0>)

tensor(9.6344e+08, grad_fn=<MseLossBackward0>)

# Función de entrenamiento

In [15]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    for i, data in enumerate(train_ldr):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

    return running_loss / len(train_ldr)

In [16]:
EPOCHS = 200
writer = None
for epoch in range(EPOCHS):
    model.train(True)
    avg_loss = train_one_epoch(epoch, writer)

    mean_squared_error = MeanSquaredError()
    mean_absolute_error = MeanAbsoluteError()
    r2Score = R2Score()
    model.train(False)

    with torch.no_grad():
        for entradas, saidas in validation_loader:
            voutputs = model(entradas).flatten()
            mean_squared_error(voutputs,saidas)
            mean_absolute_error(voutputs,saidas)
            r2Score(voutputs,saidas)

    errorMedio = mean_squared_error.compute()
    errorAbsolute =mean_absolute_error.compute()
    r2 = r2Score.compute()

: 