# Importación de librerías

In [74]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split
import torch.nn.functional as F
import torch.nn as nn
from torchmetrics import MeanSquaredError, MeanAbsoluteError, R2Score

from torch.utils.tensorboard import SummaryWriter

import warnings

warnings.filterwarnings("ignore")

# Visualizamos el contenido del dataset

In [75]:
airfoilDataset = pd.read_csv("airfoil_self_noise.dat", sep="\t", names=['Frequency', 'Angle of attack', 'Chord length', 'Free-stream velocity', 'Suction side displacement thickness', 'Pressure level'])
airfoilDataset

Unnamed: 0,Frequency,Angle of attack,Chord length,Free-stream velocity,Suction side displacement thickness,Pressure level
0,800,0.0,0.3048,71.3,0.002663,126.201
1,1000,0.0,0.3048,71.3,0.002663,125.201
2,1250,0.0,0.3048,71.3,0.002663,125.951
3,1600,0.0,0.3048,71.3,0.002663,127.591
4,2000,0.0,0.3048,71.3,0.002663,127.461
...,...,...,...,...,...,...
1498,2500,15.6,0.1016,39.6,0.052849,110.264
1499,3150,15.6,0.1016,39.6,0.052849,109.254
1500,4000,15.6,0.1016,39.6,0.052849,106.604
1501,5000,15.6,0.1016,39.6,0.052849,106.224


# StandardScaler

In [76]:
class StandardScaler:

    def __init__(self, mean=None, std=None, epsilon=1e-7):
        """Standard Scaler.
        The class can be used to normalize PyTorch Tensors using native functions. The module does not expect the
        tensors to be of any specific shape; as long as the features are the last dimension in the tensor, the module
        will work fine.
        :param mean: The mean of the features. The property will be set after a call to fit.
        :param std: The standard deviation of the features. The property will be set after a call to fit.
        :param epsilon: Used to avoid a Division-By-Zero exception.
        """
        self.mean = mean
        self.std = std
        self.epsilon = epsilon

    def fit(self, values):
        dims = list(range(values.dim() - 1))
        self.mean = torch.mean(values, dim=dims)
        self.std = torch.std(values, dim=dims)

    def transform(self, values):
        return (values - self.mean) / (self.std + self.epsilon)

    def fit_transform(self, values):
        self.fit(values)
        return self.transform(values)

    def __repr__(self):
        return f"mean: {self.mean}, std:{self.std}, epsilon:{self.epsilon}"

# Dataset y Dataloader

In [77]:
class AirfoilDataset(Dataset):
  def __init__(self, src_file, root_dir, transform=None):
    airfoilDataset = pd.read_csv("airfoil_self_noise.dat", sep="\t", names=['Frequency', 'Angle of attack', 'Chord length', 'Free-stream velocity', 'Suction side displacement thickness', 'Pressure level'])
    X = airfoilDataset.loc[:, ~airfoilDataset.columns.isin(['Pressure level'])]
    Y = airfoilDataset[["Pressure level"]]

    x1=X.iloc[:,0:5].values
    x_tensor = torch.tensor(x1)

    y_tensor = torch.tensor(Y.values).type(torch.float32)

    scaler = StandardScaler()
    
    XScalada = scaler.fit_transform(x_tensor).type(torch.float32)

    self.data = torch.cat((XScalada,y_tensor),1)
    self.root_dir = root_dir
    self.transform = transform

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    if torch.is_tensor(idx):
      idx = idx.tolist()

    preds = self.data[idx, 0:5]
    spcs = self.data[idx, 5]
    sample = (preds, spcs)
    
    if self.transform:
      sample = self.transform(sample)
    return sample

In [78]:
airfoilDataset = AirfoilDataset("airfoil_self_noise.dat",".")
display(airfoilDataset[0])

(tensor([-0.6618, -1.1460,  1.7987,  1.3125, -0.6446]), tensor(126.2010))

# División en train y test

In [79]:
lonxitudeDataset = len(airfoilDataset)

tamTrain =int(lonxitudeDataset*0.8)
tamVal = lonxitudeDataset - tamTrain

print(f"Tam dataset: {lonxitudeDataset} train: {tamTrain} tamVal: {tamVal}")
train_set, val_set = random_split(airfoilDataset,[tamTrain,tamVal])
train_ldr = torch.utils.data.DataLoader(train_set, batch_size=2,
    shuffle=True, drop_last=False)
validation_loader =torch.utils.data.DataLoader(val_set, batch_size=4, shuffle=False, drop_last=True)

Tam dataset: 1503 train: 1202 tamVal: 301


# Creación do modelo

In [80]:
class Model(nn.Module):
    def __init__(self, entradas):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(entradas, 100)
        self.layer2 = nn.Linear(100, 50)
        self.layer3 = nn.Linear(in_features=50, out_features=1)
        
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        return x

# Instanciación del modelo

In [81]:
model     = Model(5)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn   = nn.MSELoss(reduction='sum')
display(model)

Model(
  (layer1): Linear(in_features=5, out_features=100, bias=True)
  (layer2): Linear(in_features=100, out_features=50, bias=True)
  (layer3): Linear(in_features=50, out_features=1, bias=True)
)

In [82]:
entradaProba,dest = next(iter(train_ldr))

print("Entrada:")
display(entradaProba)

print("Desexada:")
display(dest)

saida = model(entradaProba) # esta é a proba de verdade
print("Saída:")
display(saida)

loss_fn(saida, dest)

Entrada:


tensor([[ 1.6220, -1.1460,  0.1695, -1.2304, -0.6879],
        [-0.5984,  0.3578, -0.3736, -0.7231,  0.1004]])

Desexada:


tensor([117.9570, 129.5800])

Saída:


tensor([[0.0146],
        [0.0000]], grad_fn=<ReluBackward0>)

tensor(61402.4414, grad_fn=<MseLossBackward0>)

# Función de entrenamiento

In [83]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    for i, data in enumerate(train_ldr):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

    return running_loss / len(train_ldr)

In [84]:
EPOCHS = 200
writer = None
tb = SummaryWriter()
for epoch in range(EPOCHS):
    model.train(True)
    avg_loss = train_one_epoch(epoch, tb)

    mean_squared_error = MeanSquaredError()
    mean_absolute_error = MeanAbsoluteError()
    r2Score = R2Score()
    model.train(False)

    with torch.no_grad():
        for entradas, saidas in validation_loader:
            voutputs = model(entradas).flatten()
            mean_squared_error(voutputs,saidas)
            mean_absolute_error(voutputs,saidas)
            r2Score(voutputs,saidas)

    errorMedio = mean_squared_error.compute()
    errorAbsolute =mean_absolute_error.compute()
    r2 = r2Score.compute()

    tb.add_scalar('Average loss airfoil', avg_loss, epoch)
    tb.add_scalar('Mean squared error airfoil', errorMedio, epoch)
    tb.add_scalar('Mean absolute error airfoil', errorAbsolute, epoch)
    tb.add_scalar('R2Score airfoil', r2, epoch)
    