In [5]:
# Create a GNN dataset with k-NNGraphs with Torch Geometric
from dataset import *
import matplotlib.pyplot as plt
import torch

''' I HAVE TO NORMALIZE THE LABELS TO 1 USING THE MAX AREA Y = PI*10^2'''

' I HAVE TO NORMALIZE THE LABELS TO 1 USING THE MAX AREA Y = PI*10^2'

In [6]:
# Dataset parameters
n_samples  = 50000

# Load the full dataset
dataset = test(root='Data')

# Split into train and validation set
dataset = dataset.shuffle()

train_size = round(0.8*len(dataset))
val_size  = n_samples - train_size

train_dataset = dataset[:train_size]
val_dataset = dataset[val_size:]

In [7]:
# Getting some insights about the first graph
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
print(data.y)



Dataset: test(50000):
Number of graphs: 50000
Number of features: 2
Number of classes: 49972

Data(x=[318, 2], edge_index=[2, 3180], y=[1])
Number of nodes: 318
Number of edges: 3180
Average node degree: 10.00
Has isolated nodes: False
Has self-loops: True
Is undirected: False
tensor([191.8085])


In [8]:
from torch_geometric.loader import DataLoader
batch_size= 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle = True) 

In [9]:
625*64

40000

In [10]:
train_loader.batch_size
next(iter(train_loader)).x

tensor([[9.9500e-04, 9.9833e-05],
        [1.1751e-02, 1.1790e-03],
        [2.2506e-02, 2.2582e-03],
        ...,
        [8.9474e+00, 5.2632e+00],
        [9.1228e+00, 5.2632e+00],
        [9.2982e+00, 5.2632e+00]])

In [11]:
len(train_dataset)/64

625.0

In [12]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin   = Linear(hidden_channels, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        # 1. Obtain node embeddings 
        x = self.conv1(x.float(), edge_index)
        x = x.relu()
        x = self.conv2(x.float(), edge_index)
        x = x.relu()
        x = self.conv3(x.float(), edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.1, training=self.training)
        x = self.lin(x)
        
        return x
    
    def loss(self, pred, score):
        # Start with MAE loss function, then switch to MSE when the error falls below delta
        return F.huber_loss(pred, score, reduction = 'mean', delta = 0.1) # ATTENTION delta dépend des labels 0.1 --> rmax =1 


model = GCN(hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(2, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=1, bias=True)
)


In [13]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print('INFO: Early stopping')
                self.early_stop = True

In [14]:
from torch.utils.tensorboard import SummaryWriter

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(hidden_channels=64).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, eps = 1e-7)
reduce_lr = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer = optimizer, factor=0.5, patience=2, min_lr=1e-7, verbose = True)
early_stopping = EarlyStopping(patience=4)
criterion = model.loss


In [15]:
def fit(model, train_loader, optimizer, criterion):
    print('Training')
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0.
    train_running_mae = 0.
    train_running_mse = 0.
    counter = 0
    prog_bar = tqdm(enumerate(train_loader), total = len(train_loader))
    for data in prog_bar:  # Iterate in batches over the training dataset.
        counter += 1
        data = data[1].to(device)
        target = data.y.float()/(np.pi*10**2) # Normalize labels to 1
        # zero the parameter gradients
        model.zero_grad()
        preds = model(data)[:,0]  # Perform a single forward pass.
        loss = criterion(preds, target)
        # update metrics 
        train_running_loss += loss.item()
        train_running_mse += F.mse_loss(preds,target)
        train_running_mae += F.l1_loss(preds, target)

        loss.backward()
        # Update parameters based on gradients.
        optimizer.step() 

    # compute the mean metrics over the all batch
    train_loss = train_running_loss / counter
    train_mse = train_running_mse/counter
    train_mae = train_running_mae/counter

    return train_loss, train_mse, train_mae

In [16]:
def validate(model, val_dataloader, criterion):
    print('Validating')
    model.eval()
    val_running_loss = 0.
    val_running_correct = 0.
    val_running_mae = 0.
    val_running_mse = 0.
    counter = 0
    prog_bar = tqdm(enumerate(val_dataloader), total=len(val_dataloader))
    with torch.no_grad():
        for i, data in prog_bar:
            counter += 1
            data = data.to(device)
            target = data.y.float()/(np.pi*10**2) # Normalize labels to 1
            preds = model(data)[:,0]
            loss = criterion(preds, target)
            # update metrics
            val_running_loss += loss.item()
            val_running_mse += F.mse_loss(preds,target)
            val_running_mae += F.l1_loss(preds, target)
    
    # Compute the mean of metrics over the all batch
    val_loss = val_running_loss / counter
    val_mse = val_running_mse/counter
    val_mae = val_running_mae/counter

    return val_loss, val_mse, val_mae

In [17]:
def save_checkpoint(epoch, loss, save_path):
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, 'History/' + save_path + '.pt')

def load_checkpoint(model, optimizer, load_path):
    checkpoint = torch.load(load_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    
    return model, optimizer, epoch

def save_history(epochs, optimizer_param, loss, timestamp):
    torch.save({
            'epoch': epochs,
            'optimizer_state_dict': optimizer_param,
            # we don't save model weights and biases every epochs to save memory 
            'Loss': loss,
            }, 'History/history + ' + timestamp + '.pt')

In [18]:
# Training loop
import time
start = time.time()
fname = 'checkpoint' + time.strftime("%Y%m%d-%H%M%S") 
writer = SummaryWriter()

epochs = 10

loss = {'Hubert_train':[], 'Hubert_val': [],
        'MAE_train':   [], 'MAE_val':    [],
        'MSE_train':   [], 'MSE_val':    []}
optimizer_state_dicts = []
epochs_nb = []

for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_mse, train_epoch_mae = fit(
        model, train_loader, optimizer, criterion
    )
    val_epoch_loss, val_epoch_mse, val_epoch_mae = validate(
        model, val_loader, criterion
    )
    # Keep track of Hubert loss 
    writer.add_scalars(main_tag="Huber_Loss", 
                       tag_scalar_dict={"train_loss": train_epoch_loss,
                                            "test_loss": val_epoch_loss},
                       global_step=epoch)
    loss['Hubert_train'].append(train_epoch_loss)
    loss['Hubert_val'].append(val_epoch_loss)

    # Keep track of MSE loss
    writer.add_scalars(main_tag="MSE", 
                       tag_scalar_dict={"train_MSE": train_epoch_mse,
                                            "test_MSE": val_epoch_mse},
                      global_step=epoch) 
    loss['MSE_train'].append(train_epoch_mse)
    loss['MSE_val'].append(val_epoch_mse)

    # Keep track of MAE Loss 
    writer.add_scalars(main_tag="MAE", 
                       tag_scalar_dict={"train_MAE": train_epoch_mae,
                                            "test_MAE": val_epoch_mae},
                      global_step=epoch)        
    loss['MAE_train'].append(train_epoch_mse)
    loss['MAE_val'].append(val_epoch_mse)

    # update EarlyStopping class
    early_stopping(val_epoch_loss)
    if early_stopping.counter == 0:
    # save a checkpoint if the validation loss has improved for this epoch
        save_checkpoint(epoch, val_epoch_loss, fname)
    
    # save an history
    optimizer_state_dicts.append(optimizer.state_dict)
    epochs_nb.append(epoch)
    save_history(epochs_nb, optimizer_state_dicts, loss, timestamp)
    
    print(f"Train Loss: {train_epoch_loss:.4f}, Train MAE: {train_epoch_mae:.2f}")
    print(f'Validation Loss: {val_epoch_loss:.4f}, Validation MAE: {val_epoch_mae:.2f}')
    if early_stopping.early_stop:
        break
end = time.time()
print(f"Training time: {(end-start)/60:.3f} minutes")
writer.flush()

Epoch 1 of 10
Training


100%|██████████| 625/625 [05:44<00:00,  1.81it/s]


Validating


100%|██████████| 625/625 [02:35<00:00,  4.02it/s]


Train Loss: 0.0059, Train MAE: 0.09
Validation Loss: 0.0045, Validation MAE: 0.08
Epoch 2 of 10
Training


100%|██████████| 625/625 [05:47<00:00,  1.80it/s]


Validating


100%|██████████| 625/625 [02:36<00:00,  4.00it/s]


Train Loss: 0.0040, Train MAE: 0.07
Validation Loss: 0.0034, Validation MAE: 0.06
Epoch 3 of 10
Training


100%|██████████| 625/625 [06:04<00:00,  1.71it/s]


Validating


100%|██████████| 625/625 [02:32<00:00,  4.09it/s]


Train Loss: 0.0035, Train MAE: 0.06
Validation Loss: 0.0031, Validation MAE: 0.06
Epoch 4 of 10
Training


100%|██████████| 625/625 [05:59<00:00,  1.74it/s]


Validating


100%|██████████| 625/625 [02:40<00:00,  3.90it/s]


Train Loss: 0.0032, Train MAE: 0.06
Validation Loss: 0.0029, Validation MAE: 0.05
Epoch 5 of 10
Training


100%|██████████| 625/625 [05:46<00:00,  1.80it/s]


Validating


100%|██████████| 625/625 [02:30<00:00,  4.14it/s]


INFO: Early stopping counter 1 of 20
Train Loss: 0.0031, Train MAE: 0.06
Validation Loss: 0.0031, Validation MAE: 0.05
Epoch 6 of 10
Training


100%|██████████| 625/625 [05:50<00:00,  1.78it/s]


Validating


100%|██████████| 625/625 [02:30<00:00,  4.14it/s]


Train Loss: 0.0029, Train MAE: 0.05
Validation Loss: 0.0028, Validation MAE: 0.05
Epoch 7 of 10
Training


100%|██████████| 625/625 [05:48<00:00,  1.79it/s]


Validating


100%|██████████| 625/625 [02:31<00:00,  4.13it/s]


Train Loss: 0.0028, Train MAE: 0.05
Validation Loss: 0.0026, Validation MAE: 0.05
Epoch 8 of 10
Training


100%|██████████| 625/625 [05:43<00:00,  1.82it/s]


Validating


100%|██████████| 625/625 [02:29<00:00,  4.17it/s]


INFO: Early stopping counter 1 of 20
Train Loss: 0.0027, Train MAE: 0.05
Validation Loss: 0.0028, Validation MAE: 0.05
Epoch 9 of 10
Training


100%|██████████| 625/625 [05:44<00:00,  1.82it/s]


Validating


100%|██████████| 625/625 [02:30<00:00,  4.16it/s]


Train Loss: 0.0027, Train MAE: 0.05
Validation Loss: 0.0025, Validation MAE: 0.05
Epoch 10 of 10
Training


100%|██████████| 625/625 [05:44<00:00,  1.81it/s]


Validating


100%|██████████| 625/625 [02:29<00:00,  4.19it/s]

Train Loss: 0.0026, Train MAE: 0.05
Validation Loss: 0.0023, Validation MAE: 0.05
Training time: 83.695 minutes





In [19]:
'''from IPython.display import Javascript
import torch.nn.functional as F

model.train()

for epoch in tqdm(range(train_size)):
    i = 0
    loss_ = []
    for data in train_loader:  # Iterate in batches over the training dataset.
        data = data.to(device)
        prediction = model(data)  # Perform a single forward pass.
        label = data.y.to(device)
        loss = model.loss(prediction[:,0], data.y.float())
        model.zero_grad()
        loss.backward()
        optimizer.step()  # Update parameters based on gradients.
        i+= 1 
        loss_.append(loss.item())
        

For 1000 epochs of 1000 samples, MSE, batch_size = 128 >> rmse = 1.7 
'''

'from IPython.display import Javascript\nimport torch.nn.functional as F\n\nmodel.train()\n\nfor epoch in tqdm(range(train_size)):\n    i = 0\n    loss_ = []\n    for data in train_loader:  # Iterate in batches over the training dataset.\n        data = data.to(device)\n        prediction = model(data)  # Perform a single forward pass.\n        label = data.y.to(device)\n        loss = model.loss(prediction[:,0], data.y.float())\n        model.zero_grad()\n        loss.backward()\n        optimizer.step()  # Update parameters based on gradients.\n        i+= 1 \n        loss_.append(loss.item())\n        \n\nFor 1000 epochs of 1000 samples, MSE, batch_size = 128 >> rmse = 1.7 \n'

In [20]:
''' Avoiding overfitting implementing 
- change to MAE or Huber #done
- add learning rate and epsilon to optimizer # done
- test on test set every epoch # done
- early stopping : if after 20 epochs, the loss doesn't improve anymore, the training stop #done
- variable learning rate : if after 5 epochs, the loss doesn't improve anymore : multiply it by 0.1 --> ReduceLRonPlateau # done
- checkpoints (only the best) : save the weights, the bias, epochs, lr, ...  --> in a file #done

- normalize to 1 
- metrics : loss MAE, loss MSE and loss Huber # done
- history : loss of train and test each epochs + metrics --> save  #done
'''

" Avoiding overfitting implementing \n- change to MAE or Huber #done\n- add learning rate and epsilon to optimizer # done\n- test on test set every epoch # done\n- early stopping : if after 20 epochs, the loss doesn't improve anymore, the training stop #done\n- variable learning rate : if after 5 epochs, the loss doesn't improve anymore : multiply it by 0.1 --> ReduceLRonPlateau # done\n- checkpoints (only the best) : save the weights, the bias, epochs, lr, ...  --> in a file #done\n\n- normalize to 1 \n- metrics : loss MAE, loss MSE and loss Huber # done\n- history : loss of train and test each epochs + metrics --> save  #done\n"

In [4]:
!tensorboard --logdir='runs'

TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.10.1 at http://localhost:6006/ (Press CTRL+C to quit)
^C


In [67]:
optimizer.state_dict()['state'][7]

{'step': tensor(6250.),
 'exp_avg': tensor([0.0011]),
 'exp_avg_sq': tensor([0.0002])}

In [75]:
optimizer.state_dict()['param_groups'][0]['lr']

0.001

In [76]:
import torch.nn as nn
import torch.optim as optim

In [77]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
print(net)

Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [79]:
import torch.optim as optim
opt = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [90]:
opt.step()
print(opt)

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)
