In [1]:
import torch
import torch.nn as nn

In [17]:
class Model(nn.Module):
    """Single-layer logistic-regression style model.

    One linear layer maps ``n_input_features`` inputs to a single value,
    and ``forward`` passes that value through a sigmoid.
    """

    def __init__(self, n_input_features):
        """Create the linear layer for ``n_input_features`` inputs and one output."""
        super().__init__()
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        return torch.sigmoid(self.linear(x))

# File that the save/load cells in this notebook write to and read from.
PATH = 'model.pth'

# Freshly constructed model with six input features.
model = Model(n_input_features=6)

## Method 1: Save the Entire Model

In [18]:
# Method 1: serialize the complete model object (not just its weights) to PATH.
torch.save(obj=model, f=PATH)

  "type " + obj.__name__ + ". It won't be checked "


In [19]:
# Method 1: read the whole pickled model object back from PATH.
#
# torch.load unpickles the file, and unpickling can execute arbitrary code,
# so only load files that you created yourself or otherwise trust.
#
# Since PyTorch 2.6 `weights_only` defaults to True, which refuses to
# unpickle a complete nn.Module; it has to be switched off explicitly here.
# (The `weights_only` argument exists from PyTorch 1.13 onwards.)
model = torch.load(PATH, weights_only=False)

# Switch to evaluation mode before using the restored model.
model.eval()

# Show the restored parameters.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.1381,  0.0810,  0.3229, -0.2405, -0.3364, -0.3802]],
       requires_grad=True)
Parameter containing:
tensor([0.2255], requires_grad=True)


---
## Method 2: Save State Dict (Preferred)

In [20]:
# Method 2: write only the model's state dict to PATH.
torch.save(obj=model.state_dict(), f=PATH)

In [21]:
# A state dict holds no architecture, so build the model first
# (with the same constructor arguments) and then fill in the saved values.
loaded_model = Model(n_input_features=6)
loaded_model.load_state_dict(torch.load(f=PATH))

# Evaluation mode for the restored model.
loaded_model.eval()

# Print each restored parameter.
for param in loaded_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.1381,  0.0810,  0.3229, -0.2405, -0.3364, -0.3802]],
       requires_grad=True)
Parameter containing:
tensor([0.2255], requires_grad=True)


In [23]:
# Display the restored model's state dict (parameter names and their tensors).
print(loaded_model.state_dict())

OrderedDict([('linear.weight', tensor([[-0.1381,  0.0810,  0.3229, -0.2405, -0.3364, -0.3802]])), ('linear.bias', tensor([0.2255]))])


---
## Saving Checkpoints

In [30]:
# Plain SGD over the model's parameters.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# The optimizer has a state dict of its own ('state' and 'param_groups').
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [5000995696, 5000998256]}]}


In [31]:
# Bundle everything needed to resume into one dictionary:
# an epoch counter plus the model and optimizer state dicts.
checkpoint = {}
checkpoint["epoch"] = 90  # example value; no training loop runs in this notebook
checkpoint["model_state"] = model.state_dict()
checkpoint["optim_state"] = optimizer.state_dict()

# Write the whole bundle to a single file.
torch.save(obj=checkpoint, f='checkpoint.pth')

In [32]:
# Load the checkpoint back from disk
loaded_checkpoint = torch.load("checkpoint.pth")
epoch = loaded_checkpoint['epoch']

# Initialize a model and an optimizer.
# lr=0 is only a placeholder: optimizer.load_state_dict below restores the
# saved param_groups, including the learning rate.
model = Model(n_input_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=0)

# Restore both state dicts from the checkpoint that was just read from disk
# (`loaded_checkpoint`), so this cell does not rely on a `checkpoint`
# variable still being present in the kernel.
model.load_state_dict(loaded_checkpoint['model_state'])
optimizer.load_state_dict(loaded_checkpoint['optim_state'])

print(optimizer.state_dict())
print('')
print(model.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [5004515600, 5000962128]}]}

OrderedDict([('linear.weight', tensor([[-0.1381,  0.0810,  0.3229, -0.2405, -0.3364, -0.3802]])), ('linear.bias', tensor([0.2255]))])


---
## Saving and Loading on CPU / GPU

### Case 1: Save on GPU, Load on CPU

In [None]:
# Save side: move the model to the GPU, then write its state dict to PATH.
device = torch.device('cuda')
model.to(device)
torch.save(obj=model.state_dict(), f=PATH)

# Load side: build a new model and load the file with the tensors remapped
# to the CPU. map_location is what redirects the GPU-saved tensors.
device = torch.device('cpu')
model = Model(n_input_features=6)
model.load_state_dict(torch.load(f=PATH, map_location=device))

---
### Case 2: Save on GPU, Load on GPU

In [None]:
# Save side: move the model to the GPU, then write its state dict to PATH.
device = torch.device('cuda')
model.to(device)
torch.save(obj=model.state_dict(), f=PATH)

# Load side: saving and loading happen on the same device type, so
# torch.load is called without map_location.
model = Model(n_input_features=6)
model.load_state_dict(torch.load(f=PATH))

# The newly built model is then moved to the GPU as well.
model.to(device)

---
### Case 3: Save on CPU, Load on GPU

In [None]:
# Make sure the state dict really is saved from the CPU: `model` may still
# live on the GPU if an earlier cell moved it there.
model.to(torch.device('cpu'))
torch.save(model.state_dict(), PATH)

# Pick the target GPU once and use the same device for both the remapping
# and the final placement (change the index to choose another GPU).
device = torch.device('cuda:0')
model = Model(n_input_features=6)

# map_location moves the CPU-saved tensors onto the chosen GPU while loading.
model.load_state_dict(torch.load(PATH, map_location=device))

# load_state_dict copies the values into the model's existing parameters,
# which were created on the CPU, so the model itself still has to be moved.
model.to(device)