In [1]:
import torch
import torch.nn as nn

### Methods required
- torch.save(arg, PATH) -> Used python **pickle** module to serialize objects. Result is a **non-human readable format**. Bound with specific classes and exact directory structure
_____
- Recommended way

  torch.save(model.save_dict(), PATH)
  
  and then

  model.load_state_dict(torch.load(PATH))
_____
- torch.load(PATH)

- model.load_state_dict(arg)


In [2]:
class Model(nn.Module):
    def __init__(self, in_features):
        super(Model, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=1),
            nn.Softmax()
        )
    def forward(self, x):
        return self.model(x)

In [3]:
model = Model(in_features=6)

### Saving with lazy method(not recommended)

In [4]:
FILE = 'model.pth'
torch.save(model, FILE)

# Deleting pre-existing model
del model
# Loading
model = torch.load(FILE)
model.eval()

for param in model.parameters():
    print(param)

Parameter containing:
tensor([[ 0.3394,  0.0893,  0.2471, -0.3994,  0.3753, -0.3302]],
       requires_grad=True)
Parameter containing:
tensor([-0.3458], requires_grad=True)


  model = torch.load(FILE)


# Saving with preferred method

1st tensor corresponds to weight and the 2nd one to bias

In [5]:
model = Model(in_features=6)
print(model.parameters())
print(model.state_dict())

<generator object Module.parameters at 0x7b51b11b9540>
OrderedDict([('model.0.weight', tensor([[-0.0355, -0.2615,  0.3980,  0.3904, -0.3145,  0.0522]])), ('model.0.bias', tensor([-0.0696]))])


In [6]:
model = Model(in_features=6)
for param in model.parameters():
    print(param)

FILE = 'model_preferred.pth'
torch.save(model.state_dict(), FILE)

# Delete pre-existing model
del model
print("*"*50)
# Loading model from file
loaded_model = Model(in_features=6)
loaded_model.load_state_dict(torch.load(FILE))

loaded_model.eval()
for param in loaded_model.parameters():
    print(param)

Parameter containing:
tensor([[-0.3348,  0.2722,  0.2217, -0.1971, -0.3081,  0.0881]],
       requires_grad=True)
Parameter containing:
tensor([-0.1775], requires_grad=True)
**************************************************
Parameter containing:
tensor([[-0.3348,  0.2722,  0.2217, -0.1971, -0.3081,  0.0881]],
       requires_grad=True)
Parameter containing:
tensor([-0.1775], requires_grad=True)


  loaded_model.load_state_dict(torch.load(FILE))


### Optimizer also has a **state_dict**

In [7]:
model = Model(in_features=6)

lr = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1]}]}


### Creating custom checkpoints, for saving Models/ Checkpoints

In [8]:
model = Model(in_features=6)
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

checkpoint = {
    'epoch':90,
    'model_state':model.state_dict(),
    'optim_state':optimizer.state_dict()
}

torch.save(checkpoint, "checkpoint.pth")
del model, optimizer

loaded_checkpoint = torch.load('checkpoint.pth')
epoch = loaded_checkpoint['epoch']

# Initializing model
model = Model(in_features=6)
# Initializing lr =0
optimizer = torch.optim.SGD(model.parameters(), lr=0)

model.load_state_dict(state_dict=checkpoint['model_state'])
optimizer.load_state_dict(state_dict=checkpoint['optim_state'])
# lr is set to earlier i.e 0.01
print(optimizer.state_dict())

{'state': {}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'fused': None, 'params': [0, 1]}]}


  loaded_checkpoint = torch.load('checkpoint.pth')


# If model training and model train are done on different devices
1.  If training done on GPU and loading in CPU

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Model(in_features=6)
model.to(device)

PATH = 'device_trained_model.pth'
torch.save(model.state_dict(), PATH)

device = torch.device('cpu')
model = Model(in_features=6)
model.load_state_dict(torch.load(PATH, map_location=device))

  model.load_state_dict(torch.load(PATH, map_location=device))


<All keys matched successfully>

2. If both saving and loading to be done on GPU

In [10]:
model.load_state_dict(torch.load(PATH))
model.to(device)

  model.load_state_dict(torch.load(PATH))


Model(
  (model): Sequential(
    (0): Linear(in_features=6, out_features=1, bias=True)
    (1): Softmax(dim=None)
  )
)

3. If saving done on CPU and loading to be done on GPU

In [11]:
if torch.cuda.is_available():
    model.load_state_dict(torch.load(PATH, map_location="cuda:0"))
    model.to(device)