Define the model in a separate file, model_fc.py in our case:
import torch
from torch import nn
import torch.nn.functional as F


class Network(nn.Module):
    def __init__(self, input_size, output_size, hidden_layers, drop_p=0.5):
        ''' Builds a feedforward network with arbitrary hidden layers.

            Arguments
            ---------
            input_size: integer, size of the input layer
            output_size: integer, size of the output layer
            hidden_layers: list of integers, the sizes of the hidden layers
            drop_p: float between 0 and 1, dropout probability
        '''
        super().__init__()
        # Input to a hidden layer
        self.hidden_layers = nn.ModuleList([nn.Linear(input_size, hidden_layers[0])])

        # Add a variable number of more hidden layers
        layer_sizes = zip(hidden_layers[:-1], hidden_layers[1:])
        self.hidden_layers.extend([nn.Linear(h1, h2) for h1, h2 in layer_sizes])

        self.output = nn.Linear(hidden_layers[-1], output_size)
        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, x):
        ''' Forward pass through the network, returns the output log-probabilities '''
        for layer in self.hidden_layers:
            x = F.relu(layer(x))
            x = self.dropout(x)
        x = self.output(x)

        return F.log_softmax(x, dim=1)
def validation(model, testloader, criterion):
    accuracy = 0
    test_loss = 0
    for images, labels in testloader:
        images = images.resize_(images.size()[0], 784)

        output = model.forward(images)
        test_loss += criterion(output, labels).item()

        ## Calculating the accuracy
        # Model's output is log-softmax, take exponential to get the probabilities
        ps = torch.exp(output)
        # Class with highest probability is our predicted class, compare with true label
        equality = (labels.data == ps.max(1)[1])
        # Accuracy is number of correct predictions divided by all predictions, just take the mean
        accuracy += equality.type_as(torch.FloatTensor()).mean()

    return test_loss, accuracy
def train(model, trainloader, testloader, criterion, optimizer, epochs=5, print_every=40):
    steps = 0
    running_loss = 0
    for e in range(epochs):
        # Model in training mode, dropout is on
        model.train()
        for images, labels in trainloader:
            steps += 1

            # Flatten images into a 784 long vector
            images.resize_(images.size()[0], 784)

            optimizer.zero_grad()

            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                # Model in inference mode, dropout is off
                model.eval()

                # Turn off gradients for validation, will speed up inference
                with torch.no_grad():
                    test_loss, accuracy = validation(model, testloader, criterion)

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                      "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
                      "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

                running_loss = 0

                # Make sure dropout is back on for the rest of the epoch
                model.train()
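As a quick sanity check (a hypothetical snippet, not part of model_fc.py), we can confirm that the network returns log-probabilities with the expected shape:

# Hypothetical smoke test for the Network class
import torch
from model_fc import Network

net = Network(784, 10, [512, 256, 128])
net.eval()  # disable dropout for a deterministic check

x = torch.randn(64, 784)           # a fake batch of flattened images
log_ps = net(x)

print(log_ps.shape)                # torch.Size([64, 10])
print(torch.exp(log_ps).sum(1))    # each row sums to ~1: valid probabilities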
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import helper
import model_fc
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.5], [0.5])])
# Download and load the training data
trainset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# Download and load the test data
testset = datasets.FashionMNIST('F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)
# Create the network, define the criterion and optimizer
model = model_fc.Network(784, 10, [512, 256, 128])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
model_fc.train(model, trainloader, testloader, criterion, optimizer, epochs=2)
The simplest approach is to save the state dict with torch.save. For example, we can save it to the file 'checkpoint.pth':

torch.save(model.state_dict(), 'checkpoint.pth')

Then we can load the state dict with torch.load:

state_dict = torch.load('checkpoint.pth')
print(state_dict.keys())

And to load the state dict into the network, use model.load_state_dict(state_dict):

model.load_state_dict(state_dict)

Seems pretty straightforward, but as usual it's a bit more complicated. Loading the state dict works only if the model architecture is exactly the same as the checkpoint architecture. If we create a model with a different architecture, loading fails.
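For example (an illustrative snippet, not in the original notebook), loading our checkpoint into a Network with different hidden-layer sizes raises a size-mismatch error:

# Hypothetical: try to load the state dict into a mismatched architecture
wrong_model = model_fc.Network(784, 10, [400, 200, 100])

try:
    wrong_model.load_state_dict(state_dict)
except RuntimeError as e:
    print(e)  # size mismatch for hidden_layers.0.weight, etc.

This means we need to rebuild the model exactly as it was when trained, so information about the architecture has to be saved in the checkpoint along with the state dict: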
checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [each.out_features for each in model.hidden_layers],
              'state_dict': model.state_dict()}

torch.save(checkpoint, 'checkpoint.pth')
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = model_fc.Network(checkpoint['input_size'],
                             checkpoint['output_size'],
                             checkpoint['hidden_layers'])
    model.load_state_dict(checkpoint['state_dict'])

    return model
model = load_checkpoint('checkpoint.pth')
print(model)
## Saving & Loading Model for Inference (state_dict, Recommended)

Save:

torch.save(model.state_dict(), PATH)

Load:

model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
When saving a model for inference, it is only necessary to save the trained model’s learned parameters. Saving the model’s state_dict with the torch.save() function will give you the most flexibility for restoring the model later, which is why it is the recommended method for saving models.
A common PyTorch convention is to save models using either a .pt or .pth file extension.
Remember that you must call model.eval() to set dropout and batch normalization layers to evaluation mode before running inference. Failing to do this will yield inconsistent inference results.
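To see why this matters, here is a small illustrative check (not from the original): in training mode dropout randomly zeroes activations, so two forward passes on the same input typically disagree, while eval mode is deterministic:

x = torch.randn(1, 784)

model.train()
print(torch.allclose(model(x), model(x)))  # usually False: dropout is active

model.eval()
print(torch.allclose(model(x), model(x)))  # True: dropout is disabled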
## Saving & Loading Entire Model

Save:

torch.save(model, PATH)

Load:

# Model class must be defined somewhere
model = torch.load(PATH)
model.eval()
## Saving & Loading a General Checkpoint for Inference and/or Resuming Training

Save:

torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
    ...
}, PATH)

Load:

model = TheModelClass(*args, **kwargs)
optimizer = TheOptimizerClass(*args, **kwargs)

checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()
# - or -
model.train()
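As a concrete example (a hypothetical sketch reusing the Network, loaders, and train function from earlier in this document, and assuming a general checkpoint was saved with the keys above to a file 'general_checkpoint.pth'), resuming our FashionMNIST run might look like:

# Hypothetical resume from a general checkpoint saved with the keys above
model = model_fc.Network(784, 10, [512, 256, 128])
optimizer = optim.Adam(model.parameters(), lr=0.001)

checkpoint = torch.load('general_checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Back to training mode before continuing
model.train()
model_fc.train(model, trainloader, testloader, criterion, optimizer, epochs=2)

Restoring the optimizer state matters here: Adam keeps running averages of the gradients per parameter, and resuming without them effectively restarts the optimizer from scratch.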