Building and Training Convolutional Neural Networks (CNNs) with PyTorch¶
This lecture includes:
Build CNNs
Train MNIST with CNNs
Train CIFAR10 with CNNs
Improve the test accuracy
Normalize the data
Weight decay
Learning rate schedule
1. Build CNNs¶
Convolutional Layer¶
import torch
import torch.nn as nn
# stride default value: 1
# padding default value: 0
conv1 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=0)
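To see what the layer computes, it helps to pass a small random input through it. A minimal sketch (assuming the conv1 defined above): a 3x3 kernel with stride 1 and no padding maps a 6x6 image to a 4x4 output, following (size - kernel_size + 2*padding)/stride + 1.
x = torch.randn(1, 1, 6, 6)  # batch_size=1, channels=1, image size = 6 * 6
out = conv1(x)
print(out.size())  # torch.Size([1, 1, 4, 4]): (6 - 3 + 2*0)/1 + 1 = 4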
class model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 1, 3)
self.conv2 = nn.Conv2d(1, 2, 3)
self.conv3 = nn.Conv2d(3, 2, 3)
my_model=model()
print(my_model.conv1.weight.size()) # (out_channels, in_channels, kernel_size, kernel_size)
print(my_model.conv2.weight.size()) # (out_channels, in_channels, kernel_size, kernel_size)
print(my_model.conv3.weight.size()) # (out_channels, in_channels, kernel_size, kernel_size)
torch.Size([1, 1, 3, 3])
torch.Size([2, 1, 3, 3])
torch.Size([2, 3, 3, 3])
x = torch.randn(1, 1, 4, 4) # batch_size=1, channel =1, image size = 4 * 4
print(x)
print(my_model.conv1(x))  # the class above defines no forward(), so apply the first conv layer directly
Pooling¶
import torch.nn.functional as F
# Call signatures (input and kernel_size are placeholders):
# out = F.max_pool2d(input, kernel_size)
# out = F.avg_pool2d(input, kernel_size)
x = torch.tensor([[[1,3,2,1],[1,3,2,1],[2,1,1,1],[3,5,1,1]]],dtype=float)
print(x)
max_x = F.max_pool2d(x,2)
print(max_x)
avg_x = F.avg_pool2d(x,2)
print(avg_x)
tensor([[[1., 3., 2., 1.],
[1., 3., 2., 1.],
[2., 1., 1., 1.],
[3., 5., 1., 1.]]], dtype=torch.float64)
tensor([[[3., 2.],
[5., 1.]]], dtype=torch.float64)
tensor([[[2.0000, 1.5000],
[2.7500, 1.0000]]], dtype=torch.float64)
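The same pooling operations are also available as modules, which is convenient inside a model definition. A minimal sketch reusing the tensor x above (nn.MaxPool2d and nn.AvgPool2d give the same results as the functional calls):
max_pool = nn.MaxPool2d(2)
avg_pool = nn.AvgPool2d(2)
print(max_pool(x))  # same values as F.max_pool2d(x, 2)
print(avg_pool(x))  # same values as F.avg_pool2d(x, 2)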
2. Train MNIST with CNNs¶
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
use_cuda = torch.cuda.is_available()
print('Use GPU?', use_cuda)
# Define a LeNet-5
# Note that we need to resize MNIST images from 28*28 to 32*32
class model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
out = F.relu(self.conv1(x))
out = F.avg_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.avg_pool2d(out, 2)
# out.size() = [batch_size, channels, size, size], -1 here means channels*size*size
# out.view(out.size(0), -1) is similar to out.reshape(out.size(0), -1), but more efficient
        # Think about why we need to reshape the output here.
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
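Before training, it is worth checking that the layer sizes fit together: with 32x32 inputs, the two 5x5 convolutions and two 2x2 poolings shrink the feature maps 32 -> 28 -> 14 -> 10 -> 5, which is why fc1 expects 16*5*5 input features. A minimal shape check (a hypothetical dummy input, not part of training):
dummy = torch.randn(1, 1, 32, 32)  # one fake image: 1 channel, already resized to 32*32
print(model()(dummy).size())       # torch.Size([1, 10]): one score per digit class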
minibatch_size = 128
num_epochs = 2
lr = 0.1
# Step 1: Define a model
my_model =model()
if use_cuda:
my_model = my_model.cuda()
# Step 2: Define a loss function and training algorithm
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(my_model.parameters(), lr=lr)
# Step 3: load dataset
MNIST_transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
torchvision.transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(root='./data', train= True, download=True, transform=MNIST_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=minibatch_size)
testset = torchvision.datasets.MNIST(root='./data', train= False, download=True, transform=MNIST_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset))
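It can also be useful to inspect one minibatch to confirm that the Resize transform worked. A small sketch (assuming the loaders above):
images, labels = next(iter(trainloader))
print(images.size())  # torch.Size([128, 1, 32, 32]): batch x channels x height x width
print(labels.size())  # torch.Size([128])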
#Step 4: Train the NNs
# One epoch is one complete pass of the entire training set through the network.
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(trainloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
# Forward pass to get the loss
outputs = my_model(images)
loss = criterion(outputs, labels)
# Backward and compute the gradient
optimizer.zero_grad()
        loss.backward() # backpropagation
optimizer.step() #update the weights/parameters
# Training accuracy
correct = 0
total = 0
for i, (images, labels) in enumerate(trainloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
outputs = my_model(images)
p_max, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
training_accuracy = float(correct)/total
# Test accuracy
correct = 0
total = 0
for i, (images, labels) in enumerate(testloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
outputs = my_model(images)
p_max, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
test_accuracy = float(correct)/total
print('Epoch: {}, the training accuracy: {}, the test accuracy: {}' .format(epoch+1,training_accuracy,test_accuracy))
Use GPU? False
Epoch: 1, the training accuracy: 0.8597166666666667, the test accuracy: 0.8699
Epoch: 2, the training accuracy: 0.9314, the test accuracy: 0.9323
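Note that the accuracy loops above still build the autograd graph. A sketch of a helper (not part of the original loop) wraps evaluation in torch.no_grad() to save memory; accuracy(trainloader) and accuracy(testloader) would replace the two loops:
def accuracy(loader):
    correct, total = 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            if use_cuda:
                images, labels = images.cuda(), labels.cuda()
            outputs = my_model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total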
3. Train CIFAR10 with CNNs¶
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
use_cuda = torch.cuda.is_available()
print('Use GPU?', use_cuda)
class model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 6, 5) # change the input channels from 1 to 3
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
out = F.relu(self.conv1(x))
out = F.avg_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.avg_pool2d(out, 2)
# out.size() = [batch_size, channels, size, size], -1 here means channels*size*size
# out.view(out.size(0), -1) is similar to out.reshape(out.size(0), -1), but more efficient
        # Think about why we need to reshape the output here.
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
minibatch_size = 128
num_epochs = 2
lr = 0.1
# Step 1: Define a model
my_model =model()
if use_cuda:
my_model = my_model.cuda()
# Step 2: Define a loss function and training algorithm
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(my_model.parameters(), lr=lr)
# Step 3: load dataset
CIFAR10_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=CIFAR10_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=minibatch_size, shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=CIFAR10_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)
# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
#Step 4: Train the NNs
# One epoch is one complete pass of the entire training set through the network.
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(trainloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
# Forward pass to get the loss
outputs = my_model(images)
loss = criterion(outputs, labels)
# Backward and compute the gradient
optimizer.zero_grad()
        loss.backward() # backpropagation
optimizer.step() #update the weights/parameters
# Training accuracy
correct = 0
total = 0
for i, (images, labels) in enumerate(trainloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
outputs = my_model(images)
p_max, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
training_accuracy = float(correct)/total
# Test accuracy
correct = 0
total = 0
for i, (images, labels) in enumerate(testloader):
if use_cuda:
images = images.cuda()
labels = labels.cuda()
outputs = my_model(images)
p_max, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum()
test_accuracy = float(correct)/total
print('Epoch: {}, the training accuracy: {}, the test accuracy: {}' .format(epoch+1,training_accuracy,test_accuracy))
Use GPU? False
Files already downloaded and verified
Files already downloaded and verified
Epoch: 1, the training accuracy: 0.1918, the test accuracy: 0.1957
Epoch: 2, the training accuracy: 0.3463, the test accuracy: 0.3463
4. Improve the test accuracy¶
Normalize the data with the mean and standard deviation of the dataset¶
\( \tilde{x}[i,j,:,:] = \frac{x[i,j,:,:]-\mathrm{mean}[j]}{\mathrm{std}[j]}, \quad i=1,2,\ldots,60000, \quad j=1,2,3. \)
CIFAR10_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),torchvision.transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=CIFAR10_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=minibatch_size, shuffle=True)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=CIFAR10_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False)
Files already downloaded and verified
Files already downloaded and verified
Weight decay¶
Define the loss function with \(\ell_2\) regularization: \( L(\theta) := \frac{1}{N} \sum_{j=1}^N \ell(y_j, h(x_j; \theta)) + \lambda \|\theta\|_2^2. \)
The parameter \(\lambda\) is called “weight_decay” in PyTorch.
optimizer = optim.SGD(my_model.parameters(), lr=lr, weight_decay = 0.0001)
# weight_decay is usually small. Two suggested values: 0.0001, 0.00001
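Concretely, setting weight_decay \(=\lambda\) in torch.optim.SGD adds \(\lambda\,\theta\) to every parameter's gradient before the update step, i.e. \( \theta \leftarrow \theta - \text{lr}\left(\frac{1}{N}\sum_{j=1}^N \nabla_\theta \ell(y_j, h(x_j;\theta)) + \lambda\,\theta\right) \), which pushes the weights toward zero; this matches the gradient of the \(\ell_2\) penalty above up to a constant factor of 2.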
Learning rate schedule¶
def adjust_learning_rate(optimizer, epoch, init_lr):
#lr = 1.0 / (epoch + 1)
    lr = init_lr * 0.1 ** (epoch // 10)  # integer division: divide the lr by 10 every 10 epochs
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr
init_lr = 1
optimizer = optim.SGD(my_model.parameters(), lr=init_lr, weight_decay = 0.0001)
num_epochs = 30
init_lr = 1
for epoch in range(num_epochs):
current_lr = adjust_learning_rate(optimizer, epoch, init_lr)
print('Epoch: {}, Learning rate: {}'.format(epoch+1,current_lr))
Epoch: 1, Learning rate: 1.0
Epoch: 2, Learning rate: 1.0
Epoch: 3, Learning rate: 1.0
Epoch: 4, Learning rate: 1.0
Epoch: 5, Learning rate: 1.0
Epoch: 6, Learning rate: 1.0
Epoch: 7, Learning rate: 1.0
Epoch: 8, Learning rate: 1.0
Epoch: 9, Learning rate: 1.0
Epoch: 10, Learning rate: 1.0
Epoch: 11, Learning rate: 0.1
Epoch: 12, Learning rate: 0.1
Epoch: 13, Learning rate: 0.1
Epoch: 14, Learning rate: 0.1
Epoch: 15, Learning rate: 0.1
Epoch: 16, Learning rate: 0.1
Epoch: 17, Learning rate: 0.1
Epoch: 18, Learning rate: 0.1
Epoch: 19, Learning rate: 0.1
Epoch: 20, Learning rate: 0.1
Epoch: 21, Learning rate: 0.010000000000000002
Epoch: 22, Learning rate: 0.010000000000000002
Epoch: 23, Learning rate: 0.010000000000000002
Epoch: 24, Learning rate: 0.010000000000000002
Epoch: 25, Learning rate: 0.010000000000000002
Epoch: 26, Learning rate: 0.010000000000000002
Epoch: 27, Learning rate: 0.010000000000000002
Epoch: 28, Learning rate: 0.010000000000000002
Epoch: 29, Learning rate: 0.010000000000000002
Epoch: 30, Learning rate: 0.010000000000000002
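The same step decay can also be obtained with PyTorch's built-in torch.optim.lr_scheduler.StepLR. A minimal sketch (assuming a freshly created optimizer with lr=init_lr):
optimizer = optim.SGD(my_model.parameters(), lr=init_lr, weight_decay=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
for epoch in range(num_epochs):
    print('Epoch: {}, Learning rate: {}'.format(epoch+1, optimizer.param_groups[0]['lr']))
    # ... run one training epoch here ...
    scheduler.step()  # multiply the learning rate by gamma=0.1 every 10 epochs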
Reading material¶
LeNet-5: https://engmrk.com/lenet-5-a-classic-cnn-architecture/
torch.nn.Conv2d: https://pytorch.org/docs/stable/nn.html?highlight=conv2d#torch.nn.Conv2d
Understand Convolutions: https://medium.com/apache-mxnet/convolutions-explained-with-ms-excel-465d6649831c#f17e https://medium.com/apache-mxnet/multi-channel-convolutions-explained-with-ms-excel-9bbf8eb77108 https://gfycat.com/plasticmenacingdegu
(Optional material) How to compute the mean and standard deviation of CIFAR10 dataset?¶
import numpy as np
CIFAR10_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=CIFAR10_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=True)
mean = 0.
std = 0.
for i, (images, labels) in enumerate(trainloader):
batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
images = images.view(batch_samples, images.size(1), -1)
    mean += images.mean(2).sum(0)  # accumulate the per-channel mean of each image
    std += images.std(2).sum(0)    # accumulate the per-channel std of each image
mean /= len(trainloader.dataset)
std /= len(trainloader.dataset)
print('mean:', mean.numpy())
print('std:', std.numpy())
Files already downloaded and verified
mean: [0.49140054 0.48215687 0.44652957]
std: [0.20230146 0.19941428 0.20096211]