CIFAR-10分类网络

通过 CIFAR-10 数据集训练一个彩色图像分类网络。设计一个至少包含 5 个卷积层和池化层的卷积神经网络，卷积核的尺寸统一采用 3×3，并在网络中使用 BatchNorm。

导包

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from torchvision import transforms
from torchvision import datasets as dsets

数据集处理

# Mini-batch size shared by the training and test loaders.
BATCH_SIZE = 64

# Both splits use the same transform: convert each image to a tensor.
to_tensor = transforms.ToTensor()

# Training split: reshuffled every epoch so batches differ between epochs.
train_data = dsets.CIFAR10(root='./', train=True, transform=to_tensor, download=True)
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# Test split: sample order does not affect accuracy, so keep it fixed
# (shuffle=False) to make evaluation runs reproducible.
test_data = dsets.CIFAR10(root='./', train=False, transform=to_tensor, download=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

网络构建

BatchNorm：按批次将激活层的输入归一化为均值为 0、方差为 1 的分布（之后再经过可学习的缩放和平移）。这样激活层的输入会落在激活函数的敏感区间内，输入的微小变化就能引起损失函数较大的变化，从而避免梯度消失，加快训练速度。

class Model(nn.Module):
    """CNN classifier: five conv/BatchNorm/ReLU/max-pool stages plus a dense head.

    Every convolution uses a 3x3 kernel with padding 1, so only the 2x2
    max-pools change the spatial size (each one halves it). The first
    linear layer takes 512 features, i.e. it expects the last stage to
    produce a 512x1x1 map, which is what a 3x32x32 input yields after
    five halvings. The network returns 10 raw class scores per sample.
    """

    def __init__(self):
        super().__init__()

        # Channel counts at each stage boundary: input image, then the
        # outputs of conv1 .. conv5.
        widths = [3, 32, 64, 128, 256, 512]

        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])

        # Classification head: flatten, fc1 with BatchNorm/ReLU/Dropout, fc2.
        layers.extend([
            nn.Flatten(),
            nn.Linear(512 * 1 * 1, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 10),
        ])

        # Kept as a single Sequential under the attribute name `model`, so
        # parameter names stay of the form `model.<index>.<param>`.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run the batch `x` through the whole stack and return the class scores."""
        return self.model(x)

训练

GPU 加速：x.cuda()（等价于 x.to('cuda')）会把张量或模型移动到 GPU 上，之后的计算在 GPU 上进行。

# Use the GPU when one is available; otherwise fall back to the CPU so the
# script still runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = Model()
net = net.to(device)
loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in range(30):
    # Training mode (BatchNorm uses batch statistics, Dropout is active)
    # only has to be selected once per epoch, not once per batch.
    net.train()
    for i, (x, y) in enumerate(train_loader):
        # Inputs and labels must live on the same device as the model.
        x = x.to(device)
        y = y.to(device)

        pred = net(x)
        loss = loss_fn(pred, y)

        # Standard SGD step: clear old gradients, backpropagate, update.
        opt.zero_grad()
        loss.backward()
        opt.step()

        # Log the loss at batch 0 and then at every 1000th batch of the epoch.
        if i % 1000 == 0:
            print(epoch, loss.item())

训练期间损失输出：

0 2.4222264289855957
1 1.064765453338623
2 0.8770453333854675
3 0.7434887886047363
4 0.6620030403137207
5 0.6515734791755676
6 0.5817975401878357
7 0.39957278966903687
8 0.449314683675766
9 0.5886893272399902
10 0.36985668540000916
11 0.574312686920166
12 0.4228469133377075
13 0.4269474148750305
14 0.10617499053478241
15 0.07453690469264984
16 0.1128673255443573
17 0.14995695650577545
18 0.18256041407585144
19 0.2062997967004776
20 0.11573205143213272
21 0.10238634049892426
22 0.21939325332641602
23 0.09869613498449326
24 0.04613097757101059
25 0.1581094115972519
26 0.1776956021785736
27 0.015350659377872944
28 0.24017882347106934
29 0.0061236233450472355

测试

def rightness(predictions, labels):
    """Count how many predictions in a batch agree with their labels.

    Args:
        predictions: tensor of per-class scores; the predicted class of each
            sample is the index of the largest score along dimension 1.
        labels: tensor of true class indices, reshaped here to match the
            predicted-class tensor before comparison.

    Returns:
        A tuple ``(correct, total)`` where ``correct`` is a 0-dim tensor
        holding the number of matching predictions and ``total`` is
        ``len(labels)``.
    """
    # torch.max over a dimension yields (values, indices); only the indices
    # (the predicted classes) are needed.
    _, predicted = torch.max(predictions.detach(), dim=1)
    targets = labels.detach().view_as(predicted)
    n_correct = (predicted == targets).sum()
    return n_correct, len(labels)

# Running totals over the whole test set: correct predictions and samples seen.
rights = 0
length = 0

# Evaluate on whichever device the trained model already lives on.
eval_device = next(net.parameters()).device

# Inference mode (BatchNorm uses running statistics, Dropout is disabled);
# it only needs to be selected once, before the loop.
net.eval()

# No gradients are needed for evaluation, so skip building autograd graphs.
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(eval_device)
        y = y.to(eval_device)
        pred = net(x)
        # Score each batch once and reuse both parts of the result.
        batch_rights, batch_length = rightness(pred, y)
        rights += batch_rights
        length += batch_length

print(rights, length, rights/length)

准确率：

tensor(7613, device='cuda:0') 10000 tensor(0.7613, device='cuda:0')