文章目录
LeNet是一个早期用来识别手写数字图像的卷积神经网络。这个名字来源于LeNet论文的第一作者Yann LeCun。LeNet展示了通过梯度下降训练卷积神经网络可以达到手写数字识别在当时最先进的结果。这个奠基性的工作第一次将卷积神经网络推上舞台,为世人所知。LeNet分为卷积层块和全连接层块两个部分,其网络结构如下图所示。
卷积层块
卷积层块里的基本单位是卷积层后接最大池化层:卷积层用来识别图像里的空间模式,如线条和物体局部,之后的最大池化层则用来降低卷积层对位置的敏感性。卷积层块由两个这样的基本单位重复堆叠构成。在卷积层块中,每个卷积层都使用5×5的窗口,并在输出上使用sigmoid激活函数。第一个卷积层输出通道数为6,第二个卷积层输出通道数则增加到16。这是因为第二个卷积层比第一个卷积层的输入的高和宽要小,所以增加输出通道使两个卷积层的参数尺寸类似。卷积层块的两个最大池化层的窗口形状均为2×2,且步幅为2。由于池化窗口与步幅形状相同,池化窗口在输入上每次滑动所覆盖的区域互不重叠。
全连接层块
卷积层块的输出形状为(批量大小, 通道, 高, 宽)。当卷积层块的输出传入全连接层块时,全连接层块会将小批量中每个样本变平(flatten)。也就是说,全连接层的输入形状将变成二维,其中第一维是小批量中的样本,第二维是每个样本变平后的向量表示,且向量长度为通道、高和宽的乘积。全连接层块含3个全连接层。它们的输出个数分别是120、84和10,其中10为输出的类别个数。
网络结构的pytorch实现
方法一:Sequential构造(完整代码见文末)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from torch import nn class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size nn.Sigmoid(), nn.MaxPool2d(2, 2), # kernel_size, stride nn.Conv2d(6, 16, 5), nn.Sigmoid(), nn.MaxPool2d(2, 2) ) self.fc = nn.Sequential( nn.Linear(16*5*5, 120), nn.Sigmoid(), nn.Linear(120, 84), nn.Sigmoid(), nn.Linear(84, 10), nn.LogSoftmax() ) def forward(self, img): feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output |
方法二
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from torch import nn class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() #1*1*32*32 self.conv1 = nn.Conv2d(1, 6, 5) self.conv2 = nn.Conv2d(6, 16, 5) self.fc1 = nn.Linear(5*5*16, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) def forward(self, x): in_size = x.size(0) # 1*32*32 out= self.conv1(x) # 6*28*28 out = F.sigmoid(out) out = F.max_pool2d(out, 2, 2) # 6*14*14 out = self.conv2(out) # 16*10*10 out = F.sigmoid(out) out = F.max_pool2d(out, 2, 2) # 16*5*5 out = out.view(in_size, -1) # 1*400 out = self.fc1(out) # 1*120 out = F.sigmoid(out) out = self.fc2(out) # 1*84 out = F.sigmoid(out) out = self.fc3(out) # 1*10 out = F.log_softmax(out, dim = 1) return out |
完整代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms BATCH_SIZE = 512 # 大概需要2G的显存 EPOCHS = 20 # 总共训练批次 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 下载训练集 train_loader = torch.utils.data.DataLoader( datasets.MNIST('data', train = True, download = True, transform = transforms.Compose([ transforms.Resize((32,32)), transforms.ToTensor(), transforms.Normalize((0.1037,), (0.3081,)) ])), batch_size = BATCH_SIZE, shuffle = True) # 测试集 test_loader = torch.utils.data.DataLoader( datasets.MNIST('data', train = False, transform = transforms.Compose([ transforms.Resize((32,32)), transforms.ToTensor(), transforms.Normalize((0.1037,), (0.3081,)) ])), batch_size = BATCH_SIZE, shuffle = True) class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() self.conv = nn.Sequential( nn.Conv2d(1, 6, 5), # 输入通道、输出通道、核大小(5*5的卷积核) nn.Sigmoid(), nn.MaxPool2d(2, 2), # 核大小(2*2的邻域内最大池化),步幅 nn.Conv2d(6, 16, 5), nn.Sigmoid(), nn.MaxPool2d(2, 2) ) self.fc = nn.Sequential( nn.Linear(16*5*5, 120), nn.Sigmoid(), nn.Linear(120, 84), nn.Sigmoid(), nn.Linear(84, 10), nn.LogSoftmax() ) def forward(self, img): feature = self.conv(img) output = self.fc(feature.view(img.shape[0], -1)) return output #生成模型和优化器 model = LeNet().to(DEVICE) optimizer = optim.Adam(model.parameters()) # 定义训练函数 def train(model, device, train_loader, optimizer, epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): data, target = data.to(device), target.to(device) optimizer.zero_grad() output = model(data) loss = F.nll_loss(output, target) loss.backward() optimizer.step() if (batch_idx + 1) % 30 == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( epoch, batch_idx * len(data), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item())) # 定义测试函数 def test(model, device, test_loader): model.eval() test_loss =0 correct = 0 with torch.no_grad(): for data, target in test_loader: data, target = data.to(device), target.to(device) output = model(data) test_loss += F.nll_loss(output, target, reduction = 'sum') # 将一批的损失相加 pred = output.max(1, keepdim = True)[1] # 找到概率最大的下标 correct += pred.eq(target.view_as(pred)).sum().item() test_loss /= len(test_loader.dataset) print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%) \n".format( test_loss, correct, len(test_loader.dataset), 100.* correct / len(test_loader.dataset) )) # 最后开始训练和测试 for epoch in range(1, EPOCHS + 1): train(model, DEVICE, train_loader, optimizer, epoch) test(model, DEVICE, test_loader) |
暂无评论