Files
DL-with-Python-and-PyTorch2/pytorch-06/pytorch-06-03.ipynb
2024-01-08 00:44:52 +08:00

524 lines
20 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.6 模型集成提升性能\n",
"为改善一项机器学习或深度学习的任务,我们首先想到的是从模型、数据、优化器等方面进行优化,使用方法比较方便。不过尽管如此,有时效果仍不是很理想。此时,我们可尝试一下其他方法,如模型集成、迁移学习、数据增强等优化方法。这节我们将介绍如何利用模型集成来提升任务性能,后续章节将介绍利用迁移学习、数据增强等方法来提升任务的效果和质量。 \n",
"\t模型集成方法是提升分类器或预测系统的效果的重要方法,目前在机器学习、深度学习国际比赛中时常能看到利用模型集成取得佳绩的事例,该方法也经常用在生产环境中。模型集成的原理比较简单,有点像多个盲人摸象,每个盲人只能摸到大象的一部分,但综合每人摸到的部分,就能形成一个比较完整、符合实际的图像。每个盲人就像单个模型,那集成这些模型犹如综合这些盲人各自摸到的部分,就能得到一个强于单个模型的模型。\n",
"\t实际上,模型集成也和我们通常说的集思广益、投票选举领导人等原理差不多,是1+1>2的有效方法。 \n",
"\t当然,要使模型集成发挥效应,模型的多样性是非常重要的,使用不同架构甚至不同的学习方法是模型多样性的重要体现。如果只是改一下初始条件或调整几个参数,有时效果可能还不如单个模型。 \n",
"\t具体使用时,除要考虑各模型的差异性,还要考虑模型的性能。如果各模型性能差不多,可以取各模型预测结果的平均值;如果模型性能相差较大,模型集成后的性能可能还不及单个模型;相差较大时,可以采用加权平均的方法,其中权重可以采用SLSQP、Nelder-Mead、Powell、CG、BFGS等优化算法获取。\n",
"\t接下来,我们使用PyTorch具体实现一个模型集成的实例,希望通过这个实例让大家对模型集成有进一步的理解。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.6.1 使用模型\n",
"首先导入需要的模块,并定义一些超参数。"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import torch.nn.functional as F\n",
"import torch.backends.cudnn as cudnn\n",
"import numpy as np\n",
"import torchvision\n",
"import torchvision.transforms as transforms\n",
"from torch.utils.data import DataLoader\n",
"from collections import Counter\n",
"\n",
"# Hyperparameters\n",
"BATCHSIZE=100  # samples per evaluation batch\n",
"DOWNLOAD_MNIST=False  # legacy name; the later cells pass download=False directly and never read it\n",
"EPOCHES=20  # number of passes over the training set\n",
"LR=0.001  # learning rate handed to Adam\n",
"\n",
"# Fix the random seeds so weight initialisation and data shuffling are\n",
"# repeatable from one Restart & Run All to the next.\n",
"SEED=42\n",
"torch.manual_seed(SEED)\n",
"np.random.seed(SEED)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#定义相关模型结构,这三个网络结构比较接近"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class CNNNet(nn.Module):\n",
"    \"\"\"Two conv + max-pool stages followed by two fully connected layers.\n",
"\n",
"    fc1 expects 36*6*6 = 1296 features, which is what a 3x32x32 input\n",
"    produces (32 -> conv5 -> 28 -> pool -> 14 -> conv3 -> 12 -> pool -> 6).\n",
"    forward() returns raw class scores of shape (batch, 10).\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super(CNNNet,self).__init__()\n",
"        self.conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,stride=1)\n",
"        self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2)\n",
"        self.conv2 = nn.Conv2d(in_channels=16,out_channels=36,kernel_size=3,stride=1)\n",
"        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)\n",
"        self.fc1 = nn.Linear(1296,128)\n",
"        self.fc2 = nn.Linear(128,10)\n",
"\n",
"    def forward(self,x):\n",
"        x=self.pool1(F.relu(self.conv1(x)))\n",
"        x=self.pool2(F.relu(self.conv2(x)))\n",
"        # Flatten per sample; a wrong input size now fails loudly in fc1\n",
"        # instead of being silently folded into the batch dimension.\n",
"        x=x.view(x.size(0),-1)\n",
"        x=F.relu(self.fc1(x))\n",
"        # No ReLU on the last layer: CrossEntropyLoss expects unbounded logits,\n",
"        # and clamping them at zero prevents the model from pushing a class score down.\n",
"        return self.fc2(x)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Net(nn.Module):\n",
"    \"\"\"Two conv + max-pool stages, global average pooling, one linear layer.\n",
"\n",
"    The adaptive average pool reduces each of the 36 feature maps to a single\n",
"    value, so fc3 maps 36 features to 10 class scores.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5)\n",
"        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)\n",
"        self.conv2 = nn.Conv2d(in_channels=16, out_channels=36, kernel_size=5)\n",
"        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)\n",
"        self.aap = nn.AdaptiveAvgPool2d(output_size=1)\n",
"        self.fc3 = nn.Linear(in_features=36, out_features=10)\n",
"\n",
"    def forward(self, x):\n",
"        stage1 = self.pool1(F.relu(self.conv1(x)))\n",
"        stage2 = self.pool2(F.relu(self.conv2(stage1)))\n",
"        pooled = self.aap(stage2)\n",
"        # (batch, 36, 1, 1) -> (batch, 36)\n",
"        flat = pooled.view(pooled.size(0), -1)\n",
"        return self.fc3(flat)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class LeNet(nn.Module):\n",
"    \"\"\"LeNet-style classifier: two 5x5 convolutions, then three linear layers.\n",
"\n",
"    Each convolution is followed by ReLU and 2x2 max pooling. fc1 expects\n",
"    16*5*5 features, which is what a 3x32x32 input produces\n",
"    (32 -> 28 -> 14 -> 10 -> 5).\n",
"    \"\"\"\n",
"\n",
"    def __init__(self):\n",
"        super().__init__()\n",
"        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)\n",
"        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)\n",
"        self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)\n",
"        self.fc2 = nn.Linear(in_features=120, out_features=84)\n",
"        self.fc3 = nn.Linear(in_features=84, out_features=10)\n",
"\n",
"    def forward(self, x):\n",
"        x = F.max_pool2d(F.relu(self.conv1(x)), 2)\n",
"        x = F.max_pool2d(F.relu(self.conv2(x)), 2)\n",
"        # Flatten everything except the batch dimension\n",
"        x = x.view(x.size(0), -1)\n",
"        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))\n",
"        return self.fc3(hidden)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Layer specs: an int is the output channel count of a 3x3 conv block,\n",
"# 'M' is a 2x2 max pool.\n",
"cfg = {\n",
"    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],\n",
"    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],\n",
"}\n",
"\n",
"\n",
"class VGG(nn.Module):\n",
"    \"\"\"VGG-style network assembled from one of the specs in `cfg`.\n",
"\n",
"    Args:\n",
"        vgg_name: key into `cfg` ('VGG16' or 'VGG19'); an unknown key raises KeyError.\n",
"\n",
"    The classifier is a single Linear(512, 10), so the feature extractor must\n",
"    leave 512 values per sample after flattening.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, vgg_name):\n",
"        super().__init__()\n",
"        self.features = self._make_layers(cfg[vgg_name])\n",
"        self.classifier = nn.Linear(512, 10)\n",
"\n",
"    def forward(self, x):\n",
"        feats = self.features(x)\n",
"        flat = feats.view(feats.size(0), -1)\n",
"        return self.classifier(flat)\n",
"\n",
"    def _make_layers(self, cfg):\n",
"        \"\"\"Turn a layer spec into an nn.Sequential feature extractor.\"\"\"\n",
"        modules = []\n",
"        channels = 3  # first conv block consumes 3-channel input\n",
"        for spec in cfg:\n",
"            if spec == 'M':\n",
"                modules.append(nn.MaxPool2d(kernel_size=2, stride=2))\n",
"                continue\n",
"            modules.extend([\n",
"                nn.Conv2d(channels, spec, kernel_size=3, padding=1),\n",
"                nn.BatchNorm2d(spec),\n",
"                nn.ReLU(inplace=True),\n",
"            ])\n",
"            channels = spec\n",
"        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))\n",
"        return nn.Sequential(*modules)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#导入数据,这里数据已下载到本地,故设download=False"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==> Preparing data..\n",
"==> Building model..\n"
]
}
],
"source": [
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"# Per-channel mean / std passed to Normalize for both splits\n",
"NORM_MEAN = (0.4914, 0.4822, 0.4465)\n",
"NORM_STD = (0.2023, 0.1994, 0.2010)\n",
"TRAIN_BATCHSIZE = 128\n",
"\n",
"# Data\n",
"print('==> Preparing data..')\n",
"# Training split: random crop + horizontal flip augmentation, then normalize\n",
"transform_train = transforms.Compose([\n",
"    transforms.RandomCrop(32, padding=4),\n",
"    transforms.RandomHorizontalFlip(),\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize(NORM_MEAN, NORM_STD),\n",
"])\n",
"\n",
"# Test split: no augmentation, same normalization\n",
"transform_test = transforms.Compose([\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize(NORM_MEAN, NORM_STD),\n",
"])\n",
"\n",
"# download=False: the dataset is expected to be present under ./data already\n",
"trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_train)\n",
"trainloader = DataLoader(trainset, batch_size=TRAIN_BATCHSIZE, shuffle=True, num_workers=2)\n",
"\n",
"testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)\n",
"# Use the shared BATCHSIZE constant so the evaluation code and the loader cannot drift apart\n",
"testloader = DataLoader(testset, batch_size=BATCHSIZE, shuffle=False, num_workers=2)\n",
"\n",
"classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')\n",
"\n",
"# Model\n",
"print('==> Building model..')\n",
"net1 = CNNNet()\n",
"net2=Net()\n",
"net3=LeNet()\n",
"net4 = VGG('VGG16')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.6.2 集成方法\n",
"模型集成方法采用类似投票机制的方法,具体代码如下:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch:0集成模型的正确率38.76\n",
"模型0的正确率为25.27\n",
"模型1的正确率为40.68\n",
"模型2的正确率为44.89\n",
"epoch:1集成模型的正确率46.69\n",
"模型0的正确率为31.06\n",
"模型1的正确率为44.93\n",
"模型2的正确率为50.43\n",
"epoch:2集成模型的正确率49.19\n",
"模型0的正确率为31.24\n",
"模型1的正确率为48.91\n",
"模型2的正确率为53.4\n",
"epoch:3集成模型的正确率49.56\n",
"模型0的正确率为30.91\n",
"模型1的正确率为49.52\n",
"模型2的正确率为54.34\n",
"epoch:4集成模型的正确率51.65\n",
"模型0的正确率为29.83\n",
"模型1的正确率为51.19\n",
"模型2的正确率为56.54\n",
"epoch:5集成模型的正确率52.93\n",
"模型0的正确率为32.92\n",
"模型1的正确率为53.14\n",
"模型2的正确率为57.35\n",
"epoch:6集成模型的正确率54.58\n",
"模型0的正确率为33.13\n",
"模型1的正确率为54.16\n",
"模型2的正确率为58.76\n",
"epoch:7集成模型的正确率56.25\n",
"模型0的正确率为33.51\n",
"模型1的正确率为55.6\n",
"模型2的正确率为60.35\n",
"epoch:8集成模型的正确率57.24\n",
"模型0的正确率为32.98\n",
"模型1的正确率为55.03\n",
"模型2的正确率为60.61\n",
"epoch:9集成模型的正确率56.95\n",
"模型0的正确率为33.79\n",
"模型1的正确率为57.36\n",
"模型2的正确率为60.62\n",
"epoch:10集成模型的正确率57.88\n",
"模型0的正确率为33.86\n",
"模型1的正确率为58.23\n",
"模型2的正确率为62.28\n",
"epoch:11集成模型的正确率58.0\n",
"模型0的正确率为33.63\n",
"模型1的正确率为58.01\n",
"模型2的正确率为61.61\n",
"epoch:12集成模型的正确率59.45\n",
"模型0的正确率为34.04\n",
"模型1的正确率为59.15\n",
"模型2的正确率为63.92\n",
"epoch:13集成模型的正确率59.52\n",
"模型0的正确率为34.06\n",
"模型1的正确率为59.71\n",
"模型2的正确率为63.98\n",
"epoch:14集成模型的正确率60.5\n",
"模型0的正确率为34.01\n",
"模型1的正确率为59.18\n",
"模型2的正确率为63.94\n",
"epoch:15集成模型的正确率60.41\n",
"模型0的正确率为34.48\n",
"模型1的正确率为59.61\n",
"模型2的正确率为63.12\n",
"epoch:16集成模型的正确率59.57\n",
"模型0的正确率为34.57\n",
"模型1的正确率为60.2\n",
"模型2的正确率为64.42\n",
"epoch:17集成模型的正确率60.24\n",
"模型0的正确率为33.83\n",
"模型1的正确率为60.45\n",
"模型2的正确率为65.35\n",
"epoch:18集成模型的正确率62.21\n",
"模型0的正确率为34.24\n",
"模型1的正确率为61.23\n",
"模型2的正确率为65.96\n",
"epoch:19集成模型的正确率60.29\n",
"模型0的正确率为34.44\n",
"模型1的正确率为61.08\n",
"模型2的正确率为65.6\n"
]
}
],
"source": [
"# Put the three network models in one list\n",
"mlps=[net1.to(device),net2.to(device),net3.to(device)]\n",
"\n",
"# A single Adam optimizer with one parameter group per model\n",
"optimizer=torch.optim.Adam([{\"params\":mlp.parameters()} for mlp in mlps],lr=LR)\n",
"\n",
"loss_function=nn.CrossEntropyLoss()\n",
"\n",
"\n",
"def majority_vote(predictions):\n",
"    \"\"\"Return the most frequent label for every sample.\n",
"\n",
"    Args:\n",
"        predictions: sequence of per-model label arrays, i.e. shape (n_models, n_samples).\n",
"\n",
"    Returns:\n",
"        1-D numpy array with one voted label per sample. On a tie,\n",
"        Counter.most_common keeps first-seen order, so the earliest model wins.\n",
"    \"\"\"\n",
"    votes=np.asarray(predictions)\n",
"    # Iterate over the real batch width so a short final batch also works\n",
"    return np.array([Counter(votes[:,i]).most_common(1)[0][0] for i in range(votes.shape[1])])\n",
"\n",
"\n",
"for ep in range(EPOCHES):\n",
"    # ---- training ----\n",
"    for mlp in mlps:\n",
"        mlp.train()\n",
"    for img,label in trainloader:\n",
"        img,label=img.to(device),label.to(device)\n",
"        optimizer.zero_grad()  # clear the gradients of every model\n",
"        for mlp in mlps:\n",
"            out=mlp(img)\n",
"            loss=loss_function(out,label)\n",
"            loss.backward()  # each loss only reaches its own model's parameters\n",
"        optimizer.step()\n",
"\n",
"    # ---- evaluation ----\n",
"    for mlp in mlps:\n",
"        mlp.eval()\n",
"    vote_correct=0\n",
"    mlps_correct=[0 for _ in mlps]\n",
"    total=0\n",
"    with torch.no_grad():  # no autograd graph is needed while testing\n",
"        for img,label in testloader:\n",
"            img=img.to(device)\n",
"            label_np=label.cpu().numpy()\n",
"            pre=[]\n",
"            for i, mlp in enumerate(mlps):\n",
"                out=mlp(img)\n",
"                _,prediction=torch.max(out,1)  # index of the largest score per row\n",
"                pre_num=prediction.cpu().numpy()\n",
"                mlps_correct[i]+=int((pre_num==label_np).sum())\n",
"                pre.append(pre_num)\n",
"            vote_correct+=int((majority_vote(pre)==label_np).sum())\n",
"            total+=len(label_np)\n",
"\n",
"    # Accuracy in percent, computed from the number of samples actually evaluated\n",
"    print(\"epoch:\" + str(ep)+\"集成模型的正确率\"+str(100*vote_correct/total))\n",
"\n",
"    for idx, correct in enumerate(mlps_correct):\n",
"        print(\"模型\"+str(idx)+\"的正确率为:\"+str(100*correct/total))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"当各模型性能相近且具有多样性时,集成模型的精度通常高于各单个模型的精度,这就是模型集成魅力所在;但如前所述,若其中某个模型明显偏弱,简单投票的结果也可能不及最好的单个模型。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.7 使用现代经典模型提升性能\n",
"前面我们利用一些比较简单的模型对CIFAR10数据集进行分类,精度在68%左右,然后使用模型集成方法使精度提升到74%左右。虽有一定提升,但结果还不是很理想。\n",
"\t精度提升不高,很大程度与模型有关。前面我们介绍的一些现代经典网络在大赛中都取得了不俗的成绩,说明其模型结构有很多突出的优点,所以人们经常直接使用这些经典模型作为数据的分类器。这里我们就用VGG16模型来对CIFAR10数据集进行分类,直接效果非常不错,精度一下提高到90%左右,效果非常显著。\n",
"\t以下是使用VGG16模型进行训练和测试的代码。"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VGG16模型迭代0次的正确率为49.72\n",
"VGG16模型迭代1次的正确率为64.76\n",
"VGG16模型迭代2次的正确率为72.55\n",
"VGG16模型迭代3次的正确率为75.42\n",
"VGG16模型迭代4次的正确率为79.24\n",
"VGG16模型迭代5次的正确率为79.82\n",
"VGG16模型迭代6次的正确率为82.19\n",
"VGG16模型迭代7次的正确率为82.14\n",
"VGG16模型迭代8次的正确率为84.04\n",
"VGG16模型迭代9次的正确率为84.61\n",
"VGG16模型迭代10次的正确率为87.26\n",
"VGG16模型迭代11次的正确率为85.57\n",
"VGG16模型迭代12次的正确率为85.55\n",
"VGG16模型迭代13次的正确率为86.79\n",
"VGG16模型迭代14次的正确率为88.49\n",
"VGG16模型迭代15次的正确率为87.19\n",
"VGG16模型迭代16次的正确率为88.86\n",
"VGG16模型迭代17次的正确率为88.56\n",
"VGG16模型迭代18次的正确率为88.84\n",
"VGG16模型迭代19次的正确率为88.19\n"
]
}
],
"source": [
"# Only the VGG16 model this time; the list form mirrors the ensemble cell\n",
"mlps=[net4.to(device)]\n",
"\n",
"optimizer=torch.optim.Adam([{\"params\":mlp.parameters()} for mlp in mlps],lr=LR)\n",
"\n",
"loss_function=nn.CrossEntropyLoss()\n",
"\n",
"for ep in range(EPOCHES):\n",
"    # ---- training ----\n",
"    for mlp in mlps:\n",
"        mlp.train()  # BatchNorm layers use batch statistics while training\n",
"    for img,label in trainloader:\n",
"        img,label=img.to(device),label.to(device)\n",
"        optimizer.zero_grad()  # clear the gradients of the model\n",
"        for mlp in mlps:\n",
"            out=mlp(img)\n",
"            loss=loss_function(out,label)\n",
"            loss.backward()\n",
"        optimizer.step()\n",
"\n",
"    # ---- evaluation ----\n",
"    for mlp in mlps:\n",
"        mlp.eval()  # BatchNorm layers switch to their running statistics\n",
"    mlps_correct=[0 for _ in mlps]\n",
"    total=0\n",
"    with torch.no_grad():  # no autograd graph is needed while testing\n",
"        for img,label in testloader:\n",
"            img,label=img.to(device),label.to(device)\n",
"            for i, mlp in enumerate(mlps):\n",
"                out=mlp(img)\n",
"                _,prediction=torch.max(out,1)  # index of the largest score per row\n",
"                mlps_correct[i]+=(prediction==label).sum().item()\n",
"            total+=label.size(0)\n",
"\n",
"    # Accuracy in percent, computed from the number of samples actually evaluated\n",
"    for idx, correct in enumerate(mlps_correct):\n",
"        print(\"VGG16模型迭代\"+str(ep)+\"次的正确率为:\"+str(100*correct/total))\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"后续我们还会介绍如何使用迁移学习、数据增强等方法提升分类器的性能。对于同样的这个数据集,采用这些方法可以使精度达到95%左右。"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}