{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.1从全连接层到卷积层"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torchvision import datasets, transforms\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class CNNNet(nn.Module):\n",
" def __init__(self):\n",
" super(CNNNet,self).__init__()\n",
" self.conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,stride=1)\n",
" self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2)\n",
" self.conv2 = nn.Conv2d(in_channels=16,out_channels=36,kernel_size=3,stride=1)\n",
" self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)\n",
" self.fc1 = nn.Linear(1296,128)\n",
" self.fc2 = nn.Linear(128,10) \n",
"\n",
" def forward(self,x):\n",
" x=self.pool1(F.relu(self.conv1(x)))\n",
" x=self.pool2(F.relu(self.conv2(x)))\n",
" #print(x.shape)\n",
" x=x.view(-1,36*6*6)\n",
" x=F.relu(self.fc2(F.relu(self.fc1(x))))\n",
" return x\n",
"\n",
"net = CNNNet()\n",
"net=net.to(device)"
]
},
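{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check (a sketch added here; the 3x32x32 input size is an assumption, consistent with the CIFAR-10-style dimensions implied by `nn.Linear(1296, 128)`): a dummy batch is passed through the network to confirm the flattened feature size."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dummy batch of 4 RGB 32x32 images (assumed input size):\n",
"# 32 -> conv 5x5 -> 28 -> pool/2 -> 14 -> conv 3x3 -> 12 -> pool/2 -> 6,\n",
"# so the flattened size is 36 * 6 * 6 = 1296\n",
"dummy = torch.randn(4, 3, 32, 32).to(device)\n",
"print(net(dummy).shape)  # expected: torch.Size([4, 10])"
]
},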
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.2 卷积层\n",
"1、用PyTorch自定义卷积运算"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def cust_conv2d(X, K): \n",
" \"\"\"实现卷积运算\"\"\"\n",
" #获取卷积核形状\n",
" h, w = K.shape\n",
" #初始化输出值Y\n",
" Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
" #实现卷积运算\n",
" for i in range(Y.shape[0]):\n",
" for j in range(Y.shape[1]):\n",
" Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
" return Y"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[4., 3., 4.],\n",
" [2., 4., 3.],\n",
" [2., 3., 4.]])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = torch.tensor([[1.0,1.0,1.0,0.0,0.0], [0.0,1.0,1.0,1.0,0.0], \n",
" [0.0,0.0,1.0,1.0,1.0],[0.0,0.0,1.0,1.0,0.0],[0.0,1.0,1.0,0.0,0.0]])\n",
"K = torch.tensor([[1.0, 0.0,1.0], [0.0, 1.0,0.0],[1.0, 0.0,1.0]])\n",
"cust_conv2d(X, K)"
]
},
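{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a cross-check (added here as a sketch), the built-in `F.conv2d` should give the same result; it expects 4D `(batch, channels, height, width)` tensors, so `X` and `K` are reshaped accordingly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# F.conv2d computes cross-correlation, the same operation as cust_conv2d\n",
"expected = F.conv2d(X.reshape(1, 1, 5, 5), K.reshape(1, 1, 3, 3))\n",
"print(torch.allclose(cust_conv2d(X, K), expected.reshape(3, 3)))  # True"
]
},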
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.2.2 学习卷积核"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"X = torch.tensor([[10.,10.,10.,0.0,0.0,0.0], [10.,10.,10.,0.0,0.0,0.0], [10.,10.,10.,0.0,0.0,0.0],\n",
" [10.,10.,10.,0.0,0.0,0.0],[10.,10.,10.,0.0,0.0,0.0],[10.,10.,10.,0.0,0.0,0.0]])\n",
"Y = torch.tensor([[0.0, 30.0,30.0,0.0], [0.0, 30.0,30.0,0.0],[0.0, 30.0,30.0,0.0],[0.0, 30.0,30.0,0.0]])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 100, loss 0.0001\n",
"epoch 200, loss 0.0000\n",
"epoch 300, loss 0.0000\n",
"epoch 400, loss 0.0000\n"
]
}
],
"source": [
"# 构造一个二维卷积层它具有1个输出通道和形状为33的卷积核\n",
"conv2d = nn.Conv2d(1,1, kernel_size=(3, 3), bias=False)\n",
"\n",
"# 这个二维卷积层使用四维输入和输出格式(批量大小、通道、高度、宽度),\n",
"# 其中批量大小和通道数都为1\n",
"X = X.reshape((1, 1, 6, 6))\n",
"Y = Y.reshape((1, 1, 4, 4))\n",
"lr = 0.001 # 学习率\n",
"\n",
"#定义损失函数\n",
"loss_fn = torch.nn.MSELoss()\n",
"for i in range(400):\n",
" Y_pre = conv2d(X)\n",
" #loss = (Y_pre - Y) ** 2\n",
" loss=loss_fn(Y_pre,Y)\n",
" conv2d.zero_grad()\n",
" loss.backward()\n",
" # 迭代卷积核\n",
" conv2d.weight.data[:] -= lr * conv2d.weight.grad\n",
" if (i + 1) % 100 == 0:\n",
" print(f'epoch {i+1}, loss {loss.sum():.4f}')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1.0961, -0.1821, -0.8602],\n",
" [ 1.1862, 0.0869, -1.1806],\n",
" [ 0.7178, 0.0952, -0.9593]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conv2d.weight.data.reshape((3,3))"
]
},
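{
"cell_type": "markdown",
"metadata": {},
"source": [
"The learned weights approximate a vertical-edge detector: roughly +1 in the left column, near 0 in the middle, and -1 in the right column, which maps the step edge in `X` to the target response `Y`. A quick verification (added for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# After training, the layer's output should be close to the target Y;\n",
"# the tolerance is loose since only 400 gradient steps were taken\n",
"print(torch.allclose(conv2d(X).detach(), Y, atol=0.5))"
]
},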
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.2.5 多通道上的卷积\n",
"\n",
"1、多输入"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def corr2d_mutil_in(X,K):\n",
" h,w = K.shape[1],K.shape[2]\n",
" value = torch.zeros(X.shape[0] - h + 1,X.shape[1] - w + 1)\n",
" for x,k in zip(X,K):\n",
" value = value + cust_conv2d(x,k)\n",
" return value"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"X = torch.tensor([[[1.,0.,1,0.,2.],[1,1,3,2,1],[1,1,0,1,1],[2,3,2,1,3],[0,2,0,1,0]],\n",
" [[1.,0.,0,1.,0.],[2,0,1,2,0],[3,1,1,3,0],[0,3,0,3,2],[1,0,3,2,1]],\n",
" [[2.,0.,1.,2.,1.],[3,3,1,3,2],[2,1,1,1,0],[3,1,3,2,0],[1,1,2,1,1]]])\n",
"K = torch.tensor([[[0.0,1.0,0.0],[0.0,0.0,2.0],[0.0,1.0,0.0]],\n",
" [[2.0,1.0,0.0],[0.0,0.0,0.0],[0.0,3.0,0.0]],\n",
" [[1.0,0.0,0.0],[1.0,0.0,0.0],[0.0,0.0,2.0]]])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 3, 3])\n"
]
},
{
"data": {
"text/plain": [
"tensor([[19., 13., 15.],\n",
" [28., 16., 20.],\n",
" [23., 18., 25.]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(K.shape)\n",
"corr2d_mutil_in(X,K)"
]
},
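{
"cell_type": "markdown",
"metadata": {},
"source": [
"A cross-check with the built-in routine (added here): treating `X` as a one-sample batch with 3 channels and `K` as a single 3-input-channel filter, `F.conv2d` returns the same 3x3 feature map."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# unsqueeze adds the batch dimension to X and the output-channel\n",
"# dimension to K: shapes (1, 3, 5, 5) and (1, 3, 3, 3)\n",
"print(F.conv2d(X.unsqueeze(0), K.unsqueeze(0)).squeeze())"
]
},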
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2、多输出通道"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def corr2d_multi_in_out(X,K):\n",
" return torch.stack([corr2d_mutil_in(X,k) for k in K])"
]
},
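{
"cell_type": "markdown",
"metadata": {},
"source": [
"A short illustration (added here; `K2` is a hypothetical two-output-channel kernel built by stacking `K` with `K + 1`): each kernel in the stack produces one output feature map."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Stack two 3-input-channel kernels along a new output-channel axis\n",
"K2 = torch.stack([K, K + 1])\n",
"print(K2.shape)                          # torch.Size([2, 3, 3, 3])\n",
"print(corr2d_multi_in_out(X, K2).shape)  # torch.Size([2, 3, 3])"
]
},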
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3、1x1卷积核"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 3, 1, 1])\n"
]
}
],
"source": [
"X = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]],\n",
" [[1,1,1],[1,1,1],[1,1,1]],\n",
" [[2,2,2],[2,2,2],[2,2,2]]])\n",
" \n",
"K = torch.tensor([[[[1]],[[2]],[[3]]],\n",
" [[[4]],[[1]],[[1]]],\n",
" [[[5]],[[3]],[[3]]]])\n",
"print(K.shape)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[ 9., 10., 11.],\n",
" [12., 13., 14.],\n",
" [15., 16., 17.]],\n",
"\n",
" [[ 7., 11., 15.],\n",
" [19., 23., 27.],\n",
" [31., 35., 39.]],\n",
"\n",
" [[14., 19., 24.],\n",
" [29., 34., 39.],\n",
" [44., 49., 54.]]])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def corr2d_multi_in_out(X,K):\n",
" return torch.stack([corr2d_mutil_in(X,k) for k in K])\n",
" \n",
"corr2d_multi_in_out(X,K)"
]
},
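{
"cell_type": "markdown",
"metadata": {},
"source": [
"A 1x1 convolution mixes channels without using any spatial neighborhood, so it is equivalent to applying the same fully connected layer at every pixel. The sketch below (added for illustration) reproduces the result with a single matrix multiplication."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the spatial dimensions, multiply by the (out_channels,\n",
"# in_channels) weight matrix, then restore the spatial shape\n",
"c_i, h, w = X.shape\n",
"c_o = K.shape[0]\n",
"out = (K.float().reshape(c_o, c_i) @ X.float().reshape(c_i, h * w)).reshape(c_o, h, w)\n",
"print(torch.allclose(out, corr2d_multi_in_out(X, K)))  # True"
]
},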
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h3>6.3池化层</h3>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.1局部池化"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([20, 16, 24, 31])\n"
]
}
],
"source": [
"# 池化窗口为正方形 size=3, stride=2\n",
"m1 = nn.MaxPool2d(3, stride=2)\n",
"# 池化窗口为非正方形\n",
"m2 = nn.MaxPool2d((3, 2), stride=(2, 1))\n",
"input = torch.randn(20, 16, 50, 32)\n",
"output = m2(input)\n",
"print(output.shape)"
]
},
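{
"cell_type": "markdown",
"metadata": {},
"source": [
"With window size k, stride s, and no padding, each output dimension of max pooling has length floor((n - k) / s) + 1. For `m2` above: height floor((50 - 3) / 2) + 1 = 24 and width floor((32 - 2) / 1) + 1 = 31, which matches the printed `torch.Size([20, 16, 24, 31])`."
]
},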
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.2全局池化"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 64, 1, 1])\n"
]
}
],
"source": [
"# 输出大小为5x7\n",
"m = nn.AdaptiveMaxPool2d((5,7))\n",
"input = torch.randn(1, 64, 8, 9)\n",
"output = m(input)\n",
"# t输出大小为正方形 7x7 \n",
"m = nn.AdaptiveMaxPool2d(7)\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"# 输出大小为 10x7\n",
"m = nn.AdaptiveMaxPool2d((None, 7))\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"# 输出大小为 1x1\n",
"m = nn.AdaptiveMaxPool2d((1))\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"print(output.size())"
]
},
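{
"cell_type": "markdown",
"metadata": {},
"source": [
"`nn.AdaptiveMaxPool2d((1, 1))` performs global max pooling: each of the 64 feature maps is reduced to a single value, so the output can be flattened into a per-image feature vector for a classifier head, regardless of the input's spatial size. A short check (added for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# flatten everything after the batch dimension: (1, 64, 1, 1) -> (1, 64)\n",
"print(output.flatten(1).shape)"
]
}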
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}