{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.1从全连接层到卷积层"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"from torchvision import datasets, transforms\n",
"device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class CNNNet(nn.Module):\n",
" def __init__(self):\n",
" super(CNNNet,self).__init__()\n",
" self.conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,stride=1)\n",
" self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2)\n",
" self.conv2 = nn.Conv2d(in_channels=16,out_channels=36,kernel_size=3,stride=1)\n",
" self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)\n",
" self.fc1 = nn.Linear(1296,128)\n",
" self.fc2 = nn.Linear(128,10) \n",
"\n",
" def forward(self,x):\n",
" x=self.pool1(F.relu(self.conv1(x)))\n",
" x=self.pool2(F.relu(self.conv2(x)))\n",
" #print(x.shape)\n",
" x=x.view(-1,36*6*6)\n",
" x=F.relu(self.fc2(F.relu(self.fc1(x))))\n",
" return x\n",
"\n",
"net = CNNNet()\n",
"net=net.to(device)"
]
},
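{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check (a sketch added here; the 3x32x32 input size is an assumption, consistent with the CIFAR-10-style dimensions implied by `nn.Linear(1296, 128)`): a dummy batch is passed through the network to confirm the flattened feature size."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dummy batch of 4 RGB 32x32 images (assumed input size):\n",
"# 32 -> conv 5x5 -> 28 -> pool/2 -> 14 -> conv 3x3 -> 12 -> pool/2 -> 6,\n",
"# so the flattened size is 36 * 6 * 6 = 1296\n",
"dummy = torch.randn(4, 3, 32, 32).to(device)\n",
"print(net(dummy).shape)  # expected: torch.Size([4, 10])"
]
},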
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6.2 卷积层\n",
"1、用PyTorch自定义卷积运算"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def cust_conv2d(X, K): \n",
" \"\"\"实现卷积运算\"\"\"\n",
" #获取卷积核形状\n",
" h, w = K.shape\n",
" #初始化输出值Y\n",
" Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
" #实现卷积运算\n",
" for i in range(Y.shape[0]):\n",
" for j in range(Y.shape[1]):\n",
" Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
" return Y"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[4., 3., 4.],\n",
" [2., 4., 3.],\n",
" [2., 3., 4.]])"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = torch.tensor([[1.0,1.0,1.0,0.0,0.0], [0.0,1.0,1.0,1.0,0.0], \n",
" [0.0,0.0,1.0,1.0,1.0],[0.0,0.0,1.0,1.0,0.0],[0.0,1.0,1.0,0.0,0.0]])\n",
"K = torch.tensor([[1.0, 0.0,1.0], [0.0, 1.0,0.0],[1.0, 0.0,1.0]])\n",
"cust_conv2d(X, K)"
]
},
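{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a cross-check (added here as a sketch), the built-in `F.conv2d` should give the same result; it expects 4D `(batch, channels, height, width)` tensors, so `X` and `K` are reshaped accordingly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# F.conv2d computes cross-correlation, the same operation as cust_conv2d\n",
"expected = F.conv2d(X.reshape(1, 1, 5, 5), K.reshape(1, 1, 3, 3))\n",
"print(torch.allclose(cust_conv2d(X, K), expected.reshape(3, 3)))  # True"
]
},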
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.2.2 学习卷积核"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"X = torch.tensor([[10.,10.,10.,0.0,0.0,0.0], [10.,10.,10.,0.0,0.0,0.0], [10.,10.,10.,0.0,0.0,0.0],\n",
" [10.,10.,10.,0.0,0.0,0.0],[10.,10.,10.,0.0,0.0,0.0],[10.,10.,10.,0.0,0.0,0.0]])\n",
"Y = torch.tensor([[0.0, 30.0,30.0,0.0], [0.0, 30.0,30.0,0.0],[0.0, 30.0,30.0,0.0],[0.0, 30.0,30.0,0.0]])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"epoch 100, loss 0.0001\n",
"epoch 200, loss 0.0000\n",
"epoch 300, loss 0.0000\n",
"epoch 400, loss 0.0000\n"
]
}
],
"source": [
"# 构造一个二维卷积层它具有1个输出通道和形状为33的卷积核\n",
"conv2d = nn.Conv2d(1,1, kernel_size=(3, 3), bias=False)\n",
"\n",
"# 这个二维卷积层使用四维输入和输出格式(批量大小、通道、高度、宽度),\n",
"# 其中批量大小和通道数都为1\n",
"X = X.reshape((1, 1, 6, 6))\n",
"Y = Y.reshape((1, 1, 4, 4))\n",
"lr = 0.001 # 学习率\n",
"\n",
"#定义损失函数\n",
"loss_fn = torch.nn.MSELoss()\n",
"for i in range(400):\n",
" Y_pre = conv2d(X)\n",
" #loss = (Y_pre - Y) ** 2\n",
" loss=loss_fn(Y_pre,Y)\n",
" conv2d.zero_grad()\n",
" loss.backward()\n",
" # 迭代卷积核\n",
" conv2d.weight.data[:] -= lr * conv2d.weight.grad\n",
" if (i + 1) % 100 == 0:\n",
" print(f'epoch {i+1}, loss {loss.sum():.4f}')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1.0961, -0.1821, -0.8602],\n",
" [ 1.1862, 0.0869, -1.1806],\n",
" [ 0.7178, 0.0952, -0.9593]])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conv2d.weight.data.reshape((3,3))"
]
},
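{
"cell_type": "markdown",
"metadata": {},
"source": [
"The learned weights approximate a vertical-edge detector: roughly +1 in the left column, near 0 in the middle, and -1 in the right column, which maps the step edge in `X` to the target response `Y`. A quick verification (added for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# After training, the layer's output should be close to the target Y;\n",
"# the tolerance is loose since only 400 gradient steps were taken\n",
"print(torch.allclose(conv2d(X).detach(), Y, atol=0.5))"
]
},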
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.2.5 多通道上的卷积\n",
"\n",
"1、多输入"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def corr2d_mutil_in(X,K):\n",
" h,w = K.shape[1],K.shape[2]\n",
" value = torch.zeros(X.shape[0] - h + 1,X.shape[1] - w + 1)\n",
" for x,k in zip(X,K):\n",
" value = value + cust_conv2d(x,k)\n",
" return value"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"X = torch.tensor([[[1.,0.,1,0.,2.],[1,1,3,2,1],[1,1,0,1,1],[2,3,2,1,3],[0,2,0,1,0]],\n",
" [[1.,0.,0,1.,0.],[2,0,1,2,0],[3,1,1,3,0],[0,3,0,3,2],[1,0,3,2,1]],\n",
" [[2.,0.,1.,2.,1.],[3,3,1,3,2],[2,1,1,1,0],[3,1,3,2,0],[1,1,2,1,1]]])\n",
"K = torch.tensor([[[0.0,1.0,0.0],[0.0,0.0,2.0],[0.0,1.0,0.0]],\n",
" [[2.0,1.0,0.0],[0.0,0.0,0.0],[0.0,3.0,0.0]],\n",
" [[1.0,0.0,0.0],[1.0,0.0,0.0],[0.0,0.0,2.0]]])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 3, 3])\n"
]
},
{
"data": {
"text/plain": [
"tensor([[19., 13., 15.],\n",
" [28., 16., 20.],\n",
" [23., 18., 25.]])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(K.shape)\n",
"corr2d_mutil_in(X,K)"
]
},
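{
"cell_type": "markdown",
"metadata": {},
"source": [
"A cross-check with the built-in routine (added here): treating `X` as a one-sample batch with 3 channels and `K` as a single 3-input-channel filter, `F.conv2d` returns the same 3x3 feature map."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# unsqueeze adds the batch dimension to X and the output-channel\n",
"# dimension to K: shapes (1, 3, 5, 5) and (1, 3, 3, 3)\n",
"print(F.conv2d(X.unsqueeze(0), K.unsqueeze(0)).squeeze())"
]
},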
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2、多输出通道"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def corr2d_multi_in_out(X,K):\n",
" return torch.stack([corr2d_mutil_in(X,k) for k in K])"
]
},
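{
"cell_type": "markdown",
"metadata": {},
"source": [
"A short illustration (added here; `K2` is a hypothetical two-output-channel kernel built by stacking `K` with `K + 1`): each kernel in the stack produces one output feature map."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Stack two 3-input-channel kernels along a new output-channel axis\n",
"K2 = torch.stack([K, K + 1])\n",
"print(K2.shape)                          # torch.Size([2, 3, 3, 3])\n",
"print(corr2d_multi_in_out(X, K2).shape)  # torch.Size([2, 3, 3])"
]
},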
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3、1x1卷积核"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([3, 3, 1, 1])\n"
]
}
],
"source": [
"X = torch.tensor([[[1,2,3],[4,5,6],[7,8,9]],\n",
" [[1,1,1],[1,1,1],[1,1,1]],\n",
" [[2,2,2],[2,2,2],[2,2,2]]])\n",
" \n",
"K = torch.tensor([[[[1]],[[2]],[[3]]],\n",
" [[[4]],[[1]],[[1]]],\n",
" [[[5]],[[3]],[[3]]]])\n",
"print(K.shape)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[ 9., 10., 11.],\n",
" [12., 13., 14.],\n",
" [15., 16., 17.]],\n",
"\n",
" [[ 7., 11., 15.],\n",
" [19., 23., 27.],\n",
" [31., 35., 39.]],\n",
"\n",
" [[14., 19., 24.],\n",
" [29., 34., 39.],\n",
" [44., 49., 54.]]])"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def corr2d_multi_in_out(X,K):\n",
" return torch.stack([corr2d_mutil_in(X,k) for k in K])\n",
" \n",
"corr2d_multi_in_out(X,K)"
]
},
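{
"cell_type": "markdown",
"metadata": {},
"source": [
"A 1x1 convolution mixes channels without using any spatial neighborhood, so it is equivalent to applying the same fully connected layer at every pixel. The sketch below (added for illustration) reproduces the result with a single matrix multiplication."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the spatial dimensions, multiply by the (out_channels,\n",
"# in_channels) weight matrix, then restore the spatial shape\n",
"c_i, h, w = X.shape\n",
"c_o = K.shape[0]\n",
"out = (K.float().reshape(c_o, c_i) @ X.float().reshape(c_i, h * w)).reshape(c_o, h, w)\n",
"print(torch.allclose(out, corr2d_multi_in_out(X, K)))  # True"
]
},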
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h3>6.3池化层</h3>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.1局部池化"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([20, 16, 24, 31])\n"
]
}
],
"source": [
"# 池化窗口为正方形 size=3, stride=2\n",
"m1 = nn.MaxPool2d(3, stride=2)\n",
"# 池化窗口为非正方形\n",
"m2 = nn.MaxPool2d((3, 2), stride=(2, 1))\n",
"input = torch.randn(20, 16, 50, 32)\n",
"output = m2(input)\n",
"print(output.shape)"
]
},
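{
"cell_type": "markdown",
"metadata": {},
"source": [
"With window size k, stride s, and no padding, each output dimension of max pooling has length floor((n - k) / s) + 1. For `m2` above: height floor((50 - 3) / 2) + 1 = 24 and width floor((32 - 2) / 1) + 1 = 31, which matches the printed `torch.Size([20, 16, 24, 31])`."
]
},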
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.3.2全局池化"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([1, 64, 1, 1])\n"
]
}
],
"source": [
"# 输出大小为5x7\n",
"m = nn.AdaptiveMaxPool2d((5,7))\n",
"input = torch.randn(1, 64, 8, 9)\n",
"output = m(input)\n",
"# t输出大小为正方形 7x7 \n",
"m = nn.AdaptiveMaxPool2d(7)\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"# 输出大小为 10x7\n",
"m = nn.AdaptiveMaxPool2d((None, 7))\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"# 输出大小为 1x1\n",
"m = nn.AdaptiveMaxPool2d((1))\n",
"input = torch.randn(1, 64, 10, 9)\n",
"output = m(input)\n",
"print(output.size())"
]
},
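{
"cell_type": "markdown",
"metadata": {},
"source": [
"`nn.AdaptiveMaxPool2d((1, 1))` performs global max pooling: each of the 64 feature maps is reduced to a single value, so the output can be flattened into a per-image feature vector for a classifier head, regardless of the input's spatial size. A short check (added for illustration):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# flatten everything after the batch dimension: (1, 64, 1, 1) -> (1, 64)\n",
"print(output.flatten(1).shape)"
]
}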
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}