mirror of
https://github.com/fendouai/PyTorchDocs.git
synced 2025-10-20 21:33:46 +08:00
update the format
This commit is contained in:
28
.idea/workspace.xml
generated
28
.idea/workspace.xml
generated
@ -2,7 +2,7 @@
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="dba69579-024f-442a-b5bf-b31a92577c82" name="Default Changelist" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
@ -17,7 +17,7 @@
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="540">
|
||||
<caret line="36" column="19" selection-end-line="50" selection-end-column="46" />
|
||||
<caret line="36" column="19" selection-end-line="53" />
|
||||
</first_editor>
|
||||
<second_editor />
|
||||
</state>
|
||||
@ -29,7 +29,7 @@
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="45">
|
||||
<caret line="3" column="52" selection-start-line="3" selection-start-column="52" selection-end-line="3" selection-end-column="52" />
|
||||
<caret line="3" column="68" selection-start-line="3" selection-start-column="68" selection-end-line="3" selection-end-column="68" />
|
||||
</first_editor>
|
||||
<second_editor />
|
||||
</state>
|
||||
@ -187,18 +187,18 @@
|
||||
<workItem from="1556094040133" duration="12000" />
|
||||
<workItem from="1565696349206" duration="1282000" />
|
||||
<workItem from="1565697965673" duration="582000" />
|
||||
<workItem from="1565699959770" duration="713000" />
|
||||
<workItem from="1565699959770" duration="1208000" />
|
||||
<workItem from="1566207499309" duration="9000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="2589000" />
|
||||
<option name="totallyTimeSpent" value="3093000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="0" y="23" width="1440" height="830" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.28040057" />
|
||||
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.2825465" />
|
||||
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
|
||||
<window_info id="Favorites" order="2" side_tool="true" />
|
||||
<window_info anchor="bottom" id="Message" order="0" />
|
||||
@ -212,7 +212,7 @@
|
||||
<window_info anchor="bottom" id="Version Control" order="8" />
|
||||
<window_info anchor="bottom" id="Database Changes" order="9" />
|
||||
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
|
||||
<window_info anchor="bottom" id="Terminal" order="11" visible="true" weight="0.096205965" />
|
||||
<window_info active="true" anchor="bottom" id="Terminal" order="11" visible="true" weight="0.096205965" />
|
||||
<window_info anchor="bottom" id="Python Console" order="12" />
|
||||
<window_info anchor="right" id="Commander" order="0" weight="0.4" />
|
||||
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
|
||||
@ -283,13 +283,7 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/tensorTest.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="351">
|
||||
<caret line="13" column="13" selection-start-line="13" selection-start-column="13" selection-end-line="13" selection-end-column="13" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/tensorTest.py" />
|
||||
<entry file="file://$PROJECT_DIR$/fourSection/NeuralTransfer.md">
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
||||
<state split_layout="SPLIT">
|
||||
@ -448,7 +442,7 @@
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="540">
|
||||
<caret line="36" column="19" selection-end-line="50" selection-end-column="46" />
|
||||
<caret line="36" column="19" selection-end-line="53" />
|
||||
</first_editor>
|
||||
<second_editor />
|
||||
</state>
|
||||
@ -458,7 +452,7 @@
|
||||
<provider selected="true" editor-type-id="split-provider[text-editor;markdown-preview-editor]">
|
||||
<state split_layout="SPLIT">
|
||||
<first_editor relative-caret-position="45">
|
||||
<caret line="3" column="52" selection-start-line="3" selection-start-column="52" selection-end-line="3" selection-end-column="52" />
|
||||
<caret line="3" column="68" selection-start-line="3" selection-start-column="68" selection-end-line="3" selection-end-column="68" />
|
||||
</first_editor>
|
||||
<second_editor />
|
||||
</state>
|
||||
|
@ -25,72 +25,99 @@
|
||||
6.更新网络的参数,典型的用一个简单的更新方法:<span class="pre">weight</span> <span class="pre">=</span> <span class="pre">weight</span> <span class="pre">-</span> <span class="pre">learning_rate</span> <span class="pre">*</span><span class="pre">gradient</span>
|
||||
|
||||
定义神经网络
|
||||
<pre><span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="kn">as</span> <span class="nn">nn</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="kn">as</span> <span class="nn">F</span>
|
||||
|
||||
```python
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">Net</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
||||
class Net(nn.Module):
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">Net</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="c1"># 1 input image channel, 6 output channels, 5x5 square convolution</span>
|
||||
<span class="c1"># kernel</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
|
||||
<span class="c1"># an affine operation: y = Wx + b</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">16</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">120</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">120</span><span class="p">,</span> <span class="mi">84</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc3</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">84</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
# 1 input image channel, 6 output channels, 5x5 square convolution
|
||||
# kernel
|
||||
self.conv1 = nn.Conv2d(1, 6, 5)
|
||||
self.conv2 = nn.Conv2d(6, 16, 5)
|
||||
# an affine operation: y = Wx + b
|
||||
self.fc1 = nn.Linear(16 * 5 * 5, 120)
|
||||
self.fc2 = nn.Linear(120, 84)
|
||||
self.fc3 = nn.Linear(84, 10)
|
||||
|
||||
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="c1"># Max pooling over a (2, 2) window</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max_pool2d</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conv1</span><span class="p">(</span><span class="n">x</span><span class="p">)),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">))</span>
|
||||
<span class="c1"># If the size is a square you can only specify a single number</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max_pool2d</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conv2</span><span class="p">(</span><span class="n">x</span><span class="p">)),</span> <span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">num_flat_features</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc2</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fc3</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">x</span>
|
||||
def forward(self, x):
|
||||
# Max pooling over a (2, 2) window
|
||||
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
|
||||
# If the size is a square you can only specify a single number
|
||||
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
|
||||
x = x.view(-1, self.num_flat_features(x))
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.relu(self.fc2(x))
|
||||
x = self.fc3(x)
|
||||
return x
|
||||
|
||||
<span class="k">def</span> <span class="nf">num_flat_features</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="n">size</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># all dimensions except the batch dimension</span>
|
||||
<span class="n">num_features</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">size</span><span class="p">:</span>
|
||||
<span class="n">num_features</span> <span class="o">*=</span> <span class="n">s</span>
|
||||
<span class="k">return</span> <span class="n">num_features</span>
|
||||
def num_flat_features(self, x):
|
||||
size = x.size()[1:] # all dimensions except the batch dimension
|
||||
num_features = 1
|
||||
for s in size:
|
||||
num_features *= s
|
||||
return num_features
|
||||
|
||||
|
||||
<span class="n">net</span> <span class="o">=</span> <span class="n">Net</span><span class="p">()</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">net</span><span class="p">)</span></pre>
|
||||
net = Net()
|
||||
print(net)
|
||||
```
|
||||
输出:
|
||||
<pre>Net(
|
||||
```python
|
||||
|
||||
Net(
|
||||
(conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
|
||||
(conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
|
||||
(fc1): Linear(in_features=400, out_features=120, bias=True)
|
||||
(fc2): Linear(in_features=120, out_features=84, bias=True)
|
||||
(fc3): Linear(in_features=84, out_features=10, bias=True)
|
||||
)</pre>
|
||||
)
|
||||
|
||||
```
|
||||
你只需要定义前馈(forward)函数,反向传播(backward)函数会由 autograd 自动定义。你可以在前馈函数中使用任何张量操作。
|
||||
|
||||
一个模型可训练的参数可以通过调用 net.parameters() 返回:
|
||||
<pre><span class="n">params</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">params</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">size</span><span class="p">())</span> <span class="c1"># conv1's .weight</span></pre>
|
||||
```python
|
||||
|
||||
params = list(net.parameters())
|
||||
print(len(params))
|
||||
print(params[0].size()) # conv1's .weight
|
||||
|
||||
```
|
||||
输出:
|
||||
<pre>10
|
||||
torch.Size([6, 1, 5, 5])</pre>
|
||||
```python
|
||||
10
|
||||
torch.Size([6, 1, 5, 5])
|
||||
```
|
||||
让我们尝试随机生成一个 32x32 的输入。注意:期望的输入维度是 32x32。为了在 MNIST 数据集上使用这个网络,你需要把数据集中的图片维度修改为 32x32。
|
||||
<pre><span class="nb">input</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">32</span><span class="p">)</span>
|
||||
<span class="n">out</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">out</span><span class="p">)</span></pre>
|
||||
|
||||
```python
|
||||
|
||||
|
||||
input = torch.randn(1, 1, 32, 32)
|
||||
out = net(input)
|
||||
print(out)
|
||||
```
|
||||
输出:
|
||||
<pre>tensor([[-0.0233, 0.0159, -0.0249, 0.1413, 0.0663, 0.0297, -0.0940, -0.0135,
|
||||
0.1003, -0.0559]], grad_fn=<AddmmBackward>)</pre>
|
||||
|
||||
```python
|
||||
|
||||
tensor([[-0.0233, 0.0159, -0.0249, 0.1413, 0.0663, 0.0297, -0.0940, -0.0135,
|
||||
0.1003, -0.0559]], grad_fn=<AddmmBackward>)
|
||||
```
|
||||
把所有参数梯度缓存器置零,用随机的梯度来反向传播
|
||||
<pre><span class="n">net</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
||||
<span class="n">out</span><span class="o">.</span><span class="n">backward</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">))</span></pre>
|
||||
```python
|
||||
|
||||
net.zero_grad()
|
||||
out.backward(torch.randn(1, 10))
|
||||
```
|
||||
|
||||
在继续之前,让我们复习一下所有见过的类。
|
||||
|
||||
torch.Tensor - A multi-dimensional array with support for autograd operations like backward(). Also holds the gradient w.r.t. the tensor.
|
||||
@ -117,53 +144,72 @@ autograd.Function - Implements forward and backward definitions of an autograd o
|
||||
有一些不同的损失函数在 nn 包中。一个简单的损失函数就是 nn.MSELoss ,这计算了均方误差。
|
||||
|
||||
例如:
|
||||
<pre><span class="n">output</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="n">target</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span> <span class="c1"># a dummy target, for example</span>
|
||||
<span class="n">target</span> <span class="o">=</span> <span class="n">target</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># make it the same shape as output</span>
|
||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">MSELoss</span><span class="p">()</span>
|
||||
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">loss</span><span class="p">)</span></pre>
|
||||
```python
|
||||
output = net(input)
|
||||
target = torch.randn(10) # a dummy target, for example
|
||||
target = target.view(1, -1) # make it the same shape as output
|
||||
criterion = nn.MSELoss()
|
||||
|
||||
loss = criterion(output, target)
|
||||
print(loss)
|
||||
|
||||
```
|
||||
输出:
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>tensor(1.3389, grad_fn=<MseLossBackward>)</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
```python
|
||||
tensor(1.3389, grad_fn=<MseLossBackward>)
|
||||
|
||||
```
|
||||
|
||||
现在,如果你跟随损失到反向传播路径,可以使用它的 .grad_fn 属性,你将会看到一个这样的计算图:
|
||||
<pre><span class="nb">input</span> <span class="o">-></span> <span class="n">conv2d</span> <span class="o">-></span> <span class="n">relu</span> <span class="o">-></span> <span class="n">maxpool2d</span> <span class="o">-></span> <span class="n">conv2d</span> <span class="o">-></span> <span class="n">relu</span> <span class="o">-></span> <span class="n">maxpool2d</span>
|
||||
<span class="o">-></span> <span class="n">view</span> <span class="o">-></span> <span class="n">linear</span> <span class="o">-></span> <span class="n">relu</span> <span class="o">-></span> <span class="n">linear</span> <span class="o">-></span> <span class="n">relu</span> <span class="o">-></span> <span class="n">linear</span>
|
||||
<span class="o">-></span> <span class="n">MSELoss</span>
|
||||
<span class="o">-></span> <span class="n">loss</span></pre>
|
||||
|
||||
```python
|
||||
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
|
||||
-> view -> linear -> relu -> linear -> relu -> linear
|
||||
-> MSELoss
|
||||
-> loss
|
||||
```
|
||||
|
||||
所以,当我们调用 loss.backward() 时,整个图都会被微分,图中所有 requires_grad=True 的张量的 grad 张量都会累积梯度。
|
||||
|
||||
为了演示,我们将跟随以下步骤来反向传播。
|
||||
<pre><span class="k">print</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">grad_fn</span><span class="p">)</span> <span class="c1"># MSELoss</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">grad_fn</span><span class="o">.</span><span class="n">next_functions</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="c1"># Linear</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">grad_fn</span><span class="o">.</span><span class="n">next_functions</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">next_functions</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span> <span class="c1"># ReLU</span></pre>
|
||||
|
||||
```python
|
||||
print(loss.grad_fn) # MSELoss
|
||||
print(loss.grad_fn.next_functions[0][0]) # Linear
|
||||
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU
|
||||
|
||||
```
|
||||
输出:
|
||||
<pre><MseLossBackward object at 0x7fab77615278>
|
||||
<AddmmBackward object at 0x7fab77615940>
|
||||
<AccumulateGrad object at 0x7fab77615940></pre>
|
||||
```python
|
||||
<MseLossBackward object at 0x7fab77615278>
|
||||
<AddmmBackward object at 0x7fab77615940>
|
||||
<AccumulateGrad object at 0x7fab77615940>
|
||||
```
|
||||
反向传播
|
||||
|
||||
为了实现反向传播损失,我们所有需要做的事情仅仅是使用 loss.backward()。你需要清空现存的梯度,要不然梯度将会和现存的梯度累积到一起。
|
||||
|
||||
现在我们调用 loss.backward(),然后看一下 conv1 的偏置项在反向传播之前和之后的变化。
|
||||
<pre><span class="n">net</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span> <span class="c1"># zeroes the gradient buffers of all parameters</span>
|
||||
```python
|
||||
net.zero_grad() # zeroes the gradient buffers of all parameters
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'conv1.bias.grad before backward'</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">conv1</span><span class="o">.</span><span class="n">bias</span><span class="o">.</span><span class="n">grad</span><span class="p">)</span>
|
||||
print('conv1.bias.grad before backward')
|
||||
print(net.conv1.bias.grad)
|
||||
|
||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
||||
loss.backward()
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'conv1.bias.grad after backward'</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">conv1</span><span class="o">.</span><span class="n">bias</span><span class="o">.</span><span class="n">grad</span><span class="p">)</span></pre>
|
||||
print('conv1.bias.grad after backward')
|
||||
print(net.conv1.bias.grad)
|
||||
```
|
||||
输出:
|
||||
<pre>conv1.bias.grad before backward
|
||||
```python
|
||||
conv1.bias.grad before backward
|
||||
tensor([0., 0., 0., 0., 0., 0.])
|
||||
conv1.bias.grad after backward
|
||||
tensor([-0.0054, 0.0011, 0.0012, 0.0148, -0.0186, 0.0087])</pre>
|
||||
tensor([-0.0054, 0.0011, 0.0012, 0.0148, -0.0186, 0.0087])
|
||||
```
|
||||
现在我们看到了,如何使用损失函数。
|
||||
|
||||
唯一剩下的事情就是更新神经网络的参数。
|
||||
@ -171,24 +217,31 @@ tensor([-0.0054, 0.0011, 0.0012, 0.0148, -0.0186, 0.0087])</pre>
|
||||
更新神经网络参数:
|
||||
|
||||
最简单的更新规则就是随机梯度下降。
|
||||
<blockquote>
|
||||
<div><code class="docutils literal notranslate"><span class="pre">weight</span> <span class="pre">=</span> <span class="pre">weight</span> <span class="pre">-</span> <span class="pre">learning_rate</span> <span class="pre">*</span> <span class="pre">gradient</span></code></div></blockquote>
|
||||
```python
|
||||
weight = weight - learning_rate * gradient
|
||||
```
|
||||
我们可以使用 python 来实现这个规则:
|
||||
<pre><span class="n">learning_rate</span> <span class="o">=</span> <span class="mf">0.01</span>
|
||||
<span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
|
||||
<span class="n">f</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sub_</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">grad</span><span class="o">.</span><span class="n">data</span> <span class="o">*</span> <span class="n">learning_rate</span><span class="p">)</span></pre>
|
||||
```python
|
||||
learning_rate = 0.01
|
||||
for f in net.parameters():
|
||||
f.data.sub_(f.grad.data * learning_rate)
|
||||
```
|
||||
尽管如此,如果你使用神经网络,你可能想使用不同的更新规则,例如 SGD、Nesterov-SGD、Adam、RMSProp 等。为了让这可行,我们建立了一个小包:torch.optim,它实现了所有这些方法。使用它非常简单。
|
||||
<pre><span class="kn">import</span> <span class="nn">torch.optim</span> <span class="kn">as</span> <span class="nn">optim</span>
|
||||
```python
|
||||
import torch.optim as optim
|
||||
|
||||
<span class="c1"># create your optimizer</span>
|
||||
<span class="n">optimizer</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="mf">0.01</span><span class="p">)</span>
|
||||
# create your optimizer
|
||||
optimizer = optim.SGD(net.parameters(), lr=0.01)
|
||||
|
||||
# in your training loop:
|
||||
optimizer.zero_grad() # zero the gradient buffers
|
||||
output = net(input)
|
||||
loss = criterion(output, target)
|
||||
loss.backward()
|
||||
optimizer.step() # Does the update
|
||||
|
||||
```
|
||||
|
||||
<span class="c1"># in your training loop:</span>
|
||||
<span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span> <span class="c1"># zero the gradient buffers</span>
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span>
|
||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
||||
<span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span> <span class="c1"># Does the update</span></pre>
|
||||
下载 Python 源代码:
|
||||
|
||||
<a href="http://pytorchchina.com/wp-content/uploads/2018/12/neural_networks_tutorial.py_.zip">neural_networks_tutorial.py</a>
|
||||
|
@ -3,96 +3,116 @@
|
||||
|
||||
在这个教程中,我们将学习如何用 DataParallel 来使用多 GPU。
|
||||
通过 PyTorch 使用多个 GPU 非常简单。你可以将模型放在一个 GPU:
|
||||
<pre> device = torch.device("cuda:0")
|
||||
model.to(device)</pre>
|
||||
```python
|
||||
device = torch.device("cuda:0")
|
||||
model.to(device)
|
||||
```
|
||||
然后,你可以复制所有的张量到 GPU:
|
||||
<pre> mytensor = my_tensor.to(device)</pre>
|
||||
```python
|
||||
|
||||
mytensor = my_tensor.to(device)
|
||||
|
||||
```
|
||||
请注意,调用 my_tensor.to(device) 只会返回 my_tensor 在 GPU 上的一个新副本,而不会改写 my_tensor 本身。你需要把返回值赋给一个新的张量,并在 GPU 上使用这个新张量。
|
||||
|
||||
在多 GPU 中执行前馈,后馈操作是非常自然的。尽管如此,PyTorch 默认只会使用一个 GPU。通过使用 DataParallel 让你的模型并行运行,你可以很容易的在多 GPU 上运行你的操作。
|
||||
<pre> model = nn.DataParallel(model)</pre>
|
||||
```python
|
||||
model = nn.DataParallel(model)
|
||||
|
||||
```
|
||||
这是整个教程的核心,我们接下来将会详细讲解。
|
||||
引用和参数
|
||||
|
||||
引入 PyTorch 模块和定义参数
|
||||
<pre> import torch
|
||||
```python
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import Dataset, DataLoader</pre>
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
|
||||
```
|
||||
# 参数
|
||||
<pre> input_size = 5
|
||||
```python
|
||||
input_size = 5
|
||||
output_size = 2
|
||||
|
||||
batch_size = 30
|
||||
data_size = 100</pre>
|
||||
data_size = 100
|
||||
```
|
||||
设备
|
||||
<pre>device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")</pre>
|
||||
|
||||
```python
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
```
|
||||
实验(玩具)数据
|
||||
|
||||
生成一个玩具数据集。你只需要实现 `__getitem__`。
|
||||
<pre><span class="k">class</span> <span class="nc">RandomDataset</span><span class="p">(</span><span class="n">Dataset</span><span class="p">):</span>
|
||||
```python
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">length</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">len</span> <span class="o">=</span> <span class="n">length</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="n">length</span><span class="p">,</span> <span class="n">size</span><span class="p">)</span>
|
||||
class RandomDataset(Dataset):
|
||||
|
||||
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||||
def __init__(self, size, length):
|
||||
self.len = length
|
||||
self.data = torch.randn(length, size)
|
||||
|
||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">len</span>
|
||||
def __getitem__(self, index):
|
||||
return self.data[index]
|
||||
|
||||
<span class="n">rand_loader</span> <span class="o">=</span> <span class="n">DataLoader</span><span class="p">(</span><span class="n">dataset</span><span class="o">=</span><span class="n">RandomDataset</span><span class="p">(</span><span class="n">input_size</span><span class="p">,</span> <span class="n">data_size</span><span class="p">),</span><span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span></pre>
|
||||
def __len__(self):
|
||||
return self.len
|
||||
|
||||
rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),batch_size=batch_size, shuffle=True)
|
||||
```
|
||||
简单模型
|
||||
|
||||
为了做一个小 demo,我们的模型只是获得一个输入,执行一个线性操作,然后给一个输出。尽管如此,你可以使用 DataParallel 在任何模型(CNN, RNN, Capsule Net 等等.)
|
||||
|
||||
我们在模型中放置了一条 print 语句,用来监测输入和输出张量的大小。请注意 batch rank 0 处打印的内容。
|
||||
<pre><span class="k">class</span> <span class="nc">Model</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
||||
<span class="c1"># Our model</span>
|
||||
```python
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">input_size</span><span class="p">,</span> <span class="n">output_size</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">Model</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">input_size</span><span class="p">,</span> <span class="n">output_size</span><span class="p">)</span>
|
||||
class Model(nn.Module):
|
||||
# Our model
|
||||
|
||||
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">):</span>
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fc</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s2">"</span><span class="se">\t</span><span class="s2">In Model: input size"</span><span class="p">,</span> <span class="nb">input</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span>
|
||||
<span class="s2">"output size"</span><span class="p">,</span> <span class="n">output</span><span class="o">.</span><span class="n">size</span><span class="p">())</span>
|
||||
def __init__(self, input_size, output_size):
|
||||
super(Model, self).__init__()
|
||||
self.fc = nn.Linear(input_size, output_size)
|
||||
|
||||
<span class="k">return</span> <span class="n">output</span></pre>
|
||||
|
||||
def forward(self, input):
|
||||
output = self.fc(input)
|
||||
print("\tIn Model: input size", input.size(),
|
||||
"output size", output.size())
|
||||
|
||||
return output
|
||||
```
|
||||
|
||||
创建模型并且数据并行处理
|
||||
|
||||
这是整个教程的核心。首先我们需要一个模型的实例,然后验证我们是否有多个 GPU。如果我们有多个 GPU,我们可以用 nn.DataParallel 来 包裹 我们的模型。然后我们使用 model.to(device) 把模型放到多 GPU 中。
|
||||
```python
|
||||
model = Model(input_size, output_size)
|
||||
if torch.cuda.device_count() > 1:
|
||||
print("Let's use", torch.cuda.device_count(), "GPUs!")
|
||||
# dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
|
||||
model = nn.DataParallel(model)
|
||||
|
||||
|
||||
<pre><span class="n">model</span> <span class="o">=</span> <span class="n">Model</span><span class="p">(</span><span class="n">input_size</span><span class="p">,</span> <span class="n">output_size</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">device_count</span><span class="p">()</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s2">"Let's use"</span><span class="p">,</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">device_count</span><span class="p">(),</span> <span class="s2">"GPUs!"</span><span class="p">)</span>
|
||||
<span class="c1"># dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">DataParallel</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span></pre>
|
||||
model.to(device)
|
||||
```
|
||||
输出:
|
||||
<div id="create-model-and-dataparallel" class="section">
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>Let's use 2 GPUs!
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="run-the-model" class="section"> 运行模型:</div>
|
||||
<div>现在我们可以看到输入和输出张量的大小了。</div>
|
||||
<div></div>
|
||||
<div>
|
||||
<pre><span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">rand_loader</span><span class="p">:</span>
|
||||
<span class="nb">input</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s2">"Outside: input size"</span><span class="p">,</span> <span class="nb">input</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span>
|
||||
<span class="s2">"output_size"</span><span class="p">,</span> <span class="n">output</span><span class="o">.</span><span class="n">size</span><span class="p">())</span></pre>
|
||||
</div>
|
||||
|
||||
```python
|
||||
|
||||
Let's use 2 GPUs!
|
||||
|
||||
```
|
||||
运行模型:
|
||||
现在我们可以看到输入和输出张量的大小了。
|
||||
```python
|
||||
for data in rand_loader:
|
||||
input = data.to(device)
|
||||
output = model(input)
|
||||
print("Outside: input size", input.size(),
|
||||
"output_size", output.size())
|
||||
```
|
||||
输出:
|
||||
<pre>In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
|
||||
In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
|
||||
|
@ -24,145 +24,153 @@
|
||||
</ol>
|
||||
加载并归一化 CIFAR10
|
||||
使用 torchvision ,用它来加载 CIFAR10 数据非常简单。
|
||||
<pre><span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">import</span> <span class="nn">torchvision</span>
|
||||
<span class="kn">import</span> <span class="nn">torchvision.transforms</span> <span class="kn">as</span> <span class="nn">transforms</span></pre>
|
||||
|
||||
```python
|
||||
import torch
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
```
|
||||
|
||||
torchvision 数据集的输出是范围在 [0,1] 之间的 PILImage,我们将它们转换成归一化范围为 [-1,1] 的张量(Tensor)。
|
||||
<pre><span class="n">transform</span> <span class="o">=</span> <span class="n">transforms</span><span class="o">.</span><span class="n">Compose</span><span class="p">(</span>
|
||||
<span class="p">[</span><span class="n">transforms</span><span class="o">.</span><span class="n">ToTensor</span><span class="p">(),</span>
|
||||
<span class="n">transforms</span><span class="o">.</span><span class="n">Normalize</span><span class="p">((</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span> <span class="p">(</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))])</span>
|
||||
```python
|
||||
|
||||
<span class="n">trainset</span> <span class="o">=</span> <span class="n">torchvision</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">CIFAR10</span><span class="p">(</span><span class="n">root</span><span class="o">=</span><span class="s1">'./data'</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span>
|
||||
<span class="n">download</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">transform</span><span class="o">=</span><span class="n">transform</span><span class="p">)</span>
|
||||
<span class="n">trainloader</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">trainset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span>
|
||||
<span class="n">shuffle</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">num_workers</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
transform = transforms.Compose(
|
||||
[transforms.ToTensor(),
|
||||
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
||||
|
||||
<span class="n">testset</span> <span class="o">=</span> <span class="n">torchvision</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">CIFAR10</span><span class="p">(</span><span class="n">root</span><span class="o">=</span><span class="s1">'./data'</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span>
|
||||
<span class="n">download</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">transform</span><span class="o">=</span><span class="n">transform</span><span class="p">)</span>
|
||||
<span class="n">testloader</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">testset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span>
|
||||
<span class="n">shuffle</span><span class="o">=</span><span class="bp">False</span><span class="p">,</span> <span class="n">num_workers</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
|
||||
download=True, transform=transform)
|
||||
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
|
||||
shuffle=True, num_workers=2)
|
||||
|
||||
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
|
||||
download=True, transform=transform)
|
||||
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
|
||||
shuffle=False, num_workers=2)
|
||||
|
||||
classes = ('plane', 'car', 'bird', 'cat',
|
||||
'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
|
||||
|
||||
```
|
||||
|
||||
<span class="n">classes</span> <span class="o">=</span> <span class="p">(</span><span class="s1">'plane'</span><span class="p">,</span> <span class="s1">'car'</span><span class="p">,</span> <span class="s1">'bird'</span><span class="p">,</span> <span class="s1">'cat'</span><span class="p">,</span>
|
||||
<span class="s1">'deer'</span><span class="p">,</span> <span class="s1">'dog'</span><span class="p">,</span> <span class="s1">'frog'</span><span class="p">,</span> <span class="s1">'horse'</span><span class="p">,</span> <span class="s1">'ship'</span><span class="p">,</span> <span class="s1">'truck'</span><span class="p">)</span></pre>
|
||||
输出:
|
||||
<pre>Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
|
||||
Files already downloaded and verified</pre>
|
||||
```python
|
||||
|
||||
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
|
||||
Files already downloaded and verified
|
||||
```
|
||||
|
||||
让我们来展示其中的一些训练图片。
|
||||
<pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
|
||||
```python
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
<span class="c1"># functions to show an image</span>
|
||||
# functions to show an image
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">imshow</span><span class="p">(</span><span class="n">img</span><span class="p">):</span>
|
||||
<span class="n">img</span> <span class="o">=</span> <span class="n">img</span> <span class="o">/</span> <span class="mi">2</span> <span class="o">+</span> <span class="mf">0.5</span> <span class="c1"># unnormalize</span>
|
||||
<span class="n">npimg</span> <span class="o">=</span> <span class="n">img</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">transpose</span><span class="p">(</span><span class="n">npimg</span><span class="p">,</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">)))</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
||||
def imshow(img):
|
||||
img = img / 2 + 0.5 # unnormalize
|
||||
npimg = img.numpy()
|
||||
plt.imshow(np.transpose(npimg, (1, 2, 0)))
|
||||
plt.show()
|
||||
|
||||
|
||||
<span class="c1"># get some random training images</span>
|
||||
<span class="n">dataiter</span> <span class="o">=</span> <span class="nb">iter</span><span class="p">(</span><span class="n">trainloader</span><span class="p">)</span>
|
||||
<span class="n">images</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">dataiter</span><span class="o">.</span><span class="n">next</span><span class="p">()</span>
|
||||
# get some random training images
|
||||
dataiter = iter(trainloader)
|
||||
images, labels = dataiter.next()
|
||||
|
||||
<span class="c1"># show images</span>
|
||||
<span class="n">imshow</span><span class="p">(</span><span class="n">torchvision</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">make_grid</span><span class="p">(</span><span class="n">images</span><span class="p">))</span>
|
||||
<span class="c1"># print labels</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="s1">'</span><span class="si">%5s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">classes</span><span class="p">[</span><span class="n">labels</span><span class="p">[</span><span class="n">j</span><span class="p">]]</span> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)))</span></pre>
|
||||
|
||||
|
||||
<img class="alignnone size-full wp-image-117" src="http://pytorchchina.com/wp-content/uploads/2018/12/sphx_glr_cifar10_tutorial_001.png" alt="" width="640" height="480" />
|
||||
# show images
|
||||
imshow(torchvision.utils.make_grid(images))
|
||||
# print labels
|
||||
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
|
||||
|
||||
```
|
||||
输出:
|
||||
<div id="loading-and-normalizing-cifar10" class="section">
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>cat plane ship frog
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="define-a-convolutional-neural-network" class="section"></div>
|
||||
<div></div>
|
||||
```python
|
||||
cat plane ship frog
|
||||
|
||||
```
|
||||
<div>定义一个卷积神经网络
|
||||
从之前的神经网络章节复制神经网络,并将它修改为接受 3 通道的图片(此前它被定义为接受 1 通道的图片)。</div>
|
||||
<div></div>
|
||||
<div>
|
||||
<pre><span class="kn">import</span> <span class="nn">torch.nn</span> <span class="kn">as</span> <span class="nn">nn</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="kn">as</span> <span class="nn">F</span>
|
||||
|
||||
```python
|
||||
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
<span class="k">class</span> <span class="nc">Net</span><span class="p">(</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">Net</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pool</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">MaxPool2d</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">16</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">120</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">120</span><span class="p">,</span> <span class="mi">84</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fc3</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="mi">84</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(3, 6, 5)
|
||||
self.pool = nn.MaxPool2d(2, 2)
|
||||
self.conv2 = nn.Conv2d(6, 16, 5)
|
||||
self.fc1 = nn.Linear(16 * 5 * 5, 120)
|
||||
self.fc2 = nn.Linear(120, 84)
|
||||
self.fc3 = nn.Linear(84, 10)
|
||||
|
||||
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conv1</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pool</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">conv2</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">16</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">*</span> <span class="mi">5</span><span class="p">)</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc1</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fc2</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">fc3</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">x</span>
|
||||
def forward(self, x):
|
||||
x = self.pool(F.relu(self.conv1(x)))
|
||||
x = self.pool(F.relu(self.conv2(x)))
|
||||
x = x.view(-1, 16 * 5 * 5)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.relu(self.fc2(x))
|
||||
x = self.fc3(x)
|
||||
return x
|
||||
|
||||
|
||||
<span class="n">net</span> <span class="o">=</span> <span class="n">Net</span><span class="p">()</span></pre>
|
||||
</div>
|
||||
<div></div>
|
||||
<div>
|
||||
net = Net()
|
||||
```
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
<div>定义一个损失函数和优化器
|
||||
让我们使用分类交叉熵Cross-Entropy 作损失函数,动量SGD做优化器。</div>
|
||||
<div></div>
|
||||
<div>
|
||||
<pre><span class="kn">import</span> <span class="nn">torch.optim</span> <span class="kn">as</span> <span class="nn">optim</span>
|
||||
|
||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
|
||||
<span class="n">optimizer</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">SGD</span><span class="p">(</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="mf">0.001</span><span class="p">,</span> <span class="n">momentum</span><span class="o">=</span><span class="mf">0.9</span><span class="p">)</span></pre>
|
||||
</div>
|
||||
<div></div>
|
||||
```python
|
||||
|
||||
import torch.optim as optim
|
||||
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
|
||||
```
|
||||
|
||||
<div>训练网络
|
||||
这里事情开始变得有趣,我们只需要在数据迭代器上循环,并把输入传给网络和优化器就可以。</div>
|
||||
<div></div>
|
||||
<div>
|
||||
<pre><span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">):</span> <span class="c1"># loop over the dataset multiple times</span>
|
||||
|
||||
<span class="n">running_loss</span> <span class="o">=</span> <span class="mf">0.0</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">data</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">trainloader</span><span class="p">,</span> <span class="mi">0</span><span class="p">):</span>
|
||||
<span class="c1"># get the inputs</span>
|
||||
<span class="n">inputs</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">data</span>
|
||||
```python
|
||||
|
||||
<span class="c1"># zero the parameter gradients</span>
|
||||
<span class="n">optimizer</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
||||
for epoch in range(2): # loop over the dataset multiple times
|
||||
|
||||
<span class="c1"># forward + backward + optimize</span>
|
||||
<span class="n">outputs</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">inputs</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">outputs</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span>
|
||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
||||
<span class="n">optimizer</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
|
||||
running_loss = 0.0
|
||||
for i, data in enumerate(trainloader, 0):
|
||||
# get the inputs
|
||||
inputs, labels = data
|
||||
|
||||
<span class="c1"># print statistics</span>
|
||||
<span class="n">running_loss</span> <span class="o">+=</span> <span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">i</span> <span class="o">%</span> <span class="mi">2000</span> <span class="o">==</span> <span class="mi">1999</span><span class="p">:</span> <span class="c1"># print every 2000 mini-batches</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'[</span><span class="si">%d</span><span class="s1">, </span><span class="si">%5d</span><span class="s1">] loss: </span><span class="si">%.3f</span><span class="s1">'</span> <span class="o">%</span>
|
||||
<span class="p">(</span><span class="n">epoch</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="n">running_loss</span> <span class="o">/</span> <span class="mi">2000</span><span class="p">))</span>
|
||||
<span class="n">running_loss</span> <span class="o">=</span> <span class="mf">0.0</span>
|
||||
# zero the parameter gradients
|
||||
optimizer.zero_grad()
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'Finished Training'</span><span class="p">)</span></pre>
|
||||
</div>
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight"> 输出:</div>
|
||||
<div>
|
||||
<pre>[1, 2000] loss: 2.187
|
||||
# forward + backward + optimize
|
||||
outputs = net(inputs)
|
||||
loss = criterion(outputs, labels)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
# print statistics
|
||||
running_loss += loss.item()
|
||||
if i % 2000 == 1999: # print every 2000 mini-batches
|
||||
print('[%d, %5d] loss: %.3f' %
|
||||
(epoch + 1, i + 1, running_loss / 2000))
|
||||
running_loss = 0.0
|
||||
|
||||
print('Finished Training')
|
||||
```
|
||||
|
||||
|
||||
输出:
|
||||
|
||||
```
|
||||
[1, 2000] loss: 2.187
|
||||
[1, 4000] loss: 1.852
|
||||
[1, 6000] loss: 1.672
|
||||
[1, 8000] loss: 1.566
|
||||
@ -174,9 +182,11 @@ Files already downloaded and verified</pre>
|
||||
[2, 8000] loss: 1.318
|
||||
[2, 10000] loss: 1.282
|
||||
[2, 12000] loss: 1.286
|
||||
Finished Training</pre>
|
||||
</div>
|
||||
</div>
|
||||
Finished Training
|
||||
|
||||
```
|
||||
|
||||
|
||||
在测试集上测试网络
|
||||
我们已经在训练数据集上对网络进行了 2 轮(epoch)训练,但是我们需要检查网络是否已经学到了东西。
|
||||
|
||||
@ -185,69 +195,76 @@ Finished Training</pre>
|
||||
好的,第一步,让我们从测试集中显示一张图像来熟悉它。<img class="alignnone size-full wp-image-118" src="http://pytorchchina.com/wp-content/uploads/2018/12/sphx_glr_cifar10_tutorial_002.png" alt="" width="640" height="480" />
|
||||
|
||||
输出:
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>GroundTruth: cat ship ship plane
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
```python
|
||||
GroundTruth: cat ship ship plane
|
||||
```
|
||||
|
||||
现在让我们看看 神经网络认为这些样本应该预测成什么:
|
||||
<div class="highlight-python notranslate">
|
||||
<div class="highlight">
|
||||
<pre><span class="n">outputs</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">images</span><span class="p">)</span>
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
```python
|
||||
outputs = net(images)
|
||||
|
||||
```
|
||||
输出是预测与十个类的近似程度,与某一个类的近似程度越高,网络就越认为图像属于这一类别。所以让我们打印其中近似程度最高的类别标签:
|
||||
<pre><span class="n">_</span><span class="p">,</span> <span class="n">predicted</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">outputs</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'Predicted: '</span><span class="p">,</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="s1">'</span><span class="si">%5s</span><span class="s1">'</span> <span class="o">%</span> <span class="n">classes</span><span class="p">[</span><span class="n">predicted</span><span class="p">[</span><span class="n">j</span><span class="p">]]</span>
|
||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)))</span></pre>
|
||||
```python
|
||||
_, predicted = torch.max(outputs, 1)
|
||||
|
||||
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
|
||||
for j in range(4)))
|
||||
|
||||
```
|
||||
输出:
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>Predicted: cat ship car ship
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
```python
|
||||
|
||||
Predicted: cat ship car ship
|
||||
|
||||
```
|
||||
结果看起来非常好,让我们看看网络在整个数据集上的表现。
|
||||
<pre><span class="n">correct</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">total</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">testloader</span><span class="p">:</span>
|
||||
<span class="n">images</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">data</span>
|
||||
<span class="n">outputs</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">images</span><span class="p">)</span>
|
||||
<span class="n">_</span><span class="p">,</span> <span class="n">predicted</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">outputs</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">total</span> <span class="o">+=</span> <span class="n">labels</span><span class="o">.</span><span class="n">size</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">correct</span> <span class="o">+=</span> <span class="p">(</span><span class="n">predicted</span> <span class="o">==</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
||||
```python
|
||||
|
||||
correct = 0
|
||||
total = 0
|
||||
with torch.no_grad():
|
||||
for data in testloader:
|
||||
images, labels = data
|
||||
outputs = net(images)
|
||||
_, predicted = torch.max(outputs.data, 1)
|
||||
total += labels.size(0)
|
||||
correct += (predicted == labels).sum().item()
|
||||
|
||||
print('Accuracy of the network on the 10000 test images: %d %%' % (
|
||||
100 * correct / total))
|
||||
```
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'Accuracy of the network on the 10000 test images: </span><span class="si">%d</span> <span class="si">%%</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span>
|
||||
<span class="mi">100</span> <span class="o">*</span> <span class="n">correct</span> <span class="o">/</span> <span class="n">total</span><span class="p">))</span></pre>
|
||||
输出:
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
<pre>Accuracy of the network on the 10000 test images: 54 %
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
```text
|
||||
Accuracy of the network on the 10000 test images: 54 %
|
||||
|
||||
|
||||
```
|
||||
这看起来比随机预测要好得多:随机预测的准确率只有10%(即从10个类别中随机猜出一类)。看来网络确实学到了一些东西。
|
||||
<pre><span class="n">class_correct</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="mf">0.</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span>
|
||||
<span class="n">class_total</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="mf">0.</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">))</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">testloader</span><span class="p">:</span>
|
||||
<span class="n">images</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="n">data</span>
|
||||
<span class="n">outputs</span> <span class="o">=</span> <span class="n">net</span><span class="p">(</span><span class="n">images</span><span class="p">)</span>
|
||||
<span class="n">_</span><span class="p">,</span> <span class="n">predicted</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">outputs</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">c</span> <span class="o">=</span> <span class="p">(</span><span class="n">predicted</span> <span class="o">==</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">):</span>
|
||||
<span class="n">label</span> <span class="o">=</span> <span class="n">labels</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="n">class_correct</span><span class="p">[</span><span class="n">label</span><span class="p">]</span> <span class="o">+=</span> <span class="n">c</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
|
||||
<span class="n">class_total</span><span class="p">[</span><span class="n">label</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
```python
|
||||
class_correct = list(0. for i in range(10))
|
||||
class_total = list(0. for i in range(10))
|
||||
with torch.no_grad():
|
||||
for data in testloader:
|
||||
images, labels = data
|
||||
outputs = net(images)
|
||||
_, predicted = torch.max(outputs, 1)
|
||||
c = (predicted == labels).squeeze()
|
||||
for i in range(4):
|
||||
label = labels[i]
|
||||
class_correct[label] += c[i].item()
|
||||
class_total[label] += 1
|
||||
|
||||
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
|
||||
<span class="k">print</span><span class="p">(</span><span class="s1">'Accuracy of </span><span class="si">%5s</span><span class="s1"> : </span><span class="si">%2d</span> <span class="si">%%</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span>
|
||||
<span class="n">classes</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="mi">100</span> <span class="o">*</span> <span class="n">class_correct</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">/</span> <span class="n">class_total</span><span class="p">[</span><span class="n">i</span><span class="p">]))</span></pre>
|
||||
for i in range(10):
|
||||
print('Accuracy of %5s : %2d %%' % (
|
||||
classes[i], 100 * class_correct[i] / class_total[i]))
|
||||
|
||||
```
|
||||
输出:
|
||||
<pre>Accuracy of plane : 57 %
|
||||
Accuracy of car : 73 %
|
||||
@ -266,11 +283,14 @@ Accuracy of truck : 66 %</pre>
|
||||
在GPU上训练
|
||||
就像把一个张量转移到GPU上一样,你也需要将神经网络转移到GPU上。
|
||||
如果CUDA可用,让我们首先将设备定义为第一个可见的CUDA设备。
|
||||
<pre><span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s2">"cuda:0"</span> <span class="k">if</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">is_available</span><span class="p">()</span> <span class="k">else</span> <span class="s2">"cpu"</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Assume that we are on a CUDA machine, then this should print a CUDA device:</span>
|
||||
```python
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
<span class="k">print</span><span class="p">(</span><span class="n">device</span><span class="p">)</span></pre>
|
||||
# Assume that we are on a CUDA machine, then this should print a CUDA device:
|
||||
|
||||
print(device)
|
||||
```
|
||||
输出:
|
||||
<div class="sphx-glr-script-out highlight-none notranslate">
|
||||
<div class="highlight">
|
||||
|
Reference in New Issue
Block a user