# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

## @package convnet_benchmarks
# Module caffe2.experiments.python.convnet_benchmarks
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
"""
|
|
Benchmark for common convnets.
|
|
|
|
(NOTE: Numbers below prior with missing parameter=update step, TODO to update)
|
|
|
|
Speed on Titan X, with 10 warmup steps and 10 main steps and with different
|
|
versions of cudnn, are as follows (time reported below is per-batch time,
|
|
forward / forward+backward):
|
|
|
|
CuDNN V3 CuDNN v4
|
|
AlexNet 32.5 / 108.0 27.4 / 90.1
|
|
OverFeat 113.0 / 342.3 91.7 / 276.5
|
|
Inception 134.5 / 485.8 125.7 / 450.6
|
|
VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
|
|
|
|
Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
|
|
|
|
Batch Size Speed per batch Speed per image
|
|
16 22.8 / 72.7 1.43 / 4.54
|
|
32 38.0 / 127.5 1.19 / 3.98
|
|
64 67.2 / 233.6 1.05 / 3.65
|
|
128 125.7 / 450.6 0.98 / 3.52
|
|
|
|
Speed on Tesla M40, which 10 warmup steps and 10 main steps and with cudnn
|
|
v4, is as follows:
|
|
|
|
AlexNet 68.4 / 218.1
|
|
OverFeat 210.5 / 630.3
|
|
Inception 300.2 / 1122.2
|
|
VGG (batch 64) 405.8 / 1327.7
|
|
|
|
(Note that these numbers involve a "full" backprop, i.e. the gradient
|
|
with respect to the input image is also computed.)
|
|
|
|
To get the numbers, simply run:
|
|
|
|
for MODEL in AlexNet OverFeat Inception; do
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size 128 --model $MODEL --forward_only True
|
|
done
|
|
for MODEL in AlexNet OverFeat Inception; do
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size 128 --model $MODEL
|
|
done
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size 64 --model VGGA --forward_only True
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size 64 --model VGGA
|
|
|
|
for BS in 16 32 64 128; do
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size $BS --model Inception --forward_only True
|
|
PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
|
|
--batch_size $BS --model Inception
|
|
done
|
|
|
|
Note that VGG needs to be run at batch 64 due to memory limit on the backward
|
|
pass.
|
|
"""
import argparse
import sys
import time

from caffe2.python import cnn, workspace, core

import caffe2.python.SparseTransformer as SparseTransformer


def MLP(order):
    model = cnn.CNNModelHelper()
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            model.FC(
                current, next_,
                dim_in=d, dim_out=d,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit)
    model.Sum(["fc_{}_{}".format(depth, j)
               for j in range(width)], ["sum"])
    model.FC("sum", "last",
             dim_in=d, dim_out=1000,
             weight_init=model.XavierInit,
             bias_init=model.XavierInit)
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, d


def AlexNet(order):
    model = cnn.CNNModelHelper(order, name="alexnet",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4,
        pad=2
    )

    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        192,
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        192,
        384,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        384,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
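
# Shape bookkeeping for fc6's dim_in of 256 * 6 * 6 above, using the usual
# output_size = (size + 2 * pad - kernel) // stride + 1 arithmetic on the
# 224x224 input this function returns:
#
#   conv1 (k=11, s=4, p=2): 224 -> 55    pool1 (k=3, s=2): 55 -> 27
#   conv2 (k=5, p=2):        27 -> 27    pool2 (k=3, s=2): 27 -> 13
#   conv3/4/5 (k=3, p=1):    13 -> 13    pool5 (k=3, s=2): 13 -> 6
#
# which leaves a 256-channel 6x6 feature map going into fc6.

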
def OverFeat(order):
    model = cnn.CNNModelHelper(order, name="overfeat",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        96,
        11,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=4
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1, "conv2", 96, 256, 5, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        512,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    conv5 = model.Conv(
        relu4,
        "conv5",
        1024,
        1024,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    pool5 = model.MaxPool(relu5, "pool5", kernel=2, stride=2)
    fc6 = model.FC(
        pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    relu6 = model.Relu(fc6, "fc6")
    fc7 = model.FC(
        relu6, "fc7", 3072, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relu7 = model.Relu(fc7, "fc7")
    fc8 = model.FC(
        relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fc8, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def VGGA(order):
    model = cnn.CNNModelHelper(order, name='vgg-a',
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=2, stride=2)
    conv2 = model.Conv(
        pool1,
        "conv2",
        64,
        128,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=2, stride=2)
    conv3 = model.Conv(
        pool2,
        "conv3",
        128,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu3 = model.Relu(conv3, "conv3")
    conv4 = model.Conv(
        relu3,
        "conv4",
        256,
        256,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu4 = model.Relu(conv4, "conv4")
    pool4 = model.MaxPool(relu4, "pool4", kernel=2, stride=2)
    conv5 = model.Conv(
        pool4,
        "conv5",
        256,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu5 = model.Relu(conv5, "conv5")
    conv6 = model.Conv(
        relu5,
        "conv6",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu6 = model.Relu(conv6, "conv6")
    pool6 = model.MaxPool(relu6, "pool6", kernel=2, stride=2)
    conv7 = model.Conv(
        pool6,
        "conv7",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu7 = model.Relu(conv7, "conv7")
    conv8 = model.Conv(
        relu7,
        "conv8",
        512,
        512,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu8 = model.Relu(conv8, "conv8")
    pool8 = model.MaxPool(relu8, "pool8", kernel=2, stride=2)

    fcix = model.FC(
        pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
        ('ConstantFill', {})
    )
    reluix = model.Relu(fcix, "fcix")
    fcx = model.FC(
        reluix, "fcx", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
    )
    relux = model.Relu(fcx, "fcx")
    fcxi = model.FC(
        relux, "fcxi", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    pred = model.Softmax(fcxi, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231


def net_DAG_Builder(model):
    print("====================================================")
    print(" Start Building DAG ")
    print("====================================================")
    net_root = SparseTransformer.netbuilder(model)
    return net_root


def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    # path 1: 1x1 conv
    conv1 = model.Conv(
        input_blob, output_name + ":conv1", input_depth, conv1_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    conv1 = model.Relu(conv1, conv1)
    # path 2: 1x1 conv + 3x3 conv
    conv3_reduce = model.Conv(
        input_blob, output_name + ":conv3_reduce", input_depth,
        conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv3_reduce = model.Relu(conv3_reduce, conv3_reduce)
    conv3 = model.Conv(
        conv3_reduce,
        output_name + ":conv3",
        conv3_depths[0],
        conv3_depths[1],
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    conv3 = model.Relu(conv3, conv3)
    # path 3: 1x1 conv + 5x5 conv
    conv5_reduce = model.Conv(
        input_blob, output_name + ":conv5_reduce", input_depth,
        conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv5_reduce = model.Relu(conv5_reduce, conv5_reduce)
    conv5 = model.Conv(
        conv5_reduce,
        output_name + ":conv5",
        conv5_depths[0],
        conv5_depths[1],
        5,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=2
    )
    conv5 = model.Relu(conv5, conv5)
    # path 4: pool + 1x1 conv
    pool = model.MaxPool(
        input_blob,
        output_name + ":pool",
        kernel=3,
        stride=1,
        pad=1
    )
    pool_proj = model.Conv(
        pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    pool_proj = model.Relu(pool_proj, pool_proj)
    output = model.Concat([conv1, conv3, conv5, pool_proj], output_name)
    return output
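
# All four paths above preserve the spatial size (1x1 convs; the 3x3 conv has
# pad=1; the 5x5 conv has pad=2; the 3x3 max pool has stride=1, pad=1), so the
# Concat along the channel axis produces an output depth of
#
#   conv1_depth + conv3_depths[1] + conv5_depths[1] + pool_depth
#
# For the "inc3" module below, that is 64 + 128 + 32 + 32 = 256, which matches
# the input_depth passed to "inc4".

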
def Inception(order):
    model = cnn.CNNModelHelper(order, name="inception",
                               use_cudnn=True, cudnn_exhaustive_search=True)
    conv1 = model.Conv(
        "data",
        "conv1",
        3,
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3
    )
    relu1 = model.Relu(conv1, "conv1")
    pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2, pad=1)
    conv2a = model.Conv(
        pool1, "conv2a", 64, 64, 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    conv2a = model.Relu(conv2a, conv2a)
    conv2 = model.Conv(
        conv2a,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1
    )
    relu2 = model.Relu(conv2, "conv2")
    pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2, pad=1)
    # Inception modules
    inc3 = _InceptionModule(
        model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
    )
    inc4 = _InceptionModule(
        model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
    )
    pool5 = model.MaxPool(inc4, "pool5", kernel=3, stride=2, pad=1)
    inc5 = _InceptionModule(
        model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
    )
    inc6 = _InceptionModule(
        model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
    )
    inc7 = _InceptionModule(
        model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
    )
    inc8 = _InceptionModule(
        model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
    )
    inc9 = _InceptionModule(
        model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
    )
    pool9 = model.MaxPool(inc9, "pool9", kernel=3, stride=2, pad=1)
    inc10 = _InceptionModule(
        model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
    )
    inc11 = _InceptionModule(
        model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
    )
    pool11 = model.AveragePool(inc11, "pool11", kernel=7, stride=1)
    fc = model.FC(
        pool11, "fc", 1024, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    pred = model.Softmax(fc, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
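
# Depth check for the final FC above: inc11 concatenates 384 + 384 + 128 +
# 128 = 1024 channels, and the 7x7 average pool collapses the 7x7 feature map
# to 1x1, so fc's dim_in of 1024 lines up.

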
def AddInput(model, batch_size, db, db_type):
    """Adds the data input part."""
    data_uint8, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type
    )
    data = model.Cast(data_uint8, "data_nhwc", to=core.DataType.FLOAT)
    data = model.NHWC2NCHW(data, "data")
    data = model.Scale(data, data, scale=float(1. / 256))
    data = model.StopGradient(data, data)
    return data, label
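
# AddInput is not wired into Benchmark below (which feeds synthetic data
# instead); a minimal usage sketch, assuming a hypothetical LMDB database at
# /path/to/db:
#
#   model = cnn.CNNModelHelper(order="NCHW", name="example")
#   data, label = AddInput(model, batch_size=64,
#                          db="/path/to/db", db_type="lmdb")

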
def AddParameterUpdate(model):
    """Simple plain SGD update -- not tuned to actually train the models."""
    ITER = model.Iter("iter")
    LR = model.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
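
# Note on the update above: WeightedSum computes
#
#   param = 1.0 * param + LR * param_grad
#
# so the descent direction comes from base_lr being negative (-1e-8 here);
# the LearningRate op only scales that base value over the course of training.

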
def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as
    # well.

    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    if arg.model == "MLP":
        input_shape = [arg.batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[arg.batch_size, ],
        min=0,
        max=999
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)

    if arg.order == 'NHWC':
        print(
            '==WARNING==\n'
            'NHWC order with CuDNN may not be supported yet, so I might\n'
            'exit suddenly.'
        )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
        ) as fid:
            fid.write(str(model.param_init_net.Proto()))
        with open("{0}.pbtxt".format(arg.model), "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for _ in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations))
    start = time.time()
    workspace.RunPlan(plan)
    print('Spent: {} seconds per iteration.'.format(
        (time.time() - start) / arg.iterations))
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)


def GetArgumentParser():
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=128,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        default=-1,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',
        help="If set, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',
        help="If set, run on CPU instead of GPU."
    )
    parser.add_argument(
        "--dump_model",
        action='store_true',
        help="If set, dump the model prototxts to disk."
    )
    parser.add_argument("--net_type", type=str, default="dag")
    parser.add_argument("--num_workers", type=int, default=2)
    return parser
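
# Example invocation of the flags defined above (--forward_only and the other
# switches are store_true flags, so they take no value):
#
#   python convnet_benchmarks.py --batch_size 64 --model Inception \
#       --forward_only

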
if __name__ == '__main__':
    args = GetArgumentParser().parse_args()
    if (
        not args.batch_size or not args.model or not args.order or
        not args.cudnn_ws
    ):
        GetArgumentParser().print_help()
        sys.exit(1)

    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    model_map = {
        'AlexNet': AlexNet,
        'OverFeat': OverFeat,
        'VGGA': VGGA,
        'Inception': Inception,
        'MLP': MLP,
    }
    Benchmark(model_map[args.model], args)