[Caffe2]Remove Caffe2 scripts and benchmarks (#126747)

These scripts and benchmarks are obsolete following the removal of Caffe2.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/126747
Approved by: https://github.com/ezyang, https://github.com/malfet
cyy
2024-06-05 23:46:29 +00:00
committed by PyTorch MergeBot
parent e98662bed9
commit 2fd75667b4
23 changed files with 29 additions and 1674 deletions

View File

@@ -1,45 +0,0 @@
import numpy as np
from utils import NUM_LOOP_ITERS
from caffe2.python import core, workspace
workspace.GlobalInit(["caffe2"])
def add_blob(ws, blob_name, tensor_size):
blob_tensor = np.random.randn(*tensor_size).astype(np.float32)
ws.FeedBlob(blob_name, blob_tensor)
class C2SimpleNet:
"""
This module constructs a net with the 'op_name' operator. The net consists
of a series of such operators.
It initializes the workspace with input blobs equal in number to the
parameters needed for the op.
Provides a forward method to run the net niters times.
"""
def __init__(self, op_name, num_inputs=1, debug=False):
self.input_names = []
self.net = core.Net("framework_benchmark_net")
self.input_names = [f"in_{i}" for i in range(num_inputs)]
for i in range(num_inputs):
add_blob(workspace, self.input_names[i], [1])
self.net.AddExternalInputs(self.input_names)
op_constructor = getattr(self.net, op_name)
op_constructor(self.input_names)
self.output_name = self.net._net.op[-1].output
print(f"Benchmarking op {op_name}:")
for _ in range(NUM_LOOP_ITERS):
output_name = self.net._net.op[-1].output
self.input_names[-1] = output_name[0]
assert len(self.input_names) == num_inputs
op_constructor(self.input_names)
workspace.CreateNet(self.net)
if debug:
print(self.net._net)
def forward(self, niters):
workspace.RunNet(self.net, niters, False)
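
For context, a minimal usage sketch of this removed helper (assuming the old Caffe2 Python packages and the sibling `utils` module are still importable):

```python
# Hypothetical driver for the removed C2SimpleNet helper: builds a net
# that chains NUM_LOOP_ITERS "Sum" ops and runs it 100 times.
from C2Module import C2SimpleNet

net = C2SimpleNet("Sum", num_inputs=2)
net.forward(100)  # executes the net 100 times via workspace.RunNet
```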

View File

@@ -1,6 +1,5 @@
import argparse
from C2Module import C2SimpleNet
from pt_wrapper_module import WrapperModule
from SimpleAddModule import add_tensors_loop, SimpleAddModule
@@ -19,9 +18,6 @@ buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_ben
--add-op --graph-mode --eager-mode (Runs both graph mode and eager mode)
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
--add-op --graph-mode (Runs only graph mode)
To run C2 benchmark:
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
--add-op --benchmark-c2-net
"""
SUPPORTED_OPS = {"add_op"}
@@ -49,39 +45,22 @@ def benchmark_simple_fn(args, config, module_config, module_type, result):
module_type: Type of the module to be wrapped. e.g. SimpleAddModule for add op.
result: dictionary instance to be populated with the benchmark result (latency per iter).
"""
benchmark_c2_net = args.benchmark_c2_net
print(f"Benchmarking {module_type.__name__}")
if benchmark_c2_net:
op_name = module_config.c2_op
num_inputs = module_config.num_params
module = C2SimpleNet(op_name, num_inputs=num_inputs, debug=args.debug)
latency_per_iter_ms = benchmark_module(config, module)
result[op_name] = latency_per_iter_ms
else:
f_name = (
module_config.pt_fn.__name__
+ ":Num Operands="
+ str(module_config.num_params)
)
graph_mode_str = "Graph mode" + ":" + str(module_config.graph_mode)
result_key = ",".join((f_name, graph_mode_str))
module = WrapperModule(module_type, module_config, args.debug, args.save)
latency_per_iter_ms = benchmark_module(
config, module, args.use_throughput_benchmark
)
result[result_key] = latency_per_iter_ms
f_name = (
module_config.pt_fn.__name__ + ":Num Operands=" + str(module_config.num_params)
)
graph_mode_str = "Graph mode" + ":" + str(module_config.graph_mode)
result_key = ",".join((f_name, graph_mode_str))
module = WrapperModule(module_type, module_config, args.debug, args.save)
latency_per_iter_ms = benchmark_module(
config, module, args.use_throughput_benchmark
)
result[result_key] = latency_per_iter_ms
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--op", default="add_op", dest="op", type=str)
parser.add_argument(
"--benchmark-c2-net",
"--benchmark_c2_net",
default=False,
dest="benchmark_c2_net",
action="store_true",
)
parser.add_argument(
"--use-throughput-benchmark",
"--use_throughput_benchmark",
@@ -107,10 +86,6 @@ def main():
if args.op not in SUPPORTED_OPS:
print(f"Op {args.op} is not supported: Supported ops are:{SUPPORTED_OPS}")
return
assert not (
args.benchmark_c2_net and args.use_throughput_benchmark
), "Benchmarking of C2 net via throughput benchmarking is not yet supported"
num_warmup_iters = args.num_warmup_iters
num_iters = args.num_iters
config = BenchmarkConfig(num_warmup_iters, num_iters)
@@ -120,10 +95,7 @@ def main():
result = {}
if args.op == "add_op":
num_params = 2
if args.benchmark_c2_net:
module_config = ModuleConfig(None, "Sum", num_params, None)
else:
module_config = ModuleConfig(add_tensors_loop, None, num_params, graph_mode)
module_config = ModuleConfig(add_tensors_loop, None, num_params, graph_mode)
benchmark_simple_fn(args, config, module_config, SimpleAddModule, result)
print_results(result)

View File

@@ -1,6 +1,6 @@
# PyTorch/Caffe2 Operator Micro-benchmarks
# PyTorch Operator Micro-benchmarks
This benchmark suite provides a systematic way to measure the performance of operators for a wide range of inputs. The generated benchmark data fully characterizes the performance of an operator in terms of execution time and the efficiency of the PyTorch/Caffe2 frameworks used.
This benchmark suite provides a systematic way to measure the performance of operators for a wide range of inputs. The generated benchmark data fully characterizes the performance of an operator in terms of execution time and the efficiency of the PyTorch framework used.
## Features
@@ -8,7 +8,7 @@ Key Features:
1\. Language used: Python
2\. Supported Frameworks: PyTorch and Caffe2
2\. Supported Frameworks: PyTorch
3\. Supported PyTorch mode: eager and JIT
@@ -49,7 +49,7 @@ python -m benchmark_all_test
```
## Code to support `torch.add` in the benchmark
The following example shows the code to support `torch.add` with 27 different tests. In the subpages of this wiki, we'll step through the complete flow of adding PyTorch and Caffe2 operators to the benchmark suite. Existing benchmarks for operators are in `pt` and `c2` directories and we highly recommend putting your new operators in those locations.
The following example shows the code to support `torch.add` with 27 different tests. In the subpages of this wiki, we'll step through the complete flow of adding PyTorch operators to the benchmark suite. Existing benchmarks for operators are in the `pt` directory, and we highly recommend putting your new operators there.
```python
add_short_configs = op_bench.cross_product_configs(
@@ -77,7 +77,7 @@ op_bench.generate_pt_test(add_short_configs, AddBenchmark)
The output is intended to be in a human-readable format. Here is an example output for `torch.add`:
```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# PyTorch Operator Micro-benchmarks
# ----------------------------------------
# Tag : short
@@ -146,7 +146,7 @@ python -m pt.add_test --tag-filter long
```
## Adding New Operators to the Benchmark Suite
In the previous sections, we gave several examples to show how to run the already available operators in the benchmark suite. In the following sections, we'll step through the complete flow of adding PyTorch and Caffe2 operators to the benchmark suite. Existing benchmarks for operators are in `pt` and `c2` directories and we highly recommend putting your new operators in those directories as well.
In the previous sections, we gave several examples to show how to run the already available operators in the benchmark suite. In the following sections, we'll step through the complete flow of adding PyTorch operators to the benchmark suite. Existing benchmarks for operators are in the `pt` directory, and we highly recommend putting your new operators there as well.
### Add a New PyTorch Operator
Let's say you want to measure the execution time of the following operator:
@@ -260,55 +260,6 @@ if __name__ == "__main__":
```
That's it. You just added a new operator to the benchmark suite!
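Since the diff elides the full example above, here is a minimal sketch of the pattern the README describes, assuming the dict-style `TorchBenchmarkBase` interface used by the `pt` tests; treat it as illustrative rather than the exact file contents:

```python
import operator_benchmark as op_bench
import torch

# Input shapes to sweep; the tag lets you select them with --tag-filter.
mul_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [8, 16, 32],
        [16, 16, 64],
    ],
    tags=["short"],
)

class MulBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        self.inputs = {
            "input_one": torch.rand(M, N, K),
            "input_two": torch.rand(M, N, K),
        }
        self.set_module_name("mul")

    def forward(self, input_one, input_two):
        return torch.mul(input_one, input_two)

op_bench.generate_pt_test(mul_short_configs, MulBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
```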
### Add a New Caffe2 Operator
The steps to add a new Caffe2 operator are the same as those for a PyTorch operator. The code below shows how to add the Caffe2 `Add` operator:
```python
import operator_benchmark as op_bench
from caffe2.python import core
add_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2 ** x for x in range(0, 3)],
tags=["long"]
)
add_short_configs = op_bench.config_list(
attrs=[
[8, 16, 32],
[16, 16, 64],
[64, 64, 128],
],
attr_names=["M", "N", "K"],
tags=["short"],
)
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K):
self.input_one = self.tensor(M, N, K)
self.input_two = self.tensor(M, N, K)
self.output = self.tensor(M, N, K)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench.generate_c2_test(add_long_configs + add_short_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()
```
There are two things worth mentioning in this code:
* `self.tensor` is a helper function which takes shapes and returns a Caffe2 blob. It is designed to make the tensor creation step easier compared to the standard Caffe2 way.
* `generate_c2_test` is used to register Caffe2 tests with the benchmark.
### Add a List of Operators
In the previous sections, we introduced the steps required to add a single operator to the benchmark suite. There are scenarios where you want to extend the benchmark suite with a list of operators which can share the same inputs. For example, to benchmark `abs` and `acos` operators, you can use the same set of inputs for both.
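The example that followed this paragraph is elided by the diff; the sketch below reconstructs the pattern, assuming the `op_bench.op_list` and `generate_pt_tests_from_op_list` helpers used elsewhere in the suite:

```python
import operator_benchmark as op_bench
import torch

# A list of ops that share the same input configs.
unary_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["abs", torch.abs],
        ["acos", torch.acos],
    ],
)

unary_ops_configs = op_bench.config_list(
    attr_names=["M", "N"],
    attrs=[[128, 128]],
    tags=["short"],
)

class UnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, op_func):
        self.inputs = {"input": torch.rand(M, N)}
        self.op_func = op_func

    def forward(self, input):
        return self.op_func(input)

op_bench.generate_pt_tests_from_op_list(
    unary_ops_list, unary_ops_configs, UnaryOpBenchmark
)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
```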
@@ -416,37 +367,3 @@ The example below shows the relevant code for that:
self.input_one = torch.rand(M, N, K, requires_grad=True)
generate_pt_gradient_test(long_configs + short_configs, TorchAddBenchmark)
```
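For completeness, a hedged sketch of a full gradient test in the dict-style interface (the configs here are placeholders, not the elided originals):

```python
import operator_benchmark as op_bench
import torch

short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[[8, 16, 32]],
    tags=["short"],
)

class TorchAddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        # requires_grad=True makes the backward path measurable
        self.inputs = {
            "input_one": torch.rand(M, N, K, requires_grad=True),
            "input_two": torch.rand(M, N, K, requires_grad=True),
        }
        self.set_module_name("add")

    def forward(self, input_one, input_two):
        return torch.add(input_one, input_two)

op_bench.generate_pt_gradient_test(short_configs, TorchAddBenchmark)
```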
#### For Caffe2 Gradient Ops
To add Caffe2 gradient ops, we need to implement a new backward method in the benchmark class:
```python
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K):
self.input_one = self.tensor(M, N, K)
self.input_two = self.tensor(M, N, K)
self.input_one_grad = self.tensor(M, N, K)
self.input_two_grad = self.tensor(M, N, K)
self.output = self.tensor(M, N, K)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
def backward(self):
grad_op = core.CreateOperator(
"AddGradient",
[self.output, self.input_one, self.input_two],
[self.input_one_grad, self.input_two_grad], **self.args
)
return grad_op
op_bench.generate_c2_gradient_test(long_configs + short_configs, AddBenchmark)
```
After the class is implemented, we need to register the tests with the `generate_c2_gradient_test` function.
This concludes the overview of the operator benchmark suite.

View File

@@ -1,202 +0,0 @@
from collections import namedtuple
import benchmark_utils
from benchmark_test_generator import _register_test
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from .benchmark_core import TestConfig
"""Caffe2 performance microbenchmarks.
This module contains Caffe2-specific functionalities for performance
microbenchmarks.
"""
class Caffe2BenchmarkBase:
"""This is a base class used to create Caffe2 operator benchmark"""
tensor_index = 0
test_index = 0
def __init__(self):
self.args = {}
self.user_provided_name = None
self._num_inputs_require_grads = 0
self._pass_count = 0
def _set_backward_test(self, is_backward):
pass
def _device_option(self, device):
"""This method is used to set device option."""
if device not in ["cuda", "cpu"]:
raise ValueError("Missing attrs in configs")
if "cuda" in device:
self.dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
else:
self.dev = core.DeviceOption(caffe2_pb2.CPU)
return self.dev
def tensor(self, shapes, dtype="float32", device="cpu"):
"""A wapper function to create C2 tensor filled with random data.
The name/label of the tensor is returned and it is available
throughout the benchmark execution phase.
Args:
shapes: an int or a sequence of ints defining the shape of the tensor
dtype: use the dtypes from numpy
(https://docs.scipy.org/doc/numpy/user/basics.types.html)
Returns:
the name of a C2 blob of the given dtype
"""
return self.feed_tensor(benchmark_utils.numpy_random(dtype, *shapes), device)
def feed_tensor(self, tensor, device="cpu"):
"""Similar to tensor, but can supply any data compatible with FeedBlob"""
blob_name = "blob_" + str(Caffe2BenchmarkBase.tensor_index)
dev = self._device_option(device)
with core.DeviceScope(dev):
workspace.FeedBlob(blob_name, tensor)
Caffe2BenchmarkBase.tensor_index += 1
return blob_name
def module_name(self):
"""this is used to label the operator being benchmarked"""
if self.user_provided_name:
return self.user_provided_name
return self.__class__.__name__
def set_module_name(self, name):
self.user_provided_name = name
def _value_to_str(self, value):
"""if value is bool, we will convert it to 0 and 1"""
ret = value
if type(value) == bool:
ret = int(value)
return str(ret)
def test_name(self, name_type="long", **kargs):
"""this is a globally unique name which can be used to
label a specific test
"""
if name_type == "long":
test_name_str = []
for key in kargs:
value = kargs[key]
test_name_str.append(key + self._value_to_str(value))
name = (self.module_name() + "_" + "_".join(test_name_str)).replace(" ", "")
elif name_type == "short":
# this is used to generate test name based on unique index
name = "_".join(
[self.module_name(), "test", str(Caffe2BenchmarkBase.test_index)]
)
Caffe2BenchmarkBase.test_index += 1
return name
def extract_inputs_tuple(self):
# add a dummy function here to match the interface of TorchBenchmarkBase
pass
class Caffe2OperatorTestCase:
"""This class includes all the information needed to benchmark an operator.
op_bench: a user-defined class (child of Caffe2BenchmarkBase)
which includes the inputs, the operator, etc.
test_config: a namedtuple that includes test_name, input_shape, tag, and run_backward.
When run_backward is false, the run_forward method will be executed; otherwise
the run_backward method will be executed.
"""
def __init__(self, op_bench, test_config):
self.op_bench = op_bench
self.test_config = test_config
self.framework = "Caffe2"
def run_forward(self, num_runs, print_per_iter=False, cuda_sync=False):
"""Run the forward path of an operator in a loop"""
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.forward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError(f"Unable to run operator test case: {self.test_name}")
def run_backward(self, num_runs, print_per_iter=False):
"""Run the backward path of an operator in a loop"""
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.backward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError(
f"Unable to run operator gradient test case: {self.test_name}"
)
def _print_per_iter(self):
pass
def create_caffe2_op_test_case(op_bench, test_config):
test_case = Caffe2OperatorTestCase(op_bench, test_config)
test_config = test_case.test_config
op = test_case.op_bench
func_name = f"{op.module_name()}{test_case.framework}{str(test_config)}"
return (func_name, test_case)
OpMeta = namedtuple(
"OpMeta",
"op_type num_inputs input_dims input_types \
output_dims num_outputs args device",
)
def generate_c2_test_from_ops(ops_metadata, bench_op, tags):
"""
This function is used to generate Caffe2 tests based on the metadata
of operators. The metadata includes seven fields which are 1) op_type:
the name of the operator. 2) num_inputs: the number of input blobs.
3) input_dims: a dictionary which includes the shapes of the input blobs.
4) input_types: a list which includes the types of input blobs. 5)
output_dims: a dictionary which includes the shapes of output blobs.
6) num_outputs: the number of output blobs. 7) args: a dictionary which
includes the args for the operator.
Here is an example showing the metadata for the WeightedSum operator
op_type : WeightedSum
num_inputs: 4
input_dims: {'0': [256], '1': [1], '2': [256], '3': [1]}
input_types: ['float', 'float', 'float', 'float']
output_dims: {'0': [256]}
num_outputs: 4
args: {}
TODO(mingzhe0908): introduce device and add it to the benchmark name
"""
for op_metadata in ops_metadata:
tmp_attrs = OpMeta(
op_metadata.op_type,
op_metadata.num_inputs,
op_metadata.input_dims,
op_metadata.input_types,
op_metadata.output_dims,
op_metadata.num_outputs,
op_metadata.args,
op_metadata.device,
)
test_attrs = tmp_attrs._asdict()
op = bench_op()
op.init(**test_attrs)
test_name = op.test_name("short")
input_config = f"Shapes: {op_metadata.input_dims}, Type: {op_metadata.input_types}, Args: {str(op_metadata.args)}"
test_config = TestConfig(test_name, input_config, tags, run_backward=False)
if op is not None:
create_caffe2_op_test_case(op, test_config)
def generate_c2_test(configs, c2_bench_op):
"""This function creates Caffe2 op test based on the given operator"""
return _register_test(configs, c2_bench_op, create_caffe2_op_test_case, False)
def generate_c2_gradient_test(configs, c2_bench_op):
"""This function creates Caffe2 op test based on the given operator"""
return _register_test(configs, c2_bench_op, create_caffe2_op_test_case, True)
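
For reference, a hypothetical sketch of driving the helpers above (the `WeightedSumBenchmark` class and its shapes are illustrative, mirroring the docstring's WeightedSum example; this assumes the removed Caffe2 packages are installed):

```python
from benchmark_caffe2 import (
    Caffe2BenchmarkBase,
    OpMeta,
    generate_c2_test_from_ops,
)
from caffe2.python import core

class WeightedSumBenchmark(Caffe2BenchmarkBase):
    # init() receives the OpMeta fields as keyword arguments.
    def init(self, op_type, num_inputs, input_dims, input_types,
             output_dims, num_outputs, args, device):
        self.args = args
        self.inputs = [
            self.tensor(input_dims[str(i)], device=device)
            for i in range(num_inputs)
        ]
        self.output = self.tensor(output_dims["0"], device=device)
        self.set_module_name(op_type)

    def forward(self):
        return core.CreateOperator(
            "WeightedSum", self.inputs, self.output, **self.args
        )

meta = OpMeta(
    op_type="WeightedSum",
    num_inputs=4,
    input_dims={"0": [256], "1": [1], "2": [256], "3": [1]},
    input_types=["float", "float", "float", "float"],
    output_dims={"0": [256]},
    num_outputs=1,
    args={},
    device="cpu",
)
generate_c2_test_from_ops([meta], WeightedSumBenchmark, tags=["short"])
```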

View File

@@ -13,6 +13,7 @@ import torch
# needs to be imported after torch
import torch.utils.cpp_extension as cpp_extension # noqa: F401
"""Performance microbenchmarks.
This module contains core functionalities for performance microbenchmark tests.
@@ -50,7 +51,7 @@ def _create_test(
"""Create tests with the benchmark backend.
Args:
bench_op_obj: an object instantiated from a subclass of
Caffe2BenchmarkBase/TorchBenchmarkBase which includes tensor
TorchBenchmarkBase which includes tensor
creation and operator execution.
orig_test_attrs: a dictionary that includes test configs.
tags: an attribute in the test config used to filter inputs
@@ -75,7 +76,7 @@ def _build_test(
"""Generate PyTorch/Caffe2 tests of operators with different inputs.
Args:
configs: a dictionary that has the input shapes
bench_op: a subclass of Caffe2BenchmarkBase/TorchBenchmarkBase which includes tensor
bench_op: a subclass of TorchBenchmarkBase which includes tensor
creation and operator execution
OperatorTestCase: a named tuple to save the metadata of a test
run_backward: a bool parameter indicating whether to run the backward path
@@ -233,9 +234,7 @@ class BenchmarkRunner:
)
)
else:
if test_case.framework == "PyTorch":
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
print(
f"# Name: {test_case.test_config.test_name}\n# Input: {test_case.test_config.input_config}"
)
@@ -283,8 +282,7 @@ class BenchmarkRunner:
and the execution time is reported
"""
test_case.run_forward(num_runs=1, print_per_iter=False, cuda_sync=False)
if test_case.framework == "PyTorch":
test_case._output_mean()
test_case._output_mean()
backward_time = timeit.timeit(
functools.partial(test_case.run_backward, iters, print_per_iter), number=1
)
@@ -357,9 +355,6 @@ class BenchmarkRunner:
# Currently, this is a sub-string matching.
op_test_config = test_case.test_config
if self.args.framework:
frameworks = benchmark_utils.process_arg_list(self.args.framework)
operators = (
benchmark_utils.process_arg_list(self.args.operators)
if self.args.operators
@@ -370,7 +365,6 @@
if (
self._check_keep(op_test_config.test_name, self.args.test_name)
and self._check_keep_list(test_case.op_bench.module_name(), operators)
and self._check_keep_list(test_case.framework, frameworks)
and self._check_operator_first_char(
test_case.op_bench.module_name(), self.operator_range
)

View File

@@ -92,7 +92,7 @@ def parse_args():
parser.add_argument(
"--omp-num-threads",
"--omp_num_threads",
help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
help="Number of OpenMP threads used in PyTorch runtime",
default=None,
type=int,
)
@@ -100,7 +100,7 @@ def parse_args():
parser.add_argument(
"--mkl-num-threads",
"--mkl_num_threads",
help="Number of MKL threads used in PyTorch/Caffe2 runtime",
help="Number of MKL threads used in PyTorch runtime",
default=None,
type=int,
)
@@ -135,12 +135,6 @@ def parse_args():
help="Only run the forward path of operators",
)
parser.add_argument(
"--framework",
help="Comma-delimited list of frameworks to test (Caffe2, PyTorch)",
default="Caffe2,PyTorch",
)
parser.add_argument(
"--device",
help="Run tests on the provided architecture (cpu, cuda)",
@@ -160,8 +154,7 @@
# "Modifications to the environment variables after the program has started,
# even if modified by the program itself, are ignored by the OpenMP implementation"
benchmark_utils.set_omp_threads(args.omp_num_threads)
if benchmark_utils.is_pytorch_enabled(args.framework):
torch.set_num_threads(args.omp_num_threads)
torch.set_num_threads(args.omp_num_threads)
if args.mkl_num_threads:
benchmark_utils.set_mkl_threads(args.mkl_num_threads)

View File

@@ -319,14 +319,6 @@ def op_list(**configs):
return generated_configs
def is_caffe2_enabled(framework_arg):
return "Caffe2" in framework_arg
def is_pytorch_enabled(framework_arg):
return "PyTorch" in framework_arg
def get_operator_range(chars_range):
"""Generates the characters from chars_range inclusive."""
if chars_range == "None" or chars_range is None:

View File

@@ -1,49 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise Add operator. Supports both Caffe2/PyTorch."""
# Configs for C2 add operator
add_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2**x for x in range(0, 3)],
dtype=["int", "float"],
tags=["long"],
)
add_short_configs = op_bench.config_list(
attrs=[
[8, 16, 32, "int"],
[16, 16, 64, "float"],
[64, 64, 128, "int"],
],
attr_names=["M", "N", "K", "dtype"],
tags=["short"],
)
class AddBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, dtype):
self.input_one = self.tensor([M, N, K], dtype)
self.input_two = self.tensor([M, N, K], dtype)
self.output = self.tensor([M, N, K], dtype)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench_c2.generate_c2_test(add_long_configs + add_short_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,49 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for BatchBoxCox operator."""
# Configs for C2 BatchBoxCox operator
batch_box_cox_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
batch_box_cox_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class BatchBoxCoxBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.data = self.tensor([M, N], dtype)
self.lambda1 = self.tensor([N], dtype)
self.lambda2 = self.tensor([N], dtype)
self.output = self.tensor([1, 1], dtype)
self.set_module_name("batch_box_cox")
def forward(self):
op = core.CreateOperator(
"BatchBoxCox", [self.data, self.lambda1, self.lambda2], self.output
)
return op
op_bench_c2.generate_c2_test(
batch_box_cox_long_configs + batch_box_cox_short_configs, BatchBoxCoxBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,58 +0,0 @@
import benchmark_caffe2 as op_bench_c2
import numpy
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise BatchGather operator."""
# Configs for C2 BatchGather operator
batch_gather_configs_short = op_bench.config_list(
attr_names=["M", "N", "K"],
attrs=[
[8, 8, 1],
[256, 512, 1],
[512, 512, 1],
[8, 8, 2],
[256, 512, 2],
[512, 512, 2],
],
cross_product_configs={
"device": ["cpu", "cuda"],
},
tags=["short"],
)
batch_gather_configs_long = op_bench.cross_product_configs(
M=[128, 1024], N=[128, 1024], K=[1, 2], device=["cpu", "cuda"], tags=["long"]
)
class BatchGatherBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, device):
self.input_one = self.tensor([M, N, K], device=device)
max_val = N
numpy.random.seed((1 << 32) - 1)
index_dim = numpy.random.randint(0, N)
self.index = self.feed_tensor(
numpy.random.randint(0, max_val, index_dim), device=device
)
self.output = self.tensor([M, index_dim, K], device=device)
self.set_module_name("batch_gather")
def forward(self):
op = core.CreateOperator(
"BatchGather", [self.input_one, self.index], self.output
)
return op
op_bench_c2.generate_c2_test(
batch_gather_configs_long + batch_gather_configs_short, BatchGatherBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,54 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core, dyndep
dyndep.InitOpsLibrary("@/caffe2/caffe2/fb/operators:clip_ranges_op")
"""Microbenchmarks for ClipRanges operator."""
# Configs for C2 ClipRanges operator
clip_ranges_long_configs = op_bench.cross_product_configs(
LENGTH=range(1, 100),
M=[1],
N=[2],
MAX_LENGTH=range(1, 100),
dtype=["int32"],
tags=["long"],
)
clip_ranges_short_configs = op_bench.config_list(
attrs=[
[6, 1, 2, 1, "int32"],
[7, 1, 2, 2, "int32"],
[8, 1, 2, 3, "int32"],
[9, 1, 2, 4, "int32"],
[10, 1, 2, 5, "int32"],
],
attr_names=["LENGTH", "M", "N", "MAX_LENGTH", "dtype"],
tags=["short"],
)
class ClipRangesBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, LENGTH, M, N, MAX_LENGTH, dtype):
self.input = self.tensor([LENGTH, M, N], dtype)
self.max_length = MAX_LENGTH
self.set_module_name("clip_ranges")
def forward(self):
op = core.CreateOperator(
"ClipRanges", self.input, self.input, max_length=self.max_length
)
return op
op_bench_c2.generate_c2_test(
clip_ranges_long_configs + clip_ranges_short_configs, ClipRangesBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,171 +0,0 @@
import random
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for Concat operator. Supports both Caffe2/PyTorch."""
cross_product_configs = {
"device": ["cpu", "cuda"],
"dtype": ["float"],
"add_axis": [0],
}
# Configs for C2 concat operator
cat_configs_short = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[(1, 1, 1), 2, 0], # noqa: E241
[(512, 512, 2), 2, 1], # noqa: E241
[(128, 1024, 2), 2, 1], # noqa: E241
],
cross_product_configs=cross_product_configs,
tags=["short"],
)
# Configs specific to static runtime feature - a fast runtime for pared down models
cat_configs_static_runtime = op_bench.config_list(
attr_names=["sizes", "N", "axis", "add_axis"],
attrs=[
[(1, 40), 5, 1, 1],
[[(1, 160), (1, 14)], -1, 1, 0],
[[(1, 20, 40), (1, 4, 40), (1, 5, 40)], -1, 1, 0],
[[(1, 580), (1, 174)], -1, 1, 0],
[(20, 40), 5, 1, 1],
[[(20, 160), (20, 14)], -1, 1, 0],
[[(20, 20, 40), (20, 4, 40), (20, 5, 40)], -1, 1, 0],
[[(20, 580), (20, 174)], -1, 1, 0],
],
cross_product_configs=cross_product_configs,
tags=["static_runtime"],
)
cat_configs_long = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[(2**10, 2**10, 2), 2, 0], # noqa: E241
[(2**10 + 1, 2**10 - 1, 2), 2, 1], # noqa: E226,E241
[(2**10, 2**10, 2), 2, 2], # noqa: E241
[
[
lambda: random.randint(2**6, 2**7),
2**7 - 17,
2**6 + 1,
], # noqa: E201,E226,E241
5,
0,
],
[
[
2**6 + 2**5,
lambda: random.randint(2**6, 2**7),
2**6,
], # noqa: E201,E226,E241,E272
5,
1,
],
[
[
2**7,
2**6,
lambda: random.randint(2**6, 2**7),
], # noqa: E201,E241,E272
5,
2,
],
[[lambda: random.randint(2**5, 2**6), 2**5, 2**6], 50, 0], # noqa: E241
[
[2**5, lambda: random.randint(2**5, 2**6), 2**6], # noqa: E241,E272
50,
1,
],
[
[
2**5 + 1,
2**6 + 1,
lambda: random.randint(2**5, 2**6),
], # noqa: E226,E241,E272
50,
2,
],
],
cross_product_configs=cross_product_configs,
tags=["long"],
)
# There is a different codepath on CUDA for >4 dimensions
cat_configs_multidim = op_bench.config_list(
attr_names=["sizes", "N", "axis", "dtype"],
attrs=[
[(2**6, 2**5, 2**2, 2**4, 2**5), 2, 2], # noqa: E241
[(2**4, 2**5, 2**2, 2**4, 2**5), 8, 2], # noqa: E241
[
(2**3 + 1, 2**5 - 1, 2**2 + 1, 2**4 - 1, 2**5 + 1),
17,
4,
], # noqa: E226,E241
],
cross_product_configs=cross_product_configs,
tags=["multidim"],
)
cat_configs_manyinputs = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[[lambda: random.randint(1, 10000)], 100, 0],
[[lambda: random.randint(1, 1000)], 1000, 0],
[[lambda: random.randint(1, 500)], 2000, 0],
[[lambda: random.randint(1, 300)], 3000, 0],
],
cross_product_configs=cross_product_configs,
tags=["manyinputs"],
)
class ConcatBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, sizes, N, axis, add_axis, dtype, device):
random.seed(42)
self.inputs = []
self.args = {"axis": axis, "add_axis": add_axis}
gen_sizes = []
if type(sizes) == list and N == -1:
gen_sizes = sizes
else:
for i in range(N):
gen_sizes.append(
[
old_size() if callable(old_size) else old_size
for old_size in sizes
]
)
for s in gen_sizes:
self.inputs.append(self.tensor(s, dtype, device=device))
self.output = self.tensor(gen_sizes[0], dtype, device=device)
self.split_info = self.tensor(gen_sizes[0], "int")
self.set_module_name("concat")
def forward(self):
op = core.CreateOperator(
"Concat", self.inputs, [self.output, self.split_info], **self.args
)
return op
op_bench_c2.generate_c2_test(
cat_configs_short
+ cat_configs_long
+ cat_configs_multidim
+ cat_configs_manyinputs
+ cat_configs_static_runtime,
ConcatBenchmark,
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,50 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for MatMul operator"""
# Configs for C2 Matmul operator
mm_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2**x for x in range(0, 3)],
trans_a=[True, False],
trans_b=[True, False],
tags=["long"],
)
mm_short_configs = op_bench.config_list(
attrs=[
[128, 128, 128, False, True],
[1024, 1024, 256, True, False],
[8192, 8192, 1024, True, False],
],
attr_names=["M", "N", "K", "trans_a", "trans_b"],
tags=["short"],
)
class MatMulBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, trans_a, trans_b):
self.input_one = self.tensor([N, M]) if trans_a else self.tensor([M, N])
self.input_two = self.tensor([K, N]) if trans_b else self.tensor([N, K])
self.args = {"trans_a": trans_a, "trans_b": trans_b}
self.output = self.tensor([M, K])
self.set_module_name("matmul")
def forward(self):
op = core.CreateOperator(
"MatMul", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench_c2.generate_c2_test(mm_long_configs + mm_short_configs, MatMulBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,48 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for QuantileOp operator."""
# Configs for C2 QuantileOp operator
quantile_op_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
quantile_op_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class QuantileOpBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.data = [self.tensor([N], dtype) for _ in range(M)]
self.quantile = 0.3
self.output = self.tensor([1], dtype)
self.set_module_name("quantile_op")
def forward(self):
op = core.CreateOperator(
"Quantile", inputs=self.data, outputs=self.output, quantile=self.quantile
)
return op
op_bench_c2.generate_c2_test(
quantile_op_long_configs + quantile_op_short_configs, QuantileOpBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,44 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise ReplaceNaN operator."""
# Configs for C2 ReplaceNaN operator
replace_nan_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
replace_nan_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class ReplaceNaNBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.input = self.tensor([M, N], dtype)
self.set_module_name("replace_nan")
def forward(self):
op = core.CreateOperator("ReplaceNaN", self.input, self.input, value=1.0)
return op
op_bench_c2.generate_c2_test(
replace_nan_long_configs + replace_nan_short_configs, ReplaceNaNBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,41 +0,0 @@
import operator_benchmark as op_bench
from caffe2.python import core
add_configs = op_bench.cross_product_configs(
M=[8], N=[8], K=[8], tags=["short"], device=["cuda", "cpu"]
)
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K, device):
self.set_module_name("add")
self.input_one = self.tensor([M, N, K], device=device)
self.input_two = self.tensor([M, N, K], device=device)
self.input_one_grad = self.tensor([M, N, K], device=device)
self.input_two_grad = self.tensor([M, N, K], device=device)
self.output = self.tensor([M, N, K], device=device)
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
def backward(self):
grad_op = core.CreateOperator(
"AddGradient",
[self.output, self.input_one, self.input_two],
[self.input_one_grad, self.input_two_grad],
**self.args,
)
return grad_op
op_bench.generate_c2_test(add_configs, AddBenchmark)
op_bench.generate_c2_gradient_test(add_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,18 +1,13 @@
import argparse
import sys
import torch
import torch.utils.benchmark as benchmark_utils
try:
from benchmarks.fastrnns.factory import lstm_creator
except ImportError:
from caffe2.benchmarks.fastrnns.factory import lstm_creator
from benchmarks.fastrnns.factory import lstm_creator
from torchvision.models import resnet50
import torch
import torch.utils.benchmark as benchmark_utils
def prepare_lstm_jit(bench_args):
model_def = lstm_creator(

View File

@@ -1,118 +0,0 @@
#!/usr/bin/env python3
import argparse
import ast
from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper
from caffe2.python.predictor import mobile_exporter
def parse_kwarg(kwarg_str):
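"""Parse a 'key=value' CLI pair; literal-evaluate the value when possible, falling back to the raw string."""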
key, value = kwarg_str.split("=")
try:
value = ast.literal_eval(value)
except ValueError:
pass
return key, value
def main(args):
# User defined keyword arguments
kwargs = {"order": "NCHW", "use_cudnn": False}
kwargs.update(dict(args.kwargs))
model = ModelHelper(name=args.benchmark_name)
op_type = args.operator # assumes a brew type op name
input_name = args.input_name
output_name = args.output_name
iters = int(args.instances)
for i in range(iters):
input_blob_name = input_name + (str(i) if i > 0 and args.chain else "")
output_blob_name = output_name + str(i + 1)
add_op = getattr(brew, op_type)
add_op(model, input_blob_name, output_blob_name, **kwargs)
if args.chain:
input_name, output_name = output_name, input_name
workspace.RunNetOnce(model.param_init_net)
init_net, predict_net = mobile_exporter.Export(workspace, model.net, model.params)
if args.debug:
print("init_net:")
for op in init_net.op:
print(" ", op.type, op.input, "-->", op.output)
print("predict_net:")
for op in predict_net.op:
print(" ", op.type, op.input, "-->", op.output)
with open(args.predict_net, "wb") as f:
f.write(predict_net.SerializeToString())
with open(args.init_net, "wb") as f:
f.write(init_net.SerializeToString())
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Utility to generate Caffe2 benchmark models."
)
parser.add_argument("operator", help="Caffe2 operator to benchmark.")
parser.add_argument(
"-b",
"--blob",
help="Instantiate a blob --blob name=dim1,dim2,dim3",
action="append",
)
parser.add_argument("--context", help="Context to run on.", default="CPU")
parser.add_argument(
"--kwargs",
help="kwargs to pass to operator.",
nargs="*",
type=parse_kwarg,
default=[],
)
parser.add_argument(
"--init-net",
"--init_net",
help="Output initialization net.",
default="init_net.pb",
)
parser.add_argument(
"--predict-net",
"--predict_net",
help="Output prediction net.",
default="predict_net.pb",
)
parser.add_argument(
"--benchmark-name",
"--benchmark_name",
help="Name of the benchmark network",
default="benchmark",
)
parser.add_argument(
"--input-name", "--input_name", help="Name of the input blob.", default="data"
)
parser.add_argument(
"--output-name",
"--output_name",
help="Name of the output blob.",
default="output",
)
parser.add_argument(
"--instances", help="Number of instances to run the operator.", default="1"
)
parser.add_argument(
"-d", "--debug", help="Print debug information.", action="store_true"
)
parser.add_argument(
"-c",
"--chain",
help="Chain ops together (create data dependencies)",
action="store_true",
)
args = parser.parse_args()
main(args)

View File

@@ -1,10 +0,0 @@
:: Installation scripts for appveyor.
@echo on
if "%USE_CUDA%" == "ON" call %~dp0%install_cuda.bat
:: Miniconda path for appveyor
set PATH=C:\Miniconda-x64;C:\Miniconda-x64\Scripts;%PATH%
:: Install numpy
conda install -y numpy

View File

@@ -1,22 +0,0 @@
@echo on
appveyor DownloadFile ^
https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^
-FileName cuda_8.0.44_windows.exe
appveyor Downloadfile ^
http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^
-FileName cudnn-8.0-windows10-x64-v5.1.zip
cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0
set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH%
7z x cudnn-8.0-windows10-x64-v5.1.zip
copy cuda\include\cudnn.h ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\"
copy cuda\lib\x64\cudnn.lib ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\"
copy cuda\bin\cudnn64_5.dll ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\"
:: Make sure that nvcc is working correctly.
nvcc -V || exit /b

View File

@@ -1,175 +0,0 @@
#! /usr/bin/env python3
import os
import subprocess
import sys
import tarfile
import tempfile
from urllib.request import urlretrieve
from caffe2.python.models.download import (
deleteDirectory,
downloadFromURLToFile,
getURLFromName,
)
class SomeClass:
# largely copied from
# https://github.com/onnx/onnx-caffe2/blob/master/tests/caffe2_ref_test.py
def _download(self, model):
model_dir = self._caffe2_model_dir(model)
assert not os.path.exists(model_dir)
os.makedirs(model_dir)
for f in ["predict_net.pb", "init_net.pb", "value_info.json"]:
url = getURLFromName(model, f)
dest = os.path.join(model_dir, f)
try:
try:
downloadFromURLToFile(url, dest, show_progress=False)
except TypeError:
# show_progress not supported prior to
# Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
# (Sep 17, 2017)
downloadFromURLToFile(url, dest)
except Exception as e:
print(f"Abort: {e}")
print("Cleaning up...")
deleteDirectory(model_dir)
sys.exit(1)
def _caffe2_model_dir(self, model):
caffe2_home = os.path.expanduser("~/.caffe2")
models_dir = os.path.join(caffe2_home, "models")
return os.path.join(models_dir, model)
def _onnx_model_dir(self, model):
onnx_home = os.path.expanduser("~/.onnx")
models_dir = os.path.join(onnx_home, "models")
model_dir = os.path.join(models_dir, model)
return model_dir, os.path.dirname(model_dir)
# largely copied from
# https://github.com/onnx/onnx/blob/master/onnx/backend/test/runner/__init__.py
def _prepare_model_data(self, model):
model_dir, models_dir = self._onnx_model_dir(model)
if os.path.exists(model_dir):
return
os.makedirs(model_dir)
url = f"https://s3.amazonaws.com/download.onnx/models/{model}.tar.gz"
# On Windows, NamedTemporaryFile cannot be opened for a
# second time
download_file = tempfile.NamedTemporaryFile(delete=False)
try:
download_file.close()
print(f"Start downloading model {model} from {url}")
urlretrieve(url, download_file.name)
print("Done")
with tarfile.open(download_file.name) as t:
t.extractall(models_dir)
except Exception as e:
print(f"Failed to prepare data for model {model}: {e}")
raise
finally:
os.remove(download_file.name)
models = [
"bvlc_alexnet",
"densenet121",
"inception_v1",
"inception_v2",
"resnet50",
# TODO currently onnx can't translate squeezenet :(
# 'squeezenet',
"vgg16",
# TODO currently vgg19 doesn't work in the CI environment,
# possibly due to OOM
# 'vgg19'
]
def download_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: downloading", model)
caffe2_model_dir = sc._caffe2_model_dir(model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
if not os.path.exists(caffe2_model_dir):
sc._download(model)
if not os.path.exists(onnx_model_dir):
sc._prepare_model_data(model)
def generate_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: generating", model)
caffe2_model_dir = sc._caffe2_model_dir(model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
subprocess.check_call(["echo", model])
with open(os.path.join(caffe2_model_dir, "value_info.json")) as f:
value_info = f.read()
subprocess.check_call(
[
"convert-caffe2-to-onnx",
"--caffe2-net-name",
model,
"--caffe2-init-net",
os.path.join(caffe2_model_dir, "init_net.pb"),
"--value-info",
value_info,
"-o",
os.path.join(onnx_model_dir, "model.pb"),
os.path.join(caffe2_model_dir, "predict_net.pb"),
]
)
subprocess.check_call(
["tar", "-czf", model + ".tar.gz", model], cwd=onnx_models_dir
)
def upload_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: uploading", model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
subprocess.check_call(
[
"aws",
"s3",
"cp",
model + ".tar.gz",
f"s3://download.onnx/models/{model}.tar.gz",
"--acl",
"public-read",
],
cwd=onnx_models_dir,
)
def cleanup():
sc = SomeClass()
for model in models:
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
os.remove(os.path.join(os.path.dirname(onnx_model_dir), model + ".tar.gz"))
if __name__ == "__main__":
try:
subprocess.check_call(["aws", "sts", "get-caller-identity"])
except Exception:
print(
"update-caffe2-models.py: please run `aws configure` manually to set up credentials"
)
sys.exit(1)
if sys.argv[1] == "download":
download_models()
if sys.argv[1] == "generate":
generate_models()
elif sys.argv[1] == "upload":
upload_models()
elif sys.argv[1] == "cleanup":
cleanup()

View File

@@ -1,372 +0,0 @@
#! /usr/bin/env python3
import argparse
import glob
import json
import os
import shutil
import tarfile
import tempfile
from urllib.request import urlretrieve
import boto3
import numpy as np
import onnx
import onnx.backend
from onnx import numpy_helper
import caffe2.python.onnx.backend
import caffe2.python.onnx.frontend
import caffe2.python.workspace as c2_workspace
from caffe2.proto import caffe2_pb2
from caffe2.python.models.download import (
deleteDirectory,
downloadFromURLToFile,
getURLFromName,
)
"""A script converting Caffe2 models to ONNX, and updating ONNX model zoos.
Arguments:
-v, verbose
--local-dir, where we store the ONNX and Caffe2 models
--no-cache, ignore existing models in local-dir
--clean-test-data, delete all the existing test data when updating ONNX model zoo
--add-test-data, add the given number of sets of test data for each ONNX model
--only-local, run locally (for testing purpose)
Examples:
# store the data in /home/username/zoo-dir, delete existing test data, ignore local cache,
# and generate 3 sets of new test data
python update-caffe2-models.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
"""
# TODO: Add GPU support
def upload_onnx_model(model_name, zoo_dir, backup=False, only_local=False):
if only_local:
print("No uploading in local only mode.")
return
model_dir = os.path.join(zoo_dir, model_name)
suffix = "-backup" if backup else ""
if backup:
print(f"Backing up the previous version of ONNX model {model_name}...")
rel_file_name = f"{model_name}{suffix}.tar.gz"
abs_file_name = os.path.join(zoo_dir, rel_file_name)
print(f"Compressing {model_name} model to {abs_file_name}")
with tarfile.open(abs_file_name, "w:gz") as f:
f.add(model_dir, arcname=model_name)
file_size = os.stat(abs_file_name).st_size
print(
f"Uploading {abs_file_name} ({float(file_size) / 1024 / 1024} MB) to s3 cloud..."
)
client = boto3.client("s3", "us-east-1")
transfer = boto3.s3.transfer.S3Transfer(client)
transfer.upload_file(
abs_file_name,
"download.onnx",
f"models/latest/{rel_file_name}",
extra_args={"ACL": "public-read"},
)
print(f"Successfully uploaded {rel_file_name} to s3!")
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
return
else:
shutil.rmtree(model_dir)
url = f"https://s3.amazonaws.com/download.onnx/models/latest/{model_name}.tar.gz"
download_file = tempfile.NamedTemporaryFile(delete=False)
try:
download_file.close()
print(
f"Downloading ONNX model {model_name} from {url} and save in {download_file.name} ...\n"
)
urlretrieve(url, download_file.name)
with tarfile.open(download_file.name) as t:
print(f"Extracting ONNX model {model_name} to {zoo_dir} ...\n")
t.extractall(zoo_dir)
except Exception as e:
print(f"Failed to download/backup data for ONNX model {model_name}: {e}")
if not os.path.exists(model_dir):
os.makedirs(model_dir)
finally:
os.remove(download_file.name)
if not only_local:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
def download_caffe2_model(model_name, zoo_dir, use_cache=True):
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
return
else:
shutil.rmtree(model_dir)
os.makedirs(model_dir)
for f in ["predict_net.pb", "init_net.pb", "value_info.json"]:
url = getURLFromName(model_name, f)
dest = os.path.join(model_dir, f)
try:
try:
downloadFromURLToFile(url, dest, show_progress=False)
except TypeError:
# show_progress not supported prior to
# Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
# (Sep 17, 2017)
downloadFromURLToFile(url, dest)
except Exception as e:
print(f"Abort: {e}")
print("Cleaning up...")
deleteDirectory(model_dir)
raise
def caffe2_to_onnx(caffe2_model_name, caffe2_model_dir):
caffe2_init_proto = caffe2_pb2.NetDef()
caffe2_predict_proto = caffe2_pb2.NetDef()
with open(os.path.join(caffe2_model_dir, "init_net.pb"), "rb") as f:
caffe2_init_proto.ParseFromString(f.read())
caffe2_init_proto.name = f"{caffe2_model_name}_init"
with open(os.path.join(caffe2_model_dir, "predict_net.pb"), "rb") as f:
caffe2_predict_proto.ParseFromString(f.read())
caffe2_predict_proto.name = caffe2_model_name
with open(os.path.join(caffe2_model_dir, "value_info.json"), "rb") as f:
value_info = json.loads(f.read())
print(
f"Converting Caffe2 model {caffe2_model_name} in {caffe2_model_dir} to ONNX format"
)
onnx_model = caffe2.python.onnx.frontend.caffe2_net_to_onnx_model(
init_net=caffe2_init_proto,
predict_net=caffe2_predict_proto,
value_info=value_info,
)
return onnx_model, caffe2_init_proto, caffe2_predict_proto
def tensortype_to_ndarray(tensor_type):
shape = []
for dim in tensor_type.shape.dim:
shape.append(dim.dim_value)
if tensor_type.elem_type == onnx.TensorProto.FLOAT:
type = np.float32
elif tensor_type.elem_type == onnx.TensorProto.INT:
type = np.int32
else:
raise ValueError(f"Unsupported tensor element type: {tensor_type.elem_type}")
array = np.random.rand(*shape).astype(type)
return array
def generate_test_input_data(onnx_model, scale):
real_inputs_names = list(
{input.name for input in onnx_model.graph.input}
- {init.name for init in onnx_model.graph.initializer}
)
real_inputs = []
for name in real_inputs_names:
for input in onnx_model.graph.input:
if name == input.name:
real_inputs.append(input)
test_inputs = []
for input in real_inputs:
ndarray = tensortype_to_ndarray(input.type.tensor_type)
test_inputs.append((input.name, ndarray * scale))
return test_inputs
def generate_test_output_data(caffe2_init_net, caffe2_predict_net, inputs):
p = c2_workspace.Predictor(caffe2_init_net, caffe2_predict_net)
inputs_map = {input[0]: input[1] for input in inputs}
output = p.run(inputs_map)
c2_workspace.ResetWorkspace()
return output
def onnx_verify(onnx_model, inputs, ref_outputs):
prepared = caffe2.python.onnx.backend.prepare(onnx_model)
onnx_inputs = []
for input in inputs:
if isinstance(input, tuple):
onnx_inputs.append(input[1])
else:
onnx_inputs.append(input)
onnx_outputs = prepared.run(inputs=onnx_inputs)
np.testing.assert_almost_equal(onnx_outputs, ref_outputs, decimal=3)
model_mapping = {
"bvlc_alexnet": "bvlc_alexnet",
"bvlc_googlenet": "bvlc_googlenet",
"bvlc_reference_caffenet": "bvlc_reference_caffenet",
"bvlc_reference_rcnn_ilsvrc13": "bvlc_reference_rcnn_ilsvrc13",
"densenet121": "densenet121",
#'finetune_flickr_style': 'finetune_flickr_style',
"inception_v1": "inception_v1",
"inception_v2": "inception_v2",
"resnet50": "resnet50",
"shufflenet": "shufflenet",
"squeezenet": "squeezenet_old",
#'vgg16': 'vgg16',
"vgg19": "vgg19",
"zfnet512": "zfnet512",
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Update the ONNX models.")
parser.add_argument("-v", action="store_true", default=False, help="verbose")
parser.add_argument(
"--local-dir",
type=str,
default=os.path.expanduser("~"),
help="local dir to store Caffe2 and ONNX models",
)
parser.add_argument(
"--no-cache",
action="store_true",
default=False,
help="whether use local ONNX models",
)
parser.add_argument(
"--clean-test-data",
action="store_true",
default=False,
help="remove the old test data",
)
parser.add_argument(
"--add-test-data", type=int, default=0, help="add new test data"
)
parser.add_argument(
"--only-local",
action="store_true",
default=False,
help="no upload including backup",
)
args = parser.parse_args()
delete_test_data = args.clean_test_data
add_test_data = args.add_test_data
use_cache = not args.no_cache
only_local = args.only_local
root_dir = args.local_dir
caffe2_zoo_dir = os.path.join(root_dir, ".caffe2", "models")
onnx_zoo_dir = os.path.join(root_dir, ".onnx", "models")
for onnx_model_name in model_mapping:
c2_model_name = model_mapping[onnx_model_name]
print(
f"####### Processing ONNX model {onnx_model_name} ({c2_model_name} in Caffe2) #######"
)
download_caffe2_model(c2_model_name, caffe2_zoo_dir, use_cache=use_cache)
download_onnx_model(
onnx_model_name, onnx_zoo_dir, use_cache=use_cache, only_local=only_local
)
onnx_model_dir = os.path.join(onnx_zoo_dir, onnx_model_name)
if delete_test_data:
print("Deleting all the existing test data...")
# NB: For now, we don't delete the npz files.
# for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
# os.remove(f)
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
shutil.rmtree(f)
onnx_model, c2_init_net, c2_predict_net = caffe2_to_onnx(
c2_model_name, os.path.join(caffe2_zoo_dir, c2_model_name)
)
print(f"Deleteing old ONNX {onnx_model_name} model...")
for f in glob.glob(os.path.join(onnx_model_dir, "model*".format())):
os.remove(f)
print(f"Serializing generated ONNX {onnx_model_name} model ...")
with open(os.path.join(onnx_model_dir, "model.onnx"), "wb") as file:
file.write(onnx_model.SerializeToString())
print(f"Verifying model {onnx_model_name} with ONNX model checker...")
onnx.checker.check_model(onnx_model)
total_existing_data_set = 0
print(f"Verifying model {onnx_model_name} with existing test data...")
for f in glob.glob(os.path.join(onnx_model_dir, "*.npz")):
test_data = np.load(f, encoding="bytes")
inputs = list(test_data["inputs"])
ref_outputs = list(test_data["outputs"])
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
inputs = []
inputs_num = len(glob.glob(os.path.join(f, "input_*.pb")))
for i in range(inputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"input_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
inputs.append(numpy_helper.to_array(tensor))
ref_outputs = []
ref_outputs_num = len(glob.glob(os.path.join(f, "output_*.pb")))
for i in range(ref_outputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"output_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
ref_outputs.append(numpy_helper.to_array(tensor))
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
starting_index = 0
while os.path.exists(
os.path.join(onnx_model_dir, f"test_data_set_{starting_index}")
):
starting_index += 1
if total_existing_data_set == 0 and add_test_data == 0:
add_test_data = 3
total_existing_data_set = 3
print(f"Generating {add_test_data} sets of new test data...")
for i in range(starting_index, add_test_data + starting_index):
data_dir = os.path.join(onnx_model_dir, f"test_data_set_{i}")
os.makedirs(data_dir)
inputs = generate_test_input_data(onnx_model, 255)
ref_outputs = generate_test_output_data(c2_init_net, c2_predict_net, inputs)
onnx_verify(onnx_model, inputs, ref_outputs)
for index, input in enumerate(inputs):
tensor = numpy_helper.from_array(input[1])
with open(os.path.join(data_dir, f"input_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
for index, output in enumerate(ref_outputs):
tensor = numpy_helper.from_array(output)
with open(os.path.join(data_dir, f"output_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
del onnx_model
del c2_init_net
del c2_predict_net
upload_onnx_model(
onnx_model_name, onnx_zoo_dir, backup=False, only_local=only_local
)
print("\n\n")