[Caffe2]Remove Caffe2 scripts and benchmarks (#126747)

These scripts and benchmarks are obsolete following the removal of Caffe2.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/126747
Approved by: https://github.com/ezyang, https://github.com/malfet
cyy
2024-06-05 23:46:29 +00:00
committed by PyTorch MergeBot
parent e98662bed9
commit 2fd75667b4
23 changed files with 29 additions and 1674 deletions

View File

@@ -1,45 +0,0 @@
import numpy as np
from utils import NUM_LOOP_ITERS
from caffe2.python import core, workspace
workspace.GlobalInit(["caffe2"])
def add_blob(ws, blob_name, tensor_size):
blob_tensor = np.random.randn(*tensor_size).astype(np.float32)
ws.FeedBlob(blob_name, blob_tensor)
class C2SimpleNet:
"""
This module constructs a net with the 'op_name' operator. The net consists
of a series of such operators.
It initializes the workspace with input blobs equal in number to the
parameters needed for the op.
Provides a forward method to run the net niters times.
"""
def __init__(self, op_name, num_inputs=1, debug=False):
self.input_names = []
self.net = core.Net("framework_benchmark_net")
self.input_names = [f"in_{i}" for i in range(num_inputs)]
for i in range(num_inputs):
add_blob(workspace, self.input_names[i], [1])
self.net.AddExternalInputs(self.input_names)
op_constructor = getattr(self.net, op_name)
op_constructor(self.input_names)
self.output_name = self.net._net.op[-1].output
print(f"Benchmarking op {op_name}:")
for _ in range(NUM_LOOP_ITERS):
output_name = self.net._net.op[-1].output
self.input_names[-1] = output_name[0]
assert len(self.input_names) == num_inputs
op_constructor(self.input_names)
workspace.CreateNet(self.net)
if debug:
print(self.net._net)
def forward(self, niters):
workspace.RunNet(self.net, niters, False)
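
For context, a minimal usage sketch of this removed helper (assuming the old Caffe2 Python packages and the sibling `utils` module are still importable):

```python
# Hypothetical driver for the removed C2SimpleNet helper: builds a net
# that chains NUM_LOOP_ITERS "Sum" ops and runs it 100 times.
from C2Module import C2SimpleNet

net = C2SimpleNet("Sum", num_inputs=2)
net.forward(100)  # executes the net 100 times via workspace.RunNet
```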

View File

@@ -1,6 +1,5 @@
import argparse
from C2Module import C2SimpleNet
from pt_wrapper_module import WrapperModule
from SimpleAddModule import add_tensors_loop, SimpleAddModule
@@ -19,9 +18,6 @@ buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_ben
--add-op --graph-mode --eager-mode (Runs both graph mode and eager mode)
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
--add-op --graph-mode (Runs only graph mode)
To run C2 benchmark:
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
--add-op --benchmark-c2-net
"""
SUPPORTED_OPS = {"add_op"}
@@ -49,39 +45,22 @@ def benchmark_simple_fn(args, config, module_config, module_type, result):
module_type: Type of the module to be wrapped. e.g. SimpleAddModule for add op.
result: dictionary instance to be populated with the benchmark result (latency per iter).
"""
benchmark_c2_net = args.benchmark_c2_net
print(f"Benchmarking {module_type.__name__}")
if benchmark_c2_net:
op_name = module_config.c2_op
num_inputs = module_config.num_params
module = C2SimpleNet(op_name, num_inputs=num_inputs, debug=args.debug)
latency_per_iter_ms = benchmark_module(config, module)
result[op_name] = latency_per_iter_ms
else:
f_name = (
module_config.pt_fn.__name__
+ ":Num Operands="
+ str(module_config.num_params)
)
graph_mode_str = "Graph mode" + ":" + str(module_config.graph_mode)
result_key = ",".join((f_name, graph_mode_str))
module = WrapperModule(module_type, module_config, args.debug, args.save)
latency_per_iter_ms = benchmark_module(
config, module, args.use_throughput_benchmark
)
result[result_key] = latency_per_iter_ms
f_name = (
module_config.pt_fn.__name__ + ":Num Operands=" + str(module_config.num_params)
)
graph_mode_str = "Graph mode" + ":" + str(module_config.graph_mode)
result_key = ",".join((f_name, graph_mode_str))
module = WrapperModule(module_type, module_config, args.debug, args.save)
latency_per_iter_ms = benchmark_module(
config, module, args.use_throughput_benchmark
)
result[result_key] = latency_per_iter_ms
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--op", default="add_op", dest="op", type=str)
parser.add_argument(
"--benchmark-c2-net",
"--benchmark_c2_net",
default=False,
dest="benchmark_c2_net",
action="store_true",
)
parser.add_argument(
"--use-throughput-benchmark",
"--use_throughput_benchmark",
@@ -107,10 +86,6 @@ def main():
if args.op not in SUPPORTED_OPS:
print(f"Op {args.op} is not supported: Supported ops are:{SUPPORTED_OPS}")
return
assert not (
args.benchmark_c2_net and args.use_throughput_benchmark
), "Benchmarking of C2 net via throughput benchmarking is not yet supported"
num_warmup_iters = args.num_warmup_iters
num_iters = args.num_iters
config = BenchmarkConfig(num_warmup_iters, num_iters)
@@ -120,10 +95,7 @@ def main():
result = {}
if args.op == "add_op":
num_params = 2
if args.benchmark_c2_net:
module_config = ModuleConfig(None, "Sum", num_params, None)
else:
module_config = ModuleConfig(add_tensors_loop, None, num_params, graph_mode)
module_config = ModuleConfig(add_tensors_loop, None, num_params, graph_mode)
benchmark_simple_fn(args, config, module_config, SimpleAddModule, result)
print_results(result)

View File

@@ -1,6 +1,6 @@
# PyTorch/Caffe2 Operator Micro-benchmarks
# PyTorch Operator Micro-benchmarks
This benchmark suite provides a systematic way to measure the performance of operators for a wide range of inputs. The generated benchmark data fully characterizes the performance of an operator in terms of execution time and the efficiency of the PyTorch/Caffe2 frameworks used.
This benchmark suite provides a systematic way to measure the performance of operators for a wide range of inputs. The generated benchmark data fully characterizes the performance of an operator in terms of execution time and the efficiency of the PyTorch framework used.
## Features
@@ -8,7 +8,7 @@ Key Features:
1\. Language used: Python
2\. Supported Frameworks: PyTorch and Caffe2
2\. Supported Frameworks: PyTorch
3\. Supported PyTorch mode: eager and JIT
@@ -49,7 +49,7 @@ python -m benchmark_all_test
```
## Code to support `torch.add` in the benchmark
The following example shows the code to support `torch.add` with 27 different tests. In the subpages of this wiki, we'll step through the complete flow of adding PyTorch and Caffe2 operators to the benchmark suite. Existing benchmarks for operators are in `pt` and `c2` directories and we highly recommend putting your new operators in those locations.
The following example shows the code to support `torch.add` with 27 different tests. In the subpages of this wiki, we'll step through the complete flow of adding PyTorch operators to the benchmark suite. Existing benchmarks for operators are in the `pt` directory, and we highly recommend putting your new operators there.
```python
add_short_configs = op_bench.cross_product_configs(
@@ -77,7 +77,7 @@ op_bench.generate_pt_test(add_short_configs, AddBenchmark)
The output is intended to be in a human-readable format. Here is an example output for `torch.add`:
```
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# PyTorch Operator Micro-benchmarks
# ----------------------------------------
# Tag : short
@@ -146,7 +146,7 @@ python -m pt.add_test --tag-filter long
```
## Adding New Operators to the Benchmark Suite
In the previous sections, we gave several examples to show how to run the already available operators in the benchmark suite. In the following sections, we'll step through the complete flow of adding PyTorch and Caffe2 operators to the benchmark suite. Existing benchmarks for operators are in `pt` and `c2` directories and we highly recommend putting your new operators in those directories as well.
In the previous sections, we gave several examples to show how to run the already available operators in the benchmark suite. In the following sections, we'll step through the complete flow of adding PyTorch operators to the benchmark suite. Existing benchmarks for operators are in the `pt` directory, and we highly recommend putting your new operators there as well.
### Add a New PyTorch Operator
Let's say you want to measure the execution time of the following operator:
@@ -260,55 +260,6 @@ if __name__ == "__main__":
```
That's it. You just added a new operator to the benchmark suite!
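Since the diff elides the full example above, here is a minimal sketch of the pattern the README describes, assuming the dict-style `TorchBenchmarkBase` interface used by the `pt` tests; treat it as illustrative rather than the exact file contents:

```python
import operator_benchmark as op_bench
import torch

# Input shapes to sweep; the tag lets you select them with --tag-filter.
mul_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [8, 16, 32],
        [16, 16, 64],
    ],
    tags=["short"],
)

class MulBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        self.inputs = {
            "input_one": torch.rand(M, N, K),
            "input_two": torch.rand(M, N, K),
        }
        self.set_module_name("mul")

    def forward(self, input_one, input_two):
        return torch.mul(input_one, input_two)

op_bench.generate_pt_test(mul_short_configs, MulBenchmark)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
```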
### Add a New Caffe2 Operator
The steps to add a new Caffe2 operator are the same as those for a PyTorch operator. The code below shows how to add the Caffe2 `Add` operator:
```python
import operator_benchmark as op_bench
from caffe2.python import core
add_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2 ** x for x in range(0, 3)],
tags=["long"]
)
add_short_configs = op_bench.config_list(
attrs=[
[8, 16, 32],
[16, 16, 64],
[64, 64, 128],
],
attr_names=["M", "N", "K"],
tags=["short"],
)
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K):
self.input_one = self.tensor(M, N, K)
self.input_two = self.tensor(M, N, K)
self.output = self.tensor(M, N, K)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench.generate_c2_test(add_long_configs + add_short_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()
```
There are two things worth mentioning in this code:
* `self.tensor` is a helper function which takes shapes and returns a Caffe2 blob. It is designed to make the tensor creation step easier compared to the standard Caffe2 way.
* `generate_c2_test` is used to register Caffe2 tests with the benchmark.
### Add a List of Operators
In the previous sections, we introduced the steps required to add a single operator to the benchmark suite. There are scenarios where you want to extend the benchmark suite with a list of operators which can share the same inputs. For example, to benchmark `abs` and `acos` operators, you can use the same set of inputs for both.
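The example that followed this paragraph is elided by the diff; the sketch below reconstructs the pattern, assuming the `op_bench.op_list` and `generate_pt_tests_from_op_list` helpers used elsewhere in the suite:

```python
import operator_benchmark as op_bench
import torch

# A list of ops that share the same input configs.
unary_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["abs", torch.abs],
        ["acos", torch.acos],
    ],
)

unary_ops_configs = op_bench.config_list(
    attr_names=["M", "N"],
    attrs=[[128, 128]],
    tags=["short"],
)

class UnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, op_func):
        self.inputs = {"input": torch.rand(M, N)}
        self.op_func = op_func

    def forward(self, input):
        return self.op_func(input)

op_bench.generate_pt_tests_from_op_list(
    unary_ops_list, unary_ops_configs, UnaryOpBenchmark
)

if __name__ == "__main__":
    op_bench.benchmark_runner.main()
```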
@@ -416,37 +367,3 @@ The example below shows the relevant code for that:
self.input_one = torch.rand(M, N, K, requires_grad=True)
generate_pt_gradient_test(long_configs + short_configs, TorchAddBenchmark)
```
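For completeness, a hedged sketch of a full gradient test in the dict-style interface (the configs here are placeholders, not the elided originals):

```python
import operator_benchmark as op_bench
import torch

short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[[8, 16, 32]],
    tags=["short"],
)

class TorchAddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        # requires_grad=True makes the backward path measurable
        self.inputs = {
            "input_one": torch.rand(M, N, K, requires_grad=True),
            "input_two": torch.rand(M, N, K, requires_grad=True),
        }
        self.set_module_name("add")

    def forward(self, input_one, input_two):
        return torch.add(input_one, input_two)

op_bench.generate_pt_gradient_test(short_configs, TorchAddBenchmark)
```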
#### For Caffe2 Gradient Ops
To add Caffe2 gradient ops, we need to implement a new backward method in the benchmark class:
```python
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K):
self.input_one = self.tensor(M, N, K)
self.input_two = self.tensor(M, N, K)
self.input_one_grad = self.tensor(M, N, K)
self.input_two_grad = self.tensor(M, N, K)
self.output = self.tensor(M, N, K)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
def backward(self):
grad_op = core.CreateOperator(
"AddGradient",
[self.output, self.input_one, self.input_two],
[self.input_one_grad, self.input_two_grad], **self.args
)
return grad_op
op_bench.generate_c2_gradient_test(long_configs + short_configs, AddBenchmark)
```
After the class is implemented, we need to register the tests with the `generate_c2_gradient_test` function.
This concludes the overview of the operator benchmark suite.

View File

@@ -1,202 +0,0 @@
from collections import namedtuple
import benchmark_utils
from benchmark_test_generator import _register_test
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from .benchmark_core import TestConfig
"""Caffe2 performance microbenchmarks.
This module contains Caffe2-specific functionalities for performance
microbenchmarks.
"""
class Caffe2BenchmarkBase:
"""This is a base class used to create Caffe2 operator benchmark"""
tensor_index = 0
test_index = 0
def __init__(self):
self.args = {}
self.user_provided_name = None
self._num_inputs_require_grads = 0
self._pass_count = 0
def _set_backward_test(self, is_backward):
pass
def _device_option(self, device):
"""This method is used to set device option."""
if device not in ["cuda", "cpu"]:
raise ValueError("Missing attrs in configs")
if "cuda" in device:
self.dev = core.DeviceOption(caffe2_pb2.CUDA, 0)
else:
self.dev = core.DeviceOption(caffe2_pb2.CPU)
return self.dev
def tensor(self, shapes, dtype="float32", device="cpu"):
"""A wapper function to create C2 tensor filled with random data.
The name/label of the tensor is returned and it is available
throughout the benchmark execution phase.
Args:
shapes: an int or a sequence of ints defining the shape of the tensor
dtype: use the dtypes from numpy
(https://docs.scipy.org/doc/numpy/user/basics.types.html)
Returns:
the name of a C2 blob of the given dtype
"""
return self.feed_tensor(benchmark_utils.numpy_random(dtype, *shapes), device)
def feed_tensor(self, tensor, device="cpu"):
"""Similar to tensor, but can supply any data compatible with FeedBlob"""
blob_name = "blob_" + str(Caffe2BenchmarkBase.tensor_index)
dev = self._device_option(device)
with core.DeviceScope(dev):
workspace.FeedBlob(blob_name, tensor)
Caffe2BenchmarkBase.tensor_index += 1
return blob_name
def module_name(self):
"""this is used to label the operator being benchmarked"""
if self.user_provided_name:
return self.user_provided_name
return self.__class__.__name__
def set_module_name(self, name):
self.user_provided_name = name
def _value_to_str(self, value):
"""if value is bool, we will convert it to 0 and 1"""
ret = value
if type(value) == bool:
ret = int(value)
return str(ret)
def test_name(self, name_type="long", **kargs):
"""this is a globally unique name which can be used to
label a specific test
"""
if name_type == "long":
test_name_str = []
for key in kargs:
value = kargs[key]
test_name_str.append(key + self._value_to_str(value))
name = (self.module_name() + "_" + "_".join(test_name_str)).replace(" ", "")
elif name_type == "short":
# this is used to generate test name based on unique index
name = "_".join(
[self.module_name(), "test", str(Caffe2BenchmarkBase.test_index)]
)
Caffe2BenchmarkBase.test_index += 1
return name
def extract_inputs_tuple(self):
# add a dummy function here to match the interface of TorchBenchmarkBase
pass
class Caffe2OperatorTestCase:
"""This class includes all the information needed to benchmark an operator.
op_bench: a user-defined class (child of Caffe2BenchmarkBase)
which includes the inputs, the operator, etc.
test_config: a namedtuple that includes test_name, input_shape, tag, and run_backward.
When run_backward is false, the run_forward method will be executed; otherwise
the run_backward method will be executed.
"""
def __init__(self, op_bench, test_config):
self.op_bench = op_bench
self.test_config = test_config
self.framework = "Caffe2"
def run_forward(self, num_runs, print_per_iter=False, cuda_sync=False):
"""Run the forward path of an operator in a loop"""
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.forward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError(f"Unable to run operator test case: {self.test_name}")
def run_backward(self, num_runs, print_per_iter=False):
"""Run the backward path of an operator in a loop"""
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.backward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError(
f"Unable to run operator gradient test case: {self.test_name}"
)
def _print_per_iter(self):
pass
def create_caffe2_op_test_case(op_bench, test_config):
test_case = Caffe2OperatorTestCase(op_bench, test_config)
test_config = test_case.test_config
op = test_case.op_bench
func_name = f"{op.module_name()}{test_case.framework}{str(test_config)}"
return (func_name, test_case)
OpMeta = namedtuple(
"OpMeta",
"op_type num_inputs input_dims input_types \
output_dims num_outputs args device",
)
def generate_c2_test_from_ops(ops_metadata, bench_op, tags):
"""
This function is used to generate Caffe2 tests based on the metadata
of operators. The metadata includes seven fields which are 1) op_type:
the name of the operator. 2) num_inputs: the number of input blobs.
3) input_dims: a dictionary which includes the shapes of the input blobs.
4) input_types: a list which includes the types of input blobs. 5)
output_dims: a dictionary which includes the shapes of output blobs.
6) num_outputs: the number of output blobs. 7) args: a dictionary which
includes the args for the operator.
Here is an example showing the metadata for the WeightedSum operator
op_type : WeightedSum
num_inputs: 4
input_dims: {'0': [256], '1': [1], '2': [256], '3': [1]}
input_types: ['float', 'float', 'float', 'float']
output_dims: {'0': [256]}
num_outputs: 4
args: {}
TODO(mingzhe0908): introduce device and add it to the benchmark name
"""
for op_metadata in ops_metadata:
tmp_attrs = OpMeta(
op_metadata.op_type,
op_metadata.num_inputs,
op_metadata.input_dims,
op_metadata.input_types,
op_metadata.output_dims,
op_metadata.num_outputs,
op_metadata.args,
op_metadata.device,
)
test_attrs = tmp_attrs._asdict()
op = bench_op()
op.init(**test_attrs)
test_name = op.test_name("short")
input_config = f"Shapes: {op_metadata.input_dims}, Type: {op_metadata.input_types}, Args: {str(op_metadata.args)}"
test_config = TestConfig(test_name, input_config, tags, run_backward=False)
if op is not None:
create_caffe2_op_test_case(op, test_config)
def generate_c2_test(configs, c2_bench_op):
"""This function creates Caffe2 op test based on the given operator"""
return _register_test(configs, c2_bench_op, create_caffe2_op_test_case, False)
def generate_c2_gradient_test(configs, c2_bench_op):
"""This function creates Caffe2 op test based on the given operator"""
return _register_test(configs, c2_bench_op, create_caffe2_op_test_case, True)
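
For reference, a hypothetical sketch of driving the helpers above (the `WeightedSumBenchmark` class and its shapes are illustrative, mirroring the docstring's WeightedSum example; this assumes the removed Caffe2 packages are installed):

```python
from benchmark_caffe2 import (
    Caffe2BenchmarkBase,
    OpMeta,
    generate_c2_test_from_ops,
)
from caffe2.python import core

class WeightedSumBenchmark(Caffe2BenchmarkBase):
    # init() receives the OpMeta fields as keyword arguments.
    def init(self, op_type, num_inputs, input_dims, input_types,
             output_dims, num_outputs, args, device):
        self.args = args
        self.inputs = [
            self.tensor(input_dims[str(i)], device=device)
            for i in range(num_inputs)
        ]
        self.output = self.tensor(output_dims["0"], device=device)
        self.set_module_name(op_type)

    def forward(self):
        return core.CreateOperator(
            "WeightedSum", self.inputs, self.output, **self.args
        )

meta = OpMeta(
    op_type="WeightedSum",
    num_inputs=4,
    input_dims={"0": [256], "1": [1], "2": [256], "3": [1]},
    input_types=["float", "float", "float", "float"],
    output_dims={"0": [256]},
    num_outputs=1,
    args={},
    device="cpu",
)
generate_c2_test_from_ops([meta], WeightedSumBenchmark, tags=["short"])
```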

View File

@@ -13,6 +13,7 @@ import torch
# needs to be imported after torch
import torch.utils.cpp_extension as cpp_extension # noqa: F401
"""Performance microbenchmarks.
This module contains core functionalities for performance microbenchmark tests.
@@ -50,7 +51,7 @@ def _create_test(
"""Create tests with the benchmark backend.
Args:
bench_op_obj: an object instantiated from a subclass of
Caffe2BenchmarkBase/TorchBenchmarkBase which includes tensor
TorchBenchmarkBase which includes tensor
creation and operator execution.
orig_test_attrs: a dictionary that includes test configs.
tags: an attribute in the test config used to filter inputs
@@ -75,7 +76,7 @@ def _build_test(
"""Generate PyTorch/Caffe2 tests of operators with different inputs.
Args:
configs: a dictionary that has the input shapes
bench_op: a subclass of Caffe2BenchmarkBase/TorchBenchmarkBase which includes tensor
bench_op: a subclass of TorchBenchmarkBase which includes tensor
creation and operator execution
OperatorTestCase: a named tuple to save the metadata of a test
run_backward: a bool parameter indicating whether to run the backward path
@@ -233,9 +234,7 @@ class BenchmarkRunner:
)
)
else:
if test_case.framework == "PyTorch":
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
print(
f"# Name: {test_case.test_config.test_name}\n# Input: {test_case.test_config.input_config}"
)
@@ -283,8 +282,7 @@ class BenchmarkRunner:
and the execution time is reported
"""
test_case.run_forward(num_runs=1, print_per_iter=False, cuda_sync=False)
if test_case.framework == "PyTorch":
test_case._output_mean()
test_case._output_mean()
backward_time = timeit.timeit(
functools.partial(test_case.run_backward, iters, print_per_iter), number=1
)
@@ -357,9 +355,6 @@ class BenchmarkRunner:
# Currently, this is a sub-string matching.
op_test_config = test_case.test_config
if self.args.framework:
frameworks = benchmark_utils.process_arg_list(self.args.framework)
operators = (
benchmark_utils.process_arg_list(self.args.operators)
if self.args.operators
@@ -370,7 +365,6 @@
if (
self._check_keep(op_test_config.test_name, self.args.test_name)
and self._check_keep_list(test_case.op_bench.module_name(), operators)
and self._check_keep_list(test_case.framework, frameworks)
and self._check_operator_first_char(
test_case.op_bench.module_name(), self.operator_range
)

View File

@@ -92,7 +92,7 @@ def parse_args():
parser.add_argument(
"--omp-num-threads",
"--omp_num_threads",
help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
help="Number of OpenMP threads used in PyTorch runtime",
default=None,
type=int,
)
@@ -100,7 +100,7 @@ def parse_args():
parser.add_argument(
"--mkl-num-threads",
"--mkl_num_threads",
help="Number of MKL threads used in PyTorch/Caffe2 runtime",
help="Number of MKL threads used in PyTorch runtime",
default=None,
type=int,
)
@@ -135,12 +135,6 @@ def parse_args():
help="Only run the forward path of operators",
)
parser.add_argument(
"--framework",
help="Comma-delimited list of frameworks to test (Caffe2, PyTorch)",
default="Caffe2,PyTorch",
)
parser.add_argument(
"--device",
help="Run tests on the provided architecture (cpu, cuda)",
@@ -160,8 +154,7 @@
# "Modifications to the environment variables after the program has started,
# even if modified by the program itself, are ignored by the OpenMP implementation"
benchmark_utils.set_omp_threads(args.omp_num_threads)
if benchmark_utils.is_pytorch_enabled(args.framework):
torch.set_num_threads(args.omp_num_threads)
torch.set_num_threads(args.omp_num_threads)
if args.mkl_num_threads:
benchmark_utils.set_mkl_threads(args.mkl_num_threads)

View File

@@ -319,14 +319,6 @@ def op_list(**configs):
return generated_configs
def is_caffe2_enabled(framework_arg):
return "Caffe2" in framework_arg
def is_pytorch_enabled(framework_arg):
return "PyTorch" in framework_arg
def get_operator_range(chars_range):
"""Generates the characters from chars_range inclusive."""
if chars_range == "None" or chars_range is None:

View File

@@ -1,49 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise Add operator. Supports both Caffe2/PyTorch."""
# Configs for C2 add operator
add_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2**x for x in range(0, 3)],
dtype=["int", "float"],
tags=["long"],
)
add_short_configs = op_bench.config_list(
attrs=[
[8, 16, 32, "int"],
[16, 16, 64, "float"],
[64, 64, 128, "int"],
],
attr_names=["M", "N", "K", "dtype"],
tags=["short"],
)
class AddBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, dtype):
self.input_one = self.tensor([M, N, K], dtype)
self.input_two = self.tensor([M, N, K], dtype)
self.output = self.tensor([M, N, K], dtype)
self.set_module_name("add")
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench_c2.generate_c2_test(add_long_configs + add_short_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,49 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for BatchBoxCox operator."""
# Configs for C2 BatchBoxCox operator
batch_box_cox_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
batch_box_cox_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class BatchBoxCoxBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.data = self.tensor([M, N], dtype)
self.lambda1 = self.tensor([N], dtype)
self.lambda2 = self.tensor([N], dtype)
self.output = self.tensor([1, 1], dtype)
self.set_module_name("batch_box_cox")
def forward(self):
op = core.CreateOperator(
"BatchBoxCox", [self.data, self.lambda1, self.lambda2], self.output
)
return op
op_bench_c2.generate_c2_test(
batch_box_cox_long_configs + batch_box_cox_short_configs, BatchBoxCoxBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,58 +0,0 @@
import benchmark_caffe2 as op_bench_c2
import numpy
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise BatchGather operator."""
# Configs for C2 BatchGather operator
batch_gather_configs_short = op_bench.config_list(
attr_names=["M", "N", "K"],
attrs=[
[8, 8, 1],
[256, 512, 1],
[512, 512, 1],
[8, 8, 2],
[256, 512, 2],
[512, 512, 2],
],
cross_product_configs={
"device": ["cpu", "cuda"],
},
tags=["short"],
)
batch_gather_configs_long = op_bench.cross_product_configs(
M=[128, 1024], N=[128, 1024], K=[1, 2], device=["cpu", "cuda"], tags=["long"]
)
class BatchGatherBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, device):
self.input_one = self.tensor([M, N, K], device=device)
max_val = N
numpy.random.seed((1 << 32) - 1)
index_dim = numpy.random.randint(0, N)
self.index = self.feed_tensor(
numpy.random.randint(0, max_val, index_dim), device=device
)
self.output = self.tensor([M, index_dim, K], device=device)
self.set_module_name("batch_gather")
def forward(self):
op = core.CreateOperator(
"BatchGather", [self.input_one, self.index], self.output
)
return op
op_bench_c2.generate_c2_test(
batch_gather_configs_long + batch_gather_configs_short, BatchGatherBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,54 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core, dyndep
dyndep.InitOpsLibrary("@/caffe2/caffe2/fb/operators:clip_ranges_op")
"""Microbenchmarks for ClipRanges operator."""
# Configs for C2 ClipRanges operator
clip_ranges_long_configs = op_bench.cross_product_configs(
LENGTH=range(1, 100),
M=[1],
N=[2],
MAX_LENGTH=range(1, 100),
dtype=["int32"],
tags=["long"],
)
clip_ranges_short_configs = op_bench.config_list(
attrs=[
[6, 1, 2, 1, "int32"],
[7, 1, 2, 2, "int32"],
[8, 1, 2, 3, "int32"],
[9, 1, 2, 4, "int32"],
[10, 1, 2, 5, "int32"],
],
attr_names=["LENGTH", "M", "N", "MAX_LENGTH", "dtype"],
tags=["short"],
)
class ClipRangesBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, LENGTH, M, N, MAX_LENGTH, dtype):
self.input = self.tensor([LENGTH, M, N], dtype)
self.max_length = MAX_LENGTH
self.set_module_name("clip_ranges")
def forward(self):
op = core.CreateOperator(
"ClipRanges", self.input, self.input, max_length=self.max_length
)
return op
op_bench_c2.generate_c2_test(
clip_ranges_long_configs + clip_ranges_short_configs, ClipRangesBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,171 +0,0 @@
import random
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for Concat operator. Supports both Caffe2/PyTorch."""
cross_product_configs = {
"device": ["cpu", "cuda"],
"dtype": ["float"],
"add_axis": [0],
}
# Configs for C2 concat operator
cat_configs_short = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[(1, 1, 1), 2, 0], # noqa: E241
[(512, 512, 2), 2, 1], # noqa: E241
[(128, 1024, 2), 2, 1], # noqa: E241
],
cross_product_configs=cross_product_configs,
tags=["short"],
)
# Configs specific to static runtime feature - a fast runtime for pared down models
cat_configs_static_runtime = op_bench.config_list(
attr_names=["sizes", "N", "axis", "add_axis"],
attrs=[
[(1, 40), 5, 1, 1],
[[(1, 160), (1, 14)], -1, 1, 0],
[[(1, 20, 40), (1, 4, 40), (1, 5, 40)], -1, 1, 0],
[[(1, 580), (1, 174)], -1, 1, 0],
[(20, 40), 5, 1, 1],
[[(20, 160), (20, 14)], -1, 1, 0],
[[(20, 20, 40), (20, 4, 40), (20, 5, 40)], -1, 1, 0],
[[(20, 580), (20, 174)], -1, 1, 0],
],
cross_product_configs=cross_product_configs,
tags=["static_runtime"],
)
cat_configs_long = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[(2**10, 2**10, 2), 2, 0], # noqa: E241
[(2**10 + 1, 2**10 - 1, 2), 2, 1], # noqa: E226,E241
[(2**10, 2**10, 2), 2, 2], # noqa: E241
[
[
lambda: random.randint(2**6, 2**7),
2**7 - 17,
2**6 + 1,
], # noqa: E201,E226,E241
5,
0,
],
[
[
2**6 + 2**5,
lambda: random.randint(2**6, 2**7),
2**6,
], # noqa: E201,E226,E241,E272
5,
1,
],
[
[
2**7,
2**6,
lambda: random.randint(2**6, 2**7),
], # noqa: E201,E241,E272
5,
2,
],
[[lambda: random.randint(2**5, 2**6), 2**5, 2**6], 50, 0], # noqa: E241
[
[2**5, lambda: random.randint(2**5, 2**6), 2**6], # noqa: E241,E272
50,
1,
],
[
[
2**5 + 1,
2**6 + 1,
lambda: random.randint(2**5, 2**6),
], # noqa: E226,E241,E272
50,
2,
],
],
cross_product_configs=cross_product_configs,
tags=["long"],
)
# There is a different codepath on CUDA for >4 dimensions
cat_configs_multidim = op_bench.config_list(
attr_names=["sizes", "N", "axis", "dtype"],
attrs=[
[(2**6, 2**5, 2**2, 2**4, 2**5), 2, 2], # noqa: E241
[(2**4, 2**5, 2**2, 2**4, 2**5), 8, 2], # noqa: E241
[
(2**3 + 1, 2**5 - 1, 2**2 + 1, 2**4 - 1, 2**5 + 1),
17,
4,
], # noqa: E226,E241
],
cross_product_configs=cross_product_configs,
tags=["multidim"],
)
cat_configs_manyinputs = op_bench.config_list(
attr_names=["sizes", "N", "axis"],
attrs=[
[[lambda: random.randint(1, 10000)], 100, 0],
[[lambda: random.randint(1, 1000)], 1000, 0],
[[lambda: random.randint(1, 500)], 2000, 0],
[[lambda: random.randint(1, 300)], 3000, 0],
],
cross_product_configs=cross_product_configs,
tags=["manyinputs"],
)
class ConcatBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, sizes, N, axis, add_axis, dtype, device):
random.seed(42)
self.inputs = []
self.args = {"axis": axis, "add_axis": add_axis}
gen_sizes = []
if type(sizes) == list and N == -1:
gen_sizes = sizes
else:
for i in range(N):
gen_sizes.append(
[
old_size() if callable(old_size) else old_size
for old_size in sizes
]
)
for s in gen_sizes:
self.inputs.append(self.tensor(s, dtype, device=device))
self.output = self.tensor(gen_sizes[0], dtype, device=device)
self.split_info = self.tensor(gen_sizes[0], "int")
self.set_module_name("concat")
def forward(self):
op = core.CreateOperator(
"Concat", self.inputs, [self.output, self.split_info], **self.args
)
return op
op_bench_c2.generate_c2_test(
cat_configs_short
+ cat_configs_long
+ cat_configs_multidim
+ cat_configs_manyinputs
+ cat_configs_static_runtime,
ConcatBenchmark,
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,50 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for MatMul operator"""
# Configs for C2 Matmul operator
mm_long_configs = op_bench.cross_product_configs(
M=[8, 64, 128],
N=range(2, 10, 3),
K=[2**x for x in range(0, 3)],
trans_a=[True, False],
trans_b=[True, False],
tags=["long"],
)
mm_short_configs = op_bench.config_list(
attrs=[
[128, 128, 128, False, True],
[1024, 1024, 256, True, False],
[8192, 8192, 1024, True, False],
],
attr_names=["M", "N", "K", "trans_a", "trans_b"],
tags=["short"],
)
class MatMulBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, K, trans_a, trans_b):
self.input_one = self.tensor([N, M]) if trans_a else self.tensor([M, N])
self.input_two = self.tensor([K, N]) if trans_b else self.tensor([N, K])
self.args = {"trans_a": trans_a, "trans_b": trans_b}
self.output = self.tensor([M, K])
self.set_module_name("matmul")
def forward(self):
op = core.CreateOperator(
"MatMul", [self.input_one, self.input_two], self.output, **self.args
)
return op
op_bench_c2.generate_c2_test(mm_long_configs + mm_short_configs, MatMulBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,48 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for QuantileOp operator."""
# Configs for C2 QuantileOp operator
quantile_op_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
quantile_op_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class QuantileOpBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.data = [self.tensor([N], dtype) for _ in range(M)]
self.quantile = 0.3
self.output = self.tensor([1], dtype)
self.set_module_name("quantile_op")
def forward(self):
op = core.CreateOperator(
"Quantile", inputs=self.data, outputs=self.output, quantile=self.quantile
)
return op
op_bench_c2.generate_c2_test(
quantile_op_long_configs + quantile_op_short_configs, QuantileOpBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,44 +0,0 @@
import benchmark_caffe2 as op_bench_c2
from benchmark_caffe2 import Caffe2BenchmarkBase # noqa: F401
import operator_benchmark as op_bench
from caffe2.python import core
"""Microbenchmarks for element-wise ReplaceNaN operator."""
# Configs for C2 ReplaceNaN operator
replace_nan_long_configs = op_bench.cross_product_configs(
M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)
replace_nan_short_configs = op_bench.config_list(
attrs=[
[16, 16, "float"],
[16, 16, "double"],
[64, 64, "float"],
[64, 64, "double"],
],
attr_names=["M", "N", "dtype"],
tags=["short"],
)
class ReplaceNaNBenchmark(op_bench_c2.Caffe2BenchmarkBase):
def init(self, M, N, dtype):
self.input = self.tensor([M, N], dtype)
self.set_module_name("replace_nan")
def forward(self):
op = core.CreateOperator("ReplaceNaN", self.input, self.input, value=1.0)
return op
op_bench_c2.generate_c2_test(
replace_nan_long_configs + replace_nan_short_configs, ReplaceNaNBenchmark
)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,41 +0,0 @@
import operator_benchmark as op_bench
from caffe2.python import core
add_configs = op_bench.cross_product_configs(
M=[8], N=[8], K=[8], tags=["short"], device=["cuda", "cpu"]
)
class AddBenchmark(op_bench.Caffe2BenchmarkBase):
def init(self, M, N, K, device):
self.set_module_name("add")
self.input_one = self.tensor([M, N, K], device=device)
self.input_two = self.tensor([M, N, K], device=device)
self.input_one_grad = self.tensor([M, N, K], device=device)
self.input_two_grad = self.tensor([M, N, K], device=device)
self.output = self.tensor([M, N, K], device=device)
def forward(self):
op = core.CreateOperator(
"Add", [self.input_one, self.input_two], self.output, **self.args
)
return op
def backward(self):
grad_op = core.CreateOperator(
"AddGradient",
[self.output, self.input_one, self.input_two],
[self.input_one_grad, self.input_two_grad],
**self.args,
)
return grad_op
op_bench.generate_c2_test(add_configs, AddBenchmark)
op_bench.generate_c2_gradient_test(add_configs, AddBenchmark)
if __name__ == "__main__":
op_bench.benchmark_runner.main()

View File

@@ -1,18 +1,13 @@
import argparse
import sys
import torch
import torch.utils.benchmark as benchmark_utils
try:
from benchmarks.fastrnns.factory import lstm_creator
except ImportError:
from caffe2.benchmarks.fastrnns.factory import lstm_creator
from benchmarks.fastrnns.factory import lstm_creator
from torchvision.models import resnet50
import torch
import torch.utils.benchmark as benchmark_utils
def prepare_lstm_jit(bench_args):
model_def = lstm_creator(

View File

@@ -1,118 +0,0 @@
#!/usr/bin/env python3
import argparse
import ast
from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper
from caffe2.python.predictor import mobile_exporter
def parse_kwarg(kwarg_str):
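"""Parse a 'key=value' CLI pair; literal-evaluate the value when possible, falling back to the raw string."""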
key, value = kwarg_str.split("=")
try:
value = ast.literal_eval(value)
except ValueError:
pass
return key, value
def main(args):
# User defined keyword arguments
kwargs = {"order": "NCHW", "use_cudnn": False}
kwargs.update(dict(args.kwargs))
model = ModelHelper(name=args.benchmark_name)
op_type = args.operator # assumes a brew type op name
input_name = args.input_name
output_name = args.output_name
iters = int(args.instances)
for i in range(iters):
input_blob_name = input_name + (str(i) if i > 0 and args.chain else "")
output_blob_name = output_name + str(i + 1)
add_op = getattr(brew, op_type)
add_op(model, input_blob_name, output_blob_name, **kwargs)
if args.chain:
input_name, output_name = output_name, input_name
workspace.RunNetOnce(model.param_init_net)
init_net, predict_net = mobile_exporter.Export(workspace, model.net, model.params)
if args.debug:
print("init_net:")
for op in init_net.op:
print(" ", op.type, op.input, "-->", op.output)
print("predict_net:")
for op in predict_net.op:
print(" ", op.type, op.input, "-->", op.output)
with open(args.predict_net, "wb") as f:
f.write(predict_net.SerializeToString())
with open(args.init_net, "wb") as f:
f.write(init_net.SerializeToString())
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Utility to generate Caffe2 benchmark models."
)
parser.add_argument("operator", help="Caffe2 operator to benchmark.")
parser.add_argument(
"-b",
"--blob",
help="Instantiate a blob --blob name=dim1,dim2,dim3",
action="append",
)
parser.add_argument("--context", help="Context to run on.", default="CPU")
parser.add_argument(
"--kwargs",
help="kwargs to pass to operator.",
nargs="*",
type=parse_kwarg,
default=[],
)
parser.add_argument(
"--init-net",
"--init_net",
help="Output initialization net.",
default="init_net.pb",
)
parser.add_argument(
"--predict-net",
"--predict_net",
help="Output prediction net.",
default="predict_net.pb",
)
parser.add_argument(
"--benchmark-name",
"--benchmark_name",
help="Name of the benchmark network",
default="benchmark",
)
parser.add_argument(
"--input-name", "--input_name", help="Name of the input blob.", default="data"
)
parser.add_argument(
"--output-name",
"--output_name",
help="Name of the output blob.",
default="output",
)
parser.add_argument(
"--instances", help="Number of instances to run the operator.", default="1"
)
parser.add_argument(
"-d", "--debug", help="Print debug information.", action="store_true"
)
parser.add_argument(
"-c",
"--chain",
help="Chain ops together (create data dependencies)",
action="store_true",
)
args = parser.parse_args()
main(args)

View File

@@ -1,10 +0,0 @@
:: Installation scripts for appveyor.
@echo on
if "%USE_CUDA%" == "ON" call %~dp0%install_cuda.bat
:: Miniconda path for appveyor
set PATH=C:\Miniconda-x64;C:\Miniconda-x64\Scripts;%PATH%
:: Install numpy
conda install -y numpy

View File

@@ -1,22 +0,0 @@
@echo on
appveyor DownloadFile ^
https://developer.nvidia.com/compute/cuda/8.0/prod/local_installers/cuda_8.0.44_windows-exe ^
-FileName cuda_8.0.44_windows.exe
appveyor Downloadfile ^
http://developer.download.nvidia.com/compute/redist/cudnn/v5.1/cudnn-8.0-windows10-x64-v5.1.zip ^
-FileName cudnn-8.0-windows10-x64-v5.1.zip
cuda_8.0.44_windows.exe -s compiler_8.0 cublas_8.0 cublas_dev_8.0 cudart_8.0 curand_8.0 curand_dev_8.0 nvrtc_8.0 nvrtc_dev_8.0
set PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v8.0\libnvvp;%PATH%
7z x cudnn-8.0-windows10-x64-v5.1.zip
copy cuda\include\cudnn.h ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include\"
copy cuda\lib\x64\cudnn.lib ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\lib\x64\"
copy cuda\bin\cudnn64_5.dll ^
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin\"
:: Make sure that nvcc is working correctly.
nvcc -V || exit /b

View File

@@ -1,175 +0,0 @@
#! /usr/bin/env python3
import os
import subprocess
import sys
import tarfile
import tempfile
from urllib.request import urlretrieve
from caffe2.python.models.download import (
deleteDirectory,
downloadFromURLToFile,
getURLFromName,
)
class SomeClass:
# largely copied from
# https://github.com/onnx/onnx-caffe2/blob/master/tests/caffe2_ref_test.py
def _download(self, model):
model_dir = self._caffe2_model_dir(model)
assert not os.path.exists(model_dir)
os.makedirs(model_dir)
for f in ["predict_net.pb", "init_net.pb", "value_info.json"]:
url = getURLFromName(model, f)
dest = os.path.join(model_dir, f)
try:
try:
downloadFromURLToFile(url, dest, show_progress=False)
except TypeError:
# show_progress not supported prior to
# Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
# (Sep 17, 2017)
downloadFromURLToFile(url, dest)
except Exception as e:
print(f"Abort: {e}")
print("Cleaning up...")
deleteDirectory(model_dir)
sys.exit(1)
def _caffe2_model_dir(self, model):
caffe2_home = os.path.expanduser("~/.caffe2")
models_dir = os.path.join(caffe2_home, "models")
return os.path.join(models_dir, model)
def _onnx_model_dir(self, model):
onnx_home = os.path.expanduser("~/.onnx")
models_dir = os.path.join(onnx_home, "models")
model_dir = os.path.join(models_dir, model)
return model_dir, os.path.dirname(model_dir)
# largely copied from
# https://github.com/onnx/onnx/blob/master/onnx/backend/test/runner/__init__.py
def _prepare_model_data(self, model):
model_dir, models_dir = self._onnx_model_dir(model)
if os.path.exists(model_dir):
return
os.makedirs(model_dir)
url = f"https://s3.amazonaws.com/download.onnx/models/{model}.tar.gz"
# On Windows, NamedTemporaryFile cannot be opened for a
# second time
download_file = tempfile.NamedTemporaryFile(delete=False)
try:
download_file.close()
print(f"Start downloading model {model} from {url}")
urlretrieve(url, download_file.name)
print("Done")
with tarfile.open(download_file.name) as t:
t.extractall(models_dir)
except Exception as e:
print(f"Failed to prepare data for model {model}: {e}")
raise
finally:
os.remove(download_file.name)
models = [
"bvlc_alexnet",
"densenet121",
"inception_v1",
"inception_v2",
"resnet50",
# TODO currently onnx can't translate squeezenet :(
# 'squeezenet',
"vgg16",
# TODO currently vgg19 doesn't work in the CI environment,
# possibly due to OOM
# 'vgg19'
]
def download_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: downloading", model)
caffe2_model_dir = sc._caffe2_model_dir(model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
if not os.path.exists(caffe2_model_dir):
sc._download(model)
if not os.path.exists(onnx_model_dir):
sc._prepare_model_data(model)
def generate_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: generating", model)
caffe2_model_dir = sc._caffe2_model_dir(model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
subprocess.check_call(["echo", model])
with open(os.path.join(caffe2_model_dir, "value_info.json")) as f:
value_info = f.read()
subprocess.check_call(
[
"convert-caffe2-to-onnx",
"--caffe2-net-name",
model,
"--caffe2-init-net",
os.path.join(caffe2_model_dir, "init_net.pb"),
"--value-info",
value_info,
"-o",
os.path.join(onnx_model_dir, "model.pb"),
os.path.join(caffe2_model_dir, "predict_net.pb"),
]
)
subprocess.check_call(
["tar", "-czf", model + ".tar.gz", model], cwd=onnx_models_dir
)
def upload_models():
sc = SomeClass()
for model in models:
print("update-caffe2-models.py: uploading", model)
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
subprocess.check_call(
[
"aws",
"s3",
"cp",
model + ".tar.gz",
f"s3://download.onnx/models/{model}.tar.gz",
"--acl",
"public-read",
],
cwd=onnx_models_dir,
)
def cleanup():
sc = SomeClass()
for model in models:
onnx_model_dir, onnx_models_dir = sc._onnx_model_dir(model)
os.remove(os.path.join(os.path.dirname(onnx_model_dir), model + ".tar.gz"))
if __name__ == "__main__":
try:
subprocess.check_call(["aws", "sts", "get-caller-identity"])
except Exception:
print(
"update-caffe2-models.py: please run `aws configure` manually to set up credentials"
)
sys.exit(1)
if sys.argv[1] == "download":
download_models()
if sys.argv[1] == "generate":
generate_models()
elif sys.argv[1] == "upload":
upload_models()
elif sys.argv[1] == "cleanup":
cleanup()

View File

@@ -1,372 +0,0 @@
#! /usr/bin/env python3
import argparse
import glob
import json
import os
import shutil
import tarfile
import tempfile
from urllib.request import urlretrieve
import boto3
import numpy as np
import onnx
import onnx.backend
from onnx import numpy_helper
import caffe2.python.onnx.backend
import caffe2.python.onnx.frontend
import caffe2.python.workspace as c2_workspace
from caffe2.proto import caffe2_pb2
from caffe2.python.models.download import (
deleteDirectory,
downloadFromURLToFile,
getURLFromName,
)
"""A script converting Caffe2 models to ONNX, and updating ONNX model zoos.
Arguments:
-v, verbose
--local-dir, where we store the ONNX and Caffe2 models
--no-cache, ignore existing models in local-dir
--clean-test-data, delete all the existing test data when updating ONNX model zoo
--add-test-data, add the given number of sets of test data for each ONNX model
--only-local, run locally (for testing purpose)
Examples:
# store the data in /home/username/zoo-dir, delete existing test data, ignore local cache,
# and generate 3 sets of new test data
python update-caffe2-models.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
"""
# TODO: Add GPU support
def upload_onnx_model(model_name, zoo_dir, backup=False, only_local=False):
if only_local:
print("No uploading in local only mode.")
return
model_dir = os.path.join(zoo_dir, model_name)
suffix = "-backup" if backup else ""
if backup:
print(f"Backing up the previous version of ONNX model {model_name}...")
rel_file_name = f"{model_name}{suffix}.tar.gz"
abs_file_name = os.path.join(zoo_dir, rel_file_name)
print(f"Compressing {model_name} model to {abs_file_name}")
with tarfile.open(abs_file_name, "w:gz") as f:
f.add(model_dir, arcname=model_name)
file_size = os.stat(abs_file_name).st_size
print(
f"Uploading {abs_file_name} ({float(file_size) / 1024 / 1024} MB) to s3 cloud..."
)
client = boto3.client("s3", "us-east-1")
transfer = boto3.s3.transfer.S3Transfer(client)
transfer.upload_file(
abs_file_name,
"download.onnx",
f"models/latest/{rel_file_name}",
extra_args={"ACL": "public-read"},
)
print(f"Successfully uploaded {rel_file_name} to s3!")
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
return
else:
shutil.rmtree(model_dir)
url = f"https://s3.amazonaws.com/download.onnx/models/latest/{model_name}.tar.gz"
download_file = tempfile.NamedTemporaryFile(delete=False)
try:
download_file.close()
print(
f"Downloading ONNX model {model_name} from {url} and save in {download_file.name} ...\n"
)
urlretrieve(url, download_file.name)
with tarfile.open(download_file.name) as t:
print(f"Extracting ONNX model {model_name} to {zoo_dir} ...\n")
t.extractall(zoo_dir)
except Exception as e:
print(f"Failed to download/backup data for ONNX model {model_name}: {e}")
if not os.path.exists(model_dir):
os.makedirs(model_dir)
finally:
os.remove(download_file.name)
if not only_local:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
def download_caffe2_model(model_name, zoo_dir, use_cache=True):
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
return
else:
shutil.rmtree(model_dir)
os.makedirs(model_dir)
for f in ["predict_net.pb", "init_net.pb", "value_info.json"]:
url = getURLFromName(model_name, f)
dest = os.path.join(model_dir, f)
try:
try:
downloadFromURLToFile(url, dest, show_progress=False)
except TypeError:
# show_progress not supported prior to
# Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
# (Sep 17, 2017)
downloadFromURLToFile(url, dest)
except Exception as e:
print(f"Abort: {e}")
print("Cleaning up...")
deleteDirectory(model_dir)
raise
def caffe2_to_onnx(caffe2_model_name, caffe2_model_dir):
caffe2_init_proto = caffe2_pb2.NetDef()
caffe2_predict_proto = caffe2_pb2.NetDef()
with open(os.path.join(caffe2_model_dir, "init_net.pb"), "rb") as f:
caffe2_init_proto.ParseFromString(f.read())
caffe2_init_proto.name = f"{caffe2_model_name}_init"
with open(os.path.join(caffe2_model_dir, "predict_net.pb"), "rb") as f:
caffe2_predict_proto.ParseFromString(f.read())
caffe2_predict_proto.name = caffe2_model_name
with open(os.path.join(caffe2_model_dir, "value_info.json"), "rb") as f:
value_info = json.loads(f.read())
print(
f"Converting Caffe2 model {caffe2_model_name} in {caffe2_model_dir} to ONNX format"
)
onnx_model = caffe2.python.onnx.frontend.caffe2_net_to_onnx_model(
init_net=caffe2_init_proto,
predict_net=caffe2_predict_proto,
value_info=value_info,
)
return onnx_model, caffe2_init_proto, caffe2_predict_proto
def tensortype_to_ndarray(tensor_type):
shape = []
for dim in tensor_type.shape.dim:
shape.append(dim.dim_value)
if tensor_type.elem_type == onnx.TensorProto.FLOAT:
type = np.float32
elif tensor_type.elem_type == onnx.TensorProto.INT:
type = np.int32
else:
raise ValueError(f"Unsupported tensor element type: {tensor_type.elem_type}")
array = np.random.rand(*shape).astype(type)
return array
def generate_test_input_data(onnx_model, scale):
real_inputs_names = list(
{input.name for input in onnx_model.graph.input}
- {init.name for init in onnx_model.graph.initializer}
)
real_inputs = []
for name in real_inputs_names:
for input in onnx_model.graph.input:
if name == input.name:
real_inputs.append(input)
test_inputs = []
for input in real_inputs:
ndarray = tensortype_to_ndarray(input.type.tensor_type)
test_inputs.append((input.name, ndarray * scale))
return test_inputs
def generate_test_output_data(caffe2_init_net, caffe2_predict_net, inputs):
p = c2_workspace.Predictor(caffe2_init_net, caffe2_predict_net)
inputs_map = {input[0]: input[1] for input in inputs}
output = p.run(inputs_map)
c2_workspace.ResetWorkspace()
return output
def onnx_verify(onnx_model, inputs, ref_outputs):
prepared = caffe2.python.onnx.backend.prepare(onnx_model)
onnx_inputs = []
for input in inputs:
if isinstance(input, tuple):
onnx_inputs.append(input[1])
else:
onnx_inputs.append(input)
onnx_outputs = prepared.run(inputs=onnx_inputs)
np.testing.assert_almost_equal(onnx_outputs, ref_outputs, decimal=3)
model_mapping = {
"bvlc_alexnet": "bvlc_alexnet",
"bvlc_googlenet": "bvlc_googlenet",
"bvlc_reference_caffenet": "bvlc_reference_caffenet",
"bvlc_reference_rcnn_ilsvrc13": "bvlc_reference_rcnn_ilsvrc13",
"densenet121": "densenet121",
#'finetune_flickr_style': 'finetune_flickr_style',
"inception_v1": "inception_v1",
"inception_v2": "inception_v2",
"resnet50": "resnet50",
"shufflenet": "shufflenet",
"squeezenet": "squeezenet_old",
#'vgg16': 'vgg16',
"vgg19": "vgg19",
"zfnet512": "zfnet512",
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Update the ONNX models.")
parser.add_argument("-v", action="store_true", default=False, help="verbose")
parser.add_argument(
"--local-dir",
type=str,
default=os.path.expanduser("~"),
help="local dir to store Caffe2 and ONNX models",
)
parser.add_argument(
"--no-cache",
action="store_true",
default=False,
help="whether use local ONNX models",
)
parser.add_argument(
"--clean-test-data",
action="store_true",
default=False,
help="remove the old test data",
)
parser.add_argument(
"--add-test-data", type=int, default=0, help="add new test data"
)
parser.add_argument(
"--only-local",
action="store_true",
default=False,
help="no upload including backup",
)
args = parser.parse_args()
delete_test_data = args.clean_test_data
add_test_data = args.add_test_data
use_cache = not args.no_cache
only_local = args.only_local
root_dir = args.local_dir
caffe2_zoo_dir = os.path.join(root_dir, ".caffe2", "models")
onnx_zoo_dir = os.path.join(root_dir, ".onnx", "models")
for onnx_model_name in model_mapping:
c2_model_name = model_mapping[onnx_model_name]
print(
f"####### Processing ONNX model {onnx_model_name} ({c2_model_name} in Caffe2) #######"
)
download_caffe2_model(c2_model_name, caffe2_zoo_dir, use_cache=use_cache)
download_onnx_model(
onnx_model_name, onnx_zoo_dir, use_cache=use_cache, only_local=only_local
)
onnx_model_dir = os.path.join(onnx_zoo_dir, onnx_model_name)
if delete_test_data:
print("Deleting all the existing test data...")
# NB: For now, we don't delete the npz files.
# for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
# os.remove(f)
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
shutil.rmtree(f)
onnx_model, c2_init_net, c2_predict_net = caffe2_to_onnx(
c2_model_name, os.path.join(caffe2_zoo_dir, c2_model_name)
)
print(f"Deleteing old ONNX {onnx_model_name} model...")
for f in glob.glob(os.path.join(onnx_model_dir, "model*".format())):
os.remove(f)
print(f"Serializing generated ONNX {onnx_model_name} model ...")
with open(os.path.join(onnx_model_dir, "model.onnx"), "wb") as file:
file.write(onnx_model.SerializeToString())
print(f"Verifying model {onnx_model_name} with ONNX model checker...")
onnx.checker.check_model(onnx_model)
total_existing_data_set = 0
print(f"Verifying model {onnx_model_name} with existing test data...")
for f in glob.glob(os.path.join(onnx_model_dir, "*.npz")):
test_data = np.load(f, encoding="bytes")
inputs = list(test_data["inputs"])
ref_outputs = list(test_data["outputs"])
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
inputs = []
inputs_num = len(glob.glob(os.path.join(f, "input_*.pb")))
for i in range(inputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"input_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
inputs.append(numpy_helper.to_array(tensor))
ref_outputs = []
ref_outputs_num = len(glob.glob(os.path.join(f, "output_*.pb")))
for i in range(ref_outputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"output_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
ref_outputs.append(numpy_helper.to_array(tensor))
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
starting_index = 0
while os.path.exists(
os.path.join(onnx_model_dir, f"test_data_set_{starting_index}")
):
starting_index += 1
if total_existing_data_set == 0 and add_test_data == 0:
add_test_data = 3
total_existing_data_set = 3
print(f"Generating {add_test_data} sets of new test data...")
for i in range(starting_index, add_test_data + starting_index):
data_dir = os.path.join(onnx_model_dir, f"test_data_set_{i}")
os.makedirs(data_dir)
inputs = generate_test_input_data(onnx_model, 255)
ref_outputs = generate_test_output_data(c2_init_net, c2_predict_net, inputs)
onnx_verify(onnx_model, inputs, ref_outputs)
for index, input in enumerate(inputs):
tensor = numpy_helper.from_array(input[1])
with open(os.path.join(data_dir, f"input_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
for index, output in enumerate(ref_outputs):
tensor = numpy_helper.from_array(output)
with open(os.path.join(data_dir, f"output_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
del onnx_model
del c2_init_net
del c2_predict_net
upload_onnx_model(
onnx_model_name, onnx_zoo_dir, backup=False, only_local=only_local
)
print("\n\n")