From f71e36896965f97c628dd755e9864ec742da3407 Mon Sep 17 00:00:00 2001
From: Arun Pa
Date: Fri, 5 Apr 2024 18:51:38 +0000
Subject: [PATCH] UFMT formatting on test/autograd test/ao test/cpp
 test/backends (#123369)

Partially addresses #123062

Ran lintrunner on
- test/_test_bazel.py
- test/ao
- test/autograd
test/backends
test/benchmark_utils
test/conftest.py
test/bottleneck_test
test/cpp

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123369
Approved by: https://github.com/huydhn
---
 .lintrunner.toml                              |  27 -
 test/_test_bazel.py                           |   7 +-
 .../ao/sparsity/test_activation_sparsifier.py | 151 +++--
 test/ao/sparsity/test_composability.py        | 131 ++--
 test/ao/sparsity/test_data_scheduler.py       |  76 ++-
 test/ao/sparsity/test_data_sparsifier.py      | 551 +++++++++++-----
 test/ao/sparsity/test_kernels.py              |  41 +-
 test/ao/sparsity/test_parametrization.py      | 142 ++--
 .../ao/sparsity/test_qlinear_packed_params.py | 134 ++--
 test/ao/sparsity/test_scheduler.py            | 117 ++--
 test/ao/sparsity/test_sparsifier.py           | 338 +++++-----
 .../ao/sparsity/test_structured_sparsifier.py | 158 +++--
 test/autograd/test_complex.py                 |  12 +-
 test/autograd/test_functional.py              | 615 +++++++++++++-----
 test/backends/xeon/test_launch.py             |  33 +-
 test/benchmark_utils/test_benchmark_utils.py  | 231 ++++---
 test/bottleneck_test/test_args.py             |   7 +-
 test/bottleneck_test/test_cuda.py             |   2 +-
 test/conftest.py                              |  49 +-
 test/cpp/aot_inductor/test.py                 |  52 +-
 test/cpp/api/init_baseline.py                 |   8 +-
 test/cpp/api/optim_baseline.py                |  48 +-
 test/cpp/jit/tests_setup.py                   |  19 +-
 23 files changed, 1914 insertions(+), 1035 deletions(-)

diff --git a/.lintrunner.toml b/.lintrunner.toml
index dacb259c5f19..2c7cc9106425 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -1014,33 +1014,6 @@ exclude_patterns = [
     'test/_nvfuser/test_dynamo.py',
     'test/_nvfuser/test_python_frontend.py',
     'test/_nvfuser/test_torchscript.py',
-    'test/_test_bazel.py',
-    'test/ao/sparsity/test_activation_sparsifier.py',
-    'test/ao/sparsity/test_composability.py',
-    'test/ao/sparsity/test_data_scheduler.py',
-    'test/ao/sparsity/test_data_sparsifier.py',
-    'test/ao/sparsity/test_kernels.py',
-    'test/ao/sparsity/test_parametrization.py',
-    'test/ao/sparsity/test_qlinear_packed_params.py',
-    'test/ao/sparsity/test_scheduler.py',
-    'test/ao/sparsity/test_sparsifier.py',
-    'test/ao/sparsity/test_sparsity_utils.py',
-    'test/ao/sparsity/test_structured_sparsifier.py',
-    'test/autograd/test_complex.py',
-    'test/autograd/test_fallback.py',
-    'test/autograd/test_functional.py',
-    'test/backends/xeon/test_launch.py',
-    'test/benchmark_utils/test_benchmark_utils.py',
-    'test/bottleneck_test/test.py',
-    'test/bottleneck_test/test_args.py',
-    'test/bottleneck_test/test_cuda.py',
-    'test/conftest.py',
-    'test/cpp/__init__.py',
-    'test/cpp/aot_inductor/test.py',
-    'test/cpp/api/init_baseline.py',
-    'test/cpp/api/optim_baseline.py',
-    'test/cpp/jit/__init__.py',
-    'test/cpp/jit/tests_setup.py',
     'test/cpp_api_parity/__init__.py',
     'test/cpp_api_parity/functional_impl_check.py',
     'test/cpp_api_parity/module_impl_check.py',
diff --git a/test/_test_bazel.py b/test/_test_bazel.py
index 9c3bb6f87b57..7a44f65847e7 100644
--- a/test/_test_bazel.py
+++ b/test/_test_bazel.py
@@ -11,11 +11,14 @@ The name uses underscore `_test_bazel.py` to avoid globbing into other non-bazel
 
 import torch
 
+
 def test_sum() -> None:
-    assert torch.eq(torch.tensor([[1, 2, 3]]) + torch.tensor([[4, 5, 6]]), torch.tensor([[5, 7, 9]])).all()
+    assert torch.eq(
+        torch.tensor([[1, 2, 3]]) + torch.tensor([[4, 5, 6]]), torch.tensor([[5, 7, 9]])
+    ).all()
+
 
 def test_simple_compile_eager() -> None:
-
     def foo(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         a = torch.sin(x)
         b = torch.cos(y)
diff --git a/test/ao/sparsity/test_activation_sparsifier.py b/test/ao/sparsity/test_activation_sparsifier.py
index bd0a456e9118..7fcbd1e87ba5 100644
--- a/test/ao/sparsity/test_activation_sparsifier.py
+++ b/test/ao/sparsity/test_activation_sparsifier.py
@@ -1,16 +1,21 @@
 # Owner(s): ["module: unknown"]
 
 import copy
-from torch.testing._internal.common_utils import TestCase, skipIfTorchDynamo
 import logging
-import torch
-from torch.ao.pruning._experimental.activation_sparsifier.activation_sparsifier import ActivationSparsifier
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.ao.pruning.sparsifier.utils import module_to_fqn
 from typing import List
 
-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.ao.pruning._experimental.activation_sparsifier.activation_sparsifier import (
+    ActivationSparsifier,
+)
+from torch.ao.pruning.sparsifier.utils import module_to_fqn
+from torch.testing._internal.common_utils import skipIfTorchDynamo, TestCase
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
+)
 
 
 class Model(nn.Module):
@@ -45,7 +50,7 @@ class TestActivationSparsifier(TestCase):
         in the activation sparsifier
         """
         sparsifier_defaults = activation_sparsifier.defaults
-        combined_defaults = {**defaults, 'sparse_config': sparse_config}
+        combined_defaults = {**defaults, "sparse_config": sparse_config}
 
         # more keys are populated in activation sparsifier (eventhough they may be None)
         assert len(combined_defaults) <= len(activation_sparsifier.defaults)
@@ -54,7 +59,9 @@ class TestActivationSparsifier(TestCase):
             # all the keys in combined_defaults should be present in sparsifier defaults
             assert config == combined_defaults.get(key, None)
 
-    def _check_register_layer(self, activation_sparsifier, defaults, sparse_config, layer_args_list):
+    def _check_register_layer(
+        self, activation_sparsifier, defaults, sparse_config, layer_args_list
+    ):
        """Checks if layers in the model are correctly mapped to it's arguments.
        Args:
@@ -82,14 +89,14 @@ class TestActivationSparsifier(TestCase):
             sparse_config_actual = copy.deepcopy(sparse_config)
             sparse_config_actual.update(sparse_config_layer)
 
-            name = module_to_fqn(activation_sparsifier.model, layer_arg['layer'])
+            name = module_to_fqn(activation_sparsifier.model, layer_arg["layer"])
 
-            assert data_groups[name]['sparse_config'] == sparse_config_actual
+            assert data_groups[name]["sparse_config"] == sparse_config_actual
 
             # assert the rest
             other_config_actual = copy.deepcopy(defaults)
             other_config_actual.update(layer_arg)
-            other_config_actual.pop('layer')
+            other_config_actual.pop("layer")
 
             for key, value in other_config_actual.items():
                 assert key in data_groups[name]
@@ -119,13 +126,15 @@ class TestActivationSparsifier(TestCase):
         data_agg_actual = data_list[0]
         model = activation_sparsifier.model
         layer_name = module_to_fqn(model, model.conv1)
-        agg_fn = activation_sparsifier.data_groups[layer_name]['aggregate_fn']
+        agg_fn = activation_sparsifier.data_groups[layer_name]["aggregate_fn"]
 
         for i in range(1, len(data_list)):
             data_agg_actual = agg_fn(data_agg_actual, data_list[i])
 
-        assert 'data' in activation_sparsifier.data_groups[layer_name]
-        assert torch.all(activation_sparsifier.data_groups[layer_name]['data'] == data_agg_actual)
+        assert "data" in activation_sparsifier.data_groups[layer_name]
+        assert torch.all(
+            activation_sparsifier.data_groups[layer_name]["data"] == data_agg_actual
+        )
 
         return data_agg_actual
 
@@ -144,11 +153,11 @@ class TestActivationSparsifier(TestCase):
         layer_name = module_to_fqn(model, model.conv1)
         assert layer_name is not None
 
-        reduce_fn = activation_sparsifier.data_groups[layer_name]['reduce_fn']
+        reduce_fn = activation_sparsifier.data_groups[layer_name]["reduce_fn"]
 
         data_reduce_actual = reduce_fn(data_agg_actual)
-        mask_fn = activation_sparsifier.data_groups[layer_name]['mask_fn']
-        sparse_config = activation_sparsifier.data_groups[layer_name]['sparse_config']
+        mask_fn = activation_sparsifier.data_groups[layer_name]["mask_fn"]
+        sparse_config = activation_sparsifier.data_groups[layer_name]["sparse_config"]
         mask_actual = mask_fn(data_reduce_actual, **sparse_config)
 
         mask_model = activation_sparsifier.get_mask(layer_name)
@@ -156,8 +165,7 @@ class TestActivationSparsifier(TestCase):
         assert torch.all(mask_model == mask_actual)
 
         for config in activation_sparsifier.data_groups.values():
-            assert 'data' not in config
-
+            assert "data" not in config
 
     def _check_squash_mask(self, activation_sparsifier, data):
        """Makes sure that squash_mask() works as usual. Specifically, checks
@@ -172,11 +180,12 @@ class TestActivationSparsifier(TestCase):
             data (torch tensor)
                 dummy batched data
         """
+
         # create a forward hook for checking output == layer(input * mask)
         def check_output(name):
             mask = activation_sparsifier.get_mask(name)
-            features = activation_sparsifier.data_groups[name].get('features')
-            feature_dim = activation_sparsifier.data_groups[name].get('feature_dim')
+            features = activation_sparsifier.data_groups[name].get("features")
+            feature_dim = activation_sparsifier.data_groups[name].get("feature_dim")
 
             def hook(module, input, output):
                 input_data = input[0]
@@ -184,20 +193,28 @@ class TestActivationSparsifier(TestCase):
                     assert torch.all(mask * input_data == output)
                 else:
                     for feature_idx in range(0, len(features)):
-                        feature = torch.Tensor([features[feature_idx]], device=input_data.device).long()
-                        inp_data_feature = torch.index_select(input_data, feature_dim, feature)
-                        out_data_feature = torch.index_select(output, feature_dim, feature)
+                        feature = torch.Tensor(
+                            [features[feature_idx]], device=input_data.device
+                        ).long()
+                        inp_data_feature = torch.index_select(
+                            input_data, feature_dim, feature
+                        )
+                        out_data_feature = torch.index_select(
+                            output, feature_dim, feature
+                        )
+
+                        assert torch.all(
+                            mask[feature_idx] * inp_data_feature == out_data_feature
+                        )
 
-                        assert torch.all(mask[feature_idx] * inp_data_feature == out_data_feature)
             return hook
 
         for name, config in activation_sparsifier.data_groups.items():
-            if 'identity' in name:
-                config['layer'].register_forward_hook(check_output(name))
+            if "identity" in name:
+                config["layer"].register_forward_hook(check_output(name))
 
         activation_sparsifier.model(data)
 
-
     def _check_state_dict(self, sparsifier1):
         """Checks if loading and restoring of state_dict() works as expected.
         Basically, dumps the state of the sparsifier and loads it in the other sparsifier
@@ -222,8 +239,8 @@ class TestActivationSparsifier(TestCase):
         for name, state in sparsifier2.state.items():
             assert name in sparsifier1.state
-            mask1 = sparsifier1.state[name]['mask']
-            mask2 = state['mask']
+            mask1 = sparsifier1.state[name]["mask"]
+            mask2 = state["mask"]
 
             if mask1 is None:
                 assert mask2 is None
@@ -237,8 +254,8 @@ class TestActivationSparsifier(TestCase):
                 assert torch.all(mask1 == mask2)
 
         # make sure that the state dict is stored as torch sparse
-        for state in state_dict['state'].values():
-            mask = state['mask']
+        for state in state_dict["state"].values():
+            mask = state["mask"]
             if mask is not None:
                 if isinstance(mask, List):
                     for idx in range(len(mask)):
@@ -252,8 +269,16 @@ class TestActivationSparsifier(TestCase):
             assert layer_name in dg2
 
             # exclude hook and layer
-            config1 = {key: value for key, value in config.items() if key not in ['hook', 'layer']}
-            config2 = {key: value for key, value in dg2[layer_name].items() if key not in ['hook', 'layer']}
+            config1 = {
+                key: value
+                for key, value in config.items()
+                if key not in ["hook", "layer"]
+            }
+            config2 = {
+                key: value
+                for key, value in dg2[layer_name].items()
+                if key not in ["hook", "layer"]
+            }
 
             assert config1 == config2
 
@@ -263,6 +288,7 @@ class TestActivationSparsifier(TestCase):
         till squash_mask(). The idea is to check that everything works as expected while
        in the workflow.
""" + # defining aggregate, reduce and mask functions def agg_fn(x, y): return x + y @@ -287,14 +313,9 @@ class TestActivationSparsifier(TestCase): # Creating default function and sparse configs # default sparse_config - sparse_config = { - 'sparsity_level': 0.5 - } + sparse_config = {"sparsity_level": 0.5} - defaults = { - 'aggregate_fn': agg_fn, - 'reduce_fn': reduce_fn - } + defaults = {"aggregate_fn": agg_fn, "reduce_fn": reduce_fn} # simulate the workflow # STEP 1: make data and activation sparsifier object @@ -306,35 +327,41 @@ class TestActivationSparsifier(TestCase): # STEP 2: Register some layers register_layer1_args = { - 'layer': model.conv1, - 'mask_fn': _vanilla_norm_sparsifier + "layer": model.conv1, + "mask_fn": _vanilla_norm_sparsifier, } - sparse_config_layer1 = {'sparsity_level': 0.3} + sparse_config_layer1 = {"sparsity_level": 0.3} register_layer2_args = { - 'layer': model.linear1, - 'features': [0, 10, 234], - 'feature_dim': 1, - 'mask_fn': _vanilla_norm_sparsifier + "layer": model.linear1, + "features": [0, 10, 234], + "feature_dim": 1, + "mask_fn": _vanilla_norm_sparsifier, } - sparse_config_layer2 = {'sparsity_level': 0.1} + sparse_config_layer2 = {"sparsity_level": 0.1} register_layer3_args = { - 'layer': model.identity1, - 'mask_fn': _vanilla_norm_sparsifier + "layer": model.identity1, + "mask_fn": _vanilla_norm_sparsifier, } - sparse_config_layer3 = {'sparsity_level': 0.3} + sparse_config_layer3 = {"sparsity_level": 0.3} register_layer4_args = { - 'layer': model.identity2, - 'features': [0, 10, 20], - 'feature_dim': 1, - 'mask_fn': _vanilla_norm_sparsifier + "layer": model.identity2, + "features": [0, 10, 20], + "feature_dim": 1, + "mask_fn": _vanilla_norm_sparsifier, } - sparse_config_layer4 = {'sparsity_level': 0.1} + sparse_config_layer4 = {"sparsity_level": 0.1} - layer_args_list = [(register_layer1_args, sparse_config_layer1), (register_layer2_args, sparse_config_layer2)] - layer_args_list += [(register_layer3_args, sparse_config_layer3), (register_layer4_args, sparse_config_layer4)] + layer_args_list = [ + (register_layer1_args, sparse_config_layer1), + (register_layer2_args, sparse_config_layer2), + ] + layer_args_list += [ + (register_layer3_args, sparse_config_layer3), + (register_layer4_args, sparse_config_layer4), + ] # Registering.. 
        for layer_args in layer_args_list:
@@ -342,7 +369,9 @@ class TestActivationSparsifier(TestCase):
             activation_sparsifier.register_layer(**layer_arg, **sparse_config_layer)
 
         # check if things are registered correctly
-        self._check_register_layer(activation_sparsifier, defaults, sparse_config, layer_args_list)
+        self._check_register_layer(
+            activation_sparsifier, defaults, sparse_config, layer_args_list
+        )
 
         # check state_dict after registering and before model forward
         self._check_state_dict(activation_sparsifier)
diff --git a/test/ao/sparsity/test_composability.py b/test/ao/sparsity/test_composability.py
index 9c5424601f2b..1d2e6a53c2cf 100644
--- a/test/ao/sparsity/test_composability.py
+++ b/test/ao/sparsity/test_composability.py
@@ -7,9 +7,14 @@ import torch
 import torch.ao.quantization as tq
 from torch import nn
 from torch.ao import pruning
-from torch.testing._internal.common_utils import TestCase
-from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx, convert_to_reference_fx, prepare_qat_fx
 from torch.ao.pruning import fqn_to_module
+from torch.ao.quantization.quantize_fx import (
+    convert_fx,
+    convert_to_reference_fx,
+    prepare_fx,
+    prepare_qat_fx,
+)
+from torch.testing._internal.common_utils import TestCase
 
 logging.basicConfig(
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -21,6 +26,7 @@ sparse_defaults = {
     "zeros_per_block": 4,
 }
 
+
 def _get_model_and_sparsifier_and_sparse_config(qconfig=None):
     model = nn.Sequential(
         nn.Linear(4, 4),  # 0
@@ -40,7 +46,7 @@ def _get_model_and_sparsifier_and_sparse_config(qconfig=None):
 
     sparse_config = [
         {
-            "tensor_fqn": '5.weight',
+            "tensor_fqn": "5.weight",
             "sparsity_level": 0.7,
             "sparse_block_shape": (1, 4),
             "zeros_per_block": 4,
@@ -49,15 +55,18 @@ def _get_model_and_sparsifier_and_sparse_config(qconfig=None):
     ]
     return model, sparsifier, sparse_config
 
+
 def _squash_mask_calibrate_and_convert(model, sparsifier, input):
     sparsifier.step()
     sparsifier.squash_mask()
     model(input)
     tq.convert(model, inplace=True)
 
+
 def _calculate_sparsity(tensor):
     return ((tensor == 0).sum() / tensor.numel()).item()
 
+
# This series of tests are to check the composability goals for sparsity and quantization. Namely
# that performing quantization and sparsity model manipulations in various orderings
# does not cause problems
@@ -70,7 +79,9 @@ class TestComposability(TestCase):
             mod,
             sparsifier,
             sparse_config,
-        ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
+        ) = _get_model_and_sparsifier_and_sparse_config(
+            tq.get_default_qconfig("fbgemm")
+        )
 
         tq.prepare(mod, inplace=True)
         sparsifier.prepare(mod, config=sparse_config)
@@ -81,9 +92,7 @@ class TestComposability(TestCase):
         # check that correct observers were inserted
         self.assertTrue(hasattr(mod[5], "activation_post_process"))
 
-        _squash_mask_calibrate_and_convert(
-            mod, sparsifier, torch.randn(1, 4, 4, 4)
-        )
+        _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
 
         # check that final module is the expected quantized module and that the model runs
         self.assertTrue(isinstance(mod[5], torch.ao.nn.quantized.Linear))
@@ -99,7 +108,9 @@ class TestComposability(TestCase):
             mod,
             sparsifier,
             sparse_config,
-        ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
+        ) = _get_model_and_sparsifier_and_sparse_config(
+            tq.get_default_qconfig("fbgemm")
+        )
 
         sparsifier.prepare(mod, config=sparse_config)
         tq.prepare(mod, inplace=True)
@@ -113,9 +124,7 @@ class TestComposability(TestCase):
         # occurred successfully
         self.assertTrue(hasattr(mod[5], "activation_post_process"))
 
-        _squash_mask_calibrate_and_convert(
-            mod, sparsifier, torch.randn(1, 4, 4, 4)
-        )
+        _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
 
         # check that final module is the expected quantized module and that the model runs
         self.assertTrue(isinstance(mod[5], torch.ao.nn.quantized.Linear))
@@ -130,7 +139,9 @@ class TestComposability(TestCase):
             mod,
             sparsifier,
             sparse_config,
-        ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
+        ) = _get_model_and_sparsifier_and_sparse_config(
+            tq.get_default_qconfig("fbgemm")
+        )
 
         sparsifier.prepare(mod, config=sparse_config)
         tq.prepare(mod, inplace=True)
@@ -168,7 +179,9 @@ class TestComposability(TestCase):
             mod,
             sparsifier,
             sparse_config,
-        ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
+        ) = _get_model_and_sparsifier_and_sparse_config(
+            tq.get_default_qconfig("fbgemm")
+        )
         sparsifier.prepare(mod, config=sparse_config)
         tq.fuse_modules(mod, [["5", "6"]], inplace=True)
         mod[5].qconfig = tq.get_default_qconfig("fbgemm")
@@ -182,9 +195,7 @@ class TestComposability(TestCase):
         # check that correct observers were inserted and that matching
         # occurred successfully
         self.assertTrue(hasattr(mod[5], "activation_post_process"))
-        _squash_mask_calibrate_and_convert(
-            mod, sparsifier, torch.randn(1, 4, 4, 4)
-        )
+        _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
 
         # check that final module is the expected quantized module and that the model runs
         self.assertTrue(isinstance(mod[5], torch.ao.nn.intrinsic.quantized.LinearReLU))
@@ -197,7 +208,9 @@ class TestComposability(TestCase):
             mod,
             sparsifier,
             _,
-        ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
+        ) = _get_model_and_sparsifier_and_sparse_config(
+            tq.get_default_qconfig("fbgemm")
+        )
         tq.fuse_modules(mod, [["5", "6"]], inplace=True)
 
         # its absolutely broken by fusion but will still work if you put the correct fqn in
@@ -261,9 +274,7 @@ class TestComposability(TestCase):
         # occurred successfully
         self.assertTrue(hasattr(mod[5], "activation_post_process"))
        self.assertTrue(isinstance(mod[5], torch.ao.nn.qat.Linear))
-        _squash_mask_calibrate_and_convert(
-            mod, sparsifier, torch.randn(1, 4, 4, 4)
-        )
+        _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
 
         # check that final module is the expected quantized module and that the model runs
         self.assertTrue(isinstance(mod[5], torch.ao.nn.quantized.Linear))
         self.assertEqual(mod(torch.randn(1, 4, 4, 4)).shape, torch.Size([1, 4, 4, 4]))
@@ -301,9 +312,7 @@ class TestComposability(TestCase):
         self.assertTrue(hasattr(mod[5], "activation_post_process"))
         self.assertTrue(isinstance(mod[5], torch.ao.nn.qat.Linear))
 
-        _squash_mask_calibrate_and_convert(
-            mod, sparsifier, torch.randn(1, 4, 4, 4)
-        )
+        _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
 
         # check that final module is the expected quantized module and that the model runs
         self.assertTrue(isinstance(mod[5], torch.ao.nn.quantized.Linear))
@@ -313,6 +322,7 @@ class TestComposability(TestCase):
         cur_sparsity = _calculate_sparsity(mod[5]._weight_bias()[0])
         self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])
 
+
 def _module_has_activation_post_process(model, fqn_of_module):
     for node in model.graph.nodes:
         # look for an observer whose arg is the target module
@@ -321,10 +331,12 @@ def _module_has_activation_post_process(model, fqn_of_module):
                 return True
     return False
 
+
 class TestFxComposability(TestCase):
     r"""This series of tests checks that various steps of the quantization and sparsity flow
     compose cleanly despite variation in sequencing.
     """
+
     def test_q_prep_fx_before_s_prep(self):
         r"""
         This test checks that the ordering of prepare_fx -> sparse prepare -> convert_fx
@@ -340,10 +352,11 @@ class TestFxComposability(TestCase):
         example = torch.randn(1, 4, 4, 4)
         qconfig = tq.get_default_qconfig("fbgemm")
-        qconfig_mapping = tq.QConfigMapping() \
-            .set_module_name("4", qconfig) \
+        qconfig_mapping = (
+            tq.QConfigMapping()
+            .set_module_name("4", qconfig)
             .set_module_name("5", qconfig)
-
+        )
 
         mod = prepare_fx(mod, qconfig_mapping, (example,))
 
@@ -374,7 +387,11 @@ class TestFxComposability(TestCase):
         mod = convert_fx(mod)
 
         # check that final module is the expected quantized module and that the model runs
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU))
+        self.assertTrue(
+            isinstance(
+                fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU
+            )
+        )
         self.assertEqual(mod(example).shape, torch.Size([1, 4, 4, 4]))
 
         # check that module was actually sparsified
@@ -399,9 +416,11 @@ class TestFxComposability(TestCase):
         example = torch.randn(1, 4, 4, 4)
         qconfig = tq.get_default_qconfig("fbgemm")
-        qconfig_mapping = tq.QConfigMapping() \
-            .set_module_name("4", qconfig) \
+        qconfig_mapping = (
+            tq.QConfigMapping()
+            .set_module_name("4", qconfig)
             .set_module_name("5", qconfig)
+        )
 
         mod = prepare_fx(mod, qconfig_mapping, (example,))
 
@@ -432,9 +451,15 @@ class TestFxComposability(TestCase):
         mod = convert_to_reference_fx(mod)
 
         # check that final module is the expected quantized module and that the model runs
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.LinearReLU))
+        self.assertTrue(
+            isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.LinearReLU)
+        )
         self.assertEqual(mod(example).shape, torch.Size([1, 4, 4, 4]))
-        self.assertTrue(isinstance(fqn_to_module(mod, "5.0"), torch.ao.nn.quantized.reference.Linear))
+        self.assertTrue(
+            isinstance(
+                fqn_to_module(mod, "5.0"), torch.ao.nn.quantized.reference.Linear
+            )
+        )
 
        # check that module was actually sparsified
        cur_sparsity = _calculate_sparsity(fqn_to_module(mod, "5.0.weight"))
@@ -459,9 +484,11 @@ class TestFxComposability(TestCase):
         example = torch.randn(1, 4, 4, 4)
         qconfig = tq.get_default_qconfig("fbgemm")
-        qconfig_mapping = tq.QConfigMapping() \
-            .set_module_name("4", qconfig) \
+        qconfig_mapping = (
+            tq.QConfigMapping()
+            .set_module_name("4", qconfig)
             .set_module_name("5", qconfig)
+        )
         mod = prepare_fx(mod, qconfig_mapping, (example,))
 
         # check that correct modules had parametrizations added and
@@ -478,7 +505,11 @@ class TestFxComposability(TestCase):
         mod = convert_fx(mod)
 
         # check that final module is the expected quantized module and that the model runs
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU))
+        self.assertTrue(
+            isinstance(
+                fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU
+            )
+        )
         self.assertEqual(mod(example).shape, torch.Size([1, 4, 4, 4]))
 
         # check that module was actually sparsified
@@ -504,16 +535,20 @@ class TestFxComposability(TestCase):
         example = torch.randn(1, 4, 4, 4)
         qconfig = tq.get_default_qat_qconfig("fbgemm")
-        qconfig_mapping = tq.QConfigMapping() \
-            .set_module_name("4", qconfig) \
+        qconfig_mapping = (
+            tq.QConfigMapping()
+            .set_module_name("4", qconfig)
             .set_module_name("5", qconfig)
+        )
         mod = prepare_qat_fx(mod, qconfig_mapping, (example,))
 
         # check that correct modules had parametrizations added and
         # that none were lost during prepare
         self.assertTrue(hasattr(fqn_to_module(mod, "0.0"), "parametrizations"))
         self.assertTrue(hasattr(fqn_to_module(mod, "5"), "parametrizations"))
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.qat.LinearReLU))
+        self.assertTrue(
+            isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.qat.LinearReLU)
+        )
 
         # check that correct observers were inserted and that matching
         # occurred successfully
@@ -524,7 +559,11 @@ class TestFxComposability(TestCase):
         mod = convert_fx(mod)
 
         # check that final module is the expected quantized module and that the model runs
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU))
+        self.assertTrue(
+            isinstance(
+                fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.quantized.LinearReLU
+            )
+        )
         self.assertEqual(mod(example).shape, torch.Size([1, 4, 4, 4]))
 
         # check that module was actually sparsified
@@ -550,9 +589,11 @@ class TestFxComposability(TestCase):
         example = torch.randn(1, 4, 4, 4)
         qconfig = tq.get_default_qconfig("fbgemm")
-        qconfig_mapping = tq.QConfigMapping() \
-            .set_module_name("4", qconfig) \
+        qconfig_mapping = (
+            tq.QConfigMapping()
+            .set_module_name("4", qconfig)
             .set_module_name("5", qconfig)
+        )
         mod = prepare_fx(mod, qconfig_mapping, (example,))
 
         # check that correct modules had parametrizations added and
@@ -569,9 +610,15 @@ class TestFxComposability(TestCase):
         mod = convert_to_reference_fx(mod)
 
         # check that final module is the expected quantized module and that the model runs
-        self.assertTrue(isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.LinearReLU))
+        self.assertTrue(
+            isinstance(fqn_to_module(mod, "5"), torch.ao.nn.intrinsic.LinearReLU)
+        )
         self.assertEqual(mod(example).shape, torch.Size([1, 4, 4, 4]))
-        self.assertTrue(isinstance(fqn_to_module(mod, "5.0"), torch.ao.nn.quantized.reference.Linear))
+        self.assertTrue(
+            isinstance(
+                fqn_to_module(mod, "5.0"), torch.ao.nn.quantized.reference.Linear
+            )
+        )
 
         # check that module was actually sparsified
        cur_sparsity = _calculate_sparsity(fqn_to_module(mod, "5.0.weight"))
"5.0.weight")) diff --git a/test/ao/sparsity/test_data_scheduler.py b/test/ao/sparsity/test_data_scheduler.py index ab7c051c2107..cc451d5cd783 100644 --- a/test/ao/sparsity/test_data_scheduler.py +++ b/test/ao/sparsity/test_data_scheduler.py @@ -1,17 +1,20 @@ # Owner(s): ["module: unknown"] +import copy import logging import warnings -from torch.testing._internal.common_utils import TestCase -from torch import nn -import torch from typing import Tuple -import copy -from torch.ao.pruning._experimental.data_sparsifier import DataNormSparsifier +import torch +from torch import nn from torch.ao.pruning._experimental.data_scheduler import BaseDataScheduler -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) +from torch.ao.pruning._experimental.data_sparsifier import DataNormSparsifier +from torch.testing._internal.common_utils import TestCase + +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO +) class ImplementedDataScheduler(BaseDataScheduler): @@ -20,27 +23,32 @@ class ImplementedDataScheduler(BaseDataScheduler): def get_schedule_param(self): if self.last_epoch > 0: - return {name: config['sparsity_level'] * 0.5 - for name, config in self.data_sparsifier.data_groups.items()} + return { + name: config["sparsity_level"] * 0.5 + for name, config in self.data_sparsifier.data_groups.items() + } else: return self.base_param class TestBaseDataScheduler(TestCase): def _get_data(self): - tensor1, param1, emb1 = torch.randn(5, 5), nn.Parameter(torch.randn(10, 10)), nn.Embedding(50, 5) - data_list = [ - ('tensor1', tensor1), ('param1', param1), ('emb1', emb1) - ] + tensor1, param1, emb1 = ( + torch.randn(5, 5), + nn.Parameter(torch.randn(10, 10)), + nn.Embedding(50, 5), + ) + data_list = [("tensor1", tensor1), ("param1", param1), ("emb1", emb1)] defaults = { - 'sparsity_level': 0.7, - 'sparse_block_shape': (1, 4), - 'zeros_per_block': 2 + "sparsity_level": 0.7, + "sparse_block_shape": (1, 4), + "zeros_per_block": 2, } data_with_config = [ { - 'name': 'tensor2', 'data': torch.randn(4, 4), - 'config': {'sparsity_level': 0.3} + "name": "tensor2", + "data": torch.randn(4, 4), + "config": {"sparsity_level": 0.3}, } ] return data_list, data_with_config, defaults @@ -48,7 +56,11 @@ class TestBaseDataScheduler(TestCase): def _get_sparsifier(self, data_list, data_with_config, defaults): sparsifier = DataNormSparsifier(data_list, **defaults) for data_config_dict in data_with_config: - name, data, config = data_config_dict['name'], data_config_dict['data'], data_config_dict['config'] + name, data, config = ( + data_config_dict["name"], + data_config_dict["data"], + data_config_dict["config"], + ) sparsifier.add_data(name=name, data=data, **config) return sparsifier @@ -57,7 +69,7 @@ class TestBaseDataScheduler(TestCase): return scheduler def _get_schedule_param(self): - return 'sparsity_level' + return "sparsity_level" def _get_name_data_config(self, some_data, defaults): config = copy.deepcopy(defaults) @@ -66,7 +78,11 @@ class TestBaseDataScheduler(TestCase): name, data = some_data else: # dealing with data_with_config - name, data, new_config = some_data['name'], some_data['data'], some_data['config'] + name, data, new_config = ( + some_data["name"], + some_data["data"], + some_data["config"], + ) config.update(new_config) return name, data, config @@ -102,8 +118,11 @@ class TestBaseDataScheduler(TestCase): # Make sure there is no warning related to the base_data_scheduler for warning in w: fname = 
-            fname = '/'.join(fname.split('/')[-5:])
-            assert fname != 'torch/ao/sparsity/experimental/scheduler/data_scheduler/base_data_scheduler.py'
+            fname = "/".join(fname.split("/")[-5:])
+            assert (
+                fname
+                != "torch/ao/sparsity/experimental/scheduler/data_scheduler/base_data_scheduler.py"
+            )
 
     def test_step(self):
         data_list, data_with_config, defaults = self._get_data()
@@ -115,14 +134,19 @@ class TestBaseDataScheduler(TestCase):
 
         for some_data in all_data:
             name, _, config = self._get_name_data_config(some_data, defaults)
-            assert sparsifier.data_groups[name][schedule_param] == config[schedule_param]
+            assert (
+                sparsifier.data_groups[name][schedule_param] == config[schedule_param]
+            )
 
         sparsifier.step()
         scheduler.step()
 
         for some_data in all_data:
             name, _, config = self._get_name_data_config(some_data, defaults)
-            assert sparsifier.data_groups[name][schedule_param] == config[schedule_param] * 0.5
+            assert (
+                sparsifier.data_groups[name][schedule_param]
+                == config[schedule_param] * 0.5
+            )
 
         # checking step count
         step_cnt = 5
@@ -130,7 +154,9 @@ class TestBaseDataScheduler(TestCase):
             sparsifier.step()
             scheduler.step()
 
-        assert scheduler._step_count == step_cnt + 2  # step_cnt + step above + 1 step in constructor
+        assert (
+            scheduler._step_count == step_cnt + 2
+        )  # step_cnt + step above + 1 step in constructor
 
     def test_state_dict(self):
         data_list, data_with_config, defaults = self._get_data()
diff --git a/test/ao/sparsity/test_data_sparsifier.py b/test/ao/sparsity/test_data_sparsifier.py
index d6d27e368b98..23a3d0287233 100644
--- a/test/ao/sparsity/test_data_sparsifier.py
+++ b/test/ao/sparsity/test_data_sparsifier.py
@@ -1,20 +1,28 @@
 # Owner(s): ["module: unknown"]
 
+import copy
+import itertools
 import logging
+import math
+
+from typing import Tuple
+
 import torch
+from torch import nn
+
+from torch.ao.pruning._experimental.data_sparsifier import (
+    BaseDataSparsifier,
+    DataNormSparsifier,
+)
+from torch.ao.pruning._experimental.data_sparsifier.quantization_utils import (
+    post_training_sparse_quantize,
+)
 from torch.nn.utils.parametrize import is_parametrized
 from torch.testing._internal.common_utils import TestCase
-from typing import Tuple
-from torch import nn
-import itertools
-import math
-import copy
-
-from torch.ao.pruning._experimental.data_sparsifier import BaseDataSparsifier, DataNormSparsifier
-from torch.ao.pruning._experimental.data_sparsifier.quantization_utils import post_training_sparse_quantize
-
-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
+)
 
 
 class ImplementedSparsifier(BaseDataSparsifier):
@@ -25,17 +33,18 @@ class ImplementedSparsifier(BaseDataSparsifier):
         mask = self.get_mask(name)
         mask[0] = 0
         linear_state = self.state[name]
-        linear_state['step_count'] = linear_state.get('step_count', 0) + 1
+        linear_state["step_count"] = linear_state.get("step_count", 0) + 1
 
 
 class _BaseDataSparsiferTestCase(TestCase):
     r"""This helper test class takes in any supported type of and runs some tests.
-        The user is required to pass in the data that needs to sparsified and the
-        runner will run some tests that needs to be passed in order for the data
-        type to be supported.
-        TODO: Change the structure by creating a separate test case class for each
-        member function
+    The user is required to pass in the data that needs to sparsified and the
+    runner will run some tests that needs to be passed in order for the data
+    type to be supported.
+    TODO: Change the structure by creating a separate test case class for each
+    member function
     """
+
     def run_all_checks(self, data_list, data_with_config, defaults):
         self.check_constructor(data_list, data_with_config, defaults)
         self.check_squash_mask(data_list, data_with_config, defaults)
@@ -52,32 +61,49 @@ class _BaseDataSparsiferTestCase(TestCase):
             config = defaults
         else:
             # dealing with data_with_config
-            name, data, config = some_data['name'], some_data['data'], some_data['config']
+            name, data, config = (
+                some_data["name"],
+                some_data["data"],
+                some_data["config"],
+            )
         return name, data, config
 
     @staticmethod
-    def _make_sparsifier(data_list, data_with_config, defaults,
-                         sparsifier_type=None, sparsifier_kwargs=None):
+    def _make_sparsifier(
+        data_list,
+        data_with_config,
+        defaults,
+        sparsifier_type=None,
+        sparsifier_kwargs=None,
+    ):
         if sparsifier_type is None:
             sparsifier = ImplementedSparsifier(data_list=data_list, **defaults)
         else:
             kwargs = copy.deepcopy(defaults)
             kwargs.update(sparsifier_kwargs)
-            kwargs['data_list'] = data_list
+            kwargs["data_list"] = data_list
             sparsifier = sparsifier_type(**kwargs)
         assert len(sparsifier.data_groups) == len(data_list)
         for data_config_dict in data_with_config:
-            name, data, config = data_config_dict['name'], data_config_dict['data'], data_config_dict['config']
+            name, data, config = (
+                data_config_dict["name"],
+                data_config_dict["data"],
+                data_config_dict["config"],
+            )
             sparsifier.add_data(name=name, data=data, **config)
         return sparsifier
 
     def check_constructor(self, data_list, data_with_config, defaults, **kwargs):
-        sparsifier = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs)
-        self.assertEqual(len(sparsifier.data_groups),
-                         len(data_list) + len(data_with_config),
-                         msg="Sparsifier data groups don't match the input "
-                         f"({len(sparsifier.data_groups)} vs. "
-                         f"{len(data_list) + len(data_with_config)}).")
+        sparsifier = self._make_sparsifier(
+            data_list, data_with_config, defaults=defaults, **kwargs
+        )
+        self.assertEqual(
+            len(sparsifier.data_groups),
+            len(data_list) + len(data_with_config),
+            msg="Sparsifier data groups don't match the input "
+            f"({len(sparsifier.data_groups)} vs. "
" + f"{len(data_list) + len(data_with_config)}).", + ) all_data = data_list + data_with_config @@ -87,7 +113,9 @@ class _BaseDataSparsiferTestCase(TestCase): self.assertEqual(sparsifier.data_groups[name], config) def check_step(self, data_list, data_with_config, defaults, **kwargs): - sparsifier = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs) + sparsifier = self._make_sparsifier( + data_list, data_with_config, defaults=defaults, **kwargs + ) all_data = data_list + data_with_config # Check data and mask before doing the step @@ -114,11 +142,13 @@ class _BaseDataSparsiferTestCase(TestCase): self.assertEqualBroadcasting(sparsified_data[0], 0) self.assertEqual(original_data, data) self.assertEqualBroadcasting(mask[0], 0) - assert 'step_count' in sparsifier.state[name] - assert sparsifier.state[name]['step_count'] == 3 + assert "step_count" in sparsifier.state[name] + assert sparsifier.state[name]["step_count"] == 3 def check_squash_mask(self, data_list, data_with_config, defaults, **kwargs): - sparsifier = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs) + sparsifier = self._make_sparsifier( + data_list, data_with_config, defaults=defaults, **kwargs + ) all_data = data_list + data_with_config for some_data in all_data: name, _, _ = self._get_name_data_config(some_data) @@ -129,15 +159,21 @@ class _BaseDataSparsiferTestCase(TestCase): for some_data in all_data: name, _, _ = self._get_name_data_config(some_data) - assert not is_parametrized(sparsifier._container, name) # not parametrized anymore + assert not is_parametrized( + sparsifier._container, name + ) # not parametrized anymore with self.assertRaises(ValueError): sparsifier.get_data(name, return_original=True) def check_add_data(self, data_list, data_with_config, defaults, **kwargs): - sparsifier = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs) + sparsifier = self._make_sparsifier( + data_list, data_with_config, defaults=defaults, **kwargs + ) all_data = data_list + data_with_config for some_data in all_data: - name1, data1, config = self._get_name_data_config(some_data, defaults=defaults) + name1, data1, config = self._get_name_data_config( + some_data, defaults=defaults + ) data1 = sparsifier._extract_weight(data1) data1_old = copy.deepcopy(data1) assert torch.all(data1 == sparsifier.get_data(name=name1)) @@ -145,18 +181,28 @@ class _BaseDataSparsiferTestCase(TestCase): sparsifier.step() mask = sparsifier.get_mask(name1) - data2 = torch.randn(data1.shape) # add another data with the same shape as original data + data2 = torch.randn( + data1.shape + ) # add another data with the same shape as original data sparsifier.add_data(name=name1, data=data2) assert torch.all(data2 == sparsifier.get_data(name=name1)) - assert torch.all(sparsifier.get_mask(name1) == mask) # mask should not change + assert torch.all( + sparsifier.get_mask(name1) == mask + ) # mask should not change assert torch.all(data1_old == data1) - assert sparsifier.data_groups[name1] == config # if replaced old_config should match new config + assert ( + sparsifier.data_groups[name1] == config + ) # if replaced old_config should match new config def check_state_dict(self, data_list, data_with_config, defaults, **kwargs): - sparsifier1 = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs) - sparsifier2 = self._make_sparsifier(data_list=[data_list[0]], data_with_config=[], defaults=defaults, **kwargs) + sparsifier1 = self._make_sparsifier( + data_list, 
+        )
+        sparsifier2 = self._make_sparsifier(
+            data_list=[data_list[0]], data_with_config=[], defaults=defaults, **kwargs
+        )
         sparsifier1.step()
 
         state_dict1 = sparsifier1.state_dict()
@@ -169,15 +215,17 @@ class _BaseDataSparsiferTestCase(TestCase):
         assert len(sparsifier1.state) == len(sparsifier2.state)
         assert len(sparsifier1.data_groups) == len(sparsifier2.data_groups)
 
-        state1 = state_dict1['state']
+        state1 = state_dict1["state"]
         for name in state1.keys():
             # compare mask
             assert name in sparsifier2.state
-            assert 'mask' in sparsifier2.state[name]
-            assert 'mask' in sparsifier1.state[name]
-            mask1, mask2 = state1[name]['mask'], sparsifier2.state[name]['mask']
+            assert "mask" in sparsifier2.state[name]
+            assert "mask" in sparsifier1.state[name]
+            mask1, mask2 = state1[name]["mask"], sparsifier2.state[name]["mask"]
             assert mask1.is_sparse and not mask2.is_sparse
-            assert torch.all(mask1.to_dense() == mask2)  # mask1 is stored as sparse coo now
+            assert torch.all(
+                mask1.to_dense() == mask2
+            )  # mask1 is stored as sparse coo now
 
             # compare data_groups
             dg1, dg2 = sparsifier1.data_groups, sparsifier2.data_groups
@@ -187,12 +235,14 @@ class _BaseDataSparsiferTestCase(TestCase):
             # compare container
             container1, container2 = sparsifier1._container, sparsifier2._container
             assert torch.all(getattr(container1, name) == getattr(container2, name))
-            assert is_parametrized(container1, name) == is_parametrized(container2, name)
+            assert is_parametrized(container1, name) == is_parametrized(
+                container2, name
+            )
             if is_parametrized(container1, name):
                 param1 = getattr(container1.parametrizations, name)[0]
                 param2 = getattr(container2.parametrizations, name)[0]
-                assert hasattr(param1, 'mask')
-                assert hasattr(param2, 'mask')
+                assert hasattr(param1, "mask")
+                assert hasattr(param2, "mask")
                 self.assertEqual(param1.__dict__, param2.__dict__)
 
     def check_memory_reference(self, data_list, data_with_config, defaults, **kwargs):
@@ -204,29 +254,35 @@ class _BaseDataSparsiferTestCase(TestCase):
         This test modifies the data and asserts that data in the sparsifier
         is changed as well
         """
-        sparsifier = self._make_sparsifier(data_list, data_with_config, defaults=defaults, **kwargs)
+        sparsifier = self._make_sparsifier(
+            data_list, data_with_config, defaults=defaults, **kwargs
+        )
         all_data = data_list + data_with_config
         for some_data in all_data:
             name, data, _ = self._get_name_data_config(some_data)
             weight = sparsifier._extract_weight(data)
             weight.data = weight + torch.randn(*weight.shape)
             contained_data = sparsifier.get_data(name=name)
-            assert weight.data.storage().data_ptr() == contained_data.data.storage().data_ptr()
+            assert (
+                weight.data.storage().data_ptr()
+                == contained_data.data.storage().data_ptr()
+            )
             assert torch.all(contained_data == weight)
 
 
 class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase):
     r"""This helper test class takes in any supported type of and runs some tests.
-        This inherits the TestBaseDataSparsifierRuner wherein some functions are
-        over-ridden to take accomodate the specific sparsifier.
-        TODO: Change the structure by creating a separate test case class for each
-        member function
+    This inherits the TestBaseDataSparsifierRuner wherein some functions are
+    over-ridden to take accomodate the specific sparsifier.
+    TODO: Change the structure by creating a separate test case class for each
+    member function
     """
-    def run_all_checks(self, data_list, defaults, data_with_config, norm_type='L1'):
-        assert norm_type in ['L1', 'L2']
+
+    def run_all_checks(self, data_list, defaults, data_with_config, norm_type="L1"):
+        assert norm_type in ["L1", "L2"]
         kwargs = {
-            'sparsifier_type': DataNormSparsifier,
-            'sparsifier_kwargs': {'norm': norm_type}
+            "sparsifier_type": DataNormSparsifier,
+            "sparsifier_kwargs": {"norm": norm_type},
         }
         self.check_constructor(data_list, data_with_config, defaults, **kwargs)
         self.check_squash_mask(data_list, data_with_config, defaults, **kwargs)
@@ -234,24 +290,28 @@ class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase):
         self.check_state_dict(data_list, data_with_config, defaults, **kwargs)
         self.check_step(data_list, data_with_config, defaults, norm_type=norm_type)
         self.check_step_2_of_4(norm_type=norm_type)
-        self.check_sparsity_level(data_list, data_with_config, defaults, norm_type=norm_type)
+        self.check_sparsity_level(
+            data_list, data_with_config, defaults, norm_type=norm_type
+        )
         self.check_memory_reference(data_list, data_with_config, defaults, **kwargs)
 
     @staticmethod
     def _get_bounds_on_actual_sparsity(config, tensor_shape):
         r"""This function gets the bounds on actual sparsity.
-        Note::
-            Although we specify the sparsity_level parameter, this does not mean that
-            the actual sparsity obtained after sparsification is the same as sparsity_level.
-            The actual sparsity depends largely on the shape and the data itself.
+        Note::
+            Although we specify the sparsity_level parameter, this does not mean that
+            the actual sparsity obtained after sparsification is the same as sparsity_level.
+            The actual sparsity depends largely on the shape and the data itself.
""" - sparsity_level = config['sparsity_level'] - zeros_per_block = config['zeros_per_block'] - sparse_block_shape = config['sparse_block_shape'] + sparsity_level = config["sparsity_level"] + zeros_per_block = config["zeros_per_block"] + sparse_block_shape = config["sparse_block_shape"] height, width = tensor_shape[-2], tensor_shape[-1] block_height, block_width = sparse_block_shape - number_blocks = math.ceil(height / block_height) * math.ceil(width / block_width) + number_blocks = math.ceil(height / block_height) * math.ceil( + width / block_width + ) values_per_block = block_height * block_width if zeros_per_block == 0: @@ -260,17 +320,23 @@ class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase): # min value assumes zeros_per_block is 1 min_values_sparsified = round(number_blocks * sparsity_level) # max value assumes actual zeros_per_block - max_values_sparsified = min_values_sparsified * min(values_per_block, zeros_per_block) + max_values_sparsified = min_values_sparsified * min( + values_per_block, zeros_per_block + ) lower_bound = min_values_sparsified / (height * width) upper_bound = min(1.0, max_values_sparsified / (height * width)) lower_bound, upper_bound = round(lower_bound, 3), round(upper_bound, 3) return lower_bound, upper_bound - def check_step(self, data_list, data_with_config, defaults, norm_type='L1'): - sparsifier = self._make_sparsifier(data_list, data_with_config, defaults, - sparsifier_type=DataNormSparsifier, - sparsifier_kwargs={'norm': norm_type}) + def check_step(self, data_list, data_with_config, defaults, norm_type="L1"): + sparsifier = self._make_sparsifier( + data_list, + data_with_config, + defaults, + sparsifier_type=DataNormSparsifier, + sparsifier_kwargs={"norm": norm_type}, + ) all_data = data_list + data_with_config # mask before step() should not be sparsified @@ -289,29 +355,39 @@ class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase): mask = mask.to(torch.float) actual_sparsity = round(1 - mask.mean().item(), 3) assert actual_sparsity >= lb and actual_sparsity <= ub - assert actual_sparsity > 0.0 # exact sparsity level cannot be achieved due to size of tensor + assert ( + actual_sparsity > 0.0 + ) # exact sparsity level cannot be achieved due to size of tensor iters_before_collapse = 100 - test_sparsifier = DataNormSparsifier(sparsity_level=0.5, - sparse_block_shape=(1, 4), - zeros_per_block=4, - norm=norm_type) + test_sparsifier = DataNormSparsifier( + sparsity_level=0.5, + sparse_block_shape=(1, 4), + zeros_per_block=4, + norm=norm_type, + ) for _ in range(iters_before_collapse): new_data = torch.randn(20, 20) - test_sparsifier.add_data(name='test_data', data=new_data) + test_sparsifier.add_data(name="test_data", data=new_data) test_sparsifier.step() - mask = test_sparsifier.get_mask(name='test_data') + mask = test_sparsifier.get_mask(name="test_data") mask = mask.to(torch.float) assert (1.0 - mask.mean().item()) > 0 # some sparsity achieved def check_step_2_of_4(self, norm_type): # overriding default config for test purposes - default_config = {'sparsity_level': 1.0, 'zeros_per_block': 2, 'sparse_block_shape': (1, 4)} - data_list = [('test_data', torch.randn(4, 4))] + default_config = { + "sparsity_level": 1.0, + "zeros_per_block": 2, + "sparse_block_shape": (1, 4), + } + data_list = [("test_data", torch.randn(4, 4))] - sparsifier = DataNormSparsifier(data_list=data_list, norm=norm_type, **default_config) + sparsifier = DataNormSparsifier( + data_list=data_list, norm=norm_type, **default_config + ) sparsifier.step() for some_data in 
@@ -321,29 +397,39 @@ class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase):
             self.assertAlmostEqual(1.0 - mask.mean().item(), 0.5, places=2)
             for row in mask:
                 for idx in range(0, len(row), 4):
-                    block = row[idx:idx + 4]
+                    block = row[idx : idx + 4]
                     block, _ = block.sort()
                     assert (block[:2] == 0).all()
                     assert (block[2:] != 0).all()
 
-    def check_sparsity_level(self, data_list, data_with_config, defaults, norm_type='L1'):
+    def check_sparsity_level(
+        self, data_list, data_with_config, defaults, norm_type="L1"
+    ):
         sparsity_levels = [-1.0, 0.0, 0.5, 1.0, 2.0]
         sparse_block_shapes = [(1, 1), (1, 4), (2, 2), (4, 1)]
         zeros_per_blocks = [0, 1, 2, 3, 4]
         sparsifier = DataNormSparsifier(data_list=data_list, norm=norm_type)
 
-        testcases = itertools.tee(itertools.product(sparsity_levels,
-                                                    sparse_block_shapes,
-                                                    zeros_per_blocks))
+        testcases = itertools.tee(
+            itertools.product(sparsity_levels, sparse_block_shapes, zeros_per_blocks)
+        )
 
-        assert len(data_with_config) > 0 and 'name' in data_with_config[0] and 'data' in data_with_config[0]
+        assert (
+            len(data_with_config) > 0
+            and "name" in data_with_config[0]
+            and "data" in data_with_config[0]
+        )
         # get some data
-        name, data = data_with_config[0]['name'], data_with_config[0]['data']
+        name, data = data_with_config[0]["name"], data_with_config[0]["data"]
         for idx, (sl, sbs, zpb) in enumerate(testcases[0]):
-            new_name = f'{name}_{idx}'
+            new_name = f"{name}_{idx}"
             if zpb > sbs[0] * sbs[1]:
                 continue
-            current_config = {'sparsity_level': sl, 'sparse_block_shape': sbs, 'zeros_per_block': zpb}
+            current_config = {
+                "sparsity_level": sl,
+                "sparse_block_shape": sbs,
+                "zeros_per_block": zpb,
+            }
             sparsifier.add_data(name=new_name, data=data, **current_config)
             if zpb > sbs[0] * sbs[1]:
                 continue
@@ -351,7 +437,7 @@ class _NormDataSparsifierTestCase(_BaseDataSparsiferTestCase):
         sparsifier.step()
         sparsifier.squash_mask()
         for idx, (sl, sbs, zpb) in enumerate(testcases[0]):
-            new_name = f'{name}_{idx}'
+            new_name = f"{name}_{idx}"
             sparsified_data = sparsifier.get_data(name=new_name, original=False)
             # sparse mask
             sparse_mask = (sparsified_data == 0).float()
@@ -372,130 +458,232 @@ class TestBaseDataSparsifier(_BaseDataSparsiferTestCase):
     Once the above is done, create an instance of TestBaseDataSparsifierType and call all the run_tests()
     """
+
     def test_tensors(self):
-        tensor1, tensor2, tensor3 = torch.randn(3, 3), torch.randn(4, 4), torch.randn(5, 5)
+        tensor1, tensor2, tensor3 = (
+            torch.randn(3, 3),
+            torch.randn(4, 4),
+            torch.randn(5, 5),
+        )
         tensor4, tensor5 = torch.randn(1, 1), torch.randn(4, 4)
-        data_list = [('tensor1', tensor1), ('tensor2', tensor2), ('tensor3', tensor3)]
-        defaults = {'test': 3}
+        data_list = [("tensor1", tensor1), ("tensor2", tensor2), ("tensor3", tensor3)]
+        defaults = {"test": 3}
 
         data_with_config = [
-            {
-                'name': 'tensor4', 'data': tensor4, 'config': {'test': 7}
-            },
-            {
-                'name': 'tensor5', 'data': tensor5, 'config': {'test': 8}
-            },
+            {"name": "tensor4", "data": tensor4, "config": {"test": 7}},
+            {"name": "tensor5", "data": tensor5, "config": {"test": 8}},
         ]
-        self.run_all_checks(data_list=data_list, defaults=defaults, data_with_config=data_with_config)
+        self.run_all_checks(
+            data_list=data_list, defaults=defaults, data_with_config=data_with_config
+        )
 
     def test_nn_parameters(self):
-        param1, param2, param3 = nn.Parameter(torch.randn(3, 3)), nn.Parameter(torch.randn(4, 4)), nn.Parameter(torch.randn(5, 5))
-        param4, param5 = nn.Parameter(torch.randn(1, 1)), nn.Parameter(torch.randn(4, 4))
-        data_list = [('param1', param1), ('param2', param2), ('param3', param3)]
-        defaults = {'test': 3}
+        param1, param2, param3 = (
+            nn.Parameter(torch.randn(3, 3)),
+            nn.Parameter(torch.randn(4, 4)),
+            nn.Parameter(torch.randn(5, 5)),
+        )
+        param4, param5 = nn.Parameter(torch.randn(1, 1)), nn.Parameter(
+            torch.randn(4, 4)
+        )
+        data_list = [("param1", param1), ("param2", param2), ("param3", param3)]
+        defaults = {"test": 3}
 
         data_with_config = [
-            {
-                'name': 'param4', 'data': param4, 'config': {'test': 7}
-            },
-            {
-                'name': 'param5', 'data': param5, 'config': {'test': 8}
-            },
+            {"name": "param4", "data": param4, "config": {"test": 7}},
+            {"name": "param5", "data": param5, "config": {"test": 8}},
         ]
-        self.run_all_checks(data_list=data_list, defaults=defaults, data_with_config=data_with_config)
+        self.run_all_checks(
+            data_list=data_list, defaults=defaults, data_with_config=data_with_config
+        )
 
     def test_nn_embeddings(self):
-        emb1, emb2, = nn.Embedding(10, 3), nn.Embedding(20, 3)
+        (
+            emb1,
+            emb2,
+        ) = nn.Embedding(
+            10, 3
+        ), nn.Embedding(20, 3)
         emb1_bag, emb2_bag = nn.EmbeddingBag(10, 3), nn.EmbeddingBag(20, 3)
 
         emb3, emb3_bag = nn.Embedding(15, 3), nn.EmbeddingBag(20, 3)
-        data_list = [('emb1', emb1), ('emb1_bag', emb1_bag), ('emb2', emb2), ('emb2_bag', emb2_bag)]
-        defaults = {'test': 3}
+        data_list = [
+            ("emb1", emb1),
+            ("emb1_bag", emb1_bag),
+            ("emb2", emb2),
+            ("emb2_bag", emb2_bag),
+        ]
+        defaults = {"test": 3}
 
         data_with_config = [
-            {
-                'name': 'emb3', 'data': emb3, 'config': {'test': 7}
-            },
-            {
-                'name': 'emb3_bag', 'data': emb3_bag, 'config': {'test': 8}
-            },
+            {"name": "emb3", "data": emb3, "config": {"test": 7}},
+            {"name": "emb3_bag", "data": emb3_bag, "config": {"test": 8}},
         ]
-        self.run_all_checks(data_list=data_list, defaults=defaults, data_with_config=data_with_config)
+        self.run_all_checks(
+            data_list=data_list, defaults=defaults, data_with_config=data_with_config
+        )
 
 
 class TestNormDataSparsifiers(_NormDataSparsifierTestCase):
     """To add unit tests to support new data types for the NormDataSparsifier, create the following
-        data_list: List of tuples of name, data to be added to the constructor
-        defaults: default config for the above data in data_list
-        data_with_config: list of dictionaries defining name, data and config (look test_tensors())
+    data_list: List of tuples of name, data to be added to the constructor
+    defaults: default config for the above data in data_list
+    data_with_config: list of dictionaries defining name, data and config (look test_tensors())
 
-        Once the above is done, create an instance of _NormDataSparsifierTestRunner and call run_tests()
+    Once the above is done, create an instance of _NormDataSparsifierTestRunner and call run_tests()
     """
+
     def test_tensors(self):
-        tensor1, tensor2, tensor3 = torch.randn(1, 10), torch.randn(4, 4), torch.randn(1, 5)
+        tensor1, tensor2, tensor3 = (
+            torch.randn(1, 10),
+            torch.randn(4, 4),
+            torch.randn(1, 5),
+        )
         tensor4, tensor5 = torch.randn(1, 2), torch.randn(4, 4)
-        data_list = [('tensor1', tensor1), ('tensor2', tensor2), ('tensor3', tensor3)]
-        defaults = {'sparsity_level': 0.5, 'sparse_block_shape': (1, 4), 'zeros_per_block': 4}
+        data_list = [("tensor1", tensor1), ("tensor2", tensor2), ("tensor3", tensor3)]
+        defaults = {
+            "sparsity_level": 0.5,
+            "sparse_block_shape": (1, 4),
+            "zeros_per_block": 4,
+        }
 
         data_with_config = [
             {
-                'name': 'tensor4', 'data': tensor4,
-                'config': {'sparsity_level': 0.7, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6}
+                "name": "tensor4",
+                "data": tensor4,
+                "config": {
+                    "sparsity_level": 0.7,
"sparsity_level": 0.7, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, { - 'name': 'tensor5', 'data': tensor5, - 'config': {'sparsity_level': 0.3, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6} + "name": "tensor5", + "data": tensor5, + "config": { + "sparsity_level": 0.3, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, ] - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L1') - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L2') + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L1", + ) + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L2", + ) def test_nn_parameters(self): - param1, param2, param3 = nn.Parameter(torch.randn(1, 8)), nn.Parameter(torch.randn(4, 4)), nn.Parameter(torch.randn(5, 5)) - param4, param5 = nn.Parameter(torch.randn(10, 10)), nn.Parameter(torch.randn(4, 4)) - data_list = [('param1', param1), ('param2', param2), ('param3', param3)] - defaults = {'sparsity_level': 0.5, 'sparse_block_shape': (1, 4), 'zeros_per_block': 4} + param1, param2, param3 = ( + nn.Parameter(torch.randn(1, 8)), + nn.Parameter(torch.randn(4, 4)), + nn.Parameter(torch.randn(5, 5)), + ) + param4, param5 = nn.Parameter(torch.randn(10, 10)), nn.Parameter( + torch.randn(4, 4) + ) + data_list = [("param1", param1), ("param2", param2), ("param3", param3)] + defaults = { + "sparsity_level": 0.5, + "sparse_block_shape": (1, 4), + "zeros_per_block": 4, + } data_with_config = [ { - 'name': 'param4', 'data': param4, - 'config': {'sparsity_level': 0.7, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6} + "name": "param4", + "data": param4, + "config": { + "sparsity_level": 0.7, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, { - 'name': 'param5', 'data': param5, - 'config': {'sparsity_level': 0.3, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6} + "name": "param5", + "data": param5, + "config": { + "sparsity_level": 0.3, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, ] - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L1') - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L2') + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L1", + ) + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L2", + ) def test_nn_embeddings(self): - emb1, emb2, = nn.Embedding(10, 3), nn.Embedding(20, 3) + ( + emb1, + emb2, + ) = nn.Embedding( + 10, 3 + ), nn.Embedding(20, 3) emb1_bag, emb2_bag = nn.EmbeddingBag(10, 3), nn.EmbeddingBag(20, 3) emb3, emb3_bag = nn.Embedding(15, 3), nn.EmbeddingBag(20, 3) - data_list = [('emb1', emb1), ('emb1_bag', emb1_bag), ('emb2', emb2), ('emb2_bag', emb2_bag)] - defaults = {'sparsity_level': 0.5, 'sparse_block_shape': (1, 4), 'zeros_per_block': 4} + data_list = [ + ("emb1", emb1), + ("emb1_bag", emb1_bag), + ("emb2", emb2), + ("emb2_bag", emb2_bag), + ] + defaults = { + "sparsity_level": 0.5, + "sparse_block_shape": (1, 4), + "zeros_per_block": 4, + } data_with_config = [ { - 'name': 'emb3', 'data': emb3, - 'config': {'sparsity_level': 0.7, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6} + "name": "emb3", + "data": emb3, + 
"config": { + "sparsity_level": 0.7, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, { - 'name': 'emb3_bag', 'data': emb3_bag, - 'config': {'sparsity_level': 0.3, 'sparse_block_shape': (2, 3), 'zeros_per_block': 6} + "name": "emb3_bag", + "data": emb3_bag, + "config": { + "sparsity_level": 0.3, + "sparse_block_shape": (2, 3), + "zeros_per_block": 6, + }, }, ] - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L1') + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L1", + ) - self.run_all_checks(data_list=data_list, defaults=defaults, - data_with_config=data_with_config, norm_type='L2') + self.run_all_checks( + data_list=data_list, + defaults=defaults, + data_with_config=data_with_config, + norm_type="L2", + ) class Model(nn.Module): @@ -522,16 +710,21 @@ class TestQuantizationUtils(TestCase): """ model = Model() - sparse_config = {'sparsity_level': 0.80, 'sparse_block_shape': (1, 1)} + sparse_config = {"sparsity_level": 0.80, "sparse_block_shape": (1, 1)} select_embeddings = [model.embbag1, model.emb1] - post_training_sparse_quantize(model, - data_sparsifier_class=DataNormSparsifier, - sparsify_first=True, - select_embeddings=select_embeddings, - **sparse_config) + post_training_sparse_quantize( + model, + data_sparsifier_class=DataNormSparsifier, + sparsify_first=True, + select_embeddings=select_embeddings, + **sparse_config, + ) assert type(model.emb1) == torch.ao.nn.quantized.modules.embedding_ops.Embedding - assert type(model.embbag1) == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag + assert ( + type(model.embbag1) + == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag + ) assert type(model.emb_seq[0] == nn.Embedding) assert type(model.emb_seq[1] == nn.EmbeddingBag) assert type(model.linear1) == nn.Linear @@ -561,24 +754,34 @@ class TestQuantizationUtils(TestCase): """ model = Model() - sparse_config = {'sparsity_level': 0.8, 'sparse_block_shape': (1, 1)} - post_training_sparse_quantize(model, DataNormSparsifier, sparsify_first=False, **sparse_config) + sparse_config = {"sparsity_level": 0.8, "sparse_block_shape": (1, 1)} + post_training_sparse_quantize( + model, DataNormSparsifier, sparsify_first=False, **sparse_config + ) assert type(model.emb1) == torch.ao.nn.quantized.modules.embedding_ops.Embedding - assert type(model.embbag1) == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag - assert type(model.emb_seq[0] == torch.ao.nn.quantized.modules.embedding_ops.Embedding) - assert type(model.emb_seq[1] == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag) + assert ( + type(model.embbag1) + == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag + ) + assert type( + model.emb_seq[0] == torch.ao.nn.quantized.modules.embedding_ops.Embedding + ) + assert type( + model.emb_seq[1] == torch.ao.nn.quantized.modules.embedding_ops.EmbeddingBag + ) assert type(model.linear1) == nn.Linear # not quantized assert type(model.linear2) == nn.Linear # not quantized - dequant_emb1 = torch.dequantize(model.emb1.weight()) dequant_embbag1 = torch.dequantize(model.embbag1.weight()) dequant_emb_seq_0 = torch.dequantize(model.emb_seq[0].weight()) dequant_emb_seq_1 = torch.dequantize(model.emb_seq[1].weight()) # higher threshold as quantization occurs before sparsity - threshold = 1 # zero points seem to have higher magnitude with sparsity occuring after + threshold = ( + 1 # zero points seem to have higher magnitude with sparsity 
+        )

         sl_emb1 = (torch.abs(dequant_emb1) < threshold).float().mean()
         sl_embbag1 = (torch.abs(dequant_embbag1) < threshold).float().mean()
diff --git a/test/ao/sparsity/test_kernels.py b/test/ao/sparsity/test_kernels.py
index 111d51465be1..910b493fc3a2 100644
--- a/test/ao/sparsity/test_kernels.py
+++ b/test/ao/sparsity/test_kernels.py
@@ -1,32 +1,34 @@
 # Owner(s): ["module: unknown"]

-from torch.testing._internal.common_utils import run_tests
-
 import copy
-import numpy as np
 import io
 import logging
 from itertools import product

+import numpy as np
+
 import torch
 import torch.ao.quantization as tq
 from torch import nn
 from torch.ao.pruning.sparsifier.utils import fqn_to_module
-
-from torch.testing._internal.common_utils import TestCase, skipIfTorchDynamo
 from torch.testing._internal.common_quantized import (
     override_cpu_allocator_for_qnnpack,
     override_qengines,
-    qengine_is_qnnpack,
     qengine_is_fbgemm,
     qengine_is_onednn,
+    qengine_is_qnnpack,
     qengine_is_x86,
 )
+from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase
+

 # TODO: Once more test files are created, move the contents to a ao folder.

-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
+)
+

 class TestQuantizedSparseKernels(TestCase):
     @skipIfTorchDynamo("TorchDynamo fails here for unknown reasons")
@@ -84,29 +86,42 @@ class TestQuantizedSparseKernels(TestCase):
                     continue

                 if use_channelwise:
                     W_q = torch.quantize_per_channel(
-                        W_fp32, scales=W_scales, zero_points=W_zps, axis=0, dtype=torch.qint8
+                        W_fp32,
+                        scales=W_scales,
+                        zero_points=W_zps,
+                        axis=0,
+                        dtype=torch.qint8,
                     )
                 else:
                     W_q = torch.quantize_per_tensor(
-                        W_fp32, scale=W_scales[0], zero_point=W_zps[0], dtype=torch.qint8
+                        W_fp32,
+                        scale=W_scales[0],
+                        zero_point=W_zps[0],
+                        dtype=torch.qint8,
                     )

                 Y_scale = 1.1234
                 Y_zp = 5
                 W_prepack_dense = dense_prepack(W_q, float_bias)
-                W_prepack_sparse = sparse_prepack(W_q, float_bias, row_block_size, col_block_size)
+                W_prepack_sparse = sparse_prepack(
+                    W_q, float_bias, row_block_size, col_block_size
+                )

                 if dynamic_mode:
                     Y = sparse_qlinear_dynamic(X_fp32, W_prepack_sparse)
                     Y_ref = dense_qlinear_dynamic(X_fp32, W_prepack_dense)

-                    np.testing.assert_array_almost_equal(Y_ref.numpy(), Y.numpy(), decimal=decimal_val)
+                    np.testing.assert_array_almost_equal(
+                        Y_ref.numpy(), Y.numpy(), decimal=decimal_val
+                    )
                 else:
                     Y_q = sparse_qlinear(X_q, W_prepack_sparse, Y_scale, Y_zp)
                     Y_q_ref = dense_qlinear(X_q, W_prepack_dense, Y_scale, Y_zp)

                     np.testing.assert_array_almost_equal(
-                        Y_q_ref.int_repr().numpy(), Y_q.int_repr().numpy(), decimal=decimal_val
+                        Y_q_ref.int_repr().numpy(),
+                        Y_q.int_repr().numpy(),
+                        decimal=decimal_val,
                     )

@@ -235,6 +250,7 @@ def _sparse_layer_test_helper(
         Y_hat = sqmodel(X_q)
         test_class.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())

+
 class SparseQuantizedModel(nn.Module):
     def __init__(self, in_channels, out_channels):
         super().__init__()
@@ -243,6 +259,7 @@ class SparseQuantizedModel(nn.Module):
     def forward(self, x):
         return self.linear(x)

+
 class TestQuantizedSparseLayers(TestCase):
     @override_qengines
     @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1991")
diff --git a/test/ao/sparsity/test_parametrization.py b/test/ao/sparsity/test_parametrization.py
index 7bd64b916383..3314c0bacb1d 100644
--- a/test/ao/sparsity/test_parametrization.py
+++ b/test/ao/sparsity/test_parametrization.py
@@ -3,22 +3,24 @@
import logging +import torch + from torch import nn from torch.ao.pruning.sparsifier import utils from torch.nn.utils import parametrize - -import torch from torch.testing._internal.common_utils import TestCase -logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) +logging.basicConfig( + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO +) + class ModelUnderTest(nn.Module): def __init__(self, bias=True): super().__init__() self.linear = nn.Linear(16, 16, bias=bias) self.seq = nn.Sequential( - nn.Linear(16, 16, bias=bias), - nn.Linear(16, 16, bias=bias) + nn.Linear(16, 16, bias=bias), nn.Linear(16, 16, bias=bias) ) # Make sure the weights are not random @@ -45,7 +47,7 @@ class TestFakeSparsity(TestCase): mask = torch.zeros(16, 16) sparsity = utils.FakeSparsity(mask) - parametrize.register_parametrization(model, 'weight', sparsity) + parametrize.register_parametrization(model, "weight", sparsity) x = torch.randn(3, 16) self.assertEqual(torch.zeros(3, 16), model(x)) @@ -53,95 +55,119 @@ class TestFakeSparsity(TestCase): def test_weights_parametrized(self): model = ModelUnderTest(bias=False) - assert not hasattr(model.linear, 'parametrizations') - assert not hasattr(model.seq[0], 'parametrizations') - assert not hasattr(model.seq[1], 'parametrizations') + assert not hasattr(model.linear, "parametrizations") + assert not hasattr(model.seq[0], "parametrizations") + assert not hasattr(model.seq[1], "parametrizations") mask = torch.eye(16) - parametrize.register_parametrization(model.linear, 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.linear, "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model.seq[0], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.seq[0], "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model.seq[1], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.seq[1], "weight", utils.FakeSparsity(mask) + ) - assert hasattr(model.linear, 'parametrizations') - assert parametrize.is_parametrized(model.linear, 'weight') - assert hasattr(model.seq[0], 'parametrizations') - assert parametrize.is_parametrized(model.linear, 'weight') - assert hasattr(model.seq[1], 'parametrizations') - assert parametrize.is_parametrized(model.linear, 'weight') + assert hasattr(model.linear, "parametrizations") + assert parametrize.is_parametrized(model.linear, "weight") + assert hasattr(model.seq[0], "parametrizations") + assert parametrize.is_parametrized(model.linear, "weight") + assert hasattr(model.seq[1], "parametrizations") + assert parametrize.is_parametrized(model.linear, "weight") def test_state_dict_preserved(self): model_save = ModelUnderTest(bias=False) mask = torch.eye(16) - parametrize.register_parametrization(model_save.linear, 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_save.linear, "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model_save.seq[0], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_save.seq[0], "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model_save.seq[1], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_save.seq[1], "weight", utils.FakeSparsity(mask) + ) state_dict = 
model_save.state_dict() model_load = ModelUnderTest(bias=False) mask = torch.zeros(model_load.linear.weight.shape) - parametrize.register_parametrization(model_load.linear, 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_load.linear, "weight", utils.FakeSparsity(mask) + ) mask = torch.zeros(model_load.seq[0].weight.shape) - parametrize.register_parametrization(model_load.seq[0], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_load.seq[0], "weight", utils.FakeSparsity(mask) + ) mask = torch.zeros(model_load.seq[1].weight.shape) - parametrize.register_parametrization(model_load.seq[1], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model_load.seq[1], "weight", utils.FakeSparsity(mask) + ) # Keep this strict, as we are not loading the 'mask' model_load.load_state_dict(state_dict, strict=False) # Check the parametrizations are preserved - assert hasattr(model_load.linear, 'parametrizations') - assert parametrize.is_parametrized(model_load.linear, 'weight') - assert hasattr(model_load.seq[0], 'parametrizations') - assert parametrize.is_parametrized(model_load.linear, 'weight') - assert hasattr(model_load.seq[1], 'parametrizations') - assert parametrize.is_parametrized(model_load.linear, 'weight') + assert hasattr(model_load.linear, "parametrizations") + assert parametrize.is_parametrized(model_load.linear, "weight") + assert hasattr(model_load.seq[0], "parametrizations") + assert parametrize.is_parametrized(model_load.linear, "weight") + assert hasattr(model_load.seq[1], "parametrizations") + assert parametrize.is_parametrized(model_load.linear, "weight") # Check the weights are preserved - self.assertEqual(model_save.linear.parametrizations['weight'].original, - model_load.linear.parametrizations['weight'].original) - self.assertEqual(model_save.seq[0].parametrizations['weight'].original, - model_load.seq[0].parametrizations['weight'].original) - self.assertEqual(model_save.seq[1].parametrizations['weight'].original, - model_load.seq[1].parametrizations['weight'].original) + self.assertEqual( + model_save.linear.parametrizations["weight"].original, + model_load.linear.parametrizations["weight"].original, + ) + self.assertEqual( + model_save.seq[0].parametrizations["weight"].original, + model_load.seq[0].parametrizations["weight"].original, + ) + self.assertEqual( + model_save.seq[1].parametrizations["weight"].original, + model_load.seq[1].parametrizations["weight"].original, + ) # Check the masks are not preserved in the state_dict # We store the state_dicts in the sparsifier, not in the model itself. 
# TODO: Need to find a clean way of exporting the parametrized model - self.assertNotEqual(model_save.linear.parametrizations['weight'][0].mask, - model_load.linear.parametrizations['weight'][0].mask) - self.assertNotEqual(model_save.seq[0].parametrizations['weight'][0].mask, - model_load.seq[0].parametrizations['weight'][0].mask) - self.assertNotEqual(model_save.seq[1].parametrizations['weight'][0].mask, - model_load.seq[1].parametrizations['weight'][0].mask) + self.assertNotEqual( + model_save.linear.parametrizations["weight"][0].mask, + model_load.linear.parametrizations["weight"][0].mask, + ) + self.assertNotEqual( + model_save.seq[0].parametrizations["weight"][0].mask, + model_load.seq[0].parametrizations["weight"][0].mask, + ) + self.assertNotEqual( + model_save.seq[1].parametrizations["weight"][0].mask, + model_load.seq[1].parametrizations["weight"][0].mask, + ) def test_jit_trace(self): model = ModelUnderTest(bias=False) mask = torch.eye(16) - parametrize.register_parametrization(model.linear, 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.linear, "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model.seq[0], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.seq[0], "weight", utils.FakeSparsity(mask) + ) mask = torch.eye(16) - parametrize.register_parametrization(model.seq[1], 'weight', - utils.FakeSparsity(mask)) + parametrize.register_parametrization( + model.seq[1], "weight", utils.FakeSparsity(mask) + ) # Tracing example_x = torch.ones(3, 16) - model_trace = torch.jit.trace_module(model, {'forward': example_x}) + model_trace = torch.jit.trace_module(model, {"forward": example_x}) x = torch.randn(3, 16) y = model(x) diff --git a/test/ao/sparsity/test_qlinear_packed_params.py b/test/ao/sparsity/test_qlinear_packed_params.py index 9e719f423d38..eb186d4245f6 100644 --- a/test/ao/sparsity/test_qlinear_packed_params.py +++ b/test/ao/sparsity/test_qlinear_packed_params.py @@ -2,29 +2,30 @@ # Owner(s): ["oncall: mobile"] import tempfile + import torch from torch.ao.nn.sparse.quantized.dynamic.linear import Linear -from torch.testing._internal.common_quantization import ( - skipIfNoFBGEMM, - skipIfNoQNNPACK, -) +from torch.testing._internal.common_quantization import skipIfNoFBGEMM, skipIfNoQNNPACK from torch.testing._internal.common_quantized import ( - qengine_is_qnnpack, + override_cpu_allocator_for_qnnpack, override_quantized_engine, - override_cpu_allocator_for_qnnpack + qengine_is_qnnpack, ) from torch.testing._internal.common_utils import TestCase + class TestQlinearPackedParams(TestCase): def qlinear_packed_params_test(self, allow_non_zero_zero_points=False): # copied from https://pytorch.org/docs/stable/sparse.html#csr-tensor-operations, # so row/col block indices match that example, but with blocks and # scaled rows - weight_fp32 = torch.Tensor([ - [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0], - [6, 6, 6, 6, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]) + weight_fp32 = torch.Tensor( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0], + [6, 6, 6, 6, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + ) row_block_size = 1 col_block_size = 4 @@ -43,18 +44,14 @@ class TestQlinearPackedParams(TestCase): wide_weight_fp32[1][0] = 8 per_tensor_small = ( - torch.quantize_per_tensor( - weight_fp32, - scales[0], - zero_points[0], - dtype - ), + 
torch.quantize_per_tensor(weight_fp32, scales[0], zero_points[0], dtype), True, [0, 1, 3, 3], [2, 0, 1], - [x + (1 if allow_non_zero_zero_points else 0) for x in [ - 1, 1, 1, 1, 3, 3, 3, 3, 6, 6, 6, 6 - ]], + [ + x + (1 if allow_non_zero_zero_points else 0) + for x in [1, 1, 1, 1, 3, 3, 3, 3, 6, 6, 6, 6] + ], ) per_channel_small = ( @@ -68,9 +65,10 @@ class TestQlinearPackedParams(TestCase): False, [0, 1, 3, 3], [2, 0, 1], - [x + ([1, 2, 2][i // 4] if allow_non_zero_zero_points else 0) for (i, x) in enumerate([ - 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 - ])], + [ + x + ([1, 2, 2][i // 4] if allow_non_zero_zero_points else 0) + for (i, x) in enumerate([1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]) + ], ) per_tensor_large = ( @@ -83,14 +81,19 @@ class TestQlinearPackedParams(TestCase): True, [0, 2, 3, 3], [0, 1001, 0], - [x + (1 if allow_non_zero_zero_points else 0) for x in [ - 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0 - ]], + [ + x + (1 if allow_non_zero_zero_points else 0) + for x in [2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0] + ], ) - for (weight, is_per_tensor_quantized, expected_row_block_indices, expected_col_block_indices, expected_weights) in [ - per_tensor_small, per_channel_small, per_tensor_large - ]: + for ( + weight, + is_per_tensor_quantized, + expected_row_block_indices, + expected_col_block_indices, + expected_weights, + ) in [per_tensor_small, per_channel_small, per_tensor_large]: lin = Linear( out_features=weight.shape[0], in_features=weight.shape[1], @@ -118,24 +121,36 @@ class TestQlinearPackedParams(TestCase): col_block_indices_, weights_, output_channels_, - input_channels_ + input_channels_, ) = serialized[0] # Test Serialization self.assertEqual(bias_, bias) self.assertEqual(out_features_block_size_, row_block_size) self.assertEqual(in_features_block_size_, col_block_size) - self.assertEqual(weight_scales_, [scales[0]] if is_per_tensor_quantized else scales) - self.assertEqual(weight_zero_points_, [zero_points[0]] if is_per_tensor_quantized else zero_points) + self.assertEqual( + weight_scales_, [scales[0]] if is_per_tensor_quantized else scales + ) + self.assertEqual( + weight_zero_points_, + [zero_points[0]] if is_per_tensor_quantized else zero_points, + ) self.assertEqual(quantization_scheme_, is_per_tensor_quantized) self.assertEqual(row_block_indices_, expected_row_block_indices) self.assertEqual(col_block_indices_, expected_col_block_indices) - self.assertEqual(weights_.tolist(), [v + 128 for v in expected_weights]) # weights are serialized as +128 + self.assertEqual( + weights_.tolist(), [v + 128 for v in expected_weights] + ) # weights are serialized as +128 self.assertEqual(output_channels_, weight.shape[0]) self.assertEqual(input_channels_, weight.shape[1]) # Test Unpacking - (weights_, bias_, out_features_block_size_, in_features_block_size_) = lin._weight_bias() + ( + weights_, + bias_, + out_features_block_size_, + in_features_block_size_, + ) = lin._weight_bias() self.assertEqual(torch.dequantize(weights_), torch.dequantize(weight)) self.assertEqual(bias_, bias) self.assertEqual(out_features_block_size_, row_block_size) @@ -148,7 +163,9 @@ class TestQlinearPackedParams(TestCase): lin2 = torch.load(file_buff) self.assertEqual(lin._weight_bias(), lin2._weight_bias()) # Serialize -> Deserialize -> Serialize should match Serialize - self.assertEqual(serialized, lin2._packed_params._packed_params.__getstate__()) + self.assertEqual( + serialized, lin2._packed_params._packed_params.__getstate__() + ) # Test that op output is preserved by serialize -> deserialize if qengine_is_qnnpack(): 
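# NOTE: a minimal sketch of where the expected_row_block_indices [0, 1, 3, 3]
# and expected_col_block_indices [2, 0, 1] above come from. They are the BSR
# (block compressed sparse row) encoding of the 3x16 example weight with 1x4
# blocks; assuming a PyTorch build that provides Tensor.to_sparse_bsr
# (available since roughly 1.13), they can be reproduced directly:
import torch

weight_fp32 = torch.Tensor(
    [
        [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0],
        [6, 6, 6, 6, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]
)
bsr = weight_fp32.to_sparse_bsr((1, 4))
print(bsr.crow_indices())  # tensor([0, 1, 3, 3]): row i owns blocks crow[i]:crow[i+1]
print(bsr.col_indices())  # tensor([2, 0, 1]): block-column of each stored 1x4 block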
@@ -157,29 +174,29 @@ class TestQlinearPackedParams(TestCase): y2 = lin2(x) self.assertEqual(y1, y2) - @skipIfNoFBGEMM def test_qlinear_packed_params_fbgemm(self): torch.manual_seed(0) - with override_quantized_engine('fbgemm'): + with override_quantized_engine("fbgemm"): self.qlinear_packed_params_test(allow_non_zero_zero_points=False) - @skipIfNoQNNPACK def test_qlinear_packed_params_qnnpack(self): torch.manual_seed(0) - with override_quantized_engine('qnnpack'): + with override_quantized_engine("qnnpack"): with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()): self.qlinear_packed_params_test(allow_non_zero_zero_points=True) def test_qlinear_packed_params_fbgemm_qnnpack_cross_compatibility(self): torch.manual_seed(0) - weight_fp32 = torch.Tensor([ - [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0], - [6, 6, 6, 6, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - ]) + weight_fp32 = torch.Tensor( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0], + [6, 6, 6, 6, 12, 12, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + ) row_block_size = 1 col_block_size = 4 @@ -229,17 +246,23 @@ class TestQlinearPackedParams(TestCase): def packed_params_data_with_int32_indices(data_as_state_and_weight_bias): (st, weight_bias) = data_as_state_and_weight_bias (s0, s1) = st - s0_updated = tuple([ - # 7 and 8 are row and col block indices respectively - v if (i != 7 and i != 8) else v.to(torch.int32) for (i, v) in enumerate(list(s0)) - ]) + s0_updated = tuple( + [ + # 7 and 8 are row and col block indices respectively + v if (i != 7 and i != 8) else v.to(torch.int32) + for (i, v) in enumerate(list(s0)) + ] + ) return ((s0_updated, s1), weight_bias) # Test Fbgemm -> Qnnpack - with override_quantized_engine('fbgemm'): - packed_params_data_1a, file_buff_1 = make_lin_get_state_weight_bias_and_save() + with override_quantized_engine("fbgemm"): + ( + packed_params_data_1a, + file_buff_1, + ) = make_lin_get_state_weight_bias_and_save() - with override_quantized_engine('qnnpack'): + with override_quantized_engine("qnnpack"): with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()): packed_params_data_1b = load_get_state_weight_bias(file_buff_1) @@ -249,11 +272,14 @@ class TestQlinearPackedParams(TestCase): ) # Test Qnnpack -> Fbgemm - with override_quantized_engine('qnnpack'): + with override_quantized_engine("qnnpack"): with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()): - packed_params_data_2a, file_buff_2 = make_lin_get_state_weight_bias_and_save() + ( + packed_params_data_2a, + file_buff_2, + ) = make_lin_get_state_weight_bias_and_save() - with override_quantized_engine('fbgemm'): + with override_quantized_engine("fbgemm"): packed_params_data_2b = load_get_state_weight_bias(file_buff_2) self.assertEqual( diff --git a/test/ao/sparsity/test_scheduler.py b/test/ao/sparsity/test_scheduler.py index 835c5143f18b..098fde5935ed 100644 --- a/test/ao/sparsity/test_scheduler.py +++ b/test/ao/sparsity/test_scheduler.py @@ -1,43 +1,38 @@ # Owner(s): ["module: unknown"] +import warnings + from torch import nn -from torch.ao.pruning import WeightNormSparsifier -from torch.ao.pruning import BaseScheduler, LambdaSL, CubicSL +from torch.ao.pruning import BaseScheduler, CubicSL, LambdaSL, WeightNormSparsifier from torch.testing._internal.common_utils import TestCase -import warnings class ImplementedScheduler(BaseScheduler): def get_sl(self): if self.last_epoch > 0: - return [group['sparsity_level'] * 0.5 - for group in 
self.sparsifier.groups] + return [group["sparsity_level"] * 0.5 for group in self.sparsifier.groups] else: return list(self.base_sl) class TestScheduler(TestCase): def test_constructor(self): - model = nn.Sequential( - nn.Linear(16, 16) - ) + model = nn.Sequential(nn.Linear(16, 16)) sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) scheduler = ImplementedScheduler(sparsifier) assert scheduler.sparsifier is sparsifier assert scheduler._step_count == 1 - assert scheduler.base_sl == [sparsifier.groups[0]['sparsity_level']] + assert scheduler.base_sl == [sparsifier.groups[0]["sparsity_level"]] def test_order_of_steps(self): """Checks if the warning is thrown if the scheduler step is called before the sparsifier step""" - model = nn.Sequential( - nn.Linear(16, 16) - ) + model = nn.Sequential(nn.Linear(16, 16)) sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) scheduler = ImplementedScheduler(sparsifier) @@ -54,43 +49,41 @@ class TestScheduler(TestCase): # Make sure there is no warning related to the base_scheduler for warning in w: fname = warning.filename - fname = '/'.join(fname.split('/')[-5:]) - assert fname != 'torch/ao/sparsity/scheduler/base_scheduler.py' + fname = "/".join(fname.split("/")[-5:]) + assert fname != "torch/ao/sparsity/scheduler/base_scheduler.py" def test_step(self): - model = nn.Sequential( - nn.Linear(16, 16) - ) + model = nn.Sequential(nn.Linear(16, 16)) sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) - assert sparsifier.groups[0]['sparsity_level'] == 0.5 + assert sparsifier.groups[0]["sparsity_level"] == 0.5 scheduler = ImplementedScheduler(sparsifier) - assert sparsifier.groups[0]['sparsity_level'] == 0.5 + assert sparsifier.groups[0]["sparsity_level"] == 0.5 sparsifier.step() scheduler.step() - assert sparsifier.groups[0]['sparsity_level'] == 0.25 + assert sparsifier.groups[0]["sparsity_level"] == 0.25 def test_lambda_scheduler(self): - model = nn.Sequential( - nn.Linear(16, 16) - ) + model = nn.Sequential(nn.Linear(16, 16)) sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) - assert sparsifier.groups[0]['sparsity_level'] == 0.5 + assert sparsifier.groups[0]["sparsity_level"] == 0.5 scheduler = LambdaSL(sparsifier, lambda epoch: epoch * 10) - assert sparsifier.groups[0]['sparsity_level'] == 0.0 # Epoch 0 + assert sparsifier.groups[0]["sparsity_level"] == 0.0 # Epoch 0 scheduler.step() - assert sparsifier.groups[0]['sparsity_level'] == 5.0 # Epoch 1 + assert sparsifier.groups[0]["sparsity_level"] == 5.0 # Epoch 1 class TestCubicScheduler(TestCase): def setUp(self): self.model_sparse_config = [ - {'tensor_fqn': '0.weight', 'sparsity_level': 0.8}, - {'tensor_fqn': '2.weight', 'sparsity_level': 0.4}, + {"tensor_fqn": "0.weight", "sparsity_level": 0.8}, + {"tensor_fqn": "2.weight", "sparsity_level": 0.4}, + ] + self.sorted_sparse_levels = [ + conf["sparsity_level"] for conf in self.model_sparse_config ] - self.sorted_sparse_levels = [conf['sparsity_level'] for conf in self.model_sparse_config] self.initial_sparsity = 0.1 self.initial_step = 3 @@ -107,8 +100,8 @@ class TestCubicScheduler(TestCase): sparsifier.prepare(model, config=self.model_sparse_config) scheduler_args = { - 'init_sl': self.initial_sparsity, - 'init_t': self.initial_step, + "init_sl": self.initial_sparsity, + "init_t": self.initial_step, } scheduler_args.update(kwargs) @@ -118,25 +111,33 @@ class TestCubicScheduler(TestCase): @staticmethod def _get_sparsity_levels(sparsifier, precision=32): r"""Gets the 
current levels of sparsity in a sparsifier.""" - return [round(group['sparsity_level'], precision) for group in sparsifier.groups] + return [ + round(group["sparsity_level"], precision) for group in sparsifier.groups + ] def test_constructor(self): model = self._make_model() sparsifier, scheduler = self._make_scheduler(model=model, initially_zero=True) self.assertIs( - scheduler.sparsifier, sparsifier, - msg="Sparsifier is not properly attached") + scheduler.sparsifier, sparsifier, msg="Sparsifier is not properly attached" + ) self.assertEqual( - scheduler._step_count, 1, - msg="Scheduler is initialized with incorrect step count") + scheduler._step_count, + 1, + msg="Scheduler is initialized with incorrect step count", + ) self.assertEqual( - scheduler.base_sl, self.sorted_sparse_levels, - msg="Scheduler did not store the target sparsity levels correctly") + scheduler.base_sl, + self.sorted_sparse_levels, + msg="Scheduler did not store the target sparsity levels correctly", + ) # Value before t_0 is 0 self.assertEqual( - self._get_sparsity_levels(sparsifier), scheduler._make_sure_a_list(0.0), - msg="Sparsifier is not reset correctly after attaching to the Scheduler") + self._get_sparsity_levels(sparsifier), + scheduler._make_sure_a_list(0.0), + msg="Sparsifier is not reset correctly after attaching to the Scheduler", + ) # Value before t_0 is s_0 model = self._make_model() @@ -144,36 +145,50 @@ class TestCubicScheduler(TestCase): self.assertEqual( self._get_sparsity_levels(sparsifier), scheduler._make_sure_a_list(self.initial_sparsity), - msg="Sparsifier is not reset correctly after attaching to the Scheduler") + msg="Sparsifier is not reset correctly after attaching to the Scheduler", + ) def test_step(self): # For n=5, dt=2, there will be totally 10 steps between s_0 and s_f, starting from t_0 model = self._make_model() sparsifier, scheduler = self._make_scheduler( - model=model, initially_zero=True, init_t=3, delta_t=2, total_t=5) + model=model, initially_zero=True, init_t=3, delta_t=2, total_t=5 + ) scheduler.step() scheduler.step() - self.assertEqual(scheduler._step_count, 3, msg="Scheduler step_count is expected to increment") + self.assertEqual( + scheduler._step_count, + 3, + msg="Scheduler step_count is expected to increment", + ) # Value before t_0 is supposed to be 0 self.assertEqual( - self._get_sparsity_levels(sparsifier), scheduler._make_sure_a_list(0.0), - msg="Scheduler step updating the sparsity level before t_0") + self._get_sparsity_levels(sparsifier), + scheduler._make_sure_a_list(0.0), + msg="Scheduler step updating the sparsity level before t_0", + ) scheduler.step() # Step = 3 => sparsity = initial_sparsity self.assertEqual( - self._get_sparsity_levels(sparsifier), scheduler._make_sure_a_list(self.initial_sparsity), - msg="Sparsifier is not reset to initial sparsity at the first step") + self._get_sparsity_levels(sparsifier), + scheduler._make_sure_a_list(self.initial_sparsity), + msg="Sparsifier is not reset to initial sparsity at the first step", + ) scheduler.step() # Step = 4 => sparsity ~ [0.3, 0.2] self.assertEqual( - self._get_sparsity_levels(sparsifier, 1), [0.3, 0.2], - msg="Sparsity level is not set correctly after the first step") + self._get_sparsity_levels(sparsifier, 1), + [0.3, 0.2], + msg="Sparsity level is not set correctly after the first step", + ) current_step = scheduler._step_count - scheduler.init_t[0] - 1 more_steps_needed = scheduler.delta_t[0] * scheduler.total_t[0] - current_step for _ in range(more_steps_needed): # More steps needed to 
final sparsity level
             scheduler.step()

         self.assertEqual(
-            self._get_sparsity_levels(sparsifier), self.sorted_sparse_levels,
-            msg="Sparsity level is not reaching the target level afer delta_t * n steps ")
+            self._get_sparsity_levels(sparsifier),
+            self.sorted_sparse_levels,
+            msg="Sparsity level is not reaching the target level after delta_t * n steps",
+        )
diff --git a/test/ao/sparsity/test_sparsifier.py b/test/ao/sparsity/test_sparsifier.py
index c9309d4b81fe..d8151a091d84 100644
--- a/test/ao/sparsity/test_sparsifier.py
+++ b/test/ao/sparsity/test_sparsifier.py
@@ -6,13 +6,24 @@ import re

 import torch
 from torch import nn
-from torch.ao.pruning import BaseSparsifier, WeightNormSparsifier, FakeSparsity, NearlyDiagonalSparsifier
+from torch.ao.pruning import (
+    BaseSparsifier,
+    FakeSparsity,
+    NearlyDiagonalSparsifier,
+    WeightNormSparsifier,
+)
 from torch.nn.utils.parametrize import is_parametrized
+from torch.testing._internal.common_pruning import (
+    ImplementedSparsifier,
+    MockSparseLinear,
+    SimpleLinear,
+)
 from torch.testing._internal.common_utils import TestCase
-from torch.testing._internal.common_pruning import SimpleLinear, MockSparseLinear, ImplementedSparsifier

-logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
+)


 class TestBaseSparsifier(TestCase):
@@ -27,41 +38,44 @@ class TestBaseSparsifier(TestCase):
         sparsifier.step()
         # Can instantiate the model with configs
         sparsifier = ImplementedSparsifier(test=3)
-        sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}])
+        sparsifier.prepare(model, [{"tensor_fqn": "linear1.weight"}])
         assert len(sparsifier.groups) == 1
-        assert sparsifier.groups[0]['tensor_fqn'] == 'linear1.weight'
-        assert 'test' in sparsifier.groups[0]
-        assert sparsifier.groups[0]['test'] == 3
+        assert sparsifier.groups[0]["tensor_fqn"] == "linear1.weight"
+        assert "test" in sparsifier.groups[0]
+        assert sparsifier.groups[0]["test"] == 3

     def test_prepare_config(self):
         model = SimpleLinear()
         sparsifier = ImplementedSparsifier(test=3)
         # Make sure there are no parametrizations before `prepare`
-        assert not hasattr(model.seq[0], 'parametrizations')
-        assert not hasattr(model.linear1, 'parametrizations')
-        assert not hasattr(model.linear2, 'parametrizations')
-        sparsifier.prepare(model, config=[
-            {'tensor_fqn': 'seq.0.weight', 'test': 42},
-            # No 'linear1' to make sure it will be skipped in the sparsification
-            {'tensor_fqn': 'linear2.weight'}
-        ])
+        assert not hasattr(model.seq[0], "parametrizations")
+        assert not hasattr(model.linear1, "parametrizations")
+        assert not hasattr(model.linear2, "parametrizations")
+        sparsifier.prepare(
+            model,
+            config=[
+                {"tensor_fqn": "seq.0.weight", "test": 42},
+                # No 'linear1' to make sure it will be skipped in the sparsification
+                {"tensor_fqn": "linear2.weight"},
+            ],
+        )
         assert len(sparsifier.groups) == 2
         # Check if default argument is not assigned if explicit
-        assert sparsifier.groups[0]['tensor_fqn'] == 'seq.0.weight'
-        assert sparsifier.groups[0]['test'] == 42
+        assert sparsifier.groups[0]["tensor_fqn"] == "seq.0.weight"
+        assert sparsifier.groups[0]["test"] == 42
         # Check if FQN and module are pointing to the same location
-        assert sparsifier.groups[1]['tensor_fqn'] == 'linear2.weight'
-        assert sparsifier.groups[1]['module'] == model.linear2
+        assert sparsifier.groups[1]["tensor_fqn"] == "linear2.weight"
+        assert sparsifier.groups[1]["module"] == model.linear2
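        # A sketch of what `groups` ends up holding after the prepare() call
        # above (assuming the constructor default test=3 is merged into every
        # per-tensor config that does not override it):
        #     sparsifier.groups[0] -> {"tensor_fqn": "seq.0.weight",
        #                              "module": model.seq[0], "test": 42}
        #     sparsifier.groups[1] -> {"tensor_fqn": "linear2.weight",
        #                              "module": model.linear2, "test": 3}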
# Check if parameterizations are attached - assert hasattr(model.seq[0], 'parametrizations') - assert not hasattr(model.linear1, 'parametrizations') - assert hasattr(model.linear2, 'parametrizations') + assert hasattr(model.seq[0], "parametrizations") + assert not hasattr(model.linear1, "parametrizations") + assert hasattr(model.linear2, "parametrizations") def test_step(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(test=3) sparsifier.enable_mask_update = True - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}]) + sparsifier.prepare(model, [{"tensor_fqn": "linear1.weight"}]) sparsifier.step() assert torch.all(model.linear1.parametrizations.weight[0].mask[0] == 0) @@ -69,21 +83,21 @@ class TestBaseSparsifier(TestCase): step_count = 3 model0 = SimpleLinear() sparsifier0 = ImplementedSparsifier(test=3) - sparsifier0.prepare(model0, [{'tensor_fqn': 'linear1.weight'}]) - mask = model0.linear1.parametrizations['weight'][0].mask + sparsifier0.prepare(model0, [{"tensor_fqn": "linear1.weight"}]) + mask = model0.linear1.parametrizations["weight"][0].mask mask.data = torch.arange(mask.shape[0] * mask.shape[1]).reshape(mask.shape) for step in range(step_count): sparsifier0.step() state_dict = sparsifier0.state_dict() # Check the expected keys in the state_dict - assert 'state' in state_dict - assert 'step_count' in state_dict['state']['linear1.weight'] - assert state_dict['state']['linear1.weight']['step_count'] == 3 - assert 'groups' in state_dict - assert 'test' in state_dict['groups'][0] - assert 'tensor_fqn' in state_dict['groups'][0] - assert state_dict['groups'][0]['tensor_fqn'] == 'linear1.weight' + assert "state" in state_dict + assert "step_count" in state_dict["state"]["linear1.weight"] + assert state_dict["state"]["linear1.weight"]["step_count"] == 3 + assert "groups" in state_dict + assert "test" in state_dict["groups"][0] + assert "tensor_fqn" in state_dict["groups"][0] + assert state_dict["groups"][0]["tensor_fqn"] == "linear1.weight" # Check loading static_dict creates an equivalent model model1 = SimpleLinear() @@ -94,11 +108,11 @@ class TestBaseSparsifier(TestCase): # Make sure the masks are different in the beginning for mg in sparsifier0.groups: - if mg['tensor_fqn'] == 'linear1.weight': - mask0 = mg['module'].parametrizations.weight[0].mask + if mg["tensor_fqn"] == "linear1.weight": + mask0 = mg["module"].parametrizations.weight[0].mask for mg in sparsifier1.groups: - if mg['tensor_fqn'] == 'linear1.weight': - mask1 = mg['module'].parametrizations.weight[0].mask + if mg["tensor_fqn"] == "linear1.weight": + mask1 = mg["module"].parametrizations.weight[0].mask self.assertNotEqual(mask0, mask1) sparsifier1.load_state_dict(state_dict) @@ -113,12 +127,12 @@ class TestBaseSparsifier(TestCase): mg1 = sparsifier1.groups[idx] for key in mg0.keys(): assert key in mg1 - if key == 'module': + if key == "module": # We cannot compare modules as they are different param0 = mg0[key].parametrizations.weight[0] param1 = mg1[key].parametrizations.weight[0] - assert hasattr(param0, 'mask') - assert hasattr(param1, 'mask') + assert hasattr(param0, "mask") + assert hasattr(param1, "mask") self.assertEqual(param0.__dict__, param1.__dict__) else: assert mg0[key] == mg1[key] @@ -126,80 +140,84 @@ class TestBaseSparsifier(TestCase): def test_convert(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(test=3) - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}]) - new_model = sparsifier.convert(model, mapping={nn.Linear: MockSparseLinear}, inplace=False) + 
sparsifier.prepare(model, [{"tensor_fqn": "linear1.weight"}]) + new_model = sparsifier.convert( + model, mapping={nn.Linear: MockSparseLinear}, inplace=False + ) assert isinstance(new_model.linear1, MockSparseLinear) assert isinstance(new_model.seq[0], nn.Linear) assert isinstance(new_model.linear2, nn.Linear) - - def test_mask_squash(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(test=3) - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}]) - assert hasattr(model.linear1.parametrizations.weight[0], 'mask') - assert is_parametrized(model.linear1, 'weight') - assert not is_parametrized(model.seq[0], 'weight') + sparsifier.prepare(model, [{"tensor_fqn": "linear1.weight"}]) + assert hasattr(model.linear1.parametrizations.weight[0], "mask") + assert is_parametrized(model.linear1, "weight") + assert not is_parametrized(model.seq[0], "weight") sparsifier.squash_mask() - assert not is_parametrized(model.seq[0], 'weight') - assert not is_parametrized(model.linear1, 'weight') + assert not is_parametrized(model.seq[0], "weight") + assert not is_parametrized(model.linear1, "weight") def test_mask_squash_with_params1(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(foo=3, bar=2, baz=1) - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}, {'tensor_fqn': 'seq.0.weight'}]) + sparsifier.prepare( + model, [{"tensor_fqn": "linear1.weight"}, {"tensor_fqn": "seq.0.weight"}] + ) sparsifier.squash_mask( - params_to_keep_per_layer={ - 'linear1': ('foo', 'bar'), - 'seq.0': ('baz',) - }) - assert not is_parametrized(model.seq[0], 'weight') - assert not is_parametrized(model.linear1, 'weight') - assert hasattr(model.seq[0], 'sparse_params') - assert hasattr(model.linear1, 'sparse_params') - assert model.seq[0].sparse_params.get('foo', None) is None - assert model.seq[0].sparse_params.get('bar', None) is None - assert model.seq[0].sparse_params.get('baz', None) == 1 - assert model.linear1.sparse_params.get('foo', None) == 3 - assert model.linear1.sparse_params.get('bar', None) == 2 - assert model.linear1.sparse_params.get('baz', None) is None + params_to_keep_per_layer={"linear1": ("foo", "bar"), "seq.0": ("baz",)} + ) + assert not is_parametrized(model.seq[0], "weight") + assert not is_parametrized(model.linear1, "weight") + assert hasattr(model.seq[0], "sparse_params") + assert hasattr(model.linear1, "sparse_params") + assert model.seq[0].sparse_params.get("foo", None) is None + assert model.seq[0].sparse_params.get("bar", None) is None + assert model.seq[0].sparse_params.get("baz", None) == 1 + assert model.linear1.sparse_params.get("foo", None) == 3 + assert model.linear1.sparse_params.get("bar", None) == 2 + assert model.linear1.sparse_params.get("baz", None) is None def test_mask_squash_with_params2(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(foo=3, bar=2, baz=1) - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}, {'tensor_fqn': 'seq.0.weight'}]) - sparsifier.squash_mask(params_to_keep=('foo', 'bar')) - assert not is_parametrized(model.seq[0], 'weight') - assert not is_parametrized(model.linear1, 'weight') - assert hasattr(model.seq[0], 'sparse_params') - assert hasattr(model.linear1, 'sparse_params') - assert model.seq[0].sparse_params.get('foo', None) == 3 - assert model.seq[0].sparse_params.get('bar', None) == 2 - assert model.seq[0].sparse_params.get('baz', None) is None - assert model.linear1.sparse_params.get('foo', None) == 3 - assert model.linear1.sparse_params.get('bar', None) == 2 - assert 
model.linear1.sparse_params.get('baz', None) is None + sparsifier.prepare( + model, [{"tensor_fqn": "linear1.weight"}, {"tensor_fqn": "seq.0.weight"}] + ) + sparsifier.squash_mask(params_to_keep=("foo", "bar")) + assert not is_parametrized(model.seq[0], "weight") + assert not is_parametrized(model.linear1, "weight") + assert hasattr(model.seq[0], "sparse_params") + assert hasattr(model.linear1, "sparse_params") + assert model.seq[0].sparse_params.get("foo", None) == 3 + assert model.seq[0].sparse_params.get("bar", None) == 2 + assert model.seq[0].sparse_params.get("baz", None) is None + assert model.linear1.sparse_params.get("foo", None) == 3 + assert model.linear1.sparse_params.get("bar", None) == 2 + assert model.linear1.sparse_params.get("baz", None) is None def test_mask_squash_with_params3(self): model = SimpleLinear() sparsifier = ImplementedSparsifier(foo=3, bar=2, baz=1) - sparsifier.prepare(model, [{'tensor_fqn': 'linear1.weight'}, {'tensor_fqn': 'seq.0.weight'}]) + sparsifier.prepare( + model, [{"tensor_fqn": "linear1.weight"}, {"tensor_fqn": "seq.0.weight"}] + ) sparsifier.squash_mask( - params_to_keep=('foo', 'bar'), - params_to_keep_per_layer={'seq.0': ('baz',)}) - assert not is_parametrized(model.seq[0], 'weight') - assert not is_parametrized(model.linear1, 'weight') - assert hasattr(model.seq[0], 'sparse_params') - assert hasattr(model.linear1, 'sparse_params') - assert model.seq[0].sparse_params.get('foo', None) == 3 - assert model.seq[0].sparse_params.get('bar', None) == 2 - assert model.seq[0].sparse_params.get('baz', None) == 1 - assert model.linear1.sparse_params.get('foo', None) == 3 - assert model.linear1.sparse_params.get('bar', None) == 2 - assert model.linear1.sparse_params.get('baz', None) is None + params_to_keep=("foo", "bar"), params_to_keep_per_layer={"seq.0": ("baz",)} + ) + assert not is_parametrized(model.seq[0], "weight") + assert not is_parametrized(model.linear1, "weight") + assert hasattr(model.seq[0], "sparse_params") + assert hasattr(model.linear1, "sparse_params") + assert model.seq[0].sparse_params.get("foo", None) == 3 + assert model.seq[0].sparse_params.get("bar", None) == 2 + assert model.seq[0].sparse_params.get("baz", None) == 1 + assert model.linear1.sparse_params.get("foo", None) == 3 + assert model.linear1.sparse_params.get("bar", None) == 2 + assert model.linear1.sparse_params.get("baz", None) is None class TestWeightNormSparsifier(TestCase): @@ -208,25 +226,33 @@ class TestWeightNormSparsifier(TestCase): sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) for g in sparsifier.groups: - assert isinstance(g['module'], nn.Linear) + assert isinstance(g["module"], nn.Linear) # The groups are unordered - assert g['module_fqn'] in ('seq.0', 'seq.1', 'seq.2', 'linear1', 'linear2') + assert g["module_fqn"] in ("seq.0", "seq.1", "seq.2", "linear1", "linear2") def test_step(self): model = SimpleLinear() sparsifier = WeightNormSparsifier(sparsity_level=0.5) - sparsifier.prepare(model, config=[{'tensor_fqn': 'linear1.weight'}]) + sparsifier.prepare(model, config=[{"tensor_fqn": "linear1.weight"}]) for g in sparsifier.groups: # Before step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) == 0 # checking sparsity level is 0 + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) == 0 # checking sparsity level is 0 sparsifier.enable_mask_update = True sparsifier.step() - self.assertAlmostEqual(model.linear1.parametrizations['weight'][0].mask.mean().item(), 
0.5, places=2) + self.assertAlmostEqual( + model.linear1.parametrizations["weight"][0].mask.mean().item(), + 0.5, + places=2, + ) for g in sparsifier.groups: # After step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) > 0 # checking sparsity level has increased + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) > 0 # checking sparsity level has increased # Test if the mask collapses to all zeros if the weights are randomized iters_before_collapse = 1000 for _ in range(iters_before_collapse): @@ -234,25 +260,29 @@ class TestWeightNormSparsifier(TestCase): sparsifier.step() for g in sparsifier.groups: # After step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) > 0 # checking sparsity level did not collapse + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) > 0 # checking sparsity level did not collapse def test_step_2_of_4(self): model = SimpleLinear() - sparsifier = WeightNormSparsifier(sparsity_level=1.0, - sparse_block_shape=(1, 4), - zeros_per_block=2) - sparsifier.prepare(model, config=[{'tensor_fqn': 'linear1.weight'}]) + sparsifier = WeightNormSparsifier( + sparsity_level=1.0, sparse_block_shape=(1, 4), zeros_per_block=2 + ) + sparsifier.prepare(model, config=[{"tensor_fqn": "linear1.weight"}]) sparsifier.step() # make sure the sparsity level is approximately 50% - mask = model.linear1.parametrizations['weight'][0].mask.to(torch.float) # mean works on float only + mask = model.linear1.parametrizations["weight"][0].mask.to( + torch.float + ) # mean works on float only self.assertAlmostEqual(mask.mean().item(), 0.5, places=2) # Make sure each block has exactly 50% zeros - module = sparsifier.groups[0]['module'] - mask = module.parametrizations['weight'][0].mask + module = sparsifier.groups[0]["module"] + mask = module.parametrizations["weight"][0].mask for row in mask: for idx in range(0, len(row), 4): - block = row[idx:idx + 4] + block = row[idx : idx + 4] block, _ = block.sort() assert (block[:2] == 0).all() assert (block[2:] != 0).all() @@ -262,11 +292,11 @@ class TestWeightNormSparsifier(TestCase): sparsifier = WeightNormSparsifier() sparsifier.prepare(model, config=None) for g in sparsifier.groups: - module = g['module'] + module = g["module"] # Check mask exists - assert hasattr(module.parametrizations['weight'][0], 'mask') + assert hasattr(module.parametrizations["weight"][0], "mask") # Check parametrization exists and is correct - assert is_parametrized(module, 'weight') + assert is_parametrized(module, "weight") assert type(module.parametrizations.weight[0]) == FakeSparsity def test_mask_squash(self): @@ -275,39 +305,39 @@ class TestWeightNormSparsifier(TestCase): sparsifier.prepare(model, config=None) sparsifier.squash_mask() for g in sparsifier.groups: - module = g['module'] - assert not is_parametrized(module, 'weight') - assert not hasattr(module, 'mask') + module = g["module"] + assert not is_parametrized(module, "weight") + assert not hasattr(module, "mask") def test_sparsity_levels(self): sparsity_levels = [-1.0, 0.0, 0.5, 1.0, 2.0] sparse_block_shapes = [(1, 1), (1, 4), (2, 2), (4, 1)] zeros_per_blocks = [0, 1, 2, 3, 4] - testcases = itertools.tee(itertools.product(sparsity_levels, - sparse_block_shapes, - zeros_per_blocks)) + testcases = itertools.tee( + itertools.product(sparsity_levels, sparse_block_shapes, zeros_per_blocks) + ) # Create a config and model with all the testcases model = 
nn.Sequential() sparsifier = WeightNormSparsifier() sparsity_per_layer_config = [] - p = re.compile(r'[-\.\s]') + p = re.compile(r"[-\.\s]") for sl, sbs, zpb in testcases[0]: # Make sure the number of zeros is not > values in a block if zpb > sbs[0] * sbs[1]: continue - layer_name = f'{sl}_{sbs}_{zpb}' - layer_name = p.sub('_', layer_name) + layer_name = f"{sl}_{sbs}_{zpb}" + layer_name = p.sub("_", layer_name) layer = nn.Linear(12, 12, bias=False) layer.weight = nn.Parameter(torch.ones(12, 12)) model.add_module(layer_name, layer) config = { - 'tensor_fqn': layer_name + ".weight", - 'sparsity_level': sl, - 'sparse_block_shape': sbs, - 'zeros_per_block': zpb + "tensor_fqn": layer_name + ".weight", + "sparsity_level": sl, + "sparse_block_shape": sbs, + "zeros_per_block": zpb, } sparsity_per_layer_config.append(config) @@ -319,8 +349,8 @@ class TestWeightNormSparsifier(TestCase): for sl, sbs, zpb in testcases[1]: if zpb > sbs[0] * sbs[1]: continue - layer_name = f'{sl}_{sbs}_{zpb}' - layer_name = p.sub('_', layer_name) + layer_name = f"{sl}_{sbs}_{zpb}" + layer_name = p.sub("_", layer_name) layer = getattr(model, layer_name) # Level of sparsity is achieved @@ -340,30 +370,34 @@ class TestNearlyDiagonalSparsifier(TestCase): sparsifier = NearlyDiagonalSparsifier(nearliness=1) sparsifier.prepare(model, config=None) for g in sparsifier.groups: - assert isinstance(g['module'], nn.Linear) + assert isinstance(g["module"], nn.Linear) # The groups are unordered - assert g['module_fqn'] in ('seq.0', 'seq.1', 'seq.2', 'linear1', 'linear2') + assert g["module_fqn"] in ("seq.0", "seq.1", "seq.2", "linear1", "linear2") def test_step(self): model = SimpleLinear() sparsifier = NearlyDiagonalSparsifier(nearliness=1) - sparsifier.prepare(model, config=[{'tensor_fqn': 'linear1.weight'}]) + sparsifier.prepare(model, config=[{"tensor_fqn": "linear1.weight"}]) for g in sparsifier.groups: # Before step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) == 0 # checking sparsity level is 0 + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) == 0 # checking sparsity level is 0 sparsifier.enable_mask_update = True sparsifier.step() - mask = module.parametrizations['weight'][0].mask + mask = module.parametrizations["weight"][0].mask height, width = mask.shape assert torch.all(mask == torch.eye(height, width)) for g in sparsifier.groups: # After step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) > 0 # checking sparsity level has increased + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) > 0 # checking sparsity level has increased # Test if the mask collapses to all zeros if the weights are randomized iters_before_collapse = 1000 @@ -372,19 +406,21 @@ class TestNearlyDiagonalSparsifier(TestCase): sparsifier.step() for g in sparsifier.groups: # After step - module = g['module'] - assert (1.0 - module.parametrizations['weight'][0].mask.mean()) > 0 # checking sparsity level did not collapse + module = g["module"] + assert ( + 1.0 - module.parametrizations["weight"][0].mask.mean() + ) > 0 # checking sparsity level did not collapse def test_prepare(self): model = SimpleLinear() sparsifier = NearlyDiagonalSparsifier(nearliness=1) sparsifier.prepare(model, config=None) for g in sparsifier.groups: - module = g['module'] + module = g["module"] # Check mask exists - assert hasattr(module.parametrizations['weight'][0], 'mask') + assert 
hasattr(module.parametrizations["weight"][0], "mask") # Check parametrization exists and is correct - assert is_parametrized(module, 'weight') + assert is_parametrized(module, "weight") assert type(module.parametrizations.weight[0]) == FakeSparsity def test_mask_squash(self): @@ -394,36 +430,36 @@ class TestNearlyDiagonalSparsifier(TestCase): sparsifier.step() sparsifier.squash_mask() for g in sparsifier.groups: - module = g['module'] - assert not is_parametrized(module, 'weight') - assert not hasattr(module, 'mask') + module = g["module"] + assert not is_parametrized(module, "weight") + assert not hasattr(module, "mask") weights = module.weight height, width = weights.shape - assert torch.all(weights == torch.eye(height, width) * weights) # only diagonal to be present - + assert torch.all( + weights == torch.eye(height, width) * weights + ) # only diagonal to be present def test_sparsity_levels(self): nearliness_levels = list(range(-1, 100)) model = nn.Sequential() - p = re.compile(r'[-\.\s]') + p = re.compile(r"[-\.\s]") for nearliness in nearliness_levels: sparsifier = NearlyDiagonalSparsifier(nearliness=1) - layer_name = f'{nearliness}' - layer_name = p.sub('_', layer_name) + layer_name = f"{nearliness}" + layer_name = p.sub("_", layer_name) layer = nn.Linear(32, 32, bias=False) layer.weight = nn.Parameter(torch.ones(32, 32)) width, height = layer.weight.shape model.add_module(layer_name, layer) - config = { - 'tensor_fqn': layer_name + ".weight", - 'nearliness': nearliness - } + config = {"tensor_fqn": layer_name + ".weight", "nearliness": nearliness} sparsifier.prepare(model, [config]) # should raise a ValueError when nearliness arg is illegal - if (nearliness > 0 and nearliness % 2 == 0) or (nearliness // 2 >= min(width, height)): + if (nearliness > 0 and nearliness % 2 == 0) or ( + nearliness // 2 >= min(width, height) + ): with self.assertRaises(ValueError): sparsifier.step() else: diff --git a/test/ao/sparsity/test_structured_sparsifier.py b/test/ao/sparsity/test_structured_sparsifier.py index 36b523993185..c76c9b20cc90 100644 --- a/test/ao/sparsity/test_structured_sparsifier.py +++ b/test/ao/sparsity/test_structured_sparsifier.py @@ -6,32 +6,32 @@ import random import torch from torch import nn from torch.ao.pruning._experimental.pruner import ( - SaliencyPruner, - LSTMSaliencyPruner, BaseStructuredSparsifier, FakeStructuredSparsity, - FPGMPruner + FPGMPruner, + LSTMSaliencyPruner, + SaliencyPruner, ) from torch.nn.utils import parametrize - -from torch.testing._internal.common_utils import TestCase, skipIfTorchDynamo from torch.testing._internal.common_pruning import ( - SimpleLinear, - LinearBias, - LinearActivation, - LinearActivationFunctional, - SimpleConv2d, - Conv2dBias, Conv2dActivation, + Conv2dBias, Conv2dPadBias, Conv2dPool, Conv2dPoolFlatten, Conv2dPoolFlattenFunctional, - LSTMLinearModel, + LinearActivation, + LinearActivationFunctional, + LinearBias, LSTMLayerNormLinearModel, + LSTMLinearModel, rows_are_subset, + SimpleConv2d, + SimpleLinear, ) +from torch.testing._internal.common_utils import skipIfTorchDynamo, TestCase + logging.basicConfig( format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO @@ -73,6 +73,7 @@ class BottomHalfLSTMPruner(BaseStructuredSparsifier): new_mask = torch.cat(masks) mask.data = new_mask.data + class TestSaliencyPruner(TestCase): def test_saliency_pruner_update_mask(self): """Test that we prune out the row with the lowest saliency (first row)""" @@ -103,14 +104,9 @@ class TestSaliencyPruner(TestCase): 
num_layers=1, ) - manual_weights = torch.Tensor([[1, 1], - [2, 2], - [2, 2], - [1, 1], - [-1, -1], - [-2, -2], - [-2, -2], - [-1, -1]]) + manual_weights = torch.Tensor( + [[1, 1], [2, 2], [2, 2], [1, 1], [-1, -1], [-2, -2], [-2, -2], [-1, -1]] + ) with torch.no_grad(): model.lstm.weight_ih_l0 = nn.Parameter(manual_weights) @@ -137,18 +133,12 @@ class TestSaliencyPruner(TestCase): pruned_model(lstm_input) # make sure lowest saliency rows are pruned - expected = torch.Tensor([[2, 2], - [2, 2], - [-2, -2], - [-2, -2]]) + expected = torch.Tensor([[2, 2], [2, 2], [-2, -2], [-2, -2]]) pruned = model.lstm.weight_ih_l0 assert expected.shape == pruned.shape assert torch.isclose(expected, pruned, rtol=1e-05, atol=1e-07).all() - expected = torch.Tensor([[2], - [2], - [-2], - [-2]]) + expected = torch.Tensor([[2], [2], [-2], [-2]]) pruned = model.lstm.weight_hh_l0 assert expected.shape == pruned.shape assert torch.isclose(expected, pruned, rtol=1e-05, atol=1e-07).all() @@ -159,7 +149,6 @@ class TestSaliencyPruner(TestCase): assert torch.isclose(expected, pruned, rtol=1e-05, atol=1e-07).all() - class TestBaseStructuredSparsifier(TestCase): def _check_pruner_prepared(self, model, pruner, device): for config in pruner.groups: @@ -916,15 +905,19 @@ class TestBaseStructuredSparsifier(TestCase): # linear columns correctly. assert out_expected.shape == out_pruned.shape + class TestFPGMPruner(TestCase): """ Test case for the implementation of paper: `Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration `_. """ + class SimpleConvFPGM(nn.Module): def __init__(self): super().__init__() - self.conv2d1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, padding=1, bias=False) + self.conv2d1 = nn.Conv2d( + in_channels=1, out_channels=3, kernel_size=3, padding=1, bias=False + ) # Manually set the filter weights for demonstration purposes """ Three filters' weight are manually set to values 3.0, 2.0, and 0.1. 
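# NOTE: a quick sketch of the arithmetic behind the FPGM expectations in the
# next hunk. Each conv2d1 filter is constant across its 9 weights, so the L2
# distance between filters i and j reduces to |w_i - w_j| * sqrt(9):
#     filter(3.0): 3*|3.0-2.0| + 3*|3.0-0.1| = 3.0 + 8.7 = 11.7
#     filter(2.0): 3*|2.0-3.0| + 3*|2.0-0.1| = 3.0 + 5.7 =  8.7
#     filter(0.1): 3*|0.1-3.0| + 3*|0.1-2.0| = 8.7 + 5.7 = 14.4
# FPGM prunes the filter with the smallest summed distance (2.0, the one
# closest to the geometric median) rather than the smallest-norm filter (0.1),
# which is why expected_conv1 below keeps the 3.0 and 0.1 filters:
import torch

w = torch.tensor([3.0, 2.0, 0.1]).repeat_interleave(9).reshape(3, 9)
print(torch.cdist(w, w, p=2).sum(dim=1))  # tensor([11.7000,  8.7000, 14.4000])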
@@ -933,13 +926,19 @@ class TestFPGMPruner(TestCase):
             """
             weights = torch.tensor([3.0, 2.0, 0.1])  # Weights for each filter
             weights = weights[:, None, None, None]  # broadcasting
-            self.conv2d1.weight.data.copy_(torch.ones(self.conv2d1.weight.shape) * weights)
+            self.conv2d1.weight.data.copy_(
+                torch.ones(self.conv2d1.weight.shape) * weights
+            )

             # Second Convolutional Layer
-            self.conv2d2 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, padding=1, bias=False)
+            self.conv2d2 = nn.Conv2d(
+                in_channels=3, out_channels=4, kernel_size=3, padding=1, bias=False
+            )
             weights = torch.tensor([6.0, 7.0, 0.4, 0.5])
             weights = weights[:, None, None, None]
-            self.conv2d2.weight.data.copy_(torch.ones(self.conv2d2.weight.shape) * weights)
+            self.conv2d2.weight.data.copy_(
+                torch.ones(self.conv2d2.weight.shape) * weights
+            )

         def forward(self, x):
             x = self.conv2d1(x)
@@ -953,11 +952,43 @@ class TestFPGMPruner(TestCase):
         dist_conv1 = pruner._compute_distance(model.conv2d1.weight)

         # compute the distance matrix using torch.cdist
-        flattened_filters = torch.Tensor([
-            [3.0000, 3.0000, 3.0000, 3.0000, 3.0000, 3.0000, 3.0000, 3.0000, 3.0000],
-            [2.0000, 2.0000, 2.0000, 2.0000, 2.0000, 2.0000, 2.0000, 2.0000, 2.0000],
-            [0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000]
-        ])
+        flattened_filters = torch.Tensor(
+            [
+                [
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                    3.0000,
+                ],
+                [
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                    2.0000,
+                ],
+                [
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                    0.1000,
+                ],
+            ]
+        )

         """
         Expected distance matrix should have the following values:
@@ -967,9 +998,13 @@ class TestFPGMPruner(TestCase):
        the distance should therefore be:
            [11.7000, 8.7000, 14.4000]
        """
-        expected_dist_matrix_conv1 = torch.cdist(flattened_filters, flattened_filters, p=2)
+        expected_dist_matrix_conv1 = torch.cdist(
+            flattened_filters, flattened_filters, p=2
+        )
         expected_dist_conv1 = torch.sum(torch.abs(expected_dist_matrix_conv1), 1)
-        assert torch.isclose(dist_conv1, expected_dist_conv1, rtol=1e-05, atol=1e-07).all()
+        assert torch.isclose(
+            dist_conv1, expected_dist_conv1, rtol=1e-05, atol=1e-07
+        ).all()

     def _test_update_mask_on_single_layer(self, expected_conv1, device):
         """Test that pruning is conducted based on the pair-wise distance measurement instead of absolute norm value"""
@@ -981,8 +1016,10 @@ class TestFPGMPruner(TestCase):
         pruner.prepare(model, config)
         pruner.enable_mask_update = True
         pruner.step()
-        assert pruner.groups[0]["module"].parametrizations.weight[0].mask[-1].item() is not False, \
-            "do not prune the least-norm filter"
+        assert (
+            pruner.groups[0]["module"].parametrizations.weight[0].mask[-1].item()
+            is not False
+        ), "do not prune the least-norm filter"

         # fusion step
         pruned_model = pruner.prune()
@@ -992,27 +1029,38 @@ class TestFPGMPruner(TestCase):
         expected_conv1 = expected_conv1.to(device)
         assert pruned_y.shape == (1, 4, 32, 32)
         assert pruned_model.conv2d1.weight.shape == expected_conv1.shape
-        assert pruned_model.conv2d2.weight.shape == (4, 2, 3, 3), "conv2d2 should have input channel pruned"
+        assert pruned_model.conv2d2.weight.shape == (
+            4,
+            2,
+            3,
+            3,
+        ), "conv2d2 should have input channel pruned"

         # assert value
-        assert torch.isclose(pruned_model.conv2d1.weight, expected_conv1, rtol=1e-05, atol=1e-07).all()
+        assert torch.isclose(
+            pruned_model.conv2d1.weight, expected_conv1, rtol=1e-05, atol=1e-07
+        ).all()

-    def 
_test_update_mask_on_multiple_layer(self, expected_conv1, expected_conv2, device): + def _test_update_mask_on_multiple_layer( + self, expected_conv1, expected_conv2, device + ): # the second setting model = TestFPGMPruner.SimpleConvFPGM().to(device) x = torch.ones((1, 1, 32, 32), device=device) pruner = FPGMPruner(0.3) config = [ {"tensor_fqn": "conv2d1.weight"}, - {"tensor_fqn": "conv2d2.weight", "sparsity_level": 0.5} + {"tensor_fqn": "conv2d2.weight", "sparsity_level": 0.5}, ] pruner.prepare(model, config) pruner.enable_mask_update = True pruner.step() # Get the masks for the two least-norm filters - mask1 = pruner.groups[0]['module'].parametrizations.weight[0].mask[-1] - mask2 = pruner.groups[0]['module'].parametrizations.weight[0].mask[-2] + mask1 = pruner.groups[0]["module"].parametrizations.weight[0].mask[-1] + mask2 = pruner.groups[0]["module"].parametrizations.weight[0].mask[-2] # Check if either of the least-norm filters is not pruned - assert mask1.item() is not False or mask2.item() is not False, "Do not prune all least-norm filters" + assert ( + mask1.item() is not False or mask2.item() is not False + ), "Do not prune all least-norm filters" # fusion step pruned_model = pruner.prune() @@ -1024,8 +1072,12 @@ class TestFPGMPruner(TestCase): assert pruned_model.conv2d1.weight.shape == expected_conv1.shape assert pruned_model.conv2d2.weight.shape == expected_conv2.shape # assert values - assert torch.isclose(pruned_model.conv2d1.weight, expected_conv1, rtol=1e-05, atol=1e-07).all() - assert torch.isclose(pruned_model.conv2d2.weight, expected_conv2, rtol=1e-05, atol=1e-07).all() + assert torch.isclose( + pruned_model.conv2d1.weight, expected_conv1, rtol=1e-05, atol=1e-07 + ).all() + assert torch.isclose( + pruned_model.conv2d2.weight, expected_conv2, rtol=1e-05, atol=1e-07 + ).all() def test_update_mask(self): weights = torch.tensor([3.0, 0.1]) @@ -1036,4 +1088,6 @@ class TestFPGMPruner(TestCase): for device in DEVICES: self._test_update_mask_on_single_layer(expected_conv1, device) - self._test_update_mask_on_multiple_layer(expected_conv1, expected_conv2, device) + self._test_update_mask_on_multiple_layer( + expected_conv1, expected_conv2, device + ) diff --git a/test/autograd/test_complex.py b/test/autograd/test_complex.py index 5162e0399ee8..caca6f88a00f 100644 --- a/test/autograd/test_complex.py +++ b/test/autograd/test_complex.py @@ -2,7 +2,7 @@ import torch -from torch.testing._internal.common_utils import TestCase, run_tests, gradcheck +from torch.testing._internal.common_utils import gradcheck, run_tests, TestCase class TestAutogradComplex(TestCase): @@ -71,7 +71,9 @@ class TestAutogradComplex(TestCase): # modified inplace res = x1.unbind(0) - with self.assertRaisesRegex(RuntimeError, "output of a function that returns multiple views"): + with self.assertRaisesRegex( + RuntimeError, "output of a function that returns multiple views" + ): res[0] += torch.rand(2, requires_grad=True) x.requires_grad_(True) @@ -80,7 +82,9 @@ class TestAutogradComplex(TestCase): # modified inplace res = x1.unbind(0) - with self.assertRaisesRegex(RuntimeError, "output of a function that returns multiple views"): + with self.assertRaisesRegex( + RuntimeError, "output of a function that returns multiple views" + ): res[0] += torch.rand(2, requires_grad=True) def as_identity(self): @@ -101,5 +105,5 @@ class TestAutogradComplex(TestCase): self.assertEqual(z.grad, z1.grad) -if __name__ == '__main__': +if __name__ == "__main__": run_tests() diff --git a/test/autograd/test_functional.py 
b/test/autograd/test_functional.py index 9146edd0176a..90b855a78132 100644 --- a/test/autograd/test_functional.py +++ b/test/autograd/test_functional.py @@ -9,7 +9,14 @@ import torch.autograd.functional as autogradF from torch.testing._internal.common_cuda import TEST_CUDA from torch.testing._internal.common_utils import ( - TestCase, run_tests, subtest, gradcheck, gradgradcheck, parametrize, instantiate_parametrized_tests) + gradcheck, + gradgradcheck, + instantiate_parametrized_tests, + parametrize, + run_tests, + subtest, + TestCase, +) from torch.testing._internal.logging_tensor import LoggingTensor # Utilities for parametrizing the tensor constructors used in autograd tests @@ -27,37 +34,66 @@ base_ctors_dict = { } base_ctors = types.SimpleNamespace(**base_ctors_dict) + def wrap_with_logging_tensor(ctor): def wrapper(*args, **kwargs): requires_grad = kwargs.pop("requires_grad", False) return LoggingTensor(ctor(*args, **kwargs), requires_grad=requires_grad) + return wrapper -logging_tensor_ctors_dict = {k: wrap_with_logging_tensor(ctor) for (k, ctor) in base_ctors_dict.items()} + +logging_tensor_ctors_dict = { + k: wrap_with_logging_tensor(ctor) for (k, ctor) in base_ctors_dict.items() +} logging_tensor_ctors = types.SimpleNamespace(**logging_tensor_ctors_dict) -base_and_logging_tensor = parametrize("ctors", [subtest(base_ctors, name="base_tensor"), - subtest(logging_tensor_ctors, name="logging_tensor")]) +base_and_logging_tensor = parametrize( + "ctors", + [ + subtest(base_ctors, name="base_tensor"), + subtest(logging_tensor_ctors, name="logging_tensor"), + ], +) -FIXME_base_and_xfail_logging_tensor = parametrize("ctors", [subtest(base_ctors, name="base_tensor"), - subtest(logging_tensor_ctors, name="logging_tensor", - decorators=[unittest.expectedFailure])]) +FIXME_base_and_xfail_logging_tensor = parametrize( + "ctors", + [ + subtest(base_ctors, name="base_tensor"), + subtest( + logging_tensor_ctors, + name="logging_tensor", + decorators=[unittest.expectedFailure], + ), + ], +) # NB: This is equivalent to having both @parametrize("vectorized", [True, False]) and # FIXME_base_and_xfail_logging_tensor, except the non-vectorized logging_tensor case is # actually expected to succeed -FIXME_xfail_vectorized_logging_tensor = ( - parametrize("vectorize,ctors", [subtest((True, base_ctors), name="vectorized_base_tensor"), - subtest((False, base_ctors), name="base_tensor"), - subtest((True, logging_tensor_ctors), name="vectorized_logging_tensor", - decorators=[unittest.expectedFailure]), - subtest((False, logging_tensor_ctors), name="logging_tensor")])) +FIXME_xfail_vectorized_logging_tensor = parametrize( + "vectorize,ctors", + [ + subtest((True, base_ctors), name="vectorized_base_tensor"), + subtest((False, base_ctors), name="base_tensor"), + subtest( + (True, logging_tensor_ctors), + name="vectorized_logging_tensor", + decorators=[unittest.expectedFailure], + ), + subtest((False, logging_tensor_ctors), name="logging_tensor"), + ], +) -vectorized_logging_tensor = ( - parametrize("vectorize,ctors", [subtest((True, base_ctors), name="vectorized_base_tensor"), - subtest((False, base_ctors), name="base_tensor"), - subtest((True, logging_tensor_ctors), name="vectorized_logging_tensor"), - subtest((False, logging_tensor_ctors), name="logging_tensor")])) +vectorized_logging_tensor = parametrize( + "vectorize,ctors", + [ + subtest((True, base_ctors), name="vectorized_base_tensor"), + subtest((False, base_ctors), name="base_tensor"), + subtest((True, logging_tensor_ctors), 
name="vectorized_logging_tensor"), + subtest((False, logging_tensor_ctors), name="logging_tensor"), + ], +) class TestAutogradFunctional(TestCase): @@ -75,8 +111,10 @@ class TestAutogradFunctional(TestCase): self.assertEqual(el_base.size(), el_res.size()) else: # Wrong base - raise RuntimeError("The base given to `_assert_same_struct` doesn't have" - " the right structure.") + raise RuntimeError( + "The base given to `_assert_same_struct` doesn't have" + " the right structure." + ) def _assert_interleaved_struct(self, res, base1, base2): # base1 and base2 can be Tensors or tuples of Tensors. @@ -112,11 +150,15 @@ class TestAutogradFunctional(TestCase): for el_el_res, el_base2 in zip(el_res, base2): self.assertTrue(isinstance(el_el_res, torch.Tensor)) self.assertTrue(isinstance(el_base2, torch.Tensor)) - self.assertEqual(el_el_res.size(), el_base1.size() + el_base2.size()) + self.assertEqual( + el_el_res.size(), el_base1.size() + el_base2.size() + ) else: # Wrong bases - raise RuntimeError("The bases given to `_assert_interleaved_struct` don't have" - " the right structure.") + raise RuntimeError( + "The bases given to `_assert_interleaved_struct` don't have" + " the right structure." + ) @base_and_logging_tensor def test_vjp_err_check(self, ctors): @@ -128,19 +170,30 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.ones(3) - with self.assertRaisesRegex(TypeError, "The inputs given to vjp must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to vjp must be either a Tensor" + ): res = autogradF.vjp(foo, (inp, 2), v) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to vjp must"): + with self.assertRaisesRegex( + TypeError, "The outputs of the user-provided function given to vjp must" + ): res = autogradF.vjp(bar, inp, v) - with self.assertRaisesRegex(RuntimeError, "The vector v can only be None if the user-provided function returns"): + with self.assertRaisesRegex( + RuntimeError, + "The vector v can only be None if the user-provided function returns", + ): res = autogradF.vjp(foo, inp) - with self.assertRaisesRegex(RuntimeError, "The given v should contain a single Tensor."): + with self.assertRaisesRegex( + RuntimeError, "The given v should contain a single Tensor." + ): res = autogradF.vjp(foo, inp, (torch.ones_like(inp), torch.ones_like(inp))) - with self.assertRaisesRegex(RuntimeError, "v has invalid size: should be torch.Size"): + with self.assertRaisesRegex( + RuntimeError, "v has invalid size: should be torch.Size" + ): res = autogradF.vjp(foo, inp, v[:2]) res = autogradF.vjp(foo, inp, v)[1] @@ -157,24 +210,33 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.vjp(foo, inp, v, strict=True) res = autogradF.vjp(foo, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) 
+ self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "The output of the user-provided function is independent of input 0"): + with self.assertRaisesRegex( + RuntimeError, + "The output of the user-provided function is independent of input 0", + ): res = autogradF.vjp(bar, inp, v, strict=True) res = autogradF.vjp(bar, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) # The Jacobian does not depend on the input def foo(a): return a.clone() inp.requires_grad_() - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function is independent of input 0."): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function is independent of input 0.", + ): res = autogradF.vjp(foo, inp, v, create_graph=True, strict=True) res = autogradF.vjp(foo, inp, v, create_graph=True, strict=False) self._assert_same_struct(res[1], inp) @@ -184,6 +246,7 @@ class TestAutogradFunctional(TestCase): def test_vjp_no_grad(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(4, 4) v = ctors.ones(4) with torch.no_grad(): @@ -204,6 +267,7 @@ class TestAutogradFunctional(TestCase): def test_vjp_output(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(4, 4) v = ctors.ones(4) res = autogradF.vjp(reducer, inputs, v) @@ -226,7 +290,7 @@ class TestAutogradFunctional(TestCase): return 2 * x + 3 * y, x + y inputs = (ctors.rand(2), ctors.rand(2)) - v = (ctors.tensor([1., 0.]), ctors.tensor([1., 0.])) + v = (ctors.tensor([1.0, 0.0]), ctors.tensor([1.0, 0.0])) out, vjp_val = autogradF.vjp(adder, inputs, v) self._assert_same_struct(vjp_val, inputs) self.assertIsNone(out[0].grad_fn) @@ -238,6 +302,7 @@ class TestAutogradFunctional(TestCase): def test_vjp_scalar(self, ctors): def reducer(x): return x.sum() + inputs = ctors.rand(4, 4) v = ctors.ones([]) res = autogradF.vjp(reducer, inputs, v) @@ -250,6 +315,7 @@ class TestAutogradFunctional(TestCase): def expander(x): return x.unsqueeze(0).repeat(4) + inputs = ctors.rand([]) v = ctors.ones(4) res = autogradF.vjp(expander, inputs, v) @@ -260,6 +326,7 @@ class TestAutogradFunctional(TestCase): def test_vjp_create_graph(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(2, 2, dtype=torch.double) v = ctors.ones(2, dtype=torch.double) @@ -270,19 +337,39 @@ class TestAutogradFunctional(TestCase): self.assertIsNotNone(res[0].grad_fn) self.assertIsNotNone(res[1].grad_fn) - gradcheck(lambda inp, v: autogradF.vjp(reducer, inputs, v, create_graph=True), (inputs, v)) - gradgradcheck(lambda inp, v: autogradF.vjp(reducer, inputs, v, create_graph=True), (inputs, v)) + gradcheck( + lambda inp, v: autogradF.vjp(reducer, inputs, v, create_graph=True), + (inputs, v), + ) + gradgradcheck( + lambda inp, v: autogradF.vjp(reducer, inputs, v, create_graph=True), + (inputs, v), + ) def adder(x, y): return 2 * x + 3 * y, x * y - inputs = (ctors.rand(2, dtype=torch.double, requires_grad=True), - ctors.rand(2, dtype=torch.double, requires_grad=True)) - v = (ctors.tensor([1., 0.], dtype=torch.double, requires_grad=True), - ctors.tensor([1., 0.], dtype=torch.double, requires_grad=True)) + inputs = ( + ctors.rand(2, dtype=torch.double, requires_grad=True), + ctors.rand(2, dtype=torch.double, requires_grad=True), + ) + v = ( + ctors.tensor([1.0, 0.0], dtype=torch.double, requires_grad=True), + ctors.tensor([1.0, 0.0], dtype=torch.double, requires_grad=True), + ) - gradcheck(lambda 
*args: autogradF.vjp(adder, args[:2], args[2:], create_graph=True)[1], inputs + v) - gradgradcheck(lambda *args: autogradF.vjp(adder, args[:2], args[2:], create_graph=True)[1], inputs + v) + gradcheck( + lambda *args: autogradF.vjp(adder, args[:2], args[2:], create_graph=True)[ + 1 + ], + inputs + v, + ) + gradgradcheck( + lambda *args: autogradF.vjp(adder, args[:2], args[2:], create_graph=True)[ + 1 + ], + inputs + v, + ) def foo(*args): x, y = args[:2] @@ -291,7 +378,14 @@ class TestAutogradFunctional(TestCase): x = x.cos() val, grad = autogradF.vjp(adder, (x, y), v, create_graph=True) - return val[0].exp() + val[1].exp() + grad[0].exp() + grad[1].exp() + x.exp() + y.exp() + return ( + val[0].exp() + + val[1].exp() + + grad[0].exp() + + grad[1].exp() + + x.exp() + + y.exp() + ) gradcheck(foo, inputs + v) gradgradcheck(foo, inputs + v) @@ -306,19 +400,30 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(TypeError, "The inputs given to jvp must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to jvp must be either a Tensor" + ): res = autogradF.jvp(foo, (inp, 2), v) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to jvp must"): + with self.assertRaisesRegex( + TypeError, "The outputs of the user-provided function given to jvp must" + ): res = autogradF.jvp(bar, inp, v) - with self.assertRaisesRegex(RuntimeError, "The vector v can only be None if the input to the user-provided function"): + with self.assertRaisesRegex( + RuntimeError, + "The vector v can only be None if the input to the user-provided function", + ): res = autogradF.jvp(foo, inp) - with self.assertRaisesRegex(RuntimeError, "The given v should contain a single Tensor."): + with self.assertRaisesRegex( + RuntimeError, "The given v should contain a single Tensor." + ): res = autogradF.jvp(foo, inp, (v, v)) - with self.assertRaisesRegex(RuntimeError, "v has invalid size: should be torch.Size"): + with self.assertRaisesRegex( + RuntimeError, "v has invalid size: should be torch.Size" + ): res = autogradF.jvp(foo, inp, v[:2]) res = autogradF.jvp(foo, inp, v)[1] @@ -335,24 +440,33 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.jvp(foo, inp, v, strict=True) res = autogradF.jvp(foo, inp, v, strict=False) self._assert_same_struct(res[1], res[0]) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "The output of the user-provided function is independent of input 0"): + with self.assertRaisesRegex( + RuntimeError, + "The output of the user-provided function is independent of input 0", + ): res = autogradF.jvp(bar, inp, v, strict=True) res = autogradF.jvp(bar, inp, v, strict=False) self._assert_same_struct(res[1], res[0]) - self.assertEqual(res[1].abs().sum(), 0.) 
+ self.assertEqual(res[1].abs().sum(), 0.0) # The Jacobian does not depend on the input def foo(a): return a.clone() inp.requires_grad_() - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function is independent of input 0."): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function is independent of input 0.", + ): res = autogradF.jvp(foo, inp, v, create_graph=True, strict=True) res = autogradF.jvp(foo, inp, v, create_graph=True, strict=False) self._assert_same_struct(res[1], inp) @@ -362,6 +476,7 @@ class TestAutogradFunctional(TestCase): def test_jvp_no_grad(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) with torch.no_grad(): @@ -382,6 +497,7 @@ class TestAutogradFunctional(TestCase): def test_jvp_output(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) res = autogradF.jvp(reducer, inputs, v) @@ -404,7 +520,7 @@ class TestAutogradFunctional(TestCase): return 2 * x + 3 * y, x + y inputs = (ctors.rand(2), ctors.rand(2)) - v = (ctors.tensor([1., 0.]), ctors.tensor([1., 0.])) + v = (ctors.tensor([1.0, 0.0]), ctors.tensor([1.0, 0.0])) out, jvp_val = autogradF.jvp(adder, inputs, v) self._assert_same_struct(jvp_val, out) self.assertIsNone(out[0].grad_fn) @@ -416,6 +532,7 @@ class TestAutogradFunctional(TestCase): def test_jvp_scalar(self, ctors): def reducer(x): return x.sum() + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) res = autogradF.jvp(reducer, inputs, v) @@ -424,6 +541,7 @@ class TestAutogradFunctional(TestCase): def expander(x): return x.unsqueeze(0).repeat(4) + inputs = ctors.rand([]) v = ctors.ones([]) res = autogradF.jvp(expander, inputs, v) @@ -438,6 +556,7 @@ class TestAutogradFunctional(TestCase): def test_jvp_create_graph(self, ctors): def reducer(x): return x.sum(dim=1) + inputs = ctors.rand(2, 2, dtype=torch.double) v = ctors.ones(2, 2, dtype=torch.double) @@ -448,19 +567,39 @@ class TestAutogradFunctional(TestCase): self.assertIsNotNone(res[0].grad_fn) self.assertIsNotNone(res[1].grad_fn) - gradcheck(lambda inp, v: autogradF.jvp(reducer, inp, v, create_graph=True), (inputs, v)) - gradgradcheck(lambda inp, v: autogradF.jvp(reducer, inp, v, create_graph=True), (inputs, v)) + gradcheck( + lambda inp, v: autogradF.jvp(reducer, inp, v, create_graph=True), + (inputs, v), + ) + gradgradcheck( + lambda inp, v: autogradF.jvp(reducer, inp, v, create_graph=True), + (inputs, v), + ) def adder(x, y): return 2 * x + 3 * y, x * y - inputs = (ctors.rand(2, dtype=torch.double, requires_grad=True), - ctors.rand(2, dtype=torch.double, requires_grad=True)) - v = (ctors.tensor([1., 0.], dtype=torch.double, requires_grad=True), - ctors.tensor([1., 0.], dtype=torch.double, requires_grad=True)) + inputs = ( + ctors.rand(2, dtype=torch.double, requires_grad=True), + ctors.rand(2, dtype=torch.double, requires_grad=True), + ) + v = ( + ctors.tensor([1.0, 0.0], dtype=torch.double, requires_grad=True), + ctors.tensor([1.0, 0.0], dtype=torch.double, requires_grad=True), + ) - gradcheck(lambda *args: autogradF.jvp(adder, args[:2], args[2:], create_graph=True)[1], inputs + v) - gradgradcheck(lambda *args: autogradF.jvp(adder, args[:2], args[2:], create_graph=True)[1], inputs + v) + gradcheck( + lambda *args: autogradF.jvp(adder, args[:2], args[2:], create_graph=True)[ + 1 + ], + inputs + v, + ) + gradgradcheck( + lambda *args: autogradF.jvp(adder, args[:2], args[2:], create_graph=True)[ + 1 + ], + inputs + v, + ) def foo(*args): x, y = args[:2] @@ 
-469,7 +608,14 @@ class TestAutogradFunctional(TestCase): x = x.cos() val, grad = autogradF.jvp(adder, (x, y), v, create_graph=True) - return val[0].exp() + val[1].exp() + grad[0].exp() + grad[1].exp() + x.exp() + y.exp() + return ( + val[0].exp() + + val[1].exp() + + grad[0].exp() + + grad[1].exp() + + x.exp() + + y.exp() + ) gradcheck(foo, inputs + v) gradgradcheck(foo, inputs + v) @@ -480,8 +626,9 @@ class TestAutogradFunctional(TestCase): for result, inp in zip(results, inputs): self.assertEqual(result.dtype, inp.dtype) self.assertEqual(result.device, inp.device) - results = torch.cat([result.to(device='cpu', dtype=torch.float) - for result in results], dim=1) + results = torch.cat( + [result.to(device="cpu", dtype=torch.float) for result in results], dim=1 + ) expected = torch.eye(results[0].shape[0], dtype=torch.float) self.assertEqual(results, expected) @@ -505,8 +652,8 @@ class TestAutogradFunctional(TestCase): @base_and_logging_tensor def test_construct_standard_basis_for_cuda(self, ctors): test_cases = [ - (ctors.randn(2), ctors.randn(3, device='cuda')), - (ctors.randn(3, device='cuda'), ctors.randn(2)), + (ctors.randn(2), ctors.randn(3, device="cuda")), + (ctors.randn(3, device="cuda"), ctors.randn(2)), ] for inputs in test_cases: @@ -519,7 +666,7 @@ class TestAutogradFunctional(TestCase): # warning; it is not nice for a public-facing API to raise a warning # no matter how it is called. def foo(a): - return (a ** 2).sum() + return (a**2).sum() x = ctors.randn(3) with warnings.catch_warnings(record=True) as wa: @@ -544,10 +691,15 @@ class TestAutogradFunctional(TestCase): return 3 * a.narrow(0, 0, 3), "bar" inp = ctors.rand(4) - with self.assertRaisesRegex(TypeError, "The inputs given to jacobian must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to jacobian must be either a Tensor" + ): res = autogradF.jacobian(foo, (inp, 2), vectorize=vectorize) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to jacobian must"): + with self.assertRaisesRegex( + TypeError, + "The outputs of the user-provided function given to jacobian must", + ): res = autogradF.jacobian(bar, inp, vectorize=vectorize) res = autogradF.jacobian(foo, inp, vectorize=vectorize) @@ -571,24 +723,33 @@ class TestAutogradFunctional(TestCase): return a.long().float().requires_grad_().clone() inp = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.jacobian(foo, inp, strict=True) res = autogradF.jacobian(foo, inp, strict=False) self._assert_interleaved_struct(res, foo(inp), inp) - self.assertEqual(res.abs().sum(), 0.) + self.assertEqual(res.abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function is independent of input 0."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function is independent of input 0.", + ): res = autogradF.jacobian(bar, inp, strict=True) res = autogradF.jacobian(bar, inp, strict=False) self._assert_interleaved_struct(res, foo(inp), inp) - self.assertEqual(res.abs().sum(), 0.) 
+ self.assertEqual(res.abs().sum(), 0.0) # The Jacobian does not depend on the input def foo(a): return a.clone() inp.requires_grad_() - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function is independent of input 0."): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function is independent of input 0.", + ): res = autogradF.jacobian(foo, inp, create_graph=True, strict=True) res = autogradF.jacobian(foo, inp, create_graph=True, strict=False) self._assert_interleaved_struct(res, inp, inp) @@ -651,12 +812,14 @@ class TestAutogradFunctional(TestCase): def test_jacobian_scalar(self, vectorize, ctors): def reducer(x): return x.sum() + inputs = ctors.rand(4, 4) res = autogradF.jacobian(reducer, inputs, vectorize=vectorize) self._assert_same_struct(res, inputs) def expander(x): return x.unsqueeze(0).repeat(4) + inputs = ctors.rand([]) res = autogradF.jacobian(expander, inputs, vectorize=vectorize) self._assert_same_struct(res, ctors.zeros(4)) @@ -668,29 +831,57 @@ class TestAutogradFunctional(TestCase): return x.exp().sum(dim=1) inputs = ctors.rand(4, 4, dtype=torch.double, requires_grad=True) - res = autogradF.jacobian(exp_reducer, inputs, create_graph=True, vectorize=vectorize) + res = autogradF.jacobian( + exp_reducer, inputs, create_graph=True, vectorize=vectorize + ) self._assert_interleaved_struct(res, exp_reducer(inputs), inputs) self.assertIsNotNone(res.grad_fn) - gradcheck(lambda inp: autogradF.jacobian(exp_reducer, inp, create_graph=True, vectorize=vectorize), inputs) - gradgradcheck(lambda inp: autogradF.jacobian(exp_reducer, inp, create_graph=True, vectorize=vectorize), inputs) + gradcheck( + lambda inp: autogradF.jacobian( + exp_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) + gradgradcheck( + lambda inp: autogradF.jacobian( + exp_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) def add_exp_reducer(x, y): return (x + y).exp().sum(dim=1) - inputs = (ctors.rand(4, 4, dtype=torch.double, requires_grad=True), - ctors.rand(4, 4, dtype=torch.double, requires_grad=True)) - res = autogradF.jacobian(add_exp_reducer, inputs, create_graph=True, vectorize=vectorize) + inputs = ( + ctors.rand(4, 4, dtype=torch.double, requires_grad=True), + ctors.rand(4, 4, dtype=torch.double, requires_grad=True), + ) + res = autogradF.jacobian( + add_exp_reducer, inputs, create_graph=True, vectorize=vectorize + ) self._assert_interleaved_struct(res, add_exp_reducer(*inputs), inputs) self.assertIsNotNone(res[0].grad_fn) self.assertIsNotNone(res[1].grad_fn) - gradcheck(lambda *inp: autogradF.jacobian(add_exp_reducer, inp, create_graph=True, vectorize=vectorize), inputs) - gradgradcheck(lambda *inp: autogradF.jacobian(add_exp_reducer, inp, create_graph=True, vectorize=vectorize), inputs) + gradcheck( + lambda *inp: autogradF.jacobian( + add_exp_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) + gradgradcheck( + lambda *inp: autogradF.jacobian( + add_exp_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) def foo(x, y): x = x.cos() - val, jac = autogradF.jacobian(add_exp_reducer, (x, y), create_graph=True, vectorize=vectorize) + val, jac = autogradF.jacobian( + add_exp_reducer, (x, y), create_graph=True, vectorize=vectorize + ) res = val[0].exp().sum() + val[1].exp().sum() + jac[0].exp().sum() res = res + jac[1].exp().sum() + x.exp().sum() + y.exp().sum() @@ -705,13 +896,15 @@ class TestAutogradFunctional(TestCase): self.assertEqual(result_backward_mode, expected) if 
test_forward_ad: - result_forward_mode = autogradF.jacobian(f, inputs, strategy="forward-mode", vectorize=True) + result_forward_mode = autogradF.jacobian( + f, inputs, strategy="forward-mode", vectorize=True + ) self.assertEqual(result_forward_mode, expected) @base_and_logging_tensor def test_jacobian_vectorize_correctness_simple(self, ctors): def f(x): - return 3 * x ** 2 + return 3 * x**2 x = ctors.randn(2, 3, 5) self._check_jacobian_vectorize_correctness(f, x) @@ -794,13 +987,15 @@ class TestAutogradFunctional(TestCase): result = autogradF.hessian(f, inputs, vectorize=True) self.assertEqual(result, expected) - result_forward_mode = autogradF.hessian(f, inputs, outer_jacobian_strategy="forward-mode", vectorize=True) + result_forward_mode = autogradF.hessian( + f, inputs, outer_jacobian_strategy="forward-mode", vectorize=True + ) self.assertEqual(result_forward_mode, expected) @base_and_logging_tensor def test_hessian_vectorize_correctness_simple(self, ctors): def f(x): - return (3 * x ** 2).sum() + return (3 * x**2).sum() x = ctors.randn(2, 3, 5) self._check_hessian_vectorize_correctness(f, x) @@ -819,7 +1014,7 @@ class TestAutogradFunctional(TestCase): def test_hessian_vectorize_correctness_unrelated_outputs(self, ctors): # output unrelated to one input def f(x, y): - return (x ** 2).sum() + return (x**2).sum() x = ctors.randn(2) y = ctors.randn(3) @@ -849,17 +1044,23 @@ class TestAutogradFunctional(TestCase): return 3 * a.narrow(0, 0, 3), 3 * a.narrow(0, 0, 3) inp = ctors.rand(4) - with self.assertRaisesRegex(TypeError, "The inputs given to hessian must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to hessian must be either a Tensor" + ): res = autogradF.hessian(foo, (inp, 2), vectorize=vectorize) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to hessian must"): + with self.assertRaisesRegex( + TypeError, "The outputs of the user-provided function given to hessian must" + ): res = autogradF.hessian(bar, inp, vectorize=vectorize) err_msg_out = "The Tensor returned by the function given to hessian should contain a single element" with self.assertRaisesRegex(RuntimeError, err_msg_out): res = autogradF.hessian(bar2, inp, vectorize=vectorize) - with self.assertRaisesRegex(RuntimeError, "The function given to hessian should return a single Tensor"): + with self.assertRaisesRegex( + RuntimeError, "The function given to hessian should return a single Tensor" + ): res = autogradF.hessian(bar3, inp, vectorize=vectorize) res = autogradF.hessian(foo, inp, vectorize=vectorize) @@ -887,28 +1088,37 @@ class TestAutogradFunctional(TestCase): return (3 * a).sum() inp = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.hessian(foo, inp, strict=True) res = autogradF.hessian(foo, inp, strict=False) self._assert_interleaved_struct(res, inp, inp) - self.assertEqual(res.abs().sum(), 0.) 
+ self.assertEqual(res.abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function with respect to input 0"): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function with respect to input 0", + ): res = autogradF.hessian(bar, inp, strict=True) res = autogradF.hessian(bar, inp, strict=False) self._assert_interleaved_struct(res, inp, inp) - self.assertEqual(res.abs().sum(), 0.) + self.assertEqual(res.abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function with respect to input 0 is"): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function with respect to input 0 is", + ): res = autogradF.hessian(bar2, inp, strict=True) res = autogradF.hessian(bar2, inp, strict=False) self._assert_interleaved_struct(res, inp, inp) - self.assertEqual(res.abs().sum(), 0.) + self.assertEqual(res.abs().sum(), 0.0) @base_and_logging_tensor def test_hessian_err_check_strict_vectorize(self, ctors): def foo(x): - return (x ** 3).sum() + return (x**3).sum() inp = ctors.rand(4) with self.assertRaisesRegex(RuntimeError, "not supported together"): @@ -962,6 +1172,7 @@ class TestAutogradFunctional(TestCase): def test_hessian_scalar(self, vectorize, ctors): def reducer(x): return x.sum() + inputs = ctors.rand(4, 4) res = autogradF.hessian(reducer, inputs, vectorize=vectorize) self._assert_interleaved_struct(res, inputs, inputs) @@ -972,6 +1183,7 @@ class TestAutogradFunctional(TestCase): def bad_reducer(x): return x.sum().view(1, 1, 1) + inputs = ctors.rand(4, 4) res = autogradF.hessian(bad_reducer, inputs, vectorize=vectorize) self._assert_interleaved_struct(res, inputs, inputs) @@ -983,19 +1195,35 @@ class TestAutogradFunctional(TestCase): return x.pow(3).sum() inputs = ctors.rand(2, 2, dtype=torch.double, requires_grad=True) - res = autogradF.hessian(pow_reducer, inputs, create_graph=True, vectorize=vectorize) + res = autogradF.hessian( + pow_reducer, inputs, create_graph=True, vectorize=vectorize + ) self._assert_interleaved_struct(res, inputs, inputs) self.assertIsNotNone(res.grad_fn) - gradcheck(lambda inp: autogradF.hessian(pow_reducer, inp, create_graph=True, vectorize=vectorize), inputs) - gradgradcheck(lambda inp: autogradF.hessian(pow_reducer, inp, create_graph=True, vectorize=vectorize), inputs) + gradcheck( + lambda inp: autogradF.hessian( + pow_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) + gradgradcheck( + lambda inp: autogradF.hessian( + pow_reducer, inp, create_graph=True, vectorize=vectorize + ), + inputs, + ) def add_pow_reducer(x, y): return (x + y).pow(3).sum() - inputs = (ctors.rand(2, 2, dtype=torch.double, requires_grad=True), - ctors.rand(2, 2, dtype=torch.double, requires_grad=True)) - res = autogradF.hessian(add_pow_reducer, inputs, create_graph=True, vectorize=vectorize) + inputs = ( + ctors.rand(2, 2, dtype=torch.double, requires_grad=True), + ctors.rand(2, 2, dtype=torch.double, requires_grad=True), + ) + res = autogradF.hessian( + add_pow_reducer, inputs, create_graph=True, vectorize=vectorize + ) self._assert_interleaved_struct(res, inputs, inputs) self.assertIsNotNone(res[0][0].grad_fn) self.assertIsNotNone(res[0][1].grad_fn) @@ -1005,12 +1233,28 @@ class TestAutogradFunctional(TestCase): def flatten(inp): return tuple(el_lvl2 for el_lvl1 in inp for el_lvl2 in el_lvl1) - gradcheck(lambda *inp: flatten(autogradF.hessian(add_pow_reducer, inp, create_graph=True, vectorize=vectorize)), inputs) - gradgradcheck(lambda 
*inp: flatten(autogradF.hessian(add_pow_reducer, inp, create_graph=True, vectorize=vectorize)), inputs) + gradcheck( + lambda *inp: flatten( + autogradF.hessian( + add_pow_reducer, inp, create_graph=True, vectorize=vectorize + ) + ), + inputs, + ) + gradgradcheck( + lambda *inp: flatten( + autogradF.hessian( + add_pow_reducer, inp, create_graph=True, vectorize=vectorize + ) + ), + inputs, + ) def foo(x, y): x = x.cos() - val, hess = autogradF.hessian(add_pow_reducer, (x, y), create_graph=True, vectorize=vectorize) + val, hess = autogradF.hessian( + add_pow_reducer, (x, y), create_graph=True, vectorize=vectorize + ) res = val[0].cos().sum() + val[1].cos().sum() + hess[0].cos().sum() res = res + hess[1].cos().sum() + x.cos().sum() + y.cos().sum() @@ -1032,10 +1276,14 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(TypeError, "The inputs given to vhp must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to vhp must be either a Tensor" + ): res = autogradF.vhp(foo, (inp, 2), v) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to vhp must"): + with self.assertRaisesRegex( + TypeError, "The outputs of the user-provided function given to vhp must" + ): res = autogradF.vhp(bar, inp, v) err_msg_out = "The Tensor returned by the function given to vhp should contain a single element" @@ -1045,7 +1293,10 @@ class TestAutogradFunctional(TestCase): with self.assertRaisesRegex(RuntimeError, "v has invalid size:"): res = autogradF.vhp(foo, inp, ctors.rand(5)) - with self.assertRaisesRegex(TypeError, "The v given to vhp must be either a Tensor or a tuple of Tensors"): + with self.assertRaisesRegex( + TypeError, + "The v given to vhp must be either a Tensor or a tuple of Tensors", + ): res = autogradF.vhp(foo, inp, (v, 2)) res = autogradF.vhp(foo, inp, v) @@ -1075,28 +1326,38 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.vhp(foo, inp, v, strict=True) res = autogradF.vhp(foo, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "The output of the user-provided function is independent of input 0"): + with self.assertRaisesRegex( + RuntimeError, + "The output of the user-provided function is independent of input 0", + ): res = autogradF.vhp(bar, inp, v, strict=True) res = autogradF.vhp(bar, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function with respect to input 0 is"): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function with respect to input 0 is", + ): res = autogradF.vhp(bar2, inp, v, strict=True) res = autogradF.vhp(bar2, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) 
+ self.assertEqual(res[1].abs().sum(), 0.0) @base_and_logging_tensor def test_vhp_no_grad(self, ctors): def reducer(x): return x.exp().sum() + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) with torch.no_grad(): @@ -1138,6 +1399,7 @@ class TestAutogradFunctional(TestCase): def test_vhp_scalar(self, ctors): def reducer(x): return x.sum() + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) res = autogradF.vhp(reducer, inputs, v) @@ -1153,6 +1415,7 @@ class TestAutogradFunctional(TestCase): def bad_reducer(x): return x.sum().view(1, 1, 1) + inputs = ctors.rand(4, 4) v = ctors.rand(4, 4) res = autogradF.vhp(bad_reducer, inputs, v) @@ -1170,24 +1433,38 @@ class TestAutogradFunctional(TestCase): self.assertIsNotNone(res[0].grad_fn) self.assertIsNotNone(res[1].grad_fn) - gradcheck(lambda inp, v: autogradF.vhp(foo, inp, v, create_graph=True), (inputs, v)) - gradgradcheck(lambda inp, v: autogradF.vhp(foo, inp, v, create_graph=True), (inputs, v)) + gradcheck( + lambda inp, v: autogradF.vhp(foo, inp, v, create_graph=True), (inputs, v) + ) + gradgradcheck( + lambda inp, v: autogradF.vhp(foo, inp, v, create_graph=True), (inputs, v) + ) def bar(a, b): return (a + 3 * b.narrow(0, 0, 3)).exp().sum() - inputs = (ctors.rand(3, dtype=torch.double, requires_grad=True), - ctors.rand(4, dtype=torch.double, requires_grad=True)) - v = (ctors.ones(3, dtype=torch.double, requires_grad=True), - ctors.ones(4, dtype=torch.double, requires_grad=True)) + inputs = ( + ctors.rand(3, dtype=torch.double, requires_grad=True), + ctors.rand(4, dtype=torch.double, requires_grad=True), + ) + v = ( + ctors.ones(3, dtype=torch.double, requires_grad=True), + ctors.ones(4, dtype=torch.double, requires_grad=True), + ) out, vhp_val = autogradF.vhp(bar, inputs, v, create_graph=True) self._assert_same_struct(vhp_val, inputs) self.assertIsNotNone(out.grad_fn) self.assertIsNotNone(vhp_val[0].grad_fn) self.assertIsNotNone(vhp_val[1].grad_fn) - gradcheck(lambda *args: autogradF.vhp(bar, args[:2], args[2:], create_graph=True)[1], inputs + v) - gradgradcheck(lambda *args: autogradF.vhp(bar, args[:2], args[2:], create_graph=True)[1], inputs + v) + gradcheck( + lambda *args: autogradF.vhp(bar, args[:2], args[2:], create_graph=True)[1], + inputs + v, + ) + gradgradcheck( + lambda *args: autogradF.vhp(bar, args[:2], args[2:], create_graph=True)[1], + inputs + v, + ) def foo(*args): x, y = args[:2] @@ -1196,7 +1473,13 @@ class TestAutogradFunctional(TestCase): x = x.cos() val, grad = autogradF.vhp(bar, (x, y), v, create_graph=True) - return val.cos() + grad[0].cos().sum() + grad[1].cos() + x.cos().sum() + y.cos() + return ( + val.cos() + + grad[0].cos().sum() + + grad[1].cos() + + x.cos().sum() + + y.cos() + ) gradcheck(foo, inputs + v) gradgradcheck(foo, inputs + v) @@ -1215,10 +1498,14 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) res = autogradF.hvp(foo, inp, v) - with self.assertRaisesRegex(TypeError, "The inputs given to hvp must be either a Tensor"): + with self.assertRaisesRegex( + TypeError, "The inputs given to hvp must be either a Tensor" + ): res = autogradF.hvp(foo, (inp, 2), v) - with self.assertRaisesRegex(TypeError, "The outputs of the user-provided function given to hvp must"): + with self.assertRaisesRegex( + TypeError, "The outputs of the user-provided function given to hvp must" + ): res = autogradF.hvp(bar, inp, v) err_msg_out = "The Tensor returned by the function given to hvp should contain a single element" @@ -1228,7 +1515,10 @@ class TestAutogradFunctional(TestCase): with 
self.assertRaisesRegex(RuntimeError, "v has invalid size:"): res = autogradF.hvp(foo, inp, ctors.rand(5)) - with self.assertRaisesRegex(TypeError, "The v given to hvp must be either a Tensor or a tuple of Tensors"): + with self.assertRaisesRegex( + TypeError, + "The v given to hvp must be either a Tensor or a tuple of Tensors", + ): res = autogradF.hvp(foo, inp, (v, 2)) res = autogradF.hvp(foo, inp, v) @@ -1258,28 +1548,38 @@ class TestAutogradFunctional(TestCase): inp = ctors.rand(4) v = ctors.rand(4) - with self.assertRaisesRegex(RuntimeError, "Output 0 of the user-provided function does not require gradients."): + with self.assertRaisesRegex( + RuntimeError, + "Output 0 of the user-provided function does not require gradients.", + ): res = autogradF.hvp(foo, inp, v, strict=True) res = autogradF.hvp(foo, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "The output of the user-provided function is independent of input 0"): + with self.assertRaisesRegex( + RuntimeError, + "The output of the user-provided function is independent of input 0", + ): res = autogradF.hvp(bar, inp, v, strict=True) res = autogradF.hvp(bar, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) + self.assertEqual(res[1].abs().sum(), 0.0) - with self.assertRaisesRegex(RuntimeError, "jacobian of the user-provided function with respect to input 0 is"): + with self.assertRaisesRegex( + RuntimeError, + "jacobian of the user-provided function with respect to input 0 is", + ): res = autogradF.hvp(bar2, inp, v, strict=True) res = autogradF.hvp(bar2, inp, v, strict=False) self._assert_same_struct(res[1], inp) - self.assertEqual(res[1].abs().sum(), 0.) 
+ self.assertEqual(res[1].abs().sum(), 0.0) @base_and_logging_tensor def test_hvp_no_grad(self, ctors): def reducer(x): return x.exp().sum() + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) with torch.no_grad(): @@ -1321,6 +1621,7 @@ class TestAutogradFunctional(TestCase): def test_hvp_scalar(self, ctors): def reducer(x): return x.exp().sum() + inputs = ctors.rand(4, 4) v = ctors.ones(4, 4) res = autogradF.hvp(reducer, inputs, v) @@ -1336,6 +1637,7 @@ class TestAutogradFunctional(TestCase): def bad_reducer(x): return x.exp().sum().view(1, 1, 1) + inputs = ctors.rand(4, 4) v = ctors.rand(4, 4) res = autogradF.hvp(bad_reducer, inputs, v) @@ -1353,24 +1655,38 @@ class TestAutogradFunctional(TestCase): self.assertIsNotNone(res[0].grad_fn) self.assertIsNotNone(res[1].grad_fn) - gradcheck(lambda inp, v: autogradF.hvp(foo, inp, v, create_graph=True), (inputs, v)) - gradgradcheck(lambda inp, v: autogradF.hvp(foo, inp, v, create_graph=True), (inputs, v)) + gradcheck( + lambda inp, v: autogradF.hvp(foo, inp, v, create_graph=True), (inputs, v) + ) + gradgradcheck( + lambda inp, v: autogradF.hvp(foo, inp, v, create_graph=True), (inputs, v) + ) def bar(a, b): return (a + 3 * b.narrow(0, 0, 3)).exp().sum() - inputs = (ctors.rand(3, dtype=torch.double, requires_grad=True), - ctors.rand(4, dtype=torch.double, requires_grad=True)) - v = (ctors.ones(3, dtype=torch.double, requires_grad=True), - ctors.ones(4, dtype=torch.double, requires_grad=True)) + inputs = ( + ctors.rand(3, dtype=torch.double, requires_grad=True), + ctors.rand(4, dtype=torch.double, requires_grad=True), + ) + v = ( + ctors.ones(3, dtype=torch.double, requires_grad=True), + ctors.ones(4, dtype=torch.double, requires_grad=True), + ) out, hvp_val = autogradF.hvp(bar, inputs, v, create_graph=True) self._assert_same_struct(hvp_val, inputs) self.assertIsNotNone(out.grad_fn) self.assertIsNotNone(hvp_val[0].grad_fn) self.assertIsNotNone(hvp_val[1].grad_fn) - gradcheck(lambda *args: autogradF.hvp(bar, args[:2], args[2:], create_graph=True)[1], inputs + v) - gradgradcheck(lambda *args: autogradF.hvp(bar, args[:2], args[2:], create_graph=True)[1], inputs + v) + gradcheck( + lambda *args: autogradF.hvp(bar, args[:2], args[2:], create_graph=True)[1], + inputs + v, + ) + gradgradcheck( + lambda *args: autogradF.hvp(bar, args[:2], args[2:], create_graph=True)[1], + inputs + v, + ) def foo(*args): x, y = args[:2] @@ -1379,7 +1695,13 @@ class TestAutogradFunctional(TestCase): x = x.cos() val, grad = autogradF.hvp(bar, (x, y), v, create_graph=True) - return val.cos() + grad[0].cos().sum() + grad[1].cos() + x.cos().sum() + y.cos() + return ( + val.cos() + + grad[0].cos().sum() + + grad[1].cos() + + x.cos().sum() + + y.cos() + ) gradcheck(foo, inputs + v) gradgradcheck(foo, inputs + v) @@ -1387,7 +1709,7 @@ class TestAutogradFunctional(TestCase): @base_and_logging_tensor def test_jacobian_match_vjp_jvp(self, ctors): def foo(x): - return x ** 3 + x.sum() + return x**3 + x.sum() inputs = ctors.rand(4) v = ctors.rand(4) @@ -1414,7 +1736,8 @@ class TestAutogradFunctional(TestCase): self.assertEqual(hvp, torch.mm(hes, v.unsqueeze(1)).squeeze(1)) self.assertEqual(vhp, torch.mm(v.unsqueeze(0), hes).squeeze(0)) + instantiate_parametrized_tests(TestAutogradFunctional) -if __name__ == '__main__': +if __name__ == "__main__": run_tests() diff --git a/test/backends/xeon/test_launch.py b/test/backends/xeon/test_launch.py index 9e5f4def951a..bab100601521 100644 --- a/test/backends/xeon/test_launch.py +++ b/test/backends/xeon/test_launch.py @@ -1,11 +1,13 @@ # Owner(s): 
["module: intel"] -from torch.testing._internal.common_utils import TestCase, run_tests, IS_LINUX import shutil import subprocess import tempfile import unittest +from torch.testing._internal.common_utils import IS_LINUX, run_tests, TestCase + + @unittest.skipIf(not IS_LINUX, "Only works on linux") class TestTorchrun(TestCase): def setUp(self): @@ -37,6 +39,7 @@ class TestTorchrun(TestCase): 15,7,1,1 """ from torch.backends.xeon.run_cpu import _CPUinfo + cpuinfo = _CPUinfo(lscpu_info) assert cpuinfo._physical_core_nums() == 8 assert cpuinfo._logical_core_nums() == 16 @@ -45,21 +48,43 @@ class TestTorchrun(TestCase): assert cpuinfo.get_node_logical_cores(0) == [0, 1, 2, 3, 8, 9, 10, 11] assert cpuinfo.get_node_logical_cores(1) == [4, 5, 6, 7, 12, 13, 14, 15] assert cpuinfo.get_all_physical_cores() == [0, 1, 2, 3, 4, 5, 6, 7] - assert cpuinfo.get_all_logical_cores() == [0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15] + assert cpuinfo.get_all_logical_cores() == [ + 0, + 1, + 2, + 3, + 8, + 9, + 10, + 11, + 4, + 5, + 6, + 7, + 12, + 13, + 14, + 15, + ] assert cpuinfo.numa_aware_check([0, 1, 2, 3]) == [0] assert cpuinfo.numa_aware_check([4, 5, 6, 7]) == [1] assert cpuinfo.numa_aware_check([2, 3, 4, 5]) == [0, 1] def test_multi_threads(self): num = 0 - with subprocess.Popen(f"python -m torch.backends.xeon.run_cpu --ninstances 4 --use-default-allocator \ + with subprocess.Popen( + f"python -m torch.backends.xeon.run_cpu --ninstances 4 --use-default-allocator \ --disable-iomp --disable-numactl --disable-taskset --log-path {self._test_dir} --no-python pwd", - shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p: + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) as p: for line in p.stdout.readlines(): segs = str(line, "utf-8").strip().split("-") if segs[-1].strip() == "pwd": num += 1 assert num == 4, "Failed to launch multiple instances for inference" + if __name__ == "__main__": run_tests() diff --git a/test/benchmark_utils/test_benchmark_utils.py b/test/benchmark_utils/test_benchmark_utils.py index 2267262551b4..ff3538769e06 100644 --- a/test/benchmark_utils/test_benchmark_utils.py +++ b/test/benchmark_utils/test_benchmark_utils.py @@ -6,19 +6,26 @@ import os import re import textwrap import timeit -from typing import Any, List, Tuple import unittest +from typing import Any, List, Tuple -import torch -import torch.utils.benchmark as benchmark_utils -from torch.testing._internal.common_utils import TestCase, run_tests, IS_SANDCASTLE, IS_WINDOWS, slowTest, TEST_WITH_ASAN import expecttest import numpy as np +import torch +import torch.utils.benchmark as benchmark_utils +from torch.testing._internal.common_utils import ( + IS_SANDCASTLE, + IS_WINDOWS, + run_tests, + slowTest, + TEST_WITH_ASAN, + TestCase, +) + CALLGRIND_ARTIFACTS: str = os.path.join( - os.path.split(os.path.abspath(__file__))[0], - "callgrind_artifacts.json" + os.path.split(os.path.abspath(__file__))[0], "callgrind_artifacts.json" ) @@ -33,13 +40,13 @@ def generate_callgrind_artifacts() -> None: """ print("Regenerating callgrind artifact.") - stats_no_data = benchmark_utils.Timer( - "y = torch.ones(())" - ).collect_callgrind(number=1000) + stats_no_data = benchmark_utils.Timer("y = torch.ones(())").collect_callgrind( + number=1000 + ) - stats_with_data = benchmark_utils.Timer( - "y = torch.ones((1,))" - ).collect_callgrind(number=1000) + stats_with_data = benchmark_utils.Timer("y = torch.ones((1,))").collect_callgrind( + number=1000 + ) user = os.getenv("USER") @@ -59,7 +66,9 @@ def 
generate_callgrind_artifacts() -> None: json.dump(artifacts, f, indent=4) -def load_callgrind_artifacts() -> Tuple[benchmark_utils.CallgrindStats, benchmark_utils.CallgrindStats]: +def load_callgrind_artifacts() -> ( + Tuple[benchmark_utils.CallgrindStats, benchmark_utils.CallgrindStats] +): """Hermetic artifact to unit test Callgrind wrapper. In addition to collecting counts, this wrapper provides some facilities for @@ -76,8 +85,7 @@ def load_callgrind_artifacts() -> Tuple[benchmark_utils.CallgrindStats, benchmar pattern = re.compile(r"^\s*([0-9]+)\s(.+)$") def to_function_counts( - count_strings: List[str], - inclusive: bool + count_strings: List[str], inclusive: bool ) -> benchmark_utils.FunctionCounts: data: List[benchmark_utils.FunctionCount] = [] for cs in count_strings: @@ -89,8 +97,8 @@ def load_callgrind_artifacts() -> Tuple[benchmark_utils.CallgrindStats, benchmar data.append(benchmark_utils.FunctionCount(count=int(c), function=fn)) return benchmark_utils.FunctionCounts( - tuple(sorted(data, reverse=True)), - inclusive=inclusive) + tuple(sorted(data, reverse=True)), inclusive=inclusive + ) baseline_inclusive = to_function_counts(artifacts["baseline_inclusive"], True) baseline_exclusive = to_function_counts(artifacts["baseline_exclusive"], False) @@ -101,8 +109,12 @@ def load_callgrind_artifacts() -> Tuple[benchmark_utils.CallgrindStats, benchmar built_with_debug_symbols=True, baseline_inclusive_stats=baseline_inclusive, baseline_exclusive_stats=baseline_exclusive, - stmt_inclusive_stats=to_function_counts(artifacts["ones_no_data_inclusive"], True), - stmt_exclusive_stats=to_function_counts(artifacts["ones_no_data_exclusive"], False), + stmt_inclusive_stats=to_function_counts( + artifacts["ones_no_data_inclusive"], True + ), + stmt_exclusive_stats=to_function_counts( + artifacts["ones_no_data_exclusive"], False + ), stmt_callgrind_out=None, ) @@ -112,8 +124,12 @@ def load_callgrind_artifacts() -> Tuple[benchmark_utils.CallgrindStats, benchmar built_with_debug_symbols=True, baseline_inclusive_stats=baseline_inclusive, baseline_exclusive_stats=baseline_exclusive, - stmt_inclusive_stats=to_function_counts(artifacts["ones_with_data_inclusive"], True), - stmt_exclusive_stats=to_function_counts(artifacts["ones_with_data_exclusive"], False), + stmt_inclusive_stats=to_function_counts( + artifacts["ones_with_data_inclusive"], True + ), + stmt_exclusive_stats=to_function_counts( + artifacts["ones_with_data_exclusive"], False + ), stmt_callgrind_out=None, ) @@ -127,14 +143,12 @@ class MyModule(torch.nn.Module): class TestBenchmarkUtils(TestCase): def regularizeAndAssertExpectedInline( - self, x: Any, - expect: str, - indent: int = 12 + self, x: Any, expect: str, indent: int = 12 ) -> None: x_str: str = re.sub( "object at 0x[0-9a-fA-F]+>", "object at 0xXXXXXXXXXXXX>", - x if isinstance(x, str) else repr(x) + x if isinstance(x, str) else repr(x), ) if "\n" in x_str: # Indent makes the reference align at the call site. @@ -157,15 +171,19 @@ class TestBenchmarkUtils(TestCase): median = timer.adaptive_autorange(threshold=0.5).median # Test that multi-line statements work properly. 
- median = benchmark_utils.Timer( - stmt=""" + median = ( + benchmark_utils.Timer( + stmt=""" with torch.no_grad(): y = x + 1""", - setup=""" + setup=""" x = torch.ones((1,), requires_grad=True) for _ in range(5): x = x + 1.0""", - ).timeit(5).median + ) + .timeit(5) + .median + ) self.assertIsInstance(sample, float) @slowTest @@ -173,7 +191,7 @@ class TestBenchmarkUtils(TestCase): @unittest.skipIf(True, "Failing on clang, see 74398") def test_timer_tiny_fast_snippet(self): timer = benchmark_utils.Timer( - 'auto x = 1;(void)x;', + "auto x = 1;(void)x;", timer=timeit.default_timer, language=benchmark_utils.Language.CPP, ) @@ -222,16 +240,16 @@ class TestBenchmarkUtils(TestCase): return max(self._random_state.normal(mean, mean * noise_level), 5e-9) def timeit(self, number): - return sum([ - # First timer invocation - self.sample(self._timer_cost, self._timer_noise_level), - - # Stmt body - self.sample(self._mean_cost * number, self._function_noise_level), - - # Second timer invocation - self.sample(self._timer_cost, self._timer_noise_level), - ]) + return sum( + [ + # First timer invocation + self.sample(self._timer_cost, self._timer_noise_level), + # Stmt body + self.sample(self._mean_cost * number, self._function_noise_level), + # Second timer invocation + self.sample(self._timer_cost, self._timer_noise_level), + ] + ) def test_adaptive_timer(self): class MockTimer(benchmark_utils.Timer): @@ -245,7 +263,6 @@ class TestBenchmarkUtils(TestCase): _function_costs = ( self._MockTimer._function_costs[0], self._MockTimer._function_costs[1], - # GPU should be faster once there is enough work. ("expensive_fn()", 5e-6), ) @@ -261,7 +278,7 @@ class TestBenchmarkUtils(TestCase): pass Median: 7.98 ns IQR: 0.52 ns (7.74 to 8.26) - 125 measurements, 10000000 runs per measurement, 1 thread""" + 125 measurements, 10000000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -271,7 +288,7 @@ class TestBenchmarkUtils(TestCase): pass Median: 7.86 ns IQR: 0.71 ns (7.63 to 8.34) - 6 measurements, 1000000 runs per measurement, 1 thread""" + 6 measurements, 1000000 runs per measurement, 1 thread""", ) # Check against strings so we can reuse expect infra. 
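For orientation, a minimal sketch of the Timer API that the surrounding hunks only re-wrap, assuming nothing beyond what these tests themselves exercise (`Timer`, `timeit`, `adaptive_autorange(threshold=...)`, and the `.median` attribute of the returned Measurement); the `stmt`/`setup` strings are illustrative, not taken from this diff:

    import torch.utils.benchmark as benchmark_utils

    # timeit(number) runs `stmt` `number` times per timing loop and returns a
    # Measurement; `.median` is the median runtime in seconds. Note that the
    # Timer's globals namespace automatically includes `torch`.
    m = benchmark_utils.Timer(
        stmt="y = x + 1",
        setup="x = torch.ones((1,))",
    ).timeit(1000)
    print(m.median)

    # adaptive_autorange() keeps re-measuring until the spread of the samples
    # (IQR relative to the median) falls below `threshold` or a time budget
    # is exhausted, which is the behavior test_timer above relies on.
    m = benchmark_utils.Timer("y = torch.ones(())").adaptive_autorange(threshold=0.5)
    print(m.median)
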
@@ -287,7 +304,7 @@ class TestBenchmarkUtils(TestCase): cheap_fn() Median: 3.98 us IQR: 0.27 us (3.85 to 4.12) - 252 measurements, 10000 runs per measurement, 1 thread""" + 252 measurements, 10000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -297,7 +314,7 @@ class TestBenchmarkUtils(TestCase): cheap_fn() Median: 4.16 us IQR: 0.22 us (4.04 to 4.26) - 4 measurements, 1000 runs per measurement, 1 thread""" + 4 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -307,7 +324,7 @@ class TestBenchmarkUtils(TestCase): expensive_fn() Median: 19.97 us IQR: 1.35 us (19.31 to 20.65) - 501 measurements, 1000 runs per measurement, 1 thread""" + 501 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -317,7 +334,7 @@ class TestBenchmarkUtils(TestCase): expensive_fn() Median: 20.79 us IQR: 1.09 us (20.20 to 21.29) - 4 measurements, 1000 runs per measurement, 1 thread""" + 4 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -327,7 +344,7 @@ class TestBenchmarkUtils(TestCase): pass Median: 7.92 ns IQR: 0.43 ns (7.75 to 8.17) - 13 measurements, 100000000 runs per measurement, 1 thread""" + 13 measurements, 100000000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -337,7 +354,7 @@ class TestBenchmarkUtils(TestCase): pass Median: 7.75 ns IQR: 0.57 ns (7.56 to 8.13) - 4 measurements, 10000000 runs per measurement, 1 thread""" + 4 measurements, 10000000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -347,7 +364,7 @@ class TestBenchmarkUtils(TestCase): cheap_fn() Median: 4.04 us IQR: 0.30 us (3.90 to 4.19) - 25 measurements, 100000 runs per measurement, 1 thread""" + 25 measurements, 100000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -357,7 +374,7 @@ class TestBenchmarkUtils(TestCase): cheap_fn() Median: 4.09 us IQR: 0.38 us (3.90 to 4.28) - 4 measurements, 100000 runs per measurement, 1 thread""" + 4 measurements, 100000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -367,7 +384,7 @@ class TestBenchmarkUtils(TestCase): expensive_fn() Median: 4.98 us IQR: 0.31 us (4.83 to 5.13) - 20 measurements, 100000 runs per measurement, 1 thread""" + 20 measurements, 100000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -377,7 +394,7 @@ class TestBenchmarkUtils(TestCase): expensive_fn() Median: 5.01 us IQR: 0.28 us (4.87 to 5.15) - 4 measurements, 10000 runs per measurement, 1 thread""" + 4 measurements, 10000 runs per measurement, 1 thread""", ) # Make sure __repr__ is reasonable for @@ -398,7 +415,7 @@ class TestBenchmarkUtils(TestCase): Median: 10.06 us IQR: 0.54 us (9.73 to 10.27) - 20 measurements, 1000 runs per measurement, 1 thread""" + 20 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -411,7 +428,7 @@ class TestBenchmarkUtils(TestCase): Median: 10.06 us IQR: 0.54 us (9.73 to 10.27) - 20 measurements, 1000 runs per measurement, 1 thread""" + 20 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -425,7 +442,7 @@ class TestBenchmarkUtils(TestCase): x + 1 (no grad): scalar_add Median: 10.06 us IQR: 0.54 us (9.73 to 10.27) - 20 measurements, 1000 runs per measurement, 1 thread""" + 20 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ 
-443,7 +460,7 @@ class TestBenchmarkUtils(TestCase): setup: setup_fn() Median: 10.06 us IQR: 0.54 us (9.73 to 10.27) - 20 measurements, 1000 runs per measurement, 1 thread""" + 20 measurements, 1000 runs per measurement, 1 thread""", ) self.regularizeAndAssertExpectedInline( @@ -471,7 +488,7 @@ class TestBenchmarkUtils(TestCase): Median: 10.06 us IQR: 0.54 us (9.73 to 10.27) - 20 measurements, 1000 runs per measurement, 16 threads""" + 20 measurements, 1000 runs per measurement, 16 threads""", ) @slowTest @@ -482,24 +499,23 @@ class TestBenchmarkUtils(TestCase): with self.assertRaisesRegex( ValueError, r"`collect_callgrind` requires that globals be wrapped " - r"in `CopyIfCallgrind` so that serialization is explicit." + r"in `CopyIfCallgrind` so that serialization is explicit.", ): - benchmark_utils.Timer( - "pass", - globals={"x": 1} - ).collect_callgrind(collect_baseline=False) + benchmark_utils.Timer("pass", globals={"x": 1}).collect_callgrind( + collect_baseline=False + ) with self.assertRaisesRegex( # Subprocess raises AttributeError (from pickle), # _ValgrindWrapper re-raises as generic OSError. - OSError, "AttributeError: Can't get attribute 'MyModule'" + OSError, + "AttributeError: Can't get attribute 'MyModule'", ): benchmark_utils.Timer( "model(1)", - globals={"model": benchmark_utils.CopyIfCallgrind(MyModule())} + globals={"model": benchmark_utils.CopyIfCallgrind(MyModule())}, ).collect_callgrind(collect_baseline=False) - @torch.jit.script def add_one(x): return x + 1 @@ -516,9 +532,9 @@ class TestBenchmarkUtils(TestCase): import sys sys.path.append({repr(os.path.split(os.path.abspath(__file__))[0])}) from test_benchmark_utils import MyModule - """ - ) - } + """, + ), + }, ) stats = timer.collect_callgrind(number=1000) @@ -538,13 +554,22 @@ class TestBenchmarkUtils(TestCase): assert isinstance(stats, tuple) # Check that the repeats are at least somewhat repeatable. (within 10 instructions per iter) - counts = collections.Counter([s.counts(denoise=True) // 10_000 * 10_000 for s in stats]) - self.assertGreater(max(counts.values()), 1, f"Every instruction count total was unique: {counts}") + counts = collections.Counter( + [s.counts(denoise=True) // 10_000 * 10_000 for s in stats] + ) + self.assertGreater( + max(counts.values()), + 1, + f"Every instruction count total was unique: {counts}", + ) + + from torch.utils.benchmark.utils.valgrind_wrapper.timer_interface import ( + wrapper_singleton, + ) - from torch.utils.benchmark.utils.valgrind_wrapper.timer_interface import wrapper_singleton self.assertIsNone( wrapper_singleton()._bindings_module, - "JIT'd bindings are only for back testing." 
+ "JIT'd bindings are only for back testing.", ) @slowTest @@ -558,28 +583,29 @@ class TestBenchmarkUtils(TestCase): timer=timeit.default_timer, language="c++", ) - stats = [ - timer.collect_callgrind() - for _ in range(3) - ] + stats = [timer.collect_callgrind() for _ in range(3)] counts = [s.counts() for s in stats] - self.assertGreater( - min(counts), 0, "No stats were collected") + self.assertGreater(min(counts), 0, "No stats were collected") self.assertEqual( - min(counts), max(counts), "C++ Callgrind should be deterministic") + min(counts), max(counts), "C++ Callgrind should be deterministic" + ) for s in stats: self.assertEqual( - s.counts(denoise=True), s.counts(denoise=False), - "De-noising should not apply to C++.") + s.counts(denoise=True), + s.counts(denoise=False), + "De-noising should not apply to C++.", + ) stats = timer.collect_callgrind(number=1000, repeats=20) assert isinstance(stats, tuple) # NB: Unlike the example above, there is no expectation that all # repeats will be identical. - counts = collections.Counter([s.counts(denoise=True) // 10_000 * 10_000 for s in stats]) + counts = collections.Counter( + [s.counts(denoise=True) // 10_000 * 10_000 for s in stats] + ) self.assertGreater(max(counts.values()), 1, repr(counts)) def test_manipulate_callgrind_stats(self): @@ -587,7 +613,8 @@ class TestBenchmarkUtils(TestCase): # Mock `torch.set_printoptions(linewidth=160)` wide_linewidth = benchmark_utils.FunctionCounts( - stats_no_data.stats(inclusive=False)._data, False, _linewidth=160) + stats_no_data.stats(inclusive=False)._data, False, _linewidth=160 + ) for l in repr(wide_linewidth).splitlines(keepends=False): self.assertLessEqual(len(l), 160) @@ -595,10 +622,12 @@ class TestBenchmarkUtils(TestCase): self.assertEqual( # `delta` is just a convenience method. stats_with_data.delta(stats_no_data)._data, - (stats_with_data.stats() - stats_no_data.stats())._data + (stats_with_data.stats() - stats_no_data.stats())._data, ) - deltas = stats_with_data.as_standardized().delta(stats_no_data.as_standardized()) + deltas = stats_with_data.as_standardized().delta( + stats_no_data.as_standardized() + ) def custom_transforms(fn: str): fn = re.sub(re.escape("/usr/include/c++/8/bits/"), "", fn) @@ -703,7 +732,7 @@ class TestBenchmarkUtils(TestCase): 2000 /usr/include/c++/8/bits/atomic_base.h:at::Tensor at::detail::make_tensor ... t_null_type >&&, c10::DispatchKey&&, caffe2::TypeMeta&) 2000 /usr/include/c++/8/array:at::Tensor& c10::Dispatcher::callWithDispatchKe ... 
, c10::Scalar)> const&, c10::DispatchKey, at::Tensor&, c10::Scalar) const - Total: 8869966""" # noqa: B950 + Total: 8869966""", # noqa: B950 ) self.regularizeAndAssertExpectedInline( @@ -821,10 +850,8 @@ class TestBenchmarkUtils(TestCase): costs = ( # overhead_optimized_fn() (1e-6, 1e-9), - # compute_optimized_fn() (3e-6, 5e-10), - # special_case_fn() [square inputs only] (1e-6, 4e-10), ) @@ -840,8 +867,7 @@ class TestBenchmarkUtils(TestCase): # overhead_optimized_fn() class _MockTimer_0(self._MockTimer): _function_costs = tuple( - (f"fn({i}, {j})", costs[0][0] + costs[0][1] * i * j) - for i, j in sizes + (f"fn({i}, {j})", costs[0][0] + costs[0][1] * i * j) for i, j in sizes ) class MockTimer_0(benchmark_utils.Timer): @@ -850,8 +876,7 @@ class TestBenchmarkUtils(TestCase): # compute_optimized_fn() class _MockTimer_1(self._MockTimer): _function_costs = tuple( - (f"fn({i}, {j})", costs[1][0] + costs[1][1] * i * j) - for i, j in sizes + (f"fn({i}, {j})", costs[1][0] + costs[1][1] * i * j) for i, j in sizes ) class MockTimer_1(benchmark_utils.Timer): @@ -861,7 +886,8 @@ class TestBenchmarkUtils(TestCase): class _MockTimer_2(self._MockTimer): _function_costs = tuple( (f"fn({i}, {j})", costs[2][0] + costs[2][1] * i * j) - for i, j in sizes if i == j + for i, j in sizes + if i == j ) class MockTimer_2(benchmark_utils.Timer): @@ -913,7 +939,7 @@ class TestBenchmarkUtils(TestCase): compute_optimized | 3.1 | 4.0 | 11.2 | 2099.3 | 2099.3 special_case (square) | 1.1 | | 7.5 | | 1674.7 - Times are in microseconds (us).""" + Times are in microseconds (us).""", ) compare.trim_significant_figures() @@ -927,7 +953,7 @@ class TestBenchmarkUtils(TestCase): compute_optimized | 3 | 4.0 | 11 | 2100 | 2100 special_case (square) | 1 | | 8 | | 1700 - Times are in microseconds (us).""" + Times are in microseconds (us).""", ) compare.colorize() @@ -978,12 +1004,16 @@ class TestBenchmarkUtils(TestCase): self.assertEqual(columnwise_colored_actual, columnwise_colored_expected) self.assertEqual(rowwise_colored_actual, rowwise_colored_expected) - @unittest.skipIf(IS_WINDOWS and os.getenv("VC_YEAR") == "2019", "Random seed only accepts int32") + @unittest.skipIf( + IS_WINDOWS and os.getenv("VC_YEAR") == "2019", "Random seed only accepts int32" + ) def test_fuzzer(self): fuzzer = benchmark_utils.Fuzzer( parameters=[ benchmark_utils.FuzzedParameter( - "n", minval=1, maxval=16, distribution="loguniform")], + "n", minval=1, maxval=16, distribution="loguniform" + ) + ], tensors=[benchmark_utils.FuzzedTensor("x", size=("n",))], seed=0, ) @@ -995,9 +1025,8 @@ class TestBenchmarkUtils(TestCase): for i, (tensors, _, _) in enumerate(fuzzer.take(2)): x = tensors["x"] - self.assertEqual( - x, torch.tensor(expected_results[i]), rtol=1e-3, atol=1e-3) + self.assertEqual(x, torch.tensor(expected_results[i]), rtol=1e-3, atol=1e-3) -if __name__ == '__main__': +if __name__ == "__main__": run_tests() diff --git a/test/bottleneck_test/test_args.py b/test/bottleneck_test/test_args.py index 6c00920841f8..f84260b7daea 100644 --- a/test/bottleneck_test/test_args.py +++ b/test/bottleneck_test/test_args.py @@ -1,14 +1,15 @@ # Owner(s): ["module: unknown"] import argparse + import torch -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() # Required args. Raises error if they aren't passed. 
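The "raises error" behavior referenced in this comment is argparse exiting the process rather than raising a parser exception. A small standalone illustration, with a hypothetical --foo flag:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--foo", help="foo", required=True)

try:
    parser.parse_args([])  # --foo missing
except SystemExit as e:
    # argparse prints a usage message to stderr and exits with status 2.
    assert e.code == 2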
- parser.add_argument('--foo', help='foo', required=True) - parser.add_argument('--bar', help='bar', required=True) + parser.add_argument("--foo", help="foo", required=True) + parser.add_argument("--bar", help="bar", required=True) _ = parser.parse_args() x = torch.ones((3, 3), requires_grad=True) diff --git a/test/bottleneck_test/test_cuda.py b/test/bottleneck_test/test_cuda.py index 65bbcac0f015..012b61daaa45 100644 --- a/test/bottleneck_test/test_cuda.py +++ b/test/bottleneck_test/test_cuda.py @@ -25,5 +25,5 @@ def main(): optimizer.step() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/test/conftest.py b/test/conftest.py index 6253ca7c0ba8..9ba728689285 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,25 +1,24 @@ -from _pytest.junitxml import LogXML, _NodeReporter, bin_xml_escape -from _pytest.terminal import _get_raw_skip_reason -from _pytest.stash import StashKey -from _pytest.reports import TestReport -from _pytest.config.argparsing import Parser -from _pytest.config import filename_arg -from _pytest.config import Config -from _pytest._code.code import ReprFileLocation -from _pytest.python import Module -from typing import Any, List, Union -from typing import Optional -from types import MethodType -import xml.etree.ElementTree as ET -import functools -import pytest -import sys -import os import copy +import functools import json +import os import re +import sys +import xml.etree.ElementTree as ET from collections import defaultdict -from pytest_shard_custom import PytestShardPlugin, pytest_addoptions as shard_addoptions +from types import MethodType +from typing import Any, List, Optional, Union + +import pytest +from _pytest._code.code import ReprFileLocation +from _pytest.config import Config, filename_arg +from _pytest.config.argparsing import Parser +from _pytest.junitxml import _NodeReporter, bin_xml_escape, LogXML +from _pytest.python import Module +from _pytest.reports import TestReport +from _pytest.stash import StashKey +from _pytest.terminal import _get_raw_skip_reason +from pytest_shard_custom import pytest_addoptions as shard_addoptions, PytestShardPlugin # a lot of this file is copied from _pytest.junitxml and modified to get rerun info @@ -42,7 +41,7 @@ def pytest_addoption(parser: Parser) -> None: dest="stepcurrent", ) - parser.addoption("--use-main-module", action='store_true') + parser.addoption("--use-main-module", action="store_true") group = parser.getgroup("terminal reporting") group.addoption( "--junit-xml-reruns", @@ -143,11 +142,14 @@ class _NodeReporterReruns(_NodeReporter): skipreason = skipreason[9:] details = f"{filename}:{lineno}: {skipreason}" - skipped = ET.Element("skipped", type="pytest.skip", message=bin_xml_escape(skipreason)) + skipped = ET.Element( + "skipped", type="pytest.skip", message=bin_xml_escape(skipreason) + ) skipped.text = bin_xml_escape(details) self.append(skipped) self.write_captured_output(report) + class LogXMLReruns(LogXML): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -223,7 +225,7 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): def pytest_pycollect_makemodule(module_path, path, parent) -> Module: if parent.config.getoption("--use-main-module"): mod = Module.from_parent(parent, path=module_path) - mod._getobj = MethodType(lambda x: sys.modules['__main__'], mod) + mod._getobj = MethodType(lambda x: sys.modules["__main__"], mod) return mod @@ -275,7 +277,10 @@ def pytest_collection_modifyitems(items: List[Any]) -> None: test_name = item.name 
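The condition that continues below skips any collected item that is not in the disabled-tests mapping. Sketched standalone (mapping contents hypothetical; the real mapping is populated elsewhere in this file):

disabled_tests = {"TestFoo": {"test_bar"}}

def is_disabled(test_class: str, test_name: str) -> bool:
    # Mirrors the de Morgan complement of the `continue` guard below.
    return test_class in disabled_tests and test_name in disabled_tests[test_class]

assert is_disabled("TestFoo", "test_bar")
assert not is_disabled("TestFoo", "test_baz")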
test_class = item.parent.name
 
-        if test_class not in disabled_tests or test_name not in disabled_tests[test_class]:
+        if (
+            test_class not in disabled_tests
+            or test_name not in disabled_tests[test_class]
+        ):
             continue
 
         cpy = copy.copy(item)
diff --git a/test/cpp/aot_inductor/test.py b/test/cpp/aot_inductor/test.py
index cb75bd56e53c..65b05f619294 100644
--- a/test/cpp/aot_inductor/test.py
+++ b/test/cpp/aot_inductor/test.py
@@ -1,10 +1,10 @@
-
 import torch
 from torch._export import aot_compile
 from torch.export import Dim
 
 torch.manual_seed(1337)
 
+
 class Net(torch.nn.Module):
     def __init__(self, device):
         super().__init__()
@@ -17,6 +17,7 @@ class Net(torch.nn.Module):
         w = w_relu + self.w_add
         return torch.matmul(x, w)
 
+
 class NetWithTensorConstants(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -26,9 +27,11 @@ class NetWithTensorConstants(torch.nn.Module):
         z = self.w * x * y
         return z[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17]]
 
+
 data = {}
 data_with_tensor_constants = {}
 
+
 # Basic AOTI model test generation.
 def generate_basic_tests():
     for device in ["cpu", "cuda"]:
@@ -49,18 +52,24 @@ def generate_basic_tests():
             model,
             (x,),
             dynamic_shapes=dynamic_shapes,
-            options={"aot_inductor.use_runtime_constant_folding": use_runtime_constant_folding})
+            options={
+                "aot_inductor.use_runtime_constant_folding": use_runtime_constant_folding
+            },
+        )
 
         suffix = f"{device}"
         if use_runtime_constant_folding:
             suffix += "_use_runtime_constant_folding"
-        data.update({
-            f"model_so_path_{suffix}": model_so_path,
-            f"inputs_{suffix}": [x],
-            f"outputs_{suffix}": [ref_output],
-            f"w_pre_{suffix}": model.w_pre,
-            f"w_add_{suffix}": model.w_add,
-        })
+        data.update(
+            {
+                f"model_so_path_{suffix}": model_so_path,
+                f"inputs_{suffix}": [x],
+                f"outputs_{suffix}": [ref_output],
+                f"w_pre_{suffix}": model.w_pre,
+                f"w_add_{suffix}": model.w_add,
+            }
+        )
+
 
 # AOTI model which will create additional tensors during autograd.
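generate_basic_tests above drives torch._export.aot_compile. Reduced to its essentials (CPU-only, no dynamic shapes and no constant-folding options; Tiny and its input are illustrative, and a working C++ toolchain is assumed), the call looks roughly like this — the test additionally passes a dynamic_shapes spec built from torch.export.Dim:

import torch
from torch._export import aot_compile

class Tiny(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x) + 1

x = torch.randn(4, 8)
with torch.no_grad():
    # Returns the filesystem path of the compiled shared library.
    so_path = aot_compile(Tiny(), (x,))
print(so_path)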
def generate_test_with_additional_tensors(): @@ -72,20 +81,22 @@ def generate_test_with_additional_tensors(): torch._dynamo.reset() with torch.no_grad(): - model_so_path = aot_compile( - model, - (x, y)) + model_so_path = aot_compile(model, (x, y)) + + data_with_tensor_constants.update( + { + "model_so_path": model_so_path, + "inputs": [x, y], + "outputs": [ref_output], + "w": model.w, + } + ) - data_with_tensor_constants.update({ - "model_so_path": model_so_path, - "inputs": [x, y], - "outputs": [ref_output], - "w": model.w, - }) generate_basic_tests() generate_test_with_additional_tensors() + # Use this to communicate tensors to the cpp code class Serializer(torch.nn.Module): def __init__(self, data): @@ -93,5 +104,8 @@ class Serializer(torch.nn.Module): for key in data: setattr(self, key, data[key]) + torch.jit.script(Serializer(data)).save("data.pt") -torch.jit.script(Serializer(data_with_tensor_constants)).save("data_with_tensor_constants.pt") +torch.jit.script(Serializer(data_with_tensor_constants)).save( + "data_with_tensor_constants.pt" +) diff --git a/test/cpp/api/init_baseline.py b/test/cpp/api/init_baseline.py index 9ed88f7c226c..0fb09acb143b 100644 --- a/test/cpp/api/init_baseline.py +++ b/test/cpp/api/init_baseline.py @@ -1,6 +1,7 @@ """Script to generate baseline values from PyTorch initialization algorithms""" import sys + import torch HEADER = """ @@ -19,13 +20,13 @@ INITIALIZERS = { "Xavier_Uniform": lambda w: torch.nn.init.xavier_uniform(w), "Xavier_Normal": lambda w: torch.nn.init.xavier_normal(w), "Kaiming_Normal": lambda w: torch.nn.init.kaiming_normal(w), - "Kaiming_Uniform": lambda w: torch.nn.init.kaiming_uniform(w) + "Kaiming_Uniform": lambda w: torch.nn.init.kaiming_uniform(w), } def emit(initializer_parameter_map): # Don't write generated with an @ in front, else this file is recognized as generated. 
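One side note on the INITIALIZERS table above: xavier_uniform, xavier_normal, kaiming_normal, and kaiming_uniform are the deprecated aliases; current code would use the in-place, trailing-underscore variants, e.g.:

import torch

w = torch.empty(3, 5)
# In-place initializers mutate w and also return it.
torch.nn.init.xavier_uniform_(w)
torch.nn.init.kaiming_normal_(w)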
- print("// @{} from {}".format('generated', __file__)) + print("// @{} from {}".format("generated", __file__)) print(HEADER) for initializer_name, weights in initializer_parameter_map.items(): print(PARAMETERS.format(initializer_name)) @@ -63,10 +64,11 @@ def run(initializer): def main(): initializer_parameter_map = {} for initializer in INITIALIZERS.keys(): - sys.stderr.write(f'Evaluating {initializer} ...\n') + sys.stderr.write(f"Evaluating {initializer} ...\n") initializer_parameter_map[initializer] = run(initializer) emit(initializer_parameter_map) + if __name__ == "__main__": main() diff --git a/test/cpp/api/optim_baseline.py b/test/cpp/api/optim_baseline.py index 16d2508ab41c..7e278d4e4208 100644 --- a/test/cpp/api/optim_baseline.py +++ b/test/cpp/api/optim_baseline.py @@ -21,27 +21,43 @@ FOOTER = "} // namespace expected_parameters" PARAMETERS = "inline std::vector> {}() {{" OPTIMIZERS = { - "LBFGS" : lambda p: torch.optim.LBFGS(p, 1.0), - "LBFGS_with_line_search" : lambda p: torch.optim.LBFGS(p, 1.0, line_search_fn="strong_wolfe"), + "LBFGS": lambda p: torch.optim.LBFGS(p, 1.0), + "LBFGS_with_line_search": lambda p: torch.optim.LBFGS( + p, 1.0, line_search_fn="strong_wolfe" + ), "Adam": lambda p: torch.optim.Adam(p, 1.0), "Adam_with_weight_decay": lambda p: torch.optim.Adam(p, 1.0, weight_decay=1e-2), - "Adam_with_weight_decay_and_amsgrad": lambda p: torch.optim.Adam(p, 1.0, weight_decay=1e-6, amsgrad=True), + "Adam_with_weight_decay_and_amsgrad": lambda p: torch.optim.Adam( + p, 1.0, weight_decay=1e-6, amsgrad=True + ), "AdamW": lambda p: torch.optim.AdamW(p, 1.0), "AdamW_without_weight_decay": lambda p: torch.optim.AdamW(p, 1.0, weight_decay=0), "AdamW_with_amsgrad": lambda p: torch.optim.AdamW(p, 1.0, amsgrad=True), "Adagrad": lambda p: torch.optim.Adagrad(p, 1.0), - "Adagrad_with_weight_decay": lambda p: torch.optim.Adagrad(p, 1.0, weight_decay=1e-2), - "Adagrad_with_weight_decay_and_lr_decay": lambda p: torch.optim.Adagrad(p, 1.0, weight_decay=1e-6, lr_decay=1e-3), + "Adagrad_with_weight_decay": lambda p: torch.optim.Adagrad( + p, 1.0, weight_decay=1e-2 + ), + "Adagrad_with_weight_decay_and_lr_decay": lambda p: torch.optim.Adagrad( + p, 1.0, weight_decay=1e-6, lr_decay=1e-3 + ), "RMSprop": lambda p: torch.optim.RMSprop(p, 0.1), - "RMSprop_with_weight_decay": lambda p: torch.optim.RMSprop(p, 0.1, weight_decay=1e-2), - "RMSprop_with_weight_decay_and_centered": lambda p: torch.optim.RMSprop(p, 0.1, weight_decay=1e-6, centered=True), - "RMSprop_with_weight_decay_and_centered_and_momentum": - lambda p: torch.optim.RMSprop(p, 0.1, weight_decay=1e-6, centered=True, momentum=0.9), + "RMSprop_with_weight_decay": lambda p: torch.optim.RMSprop( + p, 0.1, weight_decay=1e-2 + ), + "RMSprop_with_weight_decay_and_centered": lambda p: torch.optim.RMSprop( + p, 0.1, weight_decay=1e-6, centered=True + ), + "RMSprop_with_weight_decay_and_centered_and_momentum": lambda p: torch.optim.RMSprop( + p, 0.1, weight_decay=1e-6, centered=True, momentum=0.9 + ), "SGD": lambda p: torch.optim.SGD(p, 0.1), "SGD_with_weight_decay": lambda p: torch.optim.SGD(p, 0.1, weight_decay=1e-2), - "SGD_with_weight_decay_and_momentum": lambda p: torch.optim.SGD(p, 0.1, momentum=0.9, weight_decay=1e-2), - "SGD_with_weight_decay_and_nesterov_momentum": - lambda p: torch.optim.SGD(p, 0.1, momentum=0.9, weight_decay=1e-6, nesterov=True), + "SGD_with_weight_decay_and_momentum": lambda p: torch.optim.SGD( + p, 0.1, momentum=0.9, weight_decay=1e-2 + ), + "SGD_with_weight_decay_and_nesterov_momentum": lambda p: torch.optim.SGD( 
+ p, 0.1, momentum=0.9, weight_decay=1e-6, nesterov=True + ), } @@ -75,11 +91,11 @@ def run(optimizer_name, iterations, sample_every): loss.backward() def closure(): - return torch.tensor([10.]) + return torch.tensor([10.0]) + optimizer.step(closure) if i % sample_every == 0: - values.append( [p.clone().flatten().data.numpy() for p in model.parameters()] ) @@ -89,7 +105,7 @@ def run(optimizer_name, iterations, sample_every): def emit(optimizer_parameter_map): # Don't write generated with an @ in front, else this file is recognized as generated. - print("// @{} from {}".format('generated', __file__)) + print("// @{} from {}".format("generated", __file__)) print(HEADER) for optimizer_name, parameters in optimizer_parameter_map.items(): print(PARAMETERS.format(optimizer_name)) @@ -115,7 +131,7 @@ def main(): optimizer_parameter_map = {} for optimizer in OPTIMIZERS.keys(): - sys.stderr.write(f'Evaluating {optimizer} ...\n') + sys.stderr.write(f"Evaluating {optimizer} ...\n") optimizer_parameter_map[optimizer] = run( optimizer, options.iterations, options.sample_every ) diff --git a/test/cpp/jit/tests_setup.py b/test/cpp/jit/tests_setup.py index b4643927a978..1edb19f21e62 100644 --- a/test/cpp/jit/tests_setup.py +++ b/test/cpp/jit/tests_setup.py @@ -1,5 +1,6 @@ -import sys import os +import sys + import torch @@ -21,7 +22,7 @@ class FileSetup: class EvalModeForLoadedModule(FileSetup): - path = 'dropout_model.pt' + path = "dropout_model.pt" def setup(self): class Model(torch.jit.ScriptModule): @@ -40,7 +41,7 @@ class EvalModeForLoadedModule(FileSetup): class SerializationInterop(FileSetup): - path = 'ivalue.pt' + path = "ivalue.pt" def setup(self): ones = torch.ones(2, 2) @@ -53,7 +54,7 @@ class SerializationInterop(FileSetup): # See testTorchSaveError in test/cpp/jit/tests.h for usage class TorchSaveError(FileSetup): - path = 'eager_value.pt' + path = "eager_value.pt" def setup(self): ones = torch.ones(2, 2) @@ -63,8 +64,9 @@ class TorchSaveError(FileSetup): torch.save(value, self.path, _use_new_zipfile_serialization=False) + class TorchSaveJitStream_CUDA(FileSetup): - path = 'saved_stream_model.pt' + path = "saved_stream_model.pt" def setup(self): if not torch.cuda.is_available(): @@ -77,7 +79,9 @@ class TorchSaveJitStream_CUDA(FileSetup): b = torch.rand(3, 4, device="cuda") with torch.cuda.stream(s): - is_stream_s = torch.cuda.current_stream(s.device_index()).id() == s.id() + is_stream_s = ( + torch.cuda.current_stream(s.device_index()).id() == s.id() + ) c = torch.cat((a, b), 0).to("cuda") s.synchronize() return is_stream_s, a, b, c @@ -93,9 +97,10 @@ tests = [ EvalModeForLoadedModule(), SerializationInterop(), TorchSaveError(), - TorchSaveJitStream_CUDA() + TorchSaveJitStream_CUDA(), ] + def setup(): for test in tests: test.setup()
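The FileSetup subclasses above all follow the same write-an-artifact-then-clean-up fixture pattern. A minimal standalone version (file name hypothetical, and shutdown() assumed to mirror the base class by deleting the artifact):

import os

import torch

class FileSetup:
    path = None

    def setup(self):
        raise NotImplementedError

    def shutdown(self):
        # Assumed cleanup contract: drop the artifact if setup() wrote it.
        if os.path.exists(self.path):
            os.remove(self.path)

class OnesTensor(FileSetup):
    path = "ones.pt"

    def setup(self):
        torch.save(torch.ones(2, 2), self.path)

fixture = OnesTensor()
fixture.setup()
assert torch.equal(torch.load(fixture.path), torch.ones(2, 2))
fixture.shutdown()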