[BE] Enable ruff's UP rules and autoformat benchmarks/ (#105429)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/105429
Approved by: https://github.com/malfet
Author: Justin Chu
Date: 2023-07-19 02:04:18 +00:00
Committed by: PyTorch MergeBot
Parent: 9c225c9b9a
Commit: 5ef023b05a
43 changed files with 129 additions and 151 deletions
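
Most of the diff below is mechanical: ruff's UP (pyupgrade) rules rewrite old string formatting, redundant `open()` modes, and Python 2-era class boilerplate. As a rough, hypothetical sketch of the patterns involved (names below are illustrative, not taken from the benchmarks):

```python
# Illustrative sketch of the pyupgrade-style rewrites applied in this diff.
name, size = "resnet50", 32

# UP032: str.format() -> f-string
assert "{} with batch size {}".format(name, size) == f"{name} with batch size {size}"

# UP015: drop redundant open() modes ("r" / text mode are the defaults)
with open(__file__) as f:            # same as open(__file__, "r")
    first_line = f.readline()

# UP004 / UP008: implicit object base class and zero-argument super()
class Base:                          # instead of `class Base(object):`
    def __init__(self):
        super().__init__()           # instead of `super(Base, self).__init__()`
```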

View File

@ -7,7 +7,7 @@ Result = namedtuple("Result", ["name", "base_time", "diff_time"])
def construct_name(fwd_bwd, test_name):
bwd = 'backward' in fwd_bwd
suite_name = fwd_bwd.replace('-backward', '')
return '{suite}[{test}]:{fwd_bwd}'.format(suite=suite_name, test=test_name, fwd_bwd='bwd' if bwd else 'fwd')
return f"{suite_name}[{test_name}]:{'bwd' if bwd else 'fwd'}"
def get_times(json_data):
r = {}
@ -23,9 +23,9 @@ parser.add_argument('diff', help='diff json file')
parser.add_argument('--format', default='md', type=str, help='output format (csv, md, json, table)')
args = parser.parse_args()
with open(args.base, "r") as base:
with open(args.base) as base:
base_times = get_times(json.load(base))
with open(args.diff, "r") as diff:
with open(args.diff) as diff:
diff_times = get_times(json.load(diff))
all_keys = set(base_times.keys()).union(diff_times.keys())
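
The new `construct_name` above nests a conditional expression directly in the f-string replacement field, which is valid syntax; a quick sketch with hypothetical inputs:

```python
# Conditional expressions may appear inside f-string replacement fields.
def construct_name(fwd_bwd: str, test_name: str) -> str:
    bwd = "backward" in fwd_bwd
    suite_name = fwd_bwd.replace("-backward", "")
    return f"{suite_name}[{test_name}]:{'bwd' if bwd else 'fwd'}"

assert construct_name("jit-backward", "lstm") == "jit[lstm]:bwd"
assert construct_name("jit", "lstm") == "jit[lstm]:fwd"
```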

View File

@ -46,7 +46,7 @@ unary_ops = [
torch.lgamma,
]
print("{:20s} {:>10s} {:>10s} {:>10s}".format("op", "eager", "nnc", "speedup"))
print(f"{'op':20s} {'eager':>10s} {'nnc':>10s} {'speedup':>10s}")
for op in unary_ops:
x = torch.rand((1024, 1024))
@ -69,7 +69,7 @@ for op in unary_ops:
def test_batch_norm():
op = F.batch_norm
print("{:20s} {:20s} {:>10s} {:>10s} {:>10s}".format("op", "shape", "eager", "nnc", "speedup"))
print(f"{'op':20s} {'shape':20s} {'eager':>10s} {'nnc':>10s} {'speedup':>10s}")
batch_norm_shapes = [
[1, 64, 112, 112],
[1, 256, 14, 14],
@ -83,8 +83,8 @@ def test_batch_norm():
[5, 512, 7, 7]]
for n, c, h, w in batch_norm_shapes:
x = torch.rand((n, c, h, w))
y = torch.rand((c))
z = torch.rand((c))
y = torch.rand(c)
z = torch.rand(c)
traced = torch.jit.trace(lambda x, y, z: op(x, y, z), (x, y, z))
# Warmup.
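
The format specs after the colon use the same mini-language for `str.format()` and f-strings, so the column widths and alignment in these table headers are unchanged; a small equivalence check:

```python
# Width/alignment specs carry over verbatim from str.format() to f-strings.
old = "{:20s} {:>10s} {:>10s} {:>10s}".format("op", "eager", "nnc", "speedup")
new = f"{'op':20s} {'eager':>10s} {'nnc':>10s} {'speedup':>10s}"
assert old == new
```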

View File

@ -102,7 +102,7 @@ def sweep(benchmark):
benchmarks = []
def append_benchmark(prefix, ranks, opts=None):
prefix = "%4d GPUs -- %s" % (len(ranks), prefix)
prefix = f"{len(ranks):4} GPUs -- {prefix}"
benchmarks.append((prefix, ranks, opts))
def local_print(msg):
@ -181,7 +181,7 @@ class TorchvisionBenchmark(Benchmark):
self.model = model
def __str__(self):
return "{} with batch size {}".format(self.model, self.batch_size)
return f"{self.model} with batch size {self.batch_size}"
def create_model(self):
return torchvision.models.__dict__[self.model]().to(self.device)
@ -212,7 +212,7 @@ def main():
# metadata, like measurements. Not for benchmarking itself.
dist.init_process_group(
backend="gloo",
init_method="tcp://{}:{}".format(args.master_addr, args.master_port),
init_method=f"tcp://{args.master_addr}:{args.master_port}",
rank=args.rank,
world_size=args.world_size,
)
@ -227,10 +227,10 @@ def main():
print("PyTorch distributed benchmark suite")
print("-----------------------------------")
print("")
print("* PyTorch version: {}".format(torch.__version__))
print("* CUDA version: {}".format(torch.version.cuda))
print("* Distributed backend: {}".format(args.distributed_backend))
print("* Maximum bucket size: {}MB".format(args.bucket_size))
print(f"* PyTorch version: {torch.__version__}")
print(f"* CUDA version: {torch.version.cuda}")
print(f"* Distributed backend: {args.distributed_backend}")
print(f"* Maximum bucket size: {args.bucket_size}MB")
print("")
print("--- nvidia-smi topo -m ---")
print("")
@ -261,7 +261,7 @@ def main():
benchmark_results = []
for benchmark in benchmarks:
if args.rank == 0:
print("\nBenchmark: {}".format(str(benchmark)))
print(f"\nBenchmark: {str(benchmark)}")
result = sweep(benchmark)
benchmark_results.append({
"model": benchmark.model,

View File

@ -10,7 +10,7 @@ import numpy as np
def load(path):
with open(path, 'r') as f:
with open(path) as f:
return json.load(f)
@ -26,12 +26,12 @@ def main():
jb = load(args.file[1])
keys = (set(ja.keys()) | set(jb.keys())) - {"benchmark_results"}
print("{:20s} {:>20s} {:>20s}".format("", "baseline", "test"))
print("{:20s} {:>20s} {:>20s}".format("", "-" * 20, "-" * 20))
print(f"{'':20s} {'baseline':>20s} {'test':>20s}")
print(f"{'':20s} {'-' * 20:>20s} {'-' * 20:>20s}")
for key in sorted(keys):
va = str(ja.get(key, "-"))
vb = str(jb.get(key, "-"))
print("{:20s} {:>20s} vs {:>20s}".format(key + ":", va, vb))
print(f"{key + ':':20s} {va:>20s} vs {vb:>20s}")
print("")
ba = ja["benchmark_results"]
@ -44,14 +44,14 @@ def main():
model = ra["model"]
batch_size = int(ra["batch_size"])
name = "{} with batch size {}".format(model, batch_size)
print("Benchmark: {}".format(name))
name = f"{model} with batch size {batch_size}"
print(f"Benchmark: {name}")
# Print header
print("")
print("{:>10s}".format(""), end='') # noqa: E999
print(f"{'':>10s}", end='') # noqa: E999
for _ in [75, 95]:
print("{:>16s}{:>10s}{:>10s}".format("sec/iter", "ex/sec", "diff"), end='') # noqa: E999
print(f"{'sec/iter':>16s}{'ex/sec':>10s}{'diff':>10s}", end='') # noqa: E999
print("")
# Print measurements
@ -66,13 +66,13 @@ def main():
ngpus = len(xa["ranks"])
ma = sorted(xa["measurements"])
mb = sorted(xb["measurements"])
print("{:>4d} GPUs:".format(ngpus), end='') # noqa: E999
print(f"{ngpus:>4d} GPUs:", end='') # noqa: E999
for p in [75, 95]:
va = np.percentile(ma, p)
vb = np.percentile(mb, p)
# We're measuring time, so lower is better (hence the negation)
delta = -100 * ((vb - va) / va)
print(" p{:02d}: {:8.3f}s {:7d}/s {:+8.1f}%".format(p, vb, int(batch_size / vb), delta), end='') # noqa: E999
print(f" p{p:02d}: {vb:8.3f}s {int(batch_size / vb):7d}/s {delta:+8.1f}%", end='') # noqa: E999
print("")
print("")

View File

@ -16,7 +16,7 @@ from torch.optim import Adam
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti']:
if abs(num) < 1024.0:
return "%3.2f%sB" % (num, unit)
return f"{num:3.2f}{unit}B"
num /= 1024.0
@ -146,7 +146,7 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
return torch.cuda.current_device()
print('Number of parameters for model: {}'.format(sum(p.numel() for p in model.parameters())))
print(f'Number of parameters for model: {sum(p.numel() for p in model.parameters())}')
for i, batch in enumerate(lm_dataloader):
bi = batch["input"]
if args.max_batch and i > args.max_batch:
@ -186,9 +186,7 @@ def train(lm_dataloader, model, criterion, optimizer, vocab_size, args):
print('Peak memory usage for GPUs: ', end='')
for i in range(len(model.devices)):
print("cuda:{}: {}, ".format(
i,
sizeof_fmt(torch.cuda.memory_stats(i)["allocated_bytes.all.peak"])), end='')
print(f"cuda:{i}: {sizeof_fmt(torch.cuda.memory_stats(i)['allocated_bytes.all.peak'])}, ", end='')
print()
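
One subtlety in the `memory_stats` rewrite above: before Python 3.12 (PEP 701), an expression inside an f-string cannot reuse the enclosing quote character, so the dictionary key switches to single quotes. A sketch with fabricated stats:

```python
# Inner string literals inside an f-string need a different quote style on < 3.12.
stats = {"allocated_bytes.all.peak": 1536 * 1024 ** 2}   # made-up value
i = 0
print(f"cuda:{i}: {stats['allocated_bytes.all.peak']}, ", end="")
print()
```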

View File

@ -355,14 +355,14 @@ def run_benchmark(rank, args, data):
rpc.shutdown()
def get_json_config(file_name, id):
def get_json_config(file_name: str, id: str):
r"""
A function that loads a json configuration from a file.
Args:
file_name (str): name of configuration file to load
id (str): configuration that will be loaded
"""
with open(os.path.join(Path(__file__).parent, file_name), "r") as f:
with open(Path(__file__).parent / file_name) as f:
json_config = json.load(f)[id]
return json_config

View File

@ -102,7 +102,7 @@ class CoordinatorBase:
'observer throughput': {}}
print("For batch size {0}".format(self.batch_size))
print(f"For batch size {self.batch_size}")
print("\nAgent Latency - ", len(agent_latency_final))
agent_latency_final = sorted(agent_latency_final)
for p in [50, 75, 90, 95]:

View File

@ -22,7 +22,7 @@ logging.basicConfig(level=logging.INFO)
_COMPACT_ERROR_GROUP = False
class ErrorAggregator(object):
class ErrorAggregator:
"""
Collect and group error messages for report at the end.
@ -47,7 +47,7 @@ class ErrorAggregator(object):
]
def __init__(self, log: Optional[logging.Logger] = None):
super(ErrorAggregator, self).__init__()
super().__init__()
self.error_groups = []
self.bigram_to_group_ids = collections.defaultdict(list)
self.log = log or logging.getLogger(__name__)
@ -141,7 +141,7 @@ class ErrorAggregator(object):
return sum(map(len, self.error_groups))
class ErrorAggregatorDict(object):
class ErrorAggregatorDict:
"""
Collect error types and individually group their error messages for a debug report at the end.
@ -152,7 +152,7 @@ class ErrorAggregatorDict(object):
"""
def __init__(self):
super(ErrorAggregatorDict, self).__init__()
super().__init__()
self.aggregator: Dict[str, ErrorAggregator] = dict()
def __getitem__(self, item: str):
@ -179,7 +179,7 @@ class ErrorAggregatorDict(object):
log.exception("%s error from %s", error_type, module)
class ExportErrorCsvParser(object):
class ExportErrorCsvParser:
"""Parses `*_export_error.csv` produced by onnxbench, aggregates errors and produces report.
Two types of aggregations are performed.
@ -310,7 +310,7 @@ class ExportErrorRow:
return [getattr(self, field.name) for field in dataclasses.fields(self)]
class ExportErrorParser(object):
class ExportErrorParser:
def __init__(self, device: str, model_name: str, batch_size: int):
self.device = device
self.model_name = model_name
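
Dropping the explicit `object` base and the arguments to `super()` is behavior-preserving in Python 3: every class already derives from `object`, and zero-argument `super()` resolves the same MRO. A standalone sketch with an illustrative class name:

```python
# In Python 3, `class C:` and `class C(object):` define the same kind of class.
class ErrorCollector:
    def __init__(self):
        super().__init__()            # same as super(ErrorCollector, self).__init__()
        self.error_groups = []

assert ErrorCollector.__mro__ == (ErrorCollector, object)
assert ErrorCollector().error_groups == []
```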

View File

@ -9,7 +9,7 @@ from typing import Set
# TOOD(voz): Someday, consolidate all the files into one runner instead of a shim like this...
def model_names(filename: str) -> Set[str]:
names = set()
with open(filename, "r") as fh:
with open(filename) as fh:
lines = fh.readlines()
lines = [line.rstrip() for line in lines]
for line in lines:

View File

@ -11,7 +11,7 @@ assert len(sys.argv) == 3
RESULTS = defaultdict(dict)
for side, f in zip(["static", "dynamic"], sys.argv[1:]):
with open(f, "r") as f:
with open(f) as f:
reader = csv.DictReader(f)
for row in reader:
RESULTS[(row["bench"], row["name"])][side] = row

View File

@ -342,7 +342,7 @@ def load_model_from_path(path_and_class_str):
def output_csv(filename, headers, row):
if os.path.exists(filename):
with open(filename, "r") as fd:
with open(filename) as fd:
lines = list(csv.reader(fd)) or [[]]
if headers and len(headers) > len(lines[0]):
# if prior results failed the header might not be filled in yet
@ -1507,7 +1507,7 @@ def read_batch_size_from_file(args, filename, model_name):
if os.path.exists("benchmarks"):
filename = os.path.join("benchmarks", filename)
assert os.path.exists(filename), filename
with open(filename, "r") as f:
with open(filename) as f:
lines = f.readlines()
lines = [i.split(",") for i in lines if len(i.strip()) > 0]
for val in lines:

View File

@ -240,7 +240,7 @@ class OperatorInputsLoader:
def __init__(self, json_file_path):
self.operator_db = defaultdict(Counter)
with open(json_file_path, "r") as f:
with open(json_file_path) as f:
lines = f.readlines()
i = 0

View File

@ -15,7 +15,7 @@ import sys
assert len(sys.argv) == 2
full_log = open(sys.argv[1], "r").read()
full_log = open(sys.argv[1]).read()
# If the log contains a gist URL, extract it so we can include it in the CSV
gist_url = ""

View File

@ -391,8 +391,8 @@ def generate_commands(args, dtypes, suites, devices, compilers, output_dir):
devices_str = "_".join(devices)
dtypes_str = "_".join(dtypes)
compilers_str = "_".join(compilers)
generated_file = "run_{}_{}_{}_{}_{}.sh".format(
mode, devices_str, dtypes_str, suites_str, compilers_str
generated_file = (
f"run_{mode}_{devices_str}_{dtypes_str}_{suites_str}_{compilers_str}.sh"
)
with open(generated_file, "w") as runfile:
lines = []
@ -608,7 +608,7 @@ class Parser:
def has_header(self, output_filename):
header_present = False
with open(output_filename, "r") as f:
with open(output_filename) as f:
line = f.readline()
if "dev" in line:
header_present = True
@ -1026,7 +1026,7 @@ class SummaryStatDiffer:
assert os.path.exists(self.lookup_file)
def generate_diff(self, last2, filename, caption):
df_cur, df_prev = [pd.read_csv(os.path.join(path, filename)) for path in last2]
df_cur, df_prev = (pd.read_csv(os.path.join(path, filename)) for path in last2)
df_merge = df_cur.merge(df_prev, on="Compiler", suffixes=("_cur", "_prev"))
data = {col: [] for col in ("compiler", "suite", "prev_value", "cur_value")}
for _, row in df_merge.iterrows():
@ -1145,10 +1145,10 @@ class RegressionDetector:
if last2[compiler] is None:
continue
df_cur, df_prev = [
df_cur, df_prev = (
last2[compiler][i].untouched_parsed_frames[suite][metric]
for i in (0, 1)
]
)
df_merge = df_cur.merge(
df_prev, on="name", suffixes=("_cur", "_prev")
)
@ -1367,7 +1367,7 @@ class DashboardUpdater:
all_lines = []
for f in files:
try:
with open(os.path.join(self.output_dir, f), "r") as fh:
with open(os.path.join(self.output_dir, f)) as fh:
all_lines.extend(fh.readlines())
except FileNotFoundError:
pass
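
The list-comprehension-to-generator changes in `generate_diff` and the regression detector above are equivalent under tuple unpacking; the intermediate list is simply never built. A minimal sketch with dummy frames:

```python
# Unpacking a generator expression yields the same assignment as unpacking a list.
frames = [{"metric": 1.0}, {"metric": 2.0}]
df_cur, df_prev = (frames[i]["metric"] for i in (0, 1))
assert (df_cur, df_prev) == (1.0, 2.0)
```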

View File

@ -31,7 +31,7 @@ finally:
TIMM_MODELS = dict()
filename = os.path.join(os.path.dirname(__file__), "timm_models_list.txt")
with open(filename, "r") as fh:
with open(filename) as fh:
lines = fh.readlines()
lines = [line.rstrip() for line in lines]
for line in lines:
@ -92,7 +92,7 @@ def refresh_model_names():
models = set()
# TODO - set the path to pytorch-image-models repo
for fn in glob.glob("../pytorch-image-models/docs/models/*.md"):
with open(fn, "r") as f:
with open(fn) as f:
while True:
line = f.readline()
if not line:

View File

@ -26,7 +26,7 @@ def fit_str(string, colwidth=16):
def to_str(item):
if isinstance(item, float):
return '%.4g' % item
return f'{item:.4g}'
return str(item)
@ -187,7 +187,7 @@ def bench(rnn_runners, group_name, print_json=False, sep=' ', **params):
def bench_group(model_list, bench_name, bench_group, bench_args):
print_stderr('Benchmarking {}s...'.format(bench_name))
print_stderr(f'Benchmarking {bench_name}s...')
nn_results = bench(get_nn_runners(*model_list), bench_group, **bench_args)
print_stderr('')
return nn_results

View File

@ -54,7 +54,7 @@ def profile(rnns, sleep_between_seconds=1, nloops=5,
def system(command):
"""Returns (return-code, stdout, stderr)"""
print('[system] {}'.format(command))
print(f'[system] {command}')
p = subprocess.Popen(command, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True)
output, err = p.communicate()
@ -82,28 +82,26 @@ def nvprof_output_filename(rnns, **params):
rnn_tag = '-'.join(rnns)
size_tag = describe_sizes(**params)
date_tag = datetime.datetime.now().strftime("%m%d%y-%H%M")
return '{}prof_{}_{}_{}.nvvp'.format(OUTPUT_DIR, rnn_tag,
size_tag, date_tag)
return f'{OUTPUT_DIR}prof_{rnn_tag}_{size_tag}_{date_tag}.nvvp'
def nvprof(cmd, outpath):
return system('nvprof -o {} {}'.format(outpath, cmd))
return system(f'nvprof -o {outpath} {cmd}')
def full_profile(rnns, **args):
profile_args = []
for k, v in args.items():
profile_args.append('--{}={}'.format(k, v))
profile_args.append('--rnns {}'.format(' '.join(rnns)))
profile_args.append(f'--{k}={v}')
profile_args.append(f"--rnns {' '.join(rnns)}")
profile_args.append('--internal-run')
outpath = nvprof_output_filename(rnns, **args)
cmd = '{} -m fastrnns.profile {}'.format(
sys.executable, ' '.join(profile_args))
cmd = f"{sys.executable} -m fastrnns.profile {' '.join(profile_args)}"
rc, stdout, stderr = nvprof(cmd, outpath)
if rc != 0:
raise RuntimeError('stderr: {}\nstdout: {}'.format(stderr, stdout))
raise RuntimeError(f'stderr: {stderr}\nstdout: {stdout}')
if __name__ == '__main__':

View File

@ -11,7 +11,7 @@ from .factory import (dropoutlstm_creator, imagenet_cnn_creator,
varlen_lstm_creator, varlen_pytorch_lstm_creator)
class DisableCuDNN():
class DisableCuDNN:
def __enter__(self):
self.saved = torch.backends.cudnn.enabled
torch.backends.cudnn.enabled = False
@ -20,7 +20,7 @@ class DisableCuDNN():
torch.backends.cudnn.enabled = self.saved
class DummyContext():
class DummyContext:
def __enter__(self):
pass
@ -28,7 +28,7 @@ class DummyContext():
pass
class AssertNoJIT():
class AssertNoJIT:
def __enter__(self):
import os
enabled = os.environ.get('PYTORCH_JIT', 1)

View File

@ -71,7 +71,7 @@ def test_vl_py(**test_args):
control_creator = varlen_pytorch_lstm_creator
name, experim_creator, context = get_nn_runners('vl_py')[0]
with context():
print('testing {}...'.format(name))
print(f'testing {name}...')
creator_keys = [
'seqLength', 'numLayers', 'inputSize',
'hiddenSize', 'miniBatch', 'device', 'seed'
@ -154,5 +154,5 @@ if __name__ == '__main__':
for name, creator, context in rnn_runners:
with context():
print('testing {}...'.format(name))
print(f'testing {name}...')
test_rnns(creator, pytorch_lstm_creator, **test_args)

View File

@ -20,14 +20,14 @@ class C2SimpleNet:
def __init__(self, op_name, num_inputs=1, debug=False):
self.input_names = []
self.net = core.Net("framework_benchmark_net")
self.input_names = ["in_{}".format(i) for i in range(num_inputs)]
self.input_names = [f"in_{i}" for i in range(num_inputs)]
for i in range(num_inputs):
add_blob(workspace, self.input_names[i], [1])
self.net.AddExternalInputs(self.input_names)
op_constructor = getattr(self.net, op_name)
op_constructor(self.input_names)
self.output_name = self.net._net.op[-1].output
print("Benchmarking op {}:".format(op_name))
print(f"Benchmarking op {op_name}:")
for _ in range(NUM_LOOP_ITERS):
output_name = self.net._net.op[-1].output
self.input_names[-1] = output_name[0]

View File

@ -31,7 +31,7 @@ def parse_op_args(op):
def print_results(result):
print("===================================")
for key, value in result.items():
print("{}, latency per iter (us):{}".format(key, ms_to_us(value)))
print(f"{key}, latency per iter (us):{ms_to_us(value)}")
print("===================================")
def benchmark_simple_fn(args, config, module_config, module_type, result):
@ -46,7 +46,7 @@ def benchmark_simple_fn(args, config, module_config, module_type, result):
result: dictionary instance to be populated with the benchmark result (latency per iter).
"""
benchmark_c2_net = args.benchmark_c2_net
print("Benchmarking {}".format(module_type.__name__))
print(f"Benchmarking {module_type.__name__}")
if benchmark_c2_net:
op_name = module_config.c2_op
num_inputs = module_config.num_params
@ -86,7 +86,7 @@ def main():
args = parser.parse_args()
if args.op not in SUPPORTED_OPS:
print("Op {} is not supported: Supported ops are:{}".format(args.op, SUPPORTED_OPS))
print(f"Op {args.op} is not supported: Supported ops are:{SUPPORTED_OPS}")
return
assert not (args.benchmark_c2_net and args.use_throughput_benchmark), \
"Benchmarking of C2 net via throughput benchmarking is not yet supported"

View File

@ -31,8 +31,8 @@ class WrapperModule:
if save:
file_name = self.module_name + "_" + pt_fn.__name__ + ".pt"
torch.jit.save(self.module, file_name)
print("Generated graph is saved in {}".format(file_name))
print("Benchmarking module {} with fn {}: Graph mode:{}".format(self.module_name, pt_fn.__name__, module_config.graph_mode))
print(f"Generated graph is saved in {file_name}")
print(f"Benchmarking module {self.module_name} with fn {pt_fn.__name__}: Graph mode:{module_config.graph_mode}")
if (debug and isinstance(self.module, torch.jit.ScriptModule)):
print(self.module.graph)
print(self.module.code)

View File

@ -26,7 +26,7 @@ def benchmark_module(config, module, use_throughput_benchmark=False):
if use_throughput_benchmark:
return benchmark_using_throughput_benchmark(config, module)
module.forward(config.num_warmup_iters)
print("Running module for {} iterations".format(config.num_iters))
print(f"Running module for {config.num_iters} iterations")
start = time.time()
module.forward(config.num_iters)
end = time.time()

View File

@ -10,11 +10,11 @@ def main():
parser.add_argument("--output", type=str, default="", help="Text file where to write the output")
args = parser.parse_args()
with open(args.before, "r") as f:
with open(args.before) as f:
content = f.read()
res_before = from_markdown_table(content)
with open(args.after, "r") as f:
with open(args.after) as f:
content = f.read()
res_after = from_markdown_table(content)

View File

@ -198,7 +198,7 @@ def run_model(model_getter: GetterType, args: Any, task: str, run_once_fn: Calla
pass
do_sync = noop
else:
device = torch.device("cuda:{}".format(args.gpu))
device = torch.device(f"cuda:{args.gpu}")
do_sync = torch.cuda.synchronize
model, inp = model_getter(device)
@ -257,7 +257,7 @@ def main():
runtimes = torch.tensor(runtimes)
mean, var = runtimes.mean(), runtimes.var()
results[name][task] = (mean.item(), var.item())
print("Results for model {} on task {}: {}s (var: {})".format(name, task, mean, var))
print(f"Results for model {name} on task {task}: {mean}s (var: {var})")
if has_functorch:
try:
@ -269,7 +269,7 @@ def main():
runtimes = torch.tensor(runtimes)
mean, var = runtimes.mean(), runtimes.var()
results[name][f"functorch {task}"] = (mean.item(), var.item())
print("Results for model {} on task {} using Functorch: {}s (var: {})".format(name, task, mean, var))
print(f"Results for model {name} on task {task} using Functorch: {mean}s (var: {var})")
if args.output:
with open(args.output, "w") as f:

View File

@ -78,7 +78,7 @@ def to_markdown_table(res: TimingResultType, header: Tuple[str, ...] = None) ->
def write_line(*args):
nonlocal out
out += "| {} |\n".format(" | ".join(str(a) for a in args))
out += f"| {' | '.join(str(a) for a in args)} |\n"
# Make it a markdown table
write_line(*header)
@ -97,7 +97,7 @@ def from_markdown_table(data: str) -> TimingResultType:
res = defaultdict(defaultdict)
for line in out:
model, task, mean, var = [f.strip() for f in line.strip().split("|") if f]
model, task, mean, var = (f.strip() for f in line.strip().split("|") if f)
res[model][task] = (float(mean), float(var))
return res

View File

@ -70,7 +70,7 @@ def main(argv: List[str]) -> None:
}
if args.destination:
with open(args.destination, "wt") as f:
with open(args.destination, "w") as f:
json.dump(final_results, f)
if in_debug_mode:

View File

@ -58,7 +58,7 @@ def _generate_torchscript_file(model_src: str, name: str) -> Optional[str]:
# to confirm.
raise ValueError(f"File {module_path} already exists.")
with open(module_path, "wt") as f:
with open(module_path, "w") as f:
f.write(model_src)
# Import magic to actually load our function.

View File

@ -122,7 +122,7 @@ class Caffe2OperatorTestCase:
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.forward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError("Unable to run operator test case: {}".format(self.test_name))
raise ValueError(f"Unable to run operator test case: {self.test_name}")
def run_backward(self, num_runs, print_per_iter=False):
""" Run the backward path of an operator in a loop
@ -130,7 +130,7 @@ class Caffe2OperatorTestCase:
with core.DeviceScope(self.op_bench.dev):
op = self.op_bench.backward()
if not workspace.RunOperatorMultiple(op, num_runs):
raise ValueError("Unable to run operator gradient test case: {}".format(self.test_name))
raise ValueError(f"Unable to run operator gradient test case: {self.test_name}")
def _print_per_iter(self):
pass
@ -140,7 +140,7 @@ def create_caffe2_op_test_case(op_bench, test_config):
test_case = Caffe2OperatorTestCase(op_bench, test_config)
test_config = test_case.test_config
op = test_case.op_bench
func_name = "{}{}{}".format(op.module_name(), test_case.framework, str(test_config))
func_name = f"{op.module_name()}{test_case.framework}{str(test_config)}"
return (func_name, test_case)

View File

@ -197,7 +197,7 @@ class BenchmarkRunner:
print("# List of Operators to run:")
self.printed_ops_list = set()
if self.args.operators:
print("# {}".format(self.args.operators))
print(f"# {self.args.operators}")
def _print_perf_result(self, reported_run_time_us, test_case):
if self.args.report_aibench:
@ -206,7 +206,7 @@ class BenchmarkRunner:
return
test_name = '_'.join([test_case.framework, test_case.test_config.test_name])
for run in range(self.num_runs):
print("{}Observer ".format(test_case.framework) + json.dumps(
print(f"{test_case.framework}Observer " + json.dumps(
{
"type": test_name,
"metric": "latency",
@ -216,23 +216,17 @@ class BenchmarkRunner:
))
else:
if test_case.framework == "PyTorch":
print("# Mode: {}".format("JIT" if self.use_jit else "Eager"))
print(f"# Mode: {'JIT' if self.use_jit else 'Eager'}")
print("# Name: {}\n"
"# Input: {}".format(
test_case.test_config.test_name,
test_case.test_config.input_config))
print(f"# Name: {test_case.test_config.test_name}\n# Input: {test_case.test_config.input_config}")
mode = "Backward" if test_case.test_config.run_backward else "Forward"
if self.num_runs > 1:
for run in range(self.num_runs):
print("Run: {}, {} Execution Time (us) : {:.3f}".format(
run,
mode, reported_run_time_us[run]))
print(f"Run: {run}, {mode} Execution Time (us) : {reported_run_time_us[run]:.3f}")
print()
else:
print("{} Execution Time (us) : {:.3f}\n".format(
mode, reported_run_time_us[0]))
print(f"{mode} Execution Time (us) : {reported_run_time_us[0]:.3f}\n")
def _predict_num_iter_needed(self, i):
return (i * self.multiplier)
@ -349,14 +343,14 @@ class BenchmarkRunner:
def _print_test_case_info(self, test_case):
# Print out the test name and skip the real execution
if self.args.list_tests:
print("# {}".format(test_case.test_config.test_name))
print(f"# {test_case.test_config.test_name}")
return True
elif self.args.list_ops:
if self.args.operators is None:
op_name = test_case.op_bench.module_name()
if op_name not in self.printed_ops_list:
print("# {}".format(op_name))
print(f"# {op_name}")
self.printed_ops_list.add(op_name)
return True
@ -383,9 +377,7 @@ class BenchmarkRunner:
# requirement.
np.random.seed(seed=hash(full_test_id) & ((1 << 32) - 1))
print("# Benchmarking {}: {}".format(
test_case.framework,
test_case.op_bench.module_name()))
print(f"# Benchmarking {test_case.framework}: {test_case.op_bench.module_name()}")
if op_test_config.run_backward:
launch_func = self._launch_backward

View File

@ -192,5 +192,5 @@ def create_pytorch_op_test_case(op_bench, test_config):
test_case = PyTorchOperatorTestCase(op_bench, test_config)
test_config = test_case.test_config
op = test_case.op_bench
func_name = "{}{}{}".format(op.module_name(), test_case.framework, str(test_config))
func_name = f"{op.module_name()}{test_case.framework}{str(test_config)}"
return (func_name, test_case)

View File

@ -44,16 +44,14 @@ def numpy_random(dtype, *shapes):
def set_omp_threads(num_threads):
existing_value = os.environ.get('OMP_NUM_THREADS', '')
if existing_value != '':
print("Overwriting existing OMP_NUM_THREADS value: {}; Setting it to {}.".format(
existing_value, num_threads))
print(f"Overwriting existing OMP_NUM_THREADS value: {existing_value}; Setting it to {num_threads}.")
os.environ["OMP_NUM_THREADS"] = str(num_threads)
def set_mkl_threads(num_threads):
existing_value = os.environ.get('MKL_NUM_THREADS', '')
if existing_value != '':
print("Overwriting existing MKL_NUM_THREADS value: {}; Setting it to {}.".format(
existing_value, num_threads))
print(f"Overwriting existing MKL_NUM_THREADS value: {existing_value}; Setting it to {num_threads}.")
os.environ["MKL_NUM_THREADS"] = str(num_threads)

View File

@ -54,4 +54,4 @@ if __name__ == "__main__":
total_time_s = (time.time() - s)
total_time_per_iter_s = total_time_s / NUM_BENCHMARK_ITERS
achieved_bandwidth = (total_bytes * BYTES_TO_MB) / total_time_per_iter_s
print("Time:{} Achieved Bandwidth:{} MB/s".format(total_time_per_iter_s, achieved_bandwidth))
print(f"Time:{total_time_per_iter_s} Achieved Bandwidth:{achieved_bandwidth} MB/s")

View File

@ -56,8 +56,8 @@ def main():
bench_min, bench_std = bench(tensor_1, tensor_2)
print(
"Type {0} had a minimum time of {1} us"
" and a standard deviation of {2} us.".format(
"Type {} had a minimum time of {} us"
" and a standard deviation of {} us.".format(
t.__name__, (10 ** 6 * bench_min), (10 ** 6) * bench_std
)
)

View File

@ -62,8 +62,8 @@ def get_tasks(op, backward_test, device):
test_name = device + ":matmul-forward"
return list(filter(None, [
(test_name, device, "torch:" + operation.replace("sparse", "dense"),
"{}(dx, dy)".format(OPS_MAP[operation])),
(test_name, device, "torch:" + operation, "{}(x, y)".format(OPS_MAP[operation])),
f"{OPS_MAP[operation]}(dx, dy)"),
(test_name, device, "torch:" + operation, f"{OPS_MAP[operation]}(x, y)"),
(test_name, device, "scipy:" + operation, "scipy_matmul(sx, sy)") if device == "cpu" else None
]))

View File

@ -21,7 +21,7 @@ def sparse_grad_output(a, b):
def read_matrix_params(path):
with open(path, 'r') as file:
with open(path) as file:
line = file.readline()
nrows, ncols, nnz = (int(el) for el in line.split(', '))
return (nrows, ncols), nnz
@ -38,7 +38,7 @@ def csr_to_coo(indices, indptr, shape):
def load_sparse_matrix(path, device):
with open(path, 'r') as file:
with open(path) as file:
nrows, ncols, nnz = (int(el) for el in file.readline().split(', '))
index_pointers = (int(el) for el in file.readline().split())
indices = (int(el) for el in file.readline().split())
@ -51,7 +51,7 @@ def load_sparse_matrix(path, device):
def gen_vector(path, device):
with open(path, 'r') as file:
with open(path) as file:
nrows, ncols, nnz = (int(el) for el in file.readline().split(', '))
index_pointers = (int(el) for el in file.readline().split())
indices = (int(el) for el in file.readline().split())
@ -59,7 +59,7 @@ def gen_vector(path, device):
def gen_matrix(path, device):
with open(path, 'r') as file:
with open(path) as file:
nrows, ncols, nnz = (int(el) for el in file.readline().split(', '))
index_pointers = (int(el) for el in file.readline().split())
indices = (int(el) for el in file.readline().split())

View File

@ -157,7 +157,7 @@ Works only with Python3.\n A few examples:
torch._C._jit_set_nvfuser_enabled(True)
torch._C._get_graph_executor_optimize(True)
else :
raise ValueError("Undefined fuser: {}".format(args.cuda_fuser))
raise ValueError(f"Undefined fuser: {args.cuda_fuser}")
if args.cpu_fusion:
import torch
@ -207,7 +207,7 @@ Works only with Python3.\n A few examples:
for index, dtype in enumerate(datatypes):
datatypes[index] = getattr(torch, dtype)
if not datatypes[index] :
raise AttributeError("DataType: {} is not valid!".format(dtype))
raise AttributeError(f"DataType: {dtype} is not valid!")
tensor_engine.set_engine_mode(args.engine)
@ -223,7 +223,7 @@ Works only with Python3.\n A few examples:
continue
else:
raise ValueError(
"attempted to run an unsupported benchmark: %s" % (bench.desc())
f"attempted to run an unsupported benchmark: {bench.desc()}"
)
bench.run(args)
@ -262,7 +262,7 @@ Works only with Python3.\n A few examples:
continue
else:
raise ValueError(
"attempted to run an unsupported benchmark: %s" % (bench.desc())
f"attempted to run an unsupported benchmark: {bench.desc()}"
)
bench.run(args)
@ -282,7 +282,7 @@ Works only with Python3.\n A few examples:
run_with_input_iter(bench_cls, args.input_iter, allow_skip=True)
else :
if args.input_iter is not None :
print("WARNING: Incompatible benchmark class called with input_iter arg: {}".format(name))
print(f"WARNING: Incompatible benchmark class called with input_iter arg: {name}")
run_default_configs(bench_cls, allow_skip=True)
if match_class_name:
@ -295,15 +295,15 @@ Works only with Python3.\n A few examples:
if name.startswith(cls_module):
match_class_name = True
if name[len(cls_module)] != "_":
raise ValueError("invalid name: %s" % (name))
raise ValueError(f"invalid name: {name}")
config_str = name[(len(cls_module) + 1) :]
config = config_str.split("_")
if len(config) < 2:
raise ValueError("invalid config: %s" % config)
raise ValueError(f"invalid config: {config}")
mode, device = config[0:2]
# TODO: make sure virtual devices such as 'cpu1' and 'cpu4' are supported.
if mode not in ["fwd", "both"]:
raise ValueError("invalid mode: %s" % (mode))
raise ValueError(f"invalid mode: {mode}")
for i, entry in enumerate(config):
try:
value = int(entry)
@ -321,8 +321,7 @@ Works only with Python3.\n A few examples:
[bench_cls.module() for bench_cls in benchmark_classes]
)
raise ValueError(
"invalid name: %s\nAvailable benchmark classes:\n%s"
% (name, available_classes)
f"invalid name: {name}\nAvailable benchmark classes:\n{available_classes}"
)

View File

@ -21,7 +21,7 @@ class Benchmark:
elif mode == "fwd":
self.requires_grad = False
else:
raise ValueError("invalid mode: %s" % (mode))
raise ValueError(f"invalid mode: {mode}")
self.result_grad = None
self.grad_variables = []
self.engine = tensor_engine.get_engine()
@ -66,13 +66,7 @@ class Benchmark:
if "NNC_NUM_THREADS" in os.environ:
num_threads_str = os.environ["NNC_NUM_THREADS"]
device += num_threads_str
return "%s: %s_%s_%s_%s" % (
self.engine.mode,
self.module(),
self.mode,
device,
config_str,
)
return f"{self.engine.mode}: {self.module()}_{self.mode}_{device}_{config_str}"
@staticmethod
def module():
@ -203,14 +197,14 @@ class Benchmark:
if self.output_type == "json":
print(json.dumps(result_dict))
elif self.output_type == "stdout":
msg = "%s: %.2f us, SOL %.2f GB/s, algorithmic %.2f GB/s" % (
msg = "{}: {:.2f} us, SOL {:.2f} GB/s, algorithmic {:.2f} GB/s".format(
result_dict["desc"],
result_dict["us"],
result_dict["sol"],
result_dict["algorithmic"],
)
if "compute_workload" in result_dict:
msg += ", compute %.2f Gops/s" % result_dict["compute_workload"]
msg += f", compute {result_dict['compute_workload']:.2f} Gops/s"
print(msg)
else:
raise Exception("Unknown output_type " + self.output_type)

View File

@ -34,7 +34,7 @@ class BroadcastMulBench(benchmark.Benchmark):
[1, N, K], device=device, dtype=dtype, requires_grad=self.requires_grad
)
else:
raise ValueError("invalid case: %s" % (case))
raise ValueError(f"invalid case: {case}")
self.inputs = [self.d1, self.d2]

View File

@ -19,7 +19,7 @@ class ConvImplBench(benchmark.Benchmark):
elif case == "depthwise_conv":
self.groups = iC
else:
raise ValueError("invalid case: %s" % (case))
raise ValueError(f"invalid case: {case}")
self.conv = self.conv2d_layer(iC, oC, kernel_size, groups=self.groups)
if device != "cpu":
@ -61,7 +61,7 @@ class ConvImplBench(benchmark.Benchmark):
elif self.mode == "both":
count = 1 + (1 + 1)
else:
raise ValueError("invalid mode: %s" % (self.mode))
raise ValueError(f"invalid mode: {self.mode}")
op_count = (
self.N

View File

@ -22,7 +22,7 @@ class ReduceBench(benchmark.Benchmark):
elif case == "full":
self.dims = [0, 1, 2]
else:
raise ValueError("invalid case: %s" % case)
raise ValueError(f"invalid case: {case}")
def forward(self, inputs):
if self.skip_input_transform:
@ -69,7 +69,7 @@ class ReduceBench(benchmark.Benchmark):
elif input_str == "s1":
self.skip_input_transform = True
else:
raise ValueError('invalid skip_input_transform: %s' % (input_str))
raise ValueError(f'invalid skip_input_transform: {input_str}')
def _skip_input_transform_str(self):
if self.skip_input_transform:
@ -139,7 +139,7 @@ class Reduce2DBench(benchmark.Benchmark):
)]
if red_dim != 0 and red_dim != 1 :
raise ValueError("invalid reduction dimension: {}".format(red_dim))
raise ValueError(f"invalid reduction dimension: {red_dim}")
def forward(self, inputs):
x = self.add(inputs, 0.001)

View File

@ -38,7 +38,7 @@ def set_engine_mode(mode):
tensor_engine = nnc_engine.NncEngine()
else:
raise ValueError("invalid tensor engine mode: %s" % (mode))
raise ValueError(f"invalid tensor engine mode: {mode}")
tensor_engine.mode = mode

View File

@ -31,8 +31,7 @@ class ScribeUploader:
message['float'][field] = float(value)
else:
raise ValueError("Field {} is not currently used, "
"be intentional about adding new fields".format(field))
raise ValueError(f"Field {field} is not currently used, be intentional about adding new fields")
return message
def _upload_intern(self, messages):
@ -95,7 +94,7 @@ class PytorchBenchmarkUploader(ScribeUploader):
for b in pytest_json['benchmarks']:
test = b['name'].split('[')[0]
net_name = b['params']['net_name']
benchmark_name = '{}[{}]'.format(test, net_name)
benchmark_name = f'{test}[{net_name}]'
executor = b['params']['executor']
fuser = b['params']['fuser']
m = self.format_message({