mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
[BE] Prefer dash over underscore in command-line options (#94505)
Prefer dashes over underscores in command-line options: add `--command-arg-name` to the argument parser, and keep the old underscore spelling `--command_arg_name` as an alias for backward compatibility.
Both dashes and underscores are used in the PyTorch codebase, and some argument parsers accept only dashes or only underscores in their arguments. For example, the `torchrun` utility for distributed training only accepts underscore arguments (e.g., `--master_port`). Dashes are more common in other command-line tools, and they appear to be the default choice in the Python standard library:
`argparse.BooleanOptionalAction`: 4a9dff0e5a/Lib/argparse.py (L893-L895)
```python
class BooleanOptionalAction(Action):
    def __init__(...):
        if option_string.startswith('--'):
            option_string = '--no-' + option_string[2:]
            _option_strings.append(option_string)
```
It adds `--no-argname`, not `--no_argname`. Also, typing `_` requires pressing the shift or the caps-lock key, whereas typing `-` does not.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/94505
Approved by: https://github.com/ezyang, https://github.com/seemethere
This commit is contained in:
committed by
PyTorch MergeBot
parent
a63524684d
commit
a229b4526f
@ -448,11 +448,13 @@ def main(args):
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="RPC server Benchmark")
|
||||
parser.add_argument(
|
||||
"--master-addr",
|
||||
"--master_addr",
|
||||
type=str,
|
||||
help="IP address of the machine that will host the process with rank 0"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--master-port",
|
||||
"--master_port",
|
||||
type=str,
|
||||
help="A free port on the machine that will host the process with rank 0"
|
||||
@ -493,6 +495,7 @@ if __name__ == "__main__":
|
||||
help="cudaserver count for benchmark run"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rpc-timeout",
|
||||
"--rpc_timeout",
|
||||
type=int,
|
||||
help="timeout in seconds to use for RPC"
|
||||
@ -508,6 +511,7 @@ if __name__ == "__main__":
|
||||
help="epoch count for training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
"--batch_size",
|
||||
type=int,
|
||||
help="number of training examples used in one iteration"
|
||||
@ -523,62 +527,74 @@ if __name__ == "__main__":
|
||||
help="id for model configuration"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--data-config-path",
|
||||
"--data_config_path",
|
||||
type=str,
|
||||
help="path to data configuration file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-config-path",
|
||||
"--model_config_path",
|
||||
type=str,
|
||||
help="path to model configuration file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--server-config-path",
|
||||
"--server_config_path",
|
||||
type=str,
|
||||
help="path to server configuration file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--trainer-config-path",
|
||||
"--trainer_config_path",
|
||||
type=str,
|
||||
help="path to trainer configuration file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--torch-seed",
|
||||
"--torch_seed",
|
||||
type=int,
|
||||
help="seed for generating random numbers to a non-deterministic random number"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-seed",
|
||||
"--cuda_seed",
|
||||
type=int,
|
||||
help="seed for generating random numbers to a random number for the current GPU"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--preprocess-data",
|
||||
"--preprocess_data",
|
||||
type=str,
|
||||
help="this function will be used to preprocess data before training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-criterion",
|
||||
"--create_criterion",
|
||||
type=str,
|
||||
help="this function will be used to create the criterion used for model loss calculation"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--create-ddp-model",
|
||||
"--create_ddp_model",
|
||||
type=str,
|
||||
help="this function will be used to create the ddp model used during training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hook-state",
|
||||
"--hook_state",
|
||||
type=str,
|
||||
help="this will be the state class used when registering the ddp communication hook"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ddp-hook",
|
||||
"--ddp_hook",
|
||||
type=str,
|
||||
default="allreduce_hook",
|
||||
help="ddp communication hook"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--iteration-step",
|
||||
"--iteration_step",
|
||||
type=str,
|
||||
help="this will be the function called for each iteration of training"
|
||||
|
@ -20,7 +20,7 @@ This benchmark depends on PyTorch.
|
||||
|
||||
For any environments you are interested in, pass the corresponding arguments to `python launcher.py`.
|
||||
|
||||
```python launcher.py --world_size="10,20" --master_addr="127.0.0.1" --master_port="29501 --batch="True" --state_size="10-20-10" --nlayers="5" --out_features="10" --output_file_path="benchmark_report.json"```
|
||||
```python launcher.py --world-size="10,20" --master-addr="127.0.0.1" --master-port="29501" --batch="True" --state-size="10-20-10" --nlayers="5" --out-features="10" --output-file-path="benchmark_report.json"```
|
||||
|
||||
Example Output:
|
||||
|
||||
|
@ -29,15 +29,15 @@ def str2bool(v):
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description='PyTorch RPC RL Benchmark')
|
||||
parser.add_argument('--world_size', type=str, default='10')
|
||||
parser.add_argument('--master_addr', type=str, default='127.0.0.1')
|
||||
parser.add_argument('--master_port', type=str, default='29501')
|
||||
parser.add_argument('--world-size', '--world_size', type=str, default='10')
|
||||
parser.add_argument('--master-addr', '--master_addr', type=str, default='127.0.0.1')
|
||||
parser.add_argument('--master-port', '--master_port', type=str, default='29501')
|
||||
parser.add_argument('--batch', type=str, default='True')
|
||||
|
||||
parser.add_argument('--state_size', type=str, default='10-20-10')
|
||||
parser.add_argument('--state-size', '--state_size', type=str, default='10-20-10')
|
||||
parser.add_argument('--nlayers', type=str, default='5')
|
||||
parser.add_argument('--out_features', type=str, default='10')
|
||||
parser.add_argument('--output_file_path', type=str, default='benchmark_report.json')
|
||||
parser.add_argument('--out-features', '--out_features', type=str, default='10')
|
||||
parser.add_argument('--output-file-path', '--output_file_path', type=str, default='benchmark_report.json')
|
||||
|
||||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
@ -1520,7 +1520,9 @@ def parse_args(args=None):
|
||||
default=False,
|
||||
help="use channels last format",
|
||||
)
|
||||
parser.add_argument("--batch_size", type=int, help="batch size for benchmarking")
|
||||
parser.add_argument(
|
||||
"--batch-size", "--batch_size", type=int, help="batch size for benchmarking"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--iterations", type=int, default=2, help="how many iterations to run"
|
||||
)
|
||||
@ -1651,7 +1653,11 @@ def parse_args(args=None):
|
||||
action="store_true",
|
||||
help="exports trace of kineto profiler",
|
||||
)
|
||||
parser.add_argument("--profiler_trace_name", help="Overwrites exported trace name")
|
||||
parser.add_argument(
|
||||
"--profiler-trace-name",
|
||||
"--profiler_trace_name",
|
||||
help="Overwrites exported trace name",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--diff-branch",
|
||||
@ -1670,6 +1676,7 @@ def parse_args(args=None):
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--cold-start-latency",
|
||||
"--cold_start_latency",
|
||||
action="store_true",
|
||||
help="Use a fresh triton cachedir when running each model, to force cold-start compile.",
|
||||
@ -1787,6 +1794,7 @@ def parse_args(args=None):
|
||||
help="Dump convolution input/weight/bias's shape/stride/dtype and other options to json",
|
||||
)
|
||||
group.add_argument(
|
||||
"--recompile-profiler",
|
||||
"--recompile_profiler",
|
||||
action="store_true",
|
||||
help="Run the dynamo recompilation profiler on each model.",
|
||||
|
@ -121,24 +121,29 @@ if __name__ == "__main__":
|
||||
help="if set to a str, uses dynamo[str] backend. else, eager",
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true")
|
||||
parser.add_argument("--batch_size", default=None)
|
||||
parser.add_argument("--batch-size", "--batch_size", default=None)
|
||||
parser.add_argument(
|
||||
"--torchviz", action="store_true", help="Dump autograd graph with torchviz"
|
||||
)
|
||||
parser.add_argument("--profile", action="store_true", help="Run the profiler")
|
||||
parser.add_argument("--trace_file", default="profile.json", help="Run the profiler")
|
||||
parser.add_argument(
|
||||
"--trace-file", "--trace_file", default="profile.json", help="Run the profiler"
|
||||
)
|
||||
parser.add_argument("--repeat", default=10, help="Repeats for timing run")
|
||||
parser.add_argument(
|
||||
"--dynamo-no-optimize-ddp",
|
||||
"--dynamo_no_optimize_ddp",
|
||||
action="store_true",
|
||||
help="Disable dynamo's ddp optimizer (enabled by default)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fsdp-checkpoint",
|
||||
"--fsdp_checkpoint",
|
||||
action="store_true",
|
||||
help="Use gradient checkpointing via model-specific policy",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fsdp-wrap",
|
||||
"--fsdp_wrap",
|
||||
action="store_true",
|
||||
help="Apply fsdp to submodules via model-specific policy",
|
||||
@ -150,10 +155,12 @@ if __name__ == "__main__":
|
||||
|
||||
model_arg = parser.add_mutually_exclusive_group(required=True)
|
||||
model_arg.add_argument(
|
||||
"--torchbench_model", help="name of torchbench model, e.g. hf_Bert"
|
||||
"--torchbench-model",
|
||||
"--torchbench_model",
|
||||
help="name of torchbench model, e.g. hf_Bert",
|
||||
)
|
||||
model_arg.add_argument(
|
||||
"--toy_model", action="store_true", help="use toy model instead"
|
||||
"--toy-model", "--toy_model", action="store_true", help="use toy model instead"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -13,10 +13,10 @@ This command will generate the commands for the default compilers (see DEFAULTS
|
||||
below) for inference, run them and visualize the logs.
|
||||
|
||||
If you want to just print the commands, you could use the following command
|
||||
-> python benchmarks/runner.py --print_run_commands --suites=torchbench --inference
|
||||
-> python benchmarks/runner.py --print-run-commands --suites=torchbench --inference
|
||||
|
||||
Similarly, if you want to just visualize the already finished logs
|
||||
-> python benchmarks/runner.py --visualize_logs --suites=torchbench --inference
|
||||
-> python benchmarks/runner.py --visualize-logs --suites=torchbench --inference
|
||||
|
||||
If you want to test float16
|
||||
-> python benchmarks/runner.py --suites=torchbench --inference --dtypes=float16
|
||||
@ -178,11 +178,13 @@ def parse_args():
|
||||
# Choose either generation of commands, pretty parsing or e2e runs
|
||||
group = parser.add_mutually_exclusive_group(required=False)
|
||||
group.add_argument(
|
||||
"--print-run-commands",
|
||||
"--print_run_commands",
|
||||
action="store_true",
|
||||
help="Generate commands and saves them to run.sh",
|
||||
)
|
||||
group.add_argument(
|
||||
"--visualize-logs",
|
||||
"--visualize_logs",
|
||||
action="store_true",
|
||||
help="Pretty print the log files and draw graphs",
|
||||
@ -265,7 +267,11 @@ def parse_args():
|
||||
help="Github CLI path",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--batch_size", type=int, default=None, help="batch size for benchmarking"
|
||||
"--batch-size",
|
||||
"--batch_size",
|
||||
type=int,
|
||||
default=None,
|
||||
help="batch size for benchmarking",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--threads",
|
||||
@ -276,12 +282,14 @@ def parse_args():
|
||||
)
|
||||
launcher_group = parser.add_argument_group("CPU Launcher Parameters")
|
||||
launcher_group.add_argument(
|
||||
"--enable-cpu-launcher",
|
||||
"--enable_cpu_launcher",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Use torch.backends.xeon.run_cpu to get the peak performance on Intel(R) Xeon(R) Scalable Processors.",
|
||||
)
|
||||
launcher_group.add_argument(
|
||||
"--cpu-launcher-args",
|
||||
"--cpu_launcher_args",
|
||||
type=str,
|
||||
default="",
|
||||
@ -370,10 +378,10 @@ def generate_commands(args, dtypes, suites, devices, compilers, output_dir):
|
||||
"inductor",
|
||||
"inductor_no_cudagraphs",
|
||||
):
|
||||
cmd = f"{cmd} --cold_start_latency"
|
||||
cmd = f"{cmd} --cold-start-latency"
|
||||
|
||||
if args.batch_size is not None:
|
||||
cmd = f"{cmd} --batch_size {args.batch_size}"
|
||||
cmd = f"{cmd} --batch-size {args.batch_size}"
|
||||
|
||||
if args.threads is not None:
|
||||
cmd = f"{cmd} --threads {args.threads}"
|
||||
|
@ -36,7 +36,7 @@ class TestDynamoBenchmark(unittest.TestCase):
|
||||
"--performance",
|
||||
"--only=BERT_pytorch",
|
||||
"-n1",
|
||||
"--batch_size=1",
|
||||
"--batch-size=1",
|
||||
]
|
||||
)
|
||||
run(TorchBenchmarkRunner(), args, original_dir)
|
||||
|
@ -209,7 +209,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--warmup', default='10', type=int)
|
||||
parser.add_argument('--nloops', default='100', type=int)
|
||||
parser.add_argument('--device', default='cuda', type=str)
|
||||
parser.add_argument('--variable_lstms', action='store_true',
|
||||
parser.add_argument('--variable-lstms', '--variable_lstms', action='store_true',
|
||||
help='Also benchmark variable sequence length lstms '
|
||||
'Note that some of these run really slowly '
|
||||
'and that the `seqLength` flag will be ignored.')
|
||||
@ -224,9 +224,9 @@ if __name__ == '__main__':
|
||||
help='The fuser backend to use. One of: te, old, or none')
|
||||
parser.add_argument('--executor', default=None, type=str,
|
||||
help='The executor to use. One of: legacy, simple, profiling')
|
||||
parser.add_argument('--cuda_pointwise_loop_level', default=None, type=int)
|
||||
parser.add_argument('--cuda_pointwise_block_count', default=None, type=int)
|
||||
parser.add_argument('--cuda_pointwise_block_size', default=None, type=int)
|
||||
parser.add_argument('--cuda-pointwise-loop-level', '--cuda_pointwise_loop_level', default=None, type=int)
|
||||
parser.add_argument('--cuda-pointwise-block-count', '--cuda_pointwise_block_count', default=None, type=int)
|
||||
parser.add_argument('--cuda-pointwise-block-size', '--cuda_pointwise_block_size', default=None, type=int)
|
||||
|
||||
args = parser.parse_args()
|
||||
set_fuser(args.fuser, args.executor)
|
||||
|
@ -95,7 +95,7 @@ def full_profile(rnns, **args):
|
||||
for k, v in args.items():
|
||||
profile_args.append('--{}={}'.format(k, v))
|
||||
profile_args.append('--rnns {}'.format(' '.join(rnns)))
|
||||
profile_args.append('--internal_run')
|
||||
profile_args.append('--internal-run')
|
||||
|
||||
outpath = nvprof_output_filename(rnns, **args)
|
||||
|
||||
@ -114,7 +114,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--inputSize', default='512', type=int)
|
||||
parser.add_argument('--hiddenSize', default='512', type=int)
|
||||
parser.add_argument('--miniBatch', default='64', type=int)
|
||||
parser.add_argument('--sleep_between_seconds', default='1', type=int)
|
||||
parser.add_argument('--sleep-between-seconds', '--sleep_between_seconds', default='1', type=int)
|
||||
parser.add_argument('--nloops', default='5', type=int)
|
||||
|
||||
parser.add_argument('--rnns', nargs='*',
|
||||
@ -122,7 +122,7 @@ if __name__ == '__main__':
|
||||
|
||||
# if internal_run, we actually run the rnns.
|
||||
# if not internal_run, we shell out to nvprof with internal_run=T
|
||||
parser.add_argument('--internal_run', default=False, action='store_true',
|
||||
parser.add_argument('--internal-run', '--internal_run', default=False, action='store_true',
|
||||
help='Don\'t use this')
|
||||
args = parser.parse_args()
|
||||
if args.rnns is None:
|
||||
|
@ -128,8 +128,8 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--hiddenSize', default='512', type=int)
|
||||
parser.add_argument('--miniBatch', default='64', type=int)
|
||||
parser.add_argument('--device', default='cuda', type=str)
|
||||
parser.add_argument('--check_grad', default='True', type=bool)
|
||||
parser.add_argument('--variable_lstms', action='store_true')
|
||||
parser.add_argument('--check-grad', '--check_grad', default='True', type=bool)
|
||||
parser.add_argument('--variable-lstms', '--variable_lstms', action='store_true')
|
||||
parser.add_argument('--seed', default='17', type=int)
|
||||
parser.add_argument('--verbose', action='store_true')
|
||||
parser.add_argument('--rnns', nargs='*',
|
||||
|
@ -15,12 +15,12 @@ Graph can be saved via save option. Saved in the directory where benchmark is ru
|
||||
Example build/run:
|
||||
To run PT benchmark:
|
||||
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
|
||||
--add_op --graph_mode --eager_mode (Runs both graph mode and eager mode)
|
||||
--add-op --graph-mode --eager-mode (Runs both graph mode and eager mode)
|
||||
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
|
||||
--add_op --graph_mode (Runs only graph mode)
|
||||
--add-op --graph-mode (Runs only graph mode)
|
||||
To run C2 benchmark:
|
||||
buck run @mode/opt <path-to-framework_overhead_benchmark>:framework_overhead_benchmark --
|
||||
--add_op --benchmark_c2_net
|
||||
--add-op --benchmark-c2-net
|
||||
"""
|
||||
|
||||
SUPPORTED_OPS = {"add_op"}
|
||||
@ -64,13 +64,25 @@ def benchmark_simple_fn(args, config, module_config, module_type, result):
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--op", default="add_op", dest="op", type=str)
|
||||
parser.add_argument("--benchmark_c2_net", default=False, dest="benchmark_c2_net", action="store_true")
|
||||
parser.add_argument("--use_throughput_benchmark", default=False, dest="use_throughput_benchmark", action="store_true")
|
||||
parser.add_argument(
|
||||
"--benchmark-c2-net",
|
||||
"--benchmark_c2_net",
|
||||
default=False,
|
||||
dest="benchmark_c2_net",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-throughput-benchmark",
|
||||
"--use_throughput_benchmark",
|
||||
default=False,
|
||||
dest="use_throughput_benchmark",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument("--debug", default=False, dest="debug", action="store_true")
|
||||
parser.add_argument("--save", default=False, dest="save", action="store_true")
|
||||
parser.add_argument("--eager_mode", default=False, dest="eager_mode", action="store_true")
|
||||
parser.add_argument("--num_warmup_iters", type=int, default=100)
|
||||
parser.add_argument("--num_iters", type=int, default=1000)
|
||||
parser.add_argument("--eager-mode", "--eager_mode", default=False, dest="eager_mode", action="store_true")
|
||||
parser.add_argument("--num-warmup-iters", "--num_warmup_iters", type=int, default=100)
|
||||
parser.add_argument("--num-iters", "--num_iters", type=int, default=1000)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.op not in SUPPORTED_OPS:
|
||||
|
@ -100,7 +100,7 @@ class _BenchmarkProcess:
|
||||
|
||||
cmd.extend([
|
||||
_PYTHON, WORKER_PATH,
|
||||
"--communication_file", self._communication_file,
|
||||
"--communication-file", self._communication_file,
|
||||
])
|
||||
return " ".join(cmd)
|
||||
|
||||
|
@ -183,6 +183,6 @@ def main(communication_file: str) -> None:
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--communication_file', type=str)
|
||||
parser.add_argument('--communication-file', '--communication_file', type=str)
|
||||
communication_file = parser.parse_args().communication_file
|
||||
main(communication_file)
|
||||
|
@ -28,19 +28,19 @@ $ python setup.py install
|
||||
Run `torch.add` benchmark:
|
||||
```
|
||||
$ cd pytorch/benchmarks/operator_benchmark
|
||||
$ python -m pt.add_test --omp_num_threads 1 --mkl_num_threads 1
|
||||
$ python -m pt.add_test --omp-num-threads 1 --mkl-num-threads 1
|
||||
```
|
||||
Note: we set the number of OpenMP and MKL threads both to 1. If you want to benchmark operators with multithreading (intra-op parallelism), use the `--omp_num_threads` and `--mkl_num_threads` flags.
|
||||
Note: we set the number of OpenMP and MKL threads both to 1. If you want to benchmark operators with multithreading (intra-op parallelism), use the `--omp-num-threads` and `--mkl-num-threads` flags.
|
||||
|
||||
List all the supported tests:
|
||||
```
|
||||
$ python -m pt.add_test --list_tests
|
||||
$ python -m pt.add_test --list-tests
|
||||
```
|
||||
|
||||
Filter and run a test (use `add_M8_N16_K32` as an example):
|
||||
```
|
||||
$ python -m pt.add_test --test_name add_K32_M8_N1
|
||||
--omp_num_threads 1 --mkl_num_threads 1
|
||||
$ python -m pt.add_test --test-name add_K32_M8_N1
|
||||
--omp-num-threads 1 --mkl-num-threads 1
|
||||
```
|
||||
|
||||
Run all the supported benchmarks:
|
||||
@ -121,28 +121,28 @@ $ python benchmark_runner.py --help
|
||||
|
||||
Run all the supported benchmarks:
|
||||
```
|
||||
$ python -m benchmark_all_test --omp_num_threads 1 --mkl_num_threads 1
|
||||
$ python -m benchmark_all_test --omp-num-threads 1 --mkl-num-threads 1
|
||||
```
|
||||
|
||||
List all the supported operators:
|
||||
```
|
||||
$ python -m benchmark_all_test --list_ops
|
||||
$ python -m benchmark_all_test --list-ops
|
||||
```
|
||||
|
||||
List all the supported tests:
|
||||
```
|
||||
$ python -m benchmark_all_test --list_tests
|
||||
$ python -m benchmark_all_test --list-tests
|
||||
```
|
||||
|
||||
Filter and run an operator (use add as an example):
|
||||
```
|
||||
$ python -m benchmark_all_test --operators add --omp_num_threads 1 --mkl_num_threads 1
|
||||
$ python -m benchmark_all_test --operators add --omp-num-threads 1 --mkl-num-threads 1
|
||||
```
|
||||
Note: this filter is based on the operator name rather than the file name.
|
||||
|
||||
Run torch.add benchmark with tag 'long':
|
||||
```
|
||||
$ python -m pt.add_test --tag_filter long
|
||||
$ python -m pt.add_test --tag-filter long
|
||||
```
|
||||
|
||||
## Adding New Operators to the Benchmark Suite
|
||||
|
@ -17,6 +17,7 @@ parser = argparse.ArgumentParser(
|
||||
|
||||
def parse_args():
|
||||
parser.add_argument(
|
||||
'--tag-filter',
|
||||
'--tag_filter',
|
||||
help='tag_filter can be used to run the shapes which matches the tag. (all is used to run all the shapes)',
|
||||
default='short')
|
||||
@ -28,21 +29,25 @@ def parse_args():
|
||||
default=None)
|
||||
|
||||
parser.add_argument(
|
||||
'--operator-range',
|
||||
'--operator_range',
|
||||
help='Filter tests based on operator_range(e.g. a-c or b,c-d)',
|
||||
default=None)
|
||||
|
||||
parser.add_argument(
|
||||
'--test-name',
|
||||
'--test_name',
|
||||
help='Run tests that have the provided test_name',
|
||||
default=None)
|
||||
|
||||
parser.add_argument(
|
||||
'--list-ops',
|
||||
'--list_ops',
|
||||
help='List operators without running them',
|
||||
action='store_true')
|
||||
|
||||
parser.add_argument(
|
||||
'--list-tests',
|
||||
'--list_tests',
|
||||
help='List all test cases without running them',
|
||||
action='store_true')
|
||||
@ -54,6 +59,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--num-runs",
|
||||
"--num_runs",
|
||||
help="Run each test for num_runs. Each run executes an operator for number of <--iterations>",
|
||||
type=int,
|
||||
@ -61,6 +67,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--min-time-per-test",
|
||||
"--min_time_per_test",
|
||||
help="Set the minimum time (unit: seconds) to run each test",
|
||||
type=int,
|
||||
@ -68,6 +75,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--warmup-iterations",
|
||||
"--warmup_iterations",
|
||||
help="Number of iterations to ignore before measuring performance",
|
||||
default=100,
|
||||
@ -75,6 +83,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--omp-num-threads",
|
||||
"--omp_num_threads",
|
||||
help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
|
||||
default=None,
|
||||
@ -82,6 +91,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--mkl-num-threads",
|
||||
"--mkl_num_threads",
|
||||
help="Number of MKL threads used in PyTorch/Caffe2 runtime",
|
||||
default=None,
|
||||
@ -89,6 +99,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--report-aibench",
|
||||
"--report_aibench",
|
||||
type=benchmark_utils.str2bool,
|
||||
nargs='?',
|
||||
@ -98,6 +109,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--use-jit",
|
||||
"--use_jit",
|
||||
type=benchmark_utils.str2bool,
|
||||
nargs='?',
|
||||
@ -107,6 +119,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--forward-only",
|
||||
"--forward_only",
|
||||
type=benchmark_utils.str2bool,
|
||||
nargs='?',
|
||||
|
@ -30,15 +30,15 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Profiler benchmark')
|
||||
|
||||
parser.add_argument('--with_cuda', action='store_true')
|
||||
parser.add_argument('--with_stack', action='store_true')
|
||||
parser.add_argument('--use_script', action='store_true')
|
||||
parser.add_argument('--use_kineto', action='store_true')
|
||||
parser.add_argument('--profiling_tensor_size', default=1, type=int)
|
||||
parser.add_argument('--workload', default='loop', type=str)
|
||||
parser.add_argument('--internal_iter', default=256, type=int)
|
||||
parser.add_argument('--timer_min_run_time', default=10, type=int)
|
||||
parser.add_argument('--cuda_only', action='store_true')
|
||||
parser.add_argument('--with-cuda', '--with_cuda', action='store_true')
|
||||
parser.add_argument('--with-stack', '--with_stack', action='store_true')
|
||||
parser.add_argument('--use-script', '--use_script', action='store_true')
|
||||
parser.add_argument('--use-kineto', '--use_kineto', action='store_true')
|
||||
parser.add_argument('--profiling-tensor-size', '--profiling_tensor_size', default=1, type=int)
|
||||
parser.add_argument('--workload', '--workload', default='loop', type=str)
|
||||
parser.add_argument('--internal-iter', '--internal_iter', default=256, type=int)
|
||||
parser.add_argument('--timer-min-run-time', '--timer_min_run_time', default=10, type=int)
|
||||
parser.add_argument('--cuda-only', '--cuda_only', action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -92,7 +92,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--lstmMiniBatch', default='64', type=int)
|
||||
parser.add_argument('--warmup', default='2', type=int)
|
||||
parser.add_argument('--nloops', default='50', type=int)
|
||||
parser.add_argument('--timer_min_run_time', default=120, type=int)
|
||||
parser.add_argument('--timer-min-run-time', '--timer_min_run_time', default=120, type=int)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -4,7 +4,7 @@ These sets of benchmarks are for the sparse matrix functionality using a popular
|
||||
|
||||
Performance benchmarks scripts for matrix-matrix and matrix-vector ops (dense-sparse, sparse-sparse, and compare to dense-dense) are implemented here.
|
||||
|
||||
- `matmul_bench.py` with `--operation sparse@sparse|sparse@dense` is for Sparse matrix-matrix multiplication (SPMM) performance test. It can run in forward and backward mode with `--backward_test`, on CPU or CUDA with `--with_cuda`, using different datasets from the dataset collection DLMC. For more details see `test.sh` file.
|
||||
- `matmul_bench.py` with `--operation sparse@sparse|sparse@dense` is for Sparse matrix-matrix multiplication (SPMM) performance test. It can run in forward and backward mode with `--backward-test`, on CPU or CUDA with `--with-cuda`, using different datasets from the dataset collection DLMC. For more details see `test.sh` file.
|
||||
|
||||
- `matmul_bench.py` with `--operation sparse@vector` is for Sparse matrix-vector multiplication (SPMV) performance test.
|
||||
|
||||
|
@ -41,11 +41,11 @@ def parse_args():
|
||||
parser = argparse.ArgumentParser(description='matmul benchmark')
|
||||
parser.add_argument('--path', type=str, help='DLMC dataset path')
|
||||
parser.add_argument('--dataset', type=str, default='magnitude_pruning')
|
||||
parser.add_argument('--hidden_size', default=2048, type=int)
|
||||
parser.add_argument('--backward_test', action="store_true")
|
||||
parser.add_argument('--hidden-size', '--hidden_size', default=2048, type=int)
|
||||
parser.add_argument('--backward-test', '--backward_test', action="store_true")
|
||||
parser.add_argument('--operation', type=str, help="|".join(OPS_MAP.keys()), default=next(iter(OPS_MAP)))
|
||||
parser.add_argument('--with_cuda', action='store_true')
|
||||
parser.add_argument('--timer_min_run_time', default=1, type=float)
|
||||
parser.add_argument('--with-cuda', '--with_cuda', action='store_true')
|
||||
parser.add_argument('--timer-min-run-time', '--timer_min_run_time', default=1, type=float)
|
||||
return parser
|
||||
|
||||
|
||||
|
@ -8,20 +8,20 @@ DATASET_ROOT_DIR=$HOME/datasets/
|
||||
echo "!! SPARSE SPMS TIME BENCHMARK!! "
|
||||
|
||||
# cpu
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --backward_test
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --backward-test
|
||||
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --backward_test
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --backward-test
|
||||
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@vector
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@vector
|
||||
|
||||
|
||||
# cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --with_cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --with_cuda--backward_test
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --with-cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@sparse --with-cuda --backward-test
|
||||
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --with_cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --with_cuda --backward_test
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --with-cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@dense --with-cuda --backward-test
|
||||
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@vector --with_cuda
|
||||
python -m dlmc.matmul_bench --path $DATASET_ROOT_DIR/dlmc/rn50 --dataset magnitude_pruning --operation sparse@vector --with-cuda
|
||||
|
@ -70,9 +70,9 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--m", default='1000', type=int)
|
||||
parser.add_argument("--n", default='1000', type=int)
|
||||
parser.add_argument("--k", default='1000', type=int)
|
||||
parser.add_argument("--nnz_ratio", default='0.1', type=float)
|
||||
parser.add_argument("--nnz-ratio", "--nnz_ratio", default='0.1', type=float)
|
||||
parser.add_argument("--outfile", default='stdout', type=str)
|
||||
parser.add_argument("--test_count", default='10', type=int)
|
||||
parser.add_argument("--test-count", "--test_count", default='10', type=int)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -68,9 +68,9 @@ if __name__ == "__main__":
|
||||
|
||||
parser.add_argument("--format", default='csr', type=str)
|
||||
parser.add_argument("--m", default='1000', type=int)
|
||||
parser.add_argument("--nnz_ratio", default='0.1', type=float)
|
||||
parser.add_argument("--nnz-ratio", "--nnz_ratio", default='0.1', type=float)
|
||||
parser.add_argument("--outfile", default='stdout', type=str)
|
||||
parser.add_argument("--test_count", default='10', type=int)
|
||||
parser.add_argument("--test-count", "--test_count", default='10', type=int)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -18,8 +18,8 @@ cd benchmarks
|
||||
echo "!! SPARSE SPMM TIME BENCHMARK!! " >> $OUTFILE
|
||||
for dim0 in 1000 5000 10000; do
|
||||
for nnzr in 0.01 0.05 0.1 0.3; do
|
||||
python -m sparse.spmm --format csr --m $dim0 --n $dim0 --k $dim0 --nnz_ratio $nnzr --outfile $OUTFILE
|
||||
# python -m sparse.spmm --format coo --m $dim0 --n $dim0 --k $dim0 --nnz_ratio $nnzr --outfile $OUTFILE
|
||||
python -m sparse.spmm --format csr --m $dim0 --n $dim0 --k $dim0 --nnz-ratio $nnzr --outfile $OUTFILE
|
||||
# python -m sparse.spmm --format coo --m $dim0 --n $dim0 --k $dim0 --nnz-ratio $nnzr --outfile $OUTFILE
|
||||
done
|
||||
done
|
||||
echo "----------------------" >> $OUTFILE
|
||||
@ -34,8 +34,8 @@ python setup.py install
|
||||
cd benchmarks
|
||||
for dim0 in 1000 5000 10000; do
|
||||
for nnzr in 0.01 0.05 0.1 0.3; do
|
||||
python -m sparse.spmv --format csr --m $dim0 --nnz_ratio $nnzr --outfile $OUTFILE
|
||||
python -m sparse.spmv --format coo --m $dim0 --nnz_ratio $nnzr --outfile $OUTFILE
|
||||
python -m sparse.spmv --format csr --m $dim0 --nnz-ratio $nnzr --outfile $OUTFILE
|
||||
python -m sparse.spmv --format coo --m $dim0 --nnz-ratio $nnzr --outfile $OUTFILE
|
||||
done
|
||||
done
|
||||
echo "----------------------" >> $OUTFILE
|
||||
|
@ -6,5 +6,5 @@ to show documentation.
|
||||
|
||||
An example of an actual command line that one might use as a starting point:
|
||||
```
|
||||
python -m benchmarks.tensorexpr --device gpu --mode fwd --jit_mode trace --cuda_fuser=te
|
||||
python -m benchmarks.tensorexpr --device gpu --mode fwd --jit-mode trace --cuda-fuser=te
|
||||
```
|
||||
|
@ -67,30 +67,35 @@ Works only with Python3.\n A few examples:
|
||||
help="the underlying tensor engine. only pt for now",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--jit-mode",
|
||||
"--jit_mode",
|
||||
type=str,
|
||||
default="trace",
|
||||
help="the jit mode to use: one of {trace, none}",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-pointwise-loop-levels",
|
||||
"--cuda_pointwise_loop_levels",
|
||||
type=int,
|
||||
default=None,
|
||||
help="num of loop levesl for Cuda pointwise operations: 2 or 3",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-pointwise-block-count",
|
||||
"--cuda_pointwise_block_count",
|
||||
type=int,
|
||||
default=None,
|
||||
help="num of block for Cuda pointwise operations",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-pointwise-block-size",
|
||||
"--cuda_pointwise_block_size",
|
||||
type=int,
|
||||
default=None,
|
||||
help="num of blocks for Cuda pointwise operations",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cuda-fuser",
|
||||
"--cuda_fuser",
|
||||
type=str,
|
||||
default="te",
|
||||
@ -118,12 +123,14 @@ Works only with Python3.\n A few examples:
|
||||
help="Disable shape randomization in dynamic benchmarks.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cpu-fusion",
|
||||
"--cpu_fusion",
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Enable CPU fusion.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cat-wo-conditionals",
|
||||
"--cat_wo_conditionals",
|
||||
default=False,
|
||||
action='store_true',
|
||||
|
@ -247,7 +247,7 @@ def dump_plot(df, sizes):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Runs NNC microbenchmarks')
|
||||
parser.add_argument('--multi_threaded', action='store_true', help='Run with more than one thread')
|
||||
parser.add_argument('--multi-threaded', '--multi_threaded', action='store_true', help='Run with more than one thread')
|
||||
args = parser.parse_args()
|
||||
if not args.multi_threaded:
|
||||
torch.set_num_threads(1)
|
||||
|
@ -185,8 +185,8 @@ def main(save_path: Optional[Path], error_path: Optional[Path]):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--save_path", type=str, help="Path to save the results")
|
||||
parser.add_argument("--error_save_path", type=str, help="Path to save the errors")
|
||||
parser.add_argument("--save-path", "--save_path", type=str, help="Path to save the results")
|
||||
parser.add_argument("--error-save-path", "--error_save_path", type=str, help="Path to save the errors")
|
||||
|
||||
args = parser.parse_args()
|
||||
save_path = Path(args.save_path) if args.save_path else None
|
||||
|
@ -339,7 +339,7 @@ def main(save_path: Optional[Path]):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--save_path", type=str, help="Path to save the results")
|
||||
parser.add_argument("--save-path", "--save_path", type=str, help="Path to save the results")
|
||||
|
||||
args = parser.parse_args()
|
||||
save_path = Path(args.save_path) if args.save_path else None
|
||||
|
@ -129,7 +129,7 @@ class PytorchBenchmarkUploader(ScribeUploader):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--pytest_bench_json", type=argparse.FileType('r'),
|
||||
parser.add_argument("--pytest-bench-json", "--pytest_bench_json", type=argparse.FileType('r'),
|
||||
help='Upload json data formatted by pytest-benchmark module')
|
||||
args = parser.parse_args()
|
||||
if args.pytest_bench_json:
|
||||
|
@ -67,16 +67,16 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--context", help="Context to run on.", default="CPU")
|
||||
parser.add_argument("--kwargs", help="kwargs to pass to operator.",
|
||||
nargs="*", type=parse_kwarg, default=[])
|
||||
parser.add_argument("--init_net", help="Output initialization net.",
|
||||
parser.add_argument("--init-net", "--init_net", help="Output initialization net.",
|
||||
default="init_net.pb")
|
||||
parser.add_argument("--predict_net", help="Output prediction net.",
|
||||
parser.add_argument("--predict-net", "--predict_net", help="Output prediction net.",
|
||||
default="predict_net.pb")
|
||||
parser.add_argument("--benchmark_name",
|
||||
parser.add_argument("--benchmark-name", "--benchmark_name",
|
||||
help="Name of the benchmark network",
|
||||
default="benchmark")
|
||||
parser.add_argument("--input_name", help="Name of the input blob.",
|
||||
parser.add_argument("--input-name", "--input_name", help="Name of the input blob.",
|
||||
default="data")
|
||||
parser.add_argument("--output_name", help="Name of the output blob.",
|
||||
parser.add_argument("--output-name", "--output_name", help="Name of the output blob.",
|
||||
default="output")
|
||||
parser.add_argument("--instances",
|
||||
help="Number of instances to run the operator.",
|
||||
|
@ -7,11 +7,11 @@ To launch a **fault-tolerant** job, run the following on all nodes.
|
||||
|
||||
torchrun
|
||||
--nnodes=NUM_NODES
|
||||
--nproc_per_node=TRAINERS_PER_NODE
|
||||
--max_restarts=NUM_ALLOWED_FAILURES
|
||||
--rdzv_id=JOB_ID
|
||||
--rdzv_backend=c10d
|
||||
--rdzv_endpoint=HOST_NODE_ADDR
|
||||
--nproc-per-node=TRAINERS_PER_NODE
|
||||
--max-restarts=NUM_ALLOWED_FAILURES
|
||||
--rdzv-id=JOB_ID
|
||||
--rdzv-backend=c10d
|
||||
--rdzv-endpoint=HOST_NODE_ADDR
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
|
||||
@ -22,18 +22,18 @@ and at most ``MAX_SIZE`` nodes.
|
||||
|
||||
torchrun
|
||||
--nnodes=MIN_SIZE:MAX_SIZE
|
||||
--nproc_per_node=TRAINERS_PER_NODE
|
||||
--max_restarts=NUM_ALLOWED_FAILURES_OR_MEMBERSHIP_CHANGES
|
||||
--rdzv_id=JOB_ID
|
||||
--rdzv_backend=c10d
|
||||
--rdzv_endpoint=HOST_NODE_ADDR
|
||||
--nproc-per-node=TRAINERS_PER_NODE
|
||||
--max-restarts=NUM_ALLOWED_FAILURES_OR_MEMBERSHIP_CHANGES
|
||||
--rdzv-id=JOB_ID
|
||||
--rdzv-backend=c10d
|
||||
--rdzv-endpoint=HOST_NODE_ADDR
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
.. note::
|
||||
TorchElastic models failures as membership changes. When a node fails,
|
||||
this is treated as a "scale down" event. When the failed node is replaced by
|
||||
the scheduler, it is a "scale up" event. Hence for both fault tolerant
|
||||
and elastic jobs, ``--max_restarts`` is used to control the total number of
|
||||
and elastic jobs, ``--max-restarts`` is used to control the total number of
|
||||
restarts before giving up, regardless of whether the restart was caused
|
||||
due to a failure or a scaling event.
|
||||
|
||||
@ -47,8 +47,8 @@ ideally you should pick a node that has a high bandwidth.
|
||||
|
||||
.. note::
|
||||
The ``--standalone`` option can be passed to launch a single node job with a
|
||||
sidecar rendezvous backend. You don’t have to pass ``--rdzv_id``,
|
||||
``--rdzv_endpoint``, and ``--rdzv_backend`` when the ``--standalone`` option
|
||||
sidecar rendezvous backend. You don’t have to pass ``--rdzv-id``,
|
||||
``--rdzv-endpoint``, and ``--rdzv-backend`` when the ``--standalone`` option
|
||||
is used.
|
||||
|
||||
|
||||
|
@ -21,7 +21,7 @@ working with ``torchrun`` with these differences:
|
||||
(see `elastic launch <run.html>`_).
|
||||
|
||||
4. ``use_env`` flag has been removed. If you were parsing local rank by parsing
|
||||
the ``--local_rank`` option, you need to get the local rank from the
|
||||
the ``--local-rank`` option, you need to get the local rank from the
|
||||
environment variable ``LOCAL_RANK`` (e.g. ``int(os.environ["LOCAL_RANK"])``).
|
||||
|
||||
Below is an expository example of a training script that checkpoints on each
|
||||
|
@ -449,6 +449,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--clip-per-layer",
|
||||
"--clip_per_layer",
|
||||
action="store_true",
|
||||
default=False,
|
||||
|
@ -472,6 +472,7 @@ def parse_args():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--clip-per-layer",
|
||||
"--clip_per_layer",
|
||||
action="store_true",
|
||||
default=False,
|
||||
|
@ -46,15 +46,15 @@ plt.style.use('bmh')
|
||||
|
||||
def main():
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument('--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument('--n-way', '--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_spt', type=int, help='k shot for support set', default=5)
|
||||
'--k-spt', '--k_spt', type=int, help='k shot for support set', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_qry', type=int, help='k shot for query set', default=15)
|
||||
'--k-qry', '--k_qry', type=int, help='k shot for query set', default=15)
|
||||
argparser.add_argument(
|
||||
'--device', type=str, help='device', default='cuda')
|
||||
argparser.add_argument(
|
||||
'--task_num',
|
||||
'--task-num', '--task_num',
|
||||
type=int,
|
||||
help='meta batch size, namely task num',
|
||||
default=32)
|
||||
|
@ -46,15 +46,15 @@ plt.style.use('bmh')
|
||||
|
||||
def main():
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument('--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument('--n-way', '--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_spt', type=int, help='k shot for support set', default=5)
|
||||
'--k-spt', '--k_spt', type=int, help='k shot for support set', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_qry', type=int, help='k shot for query set', default=15)
|
||||
'--k-qry', '--k_qry', type=int, help='k shot for query set', default=15)
|
||||
argparser.add_argument(
|
||||
'--device', type=str, help='device', default='cuda')
|
||||
argparser.add_argument(
|
||||
'--task_num',
|
||||
'--task-num', '--task_num',
|
||||
type=int,
|
||||
help='meta batch size, namely task num',
|
||||
default=32)
|
||||
|
@ -47,15 +47,15 @@ plt.style.use('bmh')
|
||||
|
||||
def main():
|
||||
argparser = argparse.ArgumentParser()
|
||||
argparser.add_argument('--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument('--n-way', '--n_way', type=int, help='n way', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_spt', type=int, help='k shot for support set', default=5)
|
||||
'--k-spt', '--k_spt', type=int, help='k shot for support set', default=5)
|
||||
argparser.add_argument(
|
||||
'--k_qry', type=int, help='k shot for query set', default=15)
|
||||
'--k-qry', '--k_qry', type=int, help='k shot for query set', default=15)
|
||||
argparser.add_argument(
|
||||
'--device', type=str, help='device', default='cuda')
|
||||
argparser.add_argument(
|
||||
'--task_num',
|
||||
'--task-num', '--task_num',
|
||||
type=int,
|
||||
help='meta batch size, namely task num',
|
||||
default=32)
|
||||
|
@ -17,11 +17,11 @@ Example Usages
|
||||
Create a new commitlist for consumption by categorize.py.
|
||||
Said commitlist contains commits between v1.5.0 and f5bc91f851.
|
||||
|
||||
python commitlist.py --create_new tags/v1.5.0 f5bc91f851
|
||||
python commitlist.py --create-new tags/v1.5.0 f5bc91f851
|
||||
|
||||
Update the existing commitlist to commit bfcb687b9c.
|
||||
|
||||
python commitlist.py --update_to bfcb687b9c
|
||||
python commitlist.py --update-to bfcb687b9c
|
||||
|
||||
"""
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
@ -342,16 +342,16 @@ def main():
|
||||
parser = argparse.ArgumentParser(description='Tool to create a commit list')
|
||||
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--create_new', nargs=2)
|
||||
group.add_argument('--update_to')
|
||||
group.add_argument('--create-new', '--create_new', nargs=2)
|
||||
group.add_argument('--update-to', '--update_to')
|
||||
# I found this flag useful when experimenting with adding new auto-categorizing filters.
|
||||
# After running commitlist.py the first time, if you add any new filters in this file,
|
||||
# re-running with "rerun_with_new_filters" will update the existing commitlist.csv file,
|
||||
# but only affect the rows that were previously marked as "Uncategorized"
|
||||
group.add_argument('--rerun_with_new_filters', action='store_true')
|
||||
group.add_argument('--rerun-with-new-filters', '--rerun_with_new_filters', action='store_true')
|
||||
group.add_argument('--stat', action='store_true')
|
||||
group.add_argument('--export_markdown', action='store_true')
|
||||
group.add_argument('--export_csv_categories', action='store_true')
|
||||
group.add_argument('--export-markdown', '--export_markdown', action='store_true')
|
||||
group.add_argument('--export-csv-categories', '--export_csv_categories', action='store_true')
|
||||
parser.add_argument('--path', default='results/commitlist.csv')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -52,8 +52,8 @@ class TestTorchrun(TestCase):
|
||||
|
||||
def test_multi_threads(self):
|
||||
num = 0
|
||||
with subprocess.Popen(f"python -m torch.backends.xeon.run_cpu --ninstances 4 --use_default_allocator \
|
||||
--disable_iomp --disable_numactl --log_path {self._test_dir} --no_python pwd",
|
||||
with subprocess.Popen(f"python -m torch.backends.xeon.run_cpu --ninstances 4 --use-default-allocator \
|
||||
--disable-iomp --disable-numactl --log-path {self._test_dir} --no-python pwd",
|
||||
shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
|
||||
for line in p.stdout.readlines():
|
||||
segs = str(line, "utf-8").strip().split("-")
|
||||
|
@ -92,7 +92,7 @@ def elastic_launch_wrapper(
|
||||
rdzv_endpoint, min_nodes, max_nodes, nproc_per_node, run_id
|
||||
),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={test_dir}")
|
||||
|
||||
|
||||
def _dist_sum(wait=0):
|
||||
@ -163,7 +163,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
elastic_launch(
|
||||
get_test_launch_config(self._etcd_endpoint, nnodes, nnodes, nproc_per_node),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={self.test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={self.test_dir}")
|
||||
|
||||
# make sure all the workers ran.
|
||||
# each worker touches a file with its global rank as the name.
|
||||
@ -178,7 +178,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
elastic_launch(
|
||||
get_test_launch_config(self._etcd_endpoint, nnodes, nnodes, nproc_per_node),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={self.test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={self.test_dir}")
|
||||
|
||||
# make sure all the workers ran.
|
||||
# each worker touches a file with its global rank as the name.
|
||||
@ -248,7 +248,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
elastic_launch(
|
||||
get_test_launch_config(self._etcd_endpoint, 1, 2, nproc_per_node),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={self.test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={self.test_dir}")
|
||||
|
||||
world_size = nproc_per_node
|
||||
self.check_works_ran(world_size)
|
||||
@ -283,7 +283,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
elastic_launch(
|
||||
get_test_launch_config(self._etcd_endpoint, 1, 2, 4),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={self.test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={self.test_dir}")
|
||||
record_mock.assert_called_once()
|
||||
|
||||
@sandcastle_skip_if(TEST_WITH_DEV_DBG_ASAN, "test incompatible with dev/dbg asan")
|
||||
@ -345,7 +345,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
elastic_launch(
|
||||
get_test_launch_config(self._etcd_endpoint, 1, 1, 4),
|
||||
sys.executable,
|
||||
)("-u", path("bin/test_script.py"), f"--touch_file_dir={self.test_dir}")
|
||||
)("-u", path("bin/test_script.py"), f"--touch-file-dir={self.test_dir}")
|
||||
|
||||
rdzv_handler_mock.shutdown.assert_called_once()
|
||||
|
||||
|
@ -24,6 +24,7 @@ def parse_args():
|
||||
|
||||
# file is used for assertions
|
||||
parser.add_argument(
|
||||
"--touch-file-dir",
|
||||
"--touch_file_dir",
|
||||
type=str,
|
||||
help="dir to touch a file with global rank as the filename",
|
||||
|
@ -19,12 +19,14 @@ def parse_args():
|
||||
parser = argparse.ArgumentParser(description="test script")
|
||||
|
||||
parser.add_argument(
|
||||
"--init-method",
|
||||
"--init_method",
|
||||
type=str,
|
||||
required=True,
|
||||
help="init_method to pass to `dist.init_process_group()` (e.g. env://)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--world-size",
|
||||
"--world_size",
|
||||
type=int,
|
||||
default=os.getenv("WORLD_SIZE", -1),
|
||||
|
@ -27,6 +27,7 @@ import torch.distributed as dist
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="test script")
|
||||
parser.add_argument(
|
||||
"--out-file",
|
||||
"--out_file",
|
||||
help="file to write indicating whether this script was launched with torchelastic",
|
||||
)
|
||||
|
@ -15,6 +15,7 @@ def parse_args():
|
||||
parser = argparse.ArgumentParser(description="test script")
|
||||
|
||||
parser.add_argument(
|
||||
"--local-rank",
|
||||
"--local_rank",
|
||||
type=int,
|
||||
required=True,
|
||||
@ -31,7 +32,7 @@ def main():
|
||||
actual_rank = args.local_rank
|
||||
if expected_rank != actual_rank:
|
||||
raise RuntimeError(
|
||||
"Parameters passed: --local_rank that has different value "
|
||||
"Parameters passed: --local-rank that has different value "
|
||||
f"from env var: expected: {expected_rank}, got: {actual_rank}"
|
||||
)
|
||||
print("End execution")
|
||||
|
@ -47,12 +47,12 @@ class LaunchTest(unittest.TestCase):
|
||||
master_port = sock.getsockname()[1]
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={master_port}",
|
||||
"--node_rank=0",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={master_port}",
|
||||
"--node-rank=0",
|
||||
path("bin/test_script_local_rank.py"),
|
||||
]
|
||||
launch.main(args)
|
||||
@ -69,15 +69,15 @@ class LaunchTest(unittest.TestCase):
|
||||
master_port = sock.getsockname()[1]
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={master_port}",
|
||||
"--node_rank=0",
|
||||
"--use_env",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={master_port}",
|
||||
"--node-rank=0",
|
||||
"--use-env",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
# make sure all the workers ran
|
||||
|
@ -101,14 +101,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = nnodes * nproc_per_node
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
|
||||
@ -127,14 +127,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
master_port = sock.getsockname()[1]
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={master_port}",
|
||||
"--node_rank=0",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={master_port}",
|
||||
"--node-rank=0",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
|
||||
@ -152,19 +152,19 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = nnodes * nproc_per_node
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--no_python",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--no-python",
|
||||
]
|
||||
|
||||
script_args = [path("bin/test_script.sh"), f"{self.test_dir}"]
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
# --no_python cannot be used with --module
|
||||
# --no-python cannot be used with --module
|
||||
launch.main(args + ["--module"] + script_args)
|
||||
|
||||
launch.main(args + script_args)
|
||||
@ -182,18 +182,18 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = 1
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--no_python",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--no-python",
|
||||
]
|
||||
|
||||
script_args = [path("bin/test_script.sh"), f"{self.test_dir}"]
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
# --no_python cannot be used with --module
|
||||
# --no-python cannot be used with --module
|
||||
launch.main(args + ["--module"] + script_args)
|
||||
|
||||
launch.main(args + script_args)
|
||||
@ -223,7 +223,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
script_args = [path("bin/test_script.sh"), f"{self.test_dir}"]
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
# --no_python cannot be used with --module
|
||||
# --no-python cannot be used with --module
|
||||
os.environ["PET_MODULE"] = "1"
|
||||
launch.main(script_args)
|
||||
|
||||
@ -242,13 +242,13 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_type}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--no_python",
|
||||
f"--nproc-per-node={nproc_type}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--no-python",
|
||||
]
|
||||
|
||||
script_args = [path("bin/test_script.sh"), f"{self.test_dir}"]
|
||||
@ -292,14 +292,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = nproc_per_node
|
||||
args = [
|
||||
f"--nnodes={min_nodes}:{max_nodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
|
||||
@ -323,13 +323,13 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
nproc_per_node = 4
|
||||
args = [
|
||||
f"--nnodes={min_nodes}:{max_nodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--max_restarts=0",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--max-restarts=0",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
"--fail",
|
||||
]
|
||||
@ -354,15 +354,15 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
nproc_per_node = 4
|
||||
args = [
|
||||
f"--nnodes={min_nodes}:{max_nodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--max_restarts=0",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--max-restarts=0",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
|
||||
mock_agent_run.side_effect = MockException
|
||||
@ -377,12 +377,12 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = nnodes * nproc_per_node
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--standalone",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
|
||||
@ -398,13 +398,13 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
nproc_per_node = 4
|
||||
world_size = nnodes * nproc_per_node
|
||||
args = [
|
||||
"--run_path",
|
||||
"--run-path",
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
launch.main(args)
|
||||
|
||||
@ -424,14 +424,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
world_size = nnodes * nproc_per_node
|
||||
args = [
|
||||
f"--nnodes={min_nodes}:{max_nodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--rdzv_backend=etcd",
|
||||
f"--rdzv_endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv_id={run_id}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--rdzv-backend=etcd",
|
||||
f"--rdzv-endpoint={self._etcd_endpoint}",
|
||||
f"--rdzv-id={run_id}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
procs = []
|
||||
for _ in range(nnodes - 1):
|
||||
@ -466,11 +466,11 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
nproc_per_node = 4
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
path("bin/test_script.py"),
|
||||
f"--touch_file_dir={self.test_dir}",
|
||||
f"--touch-file-dir={self.test_dir}",
|
||||
]
|
||||
agent_mock = Mock()
|
||||
agent_mock.run.return_value = RunResult(WorkerState.SUCCEEDED)
|
||||
@ -492,12 +492,12 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
|
||||
launch.main(
|
||||
[
|
||||
"--run_path",
|
||||
"--run-path",
|
||||
"--nnodes=1",
|
||||
"--nproc_per_node=1",
|
||||
"--monitor_interval=1",
|
||||
"--nproc-per-node=1",
|
||||
"--monitor-interval=1",
|
||||
path("bin/test_script_is_torchelastic_launched.py"),
|
||||
f"--out_file={out_file}",
|
||||
f"--out-file={out_file}",
|
||||
]
|
||||
)
|
||||
|
||||
@ -519,7 +519,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
"argv",
|
||||
[
|
||||
path("bin/test_script_is_torchelastic_launched.py"),
|
||||
f"--out_file={out_file}",
|
||||
f"--out-file={out_file}",
|
||||
],
|
||||
):
|
||||
runpy.run_path(sys.argv[0], run_name="__main__")
|
||||
@ -534,9 +534,9 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
"argv",
|
||||
[
|
||||
path("bin/test_script_init_method.py"),
|
||||
f"--init_method=tcp://localhost:{port}",
|
||||
f"--init-method=tcp://localhost:{port}",
|
||||
"--rank=0",
|
||||
"--world_size=1",
|
||||
"--world-size=1",
|
||||
],
|
||||
):
|
||||
runpy.run_path(sys.argv[0], run_name="__main__")
|
||||
@ -547,14 +547,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
port = get_free_port()
|
||||
launch.main(
|
||||
[
|
||||
"--run_path",
|
||||
"--run-path",
|
||||
"--nnodes=1",
|
||||
"--nproc_per_node=4",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={port}",
|
||||
"--monitor_interval=1",
|
||||
"--nproc-per-node=4",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={port}",
|
||||
"--monitor-interval=1",
|
||||
path("bin/test_script_init_method.py"),
|
||||
f"--init_method=tcp://localhost:{port}",
|
||||
f"--init-method=tcp://localhost:{port}",
|
||||
]
|
||||
)
|
||||
# nothing to validate, just make sure it runs
|
||||
@ -574,7 +574,7 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
"argv",
|
||||
[
|
||||
path("bin/test_script_init_method.py"),
|
||||
"--init_method=env://",
|
||||
"--init-method=env://",
|
||||
],
|
||||
):
|
||||
runpy.run_path(sys.argv[0], run_name="__main__")
|
||||
@ -585,14 +585,14 @@ class ElasticLaunchTest(unittest.TestCase):
|
||||
port = get_free_port()
|
||||
launch.main(
|
||||
[
|
||||
"--run_path",
|
||||
"--run-path",
|
||||
"--nnodes=1",
|
||||
"--nproc_per_node=4",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={port}",
|
||||
"--monitor_interval=1",
|
||||
"--nproc-per-node=4",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={port}",
|
||||
"--monitor-interval=1",
|
||||
path("bin/test_script_init_method.py"),
|
||||
"--init_method=env://",
|
||||
"--init-method=env://",
|
||||
]
|
||||
)
|
||||
# nothing to validate, just make sure it runs
|
||||
|
@ -40,13 +40,13 @@ class TestDistributedLaunch(TestCase):
|
||||
master_port = sock.getsockname()[1]
|
||||
args = [
|
||||
f"--nnodes={nnodes}",
|
||||
f"--nproc_per_node={nproc_per_node}",
|
||||
"--monitor_interval=1",
|
||||
"--start_method=spawn",
|
||||
"--master_addr=localhost",
|
||||
f"--master_port={master_port}",
|
||||
"--node_rank=0",
|
||||
"--use_env",
|
||||
f"--nproc-per-node={nproc_per_node}",
|
||||
"--monitor-interval=1",
|
||||
"--start-method=spawn",
|
||||
"--master-addr=localhost",
|
||||
f"--master-port={master_port}",
|
||||
"--node-rank=0",
|
||||
"--use-env",
|
||||
path("bin/test_script.py"),
|
||||
]
|
||||
launch.main(args)
|
||||
|
@ -9,12 +9,12 @@ file(GLOB_RECURSE all_python "${TORCH_ROOT}/torchgen/*.py")
|
||||
set(GEN_COMMAND
|
||||
"${PYTHON_EXECUTABLE}" -m torchgen.gen_executorch
|
||||
--source-path=${TEST_ROOT}
|
||||
--install_dir=${OUTPUT_DIRECTORY}
|
||||
--install-dir=${OUTPUT_DIRECTORY}
|
||||
--tags-path=${TORCH_ROOT}/aten/src/ATen/native/tags.yaml
|
||||
--aten_yaml_path=${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml
|
||||
--use_aten_lib
|
||||
--op_selection_yaml_path=${TEST_ROOT}/selected_operators.yaml
|
||||
--custom_ops_yaml_path=${TEST_ROOT}/custom_ops.yaml
|
||||
--aten-yaml-path=${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml
|
||||
--use-aten-lib
|
||||
--op-selection-yaml-path=${TEST_ROOT}/selected_operators.yaml
|
||||
--custom-ops-yaml-path=${TEST_ROOT}/custom_ops.yaml
|
||||
)
|
||||
set(GEN_COMMAND_sources
|
||||
${OUTPUT_DIRECTORY}/RegisterCodegenUnboxedKernelsEverything.cpp
|
||||
|
@ -75,7 +75,7 @@ class TestFuser(JitTestCase):
|
||||
shell_env = os.environ.copy()
|
||||
shell_env['TMP'] = dname
|
||||
cmd = [sys.executable, os.path.basename(__file__), type(self).__name__ + '.test_abs_cpu']
|
||||
legacy_jit_flag = '--jit_executor=legacy'
|
||||
legacy_jit_flag = '--jit-executor=legacy'
|
||||
for v in sys.argv:
|
||||
if v == legacy_jit_flag:
|
||||
cmd.append(legacy_jit_flag)
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Owner(s): ["oncall: jit"]
|
||||
|
||||
import sys
|
||||
sys.argv.append("--jit_executor=legacy")
|
||||
sys.argv.append("--jit-executor=legacy")
|
||||
from test_jit_fuser import * # noqa: F403
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Owner(s): ["oncall: jit"]
|
||||
|
||||
import sys
|
||||
sys.argv.append("--jit_executor=legacy")
|
||||
sys.argv.append("--jit-executor=legacy")
|
||||
from test_jit import * # noqa: F403
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Owner(s): ["oncall: jit"]
|
||||
|
||||
import sys
|
||||
sys.argv.append("--jit_executor=profiling")
|
||||
sys.argv.append("--jit-executor=profiling")
|
||||
from test_jit import * # noqa: F403
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,7 +1,7 @@
|
||||
# Owner(s): ["oncall: jit"]
|
||||
|
||||
import sys
|
||||
sys.argv.append("--jit_executor=simple")
|
||||
sys.argv.append("--jit-executor=simple")
|
||||
from test_jit import * # noqa: F403
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -55,15 +55,15 @@ from torchgen.selective_build.selector import merge_kernel_metadata
|
||||
# There are a few main inputs to this application
|
||||
# -----------------------------------------------
|
||||
#
|
||||
# 1. Inference Root Operators (--root_ops): Root operators (called directly
|
||||
# 1. Inference Root Operators (--root-ops): Root operators (called directly
|
||||
# from TorchScript) used by inference use-cases.
|
||||
#
|
||||
# 2. Training Root Operators (--training_root_ops): Root operators used
|
||||
# 2. Training Root Operators (--training-root-ops): Root operators used
|
||||
# by training use-cases. Currently, this list is the list of all operators
|
||||
# used by training, and not just the root operators. All Training ops are
|
||||
# also considered for inference, so these are merged into inference ops.
|
||||
#
|
||||
# 3. Operator Dependency Graph (--dep_graph_yaml_path): A path to the
|
||||
# 3. Operator Dependency Graph (--dep-graph-yaml-path): A path to the
|
||||
# operator dependency graph used to determine which operators depend on
|
||||
# which other operators for correct functioning. This is used for
|
||||
# generating the transitive closure of all the operators used by the
|
||||
@ -71,12 +71,12 @@ from torchgen.selective_build.selector import merge_kernel_metadata
|
||||
# For tracing based selective build, we don't need to perform this
|
||||
# transitive closure.
|
||||
#
|
||||
# 4. Model Metadata (--model_name, --model_versions, --model_assets,
|
||||
# --model_backends): Self-descriptive. These are used to tell this
|
||||
# 4. Model Metadata (--model-name, --model-versions, --model-assets,
|
||||
# --model-backends): Self-descriptive. These are used to tell this
|
||||
# script which model operator lists to fetch from the Unified Model
|
||||
# Build Metadata YAML file.
|
||||
#
|
||||
# 5. Unified Model YAML file (--models_yaml_path): A path to the Unified
|
||||
# 5. Unified Model YAML file (--models-yaml-path): A path to the Unified
|
||||
# model YAML operator list file. This yaml file contains (for each
|
||||
# model/version/asset/backend) the set of used root and traced
|
||||
# operators. This is used to extract the actual set of operators
|
||||
@ -490,45 +490,53 @@ def fill_output(output: Dict[str, object], options: object):
|
||||
|
||||
def get_parser_options(parser: argparse.ArgumentParser) -> argparse.Namespace:
|
||||
parser.add_argument(
|
||||
"--root-ops",
|
||||
"--root_ops",
|
||||
help="A comma separated list of root operators used by the model",
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--training-root-ops",
|
||||
"--training_root_ops",
|
||||
help="A comma separated list of root operators used for training",
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-path",
|
||||
"--output_path",
|
||||
help="The location of the output yaml file.",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dep-graph-yaml-path",
|
||||
"--dep_graph_yaml_path",
|
||||
type=str,
|
||||
help="A path to the Operator Dependency Graph YAML file.",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-name",
|
||||
"--model_name",
|
||||
type=str,
|
||||
help="The name of the model that uses the specified root operators.",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-versions",
|
||||
"--model_versions",
|
||||
type=str,
|
||||
help="A comma separated list of model versions.",
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-assets",
|
||||
"--model_assets",
|
||||
type=str,
|
||||
help="A comma separate list of model asset names (if absent, defaults to all assets for this model).",
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-backends",
|
||||
"--model_backends",
|
||||
type=str,
|
||||
default="CPU",
|
||||
@ -536,12 +544,14 @@ def get_parser_options(parser: argparse.ArgumentParser) -> argparse.Namespace:
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--models-yaml-path",
|
||||
"--models_yaml_path",
|
||||
type=str,
|
||||
help="The path to where the unified Mobile Model Config YAML resides.",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-all-operators",
|
||||
"--include_all_operators",
|
||||
action="store_true",
|
||||
default=False,
|
||||
@ -549,6 +559,7 @@ def get_parser_options(parser: argparse.ArgumentParser) -> argparse.Namespace:
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rule-name",
|
||||
"--rule_name",
|
||||
type=str,
|
||||
help="The name of pt_operator_library rule resulting in this generation",
|
||||
|
@ -40,7 +40,7 @@ def throw_if_any_op_includes_overloads(selective_builder: SelectiveBuilder) -> N
|
||||
raise Exception(
|
||||
(
|
||||
"Operators that include all overloads are "
|
||||
+ "not allowed since --allow_include_all_overloads "
|
||||
+ "not allowed since --allow-include-all-overloads "
|
||||
+ "was specified: {}"
|
||||
).format(", ".join(ops))
|
||||
)
|
||||
@ -99,6 +99,7 @@ def main(argv: List[Any]) -> None:
|
||||
"""
|
||||
parser = argparse.ArgumentParser(description="Generate operator lists")
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
"--output_dir",
|
||||
help=(
|
||||
"The directory to store the output yaml files (selected_mobile_ops.h, "
|
||||
@ -107,6 +108,7 @@ def main(argv: List[Any]) -> None:
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model-file-list-path",
|
||||
"--model_file_list_path",
|
||||
help=(
|
||||
"Path to a file that contains the locations of individual "
|
||||
@ -117,6 +119,7 @@ def main(argv: List[Any]) -> None:
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--allow-include-all-overloads",
|
||||
"--allow_include_all_overloads",
|
||||
help=(
|
||||
"Flag to allow operators that include all overloads. "
|
||||
|
@ -61,12 +61,13 @@ if __name__ == "__main__":
|
||||
description="Generate torch/version.py from build and environment metadata."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--is-debug",
|
||||
"--is_debug",
|
||||
type=distutils.util.strtobool,
|
||||
help="Whether this build is debug mode or not.",
|
||||
)
|
||||
parser.add_argument("--cuda_version", type=str)
|
||||
parser.add_argument("--hip_version", type=str)
|
||||
parser.add_argument("--cuda-version", "--cuda_version", type=str)
|
||||
parser.add_argument("--hip-version", "--hip_version", type=str)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -204,7 +204,11 @@ def main(args: List[str]) -> None:
|
||||
default="aten/src/ATen",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d", "--install_dir", help="output directory", default="build/aten/src/ATen"
|
||||
"-d",
|
||||
"--install-dir",
|
||||
"--install_dir",
|
||||
help="output directory",
|
||||
default="build/aten/src/ATen",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
@ -217,6 +221,7 @@ def main(args: List[str]) -> None:
|
||||
help="run without writing any files (still updates outputs)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--op-selection-yaml-path",
|
||||
"--op_selection_yaml_path",
|
||||
help="Provide a path to the operator selection (for custom build) YAML "
|
||||
"that contains the information about the set of selected operators "
|
||||
@ -225,6 +230,7 @@ def main(args: List[str]) -> None:
|
||||
"The operator names also contain the namespace prefix (e.g. aten::)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--op-registration-allowlist",
|
||||
"--op_registration_allowlist",
|
||||
nargs="*",
|
||||
help="filter op registrations by the allowlist (if set); "
|
||||
@ -232,6 +238,7 @@ def main(args: List[str]) -> None:
|
||||
"e.g.: aten::empty aten::conv2d ...",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--TEST-ONLY-op-registration-allowlist-yaml-path",
|
||||
"--TEST_ONLY_op_registration_allowlist_yaml_path",
|
||||
help="Provide a path to the operator selection (for custom build) YAML "
|
||||
"which contains a list of operators. It is to serve testing purpose and "
|
||||
|
@ -17,7 +17,7 @@ class TestGenUnboxing(unittest.TestCase):
|
||||
mock_parse_native_yaml: NonCallableMock,
|
||||
mock_get_custom_build_selector: NonCallableMock,
|
||||
) -> None:
|
||||
args = ["--op_registration_allowlist=op1", "--op_selection_yaml_path=path2"]
|
||||
args = ["--op-registration-allowlist=op1", "--op-selection-yaml-path=path2"]
|
||||
gen_unboxing.main(args)
|
||||
mock_get_custom_build_selector.assert_called_once_with(["op1"], "path2")
|
||||
|
||||
@ -32,8 +32,8 @@ class TestGenUnboxing(unittest.TestCase):
|
||||
temp_file.write(b"- aten::add.Tensor")
|
||||
temp_file.seek(0)
|
||||
args = [
|
||||
f"--TEST_ONLY_op_registration_allowlist_yaml_path={temp_file.name}",
|
||||
"--op_selection_yaml_path=path2",
|
||||
f"--TEST-ONLY-op-registration-allowlist-yaml-path={temp_file.name}",
|
||||
"--op-selection-yaml-path=path2",
|
||||
]
|
||||
gen_unboxing.main(args)
|
||||
mock_get_custom_build_selector.assert_called_once_with(
|
||||
@ -52,9 +52,9 @@ class TestGenUnboxing(unittest.TestCase):
|
||||
temp_file.write(b"- aten::add.Tensor")
|
||||
temp_file.seek(0)
|
||||
args = [
|
||||
"--op_registration_allowlist=op1",
|
||||
"--TEST_ONLY_op_registration_allowlist_yaml_path={temp_file.name}",
|
||||
"--op_selection_yaml_path=path2",
|
||||
"--op-registration-allowlist=op1",
|
||||
"--TEST-ONLY-op-registration-allowlist-yaml-path={temp_file.name}",
|
||||
"--op-selection-yaml-path=path2",
|
||||
]
|
||||
gen_unboxing.main(args)
|
||||
mock_get_custom_build_selector.assert_called_once_with(["op1"], "path2")
|
||||
|
@ -204,6 +204,7 @@ def main() -> None:
|
||||
help="clang-tidy binary path",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--build-dir",
|
||||
"--build_dir",
|
||||
required=True,
|
||||
help=(
|
||||
|
@ -59,7 +59,7 @@ def run_autogen() -> None:
|
||||
"aten/src/ATen/native/native_functions.yaml",
|
||||
"--tags-path",
|
||||
"aten/src/ATen/native/tags.yaml",
|
||||
"--gen_lazy_ts_backend",
|
||||
"--gen-lazy-ts-backend",
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -147,6 +147,7 @@ def main() -> None:
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--yaml-file-path",
|
||||
"--yaml_file_path",
|
||||
type=str,
|
||||
required=True,
|
||||
@ -154,6 +155,7 @@ def main() -> None:
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output-file-path",
|
||||
"--output_file_path",
|
||||
type=str,
|
||||
required=True,
|
||||
|
@ -107,6 +107,9 @@ def main(args: Any) -> None:
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--skip_build", action="store_true", help="Skip building pytorch"
|
||||
"--skip-build",
|
||||
"--skip_build",
|
||||
action="store_true",
|
||||
help="Skip building pytorch",
|
||||
)
|
||||
main(parser.parse_args())
|
||||
|
@ -138,6 +138,7 @@ def main() -> None:
|
||||
help="Root directory where to install files. Defaults to the current working directory.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--install-dir",
|
||||
"--install_dir",
|
||||
help=(
|
||||
"Deprecated. Use --gen-dir instead. The semantics are different, do not change "
|
||||
@ -159,21 +160,25 @@ def main() -> None:
|
||||
help="Path to the YAML file that contains the list of operators to include for custom build.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--operators-yaml-path",
|
||||
"--operators_yaml_path",
|
||||
help="Path to the model YAML file that contains the list of operators to include for custom build.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force-schema-registration",
|
||||
"--force_schema_registration",
|
||||
action="store_true",
|
||||
help="force it to generate schema-only registrations for ops that are not"
|
||||
"listed on --selected-op-list",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gen-lazy-ts-backend",
|
||||
"--gen_lazy_ts_backend",
|
||||
action="store_true",
|
||||
help="Enable generation of the torch::lazy TorchScript backend",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--per-operator-headers",
|
||||
"--per_operator_headers",
|
||||
action="store_true",
|
||||
help="Build lazy tensor ts backend with per-operator ATen headers, must match how ATen was built",
|
||||
|
@ -7,7 +7,7 @@ if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--input-file")
|
||||
parser.add_argument("--output-file")
|
||||
parser.add_argument("--install_dir")
|
||||
parser.add_argument("--install-dir", "--install_dir")
|
||||
parser.add_argument("--replace", action="append", nargs=2)
|
||||
options = parser.parse_args()
|
||||
|
||||
|
@ -387,9 +387,9 @@ add_custom_command(
|
||||
"${PYTHON_EXECUTABLE}" -c \"from pathlib import Path\; Path('${TOOLS_PATH}/generate_torch_version.py').touch()\"
|
||||
COMMAND
|
||||
"${PYTHON_EXECUTABLE}" ${TOOLS_PATH}/generate_torch_version.py
|
||||
--is_debug=${TORCH_VERSION_DEBUG}
|
||||
--cuda_version=${CUDA_VERSION}
|
||||
--hip_version=${HIP_VERSION}
|
||||
--is-debug=${TORCH_VERSION_DEBUG}
|
||||
--cuda-version=${CUDA_VERSION}
|
||||
--hip-version=${HIP_VERSION}
|
||||
DEPENDS ${TOOLS_PATH}/generate_torch_version.py
|
||||
WORKING_DIRECTORY ${TORCH_ROOT}
|
||||
)
|
||||
|
@ -50,7 +50,7 @@ The benchmark codes depend on the [DLRM codebase](https://github.com/facebookres
|
||||
|
||||
### **Disk savings**
|
||||
```
|
||||
python evaluate_disk_savings.py --model_path=<path_to_model_checkpoint> --sparsified_model_dump_path=<path_to_dump_sparsified_models>
|
||||
python evaluate_disk_savings.py --model-path=<path_to_model_checkpoint> --sparsified-model-dump-path=<path_to_dump_sparsified_models>
|
||||
```
|
||||
|
||||
Running this script should dump
|
||||
@ -62,13 +62,13 @@ Running this script should dump
|
||||
|
||||
### **Model Quality**
|
||||
```
|
||||
python evaluate_model_metrics.py --raw_data_file=<path_to_raw_data_txt_file> --processed_data_file=<path_to_kaggleAdDisplayChallenge_processed.npz> --sparse_model_metadata=<path_to_sparse_model_metadata_csv>
|
||||
python evaluate_model_metrics.py --raw-data-file=<path_to_raw_data_txt_file> --processed-data-file=<path_to_kaggleAdDisplayChallenge_processed.npz> --sparse-model-metadata=<path_to_sparse_model_metadata_csv>
|
||||
```
|
||||
Running this script should dump ```sparse_model_metrics.csv``` that contains evaluation metrics for all sparsified models.
|
||||
|
||||
### **Model forward time**:
|
||||
```
|
||||
python evaluate_forward_time.py --raw_data_file=<path_to_raw_data_txt_file> --processed_data_file=<path_to_kaggleAdDisplayChallenge_processed.npz> --sparse_model_metadata=<path_to_sparse_model_metadata_csv>
|
||||
python evaluate_forward_time.py --raw-data-file=<path_to_raw_data_txt_file> --processed-data-file=<path_to_kaggleAdDisplayChallenge_processed.npz> --sparse-model-metadata=<path_to_sparse_model_metadata_csv>
|
||||
```
|
||||
Running this script should dump ```dlrm_forward_time_info.csv``` that contains forward time for all sparsified models with and without torch.sparse in the forward pass.
|
||||
|
||||
|
@ -152,8 +152,8 @@ def sparsify_model(path_to_model, sparsified_model_dump_path):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--model_path', type=str)
|
||||
parser.add_argument('--sparsified_model_dump_path', type=str)
|
||||
parser.add_argument('--model-path', '--model_path', type=str)
|
||||
parser.add_argument('--sparsified-model-dump-path', '--sparsified_model_dump_path', type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
sparsify_model(args.model_path, args.sparsified_model_dump_path)
|
||||
|
@ -85,9 +85,9 @@ def measure_forward_pass(sparse_model_metadata, device, sparse_dlrm, **batch):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--raw_data_file', type=str)
|
||||
parser.add_argument('--processed_data_file', type=str)
|
||||
parser.add_argument('--sparse_model_metadata', type=str)
|
||||
parser.add_argument('--raw-data-file', '--raw_data_file', type=str)
|
||||
parser.add_argument('--processed-data-file', '--processed_data_file', type=str)
|
||||
parser.add_argument('--sparse-model-metadata', '--sparse_model_metadata', type=str)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -119,9 +119,9 @@ def evaluate_metrics(test_dataloader, sparse_model_metadata):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--raw_data_file', type=str)
|
||||
parser.add_argument('--processed_data_file', type=str)
|
||||
parser.add_argument('--sparse_model_metadata', type=str)
|
||||
parser.add_argument('--raw-data-file', '--raw_data_file', type=str)
|
||||
parser.add_argument('--processed-data-file', '--processed_data_file', type=str)
|
||||
parser.add_argument('--sparse-model-metadata', '--sparse_model_metadata', type=str)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -554,7 +554,7 @@ class emit_itt:
|
||||
|
||||
It is useful when running the program under Intel(R) VTune Profiler::
|
||||
|
||||
vtune <--vtune_flags> <regular command here>
|
||||
vtune <--vtune-flags> <regular command here>
|
||||
|
||||
The Instrumentation and Tracing Technology (ITT) API enables your application to generate and
|
||||
control the collection of trace data during its execution across different Intel tools.
|
||||
|
@ -60,20 +60,20 @@ Single instance inference
|
||||
|
||||
::
|
||||
|
||||
python -m torch.backends.xeon.run_cpu --throughput_mode script.py args
|
||||
python -m torch.backends.xeon.run_cpu --throughput-mode script.py args
|
||||
|
||||
2. Run single-instance inference on a single CPU node.
|
||||
|
||||
::
|
||||
|
||||
python -m torch.backends.xeon.run_cpu --node_id 1 script.py args
|
||||
python -m torch.backends.xeon.run_cpu --node-id 1 script.py args
|
||||
|
||||
Multi-instance inference
|
||||
------------------------
|
||||
|
||||
1. Multi-instance
|
||||
By default this tool runs one process per node. If you want to set the instance numbers and core per instance,
|
||||
--ninstances and --ncores_per_instance should be set.
|
||||
--ninstances and --ncores-per-instance should be set.
|
||||
|
||||
::
|
||||
|
||||
@ -83,7 +83,7 @@ Multi-instance inference
|
||||
|
||||
::
|
||||
|
||||
python -m torch.backends.xeon.run_cpu --ninstances 14 --ncores_per_instance 4 python_script args
|
||||
python -m torch.backends.xeon.run_cpu --ninstances 14 --ncores-per-instance 4 python_script args
|
||||
|
||||
2. Run single-instance inference among multiple instances.
|
||||
By default, runs all ninstances. If you want to independently run a single instance among ninstances, specify rank.
|
||||
@ -105,7 +105,7 @@ Multi-instance inference
|
||||
|
||||
::
|
||||
|
||||
python -m torch.backends.xeon.run_cpu --core_list "0, 1, 2, 3" --ninstances 2 --ncores_per_instance 2
|
||||
python -m torch.backends.xeon.run_cpu --core-list "0, 1, 2, 3" --ninstances 2 --ncores-per-instance 2
|
||||
--rank 0 python_script args
|
||||
|
||||
3. To look up what optional arguments this module offers:
|
||||
@ -117,7 +117,7 @@ Multi-instance inference
|
||||
Memory allocator
|
||||
----------------
|
||||
|
||||
"--enable_tcmalloc" and "--enable_jemalloc" can be used to enable different memory allocators.
|
||||
"--enable-tcmalloc" and "--enable-jemalloc" can be used to enable different memory allocators.
|
||||
|
||||
"""
|
||||
|
||||
@ -233,8 +233,8 @@ class _CPUinfo():
|
||||
numa_ids.append(numa_id)
|
||||
if len(numa_ids) > 1:
|
||||
logger.warning(f"Numa Aware: cores:{str(core_list)} on different NUMA nodes:{str(numa_ids)}. To avoid \
|
||||
this behavior, please use --ncores_per_instance knob to make sure number of cores is divisible by --ncores_per_\
|
||||
instance. Alternatively, please use --skip_cross_node_cores knob.")
|
||||
this behavior, please use --ncores-per-instance knob to make sure number of cores is divisible by --ncores-per-\
|
||||
instance. Alternatively, please use --skip-cross-node-cores knob.")
|
||||
if len(numa_ids) == 0:
|
||||
raise RuntimeError("invalid number of NUMA nodes; please make sure numa_ids >= 1")
|
||||
return numa_ids
|
||||
@ -376,7 +376,7 @@ Value applied: {os.environ[env_name]}. Value ignored: {env_value}")
|
||||
if args.core_list: # user specify what cores will be used by params
|
||||
cores = [int(x) for x in args.core_list.split(",")]
|
||||
if args.ncores_per_instance == -1:
|
||||
raise RuntimeError("please specify the \"--ncores_per_instance\" if you have pass the --core_list params")
|
||||
raise RuntimeError("please specify the \"--ncores-per-instance\" if you have pass the --core-list params")
|
||||
elif args.ninstances > 1 and args.ncores_per_instance * args.ninstances < len(cores):
|
||||
logger.warning(f"only first {args.ncores_per_instance * args.ninstances} cores will be used, \
|
||||
but you specify {len(cores)} cores in core_list")
|
||||
@ -417,17 +417,17 @@ please make sure ninstances <= total_cores)")
|
||||
if args.ncores_per_instance > ncore_per_node:
|
||||
# too many ncores_per_instance to skip cross-node cores
|
||||
logger.warning("there are {} core(s) per socket, but you specify {} ncores_per_instance and \
|
||||
skip_cross_node_cores. Please make sure --ncores_per_instance < core(s) per \
|
||||
skip_cross_node_cores. Please make sure --ncores-per-instance < core(s) per \
|
||||
socket".format(ncore_per_node, args.ncores_per_instance))
|
||||
exit(-1)
|
||||
elif num_leftover_cores == 0:
|
||||
# aren't any cross-node cores
|
||||
logger.info('--skip_cross_node_cores is set, but there are no cross-node cores.')
|
||||
logger.info('--skip-cross-node-cores is set, but there are no cross-node cores.')
|
||||
args.ninstances = len(cores) // args.ncores_per_instance
|
||||
else:
|
||||
# skip cross-node cores
|
||||
if args.ninstances != -1:
|
||||
logger.warning('--skip_cross_node_cores is exclusive to --ninstances. --ninstances \
|
||||
logger.warning('--skip-cross-node-cores is exclusive to --ninstances. --ninstances \
|
||||
won\'t take effect even if it is set explicitly.')
|
||||
|
||||
i = 1
|
||||
@ -442,15 +442,15 @@ won\'t take effect even if it is set explicitly.')
|
||||
if args.ninstances * args.ncores_per_instance > len(cores):
|
||||
raise RuntimeError("Please make sure ninstances * ncores_per_instance <= total_cores")
|
||||
if args.latency_mode:
|
||||
logger.warning("--latency_mode is exclusive to --ninstances, --ncores_per_instance, --node_id and \
|
||||
--use_logical_core. They won't take effect even they are set explicitly.")
|
||||
logger.warning("--latency-mode is exclusive to --ninstances, --ncores-per-instance, --node-id and \
|
||||
--use-logical-core. They won't take effect even they are set explicitly.")
|
||||
args.ncores_per_instance = 4
|
||||
cores = self.cpuinfo.get_all_physical_cores()
|
||||
args.ninstances = len(cores) // args.ncores_per_instance
|
||||
|
||||
if args.throughput_mode:
|
||||
logger.warning("--throughput_mode is exclusive to --ninstances, --ncores_per_instance, --node_id and \
|
||||
--use_logical_core. They won't take effect even they are set explicitly.")
|
||||
logger.warning("--throughput-mode is exclusive to --ninstances, --ncores-per-instance, --node-id and \
|
||||
--use-logical-core. They won't take effect even they are set explicitly.")
|
||||
args.ninstances = self.cpuinfo.node_nums
|
||||
cores = self.cpuinfo.get_all_physical_cores()
|
||||
args.ncores_per_instance = len(cores) // args.ninstances
|
||||
@ -531,48 +531,48 @@ def _add_memory_allocator_params(parser):
|
||||
|
||||
group = parser.add_argument_group("Memory Allocator Parameters")
|
||||
# allocator control
|
||||
group.add_argument("--enable_tcmalloc", action="store_true", default=False,
|
||||
group.add_argument("--enable-tcmalloc", "--enable_tcmalloc", action="store_true", default=False,
|
||||
help="Enable tcmalloc allocator")
|
||||
group.add_argument("--enable_jemalloc", action="store_true", default=False,
|
||||
group.add_argument("--enable-jemalloc", "--enable_jemalloc", action="store_true", default=False,
|
||||
help="Enable jemalloc allocator")
|
||||
group.add_argument("--use_default_allocator", action="store_true", default=False,
|
||||
group.add_argument("--use-default-allocator", "--use_default_allocator", action="store_true", default=False,
|
||||
help="Use default memory allocator")
|
||||
|
||||
def _add_multi_instance_params(parser):
|
||||
|
||||
group = parser.add_argument_group("Multi-instance Parameters")
|
||||
# multi-instance control
|
||||
group.add_argument("--ncores_per_instance", metavar="\b", default=-1, type=int,
|
||||
group.add_argument("--ncores-per-instance", "--ncores_per_instance", metavar="\b", default=-1, type=int,
|
||||
help="Cores per instance")
|
||||
group.add_argument("--ninstances", metavar="\b", default=-1, type=int,
|
||||
help="For multi-instance, you should give the cores number you used for per instance.")
|
||||
group.add_argument("--skip_cross_node_cores", action='store_true', default=False,
|
||||
help="If specified --ncores_per_instance, skips cross-node cores.")
|
||||
group.add_argument("--skip-cross-node-cores", "--skip_cross_node_cores", action='store_true', default=False,
|
||||
help="If specified --ncores-per-instance, skips cross-node cores.")
|
||||
group.add_argument("--rank", metavar="\b", default="-1", type=int,
|
||||
help="Specify instance index to assign ncores_per_instance for rank; \
|
||||
otherwise ncores_per_instance will be assigned sequentially to ninstances. Please refer to \
|
||||
https://github.com/intel/intel-extension-for-pytorch/blob/master/docs/tutorials/performance_tuning/launch_script.md")
|
||||
group.add_argument("--latency_mode", action="store_true", default=False,
|
||||
group.add_argument("--latency-mode", "--latency_mode", action="store_true", default=False,
|
||||
help="By detault 4 core per instance and use all physical cores")
|
||||
group.add_argument("--throughput_mode", action="store_true", default=False,
|
||||
group.add_argument("--throughput-mode", "--throughput_mode", action="store_true", default=False,
|
||||
help="By default one instance per node and use all physical cores")
|
||||
group.add_argument("--node_id", metavar="\b", default=-1, type=int,
|
||||
group.add_argument("--node-id", "--node_id", metavar="\b", default=-1, type=int,
|
||||
help="node id for multi-instance, by default all nodes will be used")
|
||||
group.add_argument("--use_logical_core", action="store_true", default=False,
|
||||
group.add_argument("--use-logical-core", "--use_logical_core", action="store_true", default=False,
|
||||
help="Whether only use physical cores")
|
||||
group.add_argument("--disable_numactl", action="store_true", default=False,
|
||||
group.add_argument("--disable-numactl", "--disable_numactl", action="store_true", default=False,
|
||||
help="Disable numactl")
|
||||
group.add_argument("--core_list", metavar="\b", default=None, type=str,
|
||||
group.add_argument("--core-list", "--core_list", metavar="\b", default=None, type=str,
|
||||
help="Specify the core list as \"core_id, core_id, ....\", otherwise, all the cores will be used.")
|
||||
group.add_argument("--log_path", metavar="\b", default="", type=str,
|
||||
group.add_argument("--log-path", "--log_path", metavar="\b", default="", type=str,
|
||||
help="The log file directory. Default path is "", which means disable logging to files.")
|
||||
group.add_argument("--log_file_prefix", metavar="\b", default="run", type=str,
|
||||
group.add_argument("--log-file-prefix", "--log_file_prefix", metavar="\b", default="run", type=str,
|
||||
help="log file prefix")
|
||||
|
||||
def _add_kmp_iomp_params(parser):
|
||||
|
||||
group = parser.add_argument_group("IOMP Parameters")
|
||||
group.add_argument("--disable_iomp", action="store_true", default=False,
|
||||
group.add_argument("--disable-iomp", "--disable_iomp", action="store_true", default=False,
|
||||
help="By default, we use Intel OpenMP and libiomp5.so will be add to LD_PRELOAD")
|
||||
|
||||
def create_args(parser=None):
|
||||
@ -580,7 +580,7 @@ def create_args(parser=None):
|
||||
Helper function parsing the command line options
|
||||
@retval ArgumentParser
|
||||
"""
|
||||
parser.add_argument("--multi_instance", action="store_true", default=False,
|
||||
parser.add_argument("--multi-instance", "--multi_instance", action="store_true", default=False,
|
||||
help="Enable multi-instance, by default one instance per node")
|
||||
|
||||
parser.add_argument("-m", "--module", default=False, action="store_true",
|
||||
@ -588,7 +588,7 @@ def create_args(parser=None):
|
||||
"as a python module, executing with the same behavior as"
|
||||
"\"python -m\".")
|
||||
|
||||
parser.add_argument("--no_python", default=False, action="store_true",
|
||||
parser.add_argument("--no-python", "--no_python", default=False, action="store_true",
|
||||
help="Do not prepend the --program script with \"python\" - just exec "
|
||||
"it directly. Useful when the script is not a Python script.")
|
||||
|
||||
@ -618,7 +618,7 @@ def main(args):
|
||||
raise RuntimeError("Either args.latency_mode or args.throughput_mode should be set")
|
||||
|
||||
if not args.no_python and not args.program.endswith(".py"):
|
||||
raise RuntimeError("For non Python script, you should use \"--no_python\" parameter.")
|
||||
raise RuntimeError("For non Python script, you should use \"--no-python\" parameter.")
|
||||
|
||||
# Verify LD_PRELOAD
|
||||
if "LD_PRELOAD" in os.environ:
|
||||
@ -653,7 +653,7 @@ if __name__ == "__main__":
|
||||
"\n >>> python -m torch.backends.xeon.run_cpu python_script args \n"
|
||||
"\n2. multi-instance \n"
|
||||
"\n >>> python -m torch.backends.xeon.run_cpu --ninstances xxx "
|
||||
"--ncores_per_instance xx python_script args\n"
|
||||
"--ncores-per-instance xx python_script args\n"
|
||||
"\n############################################################################# \n",
|
||||
formatter_class=RawTextHelpFormatter)
|
||||
create_args(parser)
|
||||
|
@ -80,13 +80,15 @@ const static RegisterNNCExternalFunction nnc_{name}(
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate annotated_fn_args script')
|
||||
parser.add_argument('--native_functions',
|
||||
parser.add_argument('--native-functions',
|
||||
'--native_functions',
|
||||
help='path to native_functions.yaml',
|
||||
default='../../../../aten/src/ATen/native/native_functions.yaml')
|
||||
parser.add_argument('--tags',
|
||||
help='path to tags.yaml',
|
||||
default='../../../../aten/src/ATen/native/tags.yaml')
|
||||
parser.add_argument('--template_path',
|
||||
parser.add_argument('--template-path',
|
||||
'--template_path',
|
||||
help='path to external_functions_codegen_template.cpp',
|
||||
default='../../../../tools/jit/templates/external_functions_codegen_template.cpp')
|
||||
args = parser.parse_args()
|
||||
|
@ -113,7 +113,7 @@ class LocalElasticAgent(SimpleElasticAgent):
|
||||
role="trainer",
|
||||
local_world_size=nproc_per_process,
|
||||
entrypoint="/usr/local/bin/trainer",
|
||||
args=("--trainer_args", "foobar"),
|
||||
args=("--trainer-args", "foobar"),
|
||||
...<OTHER_PARAMS...>)
|
||||
agent = LocalElasticAgent(spec)
|
||||
results = agent.run()
|
||||
|
@ -83,18 +83,18 @@ def create_rdzv_handler(params: RendezvousParameters) -> RendezvousHandler:
|
||||
if "rank" not in params.config:
|
||||
raise ValueError(
|
||||
"rank is absent in RendezvousParameters."
|
||||
"Try add --node_rank to the cmd request"
|
||||
"Try add --node-rank to the cmd request"
|
||||
)
|
||||
endpoint = params.endpoint.strip()
|
||||
if not endpoint:
|
||||
raise ValueError(
|
||||
"endpoint is absent in RendezvousParameters"
|
||||
"Try add --master_port and --master_addr to the cmd request"
|
||||
"Try add --master-port and --master-addr to the cmd request"
|
||||
)
|
||||
master_addr, master_port = parse_rendezvous_endpoint(endpoint, -1)
|
||||
if master_port == -1:
|
||||
raise ValueError(
|
||||
f"Port is absent in endpoint: {endpoint}. Try launching with --master_port"
|
||||
f"Port is absent in endpoint: {endpoint}. Try launching with --master-port"
|
||||
)
|
||||
world_size = params.max_nodes
|
||||
rank = cast(int, params.config.get("rank"))
|
||||
|
@ -19,7 +19,7 @@ aggregated communication bandwidth.
|
||||
|
||||
In both cases of single-node distributed training or multi-node distributed
|
||||
training, this utility will launch the given number of processes per node
|
||||
(``--nproc_per_node``). If used for GPU training, this number needs to be less
|
||||
(``--nproc-per-node``). If used for GPU training, this number needs to be less
|
||||
or equal to the number of GPUs on the current system (``nproc_per_node``),
|
||||
and each process will be operating on a single GPU from *GPU 0 to
|
||||
GPU (nproc_per_node - 1)*.
|
||||
@ -30,7 +30,7 @@ GPU (nproc_per_node - 1)*.
|
||||
|
||||
::
|
||||
|
||||
python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
|
||||
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3 and all other
|
||||
arguments of your training script)
|
||||
|
||||
@ -41,18 +41,18 @@ Node 1: *(IP: 192.168.1.1, and has a free port: 1234)*
|
||||
|
||||
::
|
||||
|
||||
python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
|
||||
--nnodes=2 --node_rank=0 --master_addr="192.168.1.1"
|
||||
--master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
||||
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
||||
--nnodes=2 --node-rank=0 --master-addr="192.168.1.1"
|
||||
--master-port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
||||
and all other arguments of your training script)
|
||||
|
||||
Node 2:
|
||||
|
||||
::
|
||||
|
||||
python -m torch.distributed.launch --nproc_per_node=NUM_GPUS_YOU_HAVE
|
||||
--nnodes=2 --node_rank=1 --master_addr="192.168.1.1"
|
||||
--master_port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
||||
python -m torch.distributed.launch --nproc-per-node=NUM_GPUS_YOU_HAVE
|
||||
--nnodes=2 --node-rank=1 --master-addr="192.168.1.1"
|
||||
--master-port=1234 YOUR_TRAINING_SCRIPT.py (--arg1 --arg2 --arg3
|
||||
and all other arguments of your training script)
|
||||
|
||||
3. To look up what optional arguments this module offers:
|
||||
@ -70,7 +70,7 @@ the NCCL distributed backend. Thus NCCL backend is the recommended backend to
|
||||
use for GPU training.
|
||||
|
||||
2. In your training program, you must parse the command-line argument:
|
||||
``--local_rank=LOCAL_PROCESS_RANK``, which will be provided by this module.
|
||||
``--local-rank=LOCAL_PROCESS_RANK``, which will be provided by this module.
|
||||
If your training program uses GPUs, you should ensure that your code only
|
||||
runs on the GPU device of LOCAL_PROCESS_RANK. This can be done by:
|
||||
|
||||
@ -81,7 +81,7 @@ Parsing the local_rank argument
|
||||
>>> # xdoctest: +SKIP
|
||||
>>> import argparse
|
||||
>>> parser = argparse.ArgumentParser()
|
||||
>>> parser.add_argument("--local_rank", type=int)
|
||||
>>> parser.add_argument("--local-rank", type=int)
|
||||
>>> args = parser.parse_args()
|
||||
|
||||
Set your device to local rank using either
|
||||
@ -128,9 +128,9 @@ utility
|
||||
|
||||
5. Another way to pass ``local_rank`` to the subprocesses is via the environment variable
|
||||
``LOCAL_RANK``. This behavior is enabled when you launch the script with
|
||||
``--use_env=True``. You must adjust the subprocess example above to replace
|
||||
``--use-env=True``. You must adjust the subprocess example above to replace
|
||||
``args.local_rank`` with ``os.environ['LOCAL_RANK']``; the launcher
|
||||
will not pass ``--local_rank`` when you specify this flag.
|
||||
will not pass ``--local-rank`` when you specify this flag.
|
||||
|
||||
.. warning::
|
||||
|
||||
@ -156,13 +156,14 @@ logger = logging.getLogger(__name__)
|
||||
def parse_args(args):
|
||||
parser = get_args_parser()
|
||||
parser.add_argument(
|
||||
"--use-env",
|
||||
"--use_env",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="Use environment variable to pass "
|
||||
"'local rank'. For legacy reasons, the default value is False. "
|
||||
"If set to True, the script will not pass "
|
||||
"--local_rank as argument, and will instead set LOCAL_RANK.",
|
||||
"--local-rank as argument, and will instead set LOCAL_RANK.",
|
||||
)
|
||||
return parser.parse_args(args)
|
||||
|
||||
@ -170,8 +171,8 @@ def parse_args(args):
|
||||
def launch(args):
|
||||
if args.no_python and not args.use_env:
|
||||
raise ValueError(
|
||||
"When using the '--no_python' flag,"
|
||||
" you must also set the '--use_env' flag."
|
||||
"When using the '--no-python' flag,"
|
||||
" you must also set the '--use-env' flag."
|
||||
)
|
||||
run(args)
|
||||
|
||||
@ -180,8 +181,8 @@ def main(args=None):
|
||||
warnings.warn(
|
||||
"The module torch.distributed.launch is deprecated\n"
|
||||
"and will be removed in future. Use torchrun.\n"
|
||||
"Note that --use_env is set by default in torchrun.\n"
|
||||
"If your script expects `--local_rank` argument to be set, please\n"
|
||||
"Note that --use-env is set by default in torchrun.\n"
|
||||
"If your script expects `--local-rank` argument to be set, please\n"
|
||||
"change it to read from `os.environ['LOCAL_RANK']` instead. See \n"
|
||||
"https://pytorch.org/docs/stable/distributed.html#launch-utility for \n"
|
||||
"further instructions\n",
|
||||
|
@ -165,12 +165,12 @@ def _get_addr_and_port(
|
||||
endpoint = endpoint.strip()
|
||||
if not endpoint:
|
||||
raise ValueError(
|
||||
"Endpoint is missing in endpoint. Try to add --master_addr and --master_port"
|
||||
"Endpoint is missing in endpoint. Try to add --master-addr and --master-port"
|
||||
)
|
||||
master_addr, master_port = parse_rendezvous_endpoint(endpoint, default_port=-1)
|
||||
if master_port == -1:
|
||||
raise ValueError(
|
||||
f"port is missing in endpoint: {endpoint}. Try to specify --master_port"
|
||||
f"port is missing in endpoint: {endpoint}. Try to specify --master-port"
|
||||
)
|
||||
return (master_addr, master_port)
|
||||
|
||||
|
@ -30,11 +30,11 @@ Transitioning from torch.distributed.launch to torchrun
|
||||
|
||||
|
||||
``torchrun`` supports the same arguments as ``torch.distributed.launch`` **except**
|
||||
for ``--use_env`` which is now deprecated. To migrate from ``torch.distributed.launch``
|
||||
for ``--use-env`` which is now deprecated. To migrate from ``torch.distributed.launch``
|
||||
to ``torchrun`` follow these steps:
|
||||
|
||||
1. If your training script is already reading ``local_rank`` from the ``LOCAL_RANK`` environment variable.
|
||||
Then you need simply omit the ``--use_env`` flag, e.g.:
|
||||
Then you simply need to omit the ``--use-env`` flag, e.g.:
|
||||
|
||||
+--------------------------------------------------------------------+--------------------------------------------+
|
||||
| ``torch.distributed.launch`` | ``torchrun`` |
|
||||
@ -42,11 +42,11 @@ to ``torchrun`` follow these steps:
|
||||
| | |
|
||||
| .. code-block:: shell-session | .. code-block:: shell-session |
|
||||
| | |
|
||||
| $ python -m torch.distributed.launch --use_env train_script.py | $ torchrun train_script.py |
|
||||
| $ python -m torch.distributed.launch --use-env train_script.py | $ torchrun train_script.py |
|
||||
| | |
|
||||
+--------------------------------------------------------------------+--------------------------------------------+
|
||||
|
||||
2. If your training script reads local rank from a ``--local_rank`` cmd argument.
|
||||
2. If your training script reads local rank from a ``--local-rank`` cmd argument.
|
||||
Change your training script to read from the ``LOCAL_RANK`` environment variable as
|
||||
demonstrated by the following code snippet:
|
||||
|
||||
@ -59,7 +59,7 @@ to ``torchrun`` follow these steps:
|
||||
| | |
|
||||
| import argparse | import os |
|
||||
| parser = argparse.ArgumentParser() | local_rank = int(os.environ["LOCAL_RANK"]) |
|
||||
| parser.add_argument("--local_rank", type=int) | |
|
||||
| parser.add_argument("--local-rank", type=int) | |
|
||||
| args = parser.parse_args() | |
|
||||
| | |
|
||||
| local_rank = args.local_rank | |
|
||||
@ -85,7 +85,7 @@ Single-node multi-worker
|
||||
torchrun
|
||||
--standalone
|
||||
--nnodes=1
|
||||
--nproc_per_node=$NUM_TRAINERS
|
||||
--nproc-per-node=$NUM_TRAINERS
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
Stacked single-node multi-worker
|
||||
@ -94,18 +94,18 @@ Stacked single-node multi-worker
|
||||
To run multiple instances (separate jobs) of single-node, multi-worker on the
|
||||
same host, we need to make sure that each instance (job) is
|
||||
setup on different ports to avoid port conflicts (or worse, two jobs being merged
|
||||
as a single job). To do this you have to run with ``--rdzv_backend=c10d``
|
||||
and specify a different port by setting ``--rdzv_endpoint=localhost:$PORT_k``.
|
||||
as a single job). To do this you have to run with ``--rdzv-backend=c10d``
|
||||
and specify a different port by setting ``--rdzv-endpoint=localhost:$PORT_k``.
|
||||
For ``--nnodes=1``, it's often convenient to let ``torchrun`` pick a free random
|
||||
port automatically instead of manually assigning different ports for each run.
|
||||
|
||||
::
|
||||
|
||||
torchrun
|
||||
--rdzv_backend=c10d
|
||||
--rdzv_endpoint=localhost:0
|
||||
--rdzv-backend=c10d
|
||||
--rdzv-endpoint=localhost:0
|
||||
--nnodes=1
|
||||
--nproc_per_node=$NUM_TRAINERS
|
||||
--nproc-per-node=$NUM_TRAINERS
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
|
||||
@ -116,11 +116,11 @@ Fault tolerant (fixed sized number of workers, no elasticity, tolerates 3 failur
|
||||
|
||||
torchrun
|
||||
--nnodes=$NUM_NODES
|
||||
--nproc_per_node=$NUM_TRAINERS
|
||||
--max_restarts=3
|
||||
--rdzv_id=$JOB_ID
|
||||
--rdzv_backend=c10d
|
||||
--rdzv_endpoint=$HOST_NODE_ADDR
|
||||
--nproc-per-node=$NUM_TRAINERS
|
||||
--max-restarts=3
|
||||
--rdzv-id=$JOB_ID
|
||||
--rdzv-backend=c10d
|
||||
--rdzv-endpoint=$HOST_NODE_ADDR
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
``HOST_NODE_ADDR``, in form <host>[:<port>] (e.g. node1.example.com:29400), specifies the node and
|
||||
@ -137,11 +137,11 @@ Elastic (``min=1``, ``max=4``, tolerates up to 3 membership changes or failures)
|
||||
|
||||
torchrun
|
||||
--nnodes=1:4
|
||||
--nproc_per_node=$NUM_TRAINERS
|
||||
--max_restarts=3
|
||||
--rdzv_id=$JOB_ID
|
||||
--rdzv_backend=c10d
|
||||
--rdzv_endpoint=$HOST_NODE_ADDR
|
||||
--nproc-per-node=$NUM_TRAINERS
|
||||
--max-restarts=3
|
||||
--rdzv-id=$JOB_ID
|
||||
--rdzv-backend=c10d
|
||||
--rdzv-endpoint=$HOST_NODE_ADDR
|
||||
YOUR_TRAINING_SCRIPT.py (--arg1 ... train script args...)
|
||||
|
||||
``HOST_NODE_ADDR``, in form <host>[:<port>] (e.g. node1.example.com:29400), specifies the node and
|
||||
@ -156,10 +156,10 @@ Note on rendezvous backend
|
||||
|
||||
For multi-node training you need to specify:
|
||||
|
||||
1. ``--rdzv_id``: A unique job id (shared by all nodes participating in the job)
|
||||
2. ``--rdzv_backend``: An implementation of
|
||||
1. ``--rdzv-id``: A unique job id (shared by all nodes participating in the job)
|
||||
2. ``--rdzv-backend``: An implementation of
|
||||
:py:class:`torch.distributed.elastic.rendezvous.RendezvousHandler`
|
||||
3. ``--rdzv_endpoint``: The endpoint where the rendezvous backend is running; usually in form
|
||||
3. ``--rdzv-endpoint``: The endpoint where the rendezvous backend is running; usually in form
|
||||
``host:port``.
|
||||
|
||||
Currently ``c10d`` (recommended), ``etcd-v2``, and ``etcd`` (legacy) rendezvous backends are
|
||||
@ -221,7 +221,7 @@ The following environment variables are made available to you in your script:
|
||||
of the worker is specified in the ``WorkerSpec``.
|
||||
|
||||
5. ``LOCAL_WORLD_SIZE`` - The local world size (e.g. number of workers running locally); equals to
|
||||
``--nproc_per_node`` specified on ``torchrun``.
|
||||
``--nproc-per-node`` specified on ``torchrun``.
|
||||
|
||||
6. ``WORLD_SIZE`` - The world size (total number of workers in the job).
|
||||
|
||||
@ -246,7 +246,7 @@ Deployment
|
||||
------------
|
||||
|
||||
1. (Not needed for the C10d backend) Start the rendezvous backend server and get the endpoint (to be
|
||||
passed as ``--rdzv_endpoint`` to the launcher script)
|
||||
passed as ``--rdzv-endpoint`` to the launcher script)
|
||||
|
||||
2. Single-node multi-worker: Start the launcher on the host to start the agent process which
|
||||
creates and monitors a local worker group.
|
||||
@ -406,6 +406,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Number of nodes, or the range of nodes in form <minimum_nodes>:<maximum_nodes>.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nproc-per-node",
|
||||
"--nproc_per_node",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -418,6 +419,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
#
|
||||
|
||||
parser.add_argument(
|
||||
"--rdzv-backend",
|
||||
"--rdzv_backend",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -425,6 +427,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Rendezvous backend.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rdzv-endpoint",
|
||||
"--rdzv_endpoint",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -432,6 +435,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Rendezvous backend endpoint; usually in form <host>:<port>.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rdzv-id",
|
||||
"--rdzv_id",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -439,6 +443,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="User-defined group id.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rdzv-conf",
|
||||
"--rdzv_conf",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -450,7 +455,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
action=check_env,
|
||||
help="Start a local standalone rendezvous backend that is represented by a C10d TCP store "
|
||||
"on port 29400. Useful when launching single-node, multi-worker job. If specified "
|
||||
"--rdzv_backend, --rdzv_endpoint, --rdzv_id are auto-assigned; any explicitly set values "
|
||||
"--rdzv-backend, --rdzv-endpoint, --rdzv-id are auto-assigned; any explicitly set values "
|
||||
"are ignored.",
|
||||
)
|
||||
|
||||
@ -459,6 +464,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
#
|
||||
|
||||
parser.add_argument(
|
||||
"--max-restarts",
|
||||
"--max_restarts",
|
||||
action=env,
|
||||
type=int,
|
||||
@ -466,6 +472,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Maximum number of worker group restarts before failing.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--monitor-interval",
|
||||
"--monitor_interval",
|
||||
action=env,
|
||||
type=float,
|
||||
@ -473,6 +480,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Interval, in seconds, to monitor the state of workers.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--start-method",
|
||||
"--start_method",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -495,6 +503,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
"with the same behavior as 'python -m'.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-python",
|
||||
"--no_python",
|
||||
action=check_env,
|
||||
help="Skip prepending the training script with 'python' - just execute it directly. Useful "
|
||||
@ -502,13 +511,15 @@ def get_args_parser() -> ArgumentParser:
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--run-path",
|
||||
"--run_path",
|
||||
action=check_env,
|
||||
help="Run the training script with runpy.run_path in the same interpreter."
|
||||
" Script must be provided as an abs path (e.g. /abs/path/script.py)."
|
||||
" Takes precedence over --no_python.",
|
||||
" Takes precedence over --no-python.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-dir",
|
||||
"--log_dir",
|
||||
action=env,
|
||||
type=str,
|
||||
@ -541,6 +552,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
#
|
||||
|
||||
parser.add_argument(
|
||||
"--node-rank",
|
||||
"--node_rank",
|
||||
type=int,
|
||||
action=env,
|
||||
@ -548,16 +560,18 @@ def get_args_parser() -> ArgumentParser:
|
||||
help="Rank of the node for multi-node distributed training.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--master-addr",
|
||||
"--master_addr",
|
||||
default="127.0.0.1",
|
||||
type=str,
|
||||
action=env,
|
||||
help="Address of the master node (rank 0) that only used for static rendezvous. It should "
|
||||
"be either the IP address or the hostname of rank 0. For single node multi-proc training "
|
||||
"the --master_addr can simply be 127.0.0.1; IPv6 should have the pattern "
|
||||
"the --master-addr can simply be 127.0.0.1; IPv6 should have the pattern "
|
||||
"`[0:0:0:0:0:0:0:1]`.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--master-port",
|
||||
"--master_port",
|
||||
default=29500,
|
||||
type=int,
|
||||
@ -566,6 +580,7 @@ def get_args_parser() -> ArgumentParser:
|
||||
"training. It is only used for static rendezvous.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--local-addr",
|
||||
"--local_addr",
|
||||
default=None,
|
||||
type=str,
|
||||
@ -652,7 +667,7 @@ def get_use_env(args) -> bool:
|
||||
"""
|
||||
Retrieves ``use_env`` from the args.
|
||||
``use_env`` is a legacy argument, if ``use_env`` is False, the
|
||||
``--node_rank`` argument will be transferred to all worker processes.
|
||||
``--node-rank`` argument will be transferred to all worker processes.
|
||||
``use_env`` is only used by the ``torch.distributed.launch`` and will
|
||||
be deprecated in future releases.
|
||||
"""
|
||||
@ -729,12 +744,12 @@ def config_from_args(args) -> Tuple[LaunchConfig, Union[Callable, str], List[str
|
||||
else:
|
||||
if args.module:
|
||||
raise ValueError(
|
||||
"Don't use both the '--no_python' flag"
|
||||
"Don't use both the '--no-python' flag"
|
||||
" and the '--module' flag at the same time."
|
||||
)
|
||||
cmd = args.training_script
|
||||
if not use_env:
|
||||
cmd_args.append(f"--local_rank={macros.local_rank}")
|
||||
cmd_args.append(f"--local-rank={macros.local_rank}")
|
||||
cmd_args.extend(args.training_script_args)
|
||||
|
||||
return config, cmd, cmd_args
|
||||
@ -760,9 +775,9 @@ def run(args):
|
||||
log.info(
|
||||
f"\n**************************************\n"
|
||||
f"Rendezvous info:\n"
|
||||
f"--rdzv_backend={args.rdzv_backend} "
|
||||
f"--rdzv_endpoint={args.rdzv_endpoint} "
|
||||
f"--rdzv_id={args.rdzv_id}\n"
|
||||
f"--rdzv-backend={args.rdzv_backend} "
|
||||
f"--rdzv-endpoint={args.rdzv_endpoint} "
|
||||
f"--rdzv-id={args.rdzv_id}\n"
|
||||
f"**************************************\n"
|
||||
)
|
||||
|
||||
|
@ -43,12 +43,14 @@ class _SplitterSettingBase:
|
||||
):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--min-acc-module-size",
|
||||
"--min_acc_module_size",
|
||||
required=False,
|
||||
type=int,
|
||||
help="Minimum size limit of an accelerator subgraph.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-fusion",
|
||||
"--skip_fusion",
|
||||
default=False,
|
||||
action="store_true",
|
||||
@ -58,6 +60,7 @@ class _SplitterSettingBase:
|
||||
"can reduce overhead.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--allow-non-tensor",
|
||||
"--allow_non_tensor",
|
||||
default=False,
|
||||
action="store_true",
|
||||
|
@ -250,17 +250,17 @@ def prepareInputTensorsToRandomTopoTest(seed,
|
||||
def reproString(current_seed, args):
|
||||
repro_str = "python {0}".format(__file__)
|
||||
if args.cuda_fuser:
|
||||
repro_str += " --cuda_fuser"
|
||||
repro_str += " --cuda-fuser"
|
||||
if args.legacy_fuser:
|
||||
repro_str += " --legacy_fuser"
|
||||
repro_str += " --legacy-fuser"
|
||||
if args.profiling_executor:
|
||||
repro_str += " --profiling_executor"
|
||||
repro_str += " --profiling-executor"
|
||||
if args.fp16:
|
||||
repro_str += " --fp16"
|
||||
if args.cpu:
|
||||
repro_str += " --cpu"
|
||||
repro_str += " --max_num_tensor {0} --max_tensor_dim {1} --max_tensor_size {2}"\
|
||||
" --depth_factor {3} --seed {4} --repro_run".format(
|
||||
repro_str += " --max-num-tensor {0} --max-tensor-dim {1} --max-tensor-size {2}"\
|
||||
" --depth-factor {3} --seed {4} --repro-run".format(
|
||||
args.max_num_tensor, args.max_tensor_dim, args.max_tensor_size,
|
||||
args.depth_factor, current_seed)
|
||||
return repro_str
|
||||
@ -337,21 +337,21 @@ def runTest(seed, args):
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--cuda_fuser", action='store_true', default=True)
|
||||
parser.add_argument("--legacy_fuser", action='store_true', default=False)
|
||||
parser.add_argument("--profiling_executor", action='store_true', default=False)
|
||||
parser.add_argument("--cuda-fuser", "--cuda_fuser", action='store_true', default=True)
|
||||
parser.add_argument("--legacy-fuser", "--legacy_fuser", action='store_true', default=False)
|
||||
parser.add_argument("--profiling-executor", "--profiling_executor", action='store_true', default=False)
|
||||
parser.add_argument("--fp16", action='store_true', default=False)
|
||||
parser.add_argument("--cpu", action='store_true', default=False)
|
||||
parser.add_argument("--debug_print", action='store_true', default=False)
|
||||
parser.add_argument("--debug_tensor", action='store_true', default=False)
|
||||
parser.add_argument("--max_num_tensor", default=MAX_TENSOR, type=int)
|
||||
parser.add_argument("--max_tensor_dim", default=MAX_TENSOR_DIM, type=int)
|
||||
parser.add_argument("--max_tensor_size", default=MAX_TENSOR_SIZE, type=int)
|
||||
parser.add_argument("--depth_factor", default=GRAPH_FACTOR, type=int)
|
||||
parser.add_argument("--debug-print", "--debug_print", action='store_true', default=False)
|
||||
parser.add_argument("--debug-tensor", "--debug_tensor", action='store_true', default=False)
|
||||
parser.add_argument("--max-num-tensor", "--max_num_tensor", default=MAX_TENSOR, type=int)
|
||||
parser.add_argument("--max-tensor-dim", "--max_tensor_dim", default=MAX_TENSOR_DIM, type=int)
|
||||
parser.add_argument("--max-tensor-size", "--max_tensor_size", default=MAX_TENSOR_SIZE, type=int)
|
||||
parser.add_argument("--depth-factor", "--depth_factor", default=GRAPH_FACTOR, type=int)
|
||||
parser.add_argument("--seed", default=45589, type=int)
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument("--iterations", default=4, type=int)
|
||||
group.add_argument("--repro_run", action='store_true', default=False)
|
||||
group.add_argument("--repro-run", "--repro_run", action='store_true', default=False)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
@ -501,9 +501,9 @@ parser.add_argument('--subprocess', action='store_true',
|
||||
help='whether to run each test in a subprocess')
|
||||
parser.add_argument('--seed', type=int, default=1234)
|
||||
parser.add_argument('--accept', action='store_true')
|
||||
parser.add_argument('--jit_executor', type=str)
|
||||
parser.add_argument('--jit-executor', '--jit_executor', type=str)
|
||||
parser.add_argument('--repeat', type=int, default=1)
|
||||
parser.add_argument('--test_bailouts', action='store_true')
|
||||
parser.add_argument('--test-bailouts', '--test_bailouts', action='store_true')
|
||||
parser.add_argument('--use-pytest', action='store_true')
|
||||
parser.add_argument('--save-xml', nargs='?', type=str,
|
||||
const=_get_test_report_path(),
|
||||
|
@ -253,9 +253,10 @@ if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Compile py source")
|
||||
parser.add_argument("paths", nargs="*", help="Paths to freeze.")
|
||||
parser.add_argument("--verbose", action="store_true", help="Print debug logs")
|
||||
parser.add_argument("--install_dir", help="Root directory for all output files")
|
||||
parser.add_argument("--install-dir", "--install_dir", help="Root directory for all output files")
|
||||
parser.add_argument("--oss", action="store_true", help="If it's OSS build, add a fake _PyImport_FrozenModules")
|
||||
parser.add_argument(
|
||||
"--symbol-name",
|
||||
"--symbol_name",
|
||||
help="The name of the frozen module array symbol to generate",
|
||||
default="_PyImport_FrozenModules_torch",
|
||||
|
@ -40,10 +40,12 @@ def write_to_zip(file_path, strip_file_path, zf, prepend_str=""):
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Zip py source")
|
||||
parser.add_argument("paths", nargs="*", help="Paths to zip.")
|
||||
parser.add_argument("--install_dir", help="Root directory for all output files")
|
||||
parser.add_argument("--strip_dir", help="The absolute directory we want to remove from zip")
|
||||
parser.add_argument("--prepend_str", help="A string to prepend onto all paths of a file in the zip", default="")
|
||||
parser.add_argument("--zip_name", help="Output zip name")
|
||||
parser.add_argument("--install-dir", "--install_dir", help="Root directory for all output files")
|
||||
parser.add_argument("--strip-dir", "--strip_dir", help="The absolute directory we want to remove from zip")
|
||||
parser.add_argument(
|
||||
"--prepend-str", "--prepend_str", help="A string to prepend onto all paths of a file in the zip", default=""
|
||||
)
|
||||
parser.add_argument("--zip-name", "--zip_name", help="Output zip name")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -123,12 +123,12 @@ def run_subprocess(args):
|
||||
f"source activate {env} && "
|
||||
f"taskset --cpu-list {core_str} "
|
||||
f"python {os.path.abspath(__file__)} "
|
||||
"--DETAIL_in_subprocess "
|
||||
f"--DETAIL_seed {seed} "
|
||||
f"--DETAIL_num_threads {num_threads} "
|
||||
f"--DETAIL_sub_label '{sub_label}' "
|
||||
f"--DETAIL_result_file {result_file} "
|
||||
f"--DETAIL_env {env}",
|
||||
"--DETAIL-in-subprocess "
|
||||
f"--DETAIL-seed {seed} "
|
||||
f"--DETAIL-num-threads {num_threads} "
|
||||
f"--DETAIL-sub-label '{sub_label}' "
|
||||
f"--DETAIL-result-file {result_file} "
|
||||
f"--DETAIL-env {env}",
|
||||
env=env_vars,
|
||||
stdout=subprocess.PIPE,
|
||||
shell=True
|
||||
@ -197,7 +197,7 @@ def main():
|
||||
subprocess.run(
|
||||
f"source activate {env_path} && "
|
||||
f"python {os.path.abspath(__file__)} "
|
||||
"--DETAIL_in_compare",
|
||||
"--DETAIL-in-compare",
|
||||
shell=True
|
||||
)
|
||||
|
||||
@ -205,13 +205,13 @@ def main():
|
||||
if __name__ == "__main__":
|
||||
# These flags are for subprocess control, not controlling the main loop.
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--DETAIL_in_subprocess", action="store_true")
|
||||
parser.add_argument("--DETAIL_in_compare", action="store_true")
|
||||
parser.add_argument("--DETAIL_seed", type=int, default=None)
|
||||
parser.add_argument("--DETAIL_num_threads", type=int, default=None)
|
||||
parser.add_argument("--DETAIL_sub_label", type=str, default="N/A")
|
||||
parser.add_argument("--DETAIL_result_file", type=str, default=None)
|
||||
parser.add_argument("--DETAIL_env", type=str, default=None)
|
||||
parser.add_argument("--DETAIL-in-subprocess", "--DETAIL_in_subprocess", action="store_true")
|
||||
parser.add_argument("--DETAIL-in-compare", "--DETAIL_in_compare", action="store_true")
|
||||
parser.add_argument("--DETAIL-seed", "--DETAIL_seed", type=int, default=None)
|
||||
parser.add_argument("--DETAIL-num-threads", "--DETAIL_num_threads", type=int, default=None)
|
||||
parser.add_argument("--DETAIL-sub-label", "--DETAIL_sub_label", type=str, default="N/A")
|
||||
parser.add_argument("--DETAIL-result-file", "--DETAIL_result_file", type=str, default=None)
|
||||
parser.add_argument("--DETAIL-env", "--DETAIL_env", type=str, default=None)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.DETAIL_in_subprocess:
|
||||
|
@ -82,15 +82,15 @@ _DTYPE_STR_TO_DTYPE = {
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--pr", type=str, default=_PR_LIST[0], choices=_PR_LIST)
|
||||
parser.add_argument("--num_gpus", type=int, default=None)
|
||||
parser.add_argument("--test_variance", action="store_true")
|
||||
parser.add_argument("--num-gpus", "--num_gpus", type=int, default=None)
|
||||
parser.add_argument("--test-variance", "--test_variance", action="store_true")
|
||||
|
||||
# (Implementation details)
|
||||
parser.add_argument("--DETAIL_context", type=str, choices=(_MAIN, _SUBPROCESS), default=_MAIN)
|
||||
parser.add_argument("--DETAIL_device", type=str, choices=(_CPU, _GPU), default=None)
|
||||
parser.add_argument("--DETAIL_env", type=str, default=None)
|
||||
parser.add_argument("--DETAIL_result_file", type=str, default=None)
|
||||
parser.add_argument("--DETAIL_seed", type=int, default=None)
|
||||
parser.add_argument("--DETAIL-context", "--DETAIL_context", type=str, choices=(_MAIN, _SUBPROCESS), default=_MAIN)
|
||||
parser.add_argument("--DETAIL-device", "--DETAIL_device", type=str, choices=(_CPU, _GPU), default=None)
|
||||
parser.add_argument("--DETAIL-env", "--DETAIL_env", type=str, default=None)
|
||||
parser.add_argument("--DETAIL-result-file", "--DETAIL_result_file", type=str, default=None)
|
||||
parser.add_argument("--DETAIL-seed", "--DETAIL_seed", type=int, default=None)
|
||||
|
||||
args = parser.parse_args()
|
||||
if args.num_gpus is None:
|
||||
@ -101,11 +101,11 @@ def parse_args():
|
||||
_SUBPROCESS_CMD_TEMPLATE = (
|
||||
"source activate {source_env} && python -m examples.end_to_end "
|
||||
"--pr {pr} "
|
||||
"--DETAIL_context subprocess "
|
||||
"--DETAIL_device {device} "
|
||||
"--DETAIL_env {env} "
|
||||
"--DETAIL_result_file {result_file} "
|
||||
"--DETAIL_seed {seed}"
|
||||
"--DETAIL-context subprocess "
|
||||
"--DETAIL-device {device} "
|
||||
"--DETAIL-env {env} "
|
||||
"--DETAIL-result-file {result_file} "
|
||||
"--DETAIL-seed {seed}"
|
||||
)
|
||||
|
||||
|
||||
|
@ -87,7 +87,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--bench', type=str, choices=BENCHMARK_NAMES, nargs='+', default=BENCHMARK_NAMES)
|
||||
parser.add_argument('--seed', type=int, default=0)
|
||||
parser.add_argument('--samples', type=int, default=10)
|
||||
parser.add_argument('--probability_regular', type=float, default=1.0)
|
||||
parser.add_argument('--probability-regular', '--probability_regular', type=float, default=1.0)
|
||||
parser.add_argument('-o', '--output', type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -28,13 +28,17 @@ int main(int argc, char* argv[]) {
|
||||
TORCH_CHECK(std::string(argv[1]) == "--number");
|
||||
auto number = std::stoi(argv[2]);
|
||||
|
||||
TORCH_CHECK(std::string(argv[3]) == "--number_warmup");
|
||||
TORCH_CHECK(
|
||||
std::string(argv[3]) == "--number-warmup" ||
|
||||
std::string(argv[3]) == "--number_warmup");
|
||||
auto number_warmup = std::stoi(argv[4]);
|
||||
|
||||
TORCH_CHECK(std::string(argv[5]) == "--repeats");
|
||||
auto repeats = std::stoi(argv[6]);
|
||||
|
||||
TORCH_CHECK(std::string(argv[7]) == "--number_threads");
|
||||
TORCH_CHECK(
|
||||
std::string(argv[7]) == "--number-threads" ||
|
||||
std::string(argv[7]) == "--number_threads");
|
||||
auto number_threads = std::stoi(argv[8]);
|
||||
torch::set_num_threads(number_threads);
|
||||
|
||||
|
@ -635,9 +635,9 @@ class _ValgrindWrapper:
|
||||
run_loop_cmd = [
|
||||
run_loop_exec,
|
||||
"--number", str(number),
|
||||
"--number_warmup", str(min(number, 10)),
|
||||
"--number-warmup", str(min(number, 10)),
|
||||
"--repeats", str(repeats),
|
||||
"--number_threads", str(task_spec.num_threads),
|
||||
"--number-threads", str(task_spec.num_threads),
|
||||
]
|
||||
|
||||
valgrind_invocation, valgrind_invocation_output = run([
|
||||
|
@ -2611,7 +2611,11 @@ def main() -> None:
|
||||
help="generate separate headers per operator in ATen/ops",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-d", "--install_dir", help="output directory", default="build/aten/src/ATen"
|
||||
"-d",
|
||||
"--install-dir",
|
||||
"--install_dir",
|
||||
help="output directory",
|
||||
default="build/aten/src/ATen",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rocm",
|
||||
@ -2623,10 +2627,11 @@ def main() -> None:
|
||||
action="store_true",
|
||||
help="Generate MPS registration code when set",
|
||||
)
|
||||
# TODO: --op_registration_whitelist will be removed when all call-sites
|
||||
# TODO: --op-registration-whitelist will be removed when all call-sites
|
||||
# for gen.py are moved over to using the operator YAML file for mobile
|
||||
# custom build.
|
||||
parser.add_argument(
|
||||
"--op-registration-whitelist",
|
||||
"--op_registration_whitelist",
|
||||
nargs="*",
|
||||
help="filter op registrations by the whitelist (if set); "
|
||||
@ -2634,6 +2639,7 @@ def main() -> None:
|
||||
"e.g.: aten::empty aten::conv2d ...",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--op-selection-yaml-path",
|
||||
"--op_selection_yaml_path",
|
||||
help="Provide a path to the operator selection (for custom build) YAML "
|
||||
"that contains the information about the set of selected operators "
|
||||
@ -2642,26 +2648,30 @@ def main() -> None:
|
||||
"The operator names also contain the namespace prefix (e.g. aten::)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--backend-whitelist",
|
||||
"--backend_whitelist",
|
||||
nargs="*",
|
||||
help="filter dispatch backend by the whitelist (if set), "
|
||||
"e.g.: CPU CUDA QuantizedCPU ...",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--static-dispatch-backend",
|
||||
"--static_dispatch_backend",
|
||||
nargs="*",
|
||||
help="generate static dispatch code for the specific backend (if set)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skip-dispatcher-op-registration",
|
||||
"--skip_dispatcher_op_registration",
|
||||
action="store_true",
|
||||
help="Avoid registering operators into the dispatcher.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force-schema-registration",
|
||||
"--force_schema_registration",
|
||||
action="store_true",
|
||||
help="force it to generate schema-only registrations for all ops, including"
|
||||
"those that are not listed on --op_registration_whitelist",
|
||||
"those that are not listed on --op-registration-whitelist",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--generate",
|
||||
|
@ -339,12 +339,16 @@ def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Generate backend stub files")
|
||||
parser.add_argument(
|
||||
"-s",
|
||||
"--source-yaml",
|
||||
"--source_yaml",
|
||||
help="path to source yaml file containing operator external definitions",
|
||||
)
|
||||
parser.add_argument("-o", "--output_dir", help="output directory")
|
||||
parser.add_argument("--dry_run", type=bool, default=False, help="output directory")
|
||||
parser.add_argument("-o", "--output-dir", "--output_dir", help="output directory")
|
||||
parser.add_argument(
|
||||
"--dry-run", "--dry_run", type=bool, default=False, help="output directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--impl-path",
|
||||
"--impl_path",
|
||||
type=str,
|
||||
default=None,
|
||||
|
@ -626,24 +626,31 @@ def main() -> None:
|
||||
help="path to source directory for kernel templates",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--functions-yaml-path",
|
||||
"--functions_yaml_path",
|
||||
help="path to the functions.yaml file to use. Optional, but at least "
|
||||
"one of --functions_yaml_path and --custom_ops_yaml_path must be "
|
||||
"one of --functions-yaml-path and --custom-ops-yaml-path must be "
|
||||
"specified.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--custom-ops-yaml-path",
|
||||
"--custom_ops_yaml_path",
|
||||
help="path to the custom_ops.yaml file to use. Optional, but at least "
|
||||
"one of --functions_yaml_path and --custom_ops_yaml_path must be "
|
||||
"one of --functions-yaml-path and --custom-ops-yaml-path must be "
|
||||
"specified.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--aten-yaml-path",
|
||||
"--aten_yaml_path",
|
||||
help="path to native_functions.yaml file.",
|
||||
)
|
||||
# Note that make_file_manager() also looks at --install-dir.
|
||||
parser.add_argument(
|
||||
"-d", "--install_dir", help="output directory", default="build/generated"
|
||||
"-d",
|
||||
"--install-dir",
|
||||
"--install_dir",
|
||||
help="output directory",
|
||||
default="build/generated",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
@ -658,11 +665,13 @@ def main() -> None:
|
||||
help="run without writing any files (still updates outputs)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--static-dispatch-backend",
|
||||
"--static_dispatch_backend",
|
||||
nargs="*",
|
||||
help="generate static dispatch code for the specific backend (if set)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--op-registration-whitelist",
|
||||
"--op_registration_whitelist",
|
||||
nargs="*",
|
||||
help="filter op registrations by the whitelist (if set); "
|
||||
@ -670,6 +679,7 @@ def main() -> None:
|
||||
"e.g.: aten::empty aten::conv2d ...",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--op-selection-yaml-path",
|
||||
"--op_selection_yaml_path",
|
||||
help="Provide a path to the operator selection (for custom build) YAML "
|
||||
"that contains the information about the set of selected operators "
|
||||
@ -687,6 +697,7 @@ def main() -> None:
|
||||
help="reinterpret CUDA as ROCm/HIP and adjust filepaths accordingly",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--use-aten-lib",
|
||||
"--use_aten_lib",
|
||||
action="store_true",
|
||||
help="a boolean flag to indicate whether we use ATen kernels or not, in the future this flag will be per "
|
||||
|
@ -210,53 +210,64 @@ def main() -> None:
|
||||
parser = argparse.ArgumentParser(description="Generate Lazy Tensor backend files")
|
||||
parser.add_argument(
|
||||
"-s",
|
||||
"--source-yaml",
|
||||
"--source_yaml",
|
||||
help="path to source yaml file containing operator external definitions",
|
||||
)
|
||||
parser.add_argument("-o", "--output_dir", help="output directory")
|
||||
parser.add_argument("--dry_run", type=bool, default=False, help="output directory")
|
||||
parser.add_argument("-o", "--output-dir", "--output_dir", help="output directory")
|
||||
parser.add_argument(
|
||||
"--dry-run", "--dry_run", type=bool, default=False, help="output directory"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--impl-path",
|
||||
"--impl_path",
|
||||
type=str,
|
||||
default=None,
|
||||
help="path to the source C++ file containing kernel definitions",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gen-ts-lowerings",
|
||||
"--gen_ts_lowerings",
|
||||
action="store_true",
|
||||
help="Generate TorchScript lowerings in addition to Lazy IR and NativeFunctions",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--node-base",
|
||||
"--node_base",
|
||||
type=str,
|
||||
default=default_args.node_base,
|
||||
help="Name of backend specific custom Lazy IR Node base class",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--node-base-hdr",
|
||||
"--node_base_hdr",
|
||||
type=str,
|
||||
default=default_args.node_base_hdr,
|
||||
help="Path to header file defining custom Lazy IR Node base class",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--shape-inference-hdr",
|
||||
"--shape_inference_hdr",
|
||||
type=str,
|
||||
default=default_args.shape_inference_hdr,
|
||||
help="Path to header file defining custom Lazy shape inference functions",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tensor-class",
|
||||
"--tensor_class",
|
||||
type=str,
|
||||
default=default_args.tensor_class,
|
||||
help="Name of backend specific custom Lazy Tensor class",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tensor-class-hdr",
|
||||
"--tensor_class_hdr",
|
||||
type=str,
|
||||
default=default_args.tensor_class_hdr,
|
||||
help="Path to header file defining custom Lazy Tensor class",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--backend-name",
|
||||
"--backend_name",
|
||||
type=str,
|
||||
default=default_args.backend_name,
|
||||
|
Reference in New Issue
Block a user