mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-20 21:14:14 +08:00 
			
		
		
		
	Signed-off-by: Edward Z. Yang <ezyang@meta.com> Pull Request resolved: https://github.com/pytorch/pytorch/pull/105928 Approved by: https://github.com/albanD
		
			
				
	
	
		
			172 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			172 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import argparse
 | |
| import datetime
 | |
| import subprocess
 | |
| import sys
 | |
| import time
 | |
| 
 | |
| import torch
 | |
| 
 | |
| from .runner import get_nn_runners
 | |
| 
 | |
| 
 | |
def run_rnn(
    name,
    rnn_creator,
    nloops=5,
    seqLength=100,
    numLayers=1,
    inputSize=512,
    hiddenSize=512,
    miniBatch=64,
    device="cuda",
    seed=None,
):
    """Build one RNN model definition and run it for ``nloops`` iterations.

    Args:
        name: Label of the runner. Accepted for interface compatibility; it
            is not used inside this function.
        rnn_creator: Callable invoked with the size/device/seed keyword
            arguments below. The returned object must expose ``inputs``,
            ``params``, ``forward``, ``backward_setup`` and ``backward``.
        nloops: Number of iterations to execute.
        seqLength: Forwarded unchanged to ``rnn_creator``.
        numLayers: Forwarded unchanged to ``rnn_creator``.
        inputSize: Forwarded unchanged to ``rnn_creator``.
        hiddenSize: Forwarded unchanged to ``rnn_creator``.
        miniBatch: Forwarded unchanged to ``rnn_creator``.
        device: Must be ``"cuda"``.
        seed: Forwarded unchanged to ``rnn_creator``.

    Raises:
        AssertionError: If ``device`` is not ``"cuda"``.
    """

    def run_iter(modeldef):
        # Forward
        forward_output = modeldef.forward(*modeldef.inputs)

        # "loss computation" and backward
        if modeldef.backward_setup is not None:
            backward_input = modeldef.backward_setup(forward_output)
        else:
            backward_input = forward_output

        if modeldef.backward is not None:
            modeldef.backward(*backward_input)

            # "Update" parameters: only the gradients are reset. Parameters
            # that never received a gradient have ``grad is None`` and are
            # skipped instead of raising AttributeError.
            with torch.no_grad():
                for param in modeldef.params:
                    if param.grad is not None:
                        param.grad.zero_()

        # Wait for queued GPU work so each iteration is complete on return.
        torch.cuda.synchronize()

    assert device == "cuda"
    creator_args = dict(
        seqLength=seqLength,
        numLayers=numLayers,
        inputSize=inputSize,
        hiddenSize=hiddenSize,
        miniBatch=miniBatch,
        device=device,
        seed=seed,
    )
    modeldef = rnn_creator(**creator_args)

    # Plain loop: the iterations are run for their side effects only.
    for _ in range(nloops):
        run_iter(modeldef)
 | |
| 
 | |
| 
 | |
def profile(
    rnns,
    sleep_between_seconds=1,
    nloops=5,
    internal_run=True,  # Unused, get rid of this TODO
    seqLength=100,
    numLayers=1,
    inputSize=512,
    hiddenSize=512,
    miniBatch=64,
    device="cuda",
    seed=None,
):
    """Run each requested RNN runner in turn, sleeping after every run.

    ``get_nn_runners(*rnns)`` is iterated as ``(name, creator, context)``
    triples. For each triple, ``context()`` is entered as a context manager,
    ``run_rnn`` is called with ``nloops`` and the size/device/seed settings,
    and the function then sleeps for ``sleep_between_seconds`` while still
    inside the context.

    ``internal_run`` is accepted but never read.
    """
    run_settings = {
        "seqLength": seqLength,
        "numLayers": numLayers,
        "inputSize": inputSize,
        "hiddenSize": hiddenSize,
        "miniBatch": miniBatch,
        "device": device,
        "seed": seed,
    }
    for runner_name, runner_creator, runner_context in get_nn_runners(*rnns):
        with runner_context():
            run_rnn(runner_name, runner_creator, nloops, **run_settings)
            time.sleep(sleep_between_seconds)
 | |
| 
 | |
| 
 | |
def system(command):
    """Run ``command`` through the shell and capture its result.

    The command is echoed to stdout with a ``[system]`` prefix before it is
    executed.

    Args:
        command: Command line, passed as a single string to the shell.

    Returns:
        A ``(return-code, stdout, stderr)`` tuple, with both streams decoded
        to ``str``.
    """
    print(f"[system] {command}")
    # NOTE(review): shell=True is kept because callers pass one pre-built
    # command string; do not feed untrusted input into it.
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
        check=False,
    )
    # UTF-8 is a superset of ASCII, so pure-ASCII output decodes exactly as
    # before; errors="replace" keeps undecodable bytes from raising
    # UnicodeDecodeError and hiding the command's real output.
    output = completed.stdout.decode("utf-8", errors="replace")
    err = completed.stderr.decode("utf-8", errors="replace")
    return completed.returncode, output, err
 | |
| 
 | |
| 
 | |
def describe_sizes(**sizes):
    """Return a compact tag such as ``s100-l1-i512-h512-b64``.

    The tag is built from the ``seqLength``, ``numLayers``, ``inputSize``,
    ``hiddenSize`` and ``miniBatch`` entries of ``sizes``, in that order.
    Any other keyword is ignored; a missing one raises ``KeyError``.
    """
    tagged_fields = (
        ("s", "seqLength"),
        ("l", "numLayers"),
        ("i", "inputSize"),
        ("h", "hiddenSize"),
        ("b", "miniBatch"),
    )
    return "-".join(f"{prefix}{sizes[field]}" for prefix, field in tagged_fields)
 | |
| 
 | |
| 
 | |
# Prefix prepended to every generated nvprof output path. The "~" is not
# expanded anywhere in this module.
OUTPUT_DIR = "~/profout/"


def nvprof_output_filename(rnns, **params):
    """Build the ``.nvvp`` output path for one profiling run.

    The file name combines the joined runner names, the size tag from
    ``describe_sizes(**params)`` and the current local time formatted as
    ``%m%d%y-%H%M``, all prefixed with ``OUTPUT_DIR``.
    """
    runners_part = "-".join(rnns)
    sizes_part = describe_sizes(**params)
    stamp = datetime.datetime.now().strftime("%m%d%y-%H%M")
    return OUTPUT_DIR + f"prof_{runners_part}_{sizes_part}_{stamp}.nvvp"
 | |
| 
 | |
| 
 | |
def nvprof(cmd, outpath):
    """Run ``cmd`` under ``nvprof -o outpath`` via ``system``.

    Returns whatever ``system`` returns for the assembled command line.
    """
    wrapped_command = f"nvprof -o {outpath} {cmd}"
    return system(wrapped_command)
 | |
| 
 | |
| 
 | |
def full_profile(rnns, **args):
    """Re-run this module under ``nvprof`` with ``--internal-run`` set.

    Args:
        rnns: Runner names, forwarded to the child process via ``--rnns``.
        **args: Remaining options. Each one is forwarded as ``--key=value``
            except ``internal_run``. All of them are also passed to
            ``nvprof_output_filename`` to name the output file.

    Raises:
        RuntimeError: If the nvprof command exits with a non-zero return
            code; the message carries the captured stderr and stdout.
    """
    # ``internal_run`` is a ``store_true`` flag, so argparse rejects an
    # explicit value such as ``--internal_run=False`` and the child exits
    # with a usage error. The flag is appended on its own below instead.
    profile_args = [
        f"--{k}={v}" for k, v in args.items() if k != "internal_run"
    ]
    profile_args.append(f"--rnns {' '.join(rnns)}")
    profile_args.append("--internal-run")

    outpath = nvprof_output_filename(rnns, **args)

    cmd = f"{sys.executable} -m fastrnns.profile {' '.join(profile_args)}"
    rc, stdout, stderr = nvprof(cmd, outpath)
    if rc != 0:
        raise RuntimeError(f"stderr: {stderr}\nstdout: {stdout}")
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Profile RNNs")

    # Integer options, registered in the order they appear in --help.
    # argparse passes string defaults through ``type``, so they parse as ints.
    for option_flags, option_default in (
        (("--seqLength",), "100"),
        (("--numLayers",), "1"),
        (("--inputSize",), "512"),
        (("--hiddenSize",), "512"),
        (("--miniBatch",), "64"),
        (("--sleep-between-seconds", "--sleep_between_seconds"), "1"),
        (("--nloops",), "5"),
    ):
        parser.add_argument(*option_flags, default=option_default, type=int)

    parser.add_argument("--rnns", nargs="*", help="What to run. cudnn, aten, jit, etc")

    # With --internal-run the RNNs are run directly in this process.
    # Without it, the script shells out to nvprof, which re-invokes this
    # module with --internal-run set.
    parser.add_argument(
        "--internal-run",
        "--internal_run",
        action="store_true",
        default=False,
        help="Don't use this",
    )

    args = parser.parse_args()
    if args.rnns is None:
        args.rnns = ["cudnn", "aten", "jit"]
    print(args)

    entry_point = profile if args.internal_run else full_profile
    entry_point(**vars(args))
 |