Add tools/ script to limit circleci to a set of jobs (#58001)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/58001

Adds a script so that devs can generate a commit (at the base of a stack) that removes all CI jobs but the set that they care about. See CONTRIBUTING.md changes for usage

Test Plan: Imported from OSS

Reviewed By: mruberry

Differential Revision: D28359910

Pulled By: driazati

fbshipit-source-id: 2741570f2bab2c28f4a9d7aef727b1b2399d0ce1
This commit is contained in:
driazati
2021-05-11 11:57:19 -07:00
committed by Facebook GitHub Bot
parent 18edb77a28
commit e385aa863a
3 changed files with 164 additions and 0 deletions

View File

@ -13,6 +13,7 @@
- [Local linting](#local-linting)
- [Running `mypy`](#running-mypy)
- [C++ Unit Testing](#c-unit-testing)
- [Run Specific CI Jobs](#run-specific-ci-jobs)
- [Writing documentation](#writing-documentation)
- [Building documentation](#building-documentation)
- [Tips](#tips)
@ -426,6 +427,26 @@ is part of the test suite `ContainerAliasingTest` in the file
./build/bin/test_jit --gtest_filter=ContainerAliasingTest.UnionAliasing
```
### Run Specific CI Jobs
You can generate a commit that limits the CI to only run a specific set of jobs
(plus the jobs they depend on) by using `tools/explicit_ci_jobs.py` like so:
```bash
# --job: specify one or more times to filter to a specific job + its dependencies
# --make-commit: commit CI changes to git with a message explaining the change
python tools/explicit_ci_jobs.py --job binary_linux_manywheel_3_6m_cpu_devtoolset7_nightly_test --make-commit
# Make your changes
ghstack submit
```
**NB**: It is not recommended to use this workflow unless you are also using
[`ghstack`](https://github.com/ezyang/ghstack). It creates a large commit that is
of very low signal to reviewers.
## Writing documentation
PyTorch uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)

View File

@ -43,6 +43,7 @@ files =
tools/autograd/*.py,
tools/clang_tidy.py,
tools/codegen,
tools/explicit_ci_jobs.py,
tools/extract_scripts.py,
tools/mypy_wrapper.py,
tools/print_test_stats.py,

142
tools/explicit_ci_jobs.py Executable file
View File

@ -0,0 +1,142 @@
#!/usr/bin/env python3
import yaml
import textwrap
import subprocess
import pathlib
import argparse
from typing import Dict, List, Any
# Resolve __file__ before walking up the tree: when the script is launched via
# a relative path (e.g. `cd tools && python explicit_ci_jobs.py`), an
# unresolved __file__ can be relative and `.parent.parent` then collapses to
# "." instead of pointing at the repository root.
REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent

# CircleCI configuration that gets rewritten in place
CONFIG_YML = REPO_ROOT / ".circleci" / "config.yml"

# Directory holding the GitHub Actions workflow files
WORKFLOWS_DIR = REPO_ROOT / ".github" / "workflows"

# Names of the CircleCI workflows whose jobs are considered when filtering;
# every other workflow is dropped from the generated config.
WORKFLOWS_TO_CHECK = [
    "binary_builds",
    "build",
    "master_build",
    # These are formatted slightly differently, skip them
    # "scheduled-ci",
    # "debuggable-scheduled-ci",
    # "slow-gradcheck-scheduled-ci",
    # "ecr_gc",
    # "promote",
]
def add_job(
    workflows: Dict[str, Any],
    workflow_name: str,
    type: str,
    job: Dict[str, Any],
    past_jobs: Dict[str, Any],
) -> None:
    """
    Add job 'job' under 'type' and 'workflow_name' to 'workflows' in place. Also
    add any dependencies (they must already be in 'past_jobs').

    Args:
        workflows: mapping of workflow name to workflow config; mutated in
            place. A missing workflow is created as
            {"when": "always", "jobs": []}.
        workflow_name: name of the workflow the job belongs to.
        type: the key the job config is stored under in the workflow's job
            list (each list entry has the form {type: job}).
        job: the job's config; its optional "requires" list names the jobs it
            depends on.
        past_jobs: mapping of job name to a record with the keys
            "workflow_name", "type" and "job", used to look up dependencies.

    Raises:
        KeyError: if a name in the job's "requires" list is not in 'past_jobs'.
    """
    if workflow_name not in workflows:
        workflows[workflow_name] = {"when": "always", "jobs": []}

    entry = {type: job}
    jobs = workflows[workflow_name]["jobs"]
    if entry in jobs:
        # Already added (together with its dependencies), e.g. because it is a
        # dependency shared by several selected jobs. Appending it again would
        # leave duplicate job entries in the workflow.
        return

    requires = job.get("requires", None)
    if requires is not None:
        for requirement in requires:
            if requirement not in past_jobs:
                raise KeyError(
                    f"Job {job.get('name', type)!r} requires {requirement!r}, "
                    "which has not been seen in any checked workflow"
                )
            dependency = past_jobs[requirement]
            # Dependencies go in first so they precede the job needing them
            add_job(
                workflows,
                dependency["workflow_name"],
                dependency["type"],
                dependency["job"],
                past_jobs,
            )

    jobs.append(entry)
def get_filtered_circleci_config(
    workflows: Dict[str, Any], relevant_jobs: List[str]
) -> Dict[str, Any]:
    """
    Build a new workflows mapping that keeps only the jobs named in
    'relevant_jobs' (plus whatever add_job pulls in as their dependencies).

    Only workflows listed in WORKFLOWS_TO_CHECK are inspected; all others are
    left out of the result. Jobs without a "name" key cannot be selected and
    are reported on stdout.
    """
    filtered: Dict[str, Any] = {}
    seen_jobs: Dict[str, Any] = {}

    for workflow_name, workflow in workflows.items():
        if workflow_name in WORKFLOWS_TO_CHECK:
            for entry in workflow["jobs"]:
                for job_type, job in entry.items():
                    if "name" not in job:
                        # Unnamed jobs can't be matched against the CLI names
                        print("Skipping", job_type)
                        continue

                    job_name = job["name"]
                    if job_name in relevant_jobs:
                        # Requested on the command line: keep it
                        add_job(filtered, workflow_name, job_type, job, seen_jobs)

                    # Remember every named job so that a later job can pull
                    # it in as a dependency
                    seen_jobs[job_name] = {
                        "workflow_name": workflow_name,
                        "type": job_type,
                        "job": job,
                    }

    return filtered
def commit_ci(files: List[str], message: str) -> None:
    """
    Stage 'files' and create a git commit with 'message'.

    The commit is refused when anything is already staged in the index, so
    that unrelated changes don't end up in the generated commit.

    Args:
        files: paths to pass to `git add`.
        message: the commit message.

    Raises:
        RuntimeError: if `git status` reports changes that are already staged.
        subprocess.CalledProcessError: if any git command exits with a
            non-zero status.
    """
    # check=True: fail loudly instead of continuing after a failed git command
    status = subprocess.run(
        ["git", "status", "--porcelain"], stdout=subprocess.PIPE, check=True
    ).stdout.decode()
    for line in status.split("\n"):
        if line == "":
            continue
        if line.startswith("??"):
            # Untracked file: nothing is staged for it and it will not be part
            # of the commit, since only 'files' are added below
            continue
        # The first porcelain column is the index (staged) status; a blank
        # there means the entry only has unstaged changes
        if line[0] != " ":
            raise RuntimeError(f"Refusing to commit while other changes are already staged: {line}")

    # Make the commit ("--" keeps paths from being parsed as options)
    subprocess.run(["git", "add", "--"] + files, check=True)
    subprocess.run(["git", "commit", "-m", message], check=True)
# Script entry point: rewrite .circleci/config.yml so it only contains the
# requested jobs, optionally delete the GitHub Actions workflow files, and
# optionally commit the result.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="make .circleci/config.yml only have a specific set of jobs and delete GitHub actions"
    )
    parser.add_argument("--job", action="append", help="job name", required=True)
    parser.add_argument(
        "--keep-gha", action="store_true", help="don't delete GitHub actions"
    )
    parser.add_argument(
        "--make-commit", action="store_true", help="add the change to git as a do-not-merge commit"
    )
    args = parser.parse_args()

    touched_files = [CONFIG_YML]
    with open(CONFIG_YML, "r") as f:
        config_yml = yaml.safe_load(f.read())

    config_yml["workflows"] = get_filtered_circleci_config(config_yml["workflows"], args.job)

    # Bail out before touching any file if a requested job was not found;
    # otherwise a typo in --job would silently strip every job from the config.
    kept_names = {
        job.get("name")
        for workflow in config_yml["workflows"].values()
        for job_dict in workflow["jobs"]
        for job in job_dict.values()
    }
    missing_jobs = [job for job in args.job if job not in kept_names]
    if missing_jobs:
        parser.error(
            "job(s) not found in workflows "
            f"{', '.join(WORKFLOWS_TO_CHECK)}: {', '.join(missing_jobs)}"
        )

    with open(CONFIG_YML, "w") as f:
        yaml.dump(config_yml, f)

    if not args.keep_gha:
        # iterdir() already yields paths that include WORKFLOWS_DIR; joining
        # them onto WORKFLOWS_DIR again would duplicate the directory prefix
        # whenever WORKFLOWS_DIR is a relative path.
        for path in WORKFLOWS_DIR.iterdir():
            if path.is_dir():
                # unlink() only removes files; leave subdirectories alone
                continue
            touched_files.append(path)
            path.unlink()

    if args.make_commit:
        jobs_str = '\n'.join([f" * {job}" for job in args.job])
        # Assemble the message line by line. Dedenting an indented template
        # after interpolating the multi-line job list leaves stray indentation
        # in the message when more than one job is given. The blank lines
        # separate the subject line from the body paragraphs.
        message = "\n".join([
            "[skip ci][do not merge] Edit config.yml to filter specific jobs",
            "",
            "Filter CircleCI to only run:",
            jobs_str,
            "",
            "See [Run Specific CI Jobs](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md#run-specific-ci-jobs) for details.",
        ])
        commit_ci([str(f.relative_to(REPO_ROOT)) for f in touched_files], message)