mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[TD] Use label to configure td on distributed for rollout (#122976)
Gate TD on distributed behind a label.
TODO: auto-add the label to certain people's PRs.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/122976
Approved by: https://github.com/huydhn, https://github.com/ZainRizvi
This commit is contained in:
committed by
PyTorch MergeBot
parent
4f66db80ca
commit
61be8843c9
@ -40,6 +40,9 @@ outputs:
|
||||
ci-no-td:
|
||||
description: True if ci-no-td label was on PR or [ci-no-td] in PR body.
|
||||
value: ${{ steps.filter.outputs.ci-no-td }}
|
||||
ci-td-distributed:
|
||||
description: True if ci-td-distributed label was on PR or [ci-td-distributed] in PR body.
|
||||
value: ${{ steps.filter.outputs.ci-td-distributed }}
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
|
1
.github/pytorch-probot.yml
vendored
1
.github/pytorch-probot.yml
vendored
@ -1,5 +1,6 @@
|
||||
tracking_issue: 24422
|
||||
ciflow_tracking_issue: 64124
|
||||
TD_rollout_issue: 123120
|
||||
ciflow_push_tags:
|
||||
- ciflow/binaries
|
||||
- ciflow/binaries_conda
|
||||
|
5
.github/scripts/filter_test_configs.py
vendored
5
.github/scripts/filter_test_configs.py
vendored
@ -480,6 +480,11 @@ def perform_misc_tasks(
|
||||
"ci-no-test-timeout", check_for_setting(labels, pr_body, "ci-no-test-timeout")
|
||||
)
|
||||
set_output("ci-no-td", check_for_setting(labels, pr_body, "ci-no-td"))
|
||||
# Only relevant for the one linux distributed cuda job, delete this when TD
|
||||
# is rolled out completely
|
||||
set_output(
|
||||
"ci-td-distributed", check_for_setting(labels, pr_body, "ci-td-distributed")
|
||||
)
|
||||
|
||||
# Obviously, if the job name includes unstable, then this is an unstable job
|
||||
is_unstable = job_name and IssueType.UNSTABLE.value in job_name
|
||||
|
2
.github/scripts/test_filter_test_configs.py
vendored
2
.github/scripts/test_filter_test_configs.py
vendored
@ -639,6 +639,7 @@ class TestConfigFilter(TestCase):
|
||||
ci_verbose_test_logs: bool = False,
|
||||
ci_no_test_timeout: bool = False,
|
||||
ci_no_td: bool = False,
|
||||
ci_td_distributed: bool = False,
|
||||
is_unstable: bool = False,
|
||||
reenabled_issues: str = "",
|
||||
) -> str:
|
||||
@ -647,6 +648,7 @@ class TestConfigFilter(TestCase):
|
||||
f"ci-verbose-test-logs={ci_verbose_test_logs}\n"
|
||||
f"ci-no-test-timeout={ci_no_test_timeout}\n"
|
||||
f"ci-no-td={ci_no_td}\n"
|
||||
f"ci-td-distributed={ci_td_distributed}\n"
|
||||
f"is-unstable={is_unstable}\n"
|
||||
f"reenabled-issues={reenabled_issues}\n"
|
||||
)
|
||||
|
2
.github/workflows/_linux-test.yml
vendored
2
.github/workflows/_linux-test.yml
vendored
@ -200,6 +200,7 @@ jobs:
|
||||
VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
|
||||
NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
|
||||
NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
|
||||
TD_DISTRIBUTED: ${{ steps.keep-going.outputs.ci-td-distributed }}
|
||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
||||
SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }}
|
||||
SHM_SIZE: ${{ contains(inputs.build-environment, 'cuda') && '2g' || '1g' }}
|
||||
@ -252,6 +253,7 @@ jobs:
|
||||
-e VERBOSE_TEST_LOGS \
|
||||
-e NO_TEST_TIMEOUT \
|
||||
-e NO_TD \
|
||||
-e TD_DISTRIBUTED \
|
||||
-e PR_LABELS \
|
||||
-e MAX_JOBS="$(nproc --ignore=2)" \
|
||||
-e SCCACHE_BUCKET \
|
||||
|
@ -32,6 +32,7 @@ from torch.testing._internal.common_utils import (
|
||||
retry_shell,
|
||||
set_cwd,
|
||||
shell,
|
||||
TEST_CUDA,
|
||||
TEST_WITH_ASAN,
|
||||
TEST_WITH_CROSSREF,
|
||||
TEST_WITH_ROCM,
|
||||
@ -1180,7 +1181,15 @@ def parse_args():
|
||||
action="store_true",
|
||||
help="Enables removing tests based on TD",
|
||||
default=IS_CI
|
||||
and (TEST_WITH_CROSSREF or TEST_WITH_ASAN)
|
||||
and (
|
||||
TEST_WITH_CROSSREF
|
||||
or TEST_WITH_ASAN
|
||||
or (
|
||||
strtobool(os.environ.get("TD_DISTRIBUTED", "False"))
|
||||
and os.getenv("TEST_CONFIG") == "distributed"
|
||||
and TEST_CUDA
|
||||
)
|
||||
)
|
||||
and os.getenv("BRANCH", "") != "main"
|
||||
and not strtobool(os.environ.get("NO_TD", "False")),
|
||||
)
|
||||
|
Reference in New Issue
Block a user