Move slow test query to ClickHouse (#139322)

Example run: https://github.com/pytorch/pytorch/actions/runs/11602255032/job/32306827867?pr=139322 (with PR creation commented out); also tested locally
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139322
Approved by: https://github.com/huydhn
Author: Catherine Lee
Date: 2024-10-30 23:58:27 +00:00
Committer: PyTorch MergeBot
Parent: 48854cbfc4
Commit: f747bd2947
3 changed files with 32 additions and 31 deletions

View File

@@ -80,7 +80,7 @@ jobs:
     # It takes less than 15m to finish functorch docs unless there are issues
     timeout-minutes: 15
     # Set a fixed name for this job instead of using the current matrix-generated name, i.e. build-docs (cpp, linux.12xlarge, 180)
-    # The current name requires updating the Rockset last docs push query from test-infra every time the matrix is updated
+    # The current name requires updating the database last docs push query from test-infra every time the matrix is updated
     name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
     steps:
       - name: Setup SSH (Click me for login details)

View File

@@ -58,13 +58,15 @@ jobs:
       - name: Install requirements
         shell: bash
         run: |
-          pip install rockset==1.0.3 requests==2.32.2
+          pip install requests==2.32.2 clickhouse-connect==0.7.16
       - name: Update slow test file
         shell: bash
         env:
-          ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }}
           PYTORCHBOT_TOKEN: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
           UPDATEBOT_TOKEN: ${{ secrets.UPDATEBOT_TOKEN }}
+          CLICKHOUSE_ENDPOINT: ${{ secrets.CLICKHOUSE_ENDPOINT }}
+          CLICKHOUSE_USERNAME: ${{ secrets.CLICKHOUSE_READONLY_USERNAME }}
+          CLICKHOUSE_PASSWORD: ${{ secrets.CLICKHOUSE_READONLY_PASSWORD }}
         run: |
           git config --global user.name "PyTorch UpdateBot"
           git config --global user.email "pytorchupdatebot@users.noreply.github.com"
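
This step now installs clickhouse-connect and exports read-only ClickHouse credentials instead of a Rockset API key. The query_clickhouse helper that the script imports (next file) is not part of this diff; below is a minimal sketch, assuming it simply wraps clickhouse-connect and reads the same three environment variables set above. The function name matches the import, but the body, the secure flag, and the return shape are assumptions for illustration only.

import os
from typing import Any, Dict, List

import clickhouse_connect  # pip install clickhouse-connect==0.7.16


def query_clickhouse(query: str, params: Dict[str, Any]) -> List[Dict[str, Any]]:
    # Connect with the credentials exported by the workflow step above.
    client = clickhouse_connect.get_client(
        host=os.environ["CLICKHOUSE_ENDPOINT"],
        username=os.environ["CLICKHOUSE_USERNAME"],
        password=os.environ["CLICKHOUSE_PASSWORD"],
        secure=True,  # assumption: the endpoint is TLS-only
    )
    # clickhouse-connect binds {name:Type} placeholders from `parameters`.
    result = client.query(query, parameters=params)
    # One dict per row, keyed by column name.
    return list(result.named_results())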

View File

@@ -6,59 +6,62 @@ from pathlib import Path
 from typing import Any, cast, Dict, List, Optional, Tuple

 import requests
-import rockset  # type: ignore[import]
+from clickhouse import query_clickhouse  # type: ignore[import]

 REPO_ROOT = Path(__file__).resolve().parent.parent.parent

 QUERY = """
 WITH most_recent_strict_commits AS (
     SELECT
-        push.head_commit.id as sha,
+        distinct push.head_commit.'id' as sha
     FROM
-        commons.push
+        -- not bothering with final
+        default.push
     WHERE
         push.ref = 'refs/heads/viable/strict'
-        AND push.repository.full_name = 'pytorch/pytorch'
+        AND push.repository.'full_name' = 'pytorch/pytorch'
     ORDER BY
-        push._event_time DESC
+        push.head_commit.'timestamp' desc
     LIMIT
         3
 ), workflows AS (
     SELECT
         id
     FROM
-        commons.workflow_run w
-        INNER JOIN most_recent_strict_commits c on w.head_sha = c.sha
+        default.workflow_run w final
     WHERE
-        w.name != 'periodic'
+        w.id in (select id from materialized_views.workflow_run_by_head_sha
+            where head_sha in (select sha from most_recent_strict_commits)
+        )
+        and w.name != 'periodic'
 ),
 job AS (
     SELECT
-        j.id
+        j.id as id
     FROM
-        commons.workflow_job j
-        INNER JOIN workflows w on w.id = j.run_id
+        default.workflow_job j final
     WHERE
-        j.name NOT LIKE '%asan%'
+        j.run_id in (select id from workflows)
+        and j.name NOT LIKE '%asan%'
 ),
 duration_per_job AS (
     SELECT
-        test_run.classname,
-        test_run.name,
-        job.id,
-        SUM(time) as time
+        test_run.classname as classname,
+        test_run.name as name,
+        job.id as id,
+        SUM(test_run.time) as time
     FROM
-        commons.test_run_s3 test_run
-        /* `test_run` is ginormous and `job` is small, so lookup join is essential */
-        INNER JOIN job ON test_run.job_id = job.id HINT(join_strategy = lookup)
+        default.test_run_s3 test_run
+        INNER JOIN job ON test_run.job_id = job.id
     WHERE
         /* cpp tests do not populate `file` for some reason. */
         /* Exclude them as we don't include them in our slow test infra */
-        test_run.file IS NOT NULL
+        test_run.file != ''
         /* do some more filtering to cut down on the test_run size */
-        AND test_run.skipped IS NULL
-        AND test_run.failure IS NULL
-        AND test_run.error IS NULL
+        AND empty(test_run.skipped)
+        AND empty(test_run.failure)
+        AND empty(test_run.error)
+        and test_run.job_id in (select id from job)
     GROUP BY
         test_run.classname,
         test_run.name,
@@ -178,11 +181,7 @@ def search_for_open_pr(
 if __name__ == "__main__":
-    rs_client = rockset.RocksetClient(
-        host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"]
-    )
-    results = rs_client.sql(QUERY).results
+    results = query_clickhouse(QUERY, {})
     slow_tests = {row["test_name"]: row["avg_duration_sec"] for row in results}
     with open(REPO_ROOT / "test" / "slow_tests.json", "w") as f:
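
In the __main__ block, the Rockset client construction and rs_client.sql call collapse into a single query_clickhouse(QUERY, {}) call; the rest of the flow, building a test-name to average-duration map and writing it to test/slow_tests.json, is unchanged. The sketch below shows that tail end to end; it assumes the query_clickhouse helper sketched earlier, the QUERY and REPO_ROOT definitions from this file, and a json.dump serialization whose exact options are not shown in this excerpt.

import json

# Assumes query_clickhouse, QUERY, and REPO_ROOT are defined as above.
results = query_clickhouse(QUERY, {})  # this query binds no parameters
slow_tests = {row["test_name"]: row["avg_duration_sec"] for row in results}
with open(REPO_ROOT / "test" / "slow_tests.json", "w") as f:
    # Dump options (indent, sort order) are an assumption, not part of the diff.
    json.dump(slow_tests, f, indent=2, sort_keys=True)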