Benchmarks dynamo update script to use ClickHouse instead of Rockset (#140574)

Query works but the part where it parses the job name is broken

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140574
Approved by: https://github.com/huydhn
This commit is contained in:
Catherine Lee
2024-11-15 22:17:35 +00:00
committed by PyTorch MergeBot
parent d64827dc35
commit fc813df120

View File

@ -31,9 +31,45 @@ import pandas as pd
import requests import requests
# Note: the public query url targets this rockset lambda: """
# https://console.rockset.com/lambdas/details/commons.artifacts WITH job as (
ARTIFACTS_QUERY_URL = "https://api.usw2a1.rockset.com/v1/public/shared_lambdas/4ca0033e-0117-41f5-b043-59cde19eff35" SELECT
job.created_at as time,
job.name as job_name,
workflow.name as workflow_name,
job.id as id,
job.run_attempt as run_attempt,
workflow.id as workflow_id
FROM
default.workflow_job job final
INNER JOIN default.workflow_run workflow final on workflow.id = job.run_id
WHERE
job.name != 'ciflow_should_run'
AND job.name != 'generate-test-matrix'
-- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA
AND workflow.event != 'workflow_run'
-- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA
AND workflow.event != 'repository_dispatch'
AND workflow.head_sha = {sha: String}
AND job.head_sha = {sha: String}
AND workflow.repository.'full_name' = {repo: String}
)
SELECT
workflow_name as workflowName,
job_name as jobName,
CAST(id as String) as id,
run_attempt as runAttempt,
CAST(workflow_id as String) as workflowId,
time
from
job
ORDER BY
workflowName, jobName
"""
ARTIFACTS_QUERY_URL = (
"https://console-api.clickhouse.cloud/.api/query-endpoints/"
"c1cdfadc-6bb2-4a91-bbf9-3d19e1981cd4/run?format=JSON"
)
CSV_LINTER = str( CSV_LINTER = str(
Path(__file__).absolute().parent.parent.parent.parent Path(__file__).absolute().parent.parent.parent.parent
/ "tools/linter/adapters/no_merge_conflict_csv_linter.py" / "tools/linter/adapters/no_merge_conflict_csv_linter.py"
@ -42,15 +78,20 @@ CSV_LINTER = str(
def query_job_sha(repo, sha): def query_job_sha(repo, sha):
params = { params = {
"parameters": [ "queryVariables": {"sha": sha, "repo": repo},
{"name": "sha", "type": "string", "value": sha},
{"name": "repo", "type": "string", "value": repo},
]
} }
# If you are a Meta employee, go to P1679979893 to get the id and secret.
# Otherwise, ask a Meta employee give you the id and secret.
KEY_ID = os.environ["CH_KEY_ID"]
KEY_SECRET = os.environ["CH_KEY_SECRET"]
r = requests.post(url=ARTIFACTS_QUERY_URL, json=params) r = requests.post(
data = r.json() url=ARTIFACTS_QUERY_URL,
return data["results"] data=json.dumps(params),
headers={"Content-Type": "application/json"},
auth=(KEY_ID, KEY_SECRET),
)
return r.json()["data"]
def parse_job_name(job_str): def parse_job_name(job_str):