Add scripts to check xrefs and urls (#151844)

Traverses the docs and code to find any broken links
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151844
Approved by: https://github.com/huydhn
This commit is contained in:
Anthony Shoumikhin
2025-04-28 09:30:03 +00:00
committed by PyTorch MergeBot
parent 7e8b9b3f51
commit 7cae7902a2
9 changed files with 160 additions and 6 deletions

View File

@ -281,6 +281,34 @@ jobs:
# All we need to see is that it passes
python3 torch/utils/collect_env.py
# Job that runs scripts/lint_urls.sh (the http(s) URL checker) through the
# reusable linux_job_v2 workflow from pytorch/test-infra.
lint-urls:
name: Lint URLs
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
# get-label-type must finish first: its output is used to build the runner name.
needs: get-label-type
with:
# NOTE(review): the unit is defined by linux_job_v2.yml - presumably minutes; confirm.
timeout: 120
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
docker-image: pytorch-linux-focal-linter
fetch-depth: 0
submodules: false
# On pull_request events check out the PR head commit, otherwise the triggering commit.
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
./scripts/lint_urls.sh
# Job that runs scripts/lint_xrefs.sh (the relative cross-reference checker)
# through the reusable linux_job_v2 workflow from pytorch/test-infra.
lint-xrefs:
name: Lint Xrefs
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
# get-label-type must finish first: its output is used to build the runner name.
needs: get-label-type
with:
# NOTE(review): the unit is defined by linux_job_v2.yml - presumably minutes; confirm.
timeout: 60
runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge"
docker-image: pytorch-linux-focal-linter
fetch-depth: 0
submodules: false
# On pull_request events check out the PR head commit, otherwise the triggering commit.
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
script: |
./scripts/lint_xrefs.sh
# Runs are grouped by workflow name, then PR number (or commit SHA when there
# is no PR), then whether the run was started via workflow_dispatch; a newer
# run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

View File

@ -23,5 +23,5 @@ to allow submodules to use these fixes because we can't patch their
`CMakeList.txt`.
If you need to update files under `./upstream` folder, we recommend you issue PRs
against [the CMake mainline branch](https://gitlab.kitware.com/cmake/cmake/tree/master/Modules/FindCUDA.cmake),
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
and then backport it here for earlier CMake compatibility.

View File

@ -1,5 +1,5 @@
If you need to update files under this folder, we recommend you issue PRs
against [the CMake mainline branch](https://gitlab.kitware.com/cmake/cmake/tree/master/Modules/FindCUDA.cmake),
against [the CMake mainline branch](https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake),
and then backport it here for earlier CMake compatibility.
See [this](../README.md) for more details.

View File

@ -183,7 +183,7 @@ macro(caffe2_interface_library SRC DST)
# use the populated INTERFACE_LINK_LIBRARIES property, because if one of the
# dependent libraries is not a target, cmake creates a $<LINK_ONLY:src> wrapper
# and then one is not able to find target "src". For more discussions, check
# https://gitlab.kitware.com/cmake/cmake/issues/15415
# https://cmake.org/Bug/print_bug_page.php?bug_id=15415
# https://cmake.org/pipermail/cmake-developers/2013-May/019019.html
# Specifically the following quote
#

85
scripts/lint_urls.sh Executable file
View File

@ -0,0 +1,85 @@
#!/bin/bash
#
# Lints the http(s) URLs that appear in the tracked files of the repository.
#
# URLs are harvested with `git grep` (a URL followed on the same line by
# `@lint-ignore` is not harvested), then every URL is probed in its own
# background job, at most $max_jobs at a time. A URL passes as soon as one of
# the probes answers with an HTTP status in the 200-399 range.
#
# Output:      one line per URL, "<status> <url> <file>"; passing URLs go to
#              stdout, failing ones to stderr.
# Exit status: 0 when every URL passed, 1 when at least one URL failed, the
#              status of `git grep` when harvesting itself broke, or the
#              unexpected status of a probe job.
set -euo pipefail

status=0
# ANSI-C quoting stores the real escape bytes, so the colours can be handed to
# printf as arguments instead of being spliced into its format string.
green=$'\e[1;32m'; red=$'\e[1;31m'; cyan=$'\e[1;36m'; yellow=$'\e[1;33m'; reset=$'\e[0m'
user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
max_jobs=10
pids=()

# Prints the number of background jobs that are still running.
running_jobs() {
  jobs -rp | wc -l
}

# Succeeds when $1 is an HTTP status in the 200-399 range.
# `[` is used on purpose: it compares in base 10, whereas `[[` and `((` would
# evaluate a zero-padded value as an octal arithmetic expression.
is_ok_status() {
  [ "$1" -ge 200 ] && [ "$1" -lt 400 ]
}

#######################################
# Probes one URL and prints a coloured result line.
# Globals:   user_agent, green, red, cyan, yellow, reset (read)
# Arguments: $1 - URL to probe
#            $2 - file the URL was found in (only used in the report)
# Outputs:   result line on stdout (pass) or stderr (fail)
# Returns:   0 when the URL answered with a 2xx/3xx status, 1 otherwise
#######################################
check_url() {
  local url=$1 fpath=$2
  local code request_id new_code

  # First attempt: headers only. curl prints 000 and fails when no response
  # was received, hence the fallback value.
  code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -I "$url") || code=000

  # Second attempt: a one-byte ranged GET with a browser User-Agent
  # (presumably for servers that reject HEAD requests or unknown clients).
  if ! is_ok_status "$code"; then
    code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -r 0-0 -A "$user_agent" "$url") || code=000
  fi

  # Last attempt: ask check-host.net to fetch the URL from one of its nodes
  # (presumably to rule out failures specific to this machine's network).
  if ! is_ok_status "$code"; then
    # `// empty` keeps a response without a request_id from turning into the
    # literal string "null", which would otherwise trigger ~30s of polling.
    request_id=$(curl -sS -G -H 'Accept: application/json' \
      --data-urlencode "host=$url" \
      --data-urlencode "max_nodes=1" \
      --data-urlencode "node=us3.node.check-host.net" \
      https://check-host.net/check-http \
      | jq -r '.request_id // empty') || request_id=""
    if [ -n "$request_id" ]; then
      sleep 5
      # Poll for the result; it may not be available right away.
      for _ in {1..5}; do
        new_code=$(curl -sS -H 'Accept: application/json' \
          "https://check-host.net/check-result/$request_id" \
          | jq -r -e '.[][0][3]') || new_code=000
        # Anything that is not a plain number counts as "no answer yet".
        [[ "$new_code" =~ ^[0-9]+$ ]] || new_code=000
        if is_ok_status "$new_code"; then
          code=$new_code
          break
        fi
        sleep 5
      done
    fi
  fi

  if is_ok_status "$code"; then
    printf '%s%s%s %s%s%s %s\n' "$green" "$code" "$reset" "$cyan" "$url" "$reset" "$fpath"
    return 0
  fi
  printf '%s%s%s %s%s%s %s\n' "$red" "$code" "$reset" "$yellow" "$url" "$reset" "$fpath" >&2
  return 1
}

# Harvest "<file>:<url>" pairs up front so that a broken `git grep` (missing
# PCRE support, not a repository, ...) aborts the lint instead of silently
# producing an empty list. Status 1 only means that nothing matched.
grep_rc=0
matches=$(
  git --no-pager grep --no-color -I -P -o \
    '(?!.*@lint-ignore)(?<!git\+)(?<!\$\{)https?://(?![^\s<>\")]*[<>\{\}\$])[^[:space:]<>\")\[\]\(\\]+' \
    -- '*' \
    ':(exclude).*' \
    ':(exclude,glob)**/.*' \
    ':(exclude,glob)**/*.lock' \
    ':(exclude,glob)**/*.svg' \
    ':(exclude,glob)**/*.xml' \
    ':(exclude,glob)**/*.gradle*' \
    ':(exclude,glob)**/*gradle*' \
    ':(exclude,glob)**/third-party/**' \
    ':(exclude,glob)**/third_party/**'
) || grep_rc=$?
if [ "$grep_rc" -gt 1 ]; then
  printf 'lint_urls: git grep failed with status %s\n' "$grep_rc" >&2
  exit "$grep_rc"
fi

while IFS=: read -r filepath url; do
  # An empty harvest still reaches the loop as one blank line; skip it.
  [ -n "$filepath" ] || continue
  check_url "$url" "$filepath" &
  pids+=("$!")
  # Throttle: do not start another probe until a slot is free.
  while [ "$(running_jobs)" -ge "$max_jobs" ]; do
    sleep 1
  done
done < <(
  # Trim trailing punctuation picked up by the regex, then drop local and
  # proxy addresses. The filters exit non-zero when nothing is left, which is
  # not an error here.
  printf '%s\n' "$matches" \
    | sed -E 's/[^/[:alnum:]]+$//' \
    | grep -Ev '://(0\.0\.0\.0|127\.0\.0\.1|localhost)([:/])' \
    | grep -Ev 'fwdproxy:8080' \
    || true
)

# The `+` expansion keeps an empty array from being fatal under `set -u` on
# Bash versions older than 4.4.
for pid in ${pids[@]+"${pids[@]}"}; do
  rc=0
  wait "$pid" 2>/dev/null || rc=$?
  case "$rc" in
    0) ;;
    1) status=1 ;;
    127) ;; # ignore "not a child" noise
    *) exit "$rc" ;;
  esac
done
exit "$status"

41
scripts/lint_xrefs.sh Executable file
View File

@ -0,0 +1,41 @@
#!/bin/bash
#
# Lints relative cross-references in the tracked files of the repository.
#
# Markdown links `[text](target)` and HTML `href="..."` / `src="..."`
# attributes whose target contains a `/` are harvested with `git grep`
# (a reference followed on the same line by `@lint-ignore` is not harvested).
# References containing `http://`/`https://` or `{{` are dropped. Each
# remaining target, with any `#fragment` removed, must exist relative to the
# directory of the file that mentions it.
#
# Output:      a header per file plus an OK line per valid reference on
#              stdout; a FAIL line per broken reference on stderr.
# Exit status: 0 when every reference resolved, 1 when at least one did not,
#              or the status of `git grep` when harvesting itself broke.
set -euo pipefail

status=0
# ANSI-C quoting stores the real escape bytes, so the colours can be handed to
# printf as arguments instead of being spliced into its format string.
green=$'\e[1;32m'; red=$'\e[1;31m'; cyan=$'\e[1;36m'; yellow=$'\e[1;33m'; reset=$'\e[0m'

# Harvest "<file>:<reference>" pairs up front so that a broken `git grep`
# (missing PCRE support, not a repository, ...) aborts the lint instead of
# silently producing an empty list. Status 1 only means that nothing matched.
grep_rc=0
matches=$(
  git --no-pager grep --no-color -I -P -o \
    '(?!.*@lint-ignore)(?:\[[^]]+\]\([^[:space:])]*/[^[:space:])]*\)|href="[^"]*/[^"]*"|src="[^"]*/[^"]*")' \
    -- '*' \
    ':(exclude).*' \
    ':(exclude)**/.*' \
    ':(exclude)**/*.lock' \
    ':(exclude)**/*.svg' \
    ':(exclude)**/*.xml' \
    ':(exclude,glob)**/third-party/**' \
    ':(exclude,glob)**/third_party/**'
) || grep_rc=$?
if [ "$grep_rc" -gt 1 ]; then
  printf 'lint_xrefs: git grep failed with status %s\n' "$grep_rc" >&2
  exit "$grep_rc"
fi

last_filepath=
while IFS=: read -r filepath link; do
  # An empty harvest still reaches the loop as one blank line; skip it.
  [ -n "$filepath" ] || continue

  # Print a header whenever the references start coming from another file.
  if [ "$filepath" != "$last_filepath" ]; then
    printf '\n%s:\n' "$filepath"
    last_filepath=$filepath
  fi

  # Resolve against the referencing file's directory, ignoring any #fragment.
  if [ -e "$(dirname "$filepath")/${link%%#*}" ]; then
    printf ' %sOK%s %s%s%s\n' "$green" "$reset" "$cyan" "$link" "$reset"
  else
    # The file name is repeated here because the per-file header goes to
    # stdout only, which would leave stderr without any context.
    printf '%sFAIL%s %s%s%s (in %s)\n' "$red" "$reset" "$yellow" "$link" "$reset" "$filepath" >&2
    status=1
  fi
done < <(
  # Drop absolute URLs, reduce each match to "<file>:<target>", trim trailing
  # punctuation and drop template placeholders. The filters exit non-zero
  # when nothing is left, which is not an error here.
  printf '%s\n' "$matches" \
    | grep -Ev 'https?://' \
    | sed -E \
      -e 's#([^:]+):\[[^]]+\]\(([^)]+)\)#\1:\2#' \
      -e 's#([^:]+):href="([^"]+)"#\1:\2#' \
      -e 's#([^:]+):src="([^"]+)"#\1:\2#' \
      -e 's/[[:punct:]]*$//' \
    | grep -Ev '\{\{' \
    || true
)
exit "$status"

View File

@ -28,7 +28,7 @@ def convert_cmake_value_to_python_value(
cmake_type = cmake_type.upper()
up_val = cmake_value.upper()
if cmake_type == "BOOL":
# https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/VariablesListsStrings#boolean-values-in-cmake
# https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#genex:BOOL
return not (
up_val in ("FALSE", "OFF", "N", "NO", "0", "", "NOTFOUND")
or up_val.endswith("-NOTFOUND")

View File

@ -498,7 +498,7 @@ def lobpcg(
[DuerschEtal2018] Jed A. Duersch, Meiyue Shao, Chao Yang, Ming
Gu. (2018) A Robust and Efficient Implementation of LOBPCG.
SIAM J. Sci. Comput., 40(5), C655-C676. (22 pages)
https://epubs.siam.org/doi/abs/10.1137/17M1129830
https://arxiv.org/abs/1704.07458
"""

View File

@ -189,7 +189,7 @@ ASGD.__doc__ = rf"""Implements Averaged Stochastic Gradient Descent.
{_capturable_doc}
.. _Acceleration of stochastic approximation by averaging:
https://dl.acm.org/citation.cfm?id=131098
https://meyn.ece.ufl.edu/wp-content/uploads/sites/77/archive/spm_files/Courses/ECE555-2011/555media/poljud92.pdf
"""