Add scripts to check xrefs and urls (#151844)

Traverses the docs and code to find any broken links
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151844
Approved by: https://github.com/huydhn
This commit is contained in:
Anthony Shoumikhin
2025-04-28 09:30:03 +00:00
committed by PyTorch MergeBot
parent 7e8b9b3f51
commit 7cae7902a2
9 changed files with 160 additions and 6 deletions

85
scripts/lint_urls.sh Executable file
View File

@ -0,0 +1,85 @@
#!/bin/bash
# Scan the repository for http(s) URLs and verify each is reachable,
# probing in parallel with a bounded pool of background jobs.
set -euo pipefail

# Overall exit status; flipped to 1 when any URL check fails.
status=0

# ANSI color codes used in the pass/fail report.
green='\e[1;32m'
red='\e[1;31m'
cyan='\e[1;36m'
yellow='\e[1;33m'
reset='\e[0m'

# Browser-like User-Agent for servers that reject generic clients.
user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"

# Upper bound on concurrent URL probes.
max_jobs=10

# PIDs of the spawned checker subshells, reaped at the end.
pids=()
# Emit the number of background jobs still running (used to throttle the
# checker pool). Note: bash keeps the parent's job table visible inside
# command substitutions, which is what makes `$(running_jobs)` work.
running_jobs() {
  local count
  count=$(jobs -pr | wc -l)
  printf '%s\n' "$count"
}
# Read "filepath:url" pairs from the git-grep pipeline below and probe each
# URL in a background subshell, at most $max_jobs at a time.
while IFS=: read -r filepath url; do
fpath="$filepath"
(
# Attempt 1: cheap HEAD request; any curl failure maps to code 000.
code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -I "$url") || code=000
if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
# Attempt 2: some servers reject HEAD; retry as a ranged GET (first byte
# only) with a browser-like User-Agent.
code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -r 0-0 -A "$user_agent" "$url") || code=000
fi
if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
# Attempt 3: the URL may be blocked for this host only; ask the external
# check-host.net service to probe it from another vantage point.
request_id=$(curl -sS -G -H 'Accept: application/json' \
--data-urlencode "host=$url" \
--data-urlencode "max_nodes=1" \
--data-urlencode "node=us3.node.check-host.net" \
https://check-host.net/check-http \
| jq -r .request_id) || request_id=""
if [ -n "$request_id" ]; then
sleep 5
# Poll the async check result up to 5 times, 5 seconds apart.
for _ in {1..5}; do
new_code=$(curl -sS -H 'Accept: application/json' \
"https://check-host.net/check-result/$request_id" \
| jq -r -e '.[][0][3]') || new_code=000
# Non-numeric payloads (null / still pending) count as failure.
[[ "$new_code" =~ ^[0-9]+$ ]] || new_code=000
if [ "$new_code" -ge 200 ] && [ "$new_code" -lt 400 ]; then
code=$new_code
break
fi
sleep 5
done
fi
fi
# Final verdict: 2xx/3xx passes; anything else fails this URL. The subshell
# exit code (0/1) is collected by the wait loop at the bottom of the script.
if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
printf "${red}%s${reset} ${yellow}%s${reset} %s\n" "$code" "$url" "$fpath" >&2
exit 1
else
printf "${green}%s${reset} ${cyan}%s${reset} %s\n" "$code" "$url" "$fpath"
exit 0
fi
) &
pids+=($!)
# Throttle: block until a pool slot frees up before reading the next URL.
while [ "$(running_jobs)" -ge "$max_jobs" ]; do
sleep 1
done
done < <(
# Collect candidate http(s) URLs across the tree. The PCRE skips lines
# tagged @lint-ignore, pip-style "git+" prefixes, and URLs containing
# shell/template placeholders; the pathspecs exclude dotfiles, lock/svg/xml
# files, gradle files, and vendored third-party trees. The trailing sed
# strips trailing punctuation, and the greps drop local/proxy addresses
# that are not meant to be publicly reachable. `|| true` keeps an empty
# result from failing the pipeline under `set -o pipefail`.
git --no-pager grep --no-color -I -P -o \
'(?!.*@lint-ignore)(?<!git\+)(?<!\$\{)https?://(?![^\s<>\")]*[<>\{\}\$])[^[:space:]<>\")\[\]\(\\]+' \
-- '*' \
':(exclude).*' \
':(exclude,glob)**/.*' \
':(exclude,glob)**/*.lock' \
':(exclude,glob)**/*.svg' \
':(exclude,glob)**/*.xml' \
':(exclude,glob)**/*.gradle*' \
':(exclude,glob)**/*gradle*' \
':(exclude,glob)**/third-party/**' \
':(exclude,glob)**/third_party/**' \
| sed -E 's/[^/[:alnum:]]+$//' \
| grep -Ev '://(0\.0\.0\.0|127\.0\.0\.1|localhost)([:/])' \
| grep -Ev 'fwdproxy:8080' \
|| true
)
# Reap every background checker and fold its result into the overall status.
#
# Exit codes from `wait`:
#   1   -> the subshell reported a broken URL; record failure, keep reaping
#   127 -> "not a child of this shell" (already reaped); ignore the noise
#   *   -> anything unexpected; abort immediately with that code
#
# ${pids[@]+"${pids[@]}"} guards the empty-array case: when the grep finds
# no URLs, pids is empty and a bare "${pids[@]}" trips `set -u`
# ("unbound variable") on bash < 4.4.
for pid in ${pids[@]+"${pids[@]}"}; do
  wait "$pid" 2>/dev/null || {
    case $? in
      1) status=1 ;;
      127) ;; # ignore "not a child" noise
      *) exit $? ;;
    esac
  }
done
exit $status

41
scripts/lint_xrefs.sh Executable file
View File

@ -0,0 +1,41 @@
#!/bin/bash
# Verify that relative cross-references (markdown links, href/src attributes)
# found in the repository point at files that actually exist on disk.
set -euo pipefail

# Overall exit status; set to 1 when any broken reference is found.
status=0

# ANSI color codes for the OK/FAIL report.
green='\e[1;32m'
red='\e[1;31m'
cyan='\e[1;36m'
yellow='\e[1;33m'
reset='\e[0m'

# Last file whose links were printed, so each file's links share one header.
last_filepath=
# Read "filepath:link" pairs from the pipeline below and check that each
# relative link target exists. Fed by process substitution (not a pipe) so
# the `status=1` assignment survives the loop.
while IFS=: read -r filepath link; do
# Print a header the first time a file appears, grouping its links.
if [ "$filepath" != "$last_filepath" ]; then
printf '\n%s:\n' "$filepath"
last_filepath=$filepath
fi
# Resolve the link relative to the referencing file's directory and strip
# any '#fragment' suffix before testing existence.
if [ -e "$(dirname "$filepath")/${link%%#*}" ]; then
printf " ${green}OK${reset} ${cyan}%s${reset}\n" "$link"
else
printf "${red}FAIL${reset} ${yellow}%s${reset}\n" "$link" >&2
status=1
fi
done < <(
# Collect candidate references: markdown links [text](path), href="path",
# and src="path" whose target contains a '/', skipping @lint-ignore lines,
# dotfiles, lock/svg/xml files, and vendored third-party trees. The grep
# drops absolute http(s) URLs (checked by lint_urls.sh instead), the sed
# rewrites each match down to "filepath:path" and strips trailing
# punctuation, and the final grep drops templated ("{{") paths. `|| true`
# keeps an empty result from failing the pipeline under `set -o pipefail`.
git --no-pager grep --no-color -I -P -o \
'(?!.*@lint-ignore)(?:\[[^]]+\]\([^[:space:])]*/[^[:space:])]*\)|href="[^"]*/[^"]*"|src="[^"]*/[^"]*")' \
-- '*' \
':(exclude).*' \
':(exclude)**/.*' \
':(exclude)**/*.lock' \
':(exclude)**/*.svg' \
':(exclude)**/*.xml' \
':(exclude,glob)**/third-party/**' \
':(exclude,glob)**/third_party/**' \
| grep -Ev 'https?://' \
| sed -E \
-e 's#([^:]+):\[[^]]+\]\(([^)]+)\)#\1:\2#' \
-e 's#([^:]+):href="([^"]+)"#\1:\2#' \
-e 's#([^:]+):src="([^"]+)"#\1:\2#' \
-e 's/[[:punct:]]*$//' \
| grep -Ev '\{\{' \
|| true
)
exit $status