mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Do not match empty URL pieces like "https://". Add headers for better handling of URLs like "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/data-sheets/amd-instinct-mi300x-data-sheet.pdf". Pull Request resolved: https://github.com/pytorch/pytorch/pull/154838. Approved by: https://github.com/Skylion007
105 lines
3.4 KiB
Bash
Executable File
105 lines
3.4 KiB
Bash
Executable File
#!/bin/bash
#
# URL lint: extracts http(s) URLs from tracked files (or, when two revisions
# are given, from the lines added between them) and checks that each URL
# answers with a 2xx/3xx status.
#
# Usage: <script> [<base-rev> <head-rev>]
# Exit status: 0 when every URL passed (or was only warned about), 1 otherwise.

set -euo pipefail

# On Ctrl-C, signal the whole process group so background checks die too.
trap 'kill 0' SIGINT

# Overall result; flipped to 1 as soon as any URL check fails.
status=0

# ANSI colour escapes, expanded by printf when used inside a format string.
green='\e[1;32m'
red='\e[1;31m'
cyan='\e[1;36m'
yellow='\e[1;33m'
reset='\e[0m'
readonly green red cyan yellow reset

# Browser-like User-Agent sent on the fallback GET request.
readonly user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"

# Upper bound on concurrently running URL checks.
readonly max_jobs=10

# PIDs of every background check, reaped at the end of the script.
pids=()

#######################################
# Count the background jobs of the current shell that are still running.
# Arguments: none
# Outputs:   the number of running jobs (one PID per line, counted by wc)
#######################################
running_jobs() {
  jobs -rp | wc -l
}

# Consume "path:url" records from the producer at the bottom of this loop and
# probe each URL in its own background subshell. The first ':' separates the
# file path from the URL; the remaining colons stay inside $url.
while IFS=: read -r filepath url; do
  (
    # Attempt 1: HEAD request, following redirects, 30 s cap, with retries.
    code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -I "$url") || code=000

    # Attempt 2: ranged GET (first byte only) with browser-like headers, for
    # servers that reject the bare HEAD request.
    if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
      sleep 1
      code=$(curl -k -gsLm30 --retry 3 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" -r 0-0 -A "$user_agent" -H "Accept-Language: en-US,en" -H "Connection: keep-alive" "$url") || code=000
    fi

    # Attempt 3: submit the URL to check-host.net and poll for its result.
    if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
      sleep 1
      # `// empty` keeps a missing request_id from turning into the literal
      # string "null", which would otherwise trigger pointless polling.
      request_id=$(curl -sS -G -H 'Accept: application/json' \
        --data-urlencode "host=$url" \
        --data-urlencode "max_nodes=1" \
        --data-urlencode "node=us3.node.check-host.net" \
        https://check-host.net/check-http \
        | jq -r '.request_id // empty') || request_id=""
      if [ -n "$request_id" ]; then
        sleep 5
        # Poll up to five times, five seconds apart, until a 2xx/3xx shows up.
        for _ in {1..5}; do
          new_code=$(curl -sS -H 'Accept: application/json' \
            "https://check-host.net/check-result/$request_id" \
            | jq -r -e '.[][0][3]') || new_code=000
          # Anything that is not a plain number counts as "no answer yet".
          [[ "$new_code" =~ ^[0-9]+$ ]] || new_code=000
          if [ "$new_code" -ge 200 ] && [ "$new_code" -lt 400 ]; then
            code=$new_code
            break
          fi
          sleep 5
        done
      fi
    fi

    # Colour variables are deliberately part of the printf format strings so
    # that printf expands their \e escapes.
    # shellcheck disable=SC2059

    # Treat Cloudflare JS-challenge and rate-limit as success.
    if [[ "$code" == "403" || "$code" == "429" || "$code" == "503" ]]; then
      printf "${yellow}WARN %s${reset} ${cyan}%s${reset} %s\n" "$code" "$url" "$filepath"
      exit 0
    fi

    if [ "$code" -lt 200 ] || [ "$code" -ge 400 ]; then
      printf "${red}FAIL %s${reset} ${yellow}%s${reset} %s\n" "$code" "$url" "$filepath" >&2
      exit 1
    else
      printf "${green} OK %s${reset} ${cyan}%s${reset} %s\n" "$code" "$url" "$filepath"
      exit 0
    fi
  ) &
  pids+=("$!")

  # Throttle: hold off reading the next URL while max_jobs checks are running.
  while [ "$(running_jobs)" -ge "$max_jobs" ]; do
    sleep 1
  done
done < <(
  # PCRE for the URLs to check. The lookarounds skip lines tagged
  # @lint-ignore, git+https URLs, ${...}-prefixed URLs, URLs with userinfo
  # before the first '/', and URLs containing template characters.
  pattern='(?!.*@lint-ignore)(?<!git\+)(?<!\$\{)https?://(?![^/]*@)(?![^\s<>\")]*[<>\{\}\$])[[:alnum:]][^[:space:]<>")\[\]\\|]*'
  excludes=(
    ':(exclude,glob)**/.*'
    ':(exclude,glob)**/*.lock'
    ':(exclude,glob)**/*.svg'
    ':(exclude,glob)**/*.xml'
    ':(exclude,glob)**/*.gradle*'
    ':(exclude,glob)**/*gradle*'
    ':(exclude,glob)**/third-party/**'
    ':(exclude,glob)**/third_party/**'
  )
  if [ $# -eq 2 ]; then
    # Diff mode: only URLs on lines added between the two revisions.
    # File names are read NUL-delimited so names with whitespace or glob
    # characters survive intact.
    while IFS= read -r -d '' filename; do
      # The pattern and file name reach perl through the environment:
      # splicing the pattern into the program text made perl interpolate
      # "@lint" as an (empty) array, weakening the @lint-ignore guard, and
      # splicing the file name into a sed expression broke on '|', '&', '\'.
      git diff --unified=0 "$1...$2" -- "$filename" "${excludes[@]}" \
        | grep -E '^\+' \
        | grep -Ev '^\+\+\+' \
        | LINT_URL_FILE="$filename" LINT_URL_PATTERN="$pattern" \
          perl -nle 'print "$ENV{LINT_URL_FILE}:$_" for m#$ENV{LINT_URL_PATTERN}#g'
    done < <(git diff --name-only -z "$1...$2")
  else
    # Full mode: every match in tracked, non-binary files.
    git --no-pager grep --no-color -I -P -o "$pattern" -- . "${excludes[@]}"
  fi \
    | sed -E 's/[^/[:alnum:]]+$//' \
    | grep -Ev '://(0\.0\.0\.0|127\.0\.0\.1|localhost)([:/])' \
    | grep -Ev '://[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' \
    | grep -Ev 'fwdproxy:8080' \
    || true # "no URLs found" must not abort the script
)

#######################################
# Reap every background URL check and fold its exit status into $status.
# Globals:   pids (read), status (set to 1 when a check exited with 1)
# Arguments: none
# Returns:   0; terminates the script with the child's status when a check
#            ended with anything other than 0, 1 or 127.
#######################################
collect_check_results() {
  local pid rc
  # ${pids[@]+...} expands to nothing for an empty array, which keeps
  # `set -u` from aborting on bash releases older than 4.4.
  for pid in ${pids[@]+"${pids[@]}"}; do
    rc=0
    # Capture the status once so later commands cannot clobber it.
    wait "$pid" 2>/dev/null || rc=$?
    case "$rc" in
      0) ;;
      1) status=1 ;;
      127) ;; # ignore "not a child" noise
      *) exit "$rc" ;;
    esac
  done
}

collect_check_results

# Report the aggregated result: 0 if no URL check failed, 1 otherwise.
# status is initialised at the top of the script; the :-0 default only
# matters if this line is ever evaluated without that initialisation.
exit "${status:-0}"