mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Translate annotation line numbers from merge to head (#55569)
Summary: This PR - adds a `tools/translate_annotations.py` script that - parses annotations into JSON using the regexes that we were previously passing to [`pytorch/add-annotations-github-action`](https://github.com/pytorch/add-annotations-github-action) and - uses `git diff-index` to translate the line numbers for those annotations from the PR `merge` onto the PR `head`, since (as of https://github.com/pytorch/pytorch/issues/54967) we now run CI on the former instead of the latter; - modifies the `flake8-py3` and `clang-tidy` jobs to use that script and thus upload JSON in their artifacts instead of raw text; and - modifies the "Add annotations" workflow to specify `mode: json` to allow it to use those preprocessed annotations. Depends on https://github.com/pytorch/add-annotations-github-action/pull/18. Pull Request resolved: https://github.com/pytorch/pytorch/pull/55569 Test Plan: You can run the unit tests with this command: ``` python tools/test/test_translate_annotations.py ``` I also tested the entire system together in my personal sandbox repo. Reviewed By: malfet Differential Revision: D27662161 Pulled By: samestep fbshipit-source-id: ecca51b79b9cf00c90fd89f0d41d0c7b89d69c63
This commit is contained in:
committed by
Facebook GitHub Bot
parent
11dd6d3dbb
commit
f3367f917e
12
.github/workflows/add_annotations.yml
vendored
12
.github/workflows/add_annotations.yml
vendored
@ -12,11 +12,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: flake8-py3
|
||||
regex: '^(?<filename>.*?):(?<lineNumber>\d+):(?<columnNumber>\d+): (?<errorCode>\w+\d+) (?<errorDesc>.*)'
|
||||
- name: clang-tidy
|
||||
regex: '^(?<filename>.*?):(?<lineNumber>\d+):(?<columnNumber>\d+): (?<errorDesc>.*?) \[(?<errorCode>.*)\]'
|
||||
name:
|
||||
- flake8-py3
|
||||
- clang-tidy
|
||||
if: github.event.workflow_run.event == 'pull_request'
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
@ -62,8 +60,8 @@ jobs:
|
||||
uses: pytorch/add-annotations-github-action@master
|
||||
with:
|
||||
check_name: ${{ matrix.name }}
|
||||
linter_output_path: warnings.txt
|
||||
linter_output_path: annotations.json
|
||||
commit_sha: ${{ steps.unzip.outputs.commit-sha }}
|
||||
regex: ${{ matrix.regex }}
|
||||
mode: json
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
32
.github/workflows/lint.yml
vendored
32
.github/workflows/lint.yml
vendored
@ -115,6 +115,8 @@ jobs:
|
||||
architecture: x64
|
||||
- name: Fetch PyTorch
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 2 # to allow us to use github.event.pull_request.head.sha
|
||||
- name: Prepare output dir with HEAD commit SHA
|
||||
run: |
|
||||
mkdir flake8-output
|
||||
@ -125,9 +127,21 @@ jobs:
|
||||
- name: Run flake8
|
||||
run: |
|
||||
set -eux
|
||||
pip install typing-extensions # for tools/translate_annotations.py
|
||||
pip install -r requirements-flake8.txt
|
||||
flake8 --version
|
||||
flake8 | tee "${GITHUB_WORKSPACE}"/flake8-output/warnings.txt
|
||||
flake8 | tee "${GITHUB_WORKSPACE}"/flake8-output.txt
|
||||
cp flake8-output.txt flake8-output/annotations.json
|
||||
- name: Translate annotations
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
tools/translate_annotations.py \
|
||||
--file=flake8-output.txt \
|
||||
--regex='^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorCode>\w+\d+) (?P<errorDesc>.*)' \
|
||||
--commit="$HEAD_SHA" \
|
||||
> flake8-output/annotations.json
|
||||
env:
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
@ -135,7 +149,8 @@ jobs:
|
||||
path: flake8-output/
|
||||
- name: Fail if there were any warnings
|
||||
run: |
|
||||
[ ! -s flake8-output/warnings.txt ]
|
||||
cat flake8-output.txt
|
||||
[ ! -s flake8-output.txt ]
|
||||
|
||||
clang-tidy:
|
||||
if: github.event_name == 'pull_request'
|
||||
@ -148,6 +163,8 @@ jobs:
|
||||
architecture: x64
|
||||
- name: Checkout PyTorch
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0 # to allow tools/clang_tidy.py to do its thing
|
||||
- name: Prepare output dir with HEAD commit SHA
|
||||
run: |
|
||||
mkdir clang-tidy-output
|
||||
@ -225,11 +242,18 @@ jobs:
|
||||
-g"-torch/csrc/deploy/interpreter/interpreter.h" \
|
||||
-g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \
|
||||
-g"-torch/csrc/deploy/interpreter/test_main.cpp" \
|
||||
"$@" > "${GITHUB_WORKSPACE}"/clang-tidy-output/warnings.txt
|
||||
"$@" > "${GITHUB_WORKSPACE}"/clang-tidy-output.txt
|
||||
|
||||
cat "${GITHUB_WORKSPACE}"/clang-tidy-output/warnings.txt
|
||||
cat "${GITHUB_WORKSPACE}"/clang-tidy-output.txt
|
||||
|
||||
tools/translate_annotations.py \
|
||||
--file=clang-tidy-output.txt \
|
||||
--regex='^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorDesc>.*?) \[(?P<errorCode>.*)\]' \
|
||||
--commit="$HEAD_SHA" \
|
||||
> clang-tidy-output/annotations.json
|
||||
env:
|
||||
BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
|
@ -49,7 +49,9 @@ files =
|
||||
tools/test_history.py,
|
||||
tools/test/test_test_history.py,
|
||||
tools/test/test_trailing_newlines.py,
|
||||
tools/test/test_translate_annotations.py,
|
||||
tools/trailing_newlines.py,
|
||||
tools/translate_annotations.py,
|
||||
torch/testing/_internal/framework_utils.py,
|
||||
torch/utils/benchmark/utils/common.py,
|
||||
torch/utils/benchmark/utils/timer.py,
|
||||
|
@ -60,6 +60,11 @@ Developer tools which you might find useful:
|
||||
stdin, print names of nonempty files whose contents don't end in exactly one
|
||||
trailing newline, exit with status 1 if no output printed or 0 if some
|
||||
filenames were printed.
|
||||
* [translate_annotations.py](translate_annotations.py) - Read [Flake8][] or
|
||||
[clang-tidy][] warnings (according to a `--regex`) from a `--file`, convert
|
||||
to the JSON format accepted by [pytorch/add-annotations-github-action], and
|
||||
translate line numbers from `HEAD` back in time to the given `--commit` by
|
||||
running `git diff-index --unified=0` appropriately.
|
||||
|
||||
Important if you want to run on AMD GPU:
|
||||
|
||||
@ -80,5 +85,8 @@ Tools which are only situationally useful:
|
||||
* [run-clang-tidy-in-ci.sh](run-clang-tidy-in-ci.sh) - Responsible
|
||||
for checking that C++ code is clang-tidy clean in CI on Travis
|
||||
|
||||
[clang-tidy]: https://clang.llvm.org/extra/clang-tidy/
|
||||
[flake8]: https://flake8.pycqa.org/en/latest/
|
||||
[github actions expressions]: https://docs.github.com/en/actions/reference/context-and-expression-syntax-for-github-actions#about-contexts-and-expressions
|
||||
[pytorch/add-annotations-github-action]: https://github.com/pytorch/add-annotations-github-action
|
||||
[shellcheck]: https://github.com/koalaman/shellcheck
|
||||
|
280
tools/test/test_translate_annotations.py
Normal file
280
tools/test/test_translate_annotations.py
Normal file
@ -0,0 +1,280 @@
|
||||
import re
|
||||
import unittest
|
||||
|
||||
from tools.translate_annotations import parse_annotation, parse_diff, translate
|
||||
|
||||
# regexes matching the ones we historically passed on the command line to
# pytorch/add-annotations-github-action for each linter
flake8_regex \
    = r'^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorCode>\w+\d+) (?P<errorDesc>.*)'
clang_tidy_regex \
    = r'^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorDesc>.*?) \[(?P<errorCode>.*)\]'

# in the below example patch, note that the filenames differ, so the
# translation should reflect that as well as the line numbers

# $ git clone -b 1.0.2 https://github.com/cscorley/whatthepatch.git
# $ cd whatthepatch/tests/casefiles
# $ git diff --no-index --unified=0 lao tzu
lao_tzu_diff = '''
diff --git a/lao b/tzu
index 635ef2c..5af88a8 100644
--- a/lao
+++ b/tzu
@@ -1,2 +0,0 @@
-The Way that can be told of is not the eternal Way;
-The name that can be named is not the eternal name.
@@ -4 +2,2 @@ The Nameless is the origin of Heaven and Earth;
-The Named is the mother of all things.
+The named is the mother of all things.
+
@@ -11,0 +11,3 @@ But after they are produced,
+They both may be called deep and profound.
+Deeper and more profound,
+The door of all subtleties!
'''.lstrip()

# the hunks here are far apart (lines 4-6 vs lines 10-11 of the old file),
# exercising translation across the unchanged gap between hunks
sparser_diff = '''
diff --git a/foo.txt b/bar.txt
index 27a6dad..6fae323 100644
--- a/foo.txt
+++ b/bar.txt
@@ -4,3 +4,2 @@ lines
-lines
-lines
-lines
+A change!!
+Wow
@@ -10,2 +8,0 @@ more lines
-even more
-even more
'''.lstrip()

# a brand-new file: the old side is /dev/null, so parse_diff should report
# old_filename as None
new_file_diff = '''
diff --git a/torch/csrc/jit/tensorexpr/operators/conv2d.h b/torch/csrc/jit/tensorexpr/operators/conv2d.h
new file mode 100644
index 0000000000..a81eeae346
--- /dev/null
+++ b/torch/csrc/jit/tensorexpr/operators/conv2d.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <torch/csrc/jit/tensorexpr/tensor.h>
+
+namespace torch {
+namespace jit {
+namespace tensorexpr {
+
+TORCH_API Tensor* conv2d_depthwise(
+    BufHandle input,
+    BufHandle weight,
+    BufHandle bias,
+    int stride,
+    int pad,
+    int groups);
+
+} // namespace tensorexpr
+} // namespace jit
+} // namespace torch
'''.lstrip()

# fun fact, this example fools VS Code's diff syntax highlighter
# (the removed line itself starts with "--- a/", like a diff header)
haskell_diff = '''
diff --git a/hello.hs b/hello.hs
index ffb8d4ad14..0872ac9db6 100644
--- a/hello.hs
+++ b/hello.hs
@@ -1 +1 @@
--- a/hello/world/example
+main = putStrLn "Hello, world!"
'''.lstrip()
|
||||
|
||||
|
||||
class TestTranslateAnnotations(unittest.TestCase):
    """Unit tests for parse_diff, translate, and parse_annotation."""

    # show full diffs on assertion failure; the expected dicts are nested
    maxDiff = None

    def test_parse_diff_lao_tzu(self) -> None:
        self.assertEqual(
            parse_diff(lao_tzu_diff),
            {
                'old_filename': 'lao',
                'hunks': [
                    {
                        'old_start': 1,
                        'old_count': 2,
                        'new_start': 0,
                        'new_count': 0,
                    },
                    {
                        'old_start': 4,
                        'old_count': 1,
                        'new_start': 2,
                        'new_count': 2,
                    },
                    {
                        'old_start': 11,
                        'old_count': 0,
                        'new_start': 11,
                        'new_count': 3,
                    },
                ],
            },
        )

    def test_parse_diff_new_file(self) -> None:
        # the old side of a new-file diff is /dev/null, so old_filename
        # should be None
        self.assertEqual(
            parse_diff(new_file_diff),
            {
                'old_filename': None,
                'hunks': [
                    {
                        'old_start': 0,
                        'old_count': 0,
                        'new_start': 1,
                        'new_count': 19,
                    },
                ],
            },
        )

    def test_parse_diff_haskell(self) -> None:
        # the removed line starts with "--- a/", which must not be
        # mistaken for the diff's filename header
        self.assertEqual(
            parse_diff(haskell_diff),
            {
                'old_filename': 'hello.hs',
                'hunks': [
                    {
                        'old_start': 1,
                        'old_count': 1,
                        'new_start': 1,
                        'new_count': 1,
                    },
                ],
            },
        )

    def test_translate_lao_tzu(self) -> None:
        # we'll pretend that this diff represents the file lao being
        # renamed to tzu and also modified
        diff = parse_diff(lao_tzu_diff)

        # line numbers less than 1 are invalid so they map to None
        self.assertEqual(translate(diff, -1), None)
        self.assertEqual(translate(diff, 0), None)

        # the first two lines of the file were removed, so the first
        # line of the new version corresponds to the third line of the
        # original
        self.assertEqual(translate(diff, 1), 3)

        # the second and third lines of the new file were not present in
        # the original version, so they map to None
        self.assertEqual(translate(diff, 2), None)
        self.assertEqual(translate(diff, 3), None)

        # at this point, we have a stretch of lines that are identical
        # in both versions of the file, but the original version of the
        # file had 4 lines before this section whereas the new version
        # has only 3 lines before this section
        self.assertEqual(translate(diff, 4), 5)
        self.assertEqual(translate(diff, 5), 6)
        self.assertEqual(translate(diff, 6), 7)
        self.assertEqual(translate(diff, 7), 8)
        self.assertEqual(translate(diff, 8), 9)
        self.assertEqual(translate(diff, 9), 10)
        self.assertEqual(translate(diff, 10), 11)

        # these three lines were added in the new version of the file,
        # so they map to None
        self.assertEqual(translate(diff, 11), None)
        self.assertEqual(translate(diff, 12), None)
        self.assertEqual(translate(diff, 13), None)

        # the diff doesn't say how long the file is, so we keep mapping
        # line numbers back; since we can look back at the original
        # files, though, we can see that the original is two lines
        # shorter than the new version, which explains why we are
        # subtracting 2 here
        self.assertEqual(translate(diff, 14), 12)
        self.assertEqual(translate(diff, 15), 13)

    def test_translate_empty(self) -> None:
        diff = parse_diff('--- a/foo')

        # again, we start numbering at 1
        self.assertEqual(translate(diff, -1), None)
        self.assertEqual(translate(diff, 0), None)

        # this diff says there are no changes, so all line numbers
        # greater than zero map to themselves
        self.assertEqual(translate(diff, 1), 1)
        self.assertEqual(translate(diff, 2), 2)
        self.assertEqual(translate(diff, 3), 3)
        self.assertEqual(translate(diff, 4), 4)
        self.assertEqual(translate(diff, 5), 5)

    def test_translate_sparser(self) -> None:
        diff = parse_diff(sparser_diff)

        # again, we start numbering at 1
        self.assertEqual(translate(diff, -1), None)
        self.assertEqual(translate(diff, 0), None)

        # the first three lines are unchanged
        self.assertEqual(translate(diff, 1), 1)
        self.assertEqual(translate(diff, 2), 2)
        self.assertEqual(translate(diff, 3), 3)

        # we removed three lines here and added two, so the two lines we
        # added don't map back to anything in the original file
        self.assertEqual(translate(diff, 4), None)
        self.assertEqual(translate(diff, 5), None)

        # we have some unchanged lines here, but in the preceding hunk
        # we removed 3 and added only 2, so we have an offset of 1
        self.assertEqual(translate(diff, 6), 7)
        self.assertEqual(translate(diff, 7), 8)

        # since the unified diff format essentially subtracts 1 from the
        # starting line number when the count is 0, and since we use
        # bisect.bisect_right to decide which hunk to look at, an
        # earlier version of translate had a bug that caused it to get
        # confused because it would look at the second hunk (which lists
        # 8 as its start line number) rather than the first hunk
        self.assertEqual(translate(diff, 8), 9)

        # after the two lines that we removed in the second hunk, we've
        # reduced the total length of the file by 3 lines, so once we
        # reach the end of the diff, we just add 3 to every line number
        self.assertEqual(translate(diff, 9), 12)
        self.assertEqual(translate(diff, 10), 13)
        self.assertEqual(translate(diff, 11), 14)
        self.assertEqual(translate(diff, 12), 15)

    def test_parse_annotation_flake8(self) -> None:
        regex = re.compile(flake8_regex)
        self.assertEqual(
            parse_annotation(regex, 'README.md:1:3: R100 make a better title'),
            {
                'filename': 'README.md',
                'lineNumber': 1,
                'columnNumber': 3,
                'errorCode': 'R100',
                'errorDesc': 'make a better title',
            },
        )

    def test_parse_annotation_clang_tidy(self) -> None:
        regex = re.compile(clang_tidy_regex)
        self.assertEqual(
            parse_annotation(regex, 'README.md:2:1: improve description [R200]'),
            {
                'filename': 'README.md',
                'lineNumber': 2,
                'columnNumber': 1,
                'errorCode': 'R200',
                'errorDesc': 'improve description',
            },
        )


if __name__ == '__main__':
    unittest.main()
|
181
tools/translate_annotations.py
Executable file
181
tools/translate_annotations.py
Executable file
@ -0,0 +1,181 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
from bisect import bisect_right
|
||||
from collections import defaultdict
|
||||
from typing import (Callable, DefaultDict, Generic, List, Optional, Pattern,
|
||||
Sequence, TypeVar, cast)
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
class Hunk(TypedDict):
    # one "@@ -start,count +start,count @@" header from a unified diff
    old_start: int
    old_count: int
    new_start: int
    new_count: int


class Diff(TypedDict):
    # a single-file unified diff: the pre-image filename (None for a file
    # that didn't exist before) plus its hunk headers
    old_filename: Optional[str]
    hunks: List[Hunk]


# adapted from the similar regex in tools/clang_tidy.py
# @@ -start,count +start,count @@
hunk_pattern = r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'


def parse_diff(diff: str) -> Diff:
    """Extract the old filename and the hunk headers from a unified diff."""
    old_name = None
    seen_name = False
    parsed_hunks: List[Hunk] = []
    for text_line in diff.splitlines():
        header = re.match(hunk_pattern, text_line)
        if not seen_name:
            # hunk headers must not appear before the `---` line that
            # names the file
            assert not header
            name_match = re.match(r'^--- (?:(?:/dev/null)|(?:a/(.*)))$', text_line)
            if name_match:
                seen_name = True
                # the capture group is absent (None) for /dev/null
                old_name, = name_match.groups()
        elif header:
            a_start, a_count, b_start, b_count = header.groups()
            # a count omitted from a hunk header defaults to 1
            parsed_hunks.append({
                'old_start': int(a_start),
                'old_count': int(a_count or '1'),
                'new_start': int(b_start),
                'new_count': int(b_count or '1'),
            })
    return {'old_filename': old_name, 'hunks': parsed_hunks}
|
||||
|
||||
|
||||
T = TypeVar('T')
U = TypeVar('U')


# we want to use bisect.bisect_right to find the closest hunk to a given
# line number, but the bisect module won't have a key function until
# Python 3.10 https://github.com/python/cpython/pull/20556 so we make an
# O(1) wrapper around the list of hunks that makes it pretend to just be
# a list of line numbers
# https://gist.github.com/ericremoreynolds/2d80300dabc70eebc790
class KeyifyList(Generic[T, U]):
    """Present ``inner`` through ``key`` as a read-only indexable sequence."""

    def __init__(self, inner: List[T], key: Callable[[T], U]) -> None:
        self.inner = inner
        self.key = key

    def __len__(self) -> int:
        return len(self.inner)

    def __getitem__(self, k: int) -> U:
        return self.key(self.inner[k])


def translate(diff: 'Diff', line_number: int) -> Optional[int]:
    """Map a line number in the new file back to the old file.

    Returns None when the line has no counterpart in the old version
    (invalid line numbers, or lines added by the diff).
    """
    if line_number < 1:
        return None  # line numbers start at 1

    hunks = diff['hunks']
    if not hunks:
        return line_number  # no changes at all: lines map to themselves

    # find the last hunk starting at or before line_number; a hunk with
    # new_count == 0 effectively starts one line later in unified format
    sort_keys = KeyifyList(
        hunks,
        lambda h: h['new_start'] + (0 if h['new_count'] > 0 else 1),
    )
    idx = bisect_right(cast(Sequence[int], sort_keys), line_number)
    if idx < 1:
        return line_number  # before the first hunk: unchanged region

    nearest = hunks[idx - 1]
    # distance past the end of the nearest hunk on the new side
    offset = line_number - (nearest['new_start'] + (nearest['new_count'] or 1))
    if offset < 0:
        # inside the hunk itself: the line was added, so no old equivalent
        return None
    return nearest['old_start'] + (nearest['old_count'] or 1) + offset
|
||||
|
||||
|
||||
# we use camelCase here because this will be output as JSON and so the
# field names need to match the group names from here:
# https://github.com/pytorch/add-annotations-github-action/blob/3ab7d7345209f5299d53303f7aaca7d3bc09e250/action.yml#L23
class Annotation(TypedDict):
    filename: str
    lineNumber: int
    columnNumber: int
    errorCode: str
    errorDesc: str


def parse_annotation(regex: Pattern[str], line: str) -> Optional[Annotation]:
    """Match one linter output line against ``regex``.

    Returns an Annotation built from the named capture groups, or None
    when the line doesn't match or the line/column groups aren't valid
    integers.
    """
    # re.match (rather than regex.match) also tolerates a plain string
    # pattern being passed in place of a compiled one
    found = re.match(regex, line)
    if not found:
        return None
    try:
        # the expected groups are \d+, but a caller-supplied regex could
        # capture something int() rejects, so stay defensive here
        row = int(found.group('lineNumber'))
        col = int(found.group('columnNumber'))
    except ValueError:
        return None
    return {
        'filename': found.group('filename'),
        'lineNumber': row,
        'columnNumber': col,
        'errorCode': found.group('errorCode'),
        'errorDesc': found.group('errorDesc'),
    }
|
||||
|
||||
|
||||
def translate_all(
    *,
    lines: List[str],
    regex: Pattern[str],
    commit: str
) -> List[Annotation]:
    """Parse annotations out of ``lines`` and translate them onto ``commit``.

    Runs ``git diff-index --unified=0`` once per annotated file to map
    filenames and line numbers from the working tree back to ``commit``.
    Annotations whose lines don't exist in ``commit`` are dropped.
    """
    # group annotations per file so each file is diffed only once
    grouped: DefaultDict[str, List[Annotation]] = defaultdict(list)
    for raw_line in lines:
        parsed = parse_annotation(regex, raw_line)
        if parsed is not None:
            grouped[parsed['filename']].append(parsed)

    translated = []
    for filename, annotations in grouped.items():
        raw_diff = subprocess.check_output(
            ['git', 'diff-index', '--unified=0', commit, filename],
            encoding='utf-8',
        )
        diff = parse_diff(raw_diff) if raw_diff.strip() else None
        # if there is a diff but it doesn't list an old filename, that
        # means the file is absent in the commit we're targeting, so we
        # skip it
        if diff and not diff['old_filename']:
            continue
        for annotation in annotations:
            line_number: Optional[int] = annotation['lineNumber']
            if diff:
                annotation['filename'] = cast(str, diff['old_filename'])
                line_number = translate(diff, cast(int, line_number))
            # translate returns None for lines with no old counterpart;
            # those annotations are dropped
            if line_number:
                annotation['lineNumber'] = line_number
                translated.append(annotation)
    return translated
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point.

    Reads linter output from --file, parses each line with --regex, and
    prints the annotations (translated onto --commit) as JSON on stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--file')
    parser.add_argument('--regex')
    parser.add_argument('--commit')
    args = parser.parse_args()
    with open(args.file, 'r') as f:
        lines = f.readlines()
    # compile the regex once up front: translate_all declares a
    # Pattern[str] parameter (previously a raw str was passed, which
    # only worked because parse_annotation uses re.match), and this
    # also surfaces an invalid --regex immediately instead of per line
    print(json.dumps(translate_all(
        lines=lines,
        regex=re.compile(args.regex),
        commit=args.commit
    )))


if __name__ == '__main__':
    main()
|
Reference in New Issue
Block a user