Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-21 13:44:15 +08:00
Rewrite the guts of torch::jit::Lexer to speed it up (#151850)
The trie-based approach was, apparently, not efficient. This incidentally fixes a bug where "not inp" and "is note" were lexed incorrectly; see test_lexer.cpp update.

Differential Revision: [D73129443](https://our.internmc.facebook.com/intern/diff/D73129443/)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/151850
Approved by: https://github.com/Skylion007
ghstack dependencies: #151801, #151802, #151803, #151804, #151805, #151806, #151807, #151810, #151849
Committed by: PyTorch MergeBot
Parent: 0f765773e3
Commit: 47d34261e0
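For context on the lexing bug mentioned above: the removed IsNoteBug and NotInpBug tests in the diff below documented the old, incorrect behavior ("is note" tokenized as TK_ISNOT plus a stray "e"). The sketch that follows is not part of this change; it is a hedged illustration, in the same gtest style as the diff, of the tokenization the updated SlightlyOff tests are presumably meant to exercise, using a hypothetical test name.

// Hedged sketch, not from the PR: the expected tokenization after the fix,
// inferred from the commit message. An identifier that merely starts with
// the continuation of a multi-word keyword ("note" after "is") should lex
// as TK_IS followed by an ordinary TK_IDENT, not as TK_ISNOT.
TEST(LexerTest, IsFollowedByNoteIdentifierSketch) {
  Lexer l(std::make_shared<Source>("is note"));
  const auto is_tok = l.next();
  EXPECT_EQ(is_tok.kind, TK_IS);
  const auto ident_tok = l.next();
  EXPECT_EQ(ident_tok.kind, TK_IDENT);
  EXPECT_EQ(ident_tok.range.text(), "note");
  const auto eof_tok = l.next();
  EXPECT_EQ(eof_tok.kind, TK_EOF);
}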
test_lexer.cpp

@@ -29,7 +29,7 @@ TEST(LexerTest, AllTokens) {
 TEST(LexerTest, SlightlyOffIsNot) {
   std::vector<std::string> suffixes = {"", " ", "**"};
   for (const auto& suffix : suffixes) {
-    std::vector<std::string> extras = {"n", "no", "no3"};
+    std::vector<std::string> extras = {"n", "no", "no3", "note"};
     for (const auto& extra : extras) {
       std::string s = "is " + extra + suffix;
       Lexer l(std::make_shared<Source>(s));
@@ -45,7 +45,7 @@ TEST(LexerTest, SlightlyOffIsNot) {
 TEST(LexerTest, SlightlyOffNotIn) {
   std::vector<std::string> suffixes = {"", " ", "**"};
   for (const auto& suffix : suffixes) {
-    std::vector<std::string> extras = {"i", "i3"};
+    std::vector<std::string> extras = {"i", "i3", "inn"};
     for (const auto& extra : extras) {
       std::string s = "not " + extra + suffix;
       Lexer l(std::make_shared<Source>(s));
@@ -57,32 +57,4 @@ TEST(LexerTest, SlightlyOffNotIn) {
     }
   }
 }
-
-TEST(LexerTest, IsNoteBug) {
-  // The code string `is note` is lexed as TK_ISNOT followed by a
-  // TK_IDENT that is an e. This is not how it works in Python, but
-  // presumably we need to maintain this behavior.
-  Lexer l(std::make_shared<Source>("is note"));
-  const auto is_not_tok = l.next();
-  EXPECT_EQ(is_not_tok.kind, TK_ISNOT);
-  const auto e_tok = l.next();
-  EXPECT_EQ(e_tok.kind, TK_IDENT);
-  EXPECT_EQ(e_tok.range.text(), "e");
-  const auto eof_tok = l.next();
-  EXPECT_EQ(eof_tok.kind, TK_EOF);
-}
-
-TEST(LexerTest, NotInpBug) {
-  // Another manifestation of the above IsNoteBug; `not inp` is lexed
-  // as TK_NOT_IN followed by a TK_IDENT that is a p. Again, not how
-  // it works in Python.
-  Lexer l(std::make_shared<Source>("not inp"));
-  const auto not_in_tok = l.next();
-  EXPECT_EQ(not_in_tok.kind, TK_NOTIN);
-  const auto p_tok = l.next();
-  EXPECT_EQ(p_tok.kind, TK_IDENT);
-  EXPECT_EQ(p_tok.range.text(), "p");
-  const auto eof_tok = l.next();
-  EXPECT_EQ(eof_tok.kind, TK_EOF);
-}
 } // namespace torch::jit