Files
pytorch/torch/csrc/profiler/unwind/fde.h
Yuanyuan Chen 9fff8155c3 [2/N] Fix clang-tidy readability checks (#164652)
This PR applies clang-tidy readability checks to jit sources and all headers in the code base.
`readability-redundant-inline-specifier` is suppressed because it incurs too many changes. `readability-redundant-inline-specifier` is used to detect redundant inline specifiers on function and variable declarations. There are many in-class method definitions that are marked inline.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164652
Approved by: https://github.com/Skylion007
2025-10-06 01:06:01 +00:00

412 lines
12 KiB
C++

#pragma once
#include <c10/util/Exception.h>
#include <c10/util/irange.h>
#include <torch/csrc/profiler/unwind/action.h>
#include <torch/csrc/profiler/unwind/lexer.h>
#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <sstream>
#include <vector>
namespace torch::unwind {
struct TableState {
Action cfa;
std::array<Action, D_REG_SIZE> registers;
friend std::ostream& operator<<(std::ostream& out, const TableState& self) {
out << "cfa = " << self.cfa << "; ";
for (auto r : c10::irange(self.registers.size())) {
if (self.registers.at(r).kind != A_UNDEFINED) {
out << "r" << r << " = " << self.registers.at(r) << "; ";
}
}
return out;
}
};
// FDE - Frame Description Entry (Concept in ELF spec)
// This format is explained well by
// https://www.airs.com/blog/archives/460
// Details of different dwarf actions are explained
// in the spec document:
// https://web.archive.org/web/20221129184704/https://dwarfstd.org/doc/DWARF4.doc
// An overview of how DWARF unwinding works is given in
// https://dl.acm.org/doi/pdf/10.1145/3360572
// A similar implementation written in rust is:
// https://github.com/mstange/framehop/
template <bool LOG = false>
struct FDE {
FDE(void* data, const char* library_name, uint64_t load_bias)
: library_name_(library_name), load_bias_(load_bias) {
Lexer L(data);
auto length = L.read4or8Length();
void* fde_start = L.loc();
// NOLINTNEXTLINE(performance-no-int-to-ptr)
void* cie_data = (void*)((int64_t)fde_start - L.read<uint32_t>());
Lexer LC(cie_data);
auto cie_length = LC.read4or8Length();
void* cie_start = LC.loc();
auto zero = LC.read<uint32_t>();
TORCH_INTERNAL_ASSERT(zero == 0, "expected 0 for CIE");
auto version = LC.read<uint8_t>();
TORCH_INTERNAL_ASSERT(
version == 1 || version == 3, "non-1 version for CIE");
augmentation_string_ = LC.readCString();
if (hasAugmentation("eh")) {
throw UnwindError("unsupported 'eh' augmentation string");
}
code_alignment_factor_ = static_cast<int64_t>(LC.readULEB128());
data_alignment_factor_ = LC.readSLEB128();
if (version == 1) {
ra_register_ = LC.read<uint8_t>();
} else {
ra_register_ = static_cast<int64_t>(LC.readULEB128());
}
// we assume this in the state
TORCH_INTERNAL_ASSERT(ra_register_ == 16, "unexpected number of registers");
if (augmentation_string_ && *augmentation_string_ == 'z') {
augmentation_length_ = static_cast<int64_t>(LC.readULEB128());
Lexer A(LC.loc());
for (auto ap = augmentation_string_ + 1; *ap; ap++) {
switch (*ap) {
case 'L':
lsda_enc = A.read<uint8_t>();
break;
case 'R':
fde_enc = A.read<uint8_t>();
break;
case 'P': {
uint8_t personality_enc = A.read<uint8_t>();
A.readEncoded(personality_enc);
} break;
case 'S': {
// signal handler
} break;
default: {
throw UnwindError("unknown augmentation string");
} break;
}
}
}
LC.skip(augmentation_length_);
low_pc_ = L.readEncoded(fde_enc);
high_pc_ = low_pc_ + L.readEncodedValue(fde_enc);
if (hasAugmentation("z")) {
augmentation_length_fde_ = static_cast<int64_t>(L.readULEB128());
}
L.readEncodedOr(lsda_enc, 0);
cie_begin_ = LC.loc();
fde_begin_ = L.loc();
cie_end_ = (void*)((const char*)cie_start + cie_length);
fde_end_ = (void*)((const char*)fde_start + length);
}
// OP Code implementations
void advance_raw(int64_t amount) {
auto previous_pc = current_pc_;
current_pc_ += amount;
if (LOG) {
(*out_) << (void*)(previous_pc - load_bias_) << "-"
<< (void*)(current_pc_ - load_bias_) << ": " << state() << "\n";
}
}
void advance_loc(int64_t amount) {
if (LOG) {
(*out_) << "advance_loc " << amount << "\n";
}
advance_raw(amount * code_alignment_factor_);
}
void offset(int64_t reg, int64_t offset) {
if (LOG) {
(*out_) << "offset " << reg << " " << offset << "\n";
}
if (reg > (int64_t)state().registers.size()) {
if (LOG) {
(*out_) << "OFFSET OF BIG REGISTER " << reg << "ignored...\n";
}
return;
}
state().registers.at(reg) =
Action{A_LOAD_CFA_OFFSET, -1, offset * data_alignment_factor_};
}
void restore(int64_t reg) {
if (LOG) {
(*out_) << "restore " << reg << "\n";
}
if (reg > (int64_t)state().registers.size()) {
if (LOG) {
(*out_) << "RESTORE OF BIG REGISTER " << reg << "ignored...\n";
}
return;
}
state().registers.at(reg) = initial_state_.registers.at(reg);
}
void def_cfa(int64_t reg, int64_t off) {
if (LOG) {
(*out_) << "def_cfa " << reg << " " << off << "\n";
}
last_reg_ = reg;
last_offset_ = off;
state().cfa = Action::regPlusData(static_cast<int32_t>(reg), off);
}
void def_cfa_register(int64_t reg) {
def_cfa(reg, last_offset_);
}
void def_cfa_offset(int64_t off) {
def_cfa(last_reg_, off);
}
void remember_state() {
if (LOG) {
(*out_) << "remember_state\n";
}
state_stack_.push_back(state());
}
void restore_state() {
if (LOG) {
(*out_) << "restore_state\n";
}
state_stack_.pop_back();
}
void undefined(int64_t reg) {
if (LOG) {
(*out_) << "undefined " << reg << "\n";
}
state().registers.at(reg) = Action::undefined();
}
void register_(int64_t reg, int64_t rhs_reg) {
if (LOG) {
(*out_) << "register " << reg << " " << rhs_reg << "\n";
}
state().registers.at(reg) =
Action::regPlusData(static_cast<int32_t>(reg), 0);
}
TableState& state() {
return state_stack_.back();
}
void dump(std::ostream& out) {
out_ = &out;
out << "FDE(augmentation_string=" << augmentation_string_
<< ", low_pc=" << (void*)(low_pc_ - load_bias_)
<< ",high_pc=" << (void*)(high_pc_ - load_bias_)
<< ",code_alignment_factor=" << code_alignment_factor_
<< ", data_alignment_factor=" << data_alignment_factor_
<< ", ra_register_=" << ra_register_ << ")\n";
readUpTo(high_pc_);
out_ = &std::cout;
}
TableState readUpTo(uint64_t addr) {
if (addr < low_pc_ || addr > high_pc_) {
throw UnwindError("Address not in range");
}
if (LOG) {
// NOLINTNEXTLINE(performance-no-int-to-ptr)
(*out_) << "readUpTo " << (void*)addr << " for " << library_name_
<< " at " << (void*)load_bias_ << "\n";
}
state_stack_.emplace_back();
current_pc_ = low_pc_;
// parse instructions...
Lexer LC(cie_begin_);
while (LC.loc() < cie_end_ && current_pc_ <= addr) {
readInstruction(LC);
}
if (current_pc_ > addr) {
return state();
}
initial_state_ = state_stack_.back();
if (LOG) {
(*out_) << "--\n";
}
Lexer L(fde_begin_);
while (L.loc() < fde_end_ && current_pc_ <= addr) {
readInstruction(L);
}
// so that we print the full range in debugging
if (current_pc_ <= addr) {
advance_raw(addr - current_pc_);
}
return state();
}
void dumpAddr2Line() {
std::cout << "addr2line -f -e " << library_name_ << " "
<< (void*)(low_pc_ - load_bias_) << "\n";
}
void readInstruction(Lexer& L) {
uint8_t bc = L.read<uint8_t>();
auto op = bc >> 6;
auto lowbits = bc & 0x3F;
switch (op) {
case 0x0: {
switch (lowbits) {
case DW_CFA_nop: {
return; // nop
}
case DW_CFA_advance_loc1: {
auto delta = L.read<uint8_t>();
return advance_loc(delta);
}
case DW_CFA_advance_loc2: {
auto delta = L.read<uint16_t>();
return advance_loc(delta);
}
case DW_CFA_advance_loc4: {
auto delta = L.read<uint32_t>();
return advance_loc(delta);
}
case DW_CFA_restore_extended: {
auto reg = L.readULEB128();
return restore(reg);
}
case DW_CFA_undefined: {
auto reg = L.readULEB128();
return undefined(reg);
}
case DW_CFA_register: {
auto reg = L.readULEB128();
auto rhs_reg = L.readULEB128();
return register_(reg, rhs_reg);
}
case DW_CFA_def_cfa: {
auto reg = L.readULEB128();
auto off = L.readULEB128();
return def_cfa(reg, off);
}
case DW_CFA_def_cfa_register: {
auto reg = L.readULEB128();
return def_cfa_register(reg);
}
case DW_CFA_def_cfa_offset: {
auto off = L.readULEB128();
return def_cfa_offset(off);
}
case DW_CFA_offset_extended_sf: {
auto reg = L.readULEB128();
auto off = L.readSLEB128();
return offset(reg, off);
}
case DW_CFA_remember_state: {
return remember_state();
}
case DW_CFA_restore_state: {
return restore_state();
}
case DW_CFA_GNU_args_size: {
// GNU_args_size, we do not need to know it..
L.readULEB128();
return;
}
case DW_CFA_expression: {
auto reg = L.readULEB128();
auto len = L.readULEB128();
// NOLINTNEXTLINE(performance-no-int-to-ptr)
auto end = (void*)((uint64_t)L.loc() + len);
auto op = L.read<uint8_t>();
if ((op & 0xF0) == 0x70) { // DW_bregX
auto rhs_reg = (op & 0xF);
auto addend = L.readSLEB128();
if (L.loc() == end) {
state().registers.at(reg) =
Action::regPlusDataDeref(rhs_reg, addend);
return;
}
}
throw UnwindError("Unsupported dwarf expression");
}
case DW_CFA_def_cfa_expression: {
auto len = L.readULEB128();
// NOLINTNEXTLINE(performance-no-int-to-ptr)
auto end = (void*)((uint64_t)L.loc() + len);
auto op = L.read<uint8_t>();
if ((op & 0xF0) == 0x70) { // DW_bregX
auto rhs_reg = (op & 0xF);
auto addend = L.readSLEB128();
if (L.loc() != end) {
auto op2 = L.read<uint8_t>();
if (op2 == DW_OP_deref && L.loc() == end) { // deref
state().cfa = Action::regPlusDataDeref(rhs_reg, addend);
return;
}
}
}
throw UnwindError("Unsupported def_cfa dwarf expression");
}
default: {
std::stringstream ss;
// NOLINTNEXTLINE(performance-no-int-to-ptr)
ss << "unknown op code " << (void*)(uint64_t)lowbits;
throw UnwindError(ss.str());
}
}
}
case DW_CFA_advance_loc: {
return advance_loc(lowbits);
}
case DW_CFA_offset: {
auto off = L.readULEB128();
return offset(lowbits, off);
}
case DW_CFA_restore: {
return restore(lowbits);
}
}
}
// used for debug printing
const char* library_name_;
uint64_t load_bias_;
// parsed from the eh_string data structures:
const char* augmentation_string_ = nullptr;
int64_t augmentation_length_ = 0;
int64_t augmentation_length_fde_ = 0;
int64_t code_alignment_factor_;
int64_t data_alignment_factor_;
void* cie_data_{nullptr};
int64_t ra_register_;
uint8_t lsda_enc = DW_EH_PE_omit;
uint8_t fde_enc = DW_EH_PE_absptr;
uint64_t low_pc_ = UINT64_MAX;
uint64_t high_pc_ = UINT64_MAX;
void* cie_begin_;
void* fde_begin_;
void* cie_end_;
void* fde_end_;
// state accumulated while parsing instructions
int64_t last_reg_ = 0;
int64_t last_offset_ = 0;
uint64_t current_pc_ = 0;
TableState
initial_state_; // state after the initial instructions, used by restore
std::vector<TableState> state_stack_;
std::ostream* out_ = &std::cout; // for debug dumping
private:
bool hasAugmentation(const char* s) {
return strstr(augmentation_string_, s) != nullptr;
}
};
} // namespace torch::unwind