object-introspection/oi/SymbolService.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

916 lines
27 KiB
C++
Raw Normal View History

2022-12-19 14:37:51 +00:00
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2023-04-26 16:20:53 +01:00
#include "oi/SymbolService.h"
2022-12-19 14:37:51 +00:00
#include <glog/logging.h>
#include <algorithm>
#include <boost/scope_exit.hpp>
#include <cassert>
#include <cstring>
#include <fstream>
2023-04-26 16:20:53 +01:00
#include "oi/DrgnUtils.h"
#include "oi/OIParser.h"
2022-12-19 14:37:51 +00:00
extern "C" {
#include <elfutils/known-dwarf.h>
#include <elfutils/libdwfl.h>
#include "drgn.h"
#include "dwarf.h"
}
namespace fs = std::filesystem;
namespace oi::detail {
2023-06-06 16:13:51 +01:00
/// Overload-set helper: inherits the call operator of every callable it is
/// built from, so one object can be invoked with any of their argument types
/// (e.g. as the visitor passed to std::visit).
template <typename... Callables>
struct visitor : Callables... {
  using Callables::operator()...;
};

/// Deduction guide so that `visitor{f, g}` infers `visitor<F, G>`.
template <typename... Callables>
visitor(Callables...) -> visitor<Callables...>;
2022-12-19 14:37:51 +00:00
/**
 * Collect the executable mappings of a process from /proc/<pid>/maps.
 *
 * Every mapping whose permission field has the 'x' bit set is appended to
 * 'exeAddrs' as a (start, end) pair, in the order the file lists them.
 *
 * @param[in]  pid      - process whose memory map is read
 * @param[out] exeAddrs - receives the executable address ranges
 * @return false if a line of the maps file cannot be parsed; true otherwise,
 *         including when the maps file cannot be opened (in which case
 *         'exeAddrs' is left untouched).
 */
static bool LoadExecutableAddressRange(
    pid_t pid, std::vector<std::pair<uint64_t, uint64_t>>& exeAddrs) {
  std::ifstream maps("/proc/" + std::to_string(pid) + "/maps");
  if (!maps.is_open()) {
    return true;
  }

  constexpr size_t permissionsLen = 4;
  static_assert(permissionsLen == 4,
                "keep in sync with the %4s width in the format string below");

  std::string line;
  while (std::getline(maps, line)) {
    uint64_t start = 0;
    uint64_t end = 0;
    uint64_t offset = 0;
    uint64_t inode = 0;
    unsigned int dmajor = 0;
    unsigned int dminor = 0;
    int nread = -1;
    char perm[permissionsLen + 1] = {};

    // The permission field is read with an explicit width so that a malformed
    // line can never write past the end of 'perm'.
    const int matched =
        sscanf(line.c_str(),
               "%" PRIx64 "-%" PRIx64 " %4s %" PRIx64 " %x:%x %" PRIu64 " %n",
               &start,
               &end,
               perm,
               &offset,
               &dmajor,
               &dminor,
               &inode,
               &nread);
    if (matched < 7 || nread <= 0) {
      return false;
    }
    if (strlen(perm) != permissionsLen) {
      return false;
    }

    // Permissions are laid out as "rwxp": index 2 is the execute bit.
    if (perm[2] == 'x') {
      exeAddrs.emplace_back(start, end);
    }
  }

  return true;
}
/**
 * Tell whether 'addr' falls inside one of the [start, end) ranges of
 * 'exeAddrs'.
 *
 * @param[in] addr     - address to test
 * @param[in] exeAddrs - address ranges; must be sorted (checked by an assert)
 * @return true if some range satisfies start <= addr < end
 */
static bool isExecutableAddr(
    uint64_t addr, const std::vector<std::pair<uint64_t, uint64_t>>& exeAddrs) {
  assert(std::is_sorted(exeAddrs.begin(), exeAddrs.end()));

  // First range whose end lies strictly beyond addr
  const auto candidate = std::upper_bound(
      exeAddrs.begin(),
      exeAddrs.end(),
      addr,
      [](uint64_t value, const auto& range) { return value < range.second; });
  if (candidate == exeAddrs.end()) {
    return false;
  }

  return candidate->first <= addr;
}
/**
 * Build a SymbolService for a live process.
 *
 * Records the process' executable address ranges, then loads its modules.
 *
 * @param[in] pid - target process
 * @throws std::runtime_error if the modules cannot be loaded
 */
SymbolService::SymbolService(pid_t pid) : target{pid} {
  // Snapshot the target's executable mappings. The return value is not
  // checked here.
  LoadExecutableAddressRange(pid, executableAddrs);

  const bool modulesLoaded = loadModules();
  if (modulesLoaded) {
    return;
  }

  throw std::runtime_error("Failed to load modules for process " +
                           std::to_string(pid));
}
2022-12-19 14:37:51 +00:00
/**
 * Build a SymbolService for an ELF binary on disk.
 *
 * @param[in] executablePath - path of the binary; moved into 'target'
 * @throws std::runtime_error if the modules cannot be loaded
 */
SymbolService::SymbolService(fs::path executablePath)
    : target{std::move(executablePath)} {
  if (!loadModules()) {
    // 'executablePath' has been moved from by the initializer above, so the
    // path must be read back from 'target' to appear in the message.
    throw std::runtime_error("Failed to load modules for executable " +
                             std::get<fs::path>(target).string());
  }
}
/**
 * Release the libdwfl session and the drgn program, when they were created.
 */
SymbolService::~SymbolService() {
  if (dwfl != nullptr) {
    dwfl_end(dwfl);
  }

  if (prog != nullptr) {
    drgn_program_destroy(prog);
  }
}
struct ModParams {
std::string_view symName;
GElf_Sym sym;
2022-12-19 14:37:51 +00:00
GElf_Addr value;
std::vector<std::pair<uint64_t, uint64_t>>& exeAddrs;
bool demangle;
2022-12-19 14:37:51 +00:00
};
/**
 * Callback for dwfl_getmodules(). Walks the symbol table of the provided
 * module looking for the symbol requested through 'arg'.
 *
 * @param[in]     mod  - module whose symbol table is searched
 * @param[in]     name - module name; used for logging and to skip modules
 *                       whose name ends in '.debuginfo'
 * @param[in,out] arg  - pointer to a ModParams. 'symName', 'exeAddrs' and
 *                       'demangle' are inputs; 'sym' and 'value' receive the
 *                       symbol information and address when it is found.
 *                       'value' is reset to 0 when this module does not
 *                       provide the symbol.
 * @return DWARF_CB_ABORT once the symbol is found (which stops the
 *         iteration), DWARF_CB_OK otherwise.
 */
static int moduleCallback(Dwfl_Module* mod,
                          void** /* userData */,
                          const char* name,
                          Dwarf_Addr /* start */,
                          void* arg) {
  auto* m = static_cast<ModParams*>(arg);

  const int nsym = dwfl_module_getsymtab(mod);
  VLOG(1) << "mod name: " << name << " "
          << "nsyms " << nsym;

  // FIXME: There's surely a better way to distinguish debuginfo modules from
  // actual code modules.
  constexpr std::string_view debugSuffix{".debuginfo"};
  const std::string_view modName{name};
  if (modName.size() >= debugSuffix.size() &&
      modName.compare(modName.size() - debugSuffix.size(),
                      debugSuffix.size(),
                      debugSuffix) == 0) {
    VLOG(1) << "Skipping debuginfo module";
    m->value = 0;
    return DWARF_CB_OK;
  }

  /* I think the first entry is always UNDEF */
  for (int i = 1; i < nsym; ++i) {
    Elf* elf = nullptr;
    GElf_Word shndxp = 0;
    const char* lookupResult = dwfl_module_getsym_info(
        mod, i, &m->sym, &m->value, &shndxp, &elf, nullptr);
    if (lookupResult == nullptr || lookupResult[0] == '\0') {
      continue;
    }

    // Cheap filters first, so that names are only demangled for symbols that
    // could actually match.
    const auto symType = GELF_ST_TYPE(m->sym.st_info);
    if (symType == STT_SECTION || symType == STT_FILE || symType == STT_TLS ||
        symType == STT_NOTYPE) {
      continue;
    }
    if (shndxp == SHN_UNDEF) {
      continue;
    }

    std::string symName = lookupResult;
    if (m->demangle) {
      symName = boost::core::demangle(symName.c_str());
    }
    if (std::string_view{symName} != m->symName) {
      continue;
    }

    /*
     * Objects match on name alone. Every other remaining type must also
     * point into an executable range.
     *
     * Original author's note: I don't understand why the only symbol that is
     * presented to us here has NOTYPE yet readelf shows me it is defined
     * as an STT_FUNC. Confused...
     */
    if (symType != STT_OBJECT && !isExecutableAddr(m->value, m->exeAddrs)) {
      continue;
    }

    VLOG(1) << "Symbol lookup successful for " << symName << " in module "
            << name;
    return DWARF_CB_ABORT;
  }

  // Set m->value to 0 if symbol is not found
  m->value = 0;
  return DWARF_CB_OK;
}
/* Load modules from a live process */
2023-05-19 16:18:04 +01:00
/**
 * Report every module mapped in the address space of 'targetPid' to libdwfl.
 *
 * @param[in] targetPid - process to inspect
 * @return true on success, false (after logging) on failure
 */
bool SymbolService::loadModulesFromPid(pid_t targetPid) {
  const int err = dwfl_linux_proc_report(dwfl, targetPid);
  if (err == 0) {
    return true;
  }

  // A positive return value is an errno code (opening the /proc files
  // failed), which dwfl_errmsg() cannot decode. Any other value is a libdwfl
  // failure whose message is the last libdwfl error.
  LOG(ERROR) << "dwfl_linux_proc_report: "
             << (err > 0 ? strerror(err) : dwfl_errmsg(-1));
  return false;
}
/* Load modules from an ELF binary */
2023-05-19 16:18:04 +01:00
/**
 * Report the file at 'targetPath' to libdwfl as an offline module and record
 * the module's address span as the only entry of 'executableAddrs'.
 *
 * @param[in] targetPath - path of the ELF binary
 * @return true on success, false (after logging) on failure
 */
bool SymbolService::loadModulesFromPath(const fs::path& targetPath) {
  const char* pathStr = targetPath.c_str();

  Dwfl_Module* mod = dwfl_report_offline(dwfl, pathStr, pathStr, -1);
  if (mod == nullptr) {
    LOG(ERROR) << "dwfl_report_offline: " << dwfl_errmsg(dwfl_errno());
    return false;
  }

  Dwarf_Addr start = 0;
  Dwarf_Addr end = 0;
  const char* modName = dwfl_module_info(
      mod, nullptr, &start, &end, nullptr, nullptr, nullptr, nullptr);
  if (modName == nullptr) {
    LOG(ERROR) << "dwfl_module_info: " << dwfl_errmsg(dwfl_errno());
    return false;
  }

  VLOG(1) << "Module info for " << targetPath << ": start= " << std::hex
          << start << ", end=" << end;

  // Add module's boundary to executableAddrs
  executableAddrs = {{start, end}};

  return true;
}
bool SymbolService::loadModules() {
static char* debuginfo_path;
2022-12-19 14:37:51 +00:00
static const Dwfl_Callbacks proc_callbacks{
.find_elf = dwfl_linux_proc_find_elf,
.find_debuginfo = dwfl_standard_find_debuginfo,
.section_address = dwfl_offline_section_address,
.debuginfo_path = &debuginfo_path,
};
dwfl = dwfl_begin(&proc_callbacks);
2022-12-19 14:37:51 +00:00
if (dwfl == nullptr) {
LOG(ERROR) << "dwfl_begin: " << dwfl_errmsg(dwfl_errno());
return false;
2022-12-19 14:37:51 +00:00
}
dwfl_report_begin(dwfl);
2022-12-19 14:37:51 +00:00
bool ok = std::visit(
2023-05-19 16:18:04 +01:00
visitor{[this](pid_t targetPid) { return loadModulesFromPid(targetPid); },
[this](const fs::path& targetPath) {
return loadModulesFromPath(targetPath);
}},
target);
2022-12-19 14:37:51 +00:00
if (!ok) {
// The loadModules* function above already logged the error message
return false;
2022-12-19 14:37:51 +00:00
}
if (dwfl_report_end(dwfl, nullptr, nullptr) != 0) {
LOG(ERROR) << "dwfl_report_end: " << dwfl_errmsg(-1);
return false;
2022-12-19 14:37:51 +00:00
}
return true;
}
2023-08-16 20:40:36 +01:00
/**
 * Look up a symbol by name and return the type found at its address.
 *
 * @param[in] prog       - drgn program to search
 * @param[in] symbolName - name of the symbol
 * @return the qualified type, or std::nullopt (after logging) if either the
 *         symbol or the type at its address cannot be found
 */
std::optional<drgn_qualified_type> SymbolService::findTypeOfSymbol(
    drgn_program* prog, const std::string& symbolName) {
  drgn_symbol* sym = nullptr;
  if (auto* err =
          drgn_program_find_symbol_by_name(prog, symbolName.c_str(), &sym);
      err != nullptr) {
    LOG(ERROR) << "Failed to lookup symbol '" << symbolName
               << "': " << err->code << " " << err->message;
    drgn_error_destroy(err);
    return std::nullopt;
  }

  // Only the address is needed; the symbol itself can be released right away.
  const uint64_t addr = drgn_symbol_address(sym);
  drgn_symbol_destroy(sym);

  auto type = findTypeOfAddr(prog, addr);
  if (!type) {
    LOG(ERROR) << "Failed to lookup symbol '" << symbolName << "'";
    return std::nullopt;
  }

  return type;
}
/**
 * Return the qualified type of the function found at 'addr'.
 *
 * @param[in] prog - drgn program to search
 * @param[in] addr - address to look up
 * @return the qualified type, or std::nullopt (after logging) if the lookup
 *         fails
 */
std::optional<drgn_qualified_type> SymbolService::findTypeOfAddr(
    drgn_program* prog, uintptr_t addr) {
  drgn_object obj;
  drgn_object_init(&obj, prog);

  std::optional<drgn_qualified_type> type;
  const char* name = nullptr;
  if (auto* err =
          drgn_program_find_function_by_address(prog, addr, &name, &obj);
      err != nullptr) {
    LOG(ERROR) << "Failed to lookup function '" << reinterpret_cast<void*>(addr)
               << "': " << err->code << " " << err->message;
    drgn_error_destroy(err);
  } else {
    type = drgn_object_qualified_type(&obj);
  }

  // The object is released on both the success and the failure path.
  drgn_object_deinit(&obj);
  return type;
}
/**
* Resolve a symbol to its location in the target ELF binary.
*
* @param[in] symName - symbol to resolve
* @return - A std::optional with the symbol's information
*/
std::optional<SymbolInfo> SymbolService::locateSymbol(
const std::string& symName, bool demangle) {
ModParams m = {.symName = symName,
.sym = {},
.value = 0,
.exeAddrs = executableAddrs,
.demangle = demangle};
2022-12-19 14:37:51 +00:00
dwfl_getmodules(dwfl, moduleCallback, (void*)&m, 0);
2022-12-19 14:37:51 +00:00
if (m.value == 0) {
return std::nullopt;
}
return SymbolInfo{m.value, m.sym.st_size};
2022-12-19 14:37:51 +00:00
}
/**
 * Render a byte buffer as a lowercase hexadecimal string, two characters per
 * byte.
 *
 * @param[in] bytes   - buffer to render
 * @param[in] nbbytes - number of bytes in the buffer
 * @return the hexadecimal string; empty when 'bytes' is null or 'nbbytes' is
 *         not positive
 */
static std::string bytesToHexString(const unsigned char* bytes, int nbbytes) {
  static constexpr char kHexDigits[] = "0123456789abcdef";

  if (bytes == nullptr || nbbytes <= 0) {
    return {};
  }

  std::string hex;
  hex.reserve(static_cast<size_t>(nbbytes) * 2);
  for (int i = 0; i < nbbytes; ++i) {
    hex.push_back(kHexDigits[bytes[i] >> 4]);
    hex.push_back(kHexDigits[bytes[i] & 0x0F]);
  }

  return hex;
}
/**
 * Callback for dwfl_getmodules(). For the provided module we lookup
 * its build ID and pass it back via the 'arg' parameter.
 * We expect the target program to always be the first module passed
 * to this callback. So we always return DWARF_CB_ABORT, as this is
 * the only build ID we are interested in.
 *
 * @param[in]  mod  - module to query
 * @param[in]  name - module name, used for logging
 * @param[out] arg  - pointer to a std::optional<std::string>; receives the
 *                    build ID as a hex string, or std::nullopt when the
 *                    module has none. Left untouched if the module's ELF
 *                    cannot be obtained.
 * @return always DWARF_CB_ABORT
 */
static int buildIDCallback(Dwfl_Module* mod,
                           void** /* userData */,
                           const char* name,
                           Dwarf_Addr /* start */,
                           void* arg) {
  auto* buildID = static_cast<std::optional<std::string>*>(arg);

  // We must call dwfl_module_getelf before using dwfl_module_build_id
  GElf_Addr bias = 0;
  if (dwfl_module_getelf(mod, &bias) == nullptr) {
    LOG(ERROR) << "Failed to getelf for " << name << ": " << dwfl_errmsg(-1);
    return DWARF_CB_ABORT;
  }

  GElf_Addr vaddr = 0;
  const unsigned char* bytes = nullptr;
  const int nbbytes = dwfl_module_build_id(mod, &bytes, &vaddr);
  if (nbbytes > 0) {
    *buildID = bytesToHexString(bytes, nbbytes);
    VLOG(1) << "Build ID lookup successful for " << name << ": "
            << buildID->value();
  } else {
    *buildID = std::nullopt;
    LOG(ERROR) << "Build ID not found for " << name;
  }

  return DWARF_CB_ABORT;
}
/**
 * Look up the build ID of the target through buildIDCallback().
 *
 * @return the build ID as a hex string, or std::nullopt when the callback
 *         did not provide one
 */
std::optional<std::string> SymbolService::locateBuildID() {
  std::optional<std::string> buildID;
  dwfl_getmodules(dwfl, buildIDCallback, static_cast<void*>(&buildID), 0);
  return buildID;
}
struct drgn_program* SymbolService::getDrgnProgram() {
2022-12-19 14:37:51 +00:00
if (hardDisableDrgn) {
LOG(ERROR) << "drgn is disabled, refusing to initialize";
return nullptr;
}
if (prog != nullptr) {
return prog;
}
LOG(INFO) << "Initialising drgn. This might take a while";
switch (target.index()) {
case 0: {
if (auto* err = drgn_program_from_pid(std::get<pid_t>(target), &prog)) {
2022-12-19 14:37:51 +00:00
LOG(ERROR) << "Failed to initialize drgn: " << err->code << " "
<< err->message;
return nullptr;
}
auto executable = fs::read_symlink(
"/proc/" + std::to_string(std::get<pid_t>(target)) + "/exe");
const auto* executableCStr = executable.c_str();
if (auto* err = drgn_program_load_debug_info(
prog, &executableCStr, 1, false, false)) {
2022-12-19 14:37:51 +00:00
LOG(ERROR) << "Error loading debug info: " << err->message;
return nullptr;
}
break;
}
case 1: {
if (auto* err = drgn_program_create(nullptr, &prog)) {
2022-12-19 14:37:51 +00:00
LOG(ERROR) << "Failed to create empty drgn program: " << err->code
<< " " << err->message;
return nullptr;
}
const char* path = std::get<fs::path>(target).c_str();
if (auto* err =
2022-12-19 14:37:51 +00:00
drgn_program_load_debug_info(prog, &path, 1, false, false)) {
LOG(ERROR) << "Failed to read debug info: " << err->code << " "
<< err->message;
drgn_program_destroy(prog);
prog = nullptr;
return prog;
}
LOG(INFO) << "Successfully read debug info";
break;
}
}
return prog;
}
/*
 * Although 'parseFormalParam' has an all-encompassing sounding name, its sole
 * task is to extract the location information for this parameter if any exist.
 *
 * An argument entry is always appended to 'fd', even on failure; in that case
 * its 'valid' flag is false.
 *
 * @param[in]     param   - DIE of the formal parameter
 * @param[in]     file    - drgn ELF file handed to the locator
 * @param[in]     prog    - drgn program handed to the locator
 * @param[in]     funcDie - DIE of the enclosing function
 * @param[in,out] fd      - function descriptor receiving the argument;
 *                          'isMethod' is set when the parameter is 'this'
 */
static void parseFormalParam(Dwarf_Die& param,
                             struct drgn_elf_file* file,
                             struct drgn_program* prog,
                             Dwarf_Die& funcDie,
                             std::shared_ptr<FuncDesc>& fd) {
  /*
   * NOTE: It is vital that the function descriptors list of arguments
   * are in order and that an entry exists for each argument position
   * even if something goes wrong here when extracting the formal parameter.
   * We *must* pay careful attention to that especially when introducing
   * any new error handling.
   */
  auto farg = fd->addArgument();

  if (auto* err = drgn_object_locator_init(
          prog, file, &funcDie, &param, &farg->locator)) {
    LOG(ERROR) << "Could not initialize drgn_object_locator for parameter: "
               << err->code << ", " << err->message;
    drgn_error_destroy(err);
    farg->valid = false;
    return;
  }

  const char* name = nullptr;
  Dwarf_Attribute attr;
  if (dwarf_attr_integrate(&param, DW_AT_name, &attr) == nullptr) {
    VLOG(1) << "Parameter has no DW_AT_name attribute!";
  } else if ((name = dwarf_formstring(&attr)) == nullptr) {
    LOG(ERROR) << "DW_AT_name exists but no name extracted";
  }

  if (name != nullptr && strcmp(name, "this") == 0) {
    VLOG(1) << "'this' pointer found";
    fd->isMethod = true;
  }

  farg->typeName =
      SymbolService::getTypeName(farg->locator.qualified_type.type);
  // 'name' may legitimately be null here; never stream a null C string.
  VLOG(1) << "Type of argument '" << (name != nullptr ? name : "<unnamed>")
          << "': " << farg->typeName;

  farg->valid = true;
  VLOG(1) << "Adding function arg address: " << farg;
}
/*
static bool handleInlinedFunction(const irequest& request,
2022-12-19 14:37:51 +00:00
std::shared_ptr<FuncDesc> funcDesc,
struct drgn_qualified_type& funcType,
Dwarf_Die& funcDie,
struct drgn_module*& module) {
2022-12-19 14:37:51 +00:00
VLOG(1) << "Function '" << funcDesc->symName << "' has been inlined";
struct drgn_type_inlined_instances_iterator* iter = nullptr;
auto* err = drgn_type_inlined_instances_iterator_init(funcType.type, &iter);
2022-12-19 14:37:51 +00:00
if (err) {
LOG(ERROR) << "Error creating inlined instances iterator: " << err->message;
return false;
}
if (strcmp(drgn_type_parameters(funcType.type)[0].name, "this") == 0) {
funcDesc->isMethod = true;
}
auto index = funcDesc->getArgumentIndex(request.arg, false);
if (!index.has_value()) {
return false;
}
auto* argumentName = drgn_type_parameters(funcType.type)[index.value()].name;
struct drgn_type* inlinedInstance = nullptr;
2022-12-19 14:37:51 +00:00
bool foundInstance = false;
// The index at which the parameter was actually found in the inlined
// instance. This may differ from the index of the parameter in the function
// definition, as oftentimes as the result of compiler optimizations, some
// parameters will be omitted altogether from inlined instances.
size_t foundIndex = 0;
while (!foundInstance) {
err = drgn_type_inlined_instances_iterator_next(iter, &inlinedInstance);
if (err) {
LOG(ERROR) << "Error advancing inlined instances iterator: "
<< err->message;
return false;
}
if (!inlinedInstance) {
LOG(ERROR) << "Could not find an inlined instance of this function "
"with the argument '"
<< argumentName << "'";
return false;
}
auto numParameters = drgn_type_num_parameters(inlinedInstance);
auto* parameters = drgn_type_parameters(inlinedInstance);
2022-12-19 14:37:51 +00:00
for (size_t i = 0; i < numParameters; i++) {
if (strcmp(argumentName, parameters[i].name) == 0) {
foundInstance = true;
foundIndex = i;
break;
}
}
}
if (foundIndex != index) {
// We patch the parameters of `inlinedInstance` such that
// each parameter is found at the index one would expect from
// the function definition, matching the representation of the
// abstract root.
auto targetParameter = drgn_type_parameters(inlinedInstance)[foundIndex];
inlinedInstance->_private.num_parameters =
drgn_type_num_parameters(funcType.type);
// Allocating with `calloc` since `drgn` manages the lifetimes of its
// own structures, and it is written in C.
inlinedInstance->_private.parameters = (struct drgn_type_parameter*)calloc(
2022-12-19 14:37:51 +00:00
inlinedInstance->_private.num_parameters,
sizeof(*inlinedInstance->_private.parameters));
inlinedInstance->_private.parameters[index.value()] = targetParameter;
}
err = drgn_type_dwarf_die(inlinedInstance, &funcDie);
if (err) {
LOG(ERROR) << "Error obtaining DWARF DIE from type: " << err->message;
return false;
}
funcType.type = inlinedInstance;
module = inlinedInstance->_private.module;
return true;
}
*/
2022-12-19 14:37:51 +00:00
static std::optional<std::shared_ptr<FuncDesc>> createFuncDesc(
struct drgn_program* prog, const irequest& request) {
2022-12-19 14:37:51 +00:00
VLOG(1) << "Creating function description for: " << request.func;
2023-08-16 20:40:36 +01:00
auto ft = SymbolService::findTypeOfSymbol(prog, request.func);
if (!ft) {
2022-12-19 14:37:51 +00:00
return std::nullopt;
}
if (drgn_type_kind(ft->type) != DRGN_TYPE_FUNCTION) {
2022-12-19 14:37:51 +00:00
LOG(ERROR) << "Type corresponding to symbol '" << request.func
<< "' is not a function";
return std::nullopt;
}
auto fd = std::make_shared<FuncDesc>(request.func);
2023-05-04 13:58:13 +01:00
drgn_elf_file* file = ft->type->_private.file;
Dwarf_Die funcDie;
if (auto* err = drgn_type_dwarf_die(ft->type, &funcDie); err != nullptr) {
LOG(ERROR) << "Error obtaining DWARF DIE from type: " << err->message;
return std::nullopt;
}
2022-12-19 14:37:51 +00:00
if (dwarf_func_inline(&funcDie) == 1) {
// if (!handleInlinedFunction(request, fd, *ft, funcDie, module)) {
// return std::nullopt;
// }
LOG(ERROR) << "inlined functions are not supported";
return std::nullopt;
2022-12-19 14:37:51 +00:00
}
ptrdiff_t offset = 0;
uintptr_t base = 0;
uintptr_t start = 0;
uintptr_t end = 0;
while ((offset = dwarf_ranges(&funcDie, offset, &base, &start, &end)) > 0) {
fd->ranges.emplace_back(start, end);
}
if (offset < 0) {
LOG(ERROR) << "Error while finding ranges of function: "
<< dwarf_errmsg(dwarf_errno());
return std::nullopt;
}
auto retType = drgn_type_type(ft->type);
2022-12-19 14:37:51 +00:00
auto retTypeName = SymbolService::getTypeName(retType.type);
VLOG(1) << "Retval has type: " << retTypeName;
if (!retTypeName.empty() && retTypeName != "void") {
/*
* I really can't figure out at the minute how to deduce from the DWARF
* which register is used for the return value. I don't think we can just
* assume it's 'rax' as according to the AMD64 ABI V1.0 Section 12.1.3 we
* can use 'rax', 'rdi, and I think it may be more complex than that. More
* investigation required.
* Moreover, we must fabricate a pointer type to the return type for the
* locator code to properly interpret the register's content. This WILL
* break for return-by-value instead of return-by-reference. But this kind
* of assumption is in-line we what we need to improve about return-value
* locating, so this will be good-enough for now.
*
* For now, fabricate a 'Retval' object for rax.
*/
fd->retval = std::make_shared<FuncDesc::Retval>();
fd->retval->typeName = std::move(retTypeName);
fd->retval->valid = true;
}
// Add params
bool isVariadic = false;
fd->arguments.reserve(drgn_type_num_parameters(ft->type));
2022-12-19 14:37:51 +00:00
Dwarf_Die child;
int r = dwarf_child(&funcDie, &child);
while (r == 0) {
switch (dwarf_tag(&child)) {
case DW_TAG_formal_parameter:
if (isVariadic) {
LOG(WARNING) << "Formal parameter after unspecified "
"parameters tag!";
}
2023-05-04 13:58:13 +01:00
parseFormalParam(child, file, prog, funcDie, fd);
2022-12-19 14:37:51 +00:00
break;
case DW_TAG_unspecified_parameters:
if (isVariadic) {
VLOG(1) << "Multiple variadic parameters!";
}
VLOG(1) << "Unspecified parameters tag";
isVariadic = true;
break;
default:
break;
}
r = dwarf_siblingof(&child, &child);
}
if (r == -1) {
LOG(ERROR) << "Couldn't parse DIE children";
}
return fd;
}
/*
* Locate the function descriptor from the function descriptor cache or create
* one if it doesn't exist. Just take the
* up front hit of looking everything up now.
*/
std::shared_ptr<FuncDesc> SymbolService::findFuncDesc(const irequest& request) {
2022-12-19 14:37:51 +00:00
if (auto it = funcDescs.find(request.func); it != end(funcDescs)) {
VLOG(1) << "Found funcDesc for " << request.func;
return it->second;
}
struct drgn_program* drgnProg = getDrgnProgram();
2022-12-19 14:37:51 +00:00
if (drgnProg == nullptr) {
return nullptr;
}
auto fd = createFuncDesc(drgnProg, request);
if (!fd.has_value()) {
LOG(ERROR) << "Failed to create FuncDesc for " << request.func;
return nullptr;
}
VLOG(1) << "findFuncDesc returning " << std::hex << fd.value()->symName;
funcDescs.emplace(request.func, fd.value());
return fd.value();
}
std::shared_ptr<GlobalDesc> SymbolService::findGlobalDesc(
const std::string& global) {
2022-12-19 14:37:51 +00:00
if (auto it = globalDescs.find(global); it != end(globalDescs)) {
VLOG(1) << "Found globalDesc for " << global;
return it->second;
}
auto sym = locateSymbol(global);
if (!sym.has_value()) {
LOG(ERROR) << "Failed to get address for global " << global;
return nullptr;
}
VLOG(1) << "locateGlobal: address of " << global << " " << std::hex
<< sym->addr;
struct drgn_program* drgnProg = getDrgnProgram();
2022-12-19 14:37:51 +00:00
if (drgnProg == nullptr) {
return nullptr;
}
auto gd = std::make_shared<GlobalDesc>(global, sym->addr);
struct drgn_object globalObj {};
drgn_object_init(&globalObj, drgnProg);
BOOST_SCOPE_EXIT_ALL(&) {
drgn_object_deinit(&globalObj);
};
if (auto* err = drgn_program_find_object(drgnProg,
global.c_str(),
nullptr,
DRGN_FIND_OBJECT_ANY,
&globalObj)) {
2022-12-19 14:37:51 +00:00
LOG(ERROR) << "Failed to lookup global variable '" << global
<< "': " << err->code << " " << err->message;
return nullptr;
}
auto globalType = drgn_object_qualified_type(&globalObj);
gd->typeName = getTypeName(globalType.type);
VLOG(1) << "findGlobalDesc returning " << std::hex << gd;
globalDescs.emplace(global, gd);
return gd;
}
std::string SymbolService::getTypeName(struct drgn_type* type) {
2022-12-19 14:37:51 +00:00
if (drgn_type_kind(type) == DRGN_TYPE_POINTER) {
type = drgn_type_type(type).type;
}
return drgn_utils::typeToName(type);
2022-12-19 14:37:51 +00:00
}
std::optional<RootInfo> SymbolService::getRootType(const irequest& req) {
if (req.type == "global") {
/*
* This is super simple as all we have to do is locate assign the
* type of the provided global variable.
*/
VLOG(1) << "Processing global: " << req.func;
auto globalDesc = findGlobalDesc(req.func);
if (!globalDesc) {
return std::nullopt;
}
2023-05-19 16:18:04 +01:00
auto* drgnProg = getDrgnProgram();
if (drgnProg == nullptr) {
return std::nullopt;
}
drgn_object global{};
2023-05-19 16:18:04 +01:00
drgn_object_init(&global, drgnProg);
if (auto* err = drgn_program_find_object(
drgnProg, req.func.c_str(), nullptr, DRGN_FIND_OBJECT_ANY, &global);
err != nullptr) {
LOG(ERROR) << "Failed to lookup global variable '" << req.func
<< "': " << err->code << " " << err->message;
drgn_error_destroy(err);
return std::nullopt;
}
return RootInfo{req.func, drgn_object_qualified_type(&global)};
}
VLOG(1) << "Passing : " << req.func;
auto fd = findFuncDesc(req);
if (!fd) {
VLOG(1) << "Failed to lookup function " << req.func;
return std::nullopt;
}
// TODO: We are dealing with demangled names for drgn. drgn seems to store
// function names without parameters however. So strip parameters from
// demangled function name before passing to drgn.
// auto tmp = boost::core::demangle(req->func.c_str());
// auto demangledName = tmp.substr(0, tmp.find("("));
2023-05-19 16:18:04 +01:00
auto* drgnProg = getDrgnProgram();
if (drgnProg == nullptr) {
return std::nullopt;
}
2023-05-19 16:18:04 +01:00
auto ft = findTypeOfSymbol(drgnProg, req.func);
if (!ft) {
return std::nullopt;
}
if (req.isReturnRetVal()) {
VLOG(1) << "Processing return retval";
return RootInfo{std::string("return"), drgn_type_type(ft->type)};
}
if (!drgn_type_has_parameters(ft->type)) {
LOG(ERROR) << "Error: Object is not function?";
return std::nullopt;
}
auto* params = drgn_type_parameters(ft->type);
auto paramsCount = drgn_type_num_parameters(ft->type);
if (paramsCount == 0) {
LOG(ERROR) << "Function " << req.func << " has no parameters";
return std::nullopt;
}
auto argIdxOpt = fd->getArgumentIndex(req.arg);
if (!argIdxOpt.has_value()) {
return std::nullopt;
}
uint8_t argIdx = argIdxOpt.value();
/*
* The function descriptor has a fully populated argument vector so
* check that we have a valid argument decriptor for the requested arg.
* Most likely reason for it being invalid is if the DWARF formal parameter
* contains no location information.
*/
if (!fd->arguments[argIdx]->valid) {
LOG(ERROR) << "Argument " << argIdx << " for " << fd->symName
<< " is invalid";
return std::nullopt;
}
drgn_qualified_type paramType{};
if (auto* err = drgn_parameter_type(&params[argIdx], &paramType);
err != nullptr) {
LOG(ERROR) << "Failed to get params: " << err->code << " " << err->message;
drgn_error_destroy(err);
return std::nullopt;
}
std::string paramName;
if (params[argIdx].name) {
VLOG(1) << "ARG NAME: " << params[argIdx].name;
paramName = params[argIdx].name;
}
return RootInfo{paramName, paramType};
}
} // namespace oi::detail