/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include extern "C" { #include #include } #include "Metrics.h" #include "OIDebugger.h" #include "OIOpts.h" #include "PaddingHunter.h" #include "TimeUtils.h" #include "TreeBuilder.h" /* Global for signal handling */ std::weak_ptr weak_oid; namespace fs = std::filesystem; using namespace ObjectIntrospection; // Using an enum inside a namespace here instead of an `enum class` because // enums defined via `enum class` aren't implicitly convertible to `int`, and // having to cast the argument for each call to `exit` would be ugly. namespace ExitStatus { enum ExitStatus { Success = EXIT_SUCCESS, UsageError, FileNotFoundError, ConfigGenerationError, ScriptParsingError, StopTargetError, SegmentRemovalError, SegmentInitError, CompilationError, PatchingError, ProcessingTargetDataError, OidObjectError, CacheUploadError, }; } /* * This is the main driver code for the Object Introspection (OI) debugger. * The 'oid' debugger is the driver application which instruments a target * application to collect data and then reaps that data from the target. * * The flow of work in 'oid' can, roughly speaking, be split into several * phases: * * Phase 1 - Object Discovery * Using the 'drgn' debugger library, discover the container types in a * given parent object and its descendent objects. With this information we * can locate the addresses in memory of these container objects. * * Phase 2 - Code Generation * Auto generate C++ code to iterate over the data structures of interest, * calculate the size of these objects and record the data. * * Phase 3 - Object Code Generation * JIT compile the C++ code into object code and relocate the resulting * text into the traget processes address space. This is done using * clang/llvm APIs. * * Phase 4 - Target Process Instrumentation * The generated object code is injected into the target process in a text * segment created apriori. Threads are captured and controlled at probe * sites using breakpoint traps and the ptrace(2) interfaces. * * Phase 5 - Data processing * The results are retrieved from the target processes data buffer and * processed. The data buffer is a data segment that we mapped into the * target process. * * In addition to the above phases we have process control which is * currently based around ptrace(2). */ constexpr static OIOpts opts{ OIOpt{'h', "help", no_argument, nullptr, "Print this message and exit"}, OIOpt{'p', "pid", required_argument, "", "Target process to attach to"}, OIOpt{'c', "config-file", required_argument, nullptr, ""}, OIOpt{'x', "data-buf-size", required_argument, "", "Size of data segment (default:1MB)\n" "Accepts multiplicative suffix: K, M, G, T, P, E"}, OIOpt{'d', "debug-level", required_argument, "", "Verbose level for logging"}, OIOpt{'l', "jit-logging", no_argument, nullptr, "Enable JIT's logs"}, OIOpt{'r', "remove-mappings", no_argument, nullptr, "Remove oid mappings from target process"}, OIOpt{'s', "script", required_argument, nullptr, ""}, OIOpt{'S', "script-source", required_argument, nullptr, "type:symbol:arg"}, OIOpt{'t', "timeout", required_argument, "", "How long to probe the target process for"}, OIOpt{'k', "custom-code-file", required_argument, nullptr, "\n" "Use your own CPP file instead of CodeGen"}, OIOpt{'e', "compile-and-exit", no_argument, nullptr, "Compile only then exit"}, OIOpt{'o', "cache-path", required_argument, "", "Enable caching using the provided directory"}, OIOpt{'u', "cache-remote", required_argument, nullptr, "Enable upload/download of cache files\n" "Pick from {both,upload,download}"}, OIOpt{'i', "debug-path", required_argument, nullptr, "\n" "Run oid on a executable with debug infos instead of a running " "process"}, // Optional arguments are pretty nasty - it will only work as // "--dump-json=PATH" and not "--dump-json PATH". Try and make this take a // required argument at a later point OIOpt{'J', "dump-json", optional_argument, "[oid_out.json]", "File to dump the results to, as JSON\n" "(in addition to the default RocksDB output)"}, OIOpt{ 'B', "dump-data-segment", no_argument, nullptr, "Dump the data segment's content, before TreeBuilder processes it\n" "Each argument gets its own dump file: 'dataseg...dump'"}, OIOpt{'j', "generate-jit-debug", no_argument, nullptr, "Output debug info for the generated JIT code"}, OIOpt{'n', "chase-raw-pointers", no_argument, nullptr, "Generate probe for raw pointers"}, OIOpt{'a', "log-all-structs", no_argument, nullptr, "Log all structures"}, OIOpt{'z', "disable-packed-structs", no_argument, nullptr, "Disable appending packed attributes to the definition of structs"}, OIOpt{'w', "disable-padding-hunter", no_argument, nullptr, "Disable Padding Hunter\n" "Padded structs will be written to file called PADDING"}, OIOpt{'T', "capture-thrift-isset", no_argument, nullptr, "Capture the isset value for Thrift fields"}, OIOpt{'P', "polymorphic-inheritance", no_argument, nullptr, "Follow runtime polymorphic inheritance hierarchies"}, OIOpt{'m', "mode", required_argument, "[prod]", "Allows to specify a mode of operation/group of settings"}, }; void usage() { std::cout << "usage: oid ...\n"; std::cout << opts; std::cout << "\n\tFor problem reporting, questions and general comments " "please pop along" "\n\tto the Object Introspection Workplace group at " "https://fburl.com/oid.\n" << std::endl; } /* * This handler currently isn't completely async-signal-safe. It's mostly * all in the segment removal code and is commented in appropriate places. * The error messages are obviously not safe either. */ void sigIntHandler(int sigNum) { VLOG(1) << "Received SIGNAL " << sigNum; if (auto oid = weak_oid.lock()) { oid->stopAll(); } else { /* * A small window exists between install a handler and creating the main * debugger object. */ LOG(ERROR) << "Failed to find oid object when handling signal"; exit(ExitStatus::OidObjectError); } } void installSigHandlers(void) { struct sigaction nact {}; struct sigaction oact {}; nact.sa_handler = sigIntHandler; sigemptyset(&nact.sa_mask); nact.sa_flags = SA_SIGINFO; sigaction(SIGINT, nullptr, &oact); if (oact.sa_handler != SIG_IGN) { sigaction(SIGINT, &nact, nullptr); } /* Also stop on SIGALRM, for handling timeout */ sigaction(SIGALRM, &nact, nullptr); } std::optional strunittol(const char *str) { errno = 0; char *strend = nullptr; long retval = strtol(str, &strend, 10); if (errno != 0) { return std::nullopt; } switch (*strend) { case 'E': retval *= 1024; [[fallthrough]]; case 'P': retval *= 1024; [[fallthrough]]; case 'T': retval *= 1024; [[fallthrough]]; case 'G': retval *= 1024; [[fallthrough]]; case 'M': retval *= 1024; [[fallthrough]]; case 'K': retval *= 1024; if (*(strend + 1) != '\0') { return std::nullopt; } [[fallthrough]]; case '\0': break; default: return std::nullopt; } return retval; } namespace Oid { struct Config { pid_t pid; std::string debugInfoFile; std::string configFile; fs::path cacheBasePath; fs::path customCodeFile; size_t dataSegSize; int timeout_s; bool cacheRemoteUpload; bool cacheRemoteDownload; bool enableJitLogging; bool removeMappings; bool generateJitDebug; bool compAndExit; bool genPaddingStats = true; bool attachToProcess = true; bool hardDisableDrgn = false; }; } // namespace Oid static ExitStatus::ExitStatus runScript(const std::string &fileName, std::istream &script, const Oid::Config &oidConfig, const OICodeGen::Config &codeGenConfig, const TreeBuilder::Config &tbConfig) { if (!fileName.empty()) { VLOG(1) << "SCR FILE: " << fileName; } auto progStart = time_hr::now(); std::shared_ptr oid; // share oid with the global signal handler if (oidConfig.pid != 0) { oid = std::make_shared(oidConfig.pid, oidConfig.configFile, codeGenConfig, tbConfig); } else { oid = std::make_shared( oidConfig.debugInfoFile, oidConfig.configFile, codeGenConfig, tbConfig); } weak_oid = oid; // set the weak_ptr for signal handlers if (!oidConfig.cacheBasePath.empty()) { oid->setCacheBasePath(oidConfig.cacheBasePath); } oid->setCacheRemoteEnabled(oidConfig.cacheRemoteUpload, oidConfig.cacheRemoteDownload); oid->setCustomCodeFile(oidConfig.customCodeFile); oid->setEnableJitLogging(oidConfig.enableJitLogging); oid->setGenerateJitDebugInfo(oidConfig.generateJitDebug); oid->setHardDisableDrgn(oidConfig.hardDisableDrgn); VLOG(1) << "OIDebugger constructor took " << std::dec << time_ns(time_hr::now() - progStart) << " nsecs"; LOG(INFO) << "Script file: " << fileName; if (!oid->parseScript(script)) { LOG(ERROR) << "Error parsing input file '" << fileName << "'"; return ExitStatus::ScriptParsingError; } if (oidConfig.attachToProcess && !oid->stopTarget()) { LOG(ERROR) << "Couldn't stop target process with PID " << oidConfig.pid; return ExitStatus::StopTargetError; } auto initStart = time_hr::now(); /* * Remove any existing mappings if the '-r' flag is used or if any of the * segments have been explicitly changed on the command line. It's a bit of * a heavy hammer to remove both text and data if only one of the relevant * parameters have been set but that can always be modified in the future * if necessary. */ if (oidConfig.attachToProcess) { if (oidConfig.removeMappings) { if (!oid->segConfigExists()) { LOG(INFO) << "No config exists for pid " << oidConfig.pid << " : cannot remove mappings"; } else if (!oid->unmapSegments(true)) { LOG(ERROR) << "Failed to remove segments in target process with PID " << oidConfig.pid; return ExitStatus::SegmentRemovalError; } return ExitStatus::Success; } if (oidConfig.dataSegSize > 0) { oid->setDataSegmentSize(oidConfig.dataSegSize); } if (!oid->segmentInit()) { oid->contTargetThread(); LOG(ERROR) << "Failed to initialise segments in target process with PID " << oidConfig.pid; return ExitStatus::SegmentInitError; } // continue and detach main thread oid->contTargetThread(); } VLOG(1) << "init took " << std::dec << time_ns(time_hr::now() - initStart) << " nsecs\n" << "Compilation Started"; auto compileStart = time_hr::now(); if (!oid->compileCode()) { LOG(ERROR) << "Compilation failed"; return ExitStatus::CompilationError; } VLOG(1) << "Compilation Finished (" << std::dec << time_ns(time_hr::now() - compileStart) << " nsecs)"; if (oidConfig.compAndExit) { // Ensure the .th cache file also gets created oid->getTreeBuilderTyping(); if (oidConfig.genPaddingStats) { PaddingHunter paddingHunter; paddingHunter.localPaddedStructs = oid->getPaddingInfo(); paddingHunter.processLocalPaddingInfo(); paddingHunter.outputPaddingInfo(); } } else { installSigHandlers(); /* * Sigh. This is nonsense really and is tied to a single probe enabling. * This will need re-architecting when we move to multiple enablings. */ if (!oid->isGlobalDataProbeEnabled()) { oid->setMode(OIDebugger::OID_MODE_FUNC); } /* * I think we might be able to just fit the global variable work entirely * under patchFunctions and therefore leave the shape of the code at * this level pretty much unaltered. */ if (!oid->stopTarget()) { LOG(ERROR) << "Couldn't stop target process with PID " << oidConfig.pid; return ExitStatus::StopTargetError; } if (!oid->patchFunctions()) { oid->contTargetThread(); LOG(ERROR) << "Error patching functions"; return ExitStatus::PatchingError; } oid->contTargetThread(false); if (oidConfig.timeout_s > 0) { alarm(oidConfig.timeout_s); } while (!oid->isInterrupted()) { if (oid->processTrap(oidConfig.pid) == OIDebugger::OID_DONE) { break; } }; // Disable timeout timer alarm(0); // Cleanup all the remaining traps that were injected if (!oid->removeTraps(0)) { LOG(ERROR) << "Failed to remove instrumentation..."; } { // Resume stopped thread before cleanup VLOG(1) << "Resuming stopped threads..."; Metrics::Tracing __("resume_threads"); while (oid->processTrap(oidConfig.pid, false) == OIDebugger::OID_CONT) { } } oid->restoreState(); if (!oid->isInterrupted() && !oid->processTargetData()) { LOG(ERROR) << "Problems processing target data"; return ExitStatus::ProcessingTargetDataError; } } // Upload cache artifacts if present if (!oid->uploadCache()) { LOG(ERROR) << "cache upload requested and failed"; return ExitStatus::CacheUploadError; } std::cout << "SUCCESS " << fileName << std::endl; VLOG(1) << "Entire process took " << time_ns(time_hr::now() - progStart) << " nsecs"; return ExitStatus::Success; } int main(int argc, char *argv[]) { int debugLevel = 1; Oid::Config oidConfig = {}; std::string scriptFile; std::string scriptSource; std::string configGenOption; std::optional jsonPath{std::nullopt}; bool logAllStructs = true; bool chaseRawPointers = false; bool packStructs = true; bool dumpDataSegment = false; bool captureThriftIsset = false; bool polymorphicInheritance = false; Metrics::Tracing _("main"); #ifndef OSS_ENABLE folly::InitOptions init; init.useGFlags(false); init.removeFlags(false); folly::init(&argc, &argv, init); #else google::InitGoogleLogging(argv[0]); #endif google::SetStderrLogging(google::WARNING); int c = 0; while ((c = getopt_long(argc, argv, opts.shortOpts(), opts.longOpts(), nullptr)) != -1) { switch (c) { case 'm': { if (strcmp("prod", optarg) == 0) { // change default settings for prod oidConfig.hardDisableDrgn = true; oidConfig.cacheRemoteDownload = true; oidConfig.cacheBasePath = "/tmp/oid-cache"; chaseRawPointers = true; } else { LOG(ERROR) << "Invalid mode: " << optarg << " specified!"; usage(); return ExitStatus::UsageError; } break; } case 'x': { auto dataSegSizeArg = strunittol(optarg); if (!dataSegSizeArg.has_value() || dataSegSizeArg.value() <= 0) { LOG(ERROR) << "Invalid value specified for data buffer size"; usage(); return ExitStatus::UsageError; } oidConfig.dataSegSize = static_cast(dataSegSizeArg.value()); break; } case 'p': oidConfig.pid = atoi(optarg); break; case 'd': debugLevel = atoi(optarg); google::LogToStderr(); google::SetStderrLogging(google::INFO); // Enable debug logging for *only* our project, // and not the rest of fbcode google::SetVLOGLevel("Common", debugLevel); google::SetVLOGLevel("Descs", debugLevel); google::SetVLOGLevel("FuncGen", debugLevel); google::SetVLOGLevel("GobsService", debugLevel); google::SetVLOGLevel("ManifoldCache", debugLevel); google::SetVLOGLevel("Metrics", debugLevel); google::SetVLOGLevel("OICache", debugLevel); google::SetVLOGLevel("OICodeGen", debugLevel); google::SetVLOGLevel("OICompiler", debugLevel); google::SetVLOGLevel("OID", debugLevel); google::SetVLOGLevel("OIDebugger", debugLevel); google::SetVLOGLevel("OILexer", debugLevel); google::SetVLOGLevel("OILibrary", debugLevel); google::SetVLOGLevel("OILibraryImpl", debugLevel); google::SetVLOGLevel("OILogging", debugLevel); google::SetVLOGLevel("OIOpts", debugLevel); google::SetVLOGLevel("OIParser", debugLevel); google::SetVLOGLevel("OIUtils", debugLevel); google::SetVLOGLevel("PaddingHunter", debugLevel); google::SetVLOGLevel("Serialize", debugLevel); google::SetVLOGLevel("SymbolService", debugLevel); google::SetVLOGLevel("TimeUtils", debugLevel); google::SetVLOGLevel("TrapInfo", debugLevel); google::SetVLOGLevel("TreeBuilder", debugLevel); // Upstream glog defines `GLOG_INFO` as 0 https://fburl.com/ydjajhz0, // but internally it's defined as 1 https://fburl.com/code/9fwams75 gflags::SetCommandLineOption("minloglevel", "0"); break; case 'l': oidConfig.enableJitLogging = true; break; case 'k': oidConfig.customCodeFile = optarg; if (!fs::exists(oidConfig.customCodeFile)) { LOG(ERROR) << "Non existent generated code file: " << oidConfig.customCodeFile; usage(); return ExitStatus::FileNotFoundError; } if (oidConfig.customCodeFile == "/tmp/tmp_oid_output_2.cpp") { LOG(ERROR) << "Cannot use generatedCodePath:" << oidConfig.customCodeFile; return ExitStatus::UsageError; } break; case 'e': oidConfig.compAndExit = true; break; case 'j': oidConfig.generateJitDebug = true; break; case 'c': oidConfig.configFile = std::string(optarg); if (!fs::exists(oidConfig.configFile)) { LOG(ERROR) << "Non existent config file: " << oidConfig.configFile; usage(); return ExitStatus::FileNotFoundError; } break; case 'i': oidConfig.debugInfoFile = std::string(optarg); oidConfig.attachToProcess = false; oidConfig.compAndExit = true; if (!fs::exists(oidConfig.debugInfoFile)) { LOG(ERROR) << "Non existent debuginfo file: " << oidConfig.debugInfoFile; usage(); return ExitStatus::FileNotFoundError; } break; case 'o': oidConfig.cacheBasePath = optarg; break; case 'u': if (strcmp(optarg, "both") == 0) { oidConfig.cacheRemoteUpload = true; oidConfig.cacheRemoteDownload = true; } else if (strcmp(optarg, "upload") == 0) { oidConfig.cacheRemoteUpload = true; } else if (strcmp(optarg, "download") == 0) { oidConfig.cacheRemoteDownload = true; } else { LOG(ERROR) << "Invalid download option: " << optarg << " specified!"; usage(); return ExitStatus::UsageError; } break; case 'r': oidConfig.removeMappings = true; break; case 'n': chaseRawPointers = true; break; case 'a': logAllStructs = true; break; case 'z': packStructs = false; break; case 'B': dumpDataSegment = true; break; case 's': scriptFile = std::string(optarg); break; case 'S': scriptSource = std::string(optarg); break; case 't': oidConfig.timeout_s = atoi(optarg); break; case 'w': oidConfig.genPaddingStats = false; break; case 'J': jsonPath = optarg != nullptr ? optarg : "oid_out.json"; break; case 'T': captureThriftIsset = true; break; case 'P': polymorphicInheritance = true; break; case 'h': default: usage(); return ExitStatus::Success; } } if (oidConfig.configFile.empty()) { oidConfig.configFile = "/usr/local/share/oi/base.oid.toml"; if (!fs::exists(oidConfig.configFile)) { LOG(ERROR) << "Non existent default config file: " << oidConfig.configFile; usage(); return ExitStatus::FileNotFoundError; } LOG(INFO) << "Using default config file " << oidConfig.configFile; } if (oidConfig.pid != 0 && !oidConfig.debugInfoFile.empty()) { LOG(INFO) << "'-p' and '-b' are mutually exclusive"; usage(); return ExitStatus::UsageError; } if ((oidConfig.pid == 0 && oidConfig.debugInfoFile.empty()) || oidConfig.configFile.empty()) { usage(); return ExitStatus::UsageError; } if (!oidConfig.removeMappings && scriptFile.empty() && scriptSource.empty()) { LOG(INFO) << "One of '-s', '-r' or '-S' must be specified"; usage(); return ExitStatus::UsageError; } OICodeGen::Config codeGenConfig{ .useDataSegment = true, .chaseRawPointers = chaseRawPointers, .packStructs = packStructs, .genPaddingStats = oidConfig.genPaddingStats, .captureThriftIsset = captureThriftIsset, .polymorphicInheritance = polymorphicInheritance, }; TreeBuilder::Config tbConfig{ .logAllStructs = logAllStructs, .chaseRawPointers = chaseRawPointers, .genPaddingStats = oidConfig.genPaddingStats, .dumpDataSegment = dumpDataSegment, .jsonPath = jsonPath, }; if (!scriptFile.empty()) { if (!std::filesystem::exists(scriptFile)) { LOG(ERROR) << "Non-existent script file: " << scriptFile; return ExitStatus::FileNotFoundError; } std::ifstream script(scriptFile); auto status = runScript(scriptFile, script, oidConfig, codeGenConfig, tbConfig); if (status != ExitStatus::Success) { return status; } } else if (!scriptSource.empty()) { std::istringstream script(scriptSource); auto status = runScript(scriptFile, script, oidConfig, codeGenConfig, tbConfig); if (status != ExitStatus::Success) { return status; } } if (Metrics::Tracing::isEnabled()) { LOG(INFO) << "Will write metrics (" << Metrics::Tracing::isEnabled() << ") in " << Metrics::Tracing::outputPath(); } else { LOG(INFO) << "Will not write any metric: " << Metrics::Tracing::isEnabled(); } return ExitStatus::Success; }