object-introspection/oi/TreeBuilder.cpp

978 lines
33 KiB
C++
Raw Normal View History

2022-12-19 14:37:51 +00:00
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2023-04-26 16:20:53 +01:00
#include "oi/TreeBuilder.h"
2022-12-19 14:37:51 +00:00
#include <glog/logging.h>
#include <boost/algorithm/string/regex.hpp>
#include <boost/scope_exit.hpp>
#include <fstream>
#include <iostream>
#include <limits>
#include <msgpack.hpp>
#include <stdexcept>
2023-04-26 16:20:53 +01:00
#include "oi/ContainerInfo.h"
#include "oi/DrgnUtils.h"
#include "oi/Metrics.h"
#include "oi/OICodeGen.h"
#include "oi/PaddingHunter.h"
2022-12-19 14:37:51 +00:00
#include "rocksdb/db.h"
#include "rocksdb/options.h"
2023-02-27 19:01:59 +00:00
#include "rocksdb/statistics.h"
2022-12-19 14:37:51 +00:00
extern "C" {
#include <drgn.h>
#include <sys/types.h>
}
namespace oi::detail {
2023-04-05 20:55:40 +01:00
/* Tag indicating if the pointer has been followed or skipped */
enum class TrackPointerTag : uint64_t {
2023-04-05 20:55:40 +01:00
/* The content has been skipped.
* It prevents double counting the footprint of a node the JIT code has seen
* before, double counting content being stored inline, or getting stuck in an
* infinite loop when processing circular linked list.
*/
skipped = 0,
followed = 1,
};
2022-12-19 14:37:51 +00:00
TreeBuilder::TreeBuilder(Config c) : config{std::move(c)} {
buffer = std::make_unique<msgpack::sbuffer>();
auto testdbPath = "/tmp/testdb_" + std::to_string(getpid());
if (auto status = rocksdb::DestroyDB(testdbPath, {}); !status.ok()) {
LOG(FATAL) << "RocksDB error while destroying database: "
<< status.ToString();
}
const int twoMinutes = 120;
rocksdb::Options options;
options.compression = rocksdb::kZSTD;
options.create_if_missing = true;
options.statistics = rocksdb::CreateDBStatistics();
options.stats_dump_period_sec = twoMinutes;
options.PrepareForBulkLoad();
options.OptimizeForSmallDb();
if (auto status = rocksdb::DB::Open(options, testdbPath, &db); !status.ok()) {
LOG(FATAL) << "RocksDB error while opening database: " << status.ToString();
}
}
struct TreeBuilder::Variable {
struct drgn_type* type;
2022-12-19 14:37:51 +00:00
std::string_view name;
std::string typePath;
std::optional<bool> isset = std::nullopt;
bool isStubbed = false;
};
struct TreeBuilder::DBHeader {
/**
* Version of the database schema. See TreeBuilder.h for more info.
*/
Version version;
/**
* List of IDs corresponding to the root of the probed objects.
*/
std::vector<NodeID> rootIDs;
MSGPACK_DEFINE_ARRAY(version, rootIDs)
};
struct TreeBuilder::Node {
struct ContainerStats {
/**
* The number of elements currently present in the container
* (e.g. `std::vector::size()`).
*/
size_t length;
/**
* The maximum number of elements the container can
* currently hold (e.g. `std::vector::capacity()`).
*/
size_t capacity;
/**
* The static size (see comment for `staticSize` below for clarification on
* what this means) of each element in a container. For example, if this
* node corresponds to a `std::vector<int>` then `elementStaticSize`
* would be `sizeof(int)`.
*/
size_t elementStaticSize;
MSGPACK_DEFINE_ARRAY(length, capacity, elementStaticSize)
};
/**
* The unique identifier for this node, used as the key for this
* node's entry in RocksDB.
*/
NodeID id;
/**
* Roughly corresponds to the name you would use to refer to this node in
* the code (e.g. variable name, member name, etc.). In some cases there is
* no meaningful name (e.g. the elements of a vector, the node behind a
* `typedef`) and this is left empty.
*/
std::string_view name{};
/**
* The type of this node, as it would be written in the code
* (e.g. `std::vector<int>`, `float`, `MyStruct`).
*/
std::string typeName{};
std::string typePath{};
bool isTypedef{};
/**
* The compile-time-determinable size (i.e. memory footprint measured in
* bytes) of this node, essentially corresponding to `sizeof(TYPE)`.
* Just like the semantics of `sizeof`, this is inherently inclusive of the
* type's members (if it is a `struct`, `class`, or `union`).
*/
size_t staticSize{};
/**
* The size (i.e. memory usage measured in bytes) of the dynamically
* allocated data used by this node (e.g. the heap-allocated memory
* associated with a `std::vector`). This includes the `dynamicSize` of all
* children (whether they be `struct`/`class` members, or the elements of a
* container).
*/
size_t dynamicSize{};
std::optional<size_t> paddingSavingsSize{std::nullopt};
std::optional<uintptr_t> pointer{std::nullopt};
std::optional<ContainerStats> containerStats{std::nullopt};
/**
* Range of this node's children (start is inclusive, end is exclusive)
*
* If this node represents a container, `children` contains all
* of the container's elements.
* If this node represents a `struct` or `class`, `children`
* contains all of its members.
* If this node is a `typedef` or a pointer, `children` should contain a
* single entry corresponding to the referenced type.
*/
std::optional<std::pair<NodeID, NodeID>> children{std::nullopt};
std::optional<bool> isset{std::nullopt};
/**
* An estimation of the "exclusive size" of a type, trying to
* attribute every byte once and only once to types in the tree.
*/
size_t exclusiveSize{};
MSGPACK_DEFINE_ARRAY(id,
name,
typeName,
typePath,
isTypedef,
staticSize,
dynamicSize,
paddingSavingsSize,
containerStats,
pointer,
children,
isset,
exclusiveSize)
2022-12-19 14:37:51 +00:00
};
TreeBuilder::~TreeBuilder() {
/* FB: Remove error IDs, Strobelight doesn't handle them yet */
std::erase(rootIDs, ERROR_NODE_ID);
/*
* Now that all the Nodes have been inserted in the DB,
* we can insert the DBHeader with the proper list of rootIDs.
*/
const DBHeader header{.version = VERSION, .rootIDs = std::move(rootIDs)};
auto serializedHeader = serialize(header);
rocksdb::WriteOptions options{};
options.disableWAL = true;
if (auto status =
db->Put(options, std::to_string(ROOT_NODE_ID), serializedHeader);
!status.ok()) {
LOG(ERROR) << "RocksDB error while writing DBHeader: " << status.ToString();
}
if (auto status = db->Close(); !status.ok()) {
LOG(ERROR) << "RocksDB error while closing database: " << status.ToString();
}
delete db;
}
bool TreeBuilder::emptyOutput() const {
return std::ranges::all_of(rootIDs,
[](auto& id) { return id == ERROR_NODE_ID; });
2022-12-19 14:37:51 +00:00
}
void TreeBuilder::build(const std::vector<uint64_t>& data,
const std::string& argName,
struct drgn_type* type,
const TypeHierarchy& typeHierarchy) {
2022-12-19 14:37:51 +00:00
th = &typeHierarchy;
oidData = &data;
2023-06-22 17:27:21 +01:00
oidDataIndex = 4; // HACK: OID's first 4 outputs are dummy 0s
2022-12-19 14:37:51 +00:00
metrics::Tracing _("build_tree");
2022-12-19 14:37:51 +00:00
VLOG(1) << "Building tree...";
{
auto& rootID = rootIDs.emplace_back(nextNodeID++);
2022-12-19 14:37:51 +00:00
try {
process(rootID, {.type = type, .name = argName, .typePath = argName});
} catch (...) {
// Mark the failure using the error node ID
rootID = ERROR_NODE_ID;
throw;
}
}
VLOG(1) << "Finished building tree";
rocksdb::CompactRangeOptions opts;
rocksdb::Status s = db->CompactRange(opts, nullptr, nullptr);
if (!s.ok()) {
LOG(FATAL) << "RocksDB error while compacting: " << s.ToString();
}
VLOG(1) << "Finished compacting db";
// Were all object sizes consumed?
if (oidDataIndex != oidData->size()) {
if (config.strict) {
LOG(FATAL) << "some object sizes not consumed and OID is in strict mode!"
<< "reported: " << oidData->size() << " consumed "
<< oidDataIndex;
}
2022-12-19 14:37:51 +00:00
LOG(WARNING) << "WARNING: some object sizes not consumed;"
<< "object tree may be inaccurate. "
<< "reported: " << oidData->size() << " consumed "
<< oidDataIndex;
} else {
VLOG(1) << "Consumed all object sizes: " << oidDataIndex;
}
th = nullptr;
oidData = nullptr;
}
void TreeBuilder::dumpJson() {
if (!config.jsonPath.has_value()) {
LOG(ERROR) << "No output path was provided for JSON";
return;
}
std::ofstream output(*config.jsonPath);
output << '[';
for (auto rootID : rootIDs) {
if (rootID == ERROR_NODE_ID) {
// On error, output an empty object to maintain offsets
output << "{},";
} else {
JSON(rootID, output);
output << ',';
}
}
/* Remove the trailing comma */
if (!rootIDs.empty()) {
output.seekp(-1, std::ios_base::cur);
}
output << "]\n"; // Text files should end with a newline per POSIX
VLOG(1) << "Finished writing JSON to disk";
}
void TreeBuilder::setPaddedStructs(
std::map<std::string, PaddingInfo>* _paddedStructs) {
2022-12-19 14:37:51 +00:00
this->paddedStructs = _paddedStructs;
}
static std::string drgnTypeToName(struct drgn_type* type) {
2022-12-19 14:37:51 +00:00
if (type->_private.program != nullptr) {
return drgn_utils::typeToName(type);
2022-12-19 14:37:51 +00:00
}
return type->_private.oi_name ? type->_private.oi_name : "";
}
static struct drgn_error* drgnTypeSizeof(struct drgn_type* type,
uint64_t* ret) {
2022-12-19 14:37:51 +00:00
static struct drgn_error incompleteTypeError = {
.code = DRGN_ERROR_TYPE,
.needs_destroy = false,
.errnum = 0,
.path = NULL,
.address = 0,
.message = (char*)"cannot get size of incomplete type",
2022-12-19 14:37:51 +00:00
};
if (drgn_type_kind(type) == DRGN_TYPE_FUNCTION) {
*ret = sizeof(uintptr_t);
return nullptr;
}
if (type->_private.program != nullptr) {
return drgn_type_sizeof(type, ret);
}
// If type has no size, report an error to trigger a sizeMap lookup
if (type->_private.oi_size ==
std::numeric_limits<decltype(type->_private.size)>::max()) {
return &incompleteTypeError;
}
*ret = type->_private.oi_size;
return nullptr;
}
uint64_t TreeBuilder::getDrgnTypeSize(struct drgn_type* type) {
2022-12-19 14:37:51 +00:00
uint64_t size = 0;
struct drgn_error* err = drgnTypeSizeof(type, &size);
2022-12-19 14:37:51 +00:00
BOOST_SCOPE_EXIT(err) {
drgn_error_destroy(err);
}
BOOST_SCOPE_EXIT_END
if (err == nullptr) {
return size;
}
std::string typeName = drgnTypeToName(type);
for (auto& [typeName2, size2] : th->sizeMap)
2022-12-19 14:37:51 +00:00
if (typeName.starts_with(typeName2))
return size2;
if (typeName.starts_with("basic_string<char, std::char_traits<char>, "
"std::allocator<char> >")) {
return sizeof(std::string);
}
throw std::runtime_error("Failed to get size: " + std::to_string(err->code) +
" " + err->message);
}
uint64_t TreeBuilder::next() {
if (oidDataIndex >= oidData->size()) {
throw std::runtime_error("Unexpected end of data");
}
VLOG(3) << "next = " << (void*)(*oidData)[oidDataIndex];
2022-12-19 14:37:51 +00:00
return (*oidData)[oidDataIndex++];
}
bool TreeBuilder::isContainer(const Variable& variable) {
2022-12-19 14:37:51 +00:00
return th->containerTypeMap.contains(variable.type) ||
(drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY &&
drgn_type_length(variable.type) > 0);
}
bool TreeBuilder::isPrimitive(struct drgn_type* type) {
2022-12-19 14:37:51 +00:00
while (drgn_type_kind(type) == DRGN_TYPE_TYPEDEF) {
auto entry = th->typedefMap.find(type);
if (entry == th->typedefMap.end())
return false;
type = entry->second;
}
return drgn_type_primitive(type) != DRGN_NOT_PRIMITIVE_TYPE;
}
static std::string_view drgnKindStr(struct drgn_type* type) {
2022-12-19 14:37:51 +00:00
auto kind = OICodeGen::drgnKindStr(type);
// -1 is for the null terminator
kind.remove_prefix(sizeof("DRGN_TYPE_") - 1);
return kind;
}
void TreeBuilder::setSize(TreeBuilder::Node& node,
uint64_t dynamicSize,
uint64_t memberSizes) {
node.dynamicSize = dynamicSize;
if (memberSizes > node.staticSize + node.dynamicSize) {
// TODO handle this edge case in a more elegant way.
// This can occur when handling bitfields or vector<bool>
// (as we cannot accurately track sub-byte sizes currently)
node.exclusiveSize = 0;
} else {
node.exclusiveSize = node.staticSize + node.dynamicSize - memberSizes;
}
}
2022-12-19 14:37:51 +00:00
TreeBuilder::Node TreeBuilder::process(NodeID id, Variable variable) {
Node node{
.id = id,
.name = variable.name,
.typeName = drgnTypeToName(variable.type),
.typePath = std::move(variable.typePath),
.staticSize = getDrgnTypeSize(variable.type),
.isset = variable.isset,
};
VLOG(2) << "Processing node [" << id << "] (name: '" << variable.name
<< "', typeName: '" << node.typeName
<< "', kind: " << drgnKindStr(variable.type) << ")"
<< (variable.isStubbed ? " STUBBED" : "")
<< (th->knownDummyTypeList.contains(variable.type) ? " DUMMY" : "");
// Default dynamic size to 0 and calculate fallback exclusive size
setSize(node, 0, 0);
2022-12-19 14:37:51 +00:00
if (!variable.isStubbed) {
switch (drgn_type_kind(variable.type)) {
case DRGN_TYPE_POINTER:
2023-04-24 16:35:16 +01:00
if (config.features[Feature::ChaseRawPointers]) {
2022-12-19 14:37:51 +00:00
// Pointers to incomplete types are stubbed out
// See OICodeGen::enumeratePointerType
if (th->knownDummyTypeList.contains(variable.type)) {
break;
}
auto entry = th->pointerToTypeMap.find(variable.type);
if (entry != th->pointerToTypeMap.end()) {
auto innerTypeKind = drgn_type_kind(entry->second);
if (innerTypeKind != DRGN_TYPE_FUNCTION) {
node.pointer = next();
2023-04-05 20:55:40 +01:00
if (innerTypeKind == DRGN_TYPE_VOID) {
break;
}
if (next() == (uint64_t)TrackPointerTag::skipped) {
2022-12-19 14:37:51 +00:00
break;
}
}
auto childID = nextNodeID++;
auto child = process(childID, Variable{entry->second, "", ""});
node.children = {childID, childID + 1};
setSize(node, child.staticSize + child.dynamicSize,
child.staticSize + child.dynamicSize);
2022-12-19 14:37:51 +00:00
}
}
break;
case DRGN_TYPE_TYPEDEF: {
const static boost::regex standardIntegerRegex{
"((s?size)|(u?int(_fast|_least)?(8|16|32|64|128|ptr))|(ptrdiff))_"
"t"};
// We don't expand typedefs for well-known integer types from `stdint.h`
// to prevent our output from being extremely verbose. We treat them as
// if they are primitives directly (hence this check coming *before* we
// set `node.isTypedef`).
if (boost::regex_match(node.typeName, standardIntegerRegex)) {
break;
}
node.isTypedef = true;
auto entry = th->typedefMap.find(variable.type);
if (entry != th->typedefMap.end()) {
auto childID = nextNodeID++;
auto child = process(childID, Variable{entry->second, "", ""});
node.children = {childID, childID + 1};
setSize(node, child.dynamicSize,
child.dynamicSize + child.staticSize);
2022-12-19 14:37:51 +00:00
}
} break;
case DRGN_TYPE_CLASS:
case DRGN_TYPE_STRUCT:
case DRGN_TYPE_ARRAY:
if (th->knownDummyTypeList.contains(variable.type)) {
break;
} else if (isContainer(variable)) {
processContainer(variable, node);
} else {
drgn_type* objectType = variable.type;
if (auto it = th->descendantClasses.find(objectType);
it != th->descendantClasses.end()) {
// The first item of data in dynamic classes identifies which
// concrete type we should process it as, represented as an index
// into the vector of child classes, or -1 to processes this type
// as itself.
const auto& descendants = it->second;
auto val = next();
if (val != (uint64_t)-1) {
objectType = descendants[val];
node.typeName = drgnTypeToName(objectType);
node.staticSize = getDrgnTypeSize(objectType);
}
}
auto entry = th->classMembersMap.find(objectType);
2022-12-19 14:37:51 +00:00
if (entry == th->classMembersMap.end() || entry->second.empty()) {
break;
}
const auto& members = entry->second;
2022-12-19 14:37:51 +00:00
node.children = {nextNodeID, nextNodeID + members.size()};
nextNodeID += members.size();
auto childID = node.children->first;
bool captureThriftIsset =
th->thriftIssetStructTypes.contains(objectType);
2022-12-19 14:37:51 +00:00
uint64_t memberSizes = 0;
2022-12-19 14:37:51 +00:00
for (std::size_t i = 0; i < members.size(); i++) {
std::optional<bool> isset;
if (captureThriftIsset && i < members.size() - 1) {
// Retrieve isset value for each member variable, except Thrift's
// __isset field, which we assume comes last.
// A value of -1 indicates a non-optional field for which we
// don't record an isset value.
auto val = next();
if (val != (uint64_t)-1) {
isset = val;
}
}
const auto& member = members[i];
2022-12-19 14:37:51 +00:00
auto child =
process(childID++,
Variable{member.type, member.member_name,
member.member_name, isset, member.isStubbed});
node.dynamicSize += child.dynamicSize;
memberSizes += child.dynamicSize + child.staticSize;
2022-12-19 14:37:51 +00:00
}
setSize(node, node.dynamicSize, memberSizes);
2022-12-19 14:37:51 +00:00
}
break;
default:
// The remaining types are all described entirely by their static size,
// and hence need no special handling.
break;
}
2023-04-24 16:35:16 +01:00
if (config.features[Feature::GenPaddingStats]) {
2022-12-19 14:37:51 +00:00
auto entry = paddedStructs->find(node.typeName);
if (entry != paddedStructs->end()) {
entry->second.instancesCnt++;
node.paddingSavingsSize = entry->second.savingSize;
}
}
}
rocksdb::WriteOptions options{};
options.disableWAL = true;
auto status = db->Put(options, std::to_string(node.id), serialize(node));
if (!status.ok()) {
throw std::runtime_error("RocksDB error while inserting node [" +
std::to_string(node.id) +
"]: " + status.ToString());
}
return node;
}
void TreeBuilder::processContainer(const Variable& variable, Node& node) {
2022-12-19 14:37:51 +00:00
VLOG(1) << "Processing container [" << node.id << "] of type '"
<< node.typeName << "'";
ContainerTypeEnum kind = UNKNOWN_TYPE;
std::vector<struct drgn_qualified_type> elementTypes;
if (drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY) {
kind = ARRAY_TYPE;
struct drgn_type* arrayElementType = nullptr;
2022-12-19 14:37:51 +00:00
size_t numElems = 0;
if (config.features[Feature::TypeGraph]) {
arrayElementType = drgn_type_type(variable.type).type;
numElems = drgn_type_length(variable.type);
} else {
drgn_utils::getDrgnArrayElementType(variable.type, &arrayElementType,
numElems);
}
2022-12-19 14:37:51 +00:00
assert(numElems > 0);
elementTypes.push_back(
drgn_qualified_type{arrayElementType, (enum drgn_qualifiers)(0)});
} else {
auto entry = th->containerTypeMap.find(variable.type);
if (entry == th->containerTypeMap.end()) {
throw std::runtime_error(
"Could not find container information for type with name '" +
node.typeName + "'");
}
auto& [containerKind, templateTypes] = entry->second;
kind = containerKind;
for (const auto& tt : templateTypes) {
2022-12-19 14:37:51 +00:00
elementTypes.push_back(tt);
}
}
/**
* Some containers (conditionally) store their contents *directly* inside
* themselves (as opposed to having a pointer to heap-allocated memory).
* `std::pair` and `std::array` are two trivial examples, but some types vary
* whether their contents are stored inline or externally depending on
* runtime conditions (usually the number of elements currently present in
* the container).
*/
bool contentsStoredInline = false;
// Initialize, then take a reference to the underlying value for convenience
// so that we don't have to dereference the optional every time we want to use
// it.
auto& containerStats =
2022-12-19 14:37:51 +00:00
node.containerStats.emplace(Node::ContainerStats{0, 0, 0});
for (auto& type : elementTypes) {
2022-12-19 14:37:51 +00:00
containerStats.elementStaticSize += getDrgnTypeSize(type.type);
}
switch (kind) {
case OPTIONAL_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
if (next() == 0U) {
containerStats.length = 0;
return;
}
break;
case FOLLY_OPTIONAL_TYPE:
// TODO: Not sure why we are capturing pointer for folly::Optional but
// not std::optional. Both are supposed to store data inline.
contentsStoredInline = true;
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
if (*node.pointer == 0) {
containerStats.length = 0;
return;
}
break;
case WEAK_PTR_TYPE:
// Do not handle weak pointers beyond their static size for now.
break;
2022-12-19 14:37:51 +00:00
case SHRD_PTR_TYPE:
case UNIQ_PTR_TYPE:
node.pointer = next();
containerStats.length = *node.pointer ? 1 : 0;
containerStats.capacity = 1;
2023-04-05 20:55:40 +01:00
if (next() == (uint64_t)TrackPointerTag::skipped) {
2022-12-19 14:37:51 +00:00
return;
}
break;
case TRY_TYPE:
case REF_WRAPPER_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
2023-04-05 20:55:40 +01:00
if (next() == (uint64_t)TrackPointerTag::skipped) {
2022-12-19 14:37:51 +00:00
return;
}
break;
case SORTED_VEC_SET_TYPE:
2022-12-19 14:37:51 +00:00
case CONTAINER_ADAPTER_TYPE: {
node.pointer = next();
// Copy the underlying container's sizes and stats directly into this
// container adapter
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
// elementTypes is only populated with the underlying container type for
// container adapters
auto containerType = elementTypes[0];
auto child = process(
childID++, {.type = containerType.type,
.name = "",
.typePath = drgnTypeToName(containerType.type) + "[]"});
setSize(node, child.dynamicSize, child.dynamicSize + child.staticSize);
2022-12-19 14:37:51 +00:00
node.containerStats = child.containerStats;
return;
}
case STD_VARIANT_TYPE: {
containerStats.length = containerStats.capacity = 1;
containerStats.elementStaticSize = 0;
for (auto& type : elementTypes) {
2022-12-19 14:37:51 +00:00
auto paramSize = getDrgnTypeSize(type.type);
containerStats.elementStaticSize =
std::max(containerStats.elementStaticSize, paramSize);
}
node.dynamicSize = 0;
// When a std::variant is valueless_by_exception, its index will be
// std::variant_npos (i.e. 0xffffffffffffffff).
//
// libstdc++ and libc++ both optimise the storage required for#
// std::variant's index value by using fewer than 8-bytes when possible.
// e.g. for a std::variant<A, B>, only three index values are required:
// one each for A and B and one for variant_npos. variant_npos may be
// represented internally by 0xff and only converted back to
// 0xffffffffffffffff when index() is called.
//
// However, this conversion may be optimised away in the target process,
// so we need to treat any invalid index as variant_npos.
if (auto index = next(); index < elementTypes.size()) {
// Recurse only into the type of the template parameter which
// is currently stored in this variant
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
auto elementType = elementTypes[index];
auto child = process(
childID++, {.type = elementType.type,
.name = "",
.typePath = drgnTypeToName(elementType.type) + "[]"});
setSize(node, child.dynamicSize, child.dynamicSize + child.staticSize);
2022-12-19 14:37:51 +00:00
}
return;
}
case PAIR_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
break;
case SEQ_TYPE:
case MICROLIST_TYPE:
case FEED_QUICK_HASH_SET:
case FEED_QUICK_HASH_MAP:
case FB_HASH_MAP_TYPE:
case FB_HASH_SET_TYPE:
case MAP_SEQ_TYPE:
case FOLLY_SMALL_HEAP_VECTOR_MAP:
case REPEATED_FIELD_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
break;
case LIST_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = next();
break;
case FOLLY_IOBUFQUEUE_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 0;
2023-04-05 20:55:40 +01:00
if (next() == (uint64_t)TrackPointerTag::skipped) {
2022-12-19 14:37:51 +00:00
return;
}
// Fallthrough to the IOBuf data if we have a valid pointer
[[fallthrough]];
case FOLLY_IOBUF_TYPE:
containerStats.capacity = next();
containerStats.length = next();
break;
case FB_STRING_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
2023-04-05 20:55:40 +01:00
// Contents are either stored inline or have been seen before in the JIT
// code. Set to true either way so as not to double count.
contentsStoredInline = next() == 0;
{
constexpr int sharedCutOff = 255;
if (containerStats.capacity < sharedCutOff) {
// No sense in recording the pointer value if the string isn't
// potentially shared.
node.pointer.reset();
}
2022-12-19 14:37:51 +00:00
}
break;
case STRING_TYPE:
containerStats.capacity = next();
containerStats.length = next();
// Account for Small String Optimization (SSO)
// LLVM libc++: sizeof(string) = 24, SSO cutoff = 22
// GNU libstdc++: sizeof(string) = 32, SSO cutoff = 15
{
const int llvmSizeOf = 24;
const int llvmSsoCutOff = 22;
[[maybe_unused]] const int gnuSizeOf = 32;
const int gnuSsoCutOff = 15;
assert(node.staticSize == llvmSizeOf || node.staticSize == gnuSizeOf);
size_t ssoCutoff =
node.staticSize == llvmSizeOf ? llvmSsoCutOff : gnuSsoCutOff;
contentsStoredInline = containerStats.capacity <= ssoCutoff;
}
break;
case CAFFE2_BLOB_TYPE:
// This is a weird one, need to ask why we just overwite size like this
setSize(node, next(), 0);
2022-12-19 14:37:51 +00:00
return;
case ARRAY_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = next();
break;
case SMALL_VEC_TYPE: {
size_t requestedMaxInline = next();
size_t maxInline =
requestedMaxInline == 0
? 0
: std::max(sizeof(uintptr_t) / containerStats.elementStaticSize,
requestedMaxInline);
2022-12-19 14:37:51 +00:00
containerStats.capacity = next();
containerStats.length = next();
contentsStoredInline = containerStats.capacity <= maxInline;
} break;
case BOOST_BIMAP_TYPE:
// TODO: Hard to know the overhead of boost bimap. It isn't documented in
// the boost docs. Need to look closer at the implementation.
containerStats.length = containerStats.capacity = next();
break;
case SET_TYPE:
case STD_MAP_TYPE:
// Account for node overhead
containerStats.elementStaticSize += next();
containerStats.length = containerStats.capacity = next();
break;
case UNORDERED_SET_TYPE:
case STD_UNORDERED_MAP_TYPE: {
// Account for node overhead
containerStats.elementStaticSize += next();
size_t bucketCount = next();
// Both libc++ and libstdc++ define buckets as an array of raw pointers
setSize(node, node.dynamicSize + bucketCount * sizeof(void*), 0);
2022-12-19 14:37:51 +00:00
containerStats.length = containerStats.capacity = next();
} break;
case F14_MAP:
case F14_SET:
// F14 maps/sets don't actually store their contents inline, but the
// intention of setting this to `true` is to skip the usual calculation
// performed to determine `node.dynamicSize`, since F14 maps very
// conveniently provide a `getAllocatedMemorySize()` method which we can
// use instead.
contentsStoredInline = true;
setSize(node, node.dynamicSize + next(), 0);
2022-12-19 14:37:51 +00:00
containerStats.capacity = next();
containerStats.length = next();
break;
case RADIX_TREE_TYPE:
case MULTI_MAP_TYPE:
case BY_MULTI_QRT_TYPE:
containerStats.length = containerStats.capacity = next();
break;
case THRIFT_ISSET_TYPE:
case DUMMY_TYPE:
2022-12-19 14:37:51 +00:00
// Dummy container
containerStats.elementStaticSize = 0;
break;
default:
throw std::runtime_error("Unknown container (type was 0x" +
std::to_string(kind) + ")");
break;
}
if (!contentsStoredInline) {
setSize(node,
node.dynamicSize +
containerStats.elementStaticSize * containerStats.capacity,
0);
2022-12-19 14:37:51 +00:00
}
// A cutoff value used to sanity-check our results. If a container
// is larger than this, chances are that we've read uninitialized data,
// or there's a bug in Codegen.
constexpr size_t CONTAINER_SIZE_THRESHOLD = 1ULL << 38;
if (containerStats.elementStaticSize * containerStats.capacity >=
CONTAINER_SIZE_THRESHOLD) {
throw std::runtime_error(
"Container size exceeds threshold, this is likely due to reading "
"uninitialized data in the target process");
}
if (std::ranges::all_of(
elementTypes.cbegin(), elementTypes.cend(),
[this](auto& type) { return isPrimitive(type.type); })) {
2022-12-19 14:37:51 +00:00
VLOG(1)
<< "Container [" << node.id
<< "] contains only primitive types, skipping processing its members";
return;
}
auto numChildren = containerStats.length * elementTypes.size();
if (numChildren == 0) {
VLOG(1) << "Container [" << node.id << "] has no children";
return;
}
node.children = {nextNodeID, nextNodeID + numChildren};
VLOG(1) << "Container [" << node.id << "]'s children cover range ["
<< node.children->first << ", " << node.children->second << ")";
nextNodeID += numChildren;
auto childID = node.children->first;
uint64_t memberSizes = 0;
2022-12-19 14:37:51 +00:00
for (size_t i = 0; i < containerStats.length; i++) {
for (auto& type : elementTypes) {
2022-12-19 14:37:51 +00:00
auto child =
process(childID++, {.type = type.type,
.name = "",
.typePath = drgnTypeToName(type.type) + "[]"});
node.dynamicSize += child.dynamicSize;
memberSizes += child.dynamicSize + child.staticSize;
2022-12-19 14:37:51 +00:00
}
}
setSize(node, node.dynamicSize, memberSizes);
2022-12-19 14:37:51 +00:00
}
template <class T>
std::string_view TreeBuilder::serialize(const T& data) {
2022-12-19 14:37:51 +00:00
buffer->clear();
msgpack::pack(*buffer, data);
// It is *very* important that we construct the `std::string_view` with an
// explicit length, since `buffer->data()` may contain null bytes.
return std::string_view(buffer->data(), buffer->size());
}
void TreeBuilder::JSON(NodeID id, std::ofstream& output) {
2022-12-19 14:37:51 +00:00
std::string data;
auto status = db->Get(rocksdb::ReadOptions(), std::to_string(id), &data);
if (!status.ok()) {
throw std::runtime_error("RocksDB error while reading node [" +
std::to_string(id) + "]: " + status.ToString());
}
Node node;
msgpack::unpack(data.data(), data.size()).get().convert(node);
// Remove all backslashes to ensure the output is valid JSON
std::replace(node.typePath.begin(), node.typePath.end(), '\\', ' ');
std::replace(node.typeName.begin(), node.typeName.end(), '\\', ' ');
output << "{";
output << "\"name\":\"" << node.name << "\",";
output << "\"typePath\":\"" << node.typePath << "\",";
output << "\"typeName\":\"" << node.typeName << "\",";
output << "\"isTypedef\":" << (node.isTypedef ? "true" : "false") << ",";
output << "\"staticSize\":" << node.staticSize << ",";
output << "\"dynamicSize\":" << node.dynamicSize << ",";
output << "\"exclusiveSize\":" << node.exclusiveSize;
2022-12-19 14:37:51 +00:00
if (node.paddingSavingsSize.has_value()) {
output << ",";
output << "\"paddingSavingsSize\":" << *node.paddingSavingsSize;
}
if (node.pointer.has_value()) {
output << ",";
output << "\"pointer\":" << *node.pointer;
}
if (node.containerStats.has_value()) {
output << ",";
output << "\"length\":" << node.containerStats->length << ",";
output << "\"capacity\":" << node.containerStats->capacity << ",";
output << "\"elementStaticSize\":"
<< node.containerStats->elementStaticSize;
}
if (node.isset.has_value()) {
output << ",";
output << "\"isset\":" << (*node.isset ? "true" : "false");
}
if (node.children.has_value()) {
output << ",";
output << "\"members\":[";
auto [childIDStart, childIDEnd] = *node.children;
assert(childIDStart < childIDEnd);
// Trailing commas are disallowed in JSON, so we pull
// out the first iteration of the loop.
JSON(childIDStart, output);
for (auto childID = childIDStart + 1; childID < childIDEnd; childID++) {
output << ",";
JSON(childID, output);
}
output << "]";
}
output << "}";
}
} // namespace oi::detail