object-introspection/oi/TreeBuilder.cpp
Alastair Robertson 2d28b20d46 TypeGraph: Fix multi dimensional arrays
Multi dimensional arrays are not flattened into 1-D arrays when using
TypeGraph. Update TreeBuilder to account for this.

By not flattening arrays, we are able to produce more descriptive
results.

The disadvantage is that we must now recurse inside arrays
containing only primitives. A better solution to requiring flattening
would be the planned work to not recurse into any static types (not just
primitives). This would also apply to multi-dimensional arrays of
primitives.
2023-05-31 16:53:38 +01:00

963 lines
33 KiB
C++

/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "oi/TreeBuilder.h"
#include <glog/logging.h>
#include <boost/algorithm/string/regex.hpp>
#include <boost/scope_exit.hpp>
#include <fstream>
#include <iostream>
#include <limits>
#include <msgpack.hpp>
#include <stdexcept>
#include "oi/ContainerInfo.h"
#include "oi/DrgnUtils.h"
#include "oi/Metrics.h"
#include "oi/OICodeGen.h"
#include "oi/PaddingHunter.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"
extern "C" {
#include <drgn.h>
#include <sys/types.h>
}
/**
 * Marker read from the data stream next to a pointer value, telling the tree
 * builder whether the pointee was traversed or left out.
 */
enum class TrackPointerTag : uint64_t {
  /*
   * The pointee's content is absent from the data stream.
   * Skipping avoids counting the footprint of a node the JIT code has already
   * visited twice, counting inline-stored content twice, and looping forever
   * on circular linked lists.
   */
  skipped = 0,
  /* The pointee was traversed; its content is processed as a child node. */
  followed = 1,
};
/**
 * Creates the msgpack serialization buffer and opens a fresh RocksDB database
 * at `/tmp/testdb_<pid>`. Any database already present at that path is
 * destroyed first. A RocksDB failure at either step is logged as FATAL.
 */
TreeBuilder::TreeBuilder(Config c) : config{std::move(c)} {
  buffer = std::make_unique<msgpack::sbuffer>();

  const auto dbPath = "/tmp/testdb_" + std::to_string(getpid());

  const auto destroyStatus = rocksdb::DestroyDB(dbPath, {});
  if (!destroyStatus.ok()) {
    LOG(FATAL) << "RocksDB error while destroying database: "
               << destroyStatus.ToString();
  }

  // Statistics are dumped every two minutes.
  const int statsDumpPeriodSec = 120;

  rocksdb::Options dbOptions;
  dbOptions.compression = rocksdb::kZSTD;
  dbOptions.create_if_missing = true;
  dbOptions.statistics = rocksdb::CreateDBStatistics();
  dbOptions.stats_dump_period_sec = statsDumpPeriodSec;
  dbOptions.PrepareForBulkLoad();
  dbOptions.OptimizeForSmallDb();

  const auto openStatus = rocksdb::DB::Open(dbOptions, dbPath, &db);
  if (!openStatus.ok()) {
    LOG(FATAL) << "RocksDB error while opening database: "
               << openStatus.ToString();
  }
}
/**
 * Describes one value that `process()` turns into a tree node.
 * Member order matters: instances are built with positional aggregate
 * initialization.
 */
struct TreeBuilder::Variable {
  // drgn type of the value being processed.
  struct drgn_type* type;
  // Name copied into the resulting node (may be empty).
  std::string_view name;
  // Path string moved into the resulting node's `typePath`.
  std::string typePath;
  // Thrift isset flag captured for this member, if any.
  std::optional<bool> isset{};
  // When true, `process()` records the node without descending into its type.
  bool isStubbed{false};
};
/**
 * Header record written to the database under the `ROOT_NODE_ID` key once all
 * nodes have been inserted.
 */
struct TreeBuilder::DBHeader {
  /**
   * Database schema version. See TreeBuilder.h for more info.
   */
  Version version;

  /**
   * IDs of the root nodes, one per probed object.
   */
  std::vector<NodeID> rootIDs;

  MSGPACK_DEFINE_ARRAY(version, rootIDs)
};
/**
 * One entry of the object tree, serialized with msgpack and stored in RocksDB
 * keyed by its `id`.
 *
 * Member declaration order is significant (designated initializers are used
 * to construct nodes), as is the field order in `MSGPACK_DEFINE_ARRAY`.
 */
struct TreeBuilder::Node {
  struct ContainerStats {
    /**
     * How many elements the container currently holds
     * (e.g. `std::vector::size()`).
     */
    size_t length;

    /**
     * How many elements the container can hold right now
     * (e.g. `std::vector::capacity()`).
     */
    size_t capacity;

    /**
     * Static size of a single element (see `staticSize` below for what
     * "static size" means). For a node describing a `std::vector<int>` this
     * is `sizeof(int)`.
     */
    size_t elementStaticSize;

    MSGPACK_DEFINE_ARRAY(length, capacity, elementStaticSize)
  };

  /**
   * Unique identifier of the node; doubles as the key of the node's entry in
   * RocksDB.
   */
  NodeID id;

  /**
   * Approximately the identifier used for this node in the code (variable
   * name, member name, ...). Left empty where no meaningful name exists,
   * e.g. for vector elements or the node behind a `typedef`.
   */
  std::string_view name{};

  /**
   * The node's type as written in code
   * (e.g. `std::vector<int>`, `float`, `MyStruct`).
   */
  std::string typeName{};

  std::string typePath{};

  bool isTypedef{};

  /**
   * Size in bytes known at compile time, essentially `sizeof(TYPE)`.
   * As with `sizeof`, this includes the type's members when it is a
   * `struct`, `class`, or `union`.
   */
  size_t staticSize{};

  /**
   * Bytes of dynamically allocated data used by this node (e.g. the heap
   * memory behind a `std::vector`), including the `dynamicSize` of every
   * child, be they `struct`/`class` members or container elements.
   */
  size_t dynamicSize{};

  std::optional<size_t> paddingSavingsSize{};

  std::optional<uintptr_t> pointer{};

  std::optional<ContainerStats> containerStats{};

  /**
   * Half-open ID range `[first, second)` of this node's children.
   *
   * For a container, the children are its elements.
   * For a `struct` or `class`, the children are its members.
   * For a `typedef` or a pointer, there should be exactly one child: the
   * referenced type.
   */
  std::optional<std::pair<NodeID, NodeID>> children{};

  std::optional<bool> isset{};

  /**
   * Estimated "exclusive size" of the type: an attempt to attribute each byte
   * to exactly one type in the tree.
   */
  size_t exclusiveSize{};

  MSGPACK_DEFINE_ARRAY(id,
                       name,
                       typeName,
                       typePath,
                       isTypedef,
                       staticSize,
                       dynamicSize,
                       paddingSavingsSize,
                       containerStats,
                       pointer,
                       children,
                       isset,
                       exclusiveSize)
};
/**
 * Writes the `DBHeader` (schema version plus the surviving root IDs) under
 * the `ROOT_NODE_ID` key, then closes and deletes the database handle.
 * RocksDB failures are logged as errors; nothing is thrown.
 */
TreeBuilder::~TreeBuilder() {
  /* FB: Remove error IDs, Strobelight doesn't handle them yet */
  std::erase(rootIDs, ERROR_NODE_ID);

  // All nodes are stored by now, so the header can finally be written with
  // the definitive list of root IDs.
  const DBHeader dbHeader{.version = VERSION, .rootIDs = std::move(rootIDs)};
  auto headerBytes = serialize(dbHeader);

  rocksdb::WriteOptions writeOptions{};
  writeOptions.disableWAL = true;

  const auto putStatus =
      db->Put(writeOptions, std::to_string(ROOT_NODE_ID), headerBytes);
  if (!putStatus.ok()) {
    LOG(ERROR) << "RocksDB error while writing DBHeader: "
               << putStatus.ToString();
  }

  const auto closeStatus = db->Close();
  if (!closeStatus.ok()) {
    LOG(ERROR) << "RocksDB error while closing database: "
               << closeStatus.ToString();
  }

  delete db;
}
bool TreeBuilder::emptyOutput() const {
return std::ranges::all_of(rootIDs,
[](auto& id) { return id == ERROR_NODE_ID; });
}
/**
 * Builds the tree for one probed object and stores its nodes in the database.
 *
 * @param data          Raw values produced for the object; consumed
 *                      sequentially through `next()`, starting at index 3.
 * @param argName       Name used for the root node (also its `typePath`).
 * @param type          drgn type of the root object.
 * @param typeHierarchy Type information consulted while processing.
 *
 * A new root ID is always appended to `rootIDs`. If processing throws, that
 * entry is replaced with `ERROR_NODE_ID` and the exception is rethrown.
 * `th` and `oidData` only point at the caller's arguments for the duration of
 * the call; they are cleared on both the normal and the exceptional path.
 */
void TreeBuilder::build(const std::vector<uint64_t>& data,
                        const std::string& argName,
                        struct drgn_type* type,
                        const TypeHierarchy& typeHierarchy) {
  th = &typeHierarchy;
  oidData = &data;
  oidDataIndex = 3;  // HACK: OID's first 3 outputs are dummy 0s

  ObjectIntrospection::Metrics::Tracing _("build_tree");
  VLOG(1) << "Building tree...";

  {
    auto& rootID = rootIDs.emplace_back(nextNodeID++);
    try {
      process(rootID, {.type = type, .name = argName, .typePath = argName});
    } catch (...) {
      // Mark the failure using the error node ID
      rootID = ERROR_NODE_ID;
      // Do not keep pointers to the caller's arguments after a failed build.
      th = nullptr;
      oidData = nullptr;
      throw;
    }
  }

  VLOG(1) << "Finished building tree";

  rocksdb::CompactRangeOptions opts;
  rocksdb::Status s = db->CompactRange(opts, nullptr, nullptr);
  if (!s.ok()) {
    LOG(FATAL) << "RocksDB error while compacting: " << s.ToString();
  }

  VLOG(1) << "Finished compacting db";

  // Were all object sizes consumed?
  if (oidDataIndex != oidData->size()) {
    LOG(WARNING) << "WARNING: some object sizes not consumed; "
                 << "object tree may be inaccurate. "
                 << "reported: " << oidData->size() << " consumed "
                 << oidDataIndex;
  } else {
    VLOG(1) << "Consumed all object sizes: " << oidDataIndex;
  }

  th = nullptr;
  oidData = nullptr;
}
void TreeBuilder::dumpJson() {
if (!config.jsonPath.has_value()) {
LOG(ERROR) << "No output path was provided for JSON";
return;
}
std::ofstream output(*config.jsonPath);
output << '[';
for (auto rootID : rootIDs) {
if (rootID == ERROR_NODE_ID) {
// On error, output an empty object to maintain offsets
output << "{},";
} else {
JSON(rootID, output);
output << ',';
}
}
/* Remove the trailing comma */
if (!rootIDs.empty()) {
output.seekp(-1, std::ios_base::cur);
}
output << "]\n"; // Text files should end with a newline per POSIX
VLOG(1) << "Finished writing JSON to disk";
}
void TreeBuilder::setPaddedStructs(
std::map<std::string, PaddingInfo>* _paddedStructs) {
this->paddedStructs = _paddedStructs;
}
/**
 * Returns the name of `type`.
 *
 * Types that carry a drgn program are named via `drgn_utils::typeToName`.
 * Types without one use their `oi_name` field, or an empty string when that
 * field is null.
 */
static std::string drgnTypeToName(struct drgn_type* type) {
  if (type->_private.program == nullptr) {
    const char* oiName = type->_private.oi_name;
    return oiName != nullptr ? oiName : "";
  }
  return drgn_utils::typeToName(type);
}
/**
 * Computes the size of `type` and stores it in `*ret`.
 *
 * - Function types are reported as `sizeof(uintptr_t)`.
 * - Types that carry a drgn program are delegated to `drgn_type_sizeof`.
 * - Other types use their `oi_size` field; a value equal to the maximum of
 *   the size field's type means "no size" and yields a static error object.
 *
 * @return nullptr on success, otherwise a drgn error (`*ret` is not written
 *         by this function in the static-error case).
 */
static struct drgn_error* drgnTypeSizeof(struct drgn_type* type,
                                         uint64_t* ret) {
  // Shared, statically allocated error; flagged as not needing destruction.
  static struct drgn_error incompleteTypeError = {
      .code = DRGN_ERROR_TYPE,
      .needs_destroy = false,
      .errnum = 0,
      .path = nullptr,
      .address = 0,
      .message = const_cast<char*>("cannot get size of incomplete type"),
  };

  if (drgn_type_kind(type) == DRGN_TYPE_FUNCTION) {
    *ret = sizeof(uintptr_t);
    return nullptr;
  }

  if (type->_private.program != nullptr) {
    return drgn_type_sizeof(type, ret);
  }

  // If type has no size, report an error to trigger a sizeMap lookup
  constexpr auto noSize =
      std::numeric_limits<decltype(type->_private.size)>::max();
  if (type->_private.oi_size == noSize) {
    return &incompleteTypeError;
  }

  *ret = type->_private.oi_size;
  return nullptr;
}
uint64_t TreeBuilder::getDrgnTypeSize(struct drgn_type* type) {
uint64_t size = 0;
struct drgn_error* err = drgnTypeSizeof(type, &size);
BOOST_SCOPE_EXIT(err) {
drgn_error_destroy(err);
}
BOOST_SCOPE_EXIT_END
if (err == nullptr) {
return size;
}
std::string typeName = drgnTypeToName(type);
for (auto& [typeName2, size2] : th->sizeMap)
if (typeName.starts_with(typeName2))
return size2;
if (typeName.starts_with("basic_string<char, std::char_traits<char>, "
"std::allocator<char> >")) {
return sizeof(std::string);
}
throw std::runtime_error("Failed to get size: " + std::to_string(err->code) +
" " + err->message);
}
uint64_t TreeBuilder::next() {
if (oidDataIndex >= oidData->size()) {
throw std::runtime_error("Unexpected end of data");
}
VLOG(3) << "next = " << (void*)(*oidData)[oidDataIndex];
return (*oidData)[oidDataIndex++];
}
/**
 * @return true if the variable's type is registered in `th->containerTypeMap`
 *         or is an array type with a non-zero length.
 */
bool TreeBuilder::isContainer(const Variable& variable) {
  if (th->containerTypeMap.contains(variable.type)) {
    return true;
  }
  const bool isArray = drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY;
  return isArray && drgn_type_length(variable.type) > 0;
}
/**
 * Resolves typedefs through `th->typedefMap` and reports whether the
 * underlying type is a drgn primitive type.
 *
 * @return false if a typedef along the chain is missing from the map.
 */
bool TreeBuilder::isPrimitive(struct drgn_type* type) {
  struct drgn_type* resolved = type;
  while (drgn_type_kind(resolved) == DRGN_TYPE_TYPEDEF) {
    const auto it = th->typedefMap.find(resolved);
    if (it == th->typedefMap.end()) {
      return false;
    }
    resolved = it->second;
  }
  return drgn_type_primitive(resolved) != DRGN_NOT_PRIMITIVE_TYPE;
}
/**
 * Returns the kind name reported by `OICodeGen::drgnKindStr` with its first
 * `strlen("DRGN_TYPE_")` characters removed.
 */
static std::string_view drgnKindStr(struct drgn_type* type) {
  constexpr std::string_view kindPrefix{"DRGN_TYPE_"};
  auto kindName = OICodeGen::drgnKindStr(type);
  kindName.remove_prefix(kindPrefix.size());
  return kindName;
}
/**
 * Sets `node.dynamicSize` and derives `node.exclusiveSize` from it.
 *
 * The exclusive size is `staticSize + dynamicSize - memberSizes`, clamped to
 * zero when `memberSizes` exceeds the node's total size.
 *
 * @param node        Node to update; its `staticSize` must already be set.
 * @param dynamicSize New dynamic size of the node.
 * @param memberSizes Combined size already attributed to the node's children.
 */
void TreeBuilder::setSize(TreeBuilder::Node& node,
                          uint64_t dynamicSize,
                          uint64_t memberSizes) {
  node.dynamicSize = dynamicSize;

  const auto totalSize = node.staticSize + node.dynamicSize;

  // TODO handle the clamped case in a more elegant way.
  // Children can add up to more than the node itself when handling bitfields
  // or vector<bool> (as we cannot accurately track sub-byte sizes currently).
  node.exclusiveSize = memberSizes > totalSize ? 0 : totalSize - memberSizes;
}
/**
 * Builds the node for `variable`, recursively processes its children, stores
 * the node in the database under `id` and returns it.
 *
 * Values are consumed from the data stream (via `next()`) in a fixed order
 * that depends on the type being processed, so the order of the `next()`
 * calls below must not be changed.
 *
 * @param id       ID assigned to the node being created.
 * @param variable Type, name and flags of the value to process. Stubbed
 *                 variables are stored with their static size only.
 * @return The fully populated node.
 * @throws std::runtime_error if the data stream ends early, if a dynamic
 *         class index read from the stream is out of range, if a size cannot
 *         be determined, or if RocksDB fails to store the node.
 */
TreeBuilder::Node TreeBuilder::process(NodeID id, Variable variable) {
  Node node{
      .id = id,
      .name = variable.name,
      .typeName = drgnTypeToName(variable.type),
      .typePath = std::move(variable.typePath),
      .staticSize = getDrgnTypeSize(variable.type),
      .isset = variable.isset,
  };
  VLOG(2) << "Processing node [" << id << "] (name: '" << variable.name
          << "', typeName: '" << node.typeName
          << "', kind: " << drgnKindStr(variable.type) << ")"
          << (variable.isStubbed ? " STUBBED" : "")
          << (th->knownDummyTypeList.contains(variable.type) ? " DUMMY" : "");

  // Default dynamic size to 0 and calculate fallback exclusive size
  setSize(node, 0, 0);

  if (!variable.isStubbed) {
    switch (drgn_type_kind(variable.type)) {
      case DRGN_TYPE_POINTER:
        if (config.features[Feature::ChaseRawPointers]) {
          // Pointers to incomplete types are stubbed out
          // See OICodeGen::enumeratePointerType
          if (th->knownDummyTypeList.contains(variable.type)) {
            break;
          }

          auto entry = th->pointerToTypeMap.find(variable.type);
          if (entry != th->pointerToTypeMap.end()) {
            auto innerTypeKind = drgn_type_kind(entry->second);
            // No pointer value or tag is read for pointers to functions.
            if (innerTypeKind != DRGN_TYPE_FUNCTION) {
              node.pointer = next();
              if (innerTypeKind == DRGN_TYPE_VOID) {
                break;
              }
              if (next() == (uint64_t)TrackPointerTag::skipped) {
                break;
              }
            }

            auto childID = nextNodeID++;
            auto child = process(childID, Variable{entry->second, "", ""});
            node.children = {childID, childID + 1};
            setSize(node, child.staticSize + child.dynamicSize,
                    child.staticSize + child.dynamicSize);
          }
        }
        break;
      case DRGN_TYPE_TYPEDEF: {
        const static boost::regex standardIntegerRegex{
            "((s?size)|(u?int(_fast|_least)?(8|16|32|64|128|ptr))|(ptrdiff))_"
            "t"};
        // We don't expand typedefs for well-known integer types from `stdint.h`
        // to prevent our output from being extremely verbose. We treat them as
        // if they are primitives directly (hence this check coming *before* we
        // set `node.isTypedef`).
        if (boost::regex_match(node.typeName, standardIntegerRegex)) {
          break;
        }

        node.isTypedef = true;

        auto entry = th->typedefMap.find(variable.type);
        if (entry != th->typedefMap.end()) {
          auto childID = nextNodeID++;
          auto child = process(childID, Variable{entry->second, "", ""});
          node.children = {childID, childID + 1};
          setSize(node, child.dynamicSize,
                  child.dynamicSize + child.staticSize);
        }
      } break;
      case DRGN_TYPE_CLASS:
      case DRGN_TYPE_STRUCT:
      case DRGN_TYPE_ARRAY:
        if (th->knownDummyTypeList.contains(variable.type)) {
          break;
        } else if (isContainer(variable)) {
          processContainer(variable, node);
        } else {
          drgn_type* objectType = variable.type;
          if (auto it = th->descendantClasses.find(objectType);
              it != th->descendantClasses.end()) {
            // The first item of data in dynamic classes identifies which
            // concrete type we should process it as, represented as an index
            // into the vector of child classes, or -1 to process this type
            // as itself.
            const auto& descendants = it->second;
            auto val = next();
            if (val != (uint64_t)-1) {
              // The index comes from the data stream: reject values that do
              // not refer to a known descendant instead of reading out of
              // bounds.
              if (val >= descendants.size()) {
                throw std::runtime_error(
                    "Invalid descendant class index " + std::to_string(val) +
                    " for type '" + node.typeName + "'");
              }
              objectType = descendants[val];
              node.typeName = drgnTypeToName(objectType);
              node.staticSize = getDrgnTypeSize(objectType);
            }
          }

          auto entry = th->classMembersMap.find(objectType);
          if (entry == th->classMembersMap.end() || entry->second.empty()) {
            break;
          }

          const auto& members = entry->second;
          // Reserve a contiguous ID range for the members up front, so that
          // nodes created while recursing get IDs outside of it.
          node.children = {nextNodeID, nextNodeID + members.size()};
          nextNodeID += members.size();
          auto childID = node.children->first;

          bool captureThriftIsset =
              th->thriftIssetStructTypes.contains(objectType);

          uint64_t memberSizes = 0;
          for (std::size_t i = 0; i < members.size(); i++) {
            std::optional<bool> isset;
            if (captureThriftIsset && i < members.size() - 1) {
              // Retrieve isset value for each member variable, except Thrift's
              // __isset field, which we assume comes last.
              // A value of -1 indicates a non-optional field for which we
              // don't record an isset value.
              auto val = next();
              if (val != (uint64_t)-1) {
                isset = val;
              }
            }

            const auto& member = members[i];
            auto child =
                process(childID++,
                        Variable{member.type, member.member_name,
                                 member.member_name, isset, member.isStubbed});
            node.dynamicSize += child.dynamicSize;
            memberSizes += child.dynamicSize + child.staticSize;
          }
          setSize(node, node.dynamicSize, memberSizes);
        }
        break;
      default:
        // The remaining types are all described entirely by their static size,
        // and hence need no special handling.
        break;
    }

    if (config.features[Feature::GenPaddingStats]) {
      auto entry = paddedStructs->find(node.typeName);
      if (entry != paddedStructs->end()) {
        entry->second.instancesCnt++;
        node.paddingSavingsSize = entry->second.savingSize;
      }
    }
  }

  rocksdb::WriteOptions options{};
  options.disableWAL = true;
  auto status = db->Put(options, std::to_string(node.id), serialize(node));
  if (!status.ok()) {
    throw std::runtime_error("RocksDB error while inserting node [" +
                             std::to_string(node.id) +
                             "]: " + status.ToString());
  }
  return node;
}
void TreeBuilder::processContainer(const Variable& variable, Node& node) {
VLOG(1) << "Processing container [" << node.id << "] of type '"
<< node.typeName << "'";
ContainerTypeEnum kind = UNKNOWN_TYPE;
std::vector<struct drgn_qualified_type> elementTypes;
if (drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY) {
kind = ARRAY_TYPE;
struct drgn_type* arrayElementType = nullptr;
size_t numElems = 0;
if (config.features[Feature::TypeGraph]) {
arrayElementType = drgn_type_type(variable.type).type;
numElems = drgn_type_length(variable.type);
} else {
drgn_utils::getDrgnArrayElementType(variable.type, &arrayElementType,
numElems);
}
assert(numElems > 0);
elementTypes.push_back(
drgn_qualified_type{arrayElementType, (enum drgn_qualifiers)(0)});
} else {
auto entry = th->containerTypeMap.find(variable.type);
if (entry == th->containerTypeMap.end()) {
throw std::runtime_error(
"Could not find container information for type with name '" +
node.typeName + "'");
}
auto& [containerKind, templateTypes] = entry->second;
kind = containerKind;
for (const auto& tt : templateTypes) {
elementTypes.push_back(tt);
}
}
/**
* Some containers (conditionally) store their contents *directly* inside
* themselves (as opposed to having a pointer to heap-allocated memory).
* `std::pair` and `std::array` are two trivial examples, but some types vary
* whether their contents are stored inline or externally depending on
* runtime conditions (usually the number of elements currently present in
* the container).
*/
bool contentsStoredInline = false;
// Initialize, then take a reference to the underlying value for convenience
// so that we don't have to dereference the optional every time we want to use
// it.
auto& containerStats =
node.containerStats.emplace(Node::ContainerStats{0, 0, 0});
for (auto& type : elementTypes) {
containerStats.elementStaticSize += getDrgnTypeSize(type.type);
}
switch (kind) {
case OPTIONAL_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
if (next() == 0U) {
containerStats.length = 0;
return;
}
break;
case FOLLY_OPTIONAL_TYPE:
// TODO: Not sure why we are capturing pointer for folly::Optional but
// not std::optional. Both are supposed to store data inline.
contentsStoredInline = true;
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
if (*node.pointer == 0) {
containerStats.length = 0;
return;
}
break;
case WEAK_PTR_TYPE:
// Do not handle weak pointers beyond their static size for now.
break;
case SHRD_PTR_TYPE:
case UNIQ_PTR_TYPE:
node.pointer = next();
containerStats.length = *node.pointer ? 1 : 0;
containerStats.capacity = 1;
if (next() == (uint64_t)TrackPointerTag::skipped) {
return;
}
break;
case TRY_TYPE:
case REF_WRAPPER_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
if (next() == (uint64_t)TrackPointerTag::skipped) {
return;
}
break;
case SORTED_VEC_SET_TYPE:
case CONTAINER_ADAPTER_TYPE: {
node.pointer = next();
// Copy the underlying container's sizes and stats directly into this
// container adapter
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
// elementTypes is only populated with the underlying container type for
// container adapters
auto containerType = elementTypes[0];
auto child = process(
childID++, {.type = containerType.type,
.name = "",
.typePath = drgnTypeToName(containerType.type) + "[]"});
setSize(node, child.dynamicSize, child.dynamicSize + child.staticSize);
node.containerStats = child.containerStats;
return;
}
case STD_VARIANT_TYPE: {
containerStats.length = containerStats.capacity = 1;
containerStats.elementStaticSize = 0;
for (auto& type : elementTypes) {
auto paramSize = getDrgnTypeSize(type.type);
containerStats.elementStaticSize =
std::max(containerStats.elementStaticSize, paramSize);
}
node.dynamicSize = 0;
// When a std::variant is valueless_by_exception, its index will be
// std::variant_npos (i.e. 0xffffffffffffffff).
//
// libstdc++ and libc++ both optimise the storage required for#
// std::variant's index value by using fewer than 8-bytes when possible.
// e.g. for a std::variant<A, B>, only three index values are required:
// one each for A and B and one for variant_npos. variant_npos may be
// represented internally by 0xff and only converted back to
// 0xffffffffffffffff when index() is called.
//
// However, this conversion may be optimised away in the target process,
// so we need to treat any invalid index as variant_npos.
if (auto index = next(); index < elementTypes.size()) {
// Recurse only into the type of the template parameter which
// is currently stored in this variant
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
auto elementType = elementTypes[index];
auto child = process(
childID++, {.type = elementType.type,
.name = "",
.typePath = drgnTypeToName(elementType.type) + "[]"});
setSize(node, child.dynamicSize, child.dynamicSize + child.staticSize);
}
return;
}
case PAIR_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
break;
case SEQ_TYPE:
case MICROLIST_TYPE:
case FEED_QUICK_HASH_SET:
case FEED_QUICK_HASH_MAP:
case FB_HASH_MAP_TYPE:
case FB_HASH_SET_TYPE:
case MAP_SEQ_TYPE:
case FOLLY_SMALL_HEAP_VECTOR_MAP:
case REPEATED_FIELD_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
break;
case LIST_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = next();
break;
case FOLLY_IOBUFQUEUE_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 0;
if (next() == (uint64_t)TrackPointerTag::skipped) {
return;
}
// Fallthrough to the IOBuf data if we have a valid pointer
[[fallthrough]];
case FOLLY_IOBUF_TYPE:
containerStats.capacity = next();
containerStats.length = next();
break;
case FB_STRING_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
// Contents are either stored inline or have been seen before in the JIT
// code. Set to true either way so as not to double count.
contentsStoredInline = next() == 0;
{
constexpr int sharedCutOff = 255;
if (containerStats.capacity < sharedCutOff) {
// No sense in recording the pointer value if the string isn't
// potentially shared.
node.pointer.reset();
}
}
break;
case STRING_TYPE:
containerStats.capacity = next();
containerStats.length = next();
// Account for Small String Optimization (SSO)
// LLVM libc++: sizeof(string) = 24, SSO cutoff = 22
// GNU libstdc++: sizeof(string) = 32, SSO cutoff = 15
{
const int llvmSizeOf = 24;
const int llvmSsoCutOff = 22;
[[maybe_unused]] const int gnuSizeOf = 32;
const int gnuSsoCutOff = 15;
assert(node.staticSize == llvmSizeOf || node.staticSize == gnuSizeOf);
size_t ssoCutoff =
node.staticSize == llvmSizeOf ? llvmSsoCutOff : gnuSsoCutOff;
contentsStoredInline = containerStats.capacity <= ssoCutoff;
}
break;
case CAFFE2_BLOB_TYPE:
// This is a weird one, need to ask why we just overwite size like this
setSize(node, next(), 0);
return;
case ARRAY_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = next();
break;
case SMALL_VEC_TYPE: {
size_t maxInline = next();
containerStats.capacity = next();
containerStats.length = next();
contentsStoredInline = containerStats.capacity <= maxInline;
} break;
case BOOST_BIMAP_TYPE:
// TODO: Hard to know the overhead of boost bimap. It isn't documented in
// the boost docs. Need to look closer at the implementation.
containerStats.length = containerStats.capacity = next();
break;
case SET_TYPE:
case STD_MAP_TYPE:
// Account for node overhead
containerStats.elementStaticSize += next();
containerStats.length = containerStats.capacity = next();
break;
case UNORDERED_SET_TYPE:
case STD_UNORDERED_MAP_TYPE: {
// Account for node overhead
containerStats.elementStaticSize += next();
size_t bucketCount = next();
// Both libc++ and libstdc++ define buckets as an array of raw pointers
setSize(node, node.dynamicSize + bucketCount * sizeof(void*), 0);
containerStats.length = containerStats.capacity = next();
} break;
case F14_MAP:
case F14_SET:
// F14 maps/sets don't actually store their contents inline, but the
// intention of setting this to `true` is to skip the usual calculation
// performed to determine `node.dynamicSize`, since F14 maps very
// conveniently provide a `getAllocatedMemorySize()` method which we can
// use instead.
contentsStoredInline = true;
setSize(node, node.dynamicSize + next(), 0);
containerStats.capacity = next();
containerStats.length = next();
break;
case RADIX_TREE_TYPE:
case MULTI_MAP_TYPE:
case BY_MULTI_QRT_TYPE:
containerStats.length = containerStats.capacity = next();
break;
case DUMMY_TYPE:
// Dummy container
containerStats.elementStaticSize = 0;
break;
default:
throw std::runtime_error("Unknown container (type was 0x" +
std::to_string(kind) + ")");
break;
}
if (!contentsStoredInline) {
setSize(node,
node.dynamicSize +
containerStats.elementStaticSize * containerStats.capacity,
0);
}
// A cutoff value used to sanity-check our results. If a container
// is larger than this, chances are that we've read uninitialized data,
// or there's a bug in Codegen.
constexpr size_t CONTAINER_SIZE_THRESHOLD = 1ULL << 38;
if (containerStats.elementStaticSize * containerStats.capacity >=
CONTAINER_SIZE_THRESHOLD) {
throw std::runtime_error(
"Container size exceeds threshold, this is likely due to reading "
"uninitialized data in the target process");
}
if (std::ranges::all_of(
elementTypes.cbegin(), elementTypes.cend(),
[this](auto& type) { return isPrimitive(type.type); })) {
VLOG(1)
<< "Container [" << node.id
<< "] contains only primitive types, skipping processing its members";
return;
}
auto numChildren = containerStats.length * elementTypes.size();
if (numChildren == 0) {
VLOG(1) << "Container [" << node.id << "] has no children";
return;
}
node.children = {nextNodeID, nextNodeID + numChildren};
VLOG(1) << "Container [" << node.id << "]'s children cover range ["
<< node.children->first << ", " << node.children->second << ")";
nextNodeID += numChildren;
auto childID = node.children->first;
uint64_t memberSizes = 0;
for (size_t i = 0; i < containerStats.length; i++) {
for (auto& type : elementTypes) {
auto child =
process(childID++, {.type = type.type,
.name = "",
.typePath = drgnTypeToName(type.type) + "[]"});
node.dynamicSize += child.dynamicSize;
memberSizes += child.dynamicSize + child.staticSize;
}
}
setSize(node, node.dynamicSize, memberSizes);
}
template <class T>
std::string_view TreeBuilder::serialize(const T& data) {
buffer->clear();
msgpack::pack(*buffer, data);
// It is *very* important that we construct the `std::string_view` with an
// explicit length, since `buffer->data()` may contain null bytes.
return std::string_view(buffer->data(), buffer->size());
}
/**
 * Reads node `id` from the database and writes it, followed recursively by
 * its children, to `output` as a JSON object.
 *
 * @param id     ID of the node to write.
 * @param output Stream receiving the JSON text.
 * @throws std::runtime_error if the node cannot be read from RocksDB.
 */
void TreeBuilder::JSON(NodeID id, std::ofstream& output) {
  std::string data;
  auto status = db->Get(rocksdb::ReadOptions(), std::to_string(id), &data);
  if (!status.ok()) {
    throw std::runtime_error("RocksDB error while reading node [" +
                             std::to_string(id) + "]: " + status.ToString());
  }

  // `Node::name` is a `std::string_view`, which msgpack converts into a view
  // of the unpacked object's string data rather than a copy. The handle
  // returned by `unpack` owns that data, so it must stay alive for as long as
  // `node` is used; converting from a temporary handle would leave
  // `node.name` dangling.
  auto unpacked = msgpack::unpack(data.data(), data.size());
  Node node;
  unpacked.get().convert(node);

  // Remove all backslashes to ensure the output is valid JSON
  std::replace(node.typePath.begin(), node.typePath.end(), '\\', ' ');
  std::replace(node.typeName.begin(), node.typeName.end(), '\\', ' ');

  output << "{";
  output << "\"name\":\"" << node.name << "\",";
  output << "\"typePath\":\"" << node.typePath << "\",";
  output << "\"typeName\":\"" << node.typeName << "\",";
  output << "\"isTypedef\":" << (node.isTypedef ? "true" : "false") << ",";
  output << "\"staticSize\":" << node.staticSize << ",";
  output << "\"dynamicSize\":" << node.dynamicSize << ",";
  output << "\"exclusiveSize\":" << node.exclusiveSize;

  // Optional fields are only emitted when present.
  if (node.paddingSavingsSize.has_value()) {
    output << ",";
    output << "\"paddingSavingsSize\":" << *node.paddingSavingsSize;
  }
  if (node.pointer.has_value()) {
    output << ",";
    output << "\"pointer\":" << *node.pointer;
  }
  if (node.containerStats.has_value()) {
    output << ",";
    output << "\"length\":" << node.containerStats->length << ",";
    output << "\"capacity\":" << node.containerStats->capacity << ",";
    output << "\"elementStaticSize\":"
           << node.containerStats->elementStaticSize;
  }
  if (node.isset.has_value()) {
    output << ",";
    output << "\"isset\":" << (*node.isset ? "true" : "false");
  }
  if (node.children.has_value()) {
    output << ",";
    output << "\"members\":[";
    auto [childIDStart, childIDEnd] = *node.children;
    assert(childIDStart < childIDEnd);
    // Trailing commas are disallowed in JSON, so we pull
    // out the first iteration of the loop.
    JSON(childIDStart, output);
    for (auto childID = childIDStart + 1; childID < childIDEnd; childID++) {
      output << ",";
      JSON(childID, output);
    }
    output << "]";
  }
  output << "}";
}