codegenv2: improve multi argument generation (#395)

Summary:
Adds a new function `CodeGen::codegenFromDrgns` which runs multiple drgn root types through one `DrgnParser`. The naming for the output is now stored within this function; previously it was decided during the actual codegen, which is too late with multiple root types.

This new function is used in `OIGenerator` when you have multiple OIL calls within one input object. Rather than producing separate `.o`s with likely duplicate code for each callsite, we produce a single `.o` that contains both calls. For calls with types with significant overlap this should be a significant reduction in work. The downside is the calls can't have different features, but this could be solved in the future by namespacing the code based on features/config within the generated `.cpp` - the pros seem to outweigh the con.

This should also be used with `oid` in multi probe mode as it would again significantly reduce the work it has to do. I didn't do this yet as it requires changing the cache. As I've got a big cache refactor ongoing at the moment, it makes sense to wait until after that's landed to make this change.


Test Plan: Generally tested with GitHub CI. Tested the new multi call OILGen with the new example seen below. Outputs the following code with two root calls: P869569859 - note that `VectorOfStrings_0` is the only instance, whereas previously we'd have generated it in two files.

Differential Revision: D50835153

Pulled By: JakeHillion
This commit is contained in:
Jake Hillion 2023-10-31 10:22:56 -07:00 committed by Facebook GitHub Bot
parent 6e1635ce1e
commit a49f0e7410
7 changed files with 215 additions and 157 deletions

View File

@ -42,6 +42,9 @@
#include "type_graph/TypeIdentifier.h"
#include "type_graph/Types.h"
// Type-dependent `false`. Intended for `static_assert(always_false_v<T>)` in
// the final `else` of an `if constexpr` chain, so the assertion is only
// evaluated when that branch is actually instantiated for some `T`.
template <typename T>
inline constexpr bool always_false_v = false;
namespace oi::detail {
using type_graph::AddChildren;
@ -74,6 +77,13 @@ using ref = std::reference_wrapper<T>;
namespace {
/*
 * Build the alias name used for a root type in the generated code:
 * "RootType" + decimal std::hash of the type's name + "_".
 */
std::string rootTypedefName(Type& t) {
  const auto nameHash = std::hash<std::string>{}(t.name());
  return "RootType" + std::to_string(nameHash) + '_';
}
std::vector<std::string_view> enumerateTypeNames(Type& type) {
std::vector<std::string_view> names;
Type* t = &type;
@ -1106,14 +1116,7 @@ void CodeGen::addTypeHandlers(const TypeGraph& typeGraph, std::string& code) {
}
}
bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType,
std::string linkageName,
std::string& code) {
linkageName_ = std::move(linkageName);
return codegenFromDrgn(drgnType, code);
}
bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) {
bool CodeGen::codegenFromDrgns(std::span<DrgnRequest> reqs, std::string& code) {
try {
containerInfos_.reserve(config_.containerConfigPaths.size());
for (const auto& path : config_.containerConfigPaths) {
@ -1125,18 +1128,40 @@ bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) {
}
TypeGraph typeGraph;
DrgnParserOptions options{
.chaseRawPointers = config_.features[Feature::ChaseRawPointers],
};
DrgnParser drgnParser{typeGraph, containerInfos_, options};
for (auto& req : reqs) {
try {
addDrgnRoot(drgnType, typeGraph);
Type& parsedRoot = drgnParser.parse(req.ty);
typeGraph.addRoot(parsedRoot);
if (req.linkageName.has_value()) {
rootNames_.emplace_back(ExactName{.name = std::move(*req.linkageName)});
} else {
rootNames_.emplace_back(
HashedComponent{.name = SymbolService::getTypeName(req.ty)});
}
} catch (const type_graph::DrgnParserError& err) {
LOG(ERROR) << "Error parsing DWARF: " << err.what();
return false;
}
}
transform(typeGraph);
generate(typeGraph, code, drgnType);
generate(typeGraph, code);
return true;
}
bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) {
std::array<DrgnRequest, 1> reqs{DrgnRequest{
.ty = drgnType,
.linkageName = std::nullopt,
}};
return codegenFromDrgns(reqs, code);
}
void CodeGen::registerContainer(const fs::path& path) {
auto info = std::make_unique<ContainerInfo>(path);
if (info->requiredFeatures != (config_.features & info->requiredFeatures)) {
@ -1147,15 +1172,6 @@ void CodeGen::registerContainer(const fs::path& path) {
containerInfos_.emplace_back(std::move(info));
}
void CodeGen::addDrgnRoot(struct drgn_type* drgnType, TypeGraph& typeGraph) {
DrgnParserOptions options{
.chaseRawPointers = config_.features[Feature::ChaseRawPointers],
};
DrgnParser drgnParser{typeGraph, containerInfos_, options};
Type& parsedRoot = drgnParser.parse(drgnType);
typeGraph.addRoot(parsedRoot);
}
void CodeGen::transform(TypeGraph& typeGraph) {
type_graph::PassManager pm;
@ -1207,11 +1223,7 @@ void CodeGen::transform(TypeGraph& typeGraph) {
};
}
void CodeGen::generate(
TypeGraph& typeGraph,
std::string& code,
struct drgn_type* drgnType /* TODO: this argument should not be required */
) {
void CodeGen::generate(TypeGraph& typeGraph, std::string& code) {
code = headers::oi_OITraceCode_cpp;
if (!config_.features[Feature::Library]) {
FuncGen::DeclareExterns(code);
@ -1280,30 +1292,66 @@ void CodeGen::generate(
addGetSizeFuncDefs(typeGraph, code);
}
assert(typeGraph.rootTypes().size() == 1);
Type& rootType = typeGraph.rootTypes()[0];
code += "\nusing __ROOT_TYPE__ = " + rootType.name() + ";\n";
// Give each root type a unique typedef in the OIInternal namespace so they
// don't have naming issues from outside.
for (Type& rootType : typeGraph.rootTypes()) {
code += "using ";
code += rootTypedefName(rootType);
code += " = ";
code += rootType.name();
code += ";\n";
}
code += "} // namespace\n} // namespace OIInternal\n";
const auto typeName = SymbolService::getTypeName(drgnType);
if (config_.features[Feature::Library]) {
FuncGen::DefineTopLevelIntrospect(code, typeName);
} else if (config_.features[Feature::TypedDataSegment]) {
FuncGen::DefineTopLevelGetSizeRefTyped(code, typeName, config_.features);
} else {
FuncGen::DefineTopLevelGetSizeRef(code, typeName, config_.features);
}
assert(typeGraph.rootTypes().size() == rootNames_.size());
// should be std::ranges::zip_view(typeGraph.rootTypes(), rootNames_)
auto rootTypeIt = typeGraph.rootTypes().begin();
for (auto rootNameIt = rootNames_.cbegin();
rootNameIt != rootNames_.cend() &&
rootTypeIt != typeGraph.rootTypes().end();
++rootNameIt, ++rootTypeIt) {
Type& rootType = *rootTypeIt;
const auto& rootName = *rootNameIt;
const auto typedefName = rootTypedefName(rootType);
if (config_.features[Feature::TreeBuilderV2]) {
FuncGen::DefineTreeBuilderInstructions(code, typeName,
std::visit(
[&](const auto& name) {
using T = std::decay_t<decltype(name)>;
if constexpr (std::is_same_v<ExactName, T>) {
FuncGen::DefineTopLevelIntrospectNamed(
code, typedefName, name.name,
calculateExclusiveSize(rootType),
enumerateTypeNames(rootType));
} else if (config_.features[Feature::TreeBuilderTypeChecking]) {
FuncGen::DefineOutputType(code, typeName);
} else if constexpr (std::is_same_v<HashedComponent, T>) {
FuncGen::DefineTopLevelIntrospect(code, typedefName, name.name);
FuncGen::DefineTreeBuilderInstructions(
code, typedefName, name.name,
calculateExclusiveSize(rootType),
enumerateTypeNames(rootType));
} else {
static_assert(always_false_v<T>);
}
},
rootName);
} else if (config_.features[Feature::TypedDataSegment]) {
const auto& name = std::get<HashedComponent>(rootName).name;
FuncGen::DefineTopLevelGetSizeRefTyped(code, typedefName, name,
config_.features);
} else {
const auto& name = std::get<HashedComponent>(rootName).name;
FuncGen::DefineTopLevelGetSizeRef(code, typedefName, name,
config_.features);
}
if (!linkageName_.empty())
FuncGen::DefineTopLevelIntrospectNamed(code, typeName, linkageName_);
if (config_.features[Feature::TreeBuilderTypeChecking] &&
!config_.features[Feature::TreeBuilderV2]) {
const auto& name = std::get<HashedComponent>(rootName).name;
FuncGen::DefineOutputType(code, typedefName, name);
}
}
if (VLOG_IS_ON(3)) {
VLOG(3) << "Generated trace code:\n";

View File

@ -18,9 +18,11 @@
#include <filesystem>
#include <functional>
#include <memory>
#include <span>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <variant>
#include <vector>
#include "ContainerInfo.h"
@ -40,6 +42,11 @@ namespace oi::detail {
class CodeGen {
public:
/*
 * One root type to generate code for, as consumed by codegenFromDrgns.
 */
struct DrgnRequest {
// drgn type that is parsed and added as a root of the type graph.
drgn_type* ty = nullptr;
// Optional name for the generated entry point. When absent, the entry point
// is identified by the type's name instead.
std::optional<std::string> linkageName;
};
CodeGen(const OICodeGen::Config& config, SymbolService& symbols)
: config_(config), symbols_(symbols) {
}
@ -48,29 +55,29 @@ class CodeGen {
* Helper function to perform all the steps required for code generation for a
* single drgn_type.
*/
bool codegenFromDrgn(struct drgn_type* drgnType, std::string& code);
bool codegenFromDrgn(struct drgn_type* drgnType,
std::string linkageName,
std::string& code);
bool codegenFromDrgn(drgn_type* drgnType, std::string& code);
bool codegenFromDrgns(std::span<DrgnRequest> reqs, std::string& code);
void registerContainer(const std::filesystem::path& path);
void addDrgnRoot(struct drgn_type* drgnType,
type_graph::TypeGraph& typeGraph);
void transform(type_graph::TypeGraph& typeGraph);
void generate(type_graph::TypeGraph& typeGraph,
std::string& code,
struct drgn_type*
drgnType /* TODO: this argument should not be required */
);
void generate(type_graph::TypeGraph& typeGraph, std::string& code);
private:
// Root entry point identified by an explicit name (taken from a request's
// linkage name).
struct ExactName {
std::string name;
};
// Root entry point identified by a string that is hashed by FuncGen to form
// the generated symbol's suffix (taken from the drgn type name).
struct HashedComponent {
std::string name;
};
// How the generated top-level function(s) for a single root type are named.
using RootFunctionName = std::variant<ExactName, HashedComponent>;
const OICodeGen::Config& config_;
SymbolService& symbols_;
std::vector<std::unique_ptr<ContainerInfo>> containerInfos_;
std::unordered_set<const ContainerInfo*> definedContainers_;
std::unordered_map<const type_graph::Class*, const type_graph::Member*>
thriftIssetMembers_;
std::string linkageName_;
std::vector<RootFunctionName> rootNames_;
void genDefsThrift(const type_graph::TypeGraph& typeGraph, std::string& code);
void addGetSizeFuncDefs(const type_graph::TypeGraph& typeGraph,

View File

@ -252,13 +252,13 @@ void FuncGen::DefineStoreData(std::string& testCode) {
}
void FuncGen::DefineTopLevelIntrospect(std::string& code,
const std::string& type) {
const std::string& type,
const std::string& idToHash) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
void __attribute__((used, retain)) introspect_%2$016x(
const OIInternal::__ROOT_TYPE__& t,
const OIInternal::%1%& t,
std::vector<uint8_t>& v)
#pragma GCC diagnostic pop
{
@ -269,7 +269,7 @@ void __attribute__((used, retain)) introspect_%2$016x(
v.reserve(4096);
using DataBufferType = DataBuffer::BackInserter<std::vector<uint8_t>>;
using ContentType = OIInternal::TypeHandler<DataBufferType, OIInternal::__ROOT_TYPE__>::type;
using ContentType = OIInternal::TypeHandler<DataBufferType, OIInternal::%1%>::type;
ContentType ret{DataBufferType{v}};
OIInternal::getSizeType<DataBufferType>(t, ret);
@ -277,39 +277,47 @@ void __attribute__((used, retain)) introspect_%2$016x(
)";
code.append(
(boost::format(func) % type % std::hash<std::string>{}(type)).str());
(boost::format(func) % type % std::hash<std::string>{}(idToHash)).str());
}
void FuncGen::DefineTopLevelIntrospectNamed(std::string& code,
void FuncGen::DefineTopLevelIntrospectNamed(
std::string& code,
const std::string& type,
const std::string& linkageName) {
std::string typeHash =
(boost::format("%1$016x") % std::hash<std::string>{}(type)).str();
const std::string& linkageName,
size_t exclusiveSize,
std::span<const std::string_view> typeNames) {
code += "namespace {\n";
DefineTreeBuilderInstructions(code, type, linkageName, exclusiveSize,
typeNames);
DefineTopLevelIntrospect(code, type, linkageName);
code += "} // namespace\n";
std::string internalId =
(boost::format("%1$016x") % std::hash<std::string>{}(linkageName)).str();
code += "/* RawType: ";
code += type;
code += " */\n";
code += "extern \"C\" IntrospectionResult ";
code += linkageName;
code += "(const OIInternal::__ROOT_TYPE__& t) {\n";
code += "(const OIInternal::";
code += type;
code += "& t) {\n";
code += " std::vector<uint8_t> v{};\n";
code += " introspect_";
code += typeHash;
code += internalId;
code += "(t, v);\n";
code += " return IntrospectionResult{std::move(v), treeBuilderInstructions";
code += typeHash;
code += internalId;
code += "};\n";
code += "}\n";
}
void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
FeatureSet features) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& t)
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t)
#pragma GCC diagnostic pop
{
)";
@ -348,7 +356,7 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode,
)";
boost::format fmt =
boost::format(func) % rawType % std::hash<std::string>{}(rawType);
boost::format(func) % type % std::hash<std::string>{}(idToHash);
testCode.append(fmt.str());
}
@ -359,13 +367,13 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode,
* with feature '-ftyped-data-segment'.
*/
void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
FeatureSet features) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& t)
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t)
#pragma GCC diagnostic pop
{
)";
@ -390,7 +398,7 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode,
JLOG("%1% @");
JLOGPTR(&t);
using ContentType = OIInternal::TypeHandler<DataBuffer::DataSegment, OIInternal::__ROOT_TYPE__>::type;
using ContentType = OIInternal::TypeHandler<DataBuffer::DataSegment, OIInternal::%1%>::type;
using SuffixType = types::st::Pair<
DataBuffer::DataSegment,
types::st::VarInt<DataBuffer::DataSegment>,
@ -425,7 +433,7 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode,
)";
boost::format fmt =
boost::format(func) % rawType % std::hash<std::string>{}(rawType);
boost::format(func) % type % std::hash<std::string>{}(idToHash);
testCode.append(fmt.str());
}
@ -434,28 +442,30 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode,
*
* Present the dynamic type of an object for OID/OIL/OITB to link against.
*/
void FuncGen::DefineOutputType(std::string& code, const std::string& rawType) {
void FuncGen::DefineOutputType(std::string& code,
const std::string& type,
const std::string& idToHash) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
extern const types::dy::Dynamic __attribute__((used, retain)) outputType%2$016x =
OIInternal::TypeHandler<DataBuffer::DataSegment, OIInternal::__ROOT_TYPE__>::type::describe;
OIInternal::TypeHandler<DataBuffer::DataSegment, OIInternal::%1%>::type::describe;
#pragma GCC diagnostic pop
)";
boost::format fmt =
boost::format(func) % rawType % std::hash<std::string>{}(rawType);
boost::format(func) % type % std::hash<std::string>{}(idToHash);
code.append(fmt.str());
}
void FuncGen::DefineTreeBuilderInstructions(
std::string& code,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
size_t exclusiveSize,
std::span<const std::string_view> typeNames) {
std::string typeHash =
(boost::format("%1$016x") % std::hash<std::string>{}(rawType)).str();
(boost::format("%1$016x") % std::hash<std::string>{}(idToHash)).str();
code += R"(
#pragma GCC diagnostic push
@ -474,13 +484,17 @@ const std::array<std::string_view, )";
code += "};\n";
code += "const exporters::inst::Field rootInstructions";
code += typeHash;
code += "{sizeof(OIInternal::__ROOT_TYPE__), ";
code += "{sizeof(OIInternal::";
code += type;
code += "), ";
code += std::to_string(exclusiveSize);
code += ", \"a0\", typeNames";
code += typeHash;
code +=
", OIInternal::TypeHandler<int, OIInternal::__ROOT_TYPE__>::fields, "
"OIInternal::TypeHandler<int, OIInternal::__ROOT_TYPE__>::processors};\n";
code += ", OIInternal::TypeHandler<int, OIInternal::";
code += type;
code += ">::fields, OIInternal::TypeHandler<int, OIInternal::";
code += type;
code += ">::processors};\n";
code += "} // namespace\n";
code +=
"extern const exporters::inst::Inst __attribute__((used, retain)) "

View File

@ -53,22 +53,30 @@ class FuncGen {
static void DeclareGetSize(std::string& testCode, const std::string& type);
static void DefineTopLevelIntrospect(std::string& code,
const std::string& type);
static void DefineTopLevelIntrospectNamed(std::string& code,
const std::string& type,
const std::string& linkageName);
const std::string& idToHash);
static void DefineTopLevelIntrospectNamed(
std::string& code,
const std::string& type,
const std::string& linkageName,
size_t exclusiveSize,
std::span<const std::string_view> typeNames);
static void DefineTopLevelGetSizeRef(std::string& testCode,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
FeatureSet features);
static void DefineTopLevelGetSizeRefTyped(std::string& testCode,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
FeatureSet features);
static void DefineOutputType(std::string& testCode,
const std::string& rawType);
const std::string& type,
const std::string& idToHash);
static void DefineTreeBuilderInstructions(
std::string& testCode,
const std::string& rawType,
const std::string& type,
const std::string& idToHash,
size_t exclusiveSize,
std::span<const std::string_view> typeNames);

View File

@ -3331,8 +3331,8 @@ bool OICodeGen::generateJitCode(std::string& code) {
funcGen.DefineTopLevelGetSizeSmartPtr(functionsCode, rawTypeName,
config.features);
} else {
funcGen.DefineTopLevelGetSizeRef(functionsCode, rawTypeName,
config.features);
funcGen.DefineTopLevelGetSizeRef(functionsCode, "__ROOT_TYPE__",
rawTypeName, config.features);
}
}

View File

@ -29,9 +29,16 @@
#include "oi/Config.h"
#include "oi/DrgnUtils.h"
#include "oi/Headers.h"
#include "oi/type_graph/DrgnParser.h"
#include "oi/type_graph/TypeGraph.h"
namespace oi::detail {
using type_graph::DrgnParser;
using type_graph::DrgnParserOptions;
using type_graph::Type;
using type_graph::TypeGraph;
std::unordered_map<std::string, std::string>
OIGenerator::oilStrongToWeakSymbolsMap(drgnplusplus::program& prog) {
static constexpr std::string_view strongSymbolPrefix =
@ -128,42 +135,6 @@ OIGenerator::findOilTypesAndNames(drgnplusplus::program& prog) {
return out;
}
fs::path OIGenerator::generateForType(const OICodeGen::Config& generatorConfig,
const OICompiler::Config& compilerConfig,
const drgn_qualified_type& type,
const std::string& linkageName,
SymbolService& symbols) {
CodeGen codegen{generatorConfig, symbols};
std::string code;
if (!codegen.codegenFromDrgn(type.type, linkageName, code)) {
LOG(ERROR) << "codegen failed!";
return {};
}
std::string sourcePath = sourceFileDumpPath;
if (sourceFileDumpPath.empty()) {
// This is the path Clang acts as if it has compiled from e.g. for debug
// information. It does not need to exist.
sourcePath = "oil_jit.cpp";
} else {
std::ofstream outputFile(sourcePath);
outputFile << code;
}
OICompiler compiler{{}, compilerConfig};
// TODO: Revert to outputPath and remove printing when typegraph is done.
fs::path tmpObject = outputPath;
tmpObject.replace_extension(
"." + std::to_string(std::hash<std::string>{}(linkageName)) + ".o");
if (!compiler.compile(code, sourcePath, tmpObject)) {
return {};
}
return tmpObject;
}
int OIGenerator::generate(fs::path& primaryObject, SymbolService& symbols) {
drgnplusplus::program prog;
@ -202,26 +173,34 @@ int OIGenerator::generate(fs::path& primaryObject, SymbolService& symbols) {
generatorConfig.features = *features;
compilerConfig.features = *features;
size_t failures = 0;
std::vector<CodeGen::DrgnRequest> reqs{};
reqs.reserve(oilTypes.size());
for (const auto& [linkageName, type] : oilTypes) {
if (auto obj = generateForType(generatorConfig, compilerConfig, type,
linkageName, symbols);
!obj.empty()) {
std::cout << obj.string() << std::endl;
reqs.emplace_back(
CodeGen::DrgnRequest{.ty = type.type, .linkageName = linkageName});
}
CodeGen codegen{generatorConfig, symbols};
std::string code;
codegen.codegenFromDrgns(reqs, code);
std::string sourcePath = sourceFileDumpPath;
if (sourceFileDumpPath.empty()) {
// This is the path Clang acts as if it has compiled from e.g. for debug
// information. It does not need to exist.
sourcePath = "oil_jit.cpp";
} else {
LOG(WARNING) << "failed to generate for symbol `" << linkageName
<< "`. this is non-fatal but the call will not work.";
failures++;
}
std::ofstream outputFile(sourcePath);
outputFile << code;
}
OICompiler compiler{{}, compilerConfig};
size_t successes = oilTypes.size() - failures;
LOG(INFO) << "object introspection generation complete. " << successes
<< " successes and " << failures << " failures.";
if (failures > 0 || (failIfNothingGenerated && successes == 0)) {
if (!compiler.compile(code, sourcePath, outputPath)) {
return -1;
}
LOG(INFO) << "object introspection generation complete, generated for "
<< oilTypes.size() << "sites.";
return 0;
}

View File

@ -16,6 +16,7 @@
#pragma once
#include <memory>
#include <span>
#include <unordered_map>
#include <vector>
@ -54,7 +55,7 @@ struct DrgnParserOptions {
class DrgnParser {
public:
DrgnParser(TypeGraph& typeGraph,
const std::vector<std::unique_ptr<ContainerInfo>>& containers,
std::span<const std::unique_ptr<ContainerInfo>> containers,
DrgnParserOptions options)
: typeGraph_(typeGraph), containers_(containers), options_(options) {
}
@ -90,6 +91,7 @@ class DrgnParser {
drgn_types_.insert({drgnType, newType});
return newType;
}
bool chasePointer() const;
// Store a mapping of drgn types to type graph nodes for deduplication during
@ -98,7 +100,7 @@ class DrgnParser {
drgn_types_;
TypeGraph& typeGraph_;
const std::vector<std::unique_ptr<ContainerInfo>>& containers_;
std::span<const std::unique_ptr<ContainerInfo>> containers_;
int depth_;
DrgnParserOptions options_;
};