From a49f0e7410be5af7358fa69a814baa2cba01e492 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Tue, 31 Oct 2023 10:22:56 -0700 Subject: [PATCH] codegenv2: improve multi argument generation (#395) Summary: Adds a new function `CodeGen::codegenFromDrgns` which runs multiple drgn root types through one `DrgnParser`. The naming for the output is now decided and stored within this function; previously it was decided when doing the actual codegen, which is too late with multiple root types. This new function is used in `OIGenerator` when you have multiple OIL calls within one input object. Rather than producing separate `.o`s with likely duplicate code for each callsite, we produce a single `.o` that contains all of the calls. For calls with types with significant overlap this should be a significant reduction in work. The downside is the calls can't have different features, but this could be solved in the future by namespacing the code based on features/config within the generated `.cpp` - the pros seem to outweigh the con. This should also be used with `oid` in multi-probe mode as it would again significantly reduce the work it has to do. I didn't do this yet as it requires changing the cache. As I've got a big cache refactor ongoing at the moment, it makes sense to wait until after that's landed to make this change. Test Plan: Generally tested with GitHub CI. Tested the new multi-call OILGen with the new example seen below. Outputs the following code with two root calls: P869569859 - note that `VectorOfStrings_0` is the only instance, whereas previously we'd have generated it in two files.
Differential Revision: D50835153 Pulled By: JakeHillion --- oi/CodeGen.cpp | 144 ++++++++++++++++++++++++------------- oi/CodeGen.h | 31 ++++---- oi/FuncGen.cpp | 84 +++++++++++++--------- oi/FuncGen.h | 24 ++++--- oi/OICodeGen.cpp | 4 +- oi/OIGenerator.cpp | 79 ++++++++------------ oi/type_graph/DrgnParser.h | 6 +- 7 files changed, 215 insertions(+), 157 deletions(-) diff --git a/oi/CodeGen.cpp b/oi/CodeGen.cpp index fef0a47..fae6a02 100644 --- a/oi/CodeGen.cpp +++ b/oi/CodeGen.cpp @@ -42,6 +42,9 @@ #include "type_graph/TypeIdentifier.h" #include "type_graph/Types.h" +template +inline constexpr bool always_false_v = false; + namespace oi::detail { using type_graph::AddChildren; @@ -74,6 +77,13 @@ using ref = std::reference_wrapper; namespace { +std::string rootTypedefName(Type& t) { + std::string out{"RootType"}; + out += std::to_string(std::hash{}(t.name())); + out += '_'; + return out; +} + std::vector enumerateTypeNames(Type& type) { std::vector names; Type* t = &type; @@ -1106,14 +1116,7 @@ void CodeGen::addTypeHandlers(const TypeGraph& typeGraph, std::string& code) { } } -bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, - std::string linkageName, - std::string& code) { - linkageName_ = std::move(linkageName); - return codegenFromDrgn(drgnType, code); -} - -bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) { +bool CodeGen::codegenFromDrgns(std::span reqs, std::string& code) { try { containerInfos_.reserve(config_.containerConfigPaths.size()); for (const auto& path : config_.containerConfigPaths) { @@ -1125,18 +1128,40 @@ bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) { } TypeGraph typeGraph; - try { - addDrgnRoot(drgnType, typeGraph); - } catch (const type_graph::DrgnParserError& err) { - LOG(ERROR) << "Error parsing DWARF: " << err.what(); - return false; + DrgnParserOptions options{ + .chaseRawPointers = config_.features[Feature::ChaseRawPointers], + }; + DrgnParser drgnParser{typeGraph, 
containerInfos_, options}; + + for (auto& req : reqs) { + try { + Type& parsedRoot = drgnParser.parse(req.ty); + typeGraph.addRoot(parsedRoot); + if (req.linkageName.has_value()) { + rootNames_.emplace_back(ExactName{.name = std::move(*req.linkageName)}); + } else { + rootNames_.emplace_back( + HashedComponent{.name = SymbolService::getTypeName(req.ty)}); + } + } catch (const type_graph::DrgnParserError& err) { + LOG(ERROR) << "Error parsing DWARF: " << err.what(); + return false; + } } transform(typeGraph); - generate(typeGraph, code, drgnType); + generate(typeGraph, code); return true; } +bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) { + std::array reqs{DrgnRequest{ + .ty = drgnType, + .linkageName = std::nullopt, + }}; + return codegenFromDrgns(reqs, code); +} + void CodeGen::registerContainer(const fs::path& path) { auto info = std::make_unique(path); if (info->requiredFeatures != (config_.features & info->requiredFeatures)) { @@ -1147,15 +1172,6 @@ void CodeGen::registerContainer(const fs::path& path) { containerInfos_.emplace_back(std::move(info)); } -void CodeGen::addDrgnRoot(struct drgn_type* drgnType, TypeGraph& typeGraph) { - DrgnParserOptions options{ - .chaseRawPointers = config_.features[Feature::ChaseRawPointers], - }; - DrgnParser drgnParser{typeGraph, containerInfos_, options}; - Type& parsedRoot = drgnParser.parse(drgnType); - typeGraph.addRoot(parsedRoot); -} - void CodeGen::transform(TypeGraph& typeGraph) { type_graph::PassManager pm; @@ -1207,11 +1223,7 @@ void CodeGen::transform(TypeGraph& typeGraph) { }; } -void CodeGen::generate( - TypeGraph& typeGraph, - std::string& code, - struct drgn_type* drgnType /* TODO: this argument should not be required */ -) { +void CodeGen::generate(TypeGraph& typeGraph, std::string& code) { code = headers::oi_OITraceCode_cpp; if (!config_.features[Feature::Library]) { FuncGen::DeclareExterns(code); @@ -1280,30 +1292,66 @@ void CodeGen::generate( addGetSizeFuncDefs(typeGraph, code); 
} - assert(typeGraph.rootTypes().size() == 1); - Type& rootType = typeGraph.rootTypes()[0]; - code += "\nusing __ROOT_TYPE__ = " + rootType.name() + ";\n"; + // Give each root type a unique typedef in the OIInternal namespace so they + // don't have naming issues from outside. + for (Type& rootType : typeGraph.rootTypes()) { + code += "using "; + code += rootTypedefName(rootType); + code += " = "; + code += rootType.name(); + code += ";\n"; + } + code += "} // namespace\n} // namespace OIInternal\n"; - const auto typeName = SymbolService::getTypeName(drgnType); - if (config_.features[Feature::Library]) { - FuncGen::DefineTopLevelIntrospect(code, typeName); - } else if (config_.features[Feature::TypedDataSegment]) { - FuncGen::DefineTopLevelGetSizeRefTyped(code, typeName, config_.features); - } else { - FuncGen::DefineTopLevelGetSizeRef(code, typeName, config_.features); - } + assert(typeGraph.rootTypes().size() == rootNames_.size()); + // should be std::ranges::zip_view(typeGraph.rootTypes(), rootNames_) + auto rootTypeIt = typeGraph.rootTypes().begin(); + for (auto rootNameIt = rootNames_.cbegin(); + rootNameIt != rootNames_.cend() && + rootTypeIt != typeGraph.rootTypes().end(); + ++rootNameIt, ++rootTypeIt) { + Type& rootType = *rootTypeIt; + const auto& rootName = *rootNameIt; - if (config_.features[Feature::TreeBuilderV2]) { - FuncGen::DefineTreeBuilderInstructions(code, typeName, - calculateExclusiveSize(rootType), - enumerateTypeNames(rootType)); - } else if (config_.features[Feature::TreeBuilderTypeChecking]) { - FuncGen::DefineOutputType(code, typeName); - } + const auto typedefName = rootTypedefName(rootType); - if (!linkageName_.empty()) - FuncGen::DefineTopLevelIntrospectNamed(code, typeName, linkageName_); + if (config_.features[Feature::TreeBuilderV2]) { + std::visit( + [&](const auto& name) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + FuncGen::DefineTopLevelIntrospectNamed( + code, typedefName, name.name, + 
calculateExclusiveSize(rootType), + enumerateTypeNames(rootType)); + } else if constexpr (std::is_same_v) { + FuncGen::DefineTopLevelIntrospect(code, typedefName, name.name); + FuncGen::DefineTreeBuilderInstructions( + code, typedefName, name.name, + calculateExclusiveSize(rootType), + enumerateTypeNames(rootType)); + } else { + static_assert(always_false_v); + } + }, + rootName); + } else if (config_.features[Feature::TypedDataSegment]) { + const auto& name = std::get(rootName).name; + FuncGen::DefineTopLevelGetSizeRefTyped(code, typedefName, name, + config_.features); + } else { + const auto& name = std::get(rootName).name; + FuncGen::DefineTopLevelGetSizeRef(code, typedefName, name, + config_.features); + } + + if (config_.features[Feature::TreeBuilderTypeChecking] && + !config_.features[Feature::TreeBuilderV2]) { + const auto& name = std::get(rootName).name; + FuncGen::DefineOutputType(code, typedefName, name); + } + } if (VLOG_IS_ON(3)) { VLOG(3) << "Generated trace code:\n"; diff --git a/oi/CodeGen.h b/oi/CodeGen.h index c6f2a5d..f64140e 100644 --- a/oi/CodeGen.h +++ b/oi/CodeGen.h @@ -18,9 +18,11 @@ #include #include #include +#include #include #include #include +#include #include #include "ContainerInfo.h" @@ -40,6 +42,11 @@ namespace oi::detail { class CodeGen { public: + struct DrgnRequest { + drgn_type* ty = nullptr; + std::optional linkageName; + }; + CodeGen(const OICodeGen::Config& config, SymbolService& symbols) : config_(config), symbols_(symbols) { } @@ -48,29 +55,29 @@ class CodeGen { * Helper function to perform all the steps required for code generation for a * single drgn_type. 
*/ - bool codegenFromDrgn(struct drgn_type* drgnType, std::string& code); - bool codegenFromDrgn(struct drgn_type* drgnType, - std::string linkageName, - std::string& code); + bool codegenFromDrgn(drgn_type* drgnType, std::string& code); + bool codegenFromDrgns(std::span reqs, std::string& code); void registerContainer(const std::filesystem::path& path); - void addDrgnRoot(struct drgn_type* drgnType, - type_graph::TypeGraph& typeGraph); void transform(type_graph::TypeGraph& typeGraph); - void generate(type_graph::TypeGraph& typeGraph, - std::string& code, - struct drgn_type* - drgnType /* TODO: this argument should not be required */ - ); + void generate(type_graph::TypeGraph& typeGraph, std::string& code); private: + struct ExactName { + std::string name; + }; + struct HashedComponent { + std::string name; + }; + using RootFunctionName = std::variant; + const OICodeGen::Config& config_; SymbolService& symbols_; std::vector> containerInfos_; std::unordered_set definedContainers_; std::unordered_map thriftIssetMembers_; - std::string linkageName_; + std::vector rootNames_; void genDefsThrift(const type_graph::TypeGraph& typeGraph, std::string& code); void addGetSizeFuncDefs(const type_graph::TypeGraph& typeGraph, diff --git a/oi/FuncGen.cpp b/oi/FuncGen.cpp index dc494f9..1d12e01 100644 --- a/oi/FuncGen.cpp +++ b/oi/FuncGen.cpp @@ -252,13 +252,13 @@ void FuncGen::DefineStoreData(std::string& testCode) { } void FuncGen::DefineTopLevelIntrospect(std::string& code, - const std::string& type) { + const std::string& type, + const std::string& idToHash) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" -/* RawType: %1% */ void __attribute__((used, retain)) introspect_%2$016x( - const OIInternal::__ROOT_TYPE__& t, + const OIInternal::%1%& t, std::vector& v) #pragma GCC diagnostic pop { @@ -269,7 +269,7 @@ void __attribute__((used, retain)) introspect_%2$016x( v.reserve(4096); using DataBufferType = 
DataBuffer::BackInserter>; - using ContentType = OIInternal::TypeHandler::type; + using ContentType = OIInternal::TypeHandler::type; ContentType ret{DataBufferType{v}}; OIInternal::getSizeType(t, ret); @@ -277,39 +277,47 @@ void __attribute__((used, retain)) introspect_%2$016x( )"; code.append( - (boost::format(func) % type % std::hash{}(type)).str()); + (boost::format(func) % type % std::hash{}(idToHash)).str()); } -void FuncGen::DefineTopLevelIntrospectNamed(std::string& code, - const std::string& type, - const std::string& linkageName) { - std::string typeHash = - (boost::format("%1$016x") % std::hash{}(type)).str(); +void FuncGen::DefineTopLevelIntrospectNamed( + std::string& code, + const std::string& type, + const std::string& linkageName, + size_t exclusiveSize, + std::span typeNames) { + code += "namespace {\n"; + DefineTreeBuilderInstructions(code, type, linkageName, exclusiveSize, + typeNames); + DefineTopLevelIntrospect(code, type, linkageName); + code += "} // namespace\n"; + + std::string internalId = + (boost::format("%1$016x") % std::hash{}(linkageName)).str(); - code += "/* RawType: "; - code += type; - code += " */\n"; code += "extern \"C\" IntrospectionResult "; code += linkageName; - code += "(const OIInternal::__ROOT_TYPE__& t) {\n"; + code += "(const OIInternal::"; + code += type; + code += "& t) {\n"; code += " std::vector v{};\n"; code += " introspect_"; - code += typeHash; + code += internalId; code += "(t, v);\n"; code += " return IntrospectionResult{std::move(v), treeBuilderInstructions"; - code += typeHash; + code += internalId; code += "};\n"; code += "}\n"; } void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, FeatureSet features) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" - /* RawType: %1% */ - void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& 
t) + void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t) #pragma GCC diagnostic pop { )"; @@ -348,7 +356,7 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode, )"; boost::format fmt = - boost::format(func) % rawType % std::hash{}(rawType); + boost::format(func) % type % std::hash{}(idToHash); testCode.append(fmt.str()); } @@ -359,13 +367,13 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode, * with feature '-ftyped-data-segment'. */ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, FeatureSet features) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" - /* RawType: %1% */ - void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& t) + void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t) #pragma GCC diagnostic pop { )"; @@ -390,7 +398,7 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, JLOG("%1% @"); JLOGPTR(&t); - using ContentType = OIInternal::TypeHandler::type; + using ContentType = OIInternal::TypeHandler::type; using SuffixType = types::st::Pair< DataBuffer::DataSegment, types::st::VarInt, @@ -425,7 +433,7 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, )"; boost::format fmt = - boost::format(func) % rawType % std::hash{}(rawType); + boost::format(func) % type % std::hash{}(idToHash); testCode.append(fmt.str()); } @@ -434,28 +442,30 @@ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, * * Present the dynamic type of an object for OID/OIL/OITB to link against. 
*/ -void FuncGen::DefineOutputType(std::string& code, const std::string& rawType) { +void FuncGen::DefineOutputType(std::string& code, + const std::string& type, + const std::string& idToHash) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" - /* RawType: %1% */ extern const types::dy::Dynamic __attribute__((used, retain)) outputType%2$016x = - OIInternal::TypeHandler::type::describe; + OIInternal::TypeHandler::type::describe; #pragma GCC diagnostic pop )"; boost::format fmt = - boost::format(func) % rawType % std::hash{}(rawType); + boost::format(func) % type % std::hash{}(idToHash); code.append(fmt.str()); } void FuncGen::DefineTreeBuilderInstructions( std::string& code, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, size_t exclusiveSize, std::span typeNames) { std::string typeHash = - (boost::format("%1$016x") % std::hash{}(rawType)).str(); + (boost::format("%1$016x") % std::hash{}(idToHash)).str(); code += R"( #pragma GCC diagnostic push @@ -474,13 +484,17 @@ const std::array::fields, " - "OIInternal::TypeHandler::processors};\n"; + code += ", OIInternal::TypeHandler typeNames); static void DefineTopLevelGetSizeRef(std::string& testCode, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, FeatureSet features); static void DefineTopLevelGetSizeRefTyped(std::string& testCode, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, FeatureSet features); static void DefineOutputType(std::string& testCode, - const std::string& rawType); + const std::string& type, + const std::string& idToHash); static void DefineTreeBuilderInstructions( std::string& testCode, - const std::string& rawType, + const std::string& type, + const std::string& idToHash, size_t exclusiveSize, std::span typeNames); diff --git a/oi/OICodeGen.cpp b/oi/OICodeGen.cpp index 0cbbfc9..21e2d5f 100644 --- a/oi/OICodeGen.cpp +++ 
b/oi/OICodeGen.cpp @@ -3331,8 +3331,8 @@ bool OICodeGen::generateJitCode(std::string& code) { funcGen.DefineTopLevelGetSizeSmartPtr(functionsCode, rawTypeName, config.features); } else { - funcGen.DefineTopLevelGetSizeRef(functionsCode, rawTypeName, - config.features); + funcGen.DefineTopLevelGetSizeRef(functionsCode, "__ROOT_TYPE__", + rawTypeName, config.features); } } diff --git a/oi/OIGenerator.cpp b/oi/OIGenerator.cpp index d98e7b8..2c662af 100644 --- a/oi/OIGenerator.cpp +++ b/oi/OIGenerator.cpp @@ -29,9 +29,16 @@ #include "oi/Config.h" #include "oi/DrgnUtils.h" #include "oi/Headers.h" +#include "oi/type_graph/DrgnParser.h" +#include "oi/type_graph/TypeGraph.h" namespace oi::detail { +using type_graph::DrgnParser; +using type_graph::DrgnParserOptions; +using type_graph::Type; +using type_graph::TypeGraph; + std::unordered_map OIGenerator::oilStrongToWeakSymbolsMap(drgnplusplus::program& prog) { static constexpr std::string_view strongSymbolPrefix = @@ -128,42 +135,6 @@ OIGenerator::findOilTypesAndNames(drgnplusplus::program& prog) { return out; } -fs::path OIGenerator::generateForType(const OICodeGen::Config& generatorConfig, - const OICompiler::Config& compilerConfig, - const drgn_qualified_type& type, - const std::string& linkageName, - SymbolService& symbols) { - CodeGen codegen{generatorConfig, symbols}; - - std::string code; - if (!codegen.codegenFromDrgn(type.type, linkageName, code)) { - LOG(ERROR) << "codegen failed!"; - return {}; - } - - std::string sourcePath = sourceFileDumpPath; - if (sourceFileDumpPath.empty()) { - // This is the path Clang acts as if it has compiled from e.g. for debug - // information. It does not need to exist. - sourcePath = "oil_jit.cpp"; - } else { - std::ofstream outputFile(sourcePath); - outputFile << code; - } - - OICompiler compiler{{}, compilerConfig}; - - // TODO: Revert to outputPath and remove printing when typegraph is done. - fs::path tmpObject = outputPath; - tmpObject.replace_extension( - "." 
+ std::to_string(std::hash{}(linkageName)) + ".o"); - - if (!compiler.compile(code, sourcePath, tmpObject)) { - return {}; - } - return tmpObject; -} - int OIGenerator::generate(fs::path& primaryObject, SymbolService& symbols) { drgnplusplus::program prog; @@ -202,26 +173,34 @@ int OIGenerator::generate(fs::path& primaryObject, SymbolService& symbols) { generatorConfig.features = *features; compilerConfig.features = *features; - size_t failures = 0; + std::vector reqs{}; + reqs.reserve(oilTypes.size()); for (const auto& [linkageName, type] : oilTypes) { - if (auto obj = generateForType(generatorConfig, compilerConfig, type, - linkageName, symbols); - !obj.empty()) { - std::cout << obj.string() << std::endl; - } else { - LOG(WARNING) << "failed to generate for symbol `" << linkageName - << "`. this is non-fatal but the call will not work."; - failures++; - } + reqs.emplace_back( + CodeGen::DrgnRequest{.ty = type.type, .linkageName = linkageName}); } - size_t successes = oilTypes.size() - failures; - LOG(INFO) << "object introspection generation complete. " << successes - << " successes and " << failures << " failures."; + CodeGen codegen{generatorConfig, symbols}; + std::string code; + codegen.codegenFromDrgns(reqs, code); - if (failures > 0 || (failIfNothingGenerated && successes == 0)) { + std::string sourcePath = sourceFileDumpPath; + if (sourceFileDumpPath.empty()) { + // This is the path Clang acts as if it has compiled from e.g. for debug + // information. It does not need to exist. 
+ sourcePath = "oil_jit.cpp"; + } else { + std::ofstream outputFile(sourcePath); + outputFile << code; + } + OICompiler compiler{{}, compilerConfig}; + + if (!compiler.compile(code, sourcePath, outputPath)) { return -1; } + + LOG(INFO) << "object introspection generation complete, generated for " + << oilTypes.size() << "sites."; return 0; } diff --git a/oi/type_graph/DrgnParser.h b/oi/type_graph/DrgnParser.h index 9b00928..1ababda 100644 --- a/oi/type_graph/DrgnParser.h +++ b/oi/type_graph/DrgnParser.h @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -54,7 +55,7 @@ struct DrgnParserOptions { class DrgnParser { public: DrgnParser(TypeGraph& typeGraph, - const std::vector>& containers, + std::span> containers, DrgnParserOptions options) : typeGraph_(typeGraph), containers_(containers), options_(options) { } @@ -90,6 +91,7 @@ class DrgnParser { drgn_types_.insert({drgnType, newType}); return newType; } + bool chasePointer() const; // Store a mapping of drgn types to type graph nodes for deduplication during @@ -98,7 +100,7 @@ class DrgnParser { drgn_types_; TypeGraph& typeGraph_; - const std::vector>& containers_; + std::span> containers_; int depth_; DrgnParserOptions options_; };