From 31050735d6ee3cf245ce8f286331440ce1a509ef Mon Sep 17 00:00:00 2001 From: Alastair Robertson Date: Mon, 26 Jun 2023 08:43:15 -0700 Subject: [PATCH] CodeGen: Add support for capturing Thrift isset data --- oi/CodeGen.cpp | 286 +++++++++++++++++------- oi/CodeGen.h | 14 ++ oi/ContainerTypeEnum.h | 1 + oi/TreeBuilder.cpp | 1 + test/integration/thrift_namespaces.toml | 1 + types/thrift_isset_type.toml | 2 +- 6 files changed, 218 insertions(+), 87 deletions(-) diff --git a/oi/CodeGen.cpp b/oi/CodeGen.cpp index 6f2ce5f..721887d 100644 --- a/oi/CodeGen.cpp +++ b/oi/CodeGen.cpp @@ -41,6 +41,7 @@ using type_graph::Class; using type_graph::Container; using type_graph::Enum; +using type_graph::Member; using type_graph::Type; using type_graph::Typedef; using type_graph::TypeGraph; @@ -165,6 +166,81 @@ void genDecls(const TypeGraph& typeGraph, std::string& code) { } } +/* + * Generates a declaration for a given fully-qualified type. + * + * e.g. Given "nsA::nsB::Foo" + * + * The following is generated: + * namespace nsA::nsB { + * struct Foo; + * } // namespace nsA::nsB + */ +void declareFullyQualifiedStruct(const std::string& name, std::string& code) { + if (auto pos = name.rfind("::"); pos != name.npos) { + auto ns = name.substr(0, pos); + auto structName = name.substr(pos + 2); + code += "namespace "; + code += ns; + code += " {\n"; + code += "struct " + structName + ";\n"; + code += "} // namespace "; + code += ns; + code += "\n"; + } else { + code += "struct "; + code += name; + code += ";\n"; + } +} + +void genDefsThriftClass(const Class& c, std::string& code) { + declareFullyQualifiedStruct(c.fqName(), code); + code += "namespace apache { namespace thrift {\n"; + code += "template <> struct TStructDataStorage<" + c.fqName() + "> {\n"; + code += + " static constexpr const std::size_t fields_size = 1; // Invalid, do " + "not use\n"; + code += + " static const std::array " + "fields_names;\n"; + code += " static const std::array fields_ids;\n"; + code += + " static const 
std::array fields_types;\n"; + code += "\n"; + code += + " static const std::array " + "storage_names;\n"; + code += + " static const std::array __attribute__((weak)) " + "isset_indexes;\n"; + code += "};\n"; + code += "}} // namespace thrift, namespace apache\n"; +} + +} // namespace + +void CodeGen::genDefsThrift(const TypeGraph& typeGraph, std::string& code) { + for (const Type& t : typeGraph.finalTypes) { + if (const auto* c = dynamic_cast(&t)) { + const Member* issetMember = nullptr; + for (const auto& member : c->members) { + if (const auto* container = dynamic_cast(member.type); + container && container->containerInfo_.ctype == THRIFT_ISSET_TYPE) { + issetMember = &member; + break; + } + } + if (issetMember) { + genDefsThriftClass(*c, code); + thriftIssetMembers_[c] = issetMember; + } + } + } +} + +namespace { + void genDefsClass(const Class& c, std::string& code) { if (c.kind() == Class::Kind::Union) code += "union "; @@ -293,95 +369,129 @@ void addStandardGetSizeFuncDefs(std::string& code) { void getClassSizeFuncDecl(const Class& c, std::string& code) { code += "void getSizeType(const " + c.name() + " &t, size_t &returnArg);\n"; } +} // namespace -void getClassSizeFuncDef(const Class& c, - SymbolService& symbols, - bool polymorphicInheritance, - std::string& code) { - bool enablePolymorphicInheritance = polymorphicInheritance && c.isDynamic(); +/* + * Generates a getSizeType function for the given concrete class. + * + * Does not worry about polymorphism. 
+ */ +void CodeGen::getClassSizeFuncConcrete(std::string_view funcName, + const Class& c, + std::string& code) const { + code += "void " + std::string{funcName} + "(const " + c.name() + + " &t, size_t &returnArg) {\n"; - std::string funcName = "getSizeType"; - if (enablePolymorphicInheritance) { - funcName = "getSizeTypeConcrete"; + const Member* thriftIssetMember = nullptr; + if (const auto it = thriftIssetMembers_.find(&c); + it != thriftIssetMembers_.end()) { + thriftIssetMember = it->second; } - code += - "void " + funcName + "(const " + c.name() + " &t, size_t &returnArg) {\n"; - for (const auto& member : c.members) { + if (thriftIssetMember) { + code += " using thrift_data = apache::thrift::TStructDataStorage<" + + c.fqName() + ">;\n"; + } + + for (size_t i = 0; i < c.members.size(); i++) { + const auto& member = c.members[i]; if (member.name.starts_with(type_graph::AddPadding::MemberPrefix)) continue; + + if (thriftIssetMember && thriftIssetMember != &member) { + // Capture Thrift's isset value for each field, except for __isset + // itself + std::string issetIdxStr = + "thrift_data::isset_indexes[" + std::to_string(i) + "]"; + code += " if (&thrift_data::isset_indexes != nullptr && " + issetIdxStr + + " != -1) {\n"; + code += " SAVE_DATA(t." + thriftIssetMember->name + ".get(" + + issetIdxStr + "));\n"; + code += " } else {\n"; + code += " SAVE_DATA(-1);\n"; + code += " }\n"; + } + code += " JLOG(\"" + member.name + " @\");\n"; code += " JLOGPTR(&t." + member.name + ");\n"; code += " getSizeType(t." 
+ member.name + ", returnArg);\n"; } code += "}\n"; - - if (enablePolymorphicInheritance) { - std::vector childVtableAddrs; - childVtableAddrs.reserve(c.children.size()); - - for (const Type& childType : c.children) { - auto* childClass = dynamic_cast(&childType); - if (childClass == nullptr) { - abort(); // TODO - } - // TODO: - // auto fqChildName = *fullyQualifiedName(child); - auto fqChildName = "TODO - implement me"; - - // We must split this assignment and append because the C++ standard lacks - // an operator for concatenating std::string and std::string_view... - std::string childVtableName = "vtable for "; - childVtableName += fqChildName; - - auto optVtableSym = symbols.locateSymbol(childVtableName, true); - if (!optVtableSym) { - // LOG(ERROR) << "Failed to find vtable address for '" << - // childVtableName; LOG(ERROR) << "Falling back to non dynamic - // mode"; - childVtableAddrs.clear(); // TODO why?? - break; - } - childVtableAddrs.push_back(*optVtableSym); - } - - code += - "void getSizeType(const " + c.name() + " &t, size_t &returnArg) {\n"; - code += " auto *vptr = *reinterpret_cast(&t);\n"; - code += " uintptr_t topOffset = *(vptr - 2);\n"; - code += " uintptr_t vptrVal = reinterpret_cast(vptr);\n"; - - for (size_t i = 0; i < c.children.size(); i++) { - // The vptr will point to *somewhere* in the vtable of this object's - // concrete class. The exact offset into the vtable can vary based on a - // number of factors, so we compare the vptr against the vtable range for - // each possible class to determine the concrete type. - // - // This works for C++ compilers which follow the GNU v3 ABI, i.e. GCC and - // Clang. Other compilers may differ. 
- const Type& child = c.children[i]; - auto& vtableSym = childVtableAddrs[i]; - uintptr_t vtableMinAddr = vtableSym.addr; - uintptr_t vtableMaxAddr = vtableSym.addr + vtableSym.size; - code += " if (vptrVal >= 0x" + - (boost::format("%x") % vtableMinAddr).str() + " && vptrVal < 0x" + - (boost::format("%x") % vtableMaxAddr).str() + ") {\n"; - code += " SAVE_DATA(" + std::to_string(i) + ");\n"; - code += - " uintptr_t baseAddress = reinterpret_cast(&t) + " - "topOffset;\n"; - code += " getSizeTypeConcrete(*reinterpret_cast(baseAddress), returnArg);\n"; - code += " return;\n"; - code += " }\n"; - } - - code += " SAVE_DATA(-1);\n"; - code += " getSizeTypeConcrete(t, returnArg);\n"; - code += "}\n"; - } } +void CodeGen::getClassSizeFuncDef(const Class& c, std::string& code) { + if (!config_.features[Feature::PolymorphicInheritance] || !c.isDynamic()) { + // Just directly use the concrete size function as this class' getSizeType() + getClassSizeFuncConcrete("getSizeType", c, code); + return; + } + + getClassSizeFuncConcrete("getSizeTypeConcrete", c, code); + + std::vector childVtableAddrs; + childVtableAddrs.reserve(c.children.size()); + + for (const Type& childType : c.children) { + auto* childClass = dynamic_cast(&childType); + if (childClass == nullptr) { + abort(); // TODO + } + // TODO: + // auto fqChildName = *fullyQualifiedName(child); + auto fqChildName = "TODO - implement me"; + + // We must split this assignment and append because the C++ standard lacks + // an operator for concatenating std::string and std::string_view... + std::string childVtableName = "vtable for "; + childVtableName += fqChildName; + + auto optVtableSym = symbols_.locateSymbol(childVtableName, true); + if (!optVtableSym) { + // LOG(ERROR) << "Failed to find vtable address for '" << + // childVtableName; LOG(ERROR) << "Falling back to non dynamic + // mode"; + childVtableAddrs.clear(); // TODO why?? 
+ break; + } + childVtableAddrs.push_back(*optVtableSym); + } + + code += "void getSizeType(const " + c.name() + " &t, size_t &returnArg) {\n"; + code += " auto *vptr = *reinterpret_cast(&t);\n"; + code += " uintptr_t topOffset = *(vptr - 2);\n"; + code += " uintptr_t vptrVal = reinterpret_cast(vptr);\n"; + + for (size_t i = 0; i < c.children.size(); i++) { + // The vptr will point to *somewhere* in the vtable of this object's + // concrete class. The exact offset into the vtable can vary based on a + // number of factors, so we compare the vptr against the vtable range for + // each possible class to determine the concrete type. + // + // This works for C++ compilers which follow the GNU v3 ABI, i.e. GCC and + // Clang. Other compilers may differ. + const Type& child = c.children[i]; + auto& vtableSym = childVtableAddrs[i]; + uintptr_t vtableMinAddr = vtableSym.addr; + uintptr_t vtableMaxAddr = vtableSym.addr + vtableSym.size; + code += " if (vptrVal >= 0x" + + (boost::format("%x") % vtableMinAddr).str() + " && vptrVal < 0x" + + (boost::format("%x") % vtableMaxAddr).str() + ") {\n"; + code += " SAVE_DATA(" + std::to_string(i) + ");\n"; + code += + " uintptr_t baseAddress = reinterpret_cast(&t) + " + "topOffset;\n"; + code += " getSizeTypeConcrete(*reinterpret_cast(baseAddress), returnArg);\n"; + code += " return;\n"; + code += " }\n"; + } + + code += " SAVE_DATA(-1);\n"; + code += " getSizeTypeConcrete(t, returnArg);\n"; + code += "}\n"; +} + +namespace { void getContainerSizeFuncDecl(const Container& c, std::string& code) { auto fmt = boost::format(c.containerInfo_.codegen.decl) % c.containerInfo_.typeName; @@ -410,21 +520,21 @@ void addGetSizeFuncDecls(const TypeGraph& typeGraph, std::string& code) { } } -void addGetSizeFuncDefs( - const TypeGraph& typeGraph, - SymbolService& symbols, - std::unordered_set& definedContainers, - bool polymorphicInheritance, - std::string& code) { +} // namespace + +void CodeGen::addGetSizeFuncDefs(const TypeGraph& typeGraph, + 
std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { - getClassSizeFuncDef(*c, symbols, polymorphicInheritance, code); + getClassSizeFuncDef(*c, code); } else if (const auto* con = dynamic_cast(&t)) { - getContainerSizeFuncDef(definedContainers, *con, code); + getContainerSizeFuncDef(definedContainers_, *con, code); } } } +namespace { + void addStandardTypeHandlers(std::string& code) { code += R"( template @@ -455,6 +565,7 @@ void addStandardTypeHandlers(std::string& code) { )"; } +// TODO support thrift isset void getClassTypeHandler(const Class& c, std::string& code) { std::string funcName = "getSizeType"; @@ -646,6 +757,10 @@ void CodeGen::generate( code += "} // namespace\n} // namespace OIInternal\n"; } + if (config_.features[Feature::CaptureThriftIsset]) { + genDefsThrift(typeGraph, code); + } + /* * The purpose of the anonymous namespace within `OIInternal` is that * anything defined within an anonymous namespace has internal-linkage, @@ -676,8 +791,7 @@ void CodeGen::generate( addGetSizeFuncDecls(typeGraph, code); addStandardGetSizeFuncDefs(code); - addGetSizeFuncDefs(typeGraph, symbols_, definedContainers_, - config_.features[Feature::PolymorphicInheritance], code); + addGetSizeFuncDefs(typeGraph, code); } assert(typeGraph.rootTypes().size() == 1); diff --git a/oi/CodeGen.h b/oi/CodeGen.h index 361c53f..8bea7c3 100644 --- a/oi/CodeGen.h +++ b/oi/CodeGen.h @@ -18,7 +18,9 @@ #include #include #include +#include #include +#include #include "ContainerInfo.h" #include "OICodeGen.h" @@ -28,6 +30,8 @@ struct drgn_type; class SymbolService; namespace type_graph { +class Class; +struct Member; class TypeGraph; } // namespace type_graph @@ -58,4 +62,14 @@ class CodeGen { SymbolService& symbols_; std::vector containerInfos_; std::unordered_set definedContainers_; + std::unordered_map + thriftIssetMembers_; + + void genDefsThrift(const type_graph::TypeGraph& typeGraph, std::string& code); + void 
addGetSizeFuncDefs(const type_graph::TypeGraph& typeGraph, + std::string& code); + void getClassSizeFuncDef(const type_graph::Class& c, std::string& code); + void getClassSizeFuncConcrete(std::string_view funcName, + const type_graph::Class& c, + std::string& code) const; }; diff --git a/oi/ContainerTypeEnum.h b/oi/ContainerTypeEnum.h index 56a6817..664bc7e 100644 --- a/oi/ContainerTypeEnum.h +++ b/oi/ContainerTypeEnum.h @@ -55,6 +55,7 @@ X(ENUM_MAP_TYPE) \ X(BOOST_BIMAP_TYPE) \ X(STD_VARIANT_TYPE) \ + X(THRIFT_ISSET_TYPE) \ X(DUMMY_TYPE) \ X(WEAK_PTR_TYPE) diff --git a/oi/TreeBuilder.cpp b/oi/TreeBuilder.cpp index 2bd068a..6655f46 100644 --- a/oi/TreeBuilder.cpp +++ b/oi/TreeBuilder.cpp @@ -835,6 +835,7 @@ void TreeBuilder::processContainer(const Variable& variable, Node& node) { case BY_MULTI_QRT_TYPE: containerStats.length = containerStats.capacity = next(); break; + case THRIFT_ISSET_TYPE: case DUMMY_TYPE: // Dummy container containerStats.elementStaticSize = 0; diff --git a/test/integration/thrift_namespaces.toml b/test/integration/thrift_namespaces.toml index b99aceb..cc0a28c 100644 --- a/test/integration/thrift_namespaces.toml +++ b/test/integration/thrift_namespaces.toml @@ -15,6 +15,7 @@ thrift_definitions = ''' namespaceA::namespaceB::TTTTT ret; return ret; ''' + cli_options = ["-fcapture-thrift-isset"] expect_json = '''[{ "staticSize":16, "dynamicSize":0, diff --git a/types/thrift_isset_type.toml b/types/thrift_isset_type.toml index 09ca687..54eff13 100644 --- a/types/thrift_isset_type.toml +++ b/types/thrift_isset_type.toml @@ -1,6 +1,6 @@ [info] type_name = "apache::thrift::detail::isset_bitset" -ctype = "DUMMY_TYPE" +ctype = "THRIFT_ISSET_TYPE" header = "thrift/lib/cpp2/gen/module_types_h.h" # Old: