/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "CodeGen.h" #include #include #include #include #include #include "oi/FuncGen.h" #include "oi/Headers.h" #include "oi/SymbolService.h" #include "type_graph/AddChildren.h" #include "type_graph/AddPadding.h" #include "type_graph/AlignmentCalc.h" #include "type_graph/DrgnParser.h" #include "type_graph/Flattener.h" #include "type_graph/NameGen.h" #include "type_graph/Prune.h" #include "type_graph/RemoveMembers.h" #include "type_graph/RemoveTopLevelPointer.h" #include "type_graph/TopoSorter.h" #include "type_graph/TypeGraph.h" #include "type_graph/TypeIdentifier.h" #include "type_graph/Types.h" using type_graph::AddChildren; using type_graph::AddPadding; using type_graph::AlignmentCalc; using type_graph::Class; using type_graph::Container; using type_graph::DrgnParser; using type_graph::Enum; using type_graph::Flattener; using type_graph::Member; using type_graph::NameGen; using type_graph::Prune; using type_graph::RemoveMembers; using type_graph::RemoveTopLevelPointer; using type_graph::TopoSorter; using type_graph::Type; using type_graph::Typedef; using type_graph::TypeGraph; using type_graph::TypeIdentifier; template using ref = std::reference_wrapper; namespace { /* Appends the SAVE_SIZE and SAVE_DATA macro definitions to code. The condition is hard-coded to true, so the variant where SAVE_DATA forwards to StoreData and SAVE_SIZE expands to nothing is the one emitted; the AddData variant in the else branch is currently unreachable. */ void defineMacros(std::string& code) { if (true /* TODO: config.useDataSegment*/) { code += R"( #define SAVE_SIZE(val) #define SAVE_DATA(val) StoreData(val, returnArg) )"; } else { code += R"( #define 
SAVE_SIZE(val) AddData(val, returnArg) #define SAVE_DATA(val) )"; } } /* Appends the definition of the OIArray struct template, which wraps a plain array member T vals[N], to code. */ void defineArray(std::string& code) { code += R"( template struct OIArray { T vals[N]; }; )"; } /* Appends the JLOG and JLOGPTR macros to code. With Feature::JitLogging enabled they emit output only when logFile is non-zero (JLOG through write, JLOGPTR through __jlogptr); otherwise both expand to nothing. */ void defineJitLog(FeatureSet features, std::string& code) { if (features[Feature::JitLogging]) { code += R"( #define JLOG(str) \ do { \ if (__builtin_expect(logFile, 0)) { \ write(logFile, str, sizeof(str) - 1); \ } \ } while (false) #define JLOGPTR(ptr) \ do { \ if (__builtin_expect(logFile, 0)) { \ __jlogptr((uintptr_t)ptr); \ } \ } while (false) )"; } else { code += R"( #define JLOG(str) #define JLOGPTR(ptr) )"; } } /* Appends one #include line, in angle-bracket form, per required header to code. The set starts with cstddef, gains extra headers for TypedDataSegment, TreeBuilderTypeChecking and JitTiming, and then the containerInfo_.header of each container found in typeGraph.finalTypes. Collecting into a std::set means a header is written once, in the iteration order of the set. */ void addIncludes(const TypeGraph& typeGraph, FeatureSet features, std::string& code) { std::set includes{"cstddef"}; if (features[Feature::TypedDataSegment]) { includes.emplace("functional"); includes.emplace("oi/types/st.h"); } if (features[Feature::TreeBuilderTypeChecking]) { includes.emplace("oi/types/dy.h"); code += "#define DEFINE_DESCRIBE 1\n"; // added before all includes } if (features[Feature::JitTiming]) { includes.emplace("chrono"); } for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { includes.emplace(c->containerInfo_.header); } } for (const auto& include : includes) { code += "#include <"; code += include; code += ">\n"; } } /* Appends a forward declaration of c: the keyword is union for Class::Kind::Union and struct for every other kind. */ void genDeclsClass(const Class& c, std::string& code) { if (c.kind() == Class::Kind::Union) code += "union "; else code += "struct "; code += c.name() + ";\n"; } /* Appends a using-alias that maps the enum name to the unsigned fixed-width integer matching e.size() (8, 4, 2 or 1); any other size aborts the process. */ void genDeclsEnum(const Enum& e, std::string& code) { code += "using " + e.name() + " = "; switch (e.size()) { case 8: code += "uint64_t"; break; case 4: code += "uint32_t"; break; case 2: code += "uint16_t"; break; case 1: code += "uint8_t"; break; default: abort(); // TODO } code += ";\n"; } /* Emits declarations for typeGraph.finalTypes, routing class entries to genDeclsClass and enum entries to genDeclsEnum; other entries are skipped. */ void genDecls(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { genDeclsClass(*c, code); } else if (const auto* e = dynamic_cast(&t)) { genDeclsEnum(*e, code); } } } /* * Generates a 
declaration for a given fully-qualified type. * * e.g. Given "nsA::nsB::Foo" * * The folowing is generated: * namespace nsA::nsB { * struct Foo; * } // namespace nsA::nsB */ void declareFullyQualifiedStruct(const std::string& name, std::string& code) { if (auto pos = name.rfind("::"); pos != name.npos) { auto ns = name.substr(0, pos); auto structName = name.substr(pos + 2); code += "namespace "; code += ns; code += " {\n"; code += "struct " + structName + ";\n"; code += "} // namespace "; code += ns; code += "\n"; } else { code += "struct "; code += name; code += ";\n"; } } /* Appends a forward declaration of c.fqName() plus an explicit specialization of apache::thrift::TStructDataStorage for it. The specialization only declares its static members; fields_size is set to 1 and flagged in the emitted text as invalid, and isset_indexes carries a weak attribute. */ void genDefsThriftClass(const Class& c, std::string& code) { declareFullyQualifiedStruct(c.fqName(), code); code += "namespace apache { namespace thrift {\n"; code += "template <> struct TStructDataStorage<" + c.fqName() + "> {\n"; code += " static constexpr const std::size_t fields_size = 1; // Invalid, do " "not use\n"; code += " static const std::array " "fields_names;\n"; code += " static const std::array fields_ids;\n"; code += " static const std::array fields_types;\n"; code += "\n"; code += " static const std::array " "storage_names;\n"; code += " static const std::array __attribute__((weak)) " "isset_indexes;\n"; code += "};\n"; code += "}} // namespace thrift, namespace apache\n"; } } // namespace /* Scans typeGraph.finalTypes for classes owning a member whose container ctype is THRIFT_ISSET_TYPE. For each such class the first matching member is recorded in thriftIssetMembers_ and genDefsThriftClass emits the storage declaration. */ void CodeGen::genDefsThrift(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { const Member* issetMember = nullptr; for (const auto& member : c->members) { if (const auto* container = dynamic_cast(&member.type()); container && container->containerInfo_.ctype == THRIFT_ISSET_TYPE) { issetMember = &member; break; } } if (issetMember) { genDefsThriftClass(*c, code); thriftIssetMembers_[c] = issetMember; } } } } namespace { /* Appends the complete definition of c: union or struct keyword, the packed attribute when c.packed(), an alignas specifier for unions, then one field per member with a bit-field width when mem.bitsize is non-zero. */ void genDefsClass(const Class& c, std::string& code) { if (c.kind() == Class::Kind::Union) code += "union "; else code += "struct "; if (c.packed()) { code += "__attribute__((__packed__)) "; } if 
(c.kind() == Class::Kind::Union) { // Need to specify alignment manually for unions as their members have been // removed. It would be nice to do this for all types, but our alignment // information is not complete, so it would result in some errors. code += "alignas(" + std::to_string(c.align()) + ") "; } code += c.name() + " {\n"; for (const auto& mem : c.members) { code += " " + mem.type().name() + " " + mem.name; if (mem.bitsize) { code += " : " + std::to_string(mem.bitsize); } code += ";\n"; } code += "};\n\n"; } /* Appends a using-alias from td.name() to the name of its underlying type. */ void genDefsTypedef(const Typedef& td, std::string& code) { code += "using " + td.name() + " = " + td.underlyingType().name() + ";\n"; } /* Emits definitions for typeGraph.finalTypes through genDefsClass and genDefsTypedef. */ void genDefs(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { genDefsClass(*c, code); } else if (const auto* td = dynamic_cast(&t)) { genDefsTypedef(*td, code); } } } /* Appends a validate_size static_assert for c, then a validate_offset static_assert for each member; members with a non-zero bitsize are skipped. */ void genStaticAssertsClass(const Class& c, std::string& code) { code += "static_assert(validate_size<" + c.name() + ", " + std::to_string(c.size()) + ">::value);\n"; for (const auto& member : c.members) { if (member.bitsize > 0) continue; code += "static_assert(validate_offset::value, \"Unexpected offset of " + c.name() + "::" + member.name + "\");\n"; } code.push_back('\n'); } /* Appends a validate_size static_assert for container c. */ void genStaticAssertsContainer(const Container& c, std::string& code) { code += "static_assert(validate_size<" + c.name() + ", " + std::to_string(c.size()) + ">::value);\n"; code.push_back('\n'); } /* Emits the static_asserts for typeGraph.finalTypes through genStaticAssertsClass and genStaticAssertsContainer. */ void genStaticAsserts(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { genStaticAssertsClass(*c, code); } else if (const auto* con = dynamic_cast(&t)) { genStaticAssertsContainer(*con, code); } } } /* Appends declarations of the built-in getSizeType overloads: by reference, by pointer, by void pointer and for OIArray. */ void addStandardGetSizeFuncDecls(std::string& code) { code += R"( template void getSizeType(const T &t, size_t& returnArg); template void getSizeType(/*const*/ T* s_ptr, size_t& returnArg); void getSizeType(/*const*/ void 
*s_ptr, size_t& returnArg); template void getSizeType(const OIArray& container, size_t& returnArg); )"; } void addStandardGetSizeFuncDefs(std::string& code) { // TODO use macros, not StoreData directly code += R"( template void getSizeType(const T &t, size_t& returnArg) { JLOG("obj @"); JLOGPTR(&t); SAVE_SIZE(sizeof(T)); } )"; // TODO const and non-const versions // OR maybe just remove const everywhere code += R"( template void getSizeType(/*const*/ T* s_ptr, size_t& returnArg) { JLOG("ptr val @"); JLOGPTR(s_ptr); StoreData((uintptr_t)(s_ptr), returnArg); if (s_ptr && pointers.add((uintptr_t)s_ptr)) { StoreData(1, returnArg); getSizeType(*(s_ptr), returnArg); } else { StoreData(0, returnArg); } } void getSizeType(/*const*/ void *s_ptr, size_t& returnArg) { JLOG("void ptr @"); JLOGPTR(s_ptr); StoreData((uintptr_t)(s_ptr), returnArg); } template void getSizeType(const OIArray& container, size_t& returnArg) { SAVE_DATA((uintptr_t)N); SAVE_SIZE(sizeof(container)); for (size_t i=0; isecond; } if (thriftIssetMember) { code += " using thrift_data = apache::thrift::TStructDataStorage<" + c.fqName() + ">;\n"; } for (size_t i = 0; i < c.members.size(); i++) { const auto& member = c.members[i]; if (member.name.starts_with(AddPadding::MemberPrefix)) continue; if (thriftIssetMember && thriftIssetMember != &member) { // Capture Thrift's isset value for each field, except for __isset // itself std::string issetIdxStr = "thrift_data::isset_indexes[" + std::to_string(i) + "]"; code += " if (&thrift_data::isset_indexes != nullptr && " + issetIdxStr + " != -1) {\n"; code += " SAVE_DATA(t." + thriftIssetMember->name + ".get(" + issetIdxStr + "));\n"; code += " } else {\n"; code += " SAVE_DATA(-1);\n"; code += " }\n"; } code += " JLOG(\"" + member.name + " @\");\n"; if (member.bitsize == 0) code += " JLOGPTR(&t." + member.name + ");\n"; code += " getSizeType(t." 
+ member.name + ", returnArg);\n"; } code += "}\n"; } /* Emits the getSizeType function for c. Unless Feature::PolymorphicInheritance is enabled and c.isDynamic() holds, the concrete function is emitted directly under the name getSizeType. Otherwise the concrete function is named getSizeTypeConcrete and a getSizeType wrapper is generated that compares the object vptr against the vtable address range of each child, forwards to the concrete function of the matching child, and falls back to the concrete function for c itself when no range matches. */ void CodeGen::getClassSizeFuncDef(const Class& c, std::string& code) { if (!config_.features[Feature::PolymorphicInheritance] || !c.isDynamic()) { // Just directly use the concrete size function as this class' getSizeType() getClassSizeFuncConcrete("getSizeType", c, code); return; } getClassSizeFuncConcrete("getSizeTypeConcrete", c, code); std::vector childVtableAddrs; childVtableAddrs.reserve(c.children.size()); for (const Type& childType : c.children) { auto* childClass = dynamic_cast(&childType); if (childClass == nullptr) { abort(); // TODO } // TODO: // auto fqChildName = *fullyQualifiedName(child); auto fqChildName = "TODO - implement me"; // We must split this assignment and append because the C++ standard lacks // an operator for concatenating std::string and std::string_view... std::string childVtableName = "vtable for "; childVtableName += fqChildName; auto optVtableSym = symbols_.locateSymbol(childVtableName, true); if (!optVtableSym) { // LOG(ERROR) << "Failed to find vtable address for '" << // childVtableName; LOG(ERROR) << "Falling back to non dynamic // mode"; childVtableAddrs.clear(); // TODO why?? break; } childVtableAddrs.push_back(*optVtableSym); } code += "void getSizeType(const " + c.name() + " &t, size_t &returnArg) {\n"; code += " auto *vptr = *reinterpret_cast(&t);\n"; code += " uintptr_t topOffset = *(vptr - 2);\n"; code += " uintptr_t vptrVal = reinterpret_cast(vptr);\n"; /* NOTE(review): the symbol lookup loop above can clear childVtableAddrs or stop early, yet this loop still runs to c.children.size() and reads childVtableAddrs[i]; that looks like an out-of-range access and should be confirmed. */ for (size_t i = 0; i < c.children.size(); i++) { // The vptr will point to *somewhere* in the vtable of this object's // concrete class. The exact offset into the vtable can vary based on a // number of factors, so we compare the vptr against the vtable range for // each possible class to determine the concrete type. // // This works for C++ compilers which follow the GNU v3 ABI, i.e. GCC and // Clang. Other compilers may differ. 
const Type& child = c.children[i]; auto& vtableSym = childVtableAddrs[i]; uintptr_t vtableMinAddr = vtableSym.addr; uintptr_t vtableMaxAddr = vtableSym.addr + vtableSym.size; code += " if (vptrVal >= 0x" + (boost::format("%x") % vtableMinAddr).str() + " && vptrVal < 0x" + (boost::format("%x") % vtableMaxAddr).str() + ") {\n"; code += " SAVE_DATA(" + std::to_string(i) + ");\n"; code += " uintptr_t baseAddress = reinterpret_cast(&t) + " "topOffset;\n"; code += " getSizeTypeConcrete(*reinterpret_cast(baseAddress), returnArg);\n"; code += " return;\n"; code += " }\n"; } code += " SAVE_DATA(-1);\n"; code += " getSizeTypeConcrete(t, returnArg);\n"; code += "}\n"; } namespace { /* Appends the codegen.decl template of the container, formatted with its typeName, to code. */ void getContainerSizeFuncDecl(const Container& c, std::string& code) { auto fmt = boost::format(c.containerInfo_.codegen.decl) % c.containerInfo_.typeName; code += fmt.str(); } /* Appends the codegen.func template of the container, formatted with its typeName. The used set tracks ContainerInfo addresses so each one is emitted a single time. */ void getContainerSizeFuncDef(std::unordered_set& used, const Container& c, std::string& code) { if (!used.insert(&c.containerInfo_).second) { return; } auto fmt = boost::format(c.containerInfo_.codegen.func) % c.containerInfo_.typeName; code += fmt.str(); } /* Emits getSizeType declarations for typeGraph.finalTypes through getClassSizeFuncDecl and getContainerSizeFuncDecl. */ void addGetSizeFuncDecls(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { getClassSizeFuncDecl(*c, code); } else if (const auto* con = dynamic_cast(&t)) { getContainerSizeFuncDecl(*con, code); } } } } // namespace /* Emits getSizeType definitions for typeGraph.finalTypes; container definitions are deduplicated through definedContainers_. */ void CodeGen::addGetSizeFuncDefs(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { getClassSizeFuncDef(*c, code); } else if (const auto* con = dynamic_cast(&t)) { getContainerSizeFuncDef(definedContainers_, *con, code); } } } namespace { /* Appends the built-in typed handlers: the getSizeType wrapper described below and a TypeHandler specialization for OIArray. */ void addStandardTypeHandlers(std::string& code) { // Provide a wrapper function, getSizeType, to infer T instead of having to // explicitly specify it with TypeHandler::getSizeType every time. 
code += R"( template types::st::Unit getSizeType(const T &t, typename TypeHandler::type returnArg) { JLOG("obj @"); JLOGPTR(&t); return TypeHandler::getSizeType(t, returnArg); } )"; code += R"( template struct TypeHandler> { using type = types::st::List::type>; static types::st::Unit getSizeType( const OIArray &container, typename TypeHandler>::type returnArg) { auto tail = returnArg.write(N); for (size_t i=0; i::getSizeType(container.vals[i], ret); }); } return tail.finish(); } }; )"; } } // namespace /* Appends the TypeHandler specialization for c: an optional getThriftIsset helper when c has an entry in thriftIssetMembers_, the static type describing its non-padding members, and a getSizeType body that walks those members. */ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) { std::string funcName = "getSizeType"; std::string extras; const Member* thriftIssetMember = nullptr; if (const auto it = thriftIssetMembers_.find(&c); it != thriftIssetMembers_.end()) { thriftIssetMember = it->second; extras += (boost::format(R"( static int getThriftIsset(const %1%& t, size_t i) { using thrift_data = apache::thrift::TStructDataStorage<%2%>; if (&thrift_data::isset_indexes == nullptr) return -1; auto idx = thrift_data::isset_indexes[i]; if (idx == -1) return -1; return t.%3%.get(idx); } )") % c.name() % c.fqName() % thriftIssetMember->name) .str(); } /* Initialised to -1, which wraps to the largest size_t; when no non-padding member is found, the loop bound lastNonPaddingElement + 1 used further down wraps back to 0. */ size_t lastNonPaddingElement = -1; for (size_t i = 0; i < c.members.size(); i++) { const auto& el = c.members[i]; if (!el.name.starts_with(AddPadding::MemberPrefix)) { lastNonPaddingElement = i; } } // Generate the static type for the class's representation in the data buffer. 
// For `class { int a,b,c; }` we generate (DB omitted for clarity): // Pair::type, // Pair::type, // TypeHandler::type // >> std::string typeStaticType; { size_t pairs = 0; for (size_t i = 0; i < lastNonPaddingElement + 1; i++) { const auto& member = c.members[i]; if (member.name.starts_with(AddPadding::MemberPrefix)) { continue; } if (i != lastNonPaddingElement) { typeStaticType += "types::st::Pair::type") % c.name() % member.name) .str(); if (i != lastNonPaddingElement) { typeStaticType += ", "; } } typeStaticType += std::string(pairs, '>'); if (typeStaticType.empty()) { typeStaticType = "types::st::Unit"; } } // Generate the function body that walks the type. Uses the monadic // `delegate()` form to handle each field except for the last. The last field // is handled explicitly by passing it to `getSizeType`, as we must consume // the entire type instead of delegating the next part. std::string traverser; { for (size_t i = 0; i < lastNonPaddingElement + 1; i++) { const auto& member = c.members[i]; if (member.name.starts_with(AddPadding::MemberPrefix)) { continue; } if (traverser.empty()) { traverser = "auto ret = returnArg"; } if (thriftIssetMember != nullptr && thriftIssetMember != &member) { traverser += "\n .write(getThriftIsset(t, " + std::to_string(i) + "))"; } if (i != lastNonPaddingElement) { traverser += "\n .delegate([&t](auto ret) {"; traverser += "\n return OIInternal::getSizeType(t." + member.name + ", ret);"; traverser += "\n})"; } else { traverser += ";"; traverser += "\nreturn OIInternal::getSizeType(t." 
+ member.name + ", ret);"; } } if (traverser.empty()) { traverser = "return returnArg;"; } } code += (boost::format(R"( template class TypeHandler {%2% public: using type = %3%; static types::st::Unit %4%( const %1%& t, typename TypeHandler::type returnArg) { %5% } }; )") % c.name() % extras % typeStaticType % funcName % traverser) .str(); } namespace { /* Appends the codegen.handler template of the container, formatted with its typeName, once per ContainerInfo. Logs an error and throws std::runtime_error when the handler text is empty. */ void getContainerTypeHandler(std::unordered_set& used, const Container& c, std::string& code) { if (!used.insert(&c.containerInfo_).second) { return; } const auto& handler = c.containerInfo_.codegen.handler; // TODO: Move this check into the ContainerInfo parsing once always enabled. if (handler.empty()) { LOG(ERROR) << "`codegen.handler` must be specified for all containers " "under \"-ftyped-data-segment\", not specified for \"" + c.containerInfo_.typeName + "\""; throw std::runtime_error("missing `codegen.handler`"); } auto fmt = boost::format(c.containerInfo_.codegen.handler) % c.containerInfo_.typeName; code += fmt.str(); } } // namespace /* Emits type handlers for typeGraph.finalTypes through getClassTypeHandler and getContainerTypeHandler. */ void CodeGen::addTypeHandlers(const TypeGraph& typeGraph, std::string& code) { for (const Type& t : typeGraph.finalTypes) { if (const auto* c = dynamic_cast(&t)) { getClassTypeHandler(*c, code); } else if (const auto* con = dynamic_cast(&t)) { getContainerTypeHandler(definedContainers_, *con, code); } } } /* Entry point: registers every configured container, parses drgnType into a TypeGraph, runs transform and then generate into code. Returns false after logging when a ContainerInfoError or DrgnParserError is caught, true otherwise. */ bool CodeGen::codegenFromDrgn(struct drgn_type* drgnType, std::string& code) { try { containerInfos_.reserve(config_.containerConfigPaths.size()); for (const auto& path : config_.containerConfigPaths) { registerContainer(path); } } catch (const ContainerInfoError& err) { LOG(ERROR) << "Error reading container TOML file " << err.what(); return false; } TypeGraph typeGraph; try { addDrgnRoot(drgnType, typeGraph); } catch (const type_graph::DrgnParserError& err) { LOG(ERROR) << "Error parsing DWARF: " << err.what(); return false; } transform(typeGraph); generate(typeGraph, code, drgnType); return true; } /* Constructs a container info entry from path at the back of containerInfos_ and logs its typeName at verbosity 1. */ void CodeGen::registerContainer(const fs::path& path) { const auto& info = 
containerInfos_.emplace_back(path); VLOG(1) << "Registered container: " << info.typeName; } /* Parses drgnType with a DrgnParser and adds the result to typeGraph as a root. */ void CodeGen::addDrgnRoot(struct drgn_type* drgnType, TypeGraph& typeGraph) { DrgnParser drgnParser{typeGraph, containerInfos_, config_.features[Feature::ChaseRawPointers]}; Type& parsedRoot = drgnParser.parse(drgnType); typeGraph.addRoot(parsedRoot); } /* Builds a PassManager with the passes below, in order, and runs it over typeGraph; then logs the name of each resulting final type. */ void CodeGen::transform(TypeGraph& typeGraph) { type_graph::PassManager pm; // Simplify the type graph first so there is less work for later passes pm.addPass(RemoveTopLevelPointer::createPass()); pm.addPass(Flattener::createPass()); pm.addPass(TypeIdentifier::createPass(config_.passThroughTypes)); if (config_.features[Feature::PruneTypeGraph]) pm.addPass(Prune::createPass()); if (config_.features[Feature::PolymorphicInheritance]) { // Parse new children nodes DrgnParser drgnParser{typeGraph, containerInfos_, config_.features[Feature::ChaseRawPointers]}; pm.addPass(AddChildren::createPass(drgnParser, symbols_)); // Re-run passes over newly added children pm.addPass(Flattener::createPass()); pm.addPass(TypeIdentifier::createPass(config_.passThroughTypes)); if (config_.features[Feature::PruneTypeGraph]) pm.addPass(Prune::createPass()); } // Calculate alignment before removing members, as those members may have an // influence on the class' overall alignment. 
pm.addPass(AlignmentCalc::createPass()); pm.addPass(RemoveMembers::createPass(config_.membersToStub)); // Add padding to fill in the gaps of removed members and ensure their // alignments pm.addPass(AddPadding::createPass(config_.features)); pm.addPass(NameGen::createPass()); pm.addPass(TopoSorter::createPass()); pm.run(typeGraph); LOG(INFO) << "Sorted types:\n"; for (Type& t : typeGraph.finalTypes) { LOG(INFO) << " " << t.name() << std::endl; }; } /* Assembles the generated source in code, starting from headers::oi_OITraceCode_cpp. Feature::TypedDataSegment selects between the type-handler path and the getSizeType path at each stage. Exactly one root type is expected in typeGraph; this is checked with assert only. */ void CodeGen::generate( TypeGraph& typeGraph, std::string& code, struct drgn_type* drgnType /* TODO: this argument should not be required */ ) { code = headers::oi_OITraceCode_cpp; if (!config_.features[Feature::TypedDataSegment]) { defineMacros(code); } addIncludes(typeGraph, config_.features, code); defineArray(code); defineJitLog(config_.features, code); if (config_.features[Feature::TypedDataSegment]) { FuncGen::DefineDataSegmentDataBuffer(code); code += "using namespace ObjectIntrospection;\n"; code += "namespace OIInternal {\nnamespace {\n"; FuncGen::DefineBasicTypeHandlers(code); code += "} // namespace\n} // namespace OIInternal\n"; } if (config_.features[Feature::CaptureThriftIsset]) { genDefsThrift(typeGraph, code); } /* * The purpose of the anonymous namespace within `OIInternal` is that * anything defined within an anonymous namespace has internal-linkage, * and therefore won't appear in the symbol table of the resulting object * file. Both OIL and OID do a linear search through the symbol table for * the top-level `getSize` function to locate the probe entry point, so * by keeping the contents of the symbol table to a minimum, we make that * process faster. 
*/ code += "namespace OIInternal {\nnamespace {\n"; if (!config_.features[Feature::TypedDataSegment]) { FuncGen::DefineEncodeData(code); FuncGen::DefineEncodeDataSize(code); FuncGen::DefineStoreData(code); FuncGen::DefineAddData(code); } FuncGen::DeclareGetContainer(code); genDecls(typeGraph, code); genDefs(typeGraph, code); genStaticAsserts(typeGraph, code); if (config_.features[Feature::TypedDataSegment]) { addStandardTypeHandlers(code); addTypeHandlers(typeGraph, code); } else { addStandardGetSizeFuncDecls(code); addGetSizeFuncDecls(typeGraph, code); addStandardGetSizeFuncDefs(code); addGetSizeFuncDefs(typeGraph, code); } assert(typeGraph.rootTypes().size() == 1); Type& rootType = typeGraph.rootTypes()[0]; code += "\nusing __ROOT_TYPE__ = " + rootType.name() + ";\n"; code += "} // namespace\n} // namespace OIInternal\n"; const auto typeName = SymbolService::getTypeName(drgnType); if (config_.features[Feature::TypedDataSegment]) { FuncGen::DefineTopLevelGetSizeRefTyped(code, typeName, config_.features); } else { FuncGen::DefineTopLevelGetSizeRef(code, typeName, config_.features); } if (config_.features[Feature::TreeBuilderTypeChecking]) { FuncGen::DefineOutputType(code, typeName); } if (VLOG_IS_ON(3)) { VLOG(3) << "Generated trace code:\n"; // VLOG truncates output, so use std::cout std::cout << code; } }