/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "oi/FuncGen.h" #include #include #include #include "oi/ContainerInfo.h" namespace oi::detail { namespace { const std::string typedValueFunc = R"( void getSizeType(const %1%& t, size_t& returnArg) { const uint8_t KindOfPersistentDict = 14; const uint8_t KindOfDict = 15; const uint8_t KindOfPersistentVec = 22; const uint8_t KindOfVec = 23; const uint8_t KindOfPersistentKeyset = 26; const uint8_t KindOfKeyset = 27; const uint8_t KindOfRecord = 29; const uint8_t KindOfPersistentString = 38; const uint8_t KindOfString = 39; const uint8_t KindOfObject = 43; const uint8_t KindOfResource = 45; const uint8_t KindOfRFunc = 51; const uint8_t KindOfRClsMeth = 53; const uint8_t KindOfClsMeth = 56; const uint8_t KindOfBoolean = 70; const uint8_t KindOfInt64 = 74; const uint8_t KindOfDouble = 76; const uint8_t KindOfFunc = 82; const uint8_t KindOfClass = 84; const uint8_t KindOfLazyClass = 88; const uint8_t KindOfUninit = 98; const uint8_t KindOfNull = 100; SAVE_DATA((uintptr_t)t.m_type); switch(t.m_type) { case KindOfInt64: case KindOfBoolean: SAVE_DATA(0); getSizeType(t.m_data.num, returnArg); break; case KindOfDouble: SAVE_DATA(1); getSizeType(t.m_data.dbl, returnArg); break; case KindOfPersistentString: case KindOfString: SAVE_DATA(2); getSizeType(t.m_data.pstr, returnArg); break; case KindOfPersistentDict: case KindOfDict: case KindOfPersistentVec: case KindOfVec: case KindOfPersistentKeyset: case KindOfKeyset: SAVE_DATA(3); getSizeType(t.m_data.parr, returnArg); break; case KindOfObject: SAVE_DATA(4); getSizeType(t.m_data.pobj, returnArg); break; case KindOfResource: SAVE_DATA(5); getSizeType(t.m_data.pres, returnArg); break; case KindOfFunc: SAVE_DATA(8); getSizeType(t.m_data.pfunc, returnArg); break; case KindOfRFunc: SAVE_DATA(9); getSizeType(t.m_data.prfunc, returnArg); break; case KindOfClass: SAVE_DATA(10); getSizeType(t.m_data.pclass, returnArg); break; case KindOfClsMeth: SAVE_DATA(11); getSizeType(t.m_data.pclsmeth, returnArg); break; case KindOfRClsMeth: SAVE_DATA(12); getSizeType(t.m_data.prclsmeth, returnArg); break; case KindOfRecord: SAVE_DATA(13); getSizeType(t.m_data.prec, returnArg); break; case KindOfLazyClass: SAVE_DATA(14); getSizeType(t.m_data.plazyclass, returnArg); break; case KindOfUninit: case KindOfNull: break; } } )"; } // namespace void FuncGen::DeclareGetSize(std::string& testCode, const std::string& type) { boost::format fmt = boost::format("void getSizeType(const %1% &t, size_t& returnArg);\n") % type; testCode.append(fmt.str()); } void FuncGen::DeclareExterns(std::string& code) { constexpr std::string_view vars = R"( extern uint8_t* dataBase; extern size_t dataSize; extern uintptr_t cookieValue; )"; code.append(vars); } void FuncGen::DefineJitLog(std::string& code, FeatureSet features) { if (features[Feature::JitLogging]) { code += R"( extern int logFile; void __jlogptr(uintptr_t ptr) { static constexpr char hexdigits[] = "0123456789abcdef"; static constexpr size_t ptrlen = 2 * sizeof(ptr); static char hexstr[ptrlen + 1] = {}; size_t i = ptrlen; while (i--) { hexstr[i] = hexdigits[ptr & 0xf]; ptr = ptr >> 4; } hexstr[ptrlen] = '\n'; write(logFile, hexstr, sizeof(hexstr)); } #define JLOG(str) \ do { \ if (__builtin_expect(logFile, 0)) { \ write(logFile, str, sizeof(str) - 1); \ } \ } while (false) #define JLOGPTR(ptr) \ do { \ if (__builtin_expect(logFile, 0)) { \ __jlogptr((uintptr_t)ptr); \ } \ } while (false) )"; } else { code += R"( #define JLOG(str) #define JLOGPTR(ptr) )"; } } void FuncGen::DeclareStoreData(std::string& testCode) { testCode.append("void StoreData(uintptr_t data, size_t& dataSegOffset);\n"); } void FuncGen::DeclareEncodeData(std::string& testCode) { testCode.append("size_t EncodeVarint(uint64_t val, uint8_t* buf);\n"); } void FuncGen::DeclareEncodeDataSize(std::string& testCode) { testCode.append("size_t EncodeVarintSize(uint64_t val);\n"); } void FuncGen::DefineEncodeData(std::string& testCode) { std::string func = R"( size_t EncodeVarint(uint64_t val, uint8_t* buf) { uint8_t* p = buf; while (val >= 128) { *p++ = 0x80 | (val & 0x7f); val >>= 7; } *p++ = uint8_t(val); return size_t(p - buf); } )"; testCode.append(func); } void FuncGen::DefineEncodeDataSize(std::string& testCode) { std::string func = R"( size_t EncodeVarintSize(uint64_t val) { int s = 1; while (val >= 128) { ++s; val >>= 7; } return s; } )"; testCode.append(func); } void FuncGen::DefineStoreData(std::string& testCode) { // TODO: We are encoding twice. Once to check the size and later to // actually encode. Maybe just do it once leaving a max of uintptr_t // space at the end. std::string func = R"( void StoreData(uint64_t data, size_t& dataSegOffset) { size_t sz = EncodeVarintSize(data); if (sz + dataSegOffset < dataSize) { auto data_base = reinterpret_cast(dataBase); data_base += dataSegOffset; size_t data_size = EncodeVarint(data, data_base); dataSegOffset += data_size; } else { dataSegOffset += sz; } } )"; testCode.append(func); } void FuncGen::DefineTopLevelIntrospect(std::string& code, const std::string& type, const std::string& idToHash) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" void __attribute__((used, retain)) introspect_%2$016x( const OIInternal::%1%& t, std::vector& v) #pragma GCC diagnostic pop { pointers.initialize(); pointers.add((uintptr_t)&t); v.clear(); v.reserve(4096); using DataBufferType = DataBuffer::BackInserter>; using ContentType = OIInternal::TypeHandler::type; ContentType ret{DataBufferType{v}}; OIInternal::getSizeType(t, ret); } )"; code.append( (boost::format(func) % type % std::hash{}(idToHash)).str()); } void FuncGen::DefineTopLevelIntrospectNamed( std::string& code, const std::string& type, const std::string& linkageName, size_t exclusiveSize, std::span typeNames) { code += "namespace {\n"; DefineTreeBuilderInstructions(code, type, linkageName, exclusiveSize, typeNames); DefineTopLevelIntrospect(code, type, linkageName); code += "} // namespace\n"; std::string internalId = (boost::format("%1$016x") % std::hash{}(linkageName)).str(); code += "extern \"C\" IntrospectionResult "; code += linkageName; code += "(const OIInternal::"; code += type; code += "& t) {\n"; code += " std::vector v{};\n"; code += " introspect_"; code += internalId; code += "(t, v);\n"; code += " return IntrospectionResult{std::move(v), treeBuilderInstructions"; code += internalId; code += "};\n"; code += "}\n"; } void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode, const std::string& type, const std::string& idToHash, FeatureSet features) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t) #pragma GCC diagnostic pop { )"; if (features[Feature::JitTiming]) { func += " const auto startTime = std::chrono::steady_clock::now();\n"; } func += R"( pointers.initialize(); pointers.add((uintptr_t)&t); auto data = reinterpret_cast(dataBase); size_t dataSegOffset = 0; data[dataSegOffset++] = oidMagicId; data[dataSegOffset++] = cookieValue; uintptr_t& writtenSize = data[dataSegOffset++]; writtenSize = 0; uintptr_t& timeTakenNs = data[dataSegOffset++]; dataSegOffset *= sizeof(uintptr_t); JLOG("%1% @"); JLOGPTR(&t); OIInternal::getSizeType(t, dataSegOffset); OIInternal::StoreData((uintptr_t)123456789, dataSegOffset); OIInternal::StoreData((uintptr_t)123456789, dataSegOffset); writtenSize = dataSegOffset; dataBase += dataSegOffset; )"; if (features[Feature::JitTiming]) { func += R"( timeTakenNs = std::chrono::duration_cast( std::chrono::steady_clock::now() - startTime).count(); )"; } func += R"( } )"; boost::format fmt = boost::format(func) % type % std::hash{}(idToHash); testCode.append(fmt.str()); } /* * DefineTopLevelGetSizeRefTyped * * Top level function to run OI on a type utilising static types and enabled * with feature '-ftyped-data-segment'. */ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, const std::string& type, const std::string& idToHash, FeatureSet features) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::%1%& t) #pragma GCC diagnostic pop { )"; if (features[Feature::JitTiming]) { func += " const auto startTime = std::chrono::steady_clock::now();\n"; } func += R"( pointers.initialize(); pointers.add((uintptr_t)&t); auto data = reinterpret_cast(dataBase); // TODO: Replace these with types::st::Uint64 once the VarInt decoding // logic is moved out of OIDebugger and into new TreeBuilder. size_t dataSegOffset = 0; data[dataSegOffset++] = oidMagicId; data[dataSegOffset++] = cookieValue; uintptr_t& writtenSize = data[dataSegOffset++]; writtenSize = 0; uintptr_t& timeTakenNs = data[dataSegOffset++]; dataSegOffset *= sizeof(uintptr_t); JLOG("%1% @"); JLOGPTR(&t); using ContentType = OIInternal::TypeHandler::type; using SuffixType = types::st::Pair< DataBuffer::DataSegment, types::st::VarInt, types::st::VarInt >; using DataBufferType = types::st::Pair< DataBuffer::DataSegment, ContentType, SuffixType >; DataBufferType db = DataBuffer::DataSegment(dataSegOffset); SuffixType suffix = db.delegate([&t](auto ret) { return OIInternal::getSizeType(t, ret); }); types::st::Unit end = suffix .write(123456789) .write(123456789); dataSegOffset = end.offset(); writtenSize = dataSegOffset; dataBase += dataSegOffset; )"; if (features[Feature::JitTiming]) { func += R"( timeTakenNs = std::chrono::duration_cast( std::chrono::steady_clock::now() - startTime).count(); )"; } func += R"( } )"; boost::format fmt = boost::format(func) % type % std::hash{}(idToHash); testCode.append(fmt.str()); } /* * DefineOutputType * * Present the dynamic type of an object for OID/OIL/OITB to link against. */ void FuncGen::DefineOutputType(std::string& code, const std::string& type, const std::string& idToHash) { std::string func = R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" extern const types::dy::Dynamic __attribute__((used, retain)) outputType%2$016x = OIInternal::TypeHandler::type::describe; #pragma GCC diagnostic pop )"; boost::format fmt = boost::format(func) % type % std::hash{}(idToHash); code.append(fmt.str()); } void FuncGen::DefineTreeBuilderInstructions( std::string& code, const std::string& type, const std::string& idToHash, size_t exclusiveSize, std::span typeNames) { std::string typeHash = (boost::format("%1$016x") % std::hash{}(idToHash)).str(); code += R"( #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunknown-attributes" namespace { const std::array(dataBase); size_t dataSegOffset = 0; data[dataSegOffset++] = oidMagicId; data[dataSegOffset++] = cookieValue; uintptr_t& writtenSize = data[dataSegOffset++]; writtenSize = 0; uintptr_t& timeTakenNs = data[dataSegOffset++]; dataSegOffset *= sizeof(uintptr_t); OIInternal::getSizeType(t, dataSegOffset); OIInternal::StoreData((uintptr_t)123456789, dataSegOffset); OIInternal::StoreData((uintptr_t)123456789, dataSegOffset); writtenSize = dataSegOffset; dataBase += dataSegOffset; )"; if (features[Feature::JitTiming]) { func += R"( timeTakenNs = std::chrono::duration_cast( std::chrono::steady_clock::now() - startTime).count(); )"; } func += R"( } )"; boost::format fmt = boost::format(func) % rawType % std::hash{}(rawType); testCode.append(fmt.str()); } bool FuncGen::DeclareGetSizeFuncs(std::string& testCode, const ContainerInfoRefSet& containerInfo, FeatureSet features) { for (const ContainerInfo& cInfo : containerInfo) { std::string ctype = cInfo.typeName; ctype = ctype.substr(0, ctype.find("<", 0)); auto& decl = cInfo.codegen.decl; boost::format fmt = boost::format(decl) % ctype; testCode.append(fmt.str()); } if (features[Feature::ChaseRawPointers]) { testCode.append( "template>>>\n"); } else { testCode.append("template\n"); } testCode.append("void getSizeType(const T &t, size_t& returnArg);"); return true; } bool FuncGen::DefineGetSizeFuncs(std::string& testCode, const ContainerInfoRefSet& containerInfo, FeatureSet features) { for (const ContainerInfo& cInfo : containerInfo) { std::string ctype = cInfo.typeName; ctype = ctype.substr(0, ctype.find("<", 0)); auto& func = cInfo.codegen.func; boost::format fmt = boost::format(func) % ctype; testCode.append(fmt.str()); } if (features[Feature::ChaseRawPointers]) { testCode.append("template\n"); } else { testCode.append("template\n"); } testCode.append(R"( void getSizeType(const T &t, size_t& returnArg) { JLOG("obj @"); JLOGPTR(&t); SAVE_SIZE(sizeof(T)); } )"); return true; } void FuncGen::DefineGetSizeTypedValueFunc(std::string& testCode, const std::string& ctype) { boost::format fmt = boost::format(typedValueFunc) % ctype; testCode.append(fmt.str()); } void FuncGen::DeclareGetContainer(std::string& testCode) { std::string func = R"( template const typename ContainerAdapter::container_type & get_container (ContainerAdapter &ca) { struct unwrap : ContainerAdapter { static const typename ContainerAdapter::container_type & get (ContainerAdapter &ca) { return ca.*&unwrap::c; } }; return unwrap::get(ca); } )"; testCode.append(func); } /* * DefineDataSegmentDataBuffer * * Provides a DataBuffer implementation that stores data in the setup Data * Segment. If more data is written than space available in the data segment, * the offset continues to increment but the data is not written. This allows * OID to report the size needed to process the data successfully. */ void FuncGen::DefineDataSegmentDataBuffer(std::string& testCode) { constexpr std::string_view func = R"( namespace oi::detail::DataBuffer { class DataSegment { public: DataSegment(size_t offset) : buf(dataBase + offset) {} void write_byte(uint8_t byte) { // TODO: Change the inputs to dataBase / dataEnd to improve this check if (buf < (dataBase + dataSize)) { *buf = byte; } buf++; } size_t offset() { return buf - dataBase; } private: uint8_t* buf; }; } // namespace oi::detail::DataBuffer )"; testCode.append(func); } /* * DefineBackInserterDataBuffer * * Provides a DataBuffer implementation that takes anything convertible with * std::back_inserter. */ void FuncGen::DefineBackInserterDataBuffer(std::string& code) { constexpr std::string_view buf = R"( namespace oi::detail::DataBuffer { template class BackInserter { public: BackInserter(Container& v) : buf(v) {} void write_byte(uint8_t byte) { *buf = byte; } private: std::back_insert_iterator buf; }; } // namespace oi::detail::DataBuffer )"; code.append(buf); } /* * DefineBasicTypeHandlers * * Provides TypeHandler implementations for types T, T*, and void. T is of type * Unit type and stores nothing. It should be overridden to provide an * implementation. T* is of type Pair. It stores the * pointer's value always, then the value of the pointer if it is unique. void * is of type Unit and always stores nothing. */ void FuncGen::DefineBasicTypeHandlers(std::string& code, FeatureSet features) { code += R"( template struct TypeHandler { private: static auto choose_type() { if constexpr(std::is_pointer_v) { return std::type_identity, types::st::Sum, typename TypeHandler>::type> >>(); } else { return std::type_identity>(); } } public: using type = typename decltype(choose_type())::type; )"; if (features[Feature::TreeBuilderV2]) { code += R"(private: static void process_pointer(result::Element& el, std::function stack_ins, ParsedData d) { el.pointer = std::get(d.val).value; } static void process_pointer_content(result::Element& el, std::function stack_ins, ParsedData d) { static constexpr std::array names{"TODO"}; static constexpr auto childField = inst::Field{ sizeof(T), "*", names, TypeHandler::fields, TypeHandler::processors, }; const ParsedData::Sum& sum = std::get(d.val); el.container_stats.emplace(result::Element::ContainerStats{ .capacity = 1 }); if (sum.index == 0) return; el.container_stats->length = 1; stack_ins(childField); } static constexpr auto choose_fields() { if constexpr(std::is_pointer_v) { return std::array{}; } else { return std::array{}; } } static constexpr auto choose_processors() { if constexpr(std::is_pointer_v) { return std::array{ {types::st::VarInt::describe, &process_pointer}, {types::st::Sum, typename TypeHandler>::type>::describe, &process_pointer_content}, }; } else { return std::array{}; } } public: static constexpr auto fields = choose_fields(); static constexpr auto processors = choose_processors(); )"; } code += R"( static types::st::Unit getSizeType( const T& t, typename TypeHandler::type returnArg) { if constexpr(std::is_pointer_v) { JLOG("ptr val @"); JLOGPTR(t); auto r0 = returnArg.write((uintptr_t)t); if (t && pointers.add((uintptr_t)t)) { return r0.template delegate<1>([&t](auto ret) { if constexpr (!std::is_void>::value) { return TypeHandler>::getSizeType(*t, ret); } else { return ret; } }); } else { return r0.template delegate<0>(std::identity()); } } else { return returnArg; } } }; )"; code += R"( template class TypeHandler { public: using type = types::st::Unit; )"; if (features[Feature::TreeBuilderV2]) { code += "static constexpr std::array fields{};\n"; code += "static constexpr std::array " "processors{};\n"; } code += "};\n"; } ContainerInfo FuncGen::GetOiArrayContainerInfo() { ContainerInfo oiArray{"OIArray", UNKNOWN_TYPE, "cstdint"}; // TODO: remove the need for a dummy header oiArray.codegen.handler = R"( template struct TypeHandler> { using type = types::st::List::type>; static types::st::Unit getSizeType( const %1% &container, typename TypeHandler>::type returnArg) { auto tail = returnArg.write(N); for (size_t i=0; i::getSizeType(container.vals[i], ret); }); } return tail.finish(); } }; )"; oiArray.codegen.traversalFunc = R"( auto tail = returnArg.write(N0); for (size_t i=0; i::getSizeType(container.vals[i], ret); }); } return tail.finish(); )"; oiArray.codegen.processors.emplace_back(ContainerInfo::Processor{ .type = "types::st::List::type>", .func = R"( static constexpr std::array names{"TODO"}; static constexpr auto childField = inst::Field{ sizeof(T0), "[]", names, TypeHandler::fields, TypeHandler::processors, }; el.exclusive_size = 0; el.container_stats.emplace(result::Element::ContainerStats{ .capacity = N0, .length = N0 }); auto list = std::get(d.val); // assert(list.length == N0); for (size_t i = 0; i < N0; i++) stack_ins(childField); )", }); return oiArray; } } // namespace oi::detail