diff --git a/include/oi/types/st.h b/include/oi/types/st.h index 8e1dcb1..762682d 100644 --- a/include/oi/types/st.h +++ b/include/oi/types/st.h @@ -16,10 +16,50 @@ #ifndef OI_TYPES_ST_H #define OI_TYPES_ST_H 1 -namespace ObjectIntrospection { -namespace types { -namespace st { +/* + * Static Types + * + * OI employs a data segment to transfer information about the probed object to + * the debugger. Static Types are used with the `-ftyped-data-segment` feature + * to provide a compile-time description of the contents of this data segment. + * + * DataBuffer represents any type with two methods: `void write_byte(uint8_t)`, + * which writes a given byte to the buffer; and, `size_t offset()`, which + * returns the number of bytes written. Each Static Type holds a DataBuffer + * which describes where to write data, and has no other fields. DataBuffers + * should remain pointer-sized enabling trivial copies. + * + * Writing to an object of a given static type returns a different type which + * has had that part written. When there is no more to write, the type will + * return a Unit. There are two ways to write data from the JIT code into a + * static type: + * + * - .write(): This works if you can write an entire object from one input. For + * example, VarInt::write(0) returns a Unit, and + * Pair<VarInt, VarInt>::write(0) returns a VarInt. + * + * - .delegate(): This handles the remainder of the cases where you need to do + * something more complicated. For example: + * ``` + * using ComplexType = Pair<VarInt, VarInt>; + * Pair<ComplexType, VarInt>::delegate([](auto ret) { + * return ret.write(0).write(1); + * }).write(2); + * ``` + * In this case, `ret` is of type `ComplexType`. After the two + * writes, the inner function returns `Unit`. Delegate then + * internally converts this unit to a `VarInt`. + */ +namespace ObjectIntrospection::types::st { +/* + * Unit + * + * Represents the case of having completely written the type, or having nothing + * of interest to write. 
Examples are after having written the final element of + * the object, after having completely delegated a field, or having a field of + * a struct that makes sense structurally but holds no interesting data. + */ template class Unit { public: @@ -30,20 +70,39 @@ class Unit { return _buf.offset(); } - template - T cast() { - return T(_buf); - } - template Unit delegate(F const& cb) { return cb(*this); } + private: + /* + * Allows you to cast the Unit type to another Static Type. Think very + * carefully before using it. It is private so that only friends can access + * it. Good use cases are Pair::write and Pair::delegate to cast the result to + * the second element. Bad use cases are within a type handler because the + * type doesn't quite fit. + */ + template + T cast() { + return T(_buf); + } + private: DataBuffer _buf; + + template + friend class Pair; + template + friend class ListContents; }; +/* + * VarInt + * + * Represents a variable-length integer. The only primitive type at present, + * used for all data transfer. + */ template class VarInt { public: @@ -63,16 +122,25 @@ class VarInt { DataBuffer _buf; }; +/* + * Pair + * + * Represents a pair of types. Can be combined to hold an arbitrary number of + * types, e.g. Pair<VarInt, Pair<VarInt, VarInt>> allows you to write three + * integers. + */ template class Pair { public: Pair(DataBuffer db) : _buf(db) { } + template T2 write(U val) { Unit second = T1(_buf).write(val); return second.template cast(); } + template T2 delegate(F const& cb) { T1 first = T1(_buf); @@ -84,9 +152,19 @@ class Pair { DataBuffer _buf; }; +/* + * Sum + * + * Represents a tagged union of types. + */ template class Sum { private: + /* + * Selector + * + * Selects the Ith type of Elements... and makes it available at ::type. 
+ */ template struct Selector; template @@ -104,12 +182,14 @@ class Sum { public: Sum(DataBuffer db) : _buf(db) { } + template typename Selector::type write() { Pair, typename Selector::type> buf(_buf); return buf.write(I); } + template Unit delegate(F const& cb) { auto tail = write(); @@ -120,6 +200,12 @@ class Sum { DataBuffer _buf; }; +/* + * ListContents + * + * Repeatedly delegate instances of type T, writing them one after the other. + * Terminate with a call to finish(). + */ template class ListContents { public: @@ -141,11 +227,18 @@ class ListContents { DataBuffer _buf; }; +/* + * List + * + * Holds the length of a list followed by the elements. Write the length of the + * list first then that number of elements. + * + * BEWARE: There is NO static or dynamic checking that you write the number of + * elements promised. + */ template using List = Pair, ListContents>; -} // namespace st -} // namespace types -} // namespace ObjectIntrospection +} // namespace ObjectIntrospection::types::st #endif diff --git a/oi/CodeGen.cpp b/oi/CodeGen.cpp index 5e043cf..7b46fa1 100644 --- a/oi/CodeGen.cpp +++ b/oi/CodeGen.cpp @@ -538,6 +538,8 @@ void CodeGen::addGetSizeFuncDefs(const TypeGraph& typeGraph, namespace { void addStandardTypeHandlers(std::string& code) { + // Provide a wrapper function, getSizeType, to infer T instead of having to + // explicitly specify it with TypeHandler::getSizeType every time. 
code += R"( template types::st::Unit @@ -578,20 +580,19 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) { it != thriftIssetMembers_.end()) { thriftIssetMember = it->second; - extras += "\n using thrift_data = apache::thrift::TStructDataStorage<" + - c.fqName() + ">;"; - extras += (boost::format(R"( static int getThriftIsset(const %1%& t, size_t i) { + using thrift_data = apache::thrift::TStructDataStorage<%2%>; + if (&thrift_data::isset_indexes == nullptr) return -1; auto idx = thrift_data::isset_indexes[i]; if (idx == -1) return -1; - return t.%2%.get(idx); + return t.%3%.get(idx); } )") % c.name() % - thriftIssetMember->name) + c.fqName() % thriftIssetMember->name) .str(); } @@ -603,6 +604,12 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) { } } + // Generate the static type for the class's representation in the data buffer. + // For `class { int a,b,c; }` we generate (DB omitted for clarity): + // Pair<TypeHandler<int>::type, + // Pair<TypeHandler<int>::type, + // TypeHandler<int>::type + // >> std::string typeStaticType; { size_t pairs = 0; @@ -645,6 +652,10 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) { } } + // Generate the function body that walks the type. Uses the monadic + // `delegate()` form to handle each field except for the last. The last field + // is handled explicitly by passing it to `getSizeType`, as we must consume + // the entire type instead of delegating the next part. std::string traverser; { if (!c.members.empty()) { @@ -703,6 +714,7 @@ void getContainerTypeHandler(std::unordered_set& used, } const auto& handler = c.containerInfo_.codegen.handler; + // TODO: Move this check into the ContainerInfo parsing once always enabled. 
if (handler.empty()) { LOG(ERROR) << "`codegen.handler` must be specified for all containers " "under \"-ftyped-data-segment\", not specified for \"" + diff --git a/oi/FuncGen.cpp b/oi/FuncGen.cpp index 5163663..e5d519c 100644 --- a/oi/FuncGen.cpp +++ b/oi/FuncGen.cpp @@ -287,6 +287,12 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode, testCode.append(fmt.str()); } +/* + * DefineTopLevelGetSizeRefTyped + * + * Top level function to run OI on a type utilising static types and enabled + * with feature '-ftyped-data-segment'. + */ void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode, const std::string& rawType, FeatureSet features) { @@ -502,17 +508,22 @@ void FuncGen::DeclareGetContainer(std::string& testCode) { testCode.append(func); } +/* + * DefineDataSegmentDataBuffer + * + * Provides a DataBuffer implementation that stores data in the setup Data + * Segment. If more data is written than space available in the data segment, + * the offset continues to increment but the data is not written. This allows + * OID to report the size needed to process the data successfully. 
+ */ void FuncGen::DefineDataSegmentDataBuffer(std::string& testCode) { constexpr std::string_view func = R"( - namespace ObjectIntrospection { - namespace DataBuffer { - class DataBuffer { - protected: - void write_byte(uint8_t); - }; - class DataSegment: public DataBuffer { + namespace ObjectIntrospection::DataBuffer { + + class DataSegment { public: DataSegment(size_t offset) : buf(dataBase + offset) {} + void write_byte(uint8_t byte) { // TODO: Change the inputs to dataBase / dataEnd to improve this check if (buf < (dataBase + dataSize)) { @@ -520,21 +531,32 @@ void FuncGen::DefineDataSegmentDataBuffer(std::string& testCode) { } buf++; } + size_t offset() { return buf - dataBase; } + private: uint8_t* buf; }; - } // namespace DataBuffer - } // namespace ObjectIntrospection + + } // namespace ObjectIntrospection::DataBuffer )"; testCode.append(func); } +/* + * DefineBasicTypeHandlers + * + * Provides TypeHandler implementations for types T, T*, and void. T is of type + * Unit and stores nothing. It should be overridden to provide an + * implementation. T* is of type Pair<VarInt, Sum<Unit, T::type>>. It stores the + * pointer's value always, then the value of the pointer if it is unique. void + * is of type Unit and always stores nothing. 
+ */ void FuncGen::DefineBasicTypeHandlers(std::string& testCode) { - constexpr std::string_view handlers = R"( + constexpr std::string_view tHandler = R"( template struct TypeHandler { private: @@ -550,31 +572,36 @@ void FuncGen::DefineBasicTypeHandlers(std::string& testCode) { return std::type_identity>(); } } + public: using type = typename decltype(choose_type())::type; + static types::st::Unit getSizeType( const T& t, typename TypeHandler::type returnArg) { - if constexpr(std::is_pointer_v) { - JLOG("ptr val @"); - JLOGPTR(t); - auto r0 = returnArg.write((uintptr_t)t); - if (t && pointers.add((uintptr_t)t)) { - return r0.template delegate<1>([&t](auto ret) { - if constexpr (!std::is_void>::value) { - return TypeHandler>::getSizeType(*t, ret); - } else { - return ret; - } - }); + if constexpr(std::is_pointer_v) { + JLOG("ptr val @"); + JLOGPTR(t); + auto r0 = returnArg.write((uintptr_t)t); + if (t && pointers.add((uintptr_t)t)) { + return r0.template delegate<1>([&t](auto ret) { + if constexpr (!std::is_void>::value) { + return TypeHandler>::getSizeType(*t, ret); + } else { + return ret; + } + }); + } else { + return r0.template delegate<0>(std::identity()); + } } else { - return r0.template delegate<0>(std::identity()); + return returnArg; } - } else { - return returnArg; - } - } + } }; + )"; + + constexpr std::string_view voidHandler = R"( template class TypeHandler { public: @@ -582,5 +609,6 @@ void FuncGen::DefineBasicTypeHandlers(std::string& testCode) { }; )"; - testCode.append(handlers); + testCode.append(tHandler); + testCode.append(voidHandler); }