comment existing typed data segment work

This commit is contained in:
Jake Hillion 2023-07-06 06:28:49 -07:00 committed by Jake Hillion
parent 43303ae6d3
commit 02defdb859
3 changed files with 177 additions and 44 deletions

View File

@ -16,10 +16,50 @@
#ifndef OI_TYPES_ST_H
#define OI_TYPES_ST_H 1
namespace ObjectIntrospection {
namespace types {
namespace st {
/*
* Static Types
*
* OI employs a data segment to transfer information about the probed object to
* the debugger. Static Types are used with the `-ftyped-data-segment` feature
* to provide a compile time description of the contents of this data segment.
*
* DataBuffer represents any type with two methods: `void write_byte(uint8_t)`,
* which writes a given byte to the buffer, and `size_t offset()`, which
* returns the number of bytes written. Each Static Type holds a DataBuffer
* which describes where to write data, and has no other fields. DataBuffers
* should remain pointer-sized so that copying them stays trivial.
*
* Writing to an object of a given static type returns an object of a different
* type, representing what remains once that part has been written. When there
* is no more to write, the type will return a Unit. There are two ways to
* write data from the JIT code into a static type:
*
* - .write(): This works if you can write an entire object from one input. For
* example, VarInt::write(0) returns a Unit, and
* Pair<VarInt, VarInt>::write(0) returns a VarInt.
*
* - .delegate(): This handles the remainder of the cases where you need to do
* something more complicated. For example:
* ```
* using ComplexType = Pair<VarInt, VarInt>;
* Pair<ComplexType, VarInt>::delegate([](auto ret) {
* return ret.write(0).write(1);
* }).write(2);
* ```
* In this case, `ret` is of type `ComplexType`. After the two
* writes, the inner function returns `Unit`. Delegate then
* internally converts this unit to a `VarInt`.
*/
namespace ObjectIntrospection::types::st {
/*
* Unit
*
* Represents the case of having completely written the type, or having nothing
* of interest to write. Examples are after having written the final element of
* the object, after having completely delegated a field, or having a field of
* a struct that makes sense structurally but holds no interesting data.
*/
template <typename DataBuffer>
class Unit {
public:
@ -30,20 +70,39 @@ class Unit {
return _buf.offset();
}
template <typename T>
T cast() {
return T(_buf);
}
template <typename F>
Unit<DataBuffer> delegate(F const& cb) {
return cb(*this);
}
private:
/*
* Allows you to cast the Unit type to another Static Type. Think very
* carefully before using it. It is private so that only friends can access
* it. Good use cases are Pair::write and Pair::delegate to cast the result to
* the second element. Bad use cases are within a type handler because the
* type doesn't quite fit.
*/
template <typename T>
T cast() {
return T(_buf);
}
private:
DataBuffer _buf;
template <typename DB, typename T1, typename T2>
friend class Pair;
template <typename DB, typename T>
friend class ListContents;
};
/*
* VarInt
*
* Represents a variable length integer. The only primitive type at present,
* used for all data transfer.
*/
template <typename DataBuffer>
class VarInt {
public:
@ -63,16 +122,25 @@ class VarInt {
DataBuffer _buf;
};
/*
* Pair<T1,T2>
*
* Represents a pair of types. Can be combined to hold an arbitrary number of
* types, e.g. Pair<VarInt, Pair<VarInt, VarInt>> allows you to write three
* integers.
*/
template <typename DataBuffer, typename T1, typename T2>
class Pair {
public:
Pair(DataBuffer db) : _buf(db) {
}
template <class U>
T2 write(U val) {
Unit<DataBuffer> second = T1(_buf).write(val);
return second.template cast<T2>();
}
template <typename F>
T2 delegate(F const& cb) {
T1 first = T1(_buf);
@ -84,9 +152,19 @@ class Pair {
DataBuffer _buf;
};
/*
* Sum<Types...>
*
* Represents a tagged union of types.
*/
template <typename DataBuffer, typename... Types>
class Sum {
private:
/*
* Selector<I, Elements...>
*
* Selects the Ith type of Elements... and makes it available at ::type.
*/
template <size_t I, typename... Elements>
struct Selector;
template <size_t I, typename Head, typename... Tail>
@ -104,12 +182,14 @@ class Sum {
public:
Sum(DataBuffer db) : _buf(db) {
}
template <size_t I>
typename Selector<I, Types...>::type write() {
Pair<DataBuffer, VarInt<DataBuffer>, typename Selector<I, Types...>::type>
buf(_buf);
return buf.write(I);
}
template <size_t I, typename F>
Unit<DataBuffer> delegate(F const& cb) {
auto tail = write<I>();
@ -120,6 +200,12 @@ class Sum {
DataBuffer _buf;
};
/*
* ListContents<T>
*
* Repeatedly delegate instances of type T, writing them one after the other.
* Terminate with a call to finish().
*/
template <typename DataBuffer, typename T>
class ListContents {
public:
@ -141,11 +227,18 @@ class ListContents {
DataBuffer _buf;
};
/*
* List<T>
*
* Holds the length of a list followed by the elements. Write the length of the
* list first then that number of elements.
*
* BEWARE: There is NO static or dynamic checking that you write the number of
* elements promised.
*/
// Expands to a Pair of a VarInt followed by ListContents<T>: the first write
// supplies the VarInt, after which the remaining type is ListContents<T>.
template <typename DataBuffer, typename T>
using List = Pair<DataBuffer, VarInt<DataBuffer>, ListContents<DataBuffer, T>>;
} // namespace st
} // namespace types
} // namespace ObjectIntrospection
} // namespace ObjectIntrospection::types::st
#endif

View File

@ -538,6 +538,8 @@ void CodeGen::addGetSizeFuncDefs(const TypeGraph& typeGraph,
namespace {
void addStandardTypeHandlers(std::string& code) {
// Provide a wrapper function, getSizeType, to infer T instead of having to
// explicitly specify it with TypeHandler<DB, T>::getSizeType every time.
code += R"(
template <typename DB, typename T>
types::st::Unit<DB>
@ -578,20 +580,19 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) {
it != thriftIssetMembers_.end()) {
thriftIssetMember = it->second;
extras += "\n using thrift_data = apache::thrift::TStructDataStorage<" +
c.fqName() + ">;";
extras += (boost::format(R"(
static int getThriftIsset(const %1%& t, size_t i) {
using thrift_data = apache::thrift::TStructDataStorage<%2%>;
if (&thrift_data::isset_indexes == nullptr) return -1;
auto idx = thrift_data::isset_indexes[i];
if (idx == -1) return -1;
return t.%2%.get(idx);
return t.%3%.get(idx);
}
)") % c.name() %
thriftIssetMember->name)
c.fqName() % thriftIssetMember->name)
.str();
}
@ -603,6 +604,12 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) {
}
}
// Generate the static type for the class's representation in the data buffer.
// For `class { int a,b,c; }` we generate (DB omitted for clarity):
// Pair<TypeHandler<int>::type,
// Pair<TypeHandler<int>::type,
// TypeHandler<int>::type
// >>
std::string typeStaticType;
{
size_t pairs = 0;
@ -645,6 +652,10 @@ void CodeGen::getClassTypeHandler(const Class& c, std::string& code) {
}
}
// Generate the function body that walks the type. Uses the monadic
// `delegate()` form to handle each field except for the last. The last field
// is handled explicitly by passing it to `getSizeType`, as we must consume
// the entire type instead of delegating the next part.
std::string traverser;
{
if (!c.members.empty()) {
@ -703,6 +714,7 @@ void getContainerTypeHandler(std::unordered_set<const ContainerInfo*>& used,
}
const auto& handler = c.containerInfo_.codegen.handler;
// TODO: Move this check into the ContainerInfo parsing once always enabled.
if (handler.empty()) {
LOG(ERROR) << "`codegen.handler` must be specified for all containers "
"under \"-ftyped-data-segment\", not specified for \"" +

View File

@ -287,6 +287,12 @@ void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode,
testCode.append(fmt.str());
}
/*
* DefineTopLevelGetSizeRefTyped
*
* Top-level function to run OI on a type using static types. Enabled by the
* `-ftyped-data-segment` feature.
*/
void FuncGen::DefineTopLevelGetSizeRefTyped(std::string& testCode,
const std::string& rawType,
FeatureSet features) {
@ -502,17 +508,22 @@ void FuncGen::DeclareGetContainer(std::string& testCode) {
testCode.append(func);
}
/*
* DefineDataSegmentDataBuffer
*
* Provides a DataBuffer implementation that stores data in the Data Segment
* that has been set up. If more data is written than there is space available
* in the data segment, the offset continues to increment but the data is not
* written. This allows OID to report the size needed to process the data
* successfully.
*/
void FuncGen::DefineDataSegmentDataBuffer(std::string& testCode) {
constexpr std::string_view func = R"(
namespace ObjectIntrospection {
namespace DataBuffer {
class DataBuffer {
protected:
void write_byte(uint8_t);
};
class DataSegment: public DataBuffer {
namespace ObjectIntrospection::DataBuffer {
class DataSegment {
public:
DataSegment(size_t offset) : buf(dataBase + offset) {}
void write_byte(uint8_t byte) {
// TODO: Change the inputs to dataBase / dataEnd to improve this check
if (buf < (dataBase + dataSize)) {
@ -520,21 +531,32 @@ void FuncGen::DefineDataSegmentDataBuffer(std::string& testCode) {
}
buf++;
}
size_t offset() {
return buf - dataBase;
}
private:
uint8_t* buf;
};
} // namespace DataBuffer
} // namespace ObjectIntrospection
} // namespace ObjectIntrospection::DataBuffer
)";
testCode.append(func);
}
/*
* DefineBasicTypeHandlers
*
* Provides TypeHandler implementations for types T, T*, and void. The generic
* T handler has static type Unit and stores nothing; it should be overridden
* to provide an implementation. The T* handler has static type
* Pair<VarInt, Sum<Unit, T::type>>. It always stores the pointer's value, then
* the pointed-to value if the pointer is non-null and unique. The void handler
* has static type Unit and always stores nothing.
*/
void FuncGen::DefineBasicTypeHandlers(std::string& testCode) {
constexpr std::string_view handlers = R"(
constexpr std::string_view tHandler = R"(
template <typename DB, typename T>
struct TypeHandler {
private:
@ -550,31 +572,36 @@ void FuncGen::DefineBasicTypeHandlers(std::string& testCode) {
return std::type_identity<types::st::Unit<DB>>();
}
}
public:
using type = typename decltype(choose_type())::type;
static types::st::Unit<DB> getSizeType(
const T& t,
typename TypeHandler<DB, T>::type returnArg) {
if constexpr(std::is_pointer_v<T>) {
JLOG("ptr val @");
JLOGPTR(t);
auto r0 = returnArg.write((uintptr_t)t);
if (t && pointers.add((uintptr_t)t)) {
return r0.template delegate<1>([&t](auto ret) {
if constexpr (!std::is_void<std::remove_pointer_t<T>>::value) {
return TypeHandler<DB, std::remove_pointer_t<T>>::getSizeType(*t, ret);
} else {
return ret;
}
});
if constexpr(std::is_pointer_v<T>) {
JLOG("ptr val @");
JLOGPTR(t);
auto r0 = returnArg.write((uintptr_t)t);
if (t && pointers.add((uintptr_t)t)) {
return r0.template delegate<1>([&t](auto ret) {
if constexpr (!std::is_void<std::remove_pointer_t<T>>::value) {
return TypeHandler<DB, std::remove_pointer_t<T>>::getSizeType(*t, ret);
} else {
return ret;
}
});
} else {
return r0.template delegate<0>(std::identity());
}
} else {
return r0.template delegate<0>(std::identity());
return returnArg;
}
} else {
return returnArg;
}
}
}
};
)";
constexpr std::string_view voidHandler = R"(
template <typename DB>
class TypeHandler<DB, void> {
public:
@ -582,5 +609,6 @@ void FuncGen::DefineBasicTypeHandlers(std::string& testCode) {
};
)";
testCode.append(handlers);
testCode.append(tHandler);
testCode.append(voidHandler);
}