/* * Copyright (c) Meta Platforms, Inc. and affiliates. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include "oi/Features.h" #include "oi/SymbolService.h" #include "oi/X86InstDefs.h" namespace oi::detail { namespace fs = std::filesystem; class OIMemoryManager; /** * `OICompiler` provides the tools to compile and relocate code. * It also provides utilities to manipulate X86 assembly instructions * and retrieve the type to introspect for a given `irequest`. */ class OICompiler { public: /* Configuration option for `OICompiler` */ struct Config { FeatureSet features; std::vector userHeaderPaths{}; std::vector sysHeaderPaths{}; bool usePIC = false; }; /** * The result of a call to `applyRelocs()`. * It contains the next BaseRelocAddress for further relocation, * information about the Objects' buffers location and symbols. */ struct RelocResult { /** * Information about an Object file and its relocation. * * BaseAddr contains the address of the object file buffer. * RelocAddr contains the relocated address of the object file buffer. * Size is the size of the object file buffer in bytes. * * You typically need to copy the BaseAddr buffer to RelocAddr in order to * finalize the relocation. * Note that we don't manage separate segments for code and data. Thus, the * RelocInfo struct describes one Slab which contains the two segments. */ struct RelocInfo { uintptr_t BaseAddr, RelocAddr; size_t Size; }; using RelocInfos = std::vector; using SymTable = std::unordered_map; uintptr_t newBaseRelocAddr; RelocInfos relocInfos; SymTable symbols; }; /** * Generator that takes a series of opcodes in * and output the corresponding disassembled instructions. */ class Disassembler { public: /** * Instruction holds the information returned by the disassembler. * The fields are valid until a new Instruction struct has been * output by the disassembler. * There is no ownership on the fields, so copy the values * in owning data-structure, if you want to extend the lifetime. */ struct Instruction { uintptr_t offset; std::span opcodes; std::string_view disassembly; }; /** * Create a disassembler from anything that resemble a std::span. */ template Disassembler(Args&&... args) : funcText(std::forward(args)...) { } /* * Disassemble the next instuction, if any, and return the * corresponding Instruction struct. */ std::optional operator()(); private: uintptr_t offset = 0; std::span funcText; std::array disassemblyBuffer; }; OICompiler(std::shared_ptr, Config); ~OICompiler(); /** * Compile the given @param code and write the result in @param objectPath. * * @param code the C++ source code to compile * @param sourcePath path/name of the code to compile (not used) * @param objectPath path where to write the resulting Object file * * @return true if the compilation succeeded, false otherwise. */ bool compile(const std::string&, const fs::path&, const fs::path&); /** * Load the @param objectFiles in memory and apply relocation at * @param BaseRelocAddress. Note that it doesn't copy the object files at the * @param BaseRelocAddress, but returns all the information to make the copy. * Note: synthetic variables are symbols we define, but are used within the * JIT code. See 'OITraceCode.cpp' and its global variables declared with * extern. * * @param BaseRelocAddress where will the relocated code be located * @param objectFiles paths to the object files to load and relocate * @param syntheticSymbols a symbol table for synthetic variables * * @return a `std::optional` containing @ref RelocResult if the relocation was * successful. Calling `applyRelocs()` again invalidates the Segments * information. So make sure you copy the Segments' content before doing * another call. */ std::optional applyRelocs( uintptr_t, const std::set&, const std::unordered_map&); /** * Locates all the offsets of the given @param insts opcodes * in the @param funcText. Typically used to find all `ret` instructions * within a function. * * @param funcText the binary instruction of a function * @param insts an array of instruction buffers to look for in @param funcText * * @return an optional with the offsets where the given instructions were * found */ template static std::optional> locateOpcodes( const FuncTextRange& funcText, const NeedlesRange& needles); /** * @return a string representation of the opcode(s) of the instruction found * at @param offset within function's binary instructions @param funcText. */ static std::optional decodeInst(const std::vector&, uintptr_t); private: std::shared_ptr symbols; Config config; /** * memMgr is only used by applyReloc, but its lifetime must be larger than * the duration of the function. The RelocResult returned references addrs * manager by the Memory Manager, so we need to let the caller copy the code * and data sections to their final location before release the Objects * memory. * This is why memMgr is a std::unique_ptr in the class instead of a local * variable in the applyReloc function. */ std::unique_ptr memMgr; }; template std::optional> OICompiler::locateOpcodes( const FuncTextRange& funcText, const NeedlesRange& needles) { auto DG = Disassembler((uint8_t*)std::data(funcText), std::size(funcText)); std::vector locs; while (auto inst = DG()) { auto it = ranges::find_if(needles, [&](const auto& needle) { return ranges::starts_with(inst->opcodes, needle); }); if (it != std::end(needles)) { locs.push_back(inst->offset); } } return locs; } } // namespace oi::detail