Initial commit

This commit is contained in:
Jon Haslam 2022-12-19 06:37:51 -08:00
commit db90326c4b
251 changed files with 44070 additions and 0 deletions

123
.circleci/config.yml Normal file
View File

@ -0,0 +1,123 @@
# CircleCI pipeline definition (config schema 2.1).
version: 2.1

workflows:
  # Runs the lint job plus one build-test job per compiler toolchain.
  build-test:
    jobs:
      - lint
      # GCC toolchain
      - build-test:
          name: build-test-gcc
          cc: /usr/bin/gcc
          cxx: /usr/bin/g++
      # Clang 12 toolchain
      - build-test:
          name: build-test-clang
          cc: /usr/bin/clang-12
          cxx: /usr/bin/clang++-12
jobs:
  # Formatting gate: fails when a tracked C++ or Python file is not formatted.
  lint:
    docker:
      - image: ubuntu:jammy
    steps:
      - run:
          name: Install dependencies
          environment:
            # Keep apt from waiting on interactive package prompts
            DEBIAN_FRONTEND: noninteractive
          command: |
            apt-get update
            apt-get install -y \
              clang-format \
              git \
              python3-pip
            # click broke semver with 8.1.0, causing issues for black
            pip install click==8.0.0 black isort
      - checkout
      - run:
          # This step runs all three formatters, not only clang-format
          name: Check formatting (clang-format, black, isort)
          command: |
            # NUL-delimited lists survive paths containing whitespace;
            # -r skips the formatter when no file matches.
            git ls-files -z '*.cpp' '*.h' | xargs -0 -r clang-format --fallback-style=Google -i
            git ls-files -z '*.py' | xargs -0 -r black
            git ls-files -z '*.py' | xargs -0 -r isort
            # Anything the formatters rewrote shows up here and fails the step
            git diff --exit-code
      - run:
          name: python linting
          command: |
            black --check --diff test/
            isort --check --diff test/
build-test:
machine:
image: ubuntu-2204:2022.10.2
resource_class: 2xlarge
parameters:
cc:
type: string
cxx:
type: string
environment:
CC: << parameters.cc >>
CXX: << parameters.cxx >>
steps:
- checkout
- run:
name: Install dependencies
command: |
sudo rm -f /etc/apt/sources.list.d/heroku.list
sudo apt-get update
sudo apt-get install -y \
bison \
build-essential \
clang-12 \
cmake \
flex \
gawk \
libboost-all-dev \
libbz2-dev \
libcap2-bin \
libclang-12-dev \
libcurl4-gnutls-dev \
libdouble-conversion-dev \
libdw-dev \
libfmt-dev \
libgflags-dev \
libgmock-dev \
libgoogle-glog-dev \
libgtest-dev \
libjemalloc-dev \
libmsgpack-dev \
libzstd-dev \
llvm-12-dev \
ninja-build \
pkg-config \
python3-setuptools \
sudo \
xsltproc
pip3 install toml
environment:
DEBIAN_FRONTEND: noninteractive
- run:
name: Build
command: |
cmake -G Ninja -B build/ -DWITH_TESTS=On
cmake --build build/ -j
- run:
name: Test
environment:
# disable drgn multithreading as tests are already run in parallel
OMP_NUM_THREADS: 1
command: |
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
cp test/ci.oid.toml build/testing.oid.toml
ctest --test-dir build/test/ --test-action Test -j$(nproc) \
--no-compress-output --output-on-failure \
--exclude-regex 'TestTypes\/ComparativeTest\..*' \
--schedule-random --timeout 30 --repeat until-pass:2
- run:
name: Convert test results
when: always
command: |
mkdir -p build/results/ctest
bash test/convert_to_junit.sh build/test
- store_test_results:
path: build/results

8
.clang-format Normal file
View File

@ -0,0 +1,8 @@
---
# C++ formatting rules: start from the Google style, then forbid collapsing
# short blocks, case labels, functions, if statements and loops onto one line.
Language: Cpp
BasedOnStyle: Google
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false

31
.clang-tidy Normal file
View File

@ -0,0 +1,31 @@
# Everything is disabled by the leading -*, then the bugprone, readability,
# cppcoreguidelines, clang-analyzer and performance groups are enabled and a
# handful of individual checks are switched off again.
Checks: >
  -*,
  bugprone*,
  readability*,
  cppcoreguidelines*,
  clang-analyzer*,
  performance*,
  -cppcoreguidelines-pro-type-union-access,
  -cppcoreguidelines-pro-type-vararg,
  -cppcoreguidelines-avoid-non-const-global-variables,
  -cppcoreguidelines-pro-bounds-pointer-arithmetic,
  -readability-function-cognitive-complexity
CheckOptions:
  # Naming scheme enforced by readability-identifier-naming
  - { key: readability-identifier-naming.NamespaceCase, value: lower_case }
  - { key: readability-identifier-naming.ClassCase, value: CamelCase }
  - { key: readability-identifier-naming.StructCase, value: CamelCase }
  - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase }
  - { key: readability-identifier-naming.FunctionCase, value: camelBack }
  - { key: readability-identifier-naming.VariableCase, value: camelBack }
  # ClassMemberCase used to be listed twice with the same value; one entry kept
  - { key: readability-identifier-naming.ClassMemberCase, value: camelBack }
  - { key: readability-identifier-naming.PrivateMemberCase, value: camelBack }
  - { key: readability-identifier-naming.ProtectedMemberCase, value: camelBack }
  - { key: readability-identifier-naming.EnumConstantCase, value: camelBack }
  - { key: readability-identifier-naming.ConstexprVariableCase, value: camelBack }
  - { key: readability-identifier-naming.GlobalConstantCase, value: camelBack }
  - { key: readability-identifier-naming.MemberConstantCase, value: CamelCase }
  - { key: readability-identifier-naming.StaticConstantCase, value: camelBack }
  # Allow integers and pointers to be used directly as conditions
  - { key: readability-implicit-bool-conversion.AllowIntegerConditions, value: 1 }
  - { key: readability-implicit-bool-conversion.AllowPointerConditions, value: 1 }

14
.editorconfig Normal file
View File

@ -0,0 +1,14 @@
# Defaults for every file
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 2

# Python sources use 4-space indentation
[*.py]
indent_size = 4
indent_style = space

# Makefiles are tab-indented. No whitespace inside the braces: a space after
# the comma becomes part of the alternative and stops "Makefile" from matching.
[{makefile,Makefile}*]
indent_style = tab

2
.git-blame-ignore-revs Normal file
View File

@ -0,0 +1,2 @@
a52e90c0419d7da0fe65cb758d2ed10024ec7b2e
06604fd12398b7d1b0563350ea48382555a51840

7
.github/pull_request_template.md vendored Normal file
View File

@ -0,0 +1,7 @@
## Summary
Describe what your change accomplishes
## Test plan
Show that OI still works correctly: add unit or integration tests,
paste the output of a manual test, and/or paste the output of running
the tests locally with `make test-static`.

30
.gitignore vendored Normal file
View File

@ -0,0 +1,30 @@
## OI specific
# Build tree
build/
test.o
# Entries under test/ (`?` matches exactly one character)
test/tester
test/mttest?
test/mttest2_inline
test/integration_mttest
test/integration_cycles
test/integration_sleepy
test/integration_packed
test/mapiter
test/userDef1
test/vector
test/inlined_test
test/.autogen-*
# Other ignored files and directories
oi_preprocessed
*_test.oid
oid_out.json
compile_commands.json
oid_metrics.json
Testing
# Every object file (this also covers test.o above)
*.o
PADDING
failed
fb_*_wrapper.sh
website/node_modules
## Vim
*.swp
*.swo

9
.gitmodules vendored Normal file
View File

@ -0,0 +1,9 @@
# HTTPS URLs let anonymous clones and credential-less CI fetch the submodules;
# the previous git@github.com: form requires a GitHub SSH key.
[submodule "extern/drgn"]
  path = extern/drgn
  url = https://github.com/JakeHillion/drgn.git
[submodule "extern/folly"]
  path = extern/folly
  url = https://github.com/jgkamat/folly.git
[submodule "extern/rocksdb"]
  path = extern/rocksdb
  url = https://github.com/facebook/rocksdb.git

1
CHANGELOG.md Normal file
View File

@ -0,0 +1 @@

376
CMakeLists.txt Normal file
View File

@ -0,0 +1,376 @@
# object-introspection
# 3.16 is the first CMake release that knows policy CMP0109 (set below) and
# target_precompile_headers(), which this file also uses; with a lower
# minimum, older CMake would pass this line and then fail on those commands.
cmake_minimum_required(VERSION 3.16)
project(object-introspection)

# Lets find_program() locate SETUID binaries
cmake_policy(SET CMP0109 NEW)

# C++20 is mandatory for the whole project
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Every target is compiled with -ggdb3
add_compile_options(-ggdb3)

set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
add_compile_definitions(OSS_ENABLE)

include(FetchContent)
include(ProcessorCount)
include(cmake/StandardProjectSettings.cmake)
include(cmake/PreventInSourceBuilds.cmake)
include(cmake/CompilerWarnings.cmake)

# User-facing build switches
option(STATIC_LINK "Statically link oid" OFF)
option(ASAN "Enable address sanitizer" OFF)
option(WITH_TESTS "Build with tests" OFF)
option(FORCE_BOOST_STATIC "Build with static boost" ON)
option(FORCE_LLVM_STATIC "Build with static llvm and clang" ON)

# AddressSanitizer must be enabled for both compilation and linking
if(ASAN)
  add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
  add_link_options(-fsanitize=address)
endif()
## System checks
## Done first because a failure here aborts configuration.

### setcap (for setting oid capabilities)
find_program(SETCAP setcap)
if(NOT SETCAP)
  message(FATAL_ERROR "setcap not found - please install")
endif()

### sudo - the lookup is skipped when /etc/centos-release exists
if(NOT EXISTS "/etc/centos-release")
  find_program(SUDO sudo)
  if(NOT SUDO)
    message(FATAL_ERROR "sudo not found - please install")
  endif()
endif()
### (Re)download submodules
find_package(Git QUIET)

# TODO: No idea if this huge block is required, just picked from an example. There may be a short-hand.
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
  # Update submodules as needed
  option(GIT_SUBMODULE "Check submodules during build" ON)
  if(GIT_SUBMODULE)
    message(STATUS "Submodule update")

    # Arguments shared by both variants of the git invocation
    set(_oi_submodule_args submodule update --init --recursive)

    # This is a hack. If drgn/libdrgn is missing (e.g. drgn/* was deleted by
    # hand), git does not re-pull the contents unless the checkout is forced.
    if(NOT EXISTS "${PROJECT_SOURCE_DIR}/extern/drgn/libdrgn")
      list(APPEND _oi_submodule_args --force)
    endif()

    execute_process(
      COMMAND ${GIT_EXECUTABLE} ${_oi_submodule_args}
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      RESULT_VARIABLE GIT_SUBMOD_RESULT
    )
    if(NOT GIT_SUBMOD_RESULT EQUAL "0")
      message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
    endif()
  endif()
endif()

# Whatever happened above, the drgn submodule has to be on disk by now
if(NOT EXISTS "${PROJECT_SOURCE_DIR}/extern/drgn")
  message(FATAL_ERROR "The submodules were not downloaded! GIT_SUBMODULE was turned off or failed. Please update submodules and try again.")
endif()
### Select Python version
find_program(PYTHON NAMES python3.8 python3)
# The drgn build runs setup.py through this interpreter; fail at configure
# time instead of handing "PYTHON-NOTFOUND" to the build step.
if(NOT PYTHON)
  message(FATAL_ERROR "python3 not found - please install")
endif()

### gflags (before glog)
find_package(gflags REQUIRED)

### tomlplusplus (for configuration files)
FetchContent_Declare(
  tomlplusplus
  GIT_REPOSITORY https://github.com/marzer/tomlplusplus.git
  GIT_TAG 4b166b69f28e70a416a1a04a98f365d2aeb90de8 # v3.2.0
)
FetchContent_MakeAvailable(tomlplusplus)

### glog (pinned to a fixed commit)
FetchContent_Declare(
  glog
  GIT_REPOSITORY https://github.com/google/glog.git
  GIT_TAG 96a2f23dca4cc7180821ca5f32e526314395d26a
)
FetchContent_MakeAvailable(glog)

### bison & flex (for oid_parser)
find_package(BISON 3.5 REQUIRED)
# FLEX_TARGET() is called unconditionally later on, so flex is just as
# mandatory as bison.
find_package(FLEX REQUIRED)
### Boost
### Use static linking with Boost (when FORCE_BOOST_STATIC is set), as some of its dependencies are not in the system's LD_LIBRARY_PATH.
if (FORCE_BOOST_STATIC)
set(Boost_USE_STATIC_LIBS True)
endif()
# Boost components the project links against
find_package(Boost REQUIRED COMPONENTS
system
filesystem
thread
regex
serialization
)
message(STATUS "Linking Boost libraries: ${Boost_LIBRARIES}")
### LLVM and Clang - Preferring Clang 12
find_package(LLVM 12 REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
find_package(Clang REQUIRED CONFIG)
# NOTE(review): the message below prints LLVM_PACKAGE_VERSION under a
# "Found Clang" label; the Clang lookup itself requests no version.
message(STATUS "Found Clang ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using ClangConfig.cmake in: ${Clang_DIR}")
### msgpack
# msgpack v3.0.0 doesn't define the msgpackc-cxx target, but since the library is header only,
# we can locate the header dir and add it to our include directories.
# Ideally, we would use a more modern version, like v3.3.0, and directly use the msgpackc-cxx target.
find_package(msgpack REQUIRED CONFIG)
# Borrow the include path from the msgpackc target and expose it as a SYSTEM include
get_target_property(MSGPACK_INCLUDE_DIRS msgpackc INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${MSGPACK_INCLUDE_DIRS})
### folly
### use folly as a header only library. some features won't be supported.
include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/extern/folly)

### zstd (for rocksdb)
find_package(zstd REQUIRED)

### rocksdb
# Built in-tree as a nested CMake project. ${CMAKE_COMMAND} guarantees the
# nested build uses the same CMake binary that configured this project, rather
# than whatever `cmake` happens to be first on PATH at build time.
add_custom_target(librocksdb ALL
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/extern/rocksdb
  COMMAND ${CMAKE_COMMAND} -G Ninja -B build/ -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=Off -DWITH_LIBURING=Off -DWITH_ZSTD=On
  COMMAND ${CMAKE_COMMAND} --build build/ --target rocksdb
  BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/extern/rocksdb/build/librocksdb.a
  COMMENT "Building RocksDB"
  USES_TERMINAL
)
set(ROCKSDB_PATH "${PROJECT_SOURCE_DIR}/extern/rocksdb")
include_directories(SYSTEM "${ROCKSDB_PATH}/include")
### drgn
# The setup.py script in drgn is really meant to build drgn (python
# debugger). It shoves the C headers/lib in a temporary directory (which
# is named 'build' below using --build-temp flag). It may (or may not) make
# sense to just build libdrgn manually. Don't know how finicky the setup.py
# might be. These are the steps to manually build lib/headers and output
# to extern/drgn/libdrgn/build directory :-
#
# cd extern/drgn/libdrgn
# autoreconf -i .
# autoreconf -i ./elfutils
# mkdir build
# cd build
# ../configure
# make
#
# Since setup.py has a single cmd to do this, just use it for now.
#
# Another extremely annoying point: drgn pretty much has to be compiled with
# gcc only. clang-12 does NOT work; clang fails with the following error :-
# configure: error: gcc with GNU99 support required
set(DRGN_CONFIGURE_FLAGS "--with-libkdumpfile=no")
if (ASAN)
list(APPEND DRGN_CONFIGURE_FLAGS "--enable-asan=yes")
endif()
# We always compile drgn with aggressive optimizations because it is
# responsible for most of the time OID takes to generate caches.
# NOTE(review): -march=broadwell is hard-coded here - confirm that every
# build and run host supports it.
set(DRGN_CFLAGS "-O3" "-march=broadwell")
# The compiler is forced to gcc for this step (see the clang note above), and
# BISON_PKGDATADIR is unset before setup.py is invoked.
add_custom_target(libdrgn ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/extern/drgn
COMMAND unset BISON_PKGDATADIR && CC=gcc CFLAGS="${DRGN_CFLAGS}" CONFIGURE_FLAGS="${DRGN_CONFIGURE_FLAGS}" ${PYTHON} ./setup.py build --build-temp build
BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/extern/drgn/build/.libs/libdrgnimpl.a
${CMAKE_CURRENT_SOURCE_DIR}/extern/drgn/build/velfutils/libdw/libdw.a
${CMAKE_CURRENT_SOURCE_DIR}/extern/drgn/build/velfutils/libelf/libelf.a
${CMAKE_CURRENT_SOURCE_DIR}/extern/drgn/build/velfutils/libdwelf/libdwelf.a
COMMENT "Building drgn"
USES_TERMINAL
)
set(DRGN_PATH "${PROJECT_SOURCE_DIR}/extern/drgn/build")
# Ideally drgn stuff should be together at the end. But looks like rpath needs
# to be set before add_executable() unfortunately. Maybe split libdrgn stuff
# into a separate file later.
# Both the build and install rpath point at drgn's .libs directory.
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_INSTALL_RPATH "${DRGN_PATH}/.libs")
set(CMAKE_BUILD_RPATH "${DRGN_PATH}/.libs")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
include_directories(SYSTEM "${DRGN_PATH}")
if(STATIC_LINK)
  # glog links against the `gflags` target, which is an alias for `gflags_shared`
  # For static builds, we force it to link against `gflags_static` instead
  set_target_properties(glog PROPERTIES
    INTERFACE_LINK_LIBRARIES "gflags_static"
  )
endif()

# FIXME: LLVM 12's source code is not compatible with C++20.
# We should check with the compiler team if we could apply a fix to our LLVM.
# In the meantime, we can compile OICompiler with C++17.
# Precompiled headers are skipped for this one source file as well.
set_source_files_properties(src/OICompiler.cpp
  PROPERTIES
    COMPILE_FLAGS -std=c++17
    SKIP_PRECOMPILE_HEADERS ON
)
## OI Dependencies (linked to by output libraries and executables)
### OI Language Parser
# Bison and Flex sources are generated into the build directory and compiled
# into the static oid_parser library.
BISON_TARGET(Parser src/OIParser.yy ${CMAKE_CURRENT_BINARY_DIR}/OIParser.tab.cpp
DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/OIParser.tab.hh)
FLEX_TARGET(Lexer src/OILexer.l ${CMAKE_CURRENT_BINARY_DIR}/OILexer.cpp)
ADD_FLEX_BISON_DEPENDENCY(Lexer Parser)
add_library(oid_parser STATIC ${BISON_Parser_OUTPUTS} ${FLEX_Lexer_OUTPUTS})
target_link_libraries(oid_parser glog::glog)
### Core OI
# The build directory is on the include path as well as src/.
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src)
add_library(oicore
src/ContainerInfo.cpp
src/Descs.cpp
src/FuncGen.cpp
src/Metrics.cpp
src/OICache.cpp
src/OICodeGen.cpp
src/OICompiler.cpp
src/OIUtils.cpp
src/PaddingHunter.cpp
src/Serialize.cpp
src/SymbolService.cpp
)
# libdrgn is a custom target, so the ordering has to be declared explicitly.
add_dependencies(oicore libdrgn)
set_project_warnings(oicore)
target_include_directories(oicore SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS})
target_compile_definitions(oicore PRIVATE ${LLVM_DEFINITIONS})
llvm_map_components_to_libnames(llvm_libs core native mcjit x86disassembler)
target_link_libraries(oicore
${Boost_LIBRARIES}
Boost::headers
glog::glog
tomlplusplus::tomlplusplus
)
# Static builds link individual clang libraries; otherwise clang-cpp is used.
if (FORCE_LLVM_STATIC)
target_link_libraries(oicore
clangCodeGen
clangFrontend
)
else()
target_link_libraries(oicore
clang-cpp
)
endif()
# link the llvm_libs last as they must come after the clang dependencies in the
# linker order
if (FORCE_LLVM_STATIC)
target_link_libraries(oicore ${llvm_libs})
else()
target_link_libraries(oicore LLVM)
endif()
# drgn and dw are looked up in the drgn build tree via the -L flag below.
target_link_libraries(oicore
"-L${DRGN_PATH}/.libs"
drgn
dw
pthread
)
### TreeBuilder
add_library(treebuilder src/TreeBuilder.cpp)
# librocksdb is a custom target, so the ordering has to be declared explicitly
add_dependencies(treebuilder librocksdb)
target_link_libraries(treebuilder
  ${ROCKSDB_PATH}/build/librocksdb.a
  oicore # overkill but it does need a lot of stuff
  zstd::zstd
)

## OI Outputs

### Object Introspection as a Library (OIL)
add_library(oil
  src/OILibrary.cpp
  src/OILibraryImpl.cpp
)
target_include_directories(oil PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(oil oicore)

### Object Introspection as a Library Generator (OILGen)
add_executable(oilgen
  tools/OILGen.cpp
  src/OIGenerator.cpp
  src/DrgnUtils.cpp
)
target_link_libraries(oilgen oicore)

### Object Introspection Compiler
add_executable(oi_compile src/OICompile.cpp)
target_link_libraries(oi_compile oicore oil)

### Object Introspection cache Printer (OIP)
add_executable(oip tools/OIP.cpp)
set_project_warnings(oip)
target_link_libraries(oip oicore)

### Object Introspection Tree Builder (OITB)
add_executable(oitb tools/OITB.cpp)
set_project_warnings(oitb)
target_link_libraries(oitb oicore treebuilder)
### Object Introspection Debugger (OID)
add_executable(oid src/OID.cpp src/OIDebugger.cpp)
set_project_warnings(oid)
target_link_libraries(oid oicore oid_parser treebuilder)

# gflags flavour follows the STATIC_LINK option
if(STATIC_LINK)
  target_link_libraries(oid gflags_static)
else()
  target_link_libraries(oid gflags_shared)
endif()

# NOTE(review): oicore and treebuilder are listed a second time on purpose in
# the original; presumably this fixes static link ordering - confirm before
# removing.
target_link_libraries(oid oicore treebuilder)

# Grant the freshly built binary CAP_SYS_PTRACE. The binary is addressed via
# $<TARGET_FILE:oid> and setcap via the path found by find_program(SETCAP), so
# the step no longer depends on the output location or on sudo's PATH.
add_custom_command(TARGET oid POST_BUILD
  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
  COMMAND sudo ${SETCAP} cap_sys_ptrace+ep $<TARGET_FILE:oid>
)
### Object Introspection Tests
if(WITH_TESTS)
  add_subdirectory(test)
endif()

### Custom link options
# A static build appends -static to each of the targets listed here
if(STATIC_LINK)
  foreach(static_target IN ITEMS oicore oil oip oid oitb)
    target_link_libraries(${static_target} -static)
  endforeach()
endif()
## Performance improvements
### Precompile headers
# oicore owns the precompiled header set
target_precompile_headers(oicore
  PUBLIC
    <fstream>
    <iostream>
    <map>
    <memory>
    <set>
    <string>
    <vector>
    <glog/logging.h>
)

# The remaining targets reuse oicore's precompiled headers
foreach(pch_consumer IN ITEMS oil oip oi_compile oid oitb)
  target_precompile_headers(${pch_consumer} REUSE_FROM oicore)
endforeach()

76
CODE_OF_CONDUCT.md Normal file
View File

@ -0,0 +1,76 @@
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@fb.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

31
CONTRIBUTING.md Normal file
View File

@ -0,0 +1,31 @@
# Contributing to object-introspection
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `master`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.
## License
By contributing to object-introspection, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.

201
LICENSE Normal file
View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

14
README.org Normal file
View File

@ -0,0 +1,14 @@
* object-introspection
[[https://matrix.to/#/#object-introspection:matrix.org][https://img.shields.io/matrix/object-introspection:matrix.org.svg]]
Object Introspection is a memory profiling technology for C++ objects. It provides the ability to dynamically instrument applications to capture the precise memory occupancy of entire object hierarchies including all containers and dynamic allocations. All this with no code modification or recompilation!
For more information on the technology and how to get started applying it to your applications please check out the [[https://object-introspection.com][Object Introspection]] website.
* Join the Object Introspection community
See the [[file:CONTRIBUTING.md][CONTRIBUTING]] file for how to help out.
* License
Object Introspection is licensed under the [[file:LICENSE][Apache 2.0 License]].

View File

@ -0,0 +1,84 @@
# https://github.com/lefticus/cppbestpractices/blob/master/02-Use_the_Tools_Available.md
# Attach the project's warning flags to a target.
#
# Usage: set_project_warnings(<target>)
#
# Picks the MSVC, Clang or GCC flag list according to the active C++ compiler
# and adds it to <target> as PUBLIC compile options. When the cache option
# WARNINGS_AS_ERRORS is ON, warnings are promoted to errors for all three
# compilers (the GCC list is built on top of the Clang list, so it inherits
# -Werror).
function(set_project_warnings project_name)
  option(WARNINGS_AS_ERRORS "Treat compiler warnings as errors" OFF)

  set(MSVC_WARNINGS
      /W4 # Baseline reasonable warnings
      /w14242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
      /w14254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
      /w14263 # 'function': member function does not override any base class virtual member function
      /w14265 # 'classname': class has virtual functions, but destructor is not virtual instances of this class may not
              # be destructed correctly
      /w14287 # 'operator': unsigned/negative constant mismatch
      /we4289 # nonstandard extension used: 'variable': loop control variable declared in the for-loop is used outside
              # the for-loop scope
      /w14296 # 'operator': expression is always 'boolean_value'
      /w14311 # 'variable': pointer truncation from 'type1' to 'type2'
      /w14545 # expression before comma evaluates to a function which is missing an argument list
      /w14546 # function call before comma missing argument list
      /w14547 # 'operator': operator before comma has no effect; expected operator with side-effect
      /w14549 # 'operator': operator before comma has no effect; did you intend 'operator'?
      /w14555 # expression has no effect; expected expression with side- effect
      /w14619 # pragma warning: there is no warning number 'number'
      /w14640 # Enable warning on thread un-safe static member initialization
      /w14826 # Conversion from 'type1' to 'type_2' is sign-extended. This may cause unexpected runtime behavior.
      /w14905 # wide string literal cast to 'LPSTR'
      /w14906 # string literal cast to 'LPWSTR'
      /w14928 # illegal copy-initialization; more than one user-defined conversion has been implicitly applied
      /permissive- # standards conformance mode for MSVC compiler.
  )

  set(CLANG_WARNINGS
      -Wall
      -Wextra # reasonable and standard
      -Wshadow # warn the user if a variable declaration shadows one from a parent context
      -Wnon-virtual-dtor # warn the user if a class with virtual functions has a non-virtual destructor. This helps
                         # catch hard to track down memory errors
      -Wcast-align # warn for potential performance problem casts
      -Wunused # warn on anything being unused
      -Woverloaded-virtual # warn if you overload (not override) a virtual function
      -Wpedantic # warn if non-standard C++ is used
      -Wconversion # warn on type conversions that may lose data
      -Wnull-dereference # warn if a null dereference is detected
      -Wdouble-promotion # warn if float is implicit promoted to double
      -Wformat=2 # warn on security issues around functions that format output (ie printf)
      -Wimplicit-fallthrough # warn on statements that fallthrough without an explicit annotation
      # We use "old style cast" a lot through our code base, so that warning is too noisy and stays disabled.
      -Wno-old-style-cast
      # Sign conversion warnings don't seem to point to errors and are noisy. Disabled for now; revisit later.
      # Must come after -Wconversion so that it wins.
      -Wno-sign-conversion
      -Werror=unused-result # turn unused [[nodiscard]] into errors
  )

  if(WARNINGS_AS_ERRORS)
    set(CLANG_WARNINGS ${CLANG_WARNINGS} -Werror)
    set(MSVC_WARNINGS ${MSVC_WARNINGS} /WX)
  endif()

  # GCC understands every Clang flag above plus a few of its own.
  set(GCC_WARNINGS
      ${CLANG_WARNINGS}
      -Wmisleading-indentation # warn if indentation implies blocks where blocks do not exist
      -Wduplicated-cond # warn if if / else chain has duplicated conditions
      -Wduplicated-branches # warn if if / else branches have duplicated code
      -Wlogical-op # warn about logical operations being used where bitwise were probably wanted
      -Wuseless-cast # warn if you perform a cast to the same type
  )

  if(MSVC)
    set(PROJECT_WARNINGS ${MSVC_WARNINGS})
  elseif(CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
    set(PROJECT_WARNINGS ${CLANG_WARNINGS})
  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(PROJECT_WARNINGS ${GCC_WARNINGS})
  else()
    message(AUTHOR_WARNING "No compiler warnings set for '${CMAKE_CXX_COMPILER_ID}' compiler.")
  endif()

  target_compile_options(${project_name} PUBLIC ${PROJECT_WARNINGS})
endfunction()

24
cmake/FindThrift.cmake Normal file
View File

@ -0,0 +1,24 @@
# FindThrift
# ----------
# Locate the Thrift compiler and the directory holding its annotation files.
#
# Result variables:
#
#   THRIFT_FOUND        - system has Thrift
#   THRIFT_COMPILER     - the thrift compiler executable
#   THRIFT_INCLUDE_DIRS - the Thrift include directory
#
# THRIFT_ROOT may be set to hint at a non-standard install prefix.

find_program(THRIFT_COMPILER thrift)

find_path(
  THRIFT_INCLUDE_DIRS
  NAMES thrift/annotation/cpp.thrift
  HINTS ${THRIFT_ROOT})

# Honour QUIET / REQUIRED and report found only when both variables are set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  Thrift "Thrift not found"
  THRIFT_COMPILER
  THRIFT_INCLUDE_DIRS)

mark_as_advanced(THRIFT_COMPILER THRIFT_INCLUDE_DIRS)

29
cmake/Findzstd.cmake Normal file
View File

@ -0,0 +1,29 @@
# Findzstd
# --------
# Locate the zstd compression library and its headers.
#
#   zstd_INCLUDE_DIRS - where to find zstd.h, etc.
#   zstd_LIBRARIES    - list of libraries when using zstd.
#   zstd_FOUND        - true if zstd found.
#
# On success an imported target zstd::zstd is defined unless one already
# exists.

find_path(zstd_INCLUDE_DIRS zstd.h HINTS ${zstd_ROOT_DIR}/include)

# Keep these as two separate calls: the first one that succeeds caches the
# result, so searching for libzstd.a first gives the static library priority.
# See "Professional CMake", page 300, for more info.
find_library(zstd_LIBRARIES libzstd.a HINTS ${zstd_ROOT_DIR}/lib)
find_library(zstd_LIBRARIES zstd HINTS ${zstd_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  zstd DEFAULT_MSG
  zstd_LIBRARIES
  zstd_INCLUDE_DIRS)

mark_as_advanced(zstd_LIBRARIES zstd_INCLUDE_DIRS)

if(zstd_FOUND AND NOT (TARGET zstd::zstd))
  add_library(zstd::zstd UNKNOWN IMPORTED)
  # NOTE(review): COMPILE_DEFINITIONS on an imported target is not propagated
  # to consumers; if the intent is for dependents to see ZSTD, this probably
  # wants INTERFACE_COMPILE_DEFINITIONS (without the -D prefix) - confirm.
  set_target_properties(
    zstd::zstd
    PROPERTIES IMPORTED_LOCATION ${zstd_LIBRARIES}
               INTERFACE_INCLUDE_DIRECTORIES ${zstd_INCLUDE_DIRS}
               COMPILE_DEFINITIONS -DZSTD)
endif()

View File

@ -0,0 +1,18 @@
#
# Refuse to configure when the build directory is the source directory.
function(AssureOutOfSourceBuilds)
  # Compare fully resolved paths so a symlinked build directory cannot
  # disguise an in-source build.
  get_filename_component(real_source_dir "${CMAKE_SOURCE_DIR}" REALPATH)
  get_filename_component(real_binary_dir "${CMAKE_BINARY_DIR}" REALPATH)

  if(NOT "${real_source_dir}" STREQUAL "${real_binary_dir}")
    return()
  endif()

  # In-source build detected: explain, then stop the configure step.
  message("######################################################")
  message("Warning: in-source builds are disabled")
  message("Please create a separate build directory and run cmake from there")
  message("######################################################")
  message(FATAL_ERROR "Quitting configuration")
endfunction()

AssureOutOfSourceBuilds()

View File

@ -0,0 +1,42 @@
# Fall back to RelWithDebInfo when the user picked no build type and the
# generator does not provide its own list of configurations.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
  set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Choose the type of build." FORCE)

  # Values offered for the build type in cmake-gui and ccmake
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()

# Emit compile_commands.json so clang based tools can pick up the build flags
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

option(ENABLE_IPO "Enable Interprocedural Optimization, aka Link Time Optimization (LTO)" OFF)
if(ENABLE_IPO)
  include(CheckIPOSupported)
  check_ipo_supported(RESULT result OUTPUT output)
  if(NOT result)
    message(SEND_ERROR "IPO is not supported: ${output}")
  else()
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
  endif()
endif()

# Coloured diagnostics, using whichever spelling the compiler understands
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  add_compile_options(-fdiagnostics-color=auto)
elseif(CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
  add_compile_options(-fcolor-diagnostics)
else()
  message(STATUS "No colored compiler diagnostic set for '${CMAKE_CXX_COMPILER_ID}' compiler.")
endif()

48
dev.oid.toml Normal file
View File

@ -0,0 +1,48 @@
# DON'T use this file directly.
#
# It is used to extend sample.oid.toml for development purposes.
[types]
containers = [
"PWD/types/array_type.toml",
"PWD/types/string_type.toml",
"PWD/types/cxx11_string_type.toml",
"PWD/types/folly_iobuf_type.toml",
"PWD/types/folly_iobuf_queue_type.toml",
"PWD/types/set_type.toml",
"PWD/types/unordered_set_type.toml",
"PWD/types/seq_type.toml",
"PWD/types/list_type.toml",
"PWD/types/cxx11_list_type.toml",
"PWD/types/deque_list_type.toml",
"PWD/types/shrd_ptr_type.toml",
"PWD/types/uniq_ptr_type.toml",
"PWD/types/std_map_type.toml",
"PWD/types/std_unordered_map_type.toml",
"PWD/types/pair_type.toml",
"PWD/types/stack_container_adapter_type.toml",
"PWD/types/queue_container_adapter_type.toml",
"PWD/types/priority_queue_container_adapter_type.toml",
"PWD/types/ref_wrapper_type.toml",
"PWD/types/multi_map_type.toml",
"PWD/types/folly_small_heap_vector_map.toml",
"PWD/types/folly_optional_type.toml",
"PWD/types/optional_type.toml",
"PWD/types/try_type.toml",
"PWD/types/fb_string_type.toml",
"PWD/types/small_vec_type.toml",
"PWD/types/f14_fast_map.toml",
"PWD/types/f14_node_map.toml",
"PWD/types/f14_vector_map.toml",
"PWD/types/f14_fast_set.toml",
"PWD/types/f14_node_set.toml",
"PWD/types/f14_vector_set.toml",
"PWD/types/sorted_vec_set_type.toml",
"PWD/types/map_seq_type.toml",
"PWD/types/boost_bimap_type.toml",
"PWD/types/repeated_field_type.toml",
"PWD/types/repeated_ptr_field_type.toml",
"PWD/types/caffe2_blob_type.toml",
"PWD/types/std_variant.toml",
"PWD/types/thrift_isset_type.toml",
]

6
examples/compile-time-oil/.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
oilgen
OilVectorOfStrings
OilVectorOfStrings.o
jit.cpp
JitCompiled.o

View File

@ -0,0 +1,28 @@
# Builds the ahead-of-time (AOT) OIL example: compile the program, run oilgen
# on its object file to produce JitCompiled.o, then link the two together.
.DEFAULT_GOAL := all

CXX=clang++
CXXFLAGS=-g -std=c++17 -Wall -pedantic -DOIL_AOT_COMPILATION=1
INC=-I../../include

# `all` and `clean` never produce a file of that name; `oilgen` is kept phony
# so the tool is refreshed from the main build on every run.
.PHONY: all clean oilgen

all: OilVectorOfStrings

OilVectorOfStrings.o: OilVectorOfStrings.cpp
	${CXX} ${CXXFLAGS} ${INC} OilVectorOfStrings.cpp -c -o OilVectorOfStrings.o

# Use $(MAKE) for the recursive invocation, as recommended for sub-makes.
oilgen:
	rm -f oilgen
	(cd ../../ && $(MAKE) oid-devel)
	ln -s ../../build/oilgen oilgen

JitCompiled.o: oilgen OilVectorOfStrings.o
	DRGN_ENABLE_TYPE_ITERATOR=1 ./oilgen -o JitCompiled.o -c ../../build/sample.oid.toml -d OilVectorOfStrings.o

OilVectorOfStrings: OilVectorOfStrings.o JitCompiled.o
	${CXX} ${CXXFLAGS} OilVectorOfStrings.o JitCompiled.o -o OilVectorOfStrings

# File names are spelled out: recipes run under /bin/sh, which is not
# guaranteed to support the {a,b} brace expansion used previously.
clean:
	rm -f oilgen OilVectorOfStrings OilVectorOfStrings.o OilVectorOfStrings.o.dwarf JitCompiled.o jit.cpp

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <ObjectIntrospection.h>
#include <iostream>
#include <string>
#include <vector>
// Example type handed to ObjectIntrospection::getObjectSize<Foo> in main():
// a single vector of strings.
struct Foo {
std::vector<std::string> strings;
};
// Fill a Foo with three strings, ask OIL for its size and print the response
// code together with the reported size.
int main() {
  Foo foo;
  for (const char *text : {"Lorem ipsum dolor", "sit amet,",
                           "consectetur adipiscing elit,"}) {
    foo.strings.push_back(text);
  }

  // -1 converts to the largest size_t value; presumably a sentinel that makes
  // an unwritten result obvious in the output.
  size_t size = -1;
  const int ret = ObjectIntrospection::getObjectSize<Foo>(&foo, &size);

  std::cout << "oil returned: " << ret << "; with size: " << size << std::endl;
}

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Metrics.h"
namespace Metrics {
std::atomic<Metrics *> Metrics::singleton = nullptr;
// Label used for an ArgTiming value in the "timing" field of a metrics
// record. Values other than ENTRY and EXIT map to the empty string.
const char *to_string(ArgTiming t) {
  if (t == ENTRY) {
    return "entry";
  }
  if (t == EXIT) {
    return "exit";
  }
  return "";
}
// Opens `savePath` for writing, emits the opening of the JSON document and
// registers this object as the process-wide singleton.
Metrics::Metrics(ObjectIntrospection::options opts, const std::string &savePath)
    : opts(opts), writer(savePath, std::ios_base::out) {
  writer << "{ \"metrics\": [" << std::endl;

  // Publish the instance last, once every member is ready for use
  Metrics::singleton = this;
}
// Terminates the JSON document started by the constructor by writing the
// closing "]}" line.
// NOTE(review): `singleton` is not reset here, so it keeps pointing at this
// object after destruction - confirm no static accessor can run afterwards.
Metrics::~Metrics() {
writer << "]}" << std::endl;
}
// Appends one pre-formatted JSON object to the metrics file.
//
// `object` is written verbatim; every record after the first is prefixed with
// a comma so the records form a valid JSON array body. Writes are serialised
// through the instance's writerLock.
//
// Does nothing when no Metrics instance has been created. This can happen
// because a Tracing constructed with enabled=true reports its results without
// consulting Metrics::isEnabled().
void Metrics::save(std::string object) {
  Metrics *m = singleton.load();
  if (m == nullptr) {
    return;
  }

  std::lock_guard<std::mutex> guard(m->writerLock);
  if (m->hasWritten) {
    m->writer << ',';
  } else {
    m->hasWritten = true;
  }
  m->writer << object << std::endl;
}
// Records the measured size of one traced argument as a JSON object of
// type "size" and hands it to save().
//
// `name` and `argName` are inserted as-is (no JSON escaping is performed).
void Metrics::saveArg(const char *name, const char *argName, ArgTiming timing,
                      size_t size) {
  std::string record = "{\"type\": \"size\", \"traceName\": \"";
  record.append(name)
      .append("\", \"argName\": \"")
      .append(argName)
      .append("\", \"timing\": \"")
      .append(to_string(timing))
      .append("\", \"size\": ")
      .append(std::to_string(size))
      .push_back('}');
  save(record);
}
// Records how long a trace took, in milliseconds, as a JSON object of
// type "duration" and hands it to save().
//
// `name` is inserted as-is (no JSON escaping is performed).
void Metrics::saveDuration(const char *name,
                           std::chrono::milliseconds duration) {
  std::string record = "{\"type\": \"duration\", \"traceName\": \"";
  record.append(name)
      .append("\", \"duration\": ")
      .append(std::to_string(duration.count()))
      .push_back('}');
  save(record);
}
// Stores the trace name and the per-instance enable flag. The name pointer is
// kept as given (the string is not copied). Timing is not started here; see
// start().
Tracing::Tracing(const char *name, bool enabled)
: name(name), enabled(enabled) {
}
// Finishes the trace: reports the elapsed time since start() when timing is
// enabled, then runs every exit hook queued by registerArg().
//
// Destructors are implicitly noexcept, and the exit hooks call inspectArg(),
// which throws when introspection fails. Letting that escape would call
// std::terminate, so failures here are contained: reporting is best effort
// and one failing hook does not prevent the remaining hooks from running.
Tracing::~Tracing() {
  try {
    if (isTimingEnabled()) {
      auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
          std::chrono::high_resolution_clock::now() - startTime);
      saveDuration(duration);
    }
  } catch (...) {
    // A lost duration record must not bring the process down.
  }

  for (auto const &exitFunc : exitFuncs) {
    try {
      exitFunc();
    } catch (...) {
      // Skip this argument and carry on with the others.
    }
  }
}
// True when this trace was constructed with enabled=true or when
// Metrics::isEnabled() reports true.
bool Tracing::isTimingEnabled() {
return enabled || Metrics::isEnabled();
}
// Same condition as isTimingEnabled(). `argName` and `timing` are currently
// unused, so the answer does not vary by argument or by entry/exit.
bool Tracing::isArgEnabled(const char *argName, ArgTiming timing) {
return enabled || Metrics::isEnabled();
}
// Captures the start timestamp used by the destructor to compute the trace
// duration. No-op when timing is disabled.
void Tracing::start() {
  if (!isTimingEnabled()) {
    return;
  }
  startTime = std::chrono::high_resolution_clock::now();
}
// Forwards an argument-size record to Metrics::saveArg, tagging it with this
// trace's name.
void Tracing::saveArg(const char *argName, ArgTiming timing, size_t size) {
Metrics::saveArg(name, argName, timing, size);
}
// Forwards a duration record to Metrics::saveDuration, tagging it with this
// trace's name.
void Tracing::saveDuration(std::chrono::milliseconds duration) {
Metrics::saveDuration(name, duration);
}
} // namespace Metrics

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <ObjectIntrospection.h>
#include <atomic>
#include <chrono>
#include <fstream>
#include <functional>
#include <mutex>
#include <string>
#include <vector>
namespace Metrics {
// Point at which a traced argument is measured. Tracing::registerArg inspects
// the argument immediately for ENTRY and from the Tracing destructor for EXIT.
enum ArgTiming {
ENTRY,
EXIT,
};
// Process-wide sink for trace results.
//
// The constructor publishes the instance through `singleton`; the static
// helpers below reach the instance through that pointer and are used by
// Tracing (a friend) to query settings and persist records.
class Metrics {
  friend class Tracing;

 public:
  // Opens `savePath` for writing and registers this object as the singleton.
  Metrics(ObjectIntrospection::options opts, const std::string &savePath);
  ~Metrics();

  // Turn on collection for every Tracing object, regardless of its own
  // `enabled` flag.
  void enable() {
    enableAll = true;
  }

 private:
  static std::atomic<Metrics *> singleton;

  ObjectIntrospection::options opts;
  std::fstream writer;
  std::mutex writerLock;  // serialises writes to `writer`
  bool hasWritten = false;  // whether a record has been written already
  bool enableAll = false;

  // Options of the registered instance. Returns a reference, so a Metrics
  // instance must exist when this is called.
  static ObjectIntrospection::options &getOptions() {
    return singleton.load()->opts;
  }

  // Whether global collection is on. Reports false when no Metrics instance
  // has been created, so a Tracing object can be used safely before (or
  // without) metrics being set up.
  static bool isEnabled() {
    const Metrics *instance = singleton.load();
    return instance != nullptr && instance->enableAll;
  }

  static void save(std::string object);
  static void saveArg(const char *name, const char *argName, ArgTiming timing,
                      size_t size);
  static void saveDuration(const char *name,
                           std::chrono::milliseconds duration);
};
// Scoped trace: records a duration and the introspected size of registered
// arguments, reporting through Metrics. Results for EXIT timing and the
// duration are emitted from the destructor.
class Tracing {
public:
// `name` labels every record emitted by this trace. `enabled` turns this
// trace on even when Metrics::isEnabled() is false.
Tracing(const char *name, bool enabled = false);
~Tracing();
// Captures the start time (when timing is enabled).
void start();
// Measures `value` now (ENTRY) and queues a second measurement for
// destruction time (EXIT). `value` is captured by pointer in the queued hook.
template <class T>
void registerArg(const char *argName, T *value);
private:
bool isTimingEnabled();
bool isArgEnabled(const char *argName, ArgTiming timing);
void saveArg(const char *argName, ArgTiming timing, size_t size);
void saveDuration(std::chrono::milliseconds duration);
template <class T>
void inspectArg(const char *argName, ArgTiming timing, T *value);
// Trace label; stored as a raw pointer, not copied.
const char *name;
bool enabled;
// Set by start(); read by the destructor to compute the duration.
std::chrono::high_resolution_clock::time_point startTime;
// Hooks run by the destructor, one per argument registered for EXIT timing.
std::vector<std::function<void()>> exitFuncs;
};
// Registers one argument with the trace.
//
// When ENTRY inspection is enabled the argument is measured immediately.
// When EXIT inspection is enabled a hook is queued that measures it again
// from the destructor; the hook keeps `argName` and `value` as raw pointers.
template <class T>
void Tracing::registerArg(const char *argName, T *value) {
  if (isArgEnabled(argName, ArgTiming::ENTRY)) {
    inspectArg(argName, ArgTiming::ENTRY, value);
  }

  if (!isArgEnabled(argName, ArgTiming::EXIT)) {
    return;
  }

  // Reserve room for a handful of hooks on first use
  if (exitFuncs.capacity() == 0) {
    exitFuncs.reserve(8);
  }
  exitFuncs.emplace_back([this, argName, value]() {
    inspectArg(argName, ArgTiming::EXIT, value);
  });
}
// Measures `value` with ObjectIntrospection::getObjectSize (option checking
// disabled) and saves the result.
//
// Nothing is saved while the library is still initialising, so the caller is
// never blocked. Throws std::runtime_error for any response code greater than
// OIL_INITIALISING.
template <class T>
void Tracing::inspectArg(const char *argName, ArgTiming timing, T *value) {
  size_t size;
  const int responseCode = ObjectIntrospection::getObjectSize(
      value, &size, Metrics::getOptions(), false);

  if (responseCode > ObjectIntrospection::Response::OIL_INITIALISING) {
    throw std::runtime_error("object introspection failed");
  }
  if (responseCode == ObjectIntrospection::Response::OIL_INITIALISING) {
    return;  // do nothing to avoid blocking
  }

  saveArg(argName, timing, size);
}
} // namespace Metrics

View File

@ -0,0 +1,103 @@
#include <algorithm>
#include <chrono>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>
// One address book entry: first name, last name and phone number, each held
// as its own copy of the string passed to the constructor.
class Contact {
 public:
  Contact(std::string& f, std::string& l, std::string& n)
      : firstName(f), lastName(l), number(n) {
  }

  // The accessors return copies of the stored strings.
  std::string getFirstName() {
    return firstName;
  }

  std::string getLastName() {
    return lastName;
  }

  std::string getNumber() {
    return number;
  }

 private:
  std::string firstName, lastName;
  std::string number;
};
class AddressBook {
public:
void AddContact(std::string& f, std::string& l, std::string& n) {
Entries.insert(Entries.begin(), Contact(f, l, n));
};
void DumpContacts(void) {
int sz = 0;
std::cout << "number of Entries: " << Entries.size() << std::endl;
for (auto contact : Entries) {
std::string firstName = contact.getFirstName();
std::string lastName = contact.getLastName();
std::string number = contact.getNumber();
sz +=
sizeof(contact) + firstName.size() + lastName.size() + number.size();
std::cout << "sizeof contact = " << sizeof(contact) << " sizeof fname "
<< sizeof(firstName) << " sizeof lname: " << sizeof(lastName)
<< " sizeof number: " << sizeof(number)
<< " size fname: " << firstName.size()
<< " size lname: " << lastName.size()
<< " size number: " << number.size() << std::endl;
}
std::cout << "Total size = " << sz << " bytes\n\n";
};
private:
int rev;
std::string Owner;
std::vector<Contact> Entries;
};
/*
* Borrowed from
* https://stackoverflow.com/questions/440133/how-do-i-create-a-random-alpha-numeric-string-in-c
* .
*/
// Returns a string of `length` characters, each picked with rand() from the
// 62 ASCII digits and letters.
std::string random_string(size_t length) {
  static const char alphabet[] =
      "0123456789"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz";
  // Exclude the terminating NUL from the pool
  const size_t alphabetSize = sizeof(alphabet) - 1;

  std::string result(length, 0);
  for (char& ch : result) {
    ch = alphabet[rand() % alphabetSize];
  }
  return result;
}
AddressBook globalAddrBook;
// Endless demo loop: once a second, add a contact made of three random
// strings (1 to 100 characters each) and dump the whole address book.
int main(int argc, char* argv[]) {
  AddressBook A;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<> lengthDist(1, 100);

  for (;;) {
    std::string fname = random_string(lengthDist(gen));
    std::string sname = random_string(lengthDist(gen));
    std::string number = random_string(lengthDist(gen));

    std::this_thread::sleep_for(std::chrono::seconds(1));

    A.AddContact(fname, sname, number);
    A.DumpContacts();
  }
}

View File

@ -0,0 +1,16 @@
# Builds the addrbook example binary from AddrBook.cpp.
.DEFAULT_GOAL := addrbook

CC = clang++
CFLAGS = -std=c++20 -g -O3
SRC = AddrBook.cpp

.PHONY: default
default: addrbook

# Phony, so the binary is rebuilt on every invocation.
.PHONY: addrbook
addrbook: $(SRC)
	$(CC) -o $@ $^ $(CFLAGS)

# -f keeps `make clean` from failing when the binary was never built.
.PHONY: clean
clean:
	rm -f addrbook

1
extern/drgn vendored Submodule

@ -0,0 +1 @@
Subproject commit 204c257b26788f0dd3913486bbb16f85053db0fd

1
extern/folly vendored Submodule

@ -0,0 +1 @@
Subproject commit d247a1ab1891677bfc8dd4fd2ea95fb43e160455

1
extern/rocksdb vendored Submodule

@ -0,0 +1 @@
Subproject commit 3981430f541098982b2e39d85c6f12fe0d6a5bdf

View File

@ -0,0 +1,255 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <atomic>
#include <cstddef>
#include <string>
/*
* Library interface for Object Introspection
*
* On the first call for each type these library functions perform significant
* setup. In a single-threaded context, the calling thread blocks on the first
* call. In a multi-threaded context, the first caller blocks, and other callers
* see Response::OIL_INITIALISING until initialisation completes.
*
* The options passed to library functions MUST NOT change after the first call
* for each type. By default, this will result in Response::OIL_CHANGED_OPTIONS.
* This check can be disabled for decreased latency by passing checkOptions as
* false.
*
* Generally the only required option is configFilePath. See sample.oid.toml for
* an example configuration file.
*
* -- SINGLE-THREADED
* ObjectIntrospection::options opts = { .configFilePath = "sample.oid.toml" };
* size_t size;
* int responseCode = ObjectIntrospection::getObjectSize(&obj, &size, opts);
* if (responseCode != ObjectIntrospection::Response::OIL_SUCCESS) {
* // handle error
* }
*
* -- MULTI-THREADED (NO SETUP)
* ObjectIntrospection::options opts = { .configFilePath = "sample.oid.toml" };
* size_t size;
* int responseCode = ObjectIntrospection::getObjectSize(&obj, &size, opts);
* if (responseCode > ObjectIntrospection::Response::OIL_INITIALISING) {
* // handle error
* } else if (responseCode == ObjectIntrospection::Response::OIL_SUCCESS) {
* // do something
* } // do nothing if still initialising
*
* -- MULTI-THREADED (WITH SETUP)
* ObjectIntrospection::options opts = { .configFilePath = "sample.oid.toml" };
* int responseCode = ObjectIntrospection::CodegenHandler<T>::init(opts);
* if (responseCode != ObjectIntrospection::Response::OIL_SUCCESS) {
* // handle error
* }
* size_t size;
 * responseCode = ObjectIntrospection::getObjectSize(&obj, &size);
* if (responseCode == ObjectIntrospection::Response::OIL_UNINITIALISED) {
* // handle error - impossible if successfully inited
* }
*
*/
namespace ObjectIntrospection {
// Status codes returned by the library entry points.
// The usage patterns in the header comment treat any value greater than
// OIL_INITIALISING as an error, so the numeric ordering matters to callers.
enum Response : int {
// The call completed successfully.
OIL_SUCCESS = 0,
// Another caller is still performing the first-time setup for this type.
OIL_INITIALISING = 1,
// The options passed differ from those the library was created with
// (only reported when checkOptions is true).
OIL_CHANGED_OPTIONS = 2,
OIL_BAD_CONFIG_FILE = 3,
OIL_SEGMENT_INIT_FAIL = 4,
OIL_COMPILATION_FAILURE = 5,
OIL_RELOCATION_FAILURE = 6,
// No library has been initialised for this type yet. Also the result of the
// option-less getObjectSize when built with OIL_AOT_COMPILATION.
OIL_UNINITIALISED = 7,
};
// Settings passed to the library. They must not change after the first call
// for a given type (see the header comment).
struct options {
// Path to the configuration file; generally the only required option.
std::string configFilePath{};
std::string debugFilePath{};
std::string cacheDirPath{};
int debugLevel = 0;
std::string sourceFileDumpPath{};
bool layout = false;
bool chaseRawPointers = false;
bool generateJitDebugInfo = false;
bool enableUpload = false;
bool enableDownload = false;
bool abortOnLoadFail = false;
// Unlike the other flags, this one defaults to true.
bool forceJIT = true;
// Comparison operators are declared here and defined elsewhere;
// CodegenHandler uses operator!= to detect changed options.
friend bool operator==(const options &lhs, const options &rhs);
friend bool operator!=(const options &lhs, const options &rhs);
};
constexpr std::string_view OI_SECTION_PREFIX = ".oi.";
// Handle created once per introspected type by CodegenHandler<T>. The
// implementation lives behind `pimpl_` (OILibraryImpl, defined elsewhere).
class OILibrary {
friend class OILibraryImpl;
public:
// `TemplateFunc` is the address of the getObjectSize instantiation for the
// type (see CodegenHandler<T>::getLibrary); `opt` is kept in `opts`.
OILibrary(void *TemplateFunc, options opt);
~OILibrary();
// Returns a Response code; CodegenHandler deletes the object again when the
// result is not OIL_SUCCESS.
int init();
// Writes the size of the object at `ObjectAddr` to `*size` and returns a
// Response code.
int getObjectSize(void *ObjectAddr, size_t *size);
// Options this library was created with; compared against later calls.
options opts;
private:
class OILibraryImpl *pimpl_;
// Null until set; presumably points at the generated sizing function once
// init() has succeeded - TODO confirm against the implementation.
size_t (*fp)(void *) = nullptr;
};
// Per-type owner of the OILibrary used to introspect objects of type T.
//
// State is held in two function-local statics (one pair per T):
//   - getLib():      the OILibrary pointer, null until initialisation is done;
//   - getBoxedLib(): null until some caller has claimed initialisation, then
//                    the address of the getLib() atomic.
// Claiming is done with a compare-exchange on the boxed pointer, so exactly
// one caller runs the setup while concurrent callers get OIL_INITIALISING.
template <class T>
class CodegenHandler {
public:
// Runs the first-time setup for T if nobody has done so yet. Returns a
// Response code; see getLibrary(result, opts, checkOptions) for the details.
static int init(const options &opts = {}, bool checkOptions = true) {
OILibrary *lib;
return getLibrary(lib, opts, checkOptions);
}
// Destroys the library for T, if one is fully initialised, and resets the
// state so that a later call can initialise again. Does nothing otherwise.
static void teardown() {
OILibrary *lib;
if (int responseCode = getLibrary(lib);
responseCode != Response::OIL_SUCCESS) {
return;
}
// Unpublish both pointers before freeing the library.
getLib()->store(nullptr);
getBoxedLib()->store(nullptr);
delete lib;
}
// Sizes an object using an already initialised library. Returns
// OIL_UNINITIALISED when there is none; never triggers initialisation.
static int getObjectSize(T *ObjectAddr, size_t *ObjectSize) {
OILibrary *lib;
if (int responseCode = getLibrary(lib);
responseCode != Response::OIL_SUCCESS) {
return responseCode;
}
return lib->getObjectSize((void *)ObjectAddr, ObjectSize);
}
// Sizes an object, initialising the library for T on first use. Any
// non-success code from getLibrary is returned unchanged.
static int getObjectSize(T *ObjectAddr, size_t *ObjectSize,
const options &opts, bool checkOptions = true) {
OILibrary *lib;
if (int responseCode = getLibrary(lib, opts, checkOptions);
responseCode != Response::OIL_SUCCESS) {
return responseCode;
}
return lib->getObjectSize((void *)ObjectAddr, ObjectSize);
}
private:
// Slot holding the library for T; null until initialisation has completed.
static std::atomic<OILibrary *> *getLib() {
static std::atomic<OILibrary *> lib = nullptr;
return &lib;
}
// Claim marker: null while nobody has started initialising, otherwise the
// address returned by getLib().
static std::atomic<std::atomic<OILibrary *> *> *getBoxedLib() {
static std::atomic<std::atomic<OILibrary *> *> boxedLib = nullptr;
return &boxedLib;
}
// Read-only lookup: sets `result` and returns OIL_SUCCESS only when a
// library is fully initialised; otherwise returns OIL_UNINITIALISED and
// leaves `result` untouched.
static int getLibrary(OILibrary *&result) {
std::atomic<OILibrary *> *curBoxedLib = getBoxedLib()->load();
if (curBoxedLib == nullptr)
return Response::OIL_UNINITIALISED;
OILibrary *curLib = curBoxedLib->load();
if (curLib == nullptr)
return Response::OIL_UNINITIALISED;
result = curLib;
return Response::OIL_SUCCESS;
}
// Lookup that performs the first-time setup when needed.
//
// Returns OIL_SUCCESS with `result` set, OIL_INITIALISING when another caller
// holds the claim and has not finished, OIL_CHANGED_OPTIONS when checkOptions
// is true and `opts` differ from the library's options, or the failing code
// from OILibrary::init().
static int getLibrary(OILibrary *&result, const options &opts,
bool checkOptions) {
std::atomic<OILibrary *> *curBoxedLib = getBoxedLib()->load();
if (curBoxedLib == nullptr) {
// Try to claim initialisation; losing the race means someone else is
// already setting up.
if (!getBoxedLib()->compare_exchange_strong(curBoxedLib, getLib())) {
return Response::OIL_INITIALISING;
}
curBoxedLib = getLib();
// Select the two-argument getObjectSize overload and pass its address to
// the library as an untyped pointer.
int (*sizeFp)(T *, size_t *) = &getObjectSize;
void *typedFp = reinterpret_cast<void *>(sizeFp);
OILibrary *newLib = new OILibrary(typedFp, opts);
if (int initCode = newLib->init(); initCode != Response::OIL_SUCCESS) {
delete newLib;
getBoxedLib()->store(nullptr); // allow next attempt to initialise
return initCode;
}
// Publish the ready library; from here on other callers can use it.
getLib()->store(newLib);
}
OILibrary *curLib = curBoxedLib->load();
// Claimed but not yet published: the initialising caller is still working.
if (curLib == nullptr) {
return Response::OIL_INITIALISING;
}
if (checkOptions && opts != curLib->opts) {
return Response::OIL_CHANGED_OPTIONS;
}
result = curLib;
return Response::OIL_SUCCESS;
}
};
/*
 * Call this from anywhere in your program. The first call for each type
 * performs the setup for that type and blocks while doing so. Usage patterns
 * are given at the top of this file. This function should not be called when
 * utilising Ahead-Of-Time (AOT) compilation.
 *
 * Forwards to CodegenHandler<T>::getObjectSize and returns its Response code;
 * checkOptions controls whether `opts` are compared with the options used at
 * initialisation.
 */
template <class T>
int getObjectSize(T *ObjectAddr, size_t *ObjectSize, const options &opts,
bool checkOptions = true) {
return CodegenHandler<T>::getObjectSize(ObjectAddr, ObjectSize, opts,
checkOptions);
}
/*
 * You may only call this after a call to the previous signature, or a
 * call to CodegenHandler<T>::init(...) for the used type.
 *
 * As we can choose to compile the OIL blob Ahead-Of-Time (AOT) rather
 * than Just-In-Time (JIT), this default is provided as a weak symbol.
 * When built with OIL_AOT_COMPILATION the default does no JIT work and simply
 * returns Response::OIL_UNINITIALISED, removing the burden of JIT on a
 * production system; presumably the AOT-generated object supplies the
 * definition that replaces it (confirm against oilgen's output).
 */
template <class T>
int __attribute__((weak)) getObjectSize(T *ObjectAddr, size_t *ObjectSize) {
#ifdef OIL_AOT_COMPILATION
return Response::OIL_UNINITIALISED;
#else
return CodegenHandler<T>::getObjectSize(ObjectAddr, ObjectSize);
#endif
}
} // namespace ObjectIntrospection

9
oss.oid.toml Normal file
View File

@ -0,0 +1,9 @@
[headers]
user_paths = [
"/usr/lib64/llvm12/lib/clang/12.0.1/include/",
]
system_paths = [
"/usr/include/c++/11",
"/usr/include/c++/11/x86_64-redhat-linux/",
"/usr/include",
]

59
src/Common.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <map>
#include <set>
#include <string>
#include <vector>
extern "C" {
#include <drgn.h>
}
constexpr int oidMagicId = 0x01DE8;
struct ContainerInfo;
// A variable name paired with its drgn qualified type.
struct RootInfo {
std::string varName;
struct drgn_qualified_type type;
};
// A member described purely by strings: its type name and its variable name.
struct ClassMember {
std::string typeName;
std::string varName;
};
// Description of one class member in terms of drgn types.
// None of the scalar fields has a default initialiser, so a
// default-initialised instance carries indeterminate values.
struct DrgnClassMemberInfo {
struct drgn_type *type;
std::string member_name;
// Presumably the member's offset within its parent, in bits - TODO confirm.
uint64_t bit_offset;
// Presumably non-zero only for bit-field members - TODO confirm.
uint64_t bit_field_size;
bool isStubbed;
};
// Bundle of lookup tables describing a set of drgn types. Most tables are
// keyed by `drgn_type *`, i.e. by pointer identity of the type object.
struct TypeHierarchy {
// Members of each class type.
std::map<struct drgn_type *, std::vector<DrgnClassMemberInfo>>
classMembersMap;
// For each container type: its ContainerInfo plus a list of qualified types
// (presumably the template parameters - TODO confirm).
std::map<struct drgn_type *,
std::pair<ContainerInfo, std::vector<struct drgn_qualified_type>>>
containerTypeMap;
std::map<struct drgn_type *, struct drgn_type *> typedefMap;
// Keyed by name rather than by type pointer.
std::map<std::string, size_t> sizeMap;
std::set<struct drgn_type *> knownDummyTypeList;
std::map<struct drgn_type *, struct drgn_type *> pointerToTypeMap;
std::set<struct drgn_type *> thriftIssetStructTypes;
};

133
src/ContainerInfo.cpp Normal file
View File

@ -0,0 +1,133 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ContainerInfo.h"
#include <glog/logging.h>
#include <toml++/toml.h>
#include <map>
// Converts the textual name of a container type to its enumerator.
// Returns UNKNOWN_TYPE when `str` does not name an entry of
// LIST_OF_CONTAINER_TYPES.
ContainerTypeEnum containerTypeEnumFromStr(std::string& str) {
  // Built once, on first use: maps each enumerator's spelling to its value
  static const std::map<std::string, ContainerTypeEnum> nameMap = {
#define X(name) {#name, name},
      LIST_OF_CONTAINER_TYPES
#undef X
  };

  const auto entry = nameMap.find(str);
  return entry == nameMap.end() ? UNKNOWN_TYPE : entry->second;
}
// Returns the enumerator's own spelling as a string literal. Any value that is
// not an entry of LIST_OF_CONTAINER_TYPES yields "UNKNOWN_TYPE".
const char* containerTypeEnumToStr(ContainerTypeEnum ty) {
switch (ty) {
// X-macro: expands to one `case NAME: return "NAME";` per list entry.
#define X(name) \
case name: \
return #name;
LIST_OF_CONTAINER_TYPES
#undef X
default:
return "UNKNOWN_TYPE";
}
}
/*
 * Build a ContainerInfo from the `[info]` table of a TOML file.
 *
 * Required keys: `typeName`, `ctype` (must name a known container type) and
 * `header`. Optional keys: `numTemplateParams`, `ns`,
 * `replaceTemplateParamIndex`, `allocatorIndex`, `underlyingContainerIndex`.
 *
 * Returns nullptr (after logging an error) when the file cannot be parsed,
 * the `info` table is missing, or a required key is absent/invalid.
 */
std::unique_ptr<ContainerInfo> ContainerInfo::loadFromFile(
    const fs::path& path) {
  toml::table container;
  try {
    container = toml::parse_file(std::string(path));
  } catch (const toml::parse_error& ex) {
    LOG(ERROR) << "ContainerInfo::loadFromFile: " << path << " : "
               << ex.description();
    return nullptr;
  }

  toml::table* info = container["info"].as_table();
  if (info == nullptr) {
    LOG(ERROR) << "a container info file requires an `info` table";
    return nullptr;
  }

  // Required: info.typeName
  std::optional<std::string> typeName =
      (*info)["typeName"].value<std::string>();
  if (!typeName) {
    LOG(ERROR) << "`info.typeName` is a required field";
    return nullptr;
  }

  std::optional<size_t> numTemplateParams =
      (*info)["numTemplateParams"].value<size_t>();

  // Required: info.ctype, which must translate to a known enumerator.
  std::optional<std::string> ctypeName = (*info)["ctype"].value<std::string>();
  if (!ctypeName) {
    LOG(ERROR) << "`info.ctype` is a required field";
    return nullptr;
  }
  const ContainerTypeEnum ctype = containerTypeEnumFromStr(*ctypeName);
  if (ctype == UNKNOWN_TYPE) {
    LOG(ERROR) << "`" << (*ctypeName) << "` is not a valid container type";
    return nullptr;
  }

  // Required: info.header
  std::optional<std::string> header = (*info)["header"].value<std::string>();
  if (!header) {
    LOG(ERROR) << "`info.header` is a required field";
    return nullptr;
  }

  // Optional: info.ns — only string elements are kept, others are ignored.
  std::vector<std::string> ns;
  if (toml::array* nsArray = (*info)["ns"].as_array()) {
    ns.reserve(nsArray->size());
    nsArray->for_each([&ns](auto&& element) {
      if constexpr (toml::is_string<decltype(element)>) {
        ns.emplace_back(element);
      }
    });
  }

  // Optional: info.replaceTemplateParamIndex — only integer elements are
  // kept, others are ignored.
  std::vector<size_t> replaceTemplateParamIndex{};
  if (toml::array* indexArray =
          (*info)["replaceTemplateParamIndex"].as_array()) {
    replaceTemplateParamIndex.reserve(indexArray->size());
    indexArray->for_each([&replaceTemplateParamIndex](auto&& element) {
      if constexpr (toml::is_integer<decltype(element)>) {
        replaceTemplateParamIndex.push_back(*element);
      }
    });
  }

  std::optional<size_t> allocatorIndex =
      (*info)["allocatorIndex"].value<size_t>();
  std::optional<size_t> underlyingContainerIndex =
      (*info)["underlyingContainerIndex"].value<size_t>();

  return std::unique_ptr<ContainerInfo>(new ContainerInfo{
      std::move(*typeName),
      numTemplateParams,
      ctype,
      std::move(*header),
      std::move(ns),
      std::move(replaceTemplateParamIndex),
      allocatorIndex,
      underlyingContainerIndex,
  });
}

91
src/ContainerInfo.h Normal file
View File

@ -0,0 +1,91 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <optional>
#include <string>
#include <vector>
namespace fs = std::filesystem;
// X-macro listing every container type known to the tool. Users define
// `X(name)` to whatever expansion they need, expand LIST_OF_CONTAINER_TYPES,
// then `#undef X`. It is expanded below to create the ContainerTypeEnum
// enumerators, so the order of the entries determines their numeric values.
#define LIST_OF_CONTAINER_TYPES \
  X(UNKNOWN_TYPE) \
  X(ARRAY_TYPE) \
  X(SMALL_VEC_TYPE) \
  X(SET_TYPE) \
  X(UNORDERED_SET_TYPE) \
  X(SEQ_TYPE) \
  X(LIST_TYPE) \
  X(STD_MAP_TYPE) \
  X(STD_UNORDERED_MAP_TYPE) \
  X(MAP_SEQ_TYPE) \
  X(BY_MULTI_QRT_TYPE) \
  X(F14_MAP) \
  X(F14_SET) \
  X(FEED_QUICK_HASH_SET) \
  X(FEED_QUICK_HASH_MAP) \
  X(RADIX_TREE_TYPE) \
  X(PAIR_TYPE) \
  X(STRING_TYPE) \
  X(FOLLY_IOBUF_TYPE) \
  X(FOLLY_IOBUFQUEUE_TYPE) \
  X(FB_STRING_TYPE) \
  X(UNIQ_PTR_TYPE) \
  X(SHRD_PTR_TYPE) \
  X(FB_HASH_MAP_TYPE) \
  X(FB_HASH_SET_TYPE) \
  X(FOLLY_OPTIONAL_TYPE) \
  X(OPTIONAL_TYPE) \
  X(TRY_TYPE) \
  X(REF_WRAPPER_TYPE) \
  X(SORTED_VEC_SET_TYPE) \
  X(REPEATED_FIELD_TYPE) \
  X(CAFFE2_BLOB_TYPE) \
  X(MULTI_MAP_TYPE) \
  X(FOLLY_SMALL_HEAP_VECTOR_MAP) \
  X(CONTAINER_ADAPTER_TYPE) \
  X(MICROLIST_TYPE) \
  X(ENUM_MAP_TYPE) \
  X(BOOST_BIMAP_TYPE) \
  X(STD_VARIANT_TYPE) \
  X(THRIFT_ISSET_TYPE)
// One enumerator per entry of LIST_OF_CONTAINER_TYPES, in list order with no
// explicit values (so UNKNOWN_TYPE, the first entry, is 0).
enum ContainerTypeEnum {
#define X(name) name,
  LIST_OF_CONTAINER_TYPES
#undef X
};
// Enumerator spelling -> enumerator; UNKNOWN_TYPE for unrecognised names.
ContainerTypeEnum containerTypeEnumFromStr(std::string &str);
// Enumerator -> enumerator spelling; "UNKNOWN_TYPE" for unrecognised values.
const char *containerTypeEnumToStr(ContainerTypeEnum ty);
// Description of one container type, as loaded from the `[info]` table of a
// TOML container definition (see ContainerInfo::loadFromFile). The member
// order matters: loadFromFile aggregate-initialises this struct positionally.
struct ContainerInfo {
  // TOML key `typeName` (required). Also the ordering key for operator<.
  std::string typeName;
  // TOML key `numTemplateParams` (optional).
  std::optional<size_t> numTemplateParams;
  // TOML key `ctype` (required); never UNKNOWN_TYPE when loaded from a file.
  ContainerTypeEnum ctype = UNKNOWN_TYPE;
  // TOML key `header` (required).
  // NOTE(review): presumably the header that declares the container.
  std::string header;
  // TOML key `ns` (optional array of strings).
  // NOTE(review): presumably namespaces related to the container — confirm.
  std::vector<std::string> ns;
  // TOML key `replaceTemplateParamIndex` (optional array of integers).
  // NOTE(review): semantics are not visible in this file.
  std::vector<size_t> replaceTemplateParamIndex{};
  // TOML key `allocatorIndex` (optional).
  // NOTE(review): presumably the position of the allocator template parameter.
  std::optional<size_t> allocatorIndex{};
  // Index of underlying container in template parameters for a container
  // adapter
  std::optional<size_t> underlyingContainerIndex{};

  // Parses `path` and returns the described ContainerInfo, or nullptr on any
  // parse/validation error (errors are logged).
  static std::unique_ptr<ContainerInfo> loadFromFile(const fs::path &path);

  // Orders ContainerInfo objects by typeName only, so a std::set<ContainerInfo>
  // holds at most one entry per type name.
  bool operator<(const ContainerInfo &rhs) const {
    return (typeName < rhs.typeName);
  }
};

128
src/Descs.cpp Normal file
View File

@ -0,0 +1,128 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Descs.h"
#include <glog/logging.h>
#include <algorithm>
#include <boost/scope_exit.hpp>
#include <charconv>
#include <iostream>
#include <utility>
extern "C" {
#include <drgn.h>
#include <sys/user.h>
}
/*
 * Print a range as `<start>:<end>`, with both bounds formatted the way the
 * stream formats pointers.
 */
std::ostream &operator<<(std::ostream &os, const FuncDesc::Range &r) {
  void *const first = reinterpret_cast<void *>(r.start);
  void *const last = reinterpret_cast<void *>(r.end);
  os << first << ':' << last;
  return os;
}
/*
 * Given a register set return the address where the supplied argument
 * position can be found at the given pc (what about if we don't have this
 * location?).
 *
 * `regs->rip` is temporarily overwritten with `pc` for the duration of the
 * call and restored before returning, so `regs` is observably unchanged for
 * the caller. Returns std::nullopt (after logging) when drgn reports an
 * error while locating the object.
 */
std::optional<uintptr_t> FuncDesc::Arg::findAddress(
    struct user_regs_struct *regs, uintptr_t pc) const {
  // Swap in the requested pc; the scope guard puts the old value back on
  // every exit path.
  auto prevRip = std::exchange(regs->rip, pc);
  BOOST_SCOPE_EXIT_ALL(&) {
    regs->rip = prevRip;
  };

  // Zero-initialised so that the deinit guard below always runs on a
  // well-defined object.
  // NOTE(review): assumes drgn_object_deinit accepts a zeroed object when
  // drgn_object_locate fails early — confirm against drgn.
  struct drgn_object object {};
  BOOST_SCOPE_EXIT_ALL(&) {
    drgn_object_deinit(&object);
  };

  if (auto *err = drgn_object_locate(&locator, regs, &object)) {
    LOG(ERROR) << "Error while finding address of argument: " << err->message;
    drgn_error_destroy(err);
    return std::nullopt;
  }

  return object.address;
}
/*
 * Translate an argument designator into an index into `arguments`.
 *
 *  - "retval"  -> std::nullopt (the return value has no argument index).
 *  - "this"    -> 0 for methods; std::nullopt (with an error logged) otherwise.
 *  - otherwise -> the first run of decimal digits found in `arg` (e.g. the 0
 *                 in "arg0"), shifted by one for methods because slot 0 of
 *                 `arguments` holds `this`.
 *
 * When `validateIndex` is true the parsed number must be smaller than
 * numArgs(). std::nullopt is returned (with an error logged) when no digits
 * are present, the digits cannot be converted, validation fails, or the
 * shifted index does not fit in a uint8_t.
 */
std::optional<uint8_t> FuncDesc::getArgumentIndex(const std::string &arg,
                                                  bool validateIndex) const {
  if (arg == "retval") {
    return std::nullopt;
  }

  if (arg == "this") {
    if (!isMethod) {
      LOG(ERROR) << "Function " << symName << " has no 'this' parameter";
      return std::nullopt;
    }
    return 0;
  }

  // Extract arg's number
  auto it = arg.find_first_of("0123456789");
  if (it == std::string::npos) {
    LOG(ERROR) << "Invalid argument: " << arg;
    return std::nullopt;
  }

  const auto *argIdxBegin = arg.data() + it;
  const auto *argIdxEnd = arg.data() + arg.size();

  uint8_t argIdx = 0;
  if (auto res = std::from_chars(argIdxBegin, argIdxEnd, argIdx);
      res.ec != std::errc{}) {
    LOG(ERROR) << "Failed to convert " << arg
               << " digits: " << strerror((int)res.ec);
    return std::nullopt;
  }

  // Check and offset for methods
  if (validateIndex && argIdx >= numArgs()) {
    LOG(ERROR) << "Argument index " << (int)argIdx
               << " too large. Args count: " << numArgs();
    return std::nullopt;
  }

  if (isMethod) {
    // Without this guard 255 + 1 would wrap to 0 and silently designate
    // 'this' (reachable when validateIndex is false).
    if (argIdx == UINT8_MAX) {
      LOG(ERROR) << "Argument index " << (int)argIdx
                 << " too large for a method";
      return std::nullopt;
    }
    argIdx += 1;
  }

  return argIdx;
}
std::shared_ptr<FuncDesc::TargetObject> FuncDesc::getArgument(
const std::string &arg) {
std::shared_ptr<FuncDesc::TargetObject> outArg;
if (arg == "retval") {
outArg = retval;
} else {
auto argIdx = getArgumentIndex(arg);
if (!argIdx.has_value()) {
return nullptr;
}
outArg = arguments[*argIdx];
}
if (!outArg || !outArg->valid) {
LOG(ERROR) << "Argument " << arg << " for " << symName << " is invalid";
return nullptr;
}
return outArg;
}

142
src/Descs.h Normal file
View File

@ -0,0 +1,142 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <memory>
#include <optional>
#include <string>
#include <vector>
extern "C" {
#include <drgn.h>
}
/*
 * Description of a function in the target: its symbol name, the address
 * ranges it occupies, and locator objects for its arguments and return value.
 *
 * For methods (`isMethod == true`) slot 0 of `arguments` holds `this`; the
 * position-based accessors below hide that offset from callers.
 */
struct FuncDesc {
  struct TargetObject;
  struct Arg;
  struct Retval;

  std::string symName{};

  // Half-open address range [start, end).
  struct Range {
    uintptr_t start;
    uintptr_t end;

    // Leaves both bounds uninitialised.
    Range() = default;
    Range(uintptr_t _start, uintptr_t _end) : start{_start}, end{_end} {
      assert(end >= start);
    };

    constexpr uintptr_t size() const noexcept {
      return end - start;
    }
  };

  std::vector<Range> ranges;
  std::vector<std::shared_ptr<FuncDesc::Arg>> arguments{};
  std::shared_ptr<FuncDesc::Retval> retval{};
  bool isMethod{false};

  FuncDesc() = default;
  FuncDesc(std::string func) : symName{std::move(func)} {};

  // Appends a fresh, default-constructed argument and returns it.
  std::shared_ptr<FuncDesc::Arg> addArgument() {
    return arguments.emplace_back(std::make_shared<FuncDesc::Arg>());
  }

  // Returns the `this` object of a method, or nullptr when the function is
  // not a method or its arguments have not been populated yet.
  std::shared_ptr<FuncDesc::TargetObject> getThis() {
    if (!isMethod || arguments.empty()) {
      return nullptr;
    }
    return arguments[0];
  }

  // Returns the argument at zero-based position `argPos` (not counting
  // `this`), or nullptr when no such argument exists.
  std::shared_ptr<FuncDesc::TargetObject> getArgument(size_t argPos) {
    // Offset by 1, as methods have 'this' at arg 0
    if (isMethod) {
      argPos += 1;
    }
    // Also rejects the wrap-around of argPos == SIZE_MAX on methods.
    if (argPos >= arguments.size() || (isMethod && argPos == 0)) {
      return nullptr;
    }
    return arguments[argPos];
  }

  std::shared_ptr<FuncDesc::TargetObject> getArgument(const std::string &);
  std::optional<uint8_t> getArgumentIndex(const std::string &,
                                          bool = true) const;

  // Number of arguments, not counting `this` for methods.
  size_t numArgs() const {
    if (isMethod) {
      // A method whose argument list is still empty has no arguments;
      // `size() - 1` would underflow to SIZE_MAX.
      return arguments.empty() ? 0 : arguments.size() - 1;
    }
    return arguments.size();
  }

  // Returns the first range containing `addr`, or std::nullopt if none does.
  std::optional<Range> getRange(uintptr_t addr) {
    for (const auto &range : ranges) {
      if (addr >= range.start && addr < range.end) {
        return range;
      }
    }
    return std::nullopt;
  }

  // Common base of everything whose address can be looked up in the target.
  struct TargetObject {
    bool valid = false;
    std::string typeName{};

    virtual ~TargetObject() = default;

    /* Given a register set return the address where the object position
     * can be found at the given pc (what about if we don't have this
     * location?).
     */
    virtual std::optional<uintptr_t> findAddress(struct user_regs_struct *regs,
                                                 uintptr_t pc) const = 0;
  };

  // A function argument, located through a drgn object locator.
  struct Arg final : virtual TargetObject {
    struct drgn_object_locator locator;

    ~Arg() final {
      drgn_object_locator_deinit(&locator);
    }

    std::optional<uintptr_t> findAddress(struct user_regs_struct *regs,
                                         uintptr_t pc) const final;
  };

  // The function's return value; its "address" is whatever rax holds.
  struct Retval final : virtual TargetObject {
    ~Retval() final = default;

    std::optional<uintptr_t> findAddress(struct user_regs_struct *regs,
                                         uintptr_t /* pc */) const final {
      return regs->rax;
    }
  };
};
// Streams a range as `<start>:<end>`, both printed as pointers.
std::ostream &operator<<(std::ostream &os, const FuncDesc::Range &r);
/*
 * Description of a global variable: its symbol name, its type name and its
 * base address. A default-constructed instance has empty names and address 0.
 */
class GlobalDesc {
 public:
  GlobalDesc() = default;

  // `typeName` is left empty; callers fill it in afterwards.
  GlobalDesc(std::string name, uintptr_t addr)
      : symName(std::move(name)), baseAddr(addr) {
  }

  std::string symName;
  std::string typeName;
  uintptr_t baseAddr = 0;
};

72
src/DrgnUtils.cpp Normal file
View File

@ -0,0 +1,72 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "DrgnUtils.h"
extern "C" {
#include <drgn.h>
}
namespace drgnplusplus {

// Releases the drgn_error owned by an `error`.
void error::Deleter::operator()(drgn_error* err) noexcept {
  drgn_error_destroy(err);
}

// Returns the message of the wrapped drgn_error. An `error` may legitimately
// wrap a null pointer (the "no error" state reported by operator bool); in
// that case a fixed placeholder is returned instead of dereferencing null.
const char* error::what() const noexcept {
  if (!ptr) {
    return "drgnplusplus::error: no error";
  }
  return ptr->message;
}

// Creates a new drgn program. Throws drgnplusplus::error if drgn fails.
program::program() {
  struct drgn_program* prog;
  error err(drgn_program_create(nullptr, &prog));
  if (err) {
    throw err;
  }
  ptr.reset(prog);
}

// Destroys the drgn_program owned by a `program`.
void program::Deleter::operator()(drgn_program* prog) noexcept {
  drgn_program_destroy(prog);
}

// Creates a function iterator over `prog`. Throws drgnplusplus::error if drgn
// fails. The iterator does not point at an element until it is advanced
// (see begin()).
func_iterator::func_iterator(drgn_program* prog) {
  drgn_func_iterator* ret;
  error err(drgn_func_iterator_create(prog, &ret));
  if (err) {
    throw err;
  }
  iter.reset(ret, Deleter());
}

func_iterator::func_iterator(program& prog) : func_iterator(prog.get()) {
}

// Destroys the drgn_func_iterator shared by copies of a `func_iterator`.
void func_iterator::Deleter::operator()(drgn_func_iterator* _iter) noexcept {
  drgn_func_iterator_destroy(_iter);
}

// Advances to the next function. Throws drgnplusplus::error if drgn fails.
// When drgn reports no further element the underlying iterator is dropped,
// which makes this object compare equal to the default-constructed end().
func_iterator& func_iterator::operator++() {
  auto err = drgn_func_iterator_next(iter.get(), &current);
  if (err) {
    throw error(err);
  }
  if (current == nullptr) {
    iter = nullptr;
  }
  return *this;
}

}  // namespace drgnplusplus

114
src/DrgnUtils.h Normal file
View File

@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <exception>
#include <iterator>
#include <memory>
#include <sstream>
#include <variant>
extern "C" {
// Declare drgn structs and only refer to them by pointers to avoid exposing
// drgn.h.
struct drgn_error;
struct drgn_program;
struct drgn_func_iterator;
struct drgn_qualified_type;
}
// Thin C++ wrappers that give drgn's C handles owning, exception-based
// interfaces. Only pointers to the drgn structs are used here so that drgn.h
// does not have to be included by users of this header.
namespace drgnplusplus {

// Owning wrapper around a drgn_error that can be thrown as a std::exception.
// A null drgn_error represents "no error"; test with operator bool.
class error : public std::exception {
 public:
  // Destroys the wrapped drgn_error (defined in DrgnUtils.cpp).
  struct Deleter {
    void operator()(drgn_error* err) noexcept;
  };

  // Takes ownership of `err`, which may be null.
  error(drgn_error* err) : ptr(err){};

  // True when an actual error is held.
  operator bool() const {
    return static_cast<bool>(ptr);
  }

  const char* what() const noexcept final;

 private:
  std::unique_ptr<drgn_error, Deleter> ptr;
};

// Owning wrapper around a drgn_program.
class program {
 public:
  // Destroys the wrapped drgn_program (defined in DrgnUtils.cpp).
  struct Deleter {
    void operator()(drgn_program* prog) noexcept;
  };

  // Creates a new program; throws `error` on failure.
  program();
  // Takes ownership of an existing program.
  program(drgn_program* prog) : ptr(prog){};

  // Non-owning access to the underlying handle.
  drgn_program* get() {
    return ptr.get();
  }

 private:
  std::unique_ptr<drgn_program, Deleter> ptr;
};

// Input iterator over the functions of a program, yielding
// drgn_qualified_type elements. The object doubles as its own range: begin()
// advances *this to the first element and returns a copy, end() returns a
// default-constructed iterator. Copies share the underlying drgn iterator.
class func_iterator {
 public:
  using iterator_category = std::input_iterator_tag;
  using difference_type = std::ptrdiff_t;
  using value_type = drgn_qualified_type;
  using pointer = drgn_qualified_type*;
  using reference = drgn_qualified_type&;

  // Destroys the wrapped drgn_func_iterator (defined in DrgnUtils.cpp).
  struct Deleter {
    void operator()(drgn_func_iterator* _iter) noexcept;
  };

  // Both constructors create a new drgn iterator and throw `error` on failure.
  func_iterator(drgn_program* prog);
  func_iterator(program& prog);
  // The end iterator: holds no drgn iterator.
  func_iterator() = default;
  // Takes ownership of an existing drgn iterator.
  func_iterator(drgn_func_iterator* _iter) : iter(_iter, Deleter()) {
  }

  // Only meaningful after the iterator has been advanced to an element;
  // `current` is null before the first increment and at the end.
  reference operator*() const {
    return *current;
  }
  pointer operator->() {
    return current;
  }
  func_iterator& operator++();

  // Two iterators are equal when they share the same underlying drgn
  // iterator (or both hold none, i.e. both are at the end).
  friend bool operator==(const func_iterator& a, const func_iterator& b) {
    return a.iter == b.iter;
  };
  friend bool operator!=(const func_iterator& a, const func_iterator& b) {
    return !(a == b);
  };

  // Advances *this* to its next element and returns a copy of it, so calling
  // begin() more than once skips elements.
  func_iterator begin() {
    return ++(*this);
  }
  func_iterator end() {
    return func_iterator();
  }

 private:
  std::shared_ptr<drgn_func_iterator> iter = nullptr;
  pointer current = nullptr;
};

}  // namespace drgnplusplus

509
src/FuncGen.cpp Normal file
View File

@ -0,0 +1,509 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "FuncGen.h"
#include <glog/logging.h>
#include <toml++/toml.h>
#include <boost/format.hpp>
#include <map>
#include "ContainerInfo.h"
#ifndef OSS_ENABLE
#include "cea/object-introspection/internal/FuncGenInternal.h"
#endif
namespace {

// boost::format template (%1% = the typed-value type) for a generated
// `getSizeType` overload. The generated function records `t.m_type` and then
// switches on it: most kinds record a small tag via SAVE_DATA and recurse
// into the matching member of `t.m_data`; KindOfUninit and KindOfNull record
// nothing further. Kinds not listed in the switch are not handled.
// NOTE(review): the Kind* constants are hard-coded here; presumably they
// mirror the DataType values of the inspected runtime — confirm they are
// still in sync.
const std::string typedValueFunc = R"(
void getSizeType(const %1%& t, size_t& returnArg)
{
const uint8_t KindOfPersistentDict = 14;
const uint8_t KindOfDict = 15;
const uint8_t KindOfPersistentVec = 22;
const uint8_t KindOfVec = 23;
const uint8_t KindOfPersistentKeyset = 26;
const uint8_t KindOfKeyset = 27;
const uint8_t KindOfRecord = 29;
const uint8_t KindOfPersistentString = 38;
const uint8_t KindOfString = 39;
const uint8_t KindOfObject = 43;
const uint8_t KindOfResource = 45;
const uint8_t KindOfRFunc = 51;
const uint8_t KindOfRClsMeth = 53;
const uint8_t KindOfClsMeth = 56;
const uint8_t KindOfBoolean = 70;
const uint8_t KindOfInt64 = 74;
const uint8_t KindOfDouble = 76;
const uint8_t KindOfFunc = 82;
const uint8_t KindOfClass = 84;
const uint8_t KindOfLazyClass = 88;
const uint8_t KindOfUninit = 98;
const uint8_t KindOfNull = 100;
SAVE_DATA((uintptr_t)t.m_type);
switch(t.m_type) {
case KindOfInt64:
case KindOfBoolean:
SAVE_DATA(0);
getSizeType(t.m_data.num, returnArg);
break;
case KindOfDouble:
SAVE_DATA(1);
getSizeType(t.m_data.dbl, returnArg);
break;
case KindOfPersistentString:
case KindOfString:
SAVE_DATA(2);
getSizeType(t.m_data.pstr, returnArg);
break;
case KindOfPersistentDict:
case KindOfDict:
case KindOfPersistentVec:
case KindOfVec:
case KindOfPersistentKeyset:
case KindOfKeyset:
SAVE_DATA(3);
getSizeType(t.m_data.parr, returnArg);
break;
case KindOfObject:
SAVE_DATA(4);
getSizeType(t.m_data.pobj, returnArg);
break;
case KindOfResource:
SAVE_DATA(5);
getSizeType(t.m_data.pres, returnArg);
break;
case KindOfFunc:
SAVE_DATA(8);
getSizeType(t.m_data.pfunc, returnArg);
break;
case KindOfRFunc:
SAVE_DATA(9);
getSizeType(t.m_data.prfunc, returnArg);
break;
case KindOfClass:
SAVE_DATA(10);
getSizeType(t.m_data.pclass, returnArg);
break;
case KindOfClsMeth:
SAVE_DATA(11);
getSizeType(t.m_data.pclsmeth, returnArg);
break;
case KindOfRClsMeth:
SAVE_DATA(12);
getSizeType(t.m_data.prclsmeth, returnArg);
break;
case KindOfRecord:
SAVE_DATA(13);
getSizeType(t.m_data.prec, returnArg);
break;
case KindOfLazyClass:
SAVE_DATA(14);
getSizeType(t.m_data.plazyclass, returnArg);
break;
case KindOfUninit:
case KindOfNull:
break;
}
}
)";

// Built-in declaration/definition templates per container type. Both are
// empty: the FuncGen constructor copies them and templates are then added
// through FuncGen::RegisterContainer (and the internal maps when OSS_ENABLE
// is not defined).
const std::map<ContainerTypeEnum, std::string> defaultTypeToDeclMap = {};
const std::map<ContainerTypeEnum, std::string> defaultTypeToFuncMap = {};

}  // namespace
// Appends to `testCode` a forward declaration of the `getSizeType` overload
// taking a `const <type>&` and a `size_t&` accumulator.
void FuncGen::DeclareGetSize(std::string& testCode, const std::string& type) {
  testCode +=
      (boost::format("void getSizeType(const %1% &t, size_t& returnArg);\n") %
       type)
          .str();
}
// Appends to `testCode` a forward declaration of the single-argument
// `getSizeType` overload for `type`.
void FuncGen::DeclareTopLevelGetSize(std::string& testCode,
                                     const std::string& type) {
  testCode +=
      (boost::format("void getSizeType(const %1% &t);\n") % type).str();
}
// Appends to `testCode` the forward declaration of `StoreData`.
//
// The parameter type is spelled `uint64_t` so that the declaration names
// exactly the function emitted by DefineStoreData. Declaring it with
// `uintptr_t` only works where both typedefs denote the same type; elsewhere
// it would declare a separate overload that is never defined.
void FuncGen::DeclareStoreData(std::string& testCode) {
  testCode.append("void StoreData(uint64_t data, size_t& dataSegOffset);\n");
}
// Appends to `testCode` the forward declaration of `AddData`.
void FuncGen::DeclareAddData(std::string& testCode) {
  testCode += "void AddData(uint64_t data, size_t& dataSegOffset);\n";
}
// Appends to `testCode` the forward declaration of `EncodeVarint`.
void FuncGen::DeclareEncodeData(std::string& testCode) {
  testCode += "size_t EncodeVarint(uint64_t val, uint8_t* buf);\n";
}
// Appends to `testCode` the forward declaration of `EncodeVarintSize`.
void FuncGen::DeclareEncodeDataSize(std::string& testCode) {
  testCode += "size_t EncodeVarintSize(uint64_t val);\n";
}
// Appends to `testCode` the definition of `EncodeVarint`, which writes `val`
// to `buf` seven bits at a time (high bit set on every byte but the last)
// and returns the number of bytes written.
void FuncGen::DefineEncodeData(std::string& testCode) {
  testCode.append(R"(
size_t EncodeVarint(uint64_t val, uint8_t* buf) {
uint8_t* p = buf;
while (val >= 128) {
*p++ = 0x80 | (val & 0x7f);
val >>= 7;
}
*p++ = uint8_t(val);
return size_t(p - buf);
}
)");
}
// Appends to `testCode` the definition of `EncodeVarintSize`, which returns
// how many bytes `EncodeVarint` would need for `val`.
void FuncGen::DefineEncodeDataSize(std::string& testCode) {
  testCode.append(R"(
size_t EncodeVarintSize(uint64_t val) {
int s = 1;
while (val >= 128) {
++s;
val >>= 7;
}
return s;
}
)");
}
// Appends to `testCode` the definition of `StoreData`. The generated function
// varint-encodes `data` at `dataBase + dataSegOffset` when it fits below
// `dataSize`; otherwise nothing is written. In both cases `dataSegOffset`
// is advanced by the encoded size.
void FuncGen::DefineStoreData(std::string& testCode) {
  // TODO: We are encoding twice. Once to check the size and later to
  // actually encode. Maybe just do it once leaving a max of uintptr_t
  // space at the end.
  testCode.append(R"(
void StoreData(uint64_t data, size_t& dataSegOffset) {
size_t sz = EncodeVarintSize(data);
if (sz + dataSegOffset < dataSize) {
auto data_base = reinterpret_cast<uint8_t*>(dataBase);
data_base += dataSegOffset;
size_t data_size = EncodeVarint(data, data_base);
dataSegOffset += data_size;
} else {
dataSegOffset += sz;
}
}
)");
}
// Appends to `testCode` the definition of `AddData`, which adds `data` to the
// running total passed by reference.
void FuncGen::DefineAddData(std::string& testCode) {
  testCode.append(R"(
void AddData(uint64_t data, size_t& output) {
output += data;
}
)");
}
void FuncGen::DefineTopLevelGetObjectSize(std::string& testCode,
const std::string& rawType,
const std::string& linkageName) {
std::string func = R"(
/* RawType: %1% */
extern "C" int %2%(const OIInternal::__ROOT_TYPE__* ObjectAddr, size_t* ObjectSize)
{
OIInternal::getSizeType(*ObjectAddr, *ObjectSize);
return 0;
}
)";
boost::format fmt = boost::format(func) % rawType % linkageName;
testCode.append(fmt.str());
}
void FuncGen::DefineTopLevelGetSizeRef(std::string& testCode,
const std::string& rawType) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& t)
#pragma GCC diagnostic pop
{
pointers.initialize();
pointers.add((uintptr_t)&t);
auto data = reinterpret_cast<uintptr_t*>(dataBase);
data[0] = oidMagicId;
data[1] = cookieValue;
data[2] = 0;
size_t dataSegOffset = 3 * sizeof(uintptr_t);
OIInternal::StoreData((uintptr_t)(&t), dataSegOffset);
JLOG("%1% @");
JLOGPTR(&t);
OIInternal::getSizeType(t, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
data[2] = dataSegOffset;
dataBase += dataSegOffset;
}
)";
boost::format fmt =
boost::format(func) % rawType % std::hash<std::string>{}(rawType);
testCode.append(fmt.str());
}
// Appends to `testCode` the top-level `getSize_<hash>` function taking a
// pointer to `type`, where <hash> is std::hash of `rawType` printed as 16 hex
// digits. The generated function writes a three-word header (magic id,
// cookie, total size) to the data segment, records the pointee via
// getSizeType, and appends two trailer words.
//
// The template used to contain a second, leftover function header
// (`void getSize(const %1% * t)`) between the real header and the body, which
// made the generated code ill-formed; it has been removed so the output has
// the same shape as the Ref and SmartPtr variants.
void FuncGen::DefineTopLevelGetSizePtr(std::string& testCode,
                                       const std::string& type,
                                       const std::string& rawType) {
  std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* Type: %1%, RawType: %2% */
void __attribute__((used, retain)) getSize_%3$016x(const %1% * t)
#pragma GCC diagnostic pop
{
pointers.initialize();
auto data = reinterpret_cast<uintptr_t*>(dataBase);
data[0] = oidMagicId;
data[1] = cookieValue;
data[2] = 0;
size_t dataSegOffset = 3 * sizeof(uintptr_t);
getSizeType(t, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
data[2] = dataSegOffset;
dataBase += dataSegOffset;
}
)";

  boost::format fmt =
      boost::format(func) % type % rawType % std::hash<std::string>{}(rawType);
  testCode.append(fmt.str());
}
void FuncGen::DefineTopLevelGetSizePtrRet(std::string& testCode,
const std::string& rawType) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* Raw Type: %1% */
size_t __attribute__((used, retain)) getSize(const OIInternal::__ROOT_TYPE__* t)
#pragma GCC diagnostic pop
{
pointers.initialize();
size_t ret = 0;
pointers.add((uintptr_t)t);
SAVE_DATA((uintptr_t)t);
OIInternal::getSizeType(*t, ret);
return ret;
}
)";
boost::format fmt = boost::format(func) % rawType;
testCode.append(fmt.str());
}
void FuncGen::DefineTopLevelGetSizeSmartPtr(std::string& testCode,
const std::string& rawType) {
std::string func = R"(
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunknown-attributes"
/* RawType: %1% */
void __attribute__((used, retain)) getSize_%2$016x(const OIInternal::__ROOT_TYPE__& t)
#pragma GCC diagnostic pop
{
pointers.initialize();
auto data = reinterpret_cast<uintptr_t*>(dataBase);
data[0] = oidMagicId;
data[1] = cookieValue;
data[2] = 0;
size_t dataSegOffset = 3 * sizeof(uintptr_t);
OIInternal::StoreData((uintptr_t)(&t), dataSegOffset);
OIInternal::getSizeType(t, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
OIInternal::StoreData((uintptr_t)123456789, dataSegOffset);
data[2] = dataSegOffset;
dataBase += dataSegOffset;
}
)";
boost::format fmt =
boost::format(func) % rawType % std::hash<std::string>{}(rawType);
testCode.append(fmt.str());
}
// Appends to `testCode` the `getSizeType` declarations for every container in
// `containerInfo`, followed by the declaration of the generic template
// fallback. With `chaseRawPointers` set, the fallback is constrained so that
// it does not match pointer types.
//
// Returns false (after logging) as soon as a container type without a
// registered declaration template is encountered; `testCode` may have been
// partially extended in that case.
bool FuncGen::DeclareGetSizeFuncs(std::string& testCode,
                                  const std::set<ContainerInfo>& containerInfo,
                                  bool chaseRawPointers) {
  for (const auto& cInfo : containerInfo) {
    // Look the template up in the *declaration* map. Checking typeToFuncMap
    // and then indexing typeToDeclMap with operator[] would insert an empty
    // template for a missing declaration and make boost::format throw.
    const auto decl = typeToDeclMap.find(cInfo.ctype);
    if (decl == typeToDeclMap.end()) {
      LOG(ERROR) << "attempted to use container `"
                 << containerTypeEnumToStr(cInfo.ctype)
                 << "` for which a declaration was not provided";
      return false;
    }

    // Strip any template argument list: "foo<T>" -> "foo".
    const std::string ctype =
        cInfo.typeName.substr(0, cInfo.typeName.find('<'));

    boost::format fmt = boost::format(decl->second) % ctype;
    testCode.append(fmt.str());
  }

  if (chaseRawPointers) {
    testCode.append(
        "template<typename T, typename = "
        "std::enable_if_t<!std::is_pointer_v<std::decay_t<T>>>>\n");
  } else {
    testCode.append("template<typename T>\n");
  }
  testCode.append("void getSizeType(const T &t, size_t& returnArg);");
  return true;
}
// Appends to `testCode` the `getSizeType` definitions for every container in
// `containerInfo`, followed by the definition of the generic template
// fallback, which records `sizeof(T)`. With `chaseRawPointers` set, the
// fallback takes a second template parameter.
//
// Returns false (after logging) as soon as a container type without a
// registered definition template is encountered; `testCode` may have been
// partially extended in that case.
bool FuncGen::DefineGetSizeFuncs(std::string& testCode,
                                 const std::set<ContainerInfo>& containerInfo,
                                 bool chaseRawPointers) {
  for (const auto& container : containerInfo) {
    const auto definition = typeToFuncMap.find(container.ctype);
    if (definition == typeToFuncMap.end()) {
      LOG(ERROR) << "attempted to use container `"
                 << containerTypeEnumToStr(container.ctype)
                 << "` for which a definition was not provided";
      return false;
    }

    // Strip any template argument list: "foo<T>" -> "foo".
    const std::string baseName =
        container.typeName.substr(0, container.typeName.find("<", 0));

    testCode += (boost::format(definition->second) % baseName).str();
  }

  testCode += chaseRawPointers ? "template<typename T, typename C>\n"
                               : "template<typename T>\n";

  testCode += R"(
void getSizeType(const T &t, size_t& returnArg) {
JLOG("obj @");
JLOGPTR(&t);
SAVE_SIZE(sizeof(T));
}
)";
  return true;
}
void FuncGen::DefineGetSizeTypedValueFunc(std::string& testCode,
const std::string& ctype) {
boost::format fmt = boost::format(typedValueFunc) % ctype;
testCode.append(fmt.str());
}
// Appends to `testCode` the `get_container` helper template, which returns a
// reference to the member `c` of a container adapter by going through a
// local class derived from the adapter.
void FuncGen::DeclareGetContainer(std::string& testCode) {
  testCode.append(R"(
template <class ContainerAdapter>
const typename ContainerAdapter::container_type & get_container (ContainerAdapter &ca)
{
struct unwrap : ContainerAdapter {
static const typename ContainerAdapter::container_type & get (ContainerAdapter &ca) {
return ca.*&unwrap::c;
}
};
return unwrap::get(ca);
}
)");
}
// TODO: remove map initialisation once all container configs are removed from
// the code
//
// Seeds both template maps from the (empty) built-in defaults. When
// OSS_ENABLE is not defined, the entries of the internal maps from
// FuncGenInternal.h are merged in as well; `insert` never overwrites, so
// existing entries win.
FuncGen::FuncGen()
    : typeToDeclMap(defaultTypeToDeclMap), typeToFuncMap(defaultTypeToFuncMap) {
#ifndef OSS_ENABLE
  typeToDeclMap.insert(typeToDeclMapInternal.begin(),
                       typeToDeclMapInternal.end());
  typeToFuncMap.insert(typeToFuncMapInternal.begin(),
                       typeToFuncMapInternal.end());
#endif
}
// Registers the code-generation templates of container type `ctype` from the
// `[codegen]` table of the TOML file at `path`. Both `codegen.decl` and
// `codegen.func` are required.
//
// Returns false (after logging) when the file cannot be parsed, the table is
// missing, or either key is absent. Both fields are validated before either
// map is touched, so a failed call never leaves a declaration registered
// without its definition. Templates already registered for `ctype` are kept
// (`emplace` does not overwrite).
bool FuncGen::RegisterContainer(ContainerTypeEnum ctype, const fs::path& path) {
  toml::table container;
  try {
    container = toml::parse_file(std::string(path));
  } catch (const toml::parse_error& ex) {
    LOG(ERROR) << "FuncGen::RegisterContainer: " << path << " : "
               << ex.description();
    return false;
  }

  toml::table* codegen = container["codegen"].as_table();
  if (!codegen) {
    LOG(ERROR) << "a container info file requires an `codegen` table";
    return false;
  }

  std::optional<std::string> decl = (*codegen)["decl"].value<std::string>();
  if (!decl) {
    LOG(ERROR) << "`codegen.decl` is a required field";
    return false;
  }

  std::optional<std::string> func = (*codegen)["func"].value<std::string>();
  if (!func) {
    LOG(ERROR) << "`codegen.func` is a required field";
    return false;
  }

  // Everything validated: commit both templates together.
  typeToDeclMap.emplace(ctype, std::move(*decl));
  typeToFuncMap.emplace(ctype, std::move(*func));
  return true;
}

78
src/FuncGen.h Normal file
View File

@ -0,0 +1,78 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <map>
#include <set>
#include <string>
#include "ContainerInfo.h"
namespace fs = std::filesystem;
// Generator of the C++ helper code used to measure objects. Every
// Declare*/Define* member appends source text to the `testCode` string it is
// given; nothing is compiled or executed here. Per-container templates are
// kept in two maps keyed by container type (declaration and definition).
class FuncGen {
 public:
  // TODO: remove me once all containers are in toml files
  FuncGen();

  // Loads the `[codegen]` table (`decl` and `func`) of the TOML file at `path`
  // and registers it for the given container type. Returns false on error.
  bool RegisterContainer(ContainerTypeEnum, const fs::path& path);

  // Declaration / definition pairs of the fixed runtime helpers.
  void DeclareStoreData(std::string& testCode);
  void DefineStoreData(std::string& testCode);

  void DeclareAddData(std::string& testCode);
  void DefineAddData(std::string& testCode);

  void DeclareEncodeData(std::string& testCode);
  void DefineEncodeData(std::string& testCode);

  void DeclareEncodeDataSize(std::string& testCode);
  void DefineEncodeDataSize(std::string& testCode);

  // Emit the per-container `getSizeType` declarations/definitions plus the
  // generic template fallback. Return false when a container has no
  // registered template.
  bool DeclareGetSizeFuncs(std::string& testCode,
                           const std::set<ContainerInfo>& containerInfo,
                           bool chaseRawPointers);
  bool DefineGetSizeFuncs(std::string& testCode,
                          const std::set<ContainerInfo>& containerInfo,
                          bool chaseRawPointers);

  // Emits the `get_container` helper template for container adapters.
  void DeclareGetContainer(std::string& testCode);

  // Forward declarations of `getSizeType` for a single type.
  void DeclareGetSize(std::string& testCode, const std::string& type);
  void DeclareTopLevelGetSize(std::string& testCode, const std::string& type);

  // Top-level entry points of the generated code, one flavour per way the
  // root object is passed in.
  void DefineTopLevelGetObjectSize(std::string& testCode,
                                   const std::string& type,
                                   const std::string& linkageName);
  void DefineTopLevelGetSizePtr(std::string& testCode, const std::string& type,
                                const std::string& rawType);
  void DefineTopLevelGetSizeRef(std::string& testCode,
                                const std::string& rawType);
  void DefineTopLevelGetSizePtrRet(std::string& testCode,
                                   const std::string& type);

  void DefineTopLevelGetSizeSmartPtr(std::string& testCode,
                                     const std::string& rawType);

  // Emits the typed-value `getSizeType` overload for `ctype`.
  void DefineGetSizeTypedValueFunc(std::string& testCode,
                                   const std::string& ctype);

 private:
  // Container type -> boost::format template of its getSizeType declaration.
  std::map<ContainerTypeEnum, std::string> typeToDeclMap;
  // Container type -> boost::format template of its getSizeType definition.
  std::map<ContainerTypeEnum, std::string> typeToFuncMap;
};

207
src/Metrics.cpp Normal file
View File

@ -0,0 +1,207 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Metrics.h"
#include <unistd.h>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <limits>
#include <string>
/*
* NOTA BENE: Metrics are disabled by default. They are enabled by setting the
* 'OID_METRICS_TRACE' environment variable as detailed in Metrics.h.
*
* This is a small library that tracks how much time and other resources we
* are spending on the different phases of the execution of OI.
*
* You can instrument some code with:
* ```
* auto t = Metrics::Tracing("name_of_your_trace");
* [... some code ...]
* t.stop();
* ```
*
* The code above will create a new trace that, when the `.stop()` method
* is called will be appended to the list of traces.
*
* Alternatively, you can have this handled automatically at every return
* point thanks to C++'s RAII:
* ```
* Metrics::Tracing unused_var("name_of_your_trace");
* ```
*
* When you want to collect the data, call `::showTraces()` to print it to
* stdout, or `::saveTraces(file)` to save it to disk as JSON.
*/
namespace ObjectIntrospection::Metrics {
/*
 * Translate the raw value of the tracing environment variable into flags.
 *
 * A null pointer (variable not set) yields the default flags, i.e. everything
 * disabled. Otherwise each metric is enabled when its name occurs anywhere in
 * the string, compared case-insensitively.
 */
static inline TraceFlags parseTraceFlags(const char* flags) {
  TraceFlags parsed{};
  if (flags != nullptr) {
    parsed.time = strcasestr(flags, "time") != nullptr;
    parsed.rss = strcasestr(flags, "rss") != nullptr;
  }
  return parsed;
}
// Out-of-class definition of the shared tracing state declared in Tracing.
// Its constructor reads the environment and its destructor saves the traces.
Tracing::Static Tracing::static_{};
/*
 * Initialise the shared tracing state: read which metrics are enabled from
 * the environment and cache the system page size (in KiB).
 */
Tracing::Static::Static() {
  traceEnabled = parseTraceFlags(std::getenv(traceEnvKey));

  // Clear errno first so that perror() below reports sysconf()'s own error.
  errno = 0;
  const auto pageSizeBytes = sysconf(_SC_PAGESIZE);
  if (pageSizeBytes <= 0) {
    std::perror("Failed to retrieve page size");
    return;
  }
  pageSizeKB = pageSizeBytes / 1024;
}
/*
 * When tracing is enabled, write every recorded span to the output path and
 * then drop them. Does nothing when no metric is enabled.
 */
Tracing::Static::~Static() {
  if (traceEnabled) {
    Tracing::saveTraces(Tracing::outputPath());
    traces.clear();
  }
}
/*
 * Index to give to the next recorded span: the number of spans stored so far.
 */
uint32_t Tracing::getNextIndex() {
  // NOTE: the caller is expected to already hold the lock guarding
  // static_.traces
  const auto recorded = static_.traces.size();
  return static_cast<uint32_t>(recorded);
}
/*
 * Current time of the high resolution clock, or a default-constructed time
 * point when the `time` metric is disabled.
 */
Tracing::TimePoint Tracing::fetchTime() {
  return static_.traceEnabled.time ? std::chrono::high_resolution_clock::now()
                                   : TimePoint{};
}
/*
 * Read the resident set size of the current process from /proc/self/stat.
 *
 * Returns 0 when the `rss` metric is disabled or when the file cannot be read
 * or parsed. Otherwise returns the `rss` field (a page count) multiplied by
 * static_.pageSizeKB.
 *
 * NOTE(review): since the multiplier is the page size in KiB, the result is in
 * KiB even though callers store it in fields named `...Bytes` -- confirm which
 * unit is intended.
 */
long Tracing::fetchRssUsage() {
  if (!static_.traceEnabled.rss) {
    return 0;
  }

  std::ifstream statStream("/proc/self/stat");
  std::string statLine;
  if (!statStream || !std::getline(statStream, statLine)) {
    return 0;
  }

  // Field 2 (comm) is wrapped in parentheses and may itself contain spaces
  // and parentheses, so counting spaces from the start of the line is not
  // reliable. The last ')' on the line always terminates comm.
  auto pos = statLine.rfind(')');
  if (pos == std::string::npos) {
    return 0;
  }

  // rss is field 24. Starting from the end of field 2 we have to step over
  // the separator following comm plus the ones following fields 3..23
  // (state, ppid, ..., starttime, vsize): 22 separators in total.
  constexpr size_t separatorsToRss = 22;
  for (size_t i = 0; i < separatorsToRss; ++i) {
    pos = statLine.find(' ', pos + 1);
    if (pos == std::string::npos) {
      return 0;
    }
  }

  // We care about this field. strtol() yields 0 if no number can be parsed.
  const long rss = std::strtol(statLine.c_str() + pos + 1, nullptr, 10);
  return rss * static_.pageSizeKB;
}
/*
 * Finish this trace. The trace is always marked as ended; when tracing is
 * enabled, the elapsed time and the RSS reading are appended to the shared
 * list of spans under the shared mutex. The trace name is moved into the span.
 */
void Tracing::stop() {
  ended = true;
  if (!static_.traceEnabled) {
    return;
  }

  const auto stopTs = fetchTime();
  const auto elapsed =
      std::chrono::duration_cast<std::chrono::nanoseconds>(stopTs - startTs);
  const auto rssAfterBytes = fetchRssUsage();

  std::lock_guard<std::mutex> guard{static_.mutex};
  // Can't use emplace_back() because of old clang++ on CI
  static_.traces.push_back({getNextIndex(), std::move(traceName),
                            elapsed.count(), rssBeforeBytes, rssAfterBytes});
}
/*
 * Write all recorded spans to `output` as a JSON array of objects.
 *
 * Every object has "name" and "index"; "duration_ns" is present only when the
 * `time` metric is enabled, and "rss_before_bytes"/"rss_after_bytes" only when
 * the `rss` metric is enabled. If the file cannot be opened an error is
 * printed with perror() and nothing is written.
 */
void Tracing::saveTraces(const std::filesystem::path& output) {
  std::ofstream osf{output};
  if (!osf) {
    perror("Failed to open output file");
    return;
  }

  static constexpr char hexDigits[] = "0123456789abcdef";

  // Emit `text` as a JSON string literal. Quotes, backslashes and control
  // characters must be escaped, otherwise the resulting file is not valid JSON.
  const auto writeJsonString = [&osf](const std::string& text) {
    osf << '"';
    for (const char c : text) {
      const auto byte = static_cast<unsigned char>(c);
      switch (c) {
        case '"':
          osf << "\\\"";
          break;
        case '\\':
          osf << "\\\\";
          break;
        case '\n':
          osf << "\\n";
          break;
        case '\r':
          osf << "\\r";
          break;
        case '\t':
          osf << "\\t";
          break;
        default:
          if (byte < 0x20) {
            osf << "\\u00" << hexDigits[byte >> 4] << hexDigits[byte & 0x0F];
          } else {
            osf << c;
          }
          break;
      }
    }
    osf << '"';
  };

  osf << "[";
  // Track the separator explicitly instead of relying on span.index being
  // equal to the position of the span in the list.
  bool first = true;
  for (const auto& span : static_.traces) {
    if (!first) {
      osf << ",";
    }
    first = false;

    osf << "{\"name\":";
    writeJsonString(span.name);
    osf << ",\"index\":" << span.index;
    if (static_.traceEnabled.time) {
      osf << ",\"duration_ns\":" << span.duration;
    }
    if (static_.traceEnabled.rss) {
      osf << ",\"rss_before_bytes\":" << span.rssBeforeBytes;
      osf << ",\"rss_after_bytes\":" << span.rssAfterBytes;
    }
    osf << "}";
  }
  osf << "]\n";
}
/*
 * Path the traces are saved to: the value of the output environment variable
 * when it is set, "oid_metrics.json" otherwise.
 */
const char* Tracing::outputPath() {
  if (const char* fromEnv = std::getenv(outputEnvKey); fromEnv != nullptr) {
    return fromEnv;
  }
  return "oid_metrics.json";
}
/*
 * Print the enabled metrics as "time, rss", "time", "rss" or "disabled".
 */
std::ostream& operator<<(std::ostream& out, const TraceFlags& tf) {
  const char* label = "disabled";
  if (tf.time) {
    label = tf.rss ? "time, rss" : "time";
  } else if (tf.rss) {
    label = "rss";
  }
  return out << label;
}
/*
 * Print a human readable, multi-line description of a single span.
 */
std::ostream& operator<<(std::ostream& out, const Span& span) {
  return out << "Span for: " << span.name << " (" << span.index << ")\n"
             << " Duration: " << span.duration << " ns\n"
             << " RSS before: " << span.rssBeforeBytes << " bytes\n"
             << " RSS after: " << span.rssAfterBytes << " bytes\n";
}
std::ostream& operator<<(std::ostream& out, const std::vector<Span>& spans) {
out << "Showing all spans:\n";
out << "==================\n\n";
for (const auto& span : spans) {
out << span;
}
return out;
}
} // namespace ObjectIntrospection::Metrics

173
src/Metrics.h Normal file
View File

@ -0,0 +1,173 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <chrono>
#include <filesystem>
#include <mutex>
#include <ostream>
#include <string>
#include <vector>
namespace ObjectIntrospection::Metrics {
// Name of the environment variable that selects which metrics are collected.
constexpr auto traceEnvKey = "OID_METRICS_TRACE";
// Name of the environment variable that overrides where metrics are written.
constexpr auto outputEnvKey = "OID_METRICS_OUTPUT";
/*
* Control which metrics are collected using the environment variable
* OID_METRICS_TRACE. It accepts a comma-separated list of metrics: time, rss.
* By default, no metrics are collected.
* The metrics are written to the path specified by the environment variable
* OID_METRICS_OUTPUT. If not specified, they are written into
* "oid_metrics.json".
*/
// Set of metrics that can be enabled; both are off by default.
struct TraceFlags {
// Collect timing information.
bool time : 1 = false;
// Collect resident set size (RSS) information.
bool rss : 1 = false;
// True when at least one metric is enabled.
operator bool() const {
return time || rss;
}
};
// One recorded trace, as appended to the list of traces by Tracing::stop().
struct Span {
// Number of spans already recorded when this one was appended.
uint32_t index;
// Name given to the trace.
std::string name;
// Time between the start and the stop of the trace, in nanoseconds.
int64_t duration;
// RSS readings taken when the trace was started and stopped.
long rssBeforeBytes;
long rssAfterBytes;
};
/*
 * Scoped tracer: an instance captures a start time and RSS reading when it is
 * constructed and records a Span when stop() is called or, failing that, when
 * it is destroyed. All operations are cheap no-ops when no metric is enabled.
 */
class Tracing final {
 private:
  /*
   * Independent static variables might be destroyed before our std::atexit()
   * handler is called, leading to a use-after-free error. Instead, we group
   * all static variables in the following structure and put our atexit()
   * handler's code in its destructor. This ensures that all variables are
   * destroyed **AFTER** the call to ~Static().
   */
  static struct Static {
    long pageSizeKB;
    TraceFlags traceEnabled;
    std::vector<Span> traces;
    std::mutex mutex;

    Static();
    ~Static();
  } static_;

  using TimePoint = std::chrono::high_resolution_clock::time_point;

 public:
  /*
   * Metrics::Tracing("bad");
   *
   * Usage is Metrics::Tracing __varname_(...) The code above is an improper
   * use of the Tracing library. The tracing object above is not stored in a
   * variable. So it is immediately destroyed and won't record the expected
   * results. [[nodiscard]] flags the code above with a warning, which we
   * enforce as an error in our cmake/CompilerWarnings.cmake.
   */
  [[nodiscard]] explicit Tracing(const char* name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = name;
  }

  [[nodiscard]] explicit Tracing(const std::string& name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = name;
  }

  [[nodiscard]] explicit Tracing(std::string&& name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = std::move(name);
  }

  Tracing() = delete;

  // Copying starts a new, independent trace with the same name.
  Tracing(const Tracing& other) : Tracing{other.traceName} {
  }

  // Moving transfers the trace to the new object. The source is marked as
  // ended so that its destructor does not record a second span (with a
  // moved-from name) for the same trace.
  Tracing(Tracing&& other) noexcept
      : ended{other.ended},
        traceName{std::move(other.traceName)},
        startTs{other.startTs},
        rssBeforeBytes{other.rssBeforeBytes} {
    other.ended = true;
  }

  Tracing& operator=(const Tracing&) = delete;
  Tracing& operator=(Tracing&&) = delete;

  // Records the span unless stop() has already been called.
  ~Tracing() {
    if (!ended) {
      stop();
    }
  }

  // Restart the measurement from the current time and RSS reading.
  void reset() {
    if (!Tracing::isEnabled()) {
      return;
    }
    startTs = fetchTime();
    rssBeforeBytes = fetchRssUsage();
  }

  // Change the name under which the span will be recorded.
  void rename(const char* name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = name;
  }

  void rename(const std::string& name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = name;
  }

  void rename(std::string&& name) {
    if (!Tracing::isEnabled()) {
      return;
    }
    traceName = std::move(name);
  }

  // Mark the trace as ended and, when tracing is enabled, record its span.
  void stop();

  // Flags describing which metrics are currently enabled.
  static TraceFlags& isEnabled() {
    return static_.traceEnabled;
  }

  static const char* outputPath();
  static void saveTraces(const std::filesystem::path&);

 private:
  static uint32_t getNextIndex();
  static TimePoint fetchTime();
  static long fetchRssUsage();

  bool ended{false};
  std::string traceName{};
  // The starting point is captured at construction through these default
  // member initialisers.
  TimePoint startTs{Tracing::fetchTime()};
  long rssBeforeBytes{Tracing::fetchRssUsage()};
};
// Human readable printers for the tracing types.
std::ostream& operator<<(std::ostream&, const TraceFlags&);
std::ostream& operator<<(std::ostream&, const Span&);
std::ostream& operator<<(std::ostream&, const std::vector<Span>&);
} // namespace ObjectIntrospection::Metrics

247
src/OICache.cpp Normal file
View File

@ -0,0 +1,247 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "OICache.h"
#include <glog/logging.h>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <fstream>
#include "Descs.h"
#include "OICodeGen.h"
#include "Serialize.h"
#ifndef OSS_ENABLE
#include "cea/object-introspection/internal/ManifoldCache.h"
#endif
/*
 * Pick the name that identifies the cache entry `ent` of request `req`.
 *
 * Function and global descriptors are identified by the requested function
 * name. Every other entity is identified by a type name: the type of the
 * global for "global" requests, the type of the requested argument otherwise.
 * Returns std::nullopt when the corresponding descriptor or argument cannot
 * be found.
 */
static std::optional<std::reference_wrapper<const std::string>> getEntName(
    SymbolService &symbols, const irequest &req, OICache::Entity ent) {
  const bool namedAfterFunction = ent == OICache::Entity::FuncDescs ||
                                  ent == OICache::Entity::GlobalDescs;
  if (namedAfterFunction) {
    return req.func;
  }

  if (req.type == "global") {
    const auto &globalDesc = symbols.findGlobalDesc(req.func);
    if (!globalDesc) {
      return std::nullopt;
    }
    return globalDesc->typeName;
  }

  const auto &funcDesc = symbols.findFuncDesc(req);
  if (!funcDesc) {
    return std::nullopt;
  }

  const auto &arg = funcDesc->getArgument(req.arg);
  if (!arg) {
    return std::nullopt;
  }
  return arg->typeName;
}
std::optional<fs::path> OICache::getPath(const irequest &req,
Entity ent) const {
auto hash = [](const std::string &str) {
return std::to_string(std::hash<std::string>{}(str));
};
auto ext = extensions[static_cast<size_t>(ent)];
const auto &entName = getEntName(*symbols, req, ent);
if (!entName.has_value()) {
return std::nullopt;
}
return basePath / (hash(*entName) + ext);
}
template <typename T>
bool OICache::load(const irequest &req, Entity ent, T &data) {
if (!isEnabled())
return false;
try {
auto buildID = symbols->locateBuildID();
if (!buildID) {
LOG(ERROR) << "Failed to locate buildID";
return false;
}
auto cachePath = getPath(req, ent);
if (!cachePath.has_value()) {
LOG(ERROR) << "Failed to get cache path for " << req.type << ':'
<< req.func << ':' << req.arg << '/'
<< static_cast<size_t>(ent);
return false;
}
LOG(INFO) << "Loading cache " << *cachePath;
std::ifstream ifs(*cachePath);
boost::archive::text_iarchive ia(ifs);
std::string cacheBuildId;
ia >> cacheBuildId;
if (cacheBuildId != *buildID) {
LOG(ERROR) << "The cache's build id '" << cacheBuildId
<< "' doesn't match the target's build id '" << *buildID
<< "'";
return false;
}
ia >> data;
return true;
} catch (const std::exception &e) {
LOG(WARNING) << "Failed to load from cache: " << e.what();
return false;
}
}
template <typename T>
bool OICache::store(const irequest &req, Entity ent, const T &data) {
if (!isEnabled())
return false;
try {
auto buildID = symbols->locateBuildID();
if (!buildID) {
LOG(ERROR) << "Failed to locate buildID";
return false;
}
auto cachePath = getPath(req, ent);
if (!cachePath.has_value()) {
LOG(ERROR) << "Failed to get cache path for " << req.type << ':'
<< req.func << ':' << req.arg << '/'
<< static_cast<size_t>(ent);
return false;
}
LOG(INFO) << "Storing cache " << *cachePath;
std::ofstream ofs(*cachePath);
boost::archive::text_oarchive oa(ofs);
oa << *buildID;
oa << data;
return true;
} catch (const std::exception &e) {
LOG(WARNING) << "Failed to write to cache: " << e.what();
return false;
}
}
// Explicitly instantiate OICache::load and OICache::store for every payload
// type listed below. The macro is variadic so that type names containing
// commas (e.g. std::pair<A, B>) can be passed as a single argument.
#define INSTANTIATE_ARCHIVE(...) \
template bool OICache::load(const irequest &, Entity, __VA_ARGS__ &); \
template bool OICache::store(const irequest &, Entity, const __VA_ARGS__ &);
INSTANTIATE_ARCHIVE(std::pair<RootInfo, TypeHierarchy>)
INSTANTIATE_ARCHIVE(std::unordered_map<std::string, std::shared_ptr<FuncDesc>>)
INSTANTIATE_ARCHIVE(
std::unordered_map<std::string, std::shared_ptr<GlobalDesc>>)
INSTANTIATE_ARCHIVE(std::map<std::string, PaddingInfo>)
// The macro is only meant to be used for the list above.
#undef INSTANTIATE_ARCHIVE
// Upload all contents of cache for this request
bool OICache::upload([[maybe_unused]] const irequest &req) {
#ifndef OSS_ENABLE
if (!isEnabled() || downloadedRemote || !enableUpload)
return true;
std::vector<std::filesystem::path> files;
for (size_t i = 0; i < static_cast<size_t>(OICache::Entity::MAX); i++) {
auto cachePath = getPath(req, static_cast<OICache::Entity>(i));
if (!cachePath.has_value()) {
LOG(ERROR) << "Failed to get cache path for " << req.type << ':'
<< req.func << ':' << req.arg << '/' << static_cast<size_t>(i);
return false;
}
files.push_back(*cachePath);
}
auto hash = generateRemoteHash(req);
if (hash.empty()) {
LOG(ERROR) << "failed to generate remote lookup hash";
return false;
}
return ObjectIntrospection::ManifoldCache::upload(hash, files);
#else
if (isEnabled() && !downloadedRemote && enableUpload) {
// We tried to download when support is not enabled!
LOG(ERROR) << "Tried to upload artifacts when support is not enabled!";
return false;
}
return true;
#endif
}
/*
 * Try to fetch the contents of the cache for this request.
 *
 * Returns true without doing anything when the cache or downloads are
 * disabled. A failed download is only reported as a failure when
 * `abortOnLoadFail` is set. In builds with OSS_ENABLE defined, a download
 * request is reported as an error instead.
 */
bool OICache::download([[maybe_unused]] const irequest &req) {
#ifndef OSS_ENABLE
  const bool downloadWanted = isEnabled() && enableDownload;
  if (!downloadWanted) {
    return true;
  }

  auto hash = generateRemoteHash(req);
  if (hash.empty()) {
    LOG(ERROR) << "failed to generate remote lookup hash";
    return false;
  }

  if (basePath.filename() != hash) {
    // Use a subdirectory per hash shard to avoid conflicts
    basePath /= hash;
    const bool parentExists = fs::exists(basePath.parent_path());
    if (parentExists && !fs::exists(basePath)) {
      fs::create_directory(basePath);
    }
  }

  if (ObjectIntrospection::ManifoldCache::download(hash, basePath)) {
    downloadedRemote = true;
    return true;
  }

  if (!abortOnLoadFail) {
    return true;
  }

  // If we aren't uploading, quit early as we requested a download and
  // weren't able to get it.
  LOG(ERROR) << "We weren't able to pull artifacts when requested, "
                "aborting run!";
  return false;
#else
  if (!isEnabled() || !enableDownload) {
    return true;
  }

  // We tried to download when support is not enabled!
  LOG(ERROR) << "Tried to download artifacts when support is not enabled!";
  return false;
#endif
}
/*
 * Key used to look up this request in the remote cache: the std::hash of
 * "<build id>/<function>/<argument>/<generator config>", as a decimal string.
 * Returns an empty string when the build ID cannot be located.
 */
std::string OICache::generateRemoteHash(const irequest &req) {
  auto buildID = symbols->locateBuildID();
  if (!buildID) {
    LOG(ERROR) << "Failed to locate buildID";
    return "";
  }

  std::string remote_cache_id = *buildID;
  remote_cache_id += "/";
  remote_cache_id += req.func;
  remote_cache_id += "/";
  remote_cache_id += req.arg;
  remote_cache_id += "/";
  remote_cache_id += generatorConfig.toString();

  LOG(INFO) << "generating remote hash from: " << remote_cache_id;

  const auto digest = std::hash<std::string>{}(remote_cache_id);
  return std::to_string(digest);
}

70
src/OICache.h Normal file
View File

@ -0,0 +1,70 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <array>
#include <filesystem>
#include <memory>
#include <optional>
#include "OICodeGen.h"
#include "OIParser.h"
#include "SymbolService.h"
namespace fs = std::filesystem;
// On-disk cache of the artifacts produced for an introspection request, with
// optional upload to / download from a remote cache. Each artifact kind (see
// Entity) is stored in its own file under `basePath`.
class OICache {
public:
// Directory holding the cache files. An empty path disables the cache.
fs::path basePath{};
// Used to locate the build ID and the descriptors that name cache entries.
std::shared_ptr<SymbolService> symbols{};
// Set once the cache contents have been fetched from the remote cache;
// upload() skips uploading in that case.
bool downloadedRemote = false;
bool enableUpload = false;
bool enableDownload = false;
// When set, download() reports a failed remote download as an error.
bool abortOnLoadFail = false;
// We need the generator config to download the cache
// with the matching configuration.
OICodeGen::Config generatorConfig{};
// Entity is used to index the `extensions` array
// So we must keep the Entity enum and `extensions` array in sync!
enum class Entity {
Source,
Object,
FuncDescs,
GlobalDescs,
TypeHierarchy,
PaddingInfo,
MAX
};
// File extension of each Entity, in enum order.
static constexpr std::array<const char *, static_cast<size_t>(Entity::MAX)>
extensions{".cc", ".o", ".fd", ".gd", ".th", ".pd"};
// The cache is enabled as soon as a base path has been set.
bool isEnabled() const {
return !basePath.empty();
}
// Path of the cache file for an entity of a request, or std::nullopt when
// the name identifying the entry cannot be determined.
std::optional<fs::path> getPath(const irequest &, Entity) const;
// Serialise / deserialise one entity; both return false on failure.
template <typename T>
bool store(const irequest &, Entity, const T &);
template <typename T>
bool load(const irequest &, Entity, T &);
// Remote cache transfers; both return false on failure.
bool upload(const irequest &req);
bool download(const irequest &req);
private:
// Key identifying a request in the remote cache; empty on failure.
std::string generateRemoteHash(const irequest &);
};

3728
src/OICodeGen.cpp Normal file

File diff suppressed because it is too large Load Diff

332
src/OICodeGen.h Normal file
View File

@ -0,0 +1,332 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
class SymbolService;
struct irequest;
#include "Common.h"
#include "ContainerInfo.h"
#include "FuncGen.h"
#include "PaddingHunter.h"
extern "C" {
#include <drgn.h>
}
namespace fs = std::filesystem;
// A parent type together with the bit offset stored for it.
struct ParentMember {
  struct drgn_type *type;
  uint64_t bit_offset;

  // Parents are ordered by bit offset only; `type` does not take part in the
  // comparison.
  bool operator<(const ParentMember &parent) const {
    return parent.bit_offset > bit_offset;
  }
};
// Code generator driven by drgn type information. The implementation lives in
// OICodeGen.cpp; only the interface is declared here. Typical use, as done by
// oi_compile: build an instance with buildFromConfig(), set the root type
// with setRootType(), then call generate() to append to a code string.
class OICodeGen {
public:
// Options controlling code generation.
struct Config {
/*
* Note: don't set default values for the config so the user gets an
* "uninitialized field" warning if they missed any.
*/
bool useDataSegment;
bool chaseRawPointers;
bool packStructs;
bool genPaddingStats;
bool captureThriftIsset;
std::set<fs::path> containerConfigPaths{};
std::set<std::string> defaultHeaders{};
std::set<std::string> defaultNamespaces{};
std::vector<std::pair<std::string, std::string>> membersToStub{};
std::string toString() const;
std::vector<std::string> toOptions() const;
};
private:
// Private constructor. Please use the fallible `OICodeGen::buildFromConfig`
// for the expected behaviour.
OICodeGen(const Config &);
public:
// Fallible factory: callers must check the returned pointer before use.
static std::unique_ptr<OICodeGen> buildFromConfig(const Config &);
// Returns false on failure.
bool generate(std::string &code);
// Deprecated alias that simply forwards to generate().
[[deprecated("Use generate(std::string&) instead.")]] bool
generateFunctionsForTypesDrgn(std::string &code) {
return generate(code);
}
static std::optional<RootInfo> getRootType(SymbolService &, const irequest &);
bool registerContainer(const fs::path &);
// TODO: remove me once all the callsites are gone
static void initializeCodeGen();
struct drgn_qualified_type getRootType();
void setRootType(struct drgn_qualified_type rt);
void setLinkageName(std::string name) {
linkageName = name;
};
TypeHierarchy getTypeHierarchy();
std::map<std::string, PaddingInfo> getPaddingInfo();
static void getDrgnArrayElementType(struct drgn_type *type,
struct drgn_type **outElemType,
size_t &outNumElems);
bool isContainer(struct drgn_type *type);
static struct drgn_type *drgnUnderlyingType(struct drgn_type *type);
bool buildName(struct drgn_type *type, std::string &text,
std::string &outName);
std::string typeToTransformedName(struct drgn_type *type);
static std::string typeToName(struct drgn_type *type);
bool enumerateTypesRecurse(struct drgn_type *type);
static std::string_view drgnKindStr(struct drgn_type *type);
std::set<struct drgn_type *> processedTypes;
private:
Config config{};
FuncGen funcGen;
using ContainerTypeMap =
std::pair<ContainerInfo, std::vector<struct drgn_qualified_type>>;
using TemplateParamList =
std::vector<std::pair<struct drgn_qualified_type, std::string>>;
using SortedTypeDefMap =
std::vector<std::pair<struct drgn_type *, struct drgn_type *>>;
std::string rootTypeStr;
std::string linkageName;
std::map<struct drgn_type *, std::string> unnamedUnion;
std::map<std::string, size_t> sizeMap;
std::map<struct drgn_type *, ContainerTypeMap> containerTypeMapDrgn;
std::vector<std::unique_ptr<ContainerInfo>> containerInfoList;
std::vector<struct drgn_type *> enumTypes;
std::vector<struct drgn_type *> typePath;
std::vector<std::string> knownTypes;
struct drgn_qualified_type rootType;
struct drgn_qualified_type rootTypeToIntrospect;
std::map<std::string, std::string> typedefMap;
std::map<struct drgn_type *, std::vector<ParentMember>> parentClasses;
size_t pad_index = 0;
std::unordered_map<struct drgn_type *, std::pair<size_t, size_t>>
paddingIndexMap;
std::map<struct drgn_type *, struct drgn_type *> typedefTypes;
std::map<struct drgn_type *, std::vector<DrgnClassMemberInfo>>
classMembersMap;
std::map<struct drgn_type *, std::vector<DrgnClassMemberInfo>>
classMembersMapCopy;
std::map<struct drgn_type *, std::string> typeToNameMap;
std::map<std::string, struct drgn_type *> nameToTypeMap;
std::set<struct drgn_type *> funcDefTypeList;
std::vector<struct drgn_type *> structDefType;
std::set<struct drgn_type *> knownDummyTypeList;
std::map<struct drgn_type *, struct drgn_type *> pointerToTypeMap;
std::set<struct drgn_type *> thriftIssetStructTypes;
std::vector<struct drgn_type *> topoSortedStructTypes;
std::set<ContainerInfo> containerTypesFuncDef;
std::map<std::string, PaddingInfo> paddedStructs;
std::map<struct drgn_type *, std::vector<DrgnClassMemberInfo>>
&getClassMembersMap();
// Owns a character buffer that must be released with free() and exposes it
// as a string_view of the given length.
class DrgnString {
struct FreeDeleter {
void operator()(void *allocated) {
free(allocated);
}
};
public:
std::string_view contents;
DrgnString(char *data, size_t length)
: contents{data, length}, _data{data} {
}
DrgnString() = delete;
private:
std::unique_ptr<char, FreeDeleter> _data;
};
static void prependQualifiers(enum drgn_qualifiers, std::string &sb);
static std::string stripFullyQualifiedName(
const std::string &fullyQualifiedName);
std::string stripFullyQualifiedNameWithSeparators(
const std::string &fullyQualifiedname);
static void removeTemplateParamAtIndex(std::vector<std::string> &params,
const size_t index);
std::unordered_map<struct drgn_type *, DrgnString> fullyQualifiedNames;
std::optional<const std::string_view> fullyQualifiedName(
struct drgn_type *type);
static SortedTypeDefMap getSortedTypeDefMap(
const std::map<struct drgn_type *, struct drgn_type *> &typedefTypeMap);
std::optional<ContainerInfo> getContainerInfo(struct drgn_type *type);
void printAllTypes();
void printAllTypeNames();
void printTypePath();
static void addPaddingForBaseClass(struct drgn_type *type,
std::vector<std::string> &def);
void addTypeToName(struct drgn_type *type, std::string name);
bool generateNamesForTypes();
bool generateJitCode(std::string &code);
bool generateStructDefs(std::string &code);
bool generateStructDef(struct drgn_type *e, std::string &code);
bool getDrgnTypeName(struct drgn_type *type, std::string &outName);
bool getDrgnTypeNameInt(struct drgn_type *type, std::string &outName);
bool populateDefsAndDecls();
static void memberTransformName(
std::map<std::string, std::string> &templateTransformMap,
std::string &typeName);
bool getMemberDefinition(struct drgn_type *type);
bool isKnownType(const std::string &type);
bool isKnownType(const std::string &type, std::string &matched);
static bool getTemplateParams(
struct drgn_type *type, size_t numTemplateParams,
std::vector<std::pair<struct drgn_qualified_type, std::string>> &v);
bool enumerateTemplateParamIdxs(struct drgn_type *type,
const ContainerInfo &containerInfo,
const std::vector<size_t> &paramIdxs,
bool &ifStub);
bool getContainerTemplateParams(struct drgn_type *type, bool &ifStub);
void getFuncDefinitionStr(std::string &code, struct drgn_type *type,
const std::string &typeName);
std::optional<uint64_t> getDrgnTypeSize(struct drgn_type *type);
std::optional<std::string> getNameForType(struct drgn_type *type);
static std::string preProcessUniquePtr(struct drgn_type *type,
std::string name);
std::string transformTypeName(struct drgn_type *type, std::string &text);
static std::string templateTransformType(const std::string &typeName);
static std::string structNameTransformType(const std::string &typeName);
bool addPadding(uint64_t padding_bits, std::string &code);
static void deduplicateMemberName(
std::unordered_map<std::string, int> &memberNames,
std::string &memberName);
std::optional<uint64_t> generateMember(
const DrgnClassMemberInfo &m,
std::unordered_map<std::string, int> &memberNames,
uint64_t currOffsetBits, std::string &code, bool isInUnion);
bool generateParent(struct drgn_type *p,
std::unordered_map<std::string, int> &memberNames,
uint64_t &currOffsetBits, std::string &code,
size_t offsetToNextMember);
std::optional<uint64_t> getAlignmentRequirements(struct drgn_type *e);
bool generateStructMembers(struct drgn_type *e,
std::unordered_map<std::string, int> &memberNames,
std::string &code, uint64_t &out_offset_bits,
PaddingInfo &paddingInfo,
bool &violatesAlignmentRequirement,
size_t offsetToNextMember);
void getFuncDefClassMembers(std::string &code, struct drgn_type *type,
std::unordered_map<std::string, int> &memberNames,
bool skipPadding = false);
bool isDrgnSizeComplete(struct drgn_type *type);
static bool getEnumUnderlyingTypeStr(struct drgn_type *e,
std::string &enumUnderlyingTypeStr);
bool ifEnumerateClass(const std::string &typeName);
bool enumerateClassParents(struct drgn_type *type,
const std::string &typeName);
bool enumerateClassMembers(struct drgn_type *type,
const std::string &typeName, bool &isStubbed);
bool enumerateClassTemplateParams(struct drgn_type *type,
const std::string &typeName,
bool &isStubbed);
bool ifGenerateMemberDefinition(const std::string &typeName);
bool generateMemberDefinition(struct drgn_type *type, std::string &typeName);
std::optional<std::pair<std::string_view, std::string_view>> isMemberToStub(
const std::string &type, const std::string &member);
std::optional<std::string_view> isTypeToStub(const std::string &typeName);
bool isTypeToStub(struct drgn_type *type, const std::string &typeName);
bool isEmptyClassOrFunctionType(struct drgn_type *type,
const std::string &typeName);
bool enumerateClassType(struct drgn_type *type);
bool enumerateTypeDefType(struct drgn_type *type);
bool enumerateEnumType(struct drgn_type *type);
bool enumeratePointerType(struct drgn_type *type);
bool enumeratePrimitiveType(struct drgn_type *type);
bool enumerateArrayType(struct drgn_type *type);
bool isUnnamedStruct(struct drgn_type *type);
std::string getAnonName(struct drgn_type *, const char *);
// Convenience wrappers around getAnonName() with a fixed prefix.
std::string getStructName(struct drgn_type *type) {
return getAnonName(type, "__anon_struct_");
}
std::string getUnionName(struct drgn_type *type) {
return getAnonName(type, "__anon_union_");
}
static void declareThriftStruct(std::string &code, std::string_view name);
bool isNumMemberGreaterThanZero(struct drgn_type *type);
void getClassMembersIncludingParent(struct drgn_type *type,
std::vector<DrgnClassMemberInfo> &out);
bool staticAssertMemberOffsets(
const std::string &struct_name, struct drgn_type *struct_type,
std::string &assert_str,
std::unordered_map<std::string, int> &member_names,
uint64_t base_offset = 0);
bool addStaticAssertsForType(struct drgn_type *type,
bool generateAssertsForOffsets,
std::string &code);
bool buildNameInt(struct drgn_type *type, std::string &nameWithoutTemplate,
std::string &outName);
void replaceTemplateOperator(
std::vector<std::pair<struct drgn_qualified_type, std::string>>
&template_params,
std::vector<std::string> &template_params_strings, size_t index);
void replaceTemplateParameters(
struct drgn_type *type,
std::vector<std::pair<struct drgn_qualified_type, std::string>>
&template_params,
std::vector<std::string> &template_params_strings,
const std::string &nameWithoutTemplate);
};

203
src/OICompile.cpp Normal file
View File

@ -0,0 +1,203 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <glog/logging.h>
extern "C" {
#include <drgn.h>
}
#include <filesystem>
#include "OICodeGen.h"
#include "OICompiler.h"
#include "OILibraryImpl.h"
#include "OIUtils.h"
#include "ObjectIntrospection.h"
namespace fs = std::filesystem;
// Terminate the process with EXIT_FAILURE if `err` is set, after logging
// `message` followed by the drgn error message. Does nothing otherwise.
static void assert_drgn_succeeded(struct drgn_error *err, const char *message) {
  if (err == nullptr) {
    return;
  }
  LOG(ERROR) << message << ": " << err->message;
  exit(EXIT_FAILURE);
}
// Generate and compile the introspection code for one type.
//
// The code is generated for `type` on top of the contents of OITraceCode.cpp
// and compiled into an object file named after the identifier derived from
// `function_addr`. On success the (identifier, object file) pair is appended
// to `results` and true is returned; any failure returns false.
static bool compile_type(
    struct drgn_qualified_type *type, uintptr_t function_addr,
    OICompiler::Config &compilerConfig, OICodeGen::Config &generatorConfig,
    std::vector<std::pair<std::string, std::string>> &results) {
  // The included file provides the initial contents of the code string.
  std::string code =
#include "OITraceCode.cpp"
      ;

  auto codegen = OICodeGen::buildFromConfig(generatorConfig);
  if (!codegen) {
    return false;
  }
  codegen->setRootType(*type);
  if (!codegen->generate(code)) {
    return false;
  }

  const auto identifier =
      ObjectIntrospection::function_identifier(function_addr);
  if (drgn_type_has_name(type->type)) {
    VLOG(1) << "Identifier for type '" << drgn_type_name(type->type) << "' is "
            << identifier;
  }

  // Source and object files are named "<identifier><cache extension>".
  const auto extensionOf = [](OICache::Entity entity) {
    return OICache::extensions[static_cast<size_t>(entity)];
  };
  auto source_path = identifier + extensionOf(OICache::Entity::Source);
  auto object_path = identifier + extensionOf(OICache::Entity::Object);

  OICompiler compiler{{}, compilerConfig};
  if (!compiler.compile(code, source_path, object_path)) {
    return false;
  }

  results.emplace_back(identifier, object_path);
  return true;
}
// Entry point of oi_compile.
//
// Usage: oi_compile <executable> [<config file>]
//
// Performs codegen for every type reported by the drgn type iterator for the
// given executable, compiles each result to an object file, then writes a
// copy of the executable named "<executable>_oil" with each object file added
// as an ELF section by objcopy. Returns EXIT_FAILURE on any error.
int main(int argc, char *argv[]) {
  google::LogToStderr();
  google::InitGoogleLogging("oi_compile");

  // Rudimentary argument parsing:
  // argv[1] is the executable we want to perform codegen for
  // argv[2] is an optional configuration file
  if (argc <= 1) {
    LOG(ERROR)
        << "Please provide the file to perform codegen for as an argument";
    return EXIT_FAILURE;
  }
  if (argc > 3) {
    LOG(ERROR) << "Unrecognized command line arguments, oi_compile accepts at "
                  "most two arguments (the file to perform codegen for, and "
                  "optionally a config file)";
    return EXIT_FAILURE;
  }

  const char *target = argv[1];
  if (!fs::exists(target)) {
    LOG(ERROR) << target << ": file does not exist";
    return EXIT_FAILURE;
  }
  if (!fs::is_regular_file(target)) {
    LOG(ERROR) << target << ": not a regular file";
    return EXIT_FAILURE;
  }

  const char *config_file =
      argc == 3 ? argv[2] : "/usr/local/share/oi/base.oid.toml";
  if (!fs::exists(config_file)) {
    LOG(ERROR) << config_file << ": file does not exist";
    return EXIT_FAILURE;
  }

  // OI initialization
  OICompiler::Config compilerConfig{};
  OICodeGen::Config generatorConfig{};
  if (!OIUtils::processConfigFile(config_file, compilerConfig,
                                  generatorConfig)) {
    LOG(ERROR) << "Failed to process config file";
    return EXIT_FAILURE;
  }
  generatorConfig.useDataSegment = false;

  // drgn initialization
  // The variable name must match the one reported in the error message below.
  // putenv() keeps a pointer to this buffer, which lives until main() returns.
  char envVar[] = "DRGN_ENABLE_TYPE_ITERATOR=1";
  // POSIX only guarantees a non-zero return value on failure, not -1.
  if (putenv(envVar) != 0) {
    PLOG(ERROR)
        << "Failed to set environment variable DRGN_ENABLE_TYPE_ITERATOR";
    return EXIT_FAILURE;
  }

  struct drgn_program *prog;
  assert_drgn_succeeded(drgn_program_create(NULL, &prog),
                        "Error while initializing drgn program");
  assert_drgn_succeeded(
      drgn_program_load_debug_info(prog, &target, 1, false, false),
      "Error while loading debug info");

  struct drgn_type_iterator *types;
  assert_drgn_succeeded(drgn_oil_type_iterator_create(prog, &types),
                        "Error while creating type iterator");

  // Perform codegen for each type
  std::vector<std::pair<std::string, std::string>> results{};
  struct drgn_qualified_type *type;
  uintptr_t *function_addrs;
  std::size_t function_addrs_len;
  assert_drgn_succeeded(drgn_oil_type_iterator_next(
                            types, &type, &function_addrs, &function_addrs_len),
                        "Error while advancing type iterator");
  while (type) {
    for (std::size_t i = 0; i < function_addrs_len; i++) {
      auto function_addr = function_addrs[i];
      if (!compile_type(type, function_addr, compilerConfig, generatorConfig,
                        results)) {
        if (drgn_type_has_name(type->type)) {
          LOG(ERROR) << "Compilation failed for type '"
                     << drgn_type_name(type->type) << "'";
        } else {
          LOG(ERROR)
              << "Compilation failed for type with template function address 0x"
              << std::hex << function_addr;
        }
        return EXIT_FAILURE;
      }
    }
    free(function_addrs);
    assert_drgn_succeeded(
        drgn_oil_type_iterator_next(types, &type, &function_addrs,
                                    &function_addrs_len),
        "Error while advancing type iterator");
  }
  drgn_type_iterator_destroy(types);

  // Finally, create a new executable by copying the given executable
  // and appending the object files created by codegen to the new
  // executable as ELF sections, with one section for each type.
  std::string output = target + std::string("_oil");
  if (fs::exists(output)) {
    LOG(WARNING) << output << " already exists, overwriting it";
    fs::remove(output);
  }
  fs::copy(target, output);

  // The command is run through the shell by system(), so every argument
  // derived from a path is wrapped in single quotes (with embedded single
  // quotes escaped) to survive spaces and shell metacharacters.
  const auto shellQuote = [](const std::string &arg) {
    std::string quoted{"'"};
    for (const char c : arg) {
      if (c == '\'') {
        quoted += "'\\''";
      } else {
        quoted += c;
      }
    }
    quoted += '\'';
    return quoted;
  };

  std::string command = "objcopy";
  for (auto &[identifier, object_path] : results) {
    std::string section;
    section += ObjectIntrospection::OI_SECTION_PREFIX;
    section += identifier;
    section += "=";
    section += object_path;

    command += " --add-section ";
    command += shellQuote(section);
  }
  command += " ";
  command += shellQuote(output);

  int status = system(command.c_str());

  // Delete temporary files now that we're done with them
  for (auto &[_, object_path] : results) {
    fs::remove(object_path);
  }

  if (status == -1) {
    PLOG(ERROR) << "Failed to launch subprocess";
    return EXIT_FAILURE;
  } else if (status != 0) {
    LOG(ERROR) << "objcopy exited with non-zero exit code " << status;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}

638
src/OICompiler.cpp Normal file
View File

@ -0,0 +1,638 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "OICompiler.h"
#ifndef OSS_ENABLE
#include "cea/object-introspection/internal/ManifoldCache.h"
#endif
#include <clang/Basic/LangStandard.h>
#include <clang/Basic/TargetInfo.h>
#include <clang/Basic/TargetOptions.h>
#include <clang/CodeGen/CodeGenAction.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Frontend/CompilerInvocation.h>
#include <clang/Frontend/FrontendActions.h>
#include <clang/Frontend/FrontendOptions.h>
#include <clang/Lex/HeaderSearchOptions.h>
#include <clang/Lex/PreprocessorOptions.h>
#include <glog/logging.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/Triple.h>
#include <llvm/Demangle/Demangle.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
#include <llvm/Support/Host.h>
#include <llvm/Support/Memory.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_os_ostream.h>
#include <boost/range/combine.hpp>
#include <boost/scope_exit.hpp>
#include "Metrics.h"
extern "C" {
#include <llvm-c/Disassembler.h>
}
using namespace std;
using namespace clang;
using namespace llvm;
using namespace llvm::object;
using namespace ObjectIntrospection;
/*
 * Symbol lookup hook passed to LLVMCreateDisasm(). It never resolves a name:
 * the reference type is reported as "none" and no symbol string is returned.
 */
static const char *symbolLookupCallback(
    [[maybe_unused]] void *disInfo, [[maybe_unused]] uint64_t referenceValue,
    uint64_t *referenceType, [[maybe_unused]] uint64_t referencePC,
    [[maybe_unused]] const char **referenceName) {
  const char *noSymbolName = nullptr;
  *referenceType = LLVMDisassembler_ReferenceType_InOut_None;
  return noSymbolName;
}
/*
 * File-level disassembler handle plus the static object that manages it.
 * The constructor of `llvmInitializer` runs the LLVM native-target init calls
 * and creates the disassembler context; its destructor disposes of the
 * context again.
 */
static LLVMDisasmContextRef disassemblerContext = nullptr;
static struct LLVMInitializer {
  LLVMInitializer() {
    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmPrinter();
    llvm::InitializeNativeTargetDisassembler();

    disassemblerContext = LLVMCreateDisasm("x86_64-pc-linux", nullptr, 0,
                                           nullptr, symbolLookupCallback);
    if (disassemblerContext == nullptr) {
      throw std::runtime_error("Failed to initialize disassemblerContext");
    }

    /*
     * Enable Intel assembly syntax and print immediate values in hexadecimal.
     * The options are applied one call at a time, in array order. The order
     * matters, so do not re-order the entries!
     */
    const uint64_t orderedOptions[] = {
        LLVMDisassembler_Option_AsmPrinterVariant,
        LLVMDisassembler_Option_PrintImmHex,
    };
    for (const uint64_t option : orderedOptions) {
      LLVMSetDisasmOptions(disassemblerContext, option);
    }
  }

  ~LLVMInitializer() {
    LLVMDisasmDispose(disassemblerContext);
  }
} llvmInitializer;
/*
 * Decode the instruction at the front of `funcText`.
 *
 * Returns std::nullopt when there is no disassembler context, when no bytes
 * are left, or when LLVM reports a zero-length decode. Otherwise the decoded
 * bytes are consumed from `funcText` and `offset` advances by their count.
 * The returned views point into `funcText` and `disassemblyBuffer`.
 */
std::optional<OICompiler::Disassembler::Instruction>
OICompiler::Disassembler::operator()() {
  const bool haveContext = disassemblerContext != nullptr;
  if (!haveContext || std::empty(funcText)) {
    return std::nullopt;
  }

  auto *pendingBytes = const_cast<uint8_t *>(std::data(funcText));
  const size_t decodedLen = LLVMDisasmInstruction(
      disassemblerContext, pendingBytes, std::size(funcText), 0,
      std::data(disassemblyBuffer), std::size(disassemblyBuffer));
  if (decodedLen == 0) {
    return std::nullopt;
  }

  /* Build the result before consuming the bytes it refers to. */
  Instruction decoded{
      offset,
      Span<uint8_t>(std::data(funcText), decodedLen),
      std::string_view(std::data(disassemblyBuffer)),
  };

  funcText.remove_prefix(decodedLen);
  offset += decodedLen;
  return decoded;
}
/*
* Manage memory for the object files and handle symbol resolution.
* The interface is defined by LLVM and we setup our hooks to
* allocate memory and prepare the relocation.
*/
class OIMemoryManager : public RTDyldMemoryManager {
public:
struct Slab {
private:
/*
* Allocate a slab of memory out of which we will allocate text and data.
* One Slab correspond to one Object file.
* The slab is divided in two segments in this order:
* 1. The text segment, to host the executable instructions
* 2. The data segment, to host the static variables
* At the minute, we make no differentiation between RW/RO data segments.
* We don't set the correct permissions on the pages allocated in the target
* process. Adding that would require making lots of `mprotect(2)` syscalls
* and introduce more latency.
*/
static sys::MemoryBlock allocateBlock(size_t totalSize) {
std::error_code errorCode;
auto mem = sys::Memory::allocateMappedMemory(
alignTo(totalSize + 256, 256), // Extra to fit paddings added below
nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, errorCode);
/*
* It looks like report_fatal_error() calls exit() by default. If it's
* not possible to allocate memory then we need to fail anyway but do it
* gracefully. Try installing an error handler and propogating the
* failure upwards so we can shutdown cleanly.
*/
if (errorCode) {
report_fatal_error("Can't allocate enough memory: " +
errorCode.message());
}
return mem;
}
public:
Slab(size_t totalSize, size_t codeSize, size_t dataSize)
: memBlock{allocateBlock(totalSize)},
/*
* Allow some extra space to allow for alignment needs of segments.
* 128 bytes should be ample and well within our "slop" allocation.
*/
textSegBase{(uintptr_t)memBlock.base()},
textSegLimit{alignTo(textSegBase + codeSize, 128)},
dataSegBase{textSegLimit},
dataSegLimit{alignTo(dataSegBase + dataSize, 128)} {
assert(dataSegLimit <=
(uintptr_t)memBlock.base() + memBlock.allocatedSize());
/* Fill the slab with NOP instructions */
memset(memBlock.base(), nopInst, memBlock.allocatedSize());
}
~Slab() {
sys::Memory::releaseMappedMemory(memBlock);
}
sys::MemoryBlock memBlock;
SmallVector<sys::MemoryBlock, 8> functionSections{};
SmallVector<sys::MemoryBlock, 8> dataSections{};
uintptr_t textSegBase = 0;
const uintptr_t textSegLimit = 0;
uintptr_t dataSegBase = 0;
const uintptr_t dataSegLimit = 0;
uint8_t *allocate(uintptr_t Size, unsigned Alignment, bool isCode) {
auto *allocOffset = isCode ? &textSegBase : &dataSegBase;
auto allocLimit = isCode ? textSegLimit : dataSegLimit;
VLOG(1) << "allocateFromSlab " << (isCode ? "Code " : "Data ") << " Size "
<< Size << " allocOffset " << std::hex << *allocOffset
<< " allocLimit " << allocLimit;
auto allocAddr = alignTo(*allocOffset, Alignment);
auto newAllocOffset = allocAddr + Size;
if (newAllocOffset > allocLimit) {
LOG(ERROR) << "allocateFromSlab: " << (isCode ? "Code " : "Data ")
<< "allocOffset= " << std::hex << *allocOffset
<< " Size = " << Size << " allocLimit = " << allocLimit;
/* See above comment about failure handling */
report_fatal_error("Can't allocate enough memory from slab");
}
auto &sections = isCode ? functionSections : dataSections;
sections.emplace_back((void *)allocAddr, Size);
*allocOffset = newAllocOffset;
VLOG(1) << "allocateFromSlab return: " << std::hex << allocAddr;
return (uint8_t *)allocAddr;
}
};
SmallVector<Slab, 4> Slabs{};
OIMemoryManager(std::shared_ptr<SymbolService> ss,
const std::unordered_map<std::string, uintptr_t> &synths)
: RTDyldMemoryManager{},
symbols{std::move(ss)},
syntheticSymbols{synths} {
}
/* Hook to make LLVM call `reserveAllocationSpace()` for each Object file */
bool needsToReserveAllocationSpace(void) override {
return true;
}
void reserveAllocationSpace(uintptr_t, uint32_t, uintptr_t, uint32_t,
uintptr_t, uint32_t) override;
uint8_t *allocateCodeSection(uintptr_t, unsigned, unsigned,
StringRef) override;
uint8_t *allocateDataSection(uintptr_t, unsigned, unsigned, StringRef,
bool) override;
/* Hook to set up proper memory permission. We don't handle that */
bool finalizeMemory(std::string *) override {
return false;
}
/* Hook to locate symbols in the remote process */
JITSymbol findSymbol(const std::string &) override;
/*
* We don't use EH frames in this context, as we generate then copy to another
* process, and enabling them causes issues with folly crashing on oid exit.
*/
void registerEHFrames(uint8_t *, uint64_t, size_t) override {
}
void deregisterEHFrames() override {
}
private:
std::shared_ptr<SymbolService> symbols;
const std::unordered_map<std::string, uintptr_t> &syntheticSymbols;
Slab &currentSlab() {
assert(!Slabs.empty());
return Slabs.back();
}
};
void OIMemoryManager::reserveAllocationSpace(
uintptr_t codeSize, uint32_t codeAlign, uintptr_t roDataSize,
uint32_t roDataAlign, uintptr_t rwDataSize, uint32_t rwDataAlign) {
/*
* It looks like the sizes given to us already take into account the
* alignment restrictions the different type of sections may have. Aligning
* to the next 1KB boundary just for a bit of safety-slush (paranoia really).
*/
uint64_t totalSz = alignTo((codeSize + roDataSize + rwDataSize), 1024);
VLOG(1) << "reserveAllocationSpace: codesize " << codeSize << " codeAlign "
<< codeAlign << " roDataSize " << roDataSize << " roDataAlign "
<< roDataAlign << " rwDataSize " << rwDataSize << " rwDataAlign "
<< rwDataAlign << " (Total Size: " << totalSz << ")";
Slabs.emplace_back(totalSz, codeSize, roDataSize + rwDataSize + 128);
const auto &currSlab = currentSlab();
VLOG(1) << "reserveAllocationSpace: " << std::hex << "SlabBase "
<< currSlab.memBlock.base() << " textSegBaseAlloc "
<< currSlab.textSegBase << " textSegLimit " << currSlab.textSegLimit
<< " dataSegBaseAlloc " << currSlab.dataSegBase << " dataSegLimit "
<< currSlab.dataSegLimit;
}
uint8_t *OIMemoryManager::allocateCodeSection(
uintptr_t size, unsigned alignment, [[maybe_unused]] unsigned sectionID,
StringRef sectionName) {
VLOG(1) << "allocateCodeSection(Size = " << size
<< ", Alignment = " << alignment
<< ", SectionName = " << sectionName.data() << ")";
return currentSlab().allocate(size, alignment, true /* isCode */);
}
uint8_t *OIMemoryManager::allocateDataSection(
uintptr_t size, unsigned alignment, [[maybe_unused]] unsigned sectionID,
StringRef sectionName, [[maybe_unused]] bool isReadOnly) {
VLOG(1) << "allocateDataSection(Size = " << size
<< ", Alignment = " << alignment
<< ", SectionName = " << sectionName.data() << ")";
return currentSlab().allocate(size, alignment, false /* isCode */);
}
/*
 * This is called to locate external symbols when relocations are
 * resolved. We have to lookup the symbol in the remote process every time,
 * which sucks for performance. However, relocation can happen while the remote
 * process is running, so this code is out of the hot path.
 * We can't rely on LLVM to do this job because we are resolving symbols of a
 * remote process. LLVM only handles resolving symbols for the current process.
 *
 * Lookup order: synthetic symbols first, then the symbol service, then the
 * weak-symbol special case described in the body.
 */
JITSymbol OIMemoryManager::findSymbol(const std::string &name) {
  const auto synthIt = syntheticSymbols.find(name);
  if (synthIt != syntheticSymbols.end()) {
    VLOG(1) << "findSymbol(" << name << ") = Synth " << std::hex
            << synthIt->second;
    return JITSymbol(synthIt->second, JITSymbolFlags::Exported);
  }

  auto located = symbols->locateSymbol(name);
  if (located) {
    VLOG(1) << "findSymbol(" << name << ") = " << std::hex << located->addr;
    return JITSymbol(located->addr, JITSymbolFlags::Exported);
  }

  /*
   * The suffix check indexes from the end of the name. It is only evaluated
   * once the 37-character prefix has matched, so the name is long enough.
   */
  const bool actsAsWeakSymbol =
      name.compare(0, 37, "_ZN6apache6thrift18TStructDataStorage") == 0 &&
      name.compare(name.size() - 16, 16, "13isset_indexesE") == 0;
  if (!actsAsWeakSymbol) {
    VLOG(1) << "findSymbol(" << name << ") = not found";
    return JITSymbol(nullptr);
  }

  /*
   * Hack to make weak symbols work with MCJIT.
   *
   * MCJIT converts weak symbols into strong symbols, which means weak symbols
   * we define in the JIT code will not be overridden by strong symbols in the
   * remote process.
   *
   * Instead, if we want something to act as a weak symbol, we must not
   * provide a definition at all. Then MCJIT will always search for it in the
   * remote processes.
   * - If a symbol is found in the remote process, it will be used as normal
   * - If no symbol is found, we end up here. Return an address of "-1" to
   *   signal that the symbol was not resolved without raising an error.
   *
   * Before dereferencing the weak symbol in the JIT code, it should be
   * compared against nullptr (not "-1"!).
   *
   * Note that __attribute__((weak)) is still required on the "weak" symbol's
   * declaration. Otherwise the compiler may optimise away the null-checks.
   */
  VLOG(1) << "findSymbol(" << name << ") = -1";
  return JITSymbol(-1, JITSymbolFlags::Exported);
}
/*
 * Disassemble the single instruction located `offset` bytes into `funcText`.
 *
 * @return the textual disassembly, or std::nullopt when `offset` is at or
 * past the end of `funcText` or the bytes there cannot be decoded.
 */
std::optional<std::string> OICompiler::decodeInst(
    const std::vector<std::byte> &funcText, uintptr_t offset) {
  /*
   * Reject out-of-range offsets up front: `size() - offset` is unsigned and
   * would wrap to a huge length, making the disassembler read far past the
   * buffer. An offset equal to size() leaves nothing to decode.
   */
  if (offset >= funcText.size()) {
    return std::nullopt;
  }

  auto disassembler = Disassembler((const uint8_t *)funcText.data() + offset,
                                   funcText.size() - offset);

  auto inst = disassembler();
  if (!inst) {
    return std::nullopt;
  }

  VLOG(1) << "Decoded instruction: " << inst->disassembly
          << " size: " << inst->opcodes.size();
  /* Copy out of the disassembler's buffer, which dies with `disassembler` */
  return std::string(inst->disassembly);
}
/*
 * Store the shared symbol service and the configuration, both moved from the
 * by-value arguments. `memMgr` is left empty here.
 */
OICompiler::OICompiler(std::shared_ptr<SymbolService> symbolService, Config cfg)
: symbols{std::move(symbolService)}, config{std::move(cfg)} {
}
/*
* The destructor must be declared in the header and defined here, since the
* header uses forward declarations with std::unique_ptr. With only the forward
* declaration, the compiler doesn't have all the information to generate the
* unique_ptr's destructor. So the destructor must be part of OICompiler.cpp,
* which has the complete type information for the forward declared classes.
*/
OICompiler::~OICompiler() = default;
/*
* Number of consecutive single-byte `nopInst` instructions that debugDisAsm()
* still treats as padding between functions. One more than this in a row is
* taken to be the unused tail of the slab, and the listing stops there.
*/
static constexpr size_t kMaxInterFuncInstrPadding = 16;
/*
* Disassembles the opcodes housed in the Slabs' code segments and logs them
* at VLOG(1), one line per instruction, prefixed with the relocated address.
*
* Slabs and ObjectRelocInfos are walked in lockstep and must have the same
* number of entries (entry i of each describes the same Object file).
*/
static void debugDisAsm(
const SmallVector<OIMemoryManager::Slab, 4> &Slabs,
const OICompiler::RelocResult::RelocInfos &ObjectRelocInfos) {
VLOG(1) << "\nDisassembled Code";
/* Outer loop on each Object files that has been loaded */
assert(Slabs.size() == ObjectRelocInfos.size());
for (const auto &S : boost::combine(Slabs, ObjectRelocInfos)) {
const auto &[ObjFile, ObjRelInfo] = std::tie(S.get<0>(), S.get<1>());
/* Inner loop on each Function Section of a given Object file */
for (const auto &textSec : ObjFile.functionSections) {
/* Position of this section inside its slab, re-based onto RelocAddr */
const auto offset =
(uintptr_t)textSec.base() - (uintptr_t)ObjFile.memBlock.base();
const auto baseRelocAddress = ObjRelInfo.RelocAddr + offset;
size_t instrCnt = 0;
size_t byteCnt = 0;
/* Length of the current run of consecutive nop instructions */
size_t consNop = 0;
auto dg = OICompiler::Disassembler((uint8_t *)textSec.base(),
textSec.allocatedSize());
while (auto inst = dg()) {
/* Counted before the nop check, so padding nops are included */
instrCnt++;
byteCnt += inst->opcodes.size();
/*
* I currently don't know the size of the generated object code housed
* in the slab. I don't want to display all the 'nop' instructions at
* the end of that buffer but I do want to display the 'nops' that are
* padding in between the generated instructions. The following kinda
* sucks...
*/
if (inst->opcodes.size() == 1 && inst->opcodes[0] == nopInst) {
if (++consNop == kMaxInterFuncInstrPadding + 1) {
/*
* We're in the nop padding after all the generated instructions so
* stop.
*/
break;
}
} else {
consNop = 0;
}
VLOG(1) << std::hex << inst->offset + baseRelocAddress << ": "
<< inst->disassembly.data();
}
VLOG(1) << "Number of Instructions: " << instrCnt
<< " Instruction bytes: " << byteCnt;
}
}
}
/*
 * Compile `code` with an in-process clang instance and emit an object file
 * at `objectPath`. The source is never read from disk: `code` is registered
 * as a remapped file under the name `sourcePath`, and that name is used as
 * the frontend input.
 *
 * Returns true when the clang action executed successfully, false otherwise.
 */
bool OICompiler::compile(const std::string &code, const fs::path &sourcePath,
                         const fs::path &objectPath) {
  Metrics::Tracing _("compile");

  /*
   * Note to whoever: if you're having problems compiling code, especially
   * header issues, then make sure you thoroughly read the options list in
   * include/clang/Basic/LangOptions.def.
   */
  auto compInv = std::make_shared<CompilerInvocation>();

  /* Language options */
  auto *langOpts = compInv->getLangOpts();
  langOpts->CPlusPlus = true;
  langOpts->CPlusPlus11 = true;
  langOpts->CPlusPlus14 = true;
  langOpts->CPlusPlus17 = true;
  langOpts->CPlusPlus20 = true;
  // Required for various `__GCC_ATOMIC_*` macros to be defined
  langOpts->GNUCVersion = 11 * 100 * 100;  // 11.0.0
  langOpts->Bool = true;
  langOpts->WChar = true;
  langOpts->CXXOperatorNames = true;
  langOpts->DoubleSquareBracketAttributes = true;
  langOpts->ImplicitInt = false;
  langOpts->Exceptions = true;
  langOpts->CXXExceptions = true;

  /* Preprocessor options: serve `code` from memory under `sourcePath` */
  const std::string inputName = sourcePath.string();
  auto &preprocessorOpts = compInv->getPreprocessorOpts();
  preprocessorOpts.addRemappedFile(
      inputName, MemoryBuffer::getMemBufferCopy(code).release());
  preprocessorOpts.UsePredefines = true;

  /* Frontend options: one C++ input, one object file out */
  auto &frontendOpts = compInv->getFrontendOpts();
  frontendOpts.Inputs.push_back(
      FrontendInputFile(inputName, InputKind{Language::CXX}));
  frontendOpts.ProgramAction = clang::frontend::EmitObj;
  frontendOpts.OutputFile = objectPath.string();

  /* Header search paths: user paths first, then system paths */
  auto &headerSearchOptions = compInv->getHeaderSearchOpts();
  for (const auto &userPath : config.userHeaderPaths) {
    headerSearchOptions.AddPath(
        userPath, clang::frontend::IncludeDirGroup::IndexHeaderMap, false,
        false);
  }
  for (const auto &sysPath : config.sysHeaderPaths) {
    headerSearchOptions.AddPath(
        sysPath, clang::frontend::IncludeDirGroup::System, false, false);
  }

  compInv->getTargetOpts().Triple =
      llvm::Triple::normalize(llvm::sys::getProcessTriple());

  /* Code generation options */
  auto &codeGenOpts = compInv->getCodeGenOpts();
  codeGenOpts.RelocationModel = llvm::Reloc::Static;
  codeGenOpts.CodeModel = "large";
  codeGenOpts.OptimizationLevel = 3;
  codeGenOpts.NoUseJumpTables = 1;
  if (config.generateJitDebugInfo) {
    codeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo);
  }

  CompilerInstance compInstance;
  compInstance.setInvocation(compInv);
  compInstance.createDiagnostics();

  EmitObjAction compilerAction;
  if (!compInstance.ExecuteAction(compilerAction)) {
    LOG(ERROR) << "Execute failed";
    return false;
  }

  /* LLVM 12 seems to be unable to handle the large files we create,
     and consistently dies with the message:
     'fatal error: sorry, this include generates a translation unit too large
     for Clang to process.'
     So this is disabled for now.
  if (VLOG_IS_ON(2)) {
    // TODO: Maybe accept file path as an arg to dump the preprocessed file.
    // Dumping to /tmp seems to require root permission
    if (access("oi_preprocessed", F_OK) == 0 &&
        access("oi_preprocessed", R_OK | W_OK) != 0) {
      LOG(ERROR) << "Trying to write oi_preprocessed, "
                 << "but it cannot be overwritten. Either remove it or run "
                    "oid with root priviledges ";
    } else {
      compInv->getFrontendOpts().OutputFile = "oi_preprocessed";
      compInv->getLangOpts()->LineComment = 1;
      compInv->getPreprocessorOutputOpts().ShowCPP = 1;
      auto act = new PrintPreprocessedAction();
      CI.ExecuteAction(*act);
      VLOG(1) << "Dumped preprocessed output to file: "
              << compInv->getFrontendOpts().OutputFile;
    }
  }
  */

  return true;
}
/*
 * Load every object file into a fresh OIMemoryManager, assign each slab a
 * target address starting at `baseRelocAddress`, and let RuntimeDyld resolve
 * the relocations against those addresses.
 *
 * Nothing is copied to the target addresses here; the returned RelocResult
 * lists, per slab, the local buffer, the address it was relocated for and its
 * size, plus the symbol table and the next free relocation address.
 *
 * Returns std::nullopt if an object file cannot be loaded or relocation
 * fails. The previous memory manager (and its buffers) is replaced on entry.
 */
std::optional<OICompiler::RelocResult> OICompiler::applyRelocs(
    uintptr_t baseRelocAddress, const std::set<fs::path> &objectFiles,
    const std::unordered_map<std::string, uintptr_t> &syntheticSymbols) {
  Metrics::Tracing relocationTracing("relocation");

  memMgr = std::make_unique<OIMemoryManager>(symbols, syntheticSymbols);
  RuntimeDyld dyld(*memMgr, *memMgr);

  /* Load all the object files into the MemoryManager */
  for (const auto &objPath : objectFiles) {
    VLOG(1) << "Loading object file " << objPath;
    auto objFile = ObjectFile::createObjectFile(objPath.c_str());
    if (!objFile) {
      raw_os_ostream(LOG(ERROR)) << "Failed to load object file " << objPath
                                 << ": " << objFile.takeError();
      return std::nullopt;
    }

    dyld.loadObject(*objFile->getBinary());
    if (dyld.hasError()) {
      LOG(ERROR) << "load object failed: " << dyld.getErrorString().str();
      return std::nullopt;
    }
  }

  RelocResult res;
  res.relocInfos.reserve(memMgr->Slabs.size());
  /*
   * With no slab to place, the next free address is the one we were given.
   * Without this the field would be returned uninitialized in that case.
   */
  res.newBaseRelocAddr = baseRelocAddress;

  /* Provides mapping addresses to the MemoryManager */
  uintptr_t currentRelocAddress = baseRelocAddress;
  for (const auto &slab : memMgr->Slabs) {
    /* Sections keep their offset within the slab at the target address */
    for (const auto &funcSection : slab.functionSections) {
      auto offset =
          (uintptr_t)funcSection.base() - (uintptr_t)slab.memBlock.base();
      dyld.mapSectionAddress(funcSection.base(), currentRelocAddress + offset);
      VLOG(1) << std::hex << "Relocated code " << funcSection.base() << " to "
              << currentRelocAddress + offset;
    }

    for (const auto &dataSection : slab.dataSections) {
      auto offset =
          (uintptr_t)dataSection.base() - (uintptr_t)slab.memBlock.base();
      dyld.mapSectionAddress(dataSection.base(), currentRelocAddress + offset);
      VLOG(1) << std::hex << "Relocated data " << dataSection.base() << " to "
              << currentRelocAddress + offset;
    }

    res.relocInfos.push_back(RelocResult::RelocInfo{
        (uintptr_t)slab.memBlock.base(), currentRelocAddress,
        slab.memBlock.allocatedSize()});

    /* The next slab starts at the following 128-byte boundary */
    currentRelocAddress =
        alignTo(currentRelocAddress + slab.memBlock.allocatedSize(), 128);
    res.newBaseRelocAddr = currentRelocAddress;
  }

  /* Apply relocation, record EH, etc. */
  dyld.finalizeWithMemoryManagerLocking();
  if (dyld.hasError()) {
    LOG(ERROR) << "relocation finalization failed: "
               << dyld.getErrorString().str();
    return std::nullopt;
  }

  /* Copy symbol table into `res` */
  auto symbolTable = dyld.getSymbolTable();
  res.symbols.reserve(symbolTable.size());
  for (const auto &[symName, sym] : symbolTable) {
    res.symbols.emplace(symName.str(), sym.getAddress());
  }

  relocationTracing.stop();

  if (VLOG_IS_ON(3)) {
    debugDisAsm(memMgr->Slabs, res.relocInfos);
  }

  return res;
}

222
src/OICompiler.h Normal file
View File

@ -0,0 +1,222 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <array>
#include <filesystem>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <unordered_map>
#include "SymbolService.h"
#include "X86InstDefs.h"
namespace fs = std::filesystem;
class OIMemoryManager;
/**
* `OICompiler` provides the tools to compile and relocate code.
* It also provides utilities to manipulate X86 assembly instructions
* and retrieve the type to introspect for a given `irequest`.
*/
class OICompiler {
public:
/* Configuration option for `OICompiler` */
struct Config {
/* Whether to generate DWARF debug info for the JIT code */
bool generateJitDebugInfo = false;
/* Header search directories handed to clang by `compile()` */
std::vector<std::string> userHeaderPaths{};
std::vector<std::string> sysHeaderPaths{};
};
/**
* The result of a call to `applyRelocs()`.
* It contains the next BaseRelocAddress for further relocation,
* information about the Objects' buffers location and symbols.
*/
struct RelocResult {
/**
* Information about an Object file and its relocation.
*
* BaseAddr contains the address of the object file buffer.
* RelocAddr contains the relocated address of the object file buffer.
* Size is the size of the object file buffer in bytes.
*
* You typically need to copy the BaseAddr buffer to RelocAddr in order to
* finalize the relocation.
* Note that we don't manage separate segments for code and data. Thus, the
* RelocInfo struct describes one Slab which contains the two segments.
*/
struct RelocInfo {
uintptr_t BaseAddr, RelocAddr;
size_t Size;
};
using RelocInfos = std::vector<RelocInfo>;
using SymTable = std::unordered_map<std::string, uintptr_t>;
uintptr_t newBaseRelocAddr;
RelocInfos relocInfos;
SymTable symbols;
};
/**
* Generator that takes a series of opcodes in
* and outputs the corresponding disassembled instructions.
*/
class Disassembler {
public:
/*
* Please forgive me :(
* We have to remain compatible with C++17 for OICompiler.cpp
* So we use std::basic_string_view instead of std::span.
*/
template <typename T>
using Span = std::basic_string_view<T>;
/**
* Instruction holds the information returned by the disassembler.
* The fields are valid until a new Instruction struct has been
* output by the disassembler.
* There is no ownership on the fields, so copy the values
* in owning data-structure, if you want to extend the lifetime.
*/
struct Instruction {
uintptr_t offset;
Span<uint8_t> opcodes;
std::string_view disassembly;
};
/**
* Create a disassembler from anything that resembles a std::span.
* The arguments are forwarded to the constructor of `Span<uint8_t>`.
*/
template <typename... Args>
Disassembler(Args &&...args) : funcText(std::forward<Args>(args)...) {
}
/*
* Disassemble the next instruction, if any, and return the
* corresponding Instruction struct.
*/
std::optional<Instruction> operator()();
private:
/* Offset of the next instruction, counted from the initial span start */
uintptr_t offset = 0;
/* Bytes that have not been disassembled yet */
Span<uint8_t> funcText;
/* Backing storage for the text of the last disassembled instruction */
std::array<char, 128> disassemblyBuffer;
};
OICompiler(std::shared_ptr<SymbolService>, Config);
~OICompiler();
/**
* Compile the given @param code and write the result in @param objectPath.
*
* @param code the C++ source code to compile
* @param sourcePath path/name under which the in-memory code is presented
* to the compiler; the code is taken from @param code, not from this path
* @param objectPath path where to write the resulting Object file
*
* @return true if the compilation succeeded, false otherwise.
*/
bool compile(const std::string &, const fs::path &, const fs::path &);
/**
* Load the @param objectFiles in memory and apply relocation at
* @param BaseRelocAddress. Note that it doesn't copy the object files at the
* @param BaseRelocAddress, but returns all the information to make the copy.
* Note: synthetic variables are symbols we define, but are used within the
* JIT code. See 'OITraceCode.cpp' and its global variables declared with
* extern.
*
* @param BaseRelocAddress where will the relocated code be located
* @param objectFiles paths to the object files to load and relocate
* @param syntheticSymbols a symbol table for synthetic variables
*
* @return a `std::optional` containing @ref RelocResult if the relocation was
* successful. Calling `applyRelocs()` again invalidates the Segments
* information. So make sure you copy the Segments' content before doing
* another call.
*/
std::optional<RelocResult> applyRelocs(
uintptr_t, const std::set<fs::path> &,
const std::unordered_map<std::string, uintptr_t> &);
/**
* Locates all the offsets of the given @param insts opcodes
* in the @param funcText. Typically used to find all `ret` instructions
* within a function.
*
* @param funcText the binary instruction of a function
* @param insts an array of instruction buffers to look for in @param funcText
*
* @return an optional with the offsets where the given instructions were
* found
*/
template <class FuncTextRange, class NeedlesRange>
static std::optional<std::vector<uintptr_t>> locateOpcodes(
const FuncTextRange &funcText, const NeedlesRange &needles);
/**
* @return a string representation of the opcode(s) of the instruction found
* at @param offset within function's binary instructions @param funcText.
*/
static std::optional<std::string> decodeInst(const std::vector<std::byte> &,
uintptr_t);
private:
std::shared_ptr<SymbolService> symbols;
Config config;
/**
* memMgr is only used by applyRelocs, but its lifetime must be larger than
* the duration of the function. The RelocResult returned references
* addresses managed by the Memory Manager, so we need to let the caller copy
* the code and data sections to their final location before releasing the
* Objects' memory.
* This is why memMgr is a std::unique_ptr in the class instead of a local
* variable in the applyRelocs function.
*/
std::unique_ptr<OIMemoryManager> memMgr;
};
template <class FuncTextRange, class NeedlesRange>
std::optional<std::vector<uintptr_t>> OICompiler::locateOpcodes(
const FuncTextRange &funcText, const NeedlesRange &needles) {
auto DG = Disassembler((uint8_t *)std::data(funcText), std::size(funcText));
std::vector<uintptr_t> locs;
while (auto inst = DG()) {
auto it = std::find_if(
std::begin(needles), std::end(needles), [&](const auto &needle) {
// Inst->opcodes.starts_with(needle);
return 0 ==
inst->opcodes.find(OICompiler::Disassembler::Span<uint8_t>(
std::data(needle), std::size(needle)));
});
if (it != std::end(needles)) {
locs.push_back(inst->offset);
}
}
return locs;
}

720
src/OID.cpp Normal file
View File

@ -0,0 +1,720 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/init/Init.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <boost/scope_exit.hpp>
#include <csignal>
#include <cstdlib>
#include <filesystem>
#include <iostream>
extern "C" {
#include <getopt.h>
#include <libgen.h>
}
#include "Metrics.h"
#include "OIDebugger.h"
#include "OIOpts.h"
#include "PaddingHunter.h"
#include "TimeUtils.h"
#include "TreeBuilder.h"
/* Global for signal handling */
std::weak_ptr<OIDebugger> weak_oid;
namespace fs = std::filesystem;
using namespace ObjectIntrospection;
// Process exit codes for this program.
// Using an enum inside a namespace here instead of an `enum class` because
// enums defined via `enum class` aren't implicitly convertible to `int`, and
// having to cast the argument for each call to `exit` would be ugly.
// Only `Success` has an explicit value; every following enumerator is one
// more than its predecessor, so inserting or reordering entries changes the
// numeric exit codes.
namespace ExitStatus {
enum ExitStatus {
Success = EXIT_SUCCESS,
UsageError,
FileNotFoundError,
ConfigGenerationError,
ScriptParsingError,
StopTargetError,
SegmentRemovalError,
SegmentInitError,
CompilationError,
PatchingError,
ProcessingTargetDataError,
OidObjectError,
CacheUploadError,
};
}
/*
* This is the main driver code for the Object Introspection (OI) debugger.
* The 'oid' debugger is the driver application which instruments a target
* application to collect data and then reaps that data from the target.
*
* The flow of work in 'oid' can, roughly speaking, be split into several
* phases:
*
* Phase 1 - Object Discovery
* Using the 'drgn' debugger library, discover the container types in a
* given parent object and its descendent objects. With this information we
* can locate the addresses in memory of these container objects.
*
* Phase 2 - Code Generation
* Auto generate C++ code to iterate over the data structures of interest,
* calculate the size of these objects and record the data.
*
* Phase 3 - Object Code Generation
* JIT compile the C++ code into object code and relocate the resulting
* text into the target process's address space. This is done using
* clang/llvm APIs.
*
* Phase 4 - Target Process Instrumentation
* The generated object code is injected into the target process in a text
* segment created a priori. Threads are captured and controlled at probe
* sites using breakpoint traps and the ptrace(2) interfaces.
*
* Phase 5 - Data processing
* The results are retrieved from the target process's data buffer and
* processed. The data buffer is a data segment that we mapped into the
* target process.
*
* In addition to the above phases we have process control which is
* currently based around ptrace(2).
*/
/*
 * Table of the command line options accepted by oid. usage() streams this
 * table to stdout and main() feeds opts.shortOpts() / opts.longOpts() to
 * getopt_long(), then switches on the short option character.
 *
 * Each OIOpt entry appears to be {short flag, long name, getopt argument
 * kind, argument placeholder or nullptr, help text} - inferred from the
 * entries below; confirm against OIOpts.h.
 *
 * When adding an option here, add the matching `case` to the switch in
 * main().
 */
constexpr static OIOpts opts{
OIOpt{'h', "help", no_argument, nullptr, "Print this message and exit"},
OIOpt{'p', "pid", required_argument, "<pid>",
"Target process to attach to"},
OIOpt{'c', "config-file", required_argument, nullptr,
"</path/to/oid.toml>"},
OIOpt{'x', "data-buf-size", required_argument, "<bytes>",
"Size of data segment (default:1MB)\n"
"Accepts multiplicative suffix: K, M, G, T, P, E"},
OIOpt{'d', "debug-level", required_argument, "<level>",
"Verbose level for logging"},
OIOpt{'l', "jit-logging", no_argument, nullptr, "Enable JIT's logs"},
OIOpt{'r', "remove-mappings", no_argument, nullptr,
"Remove oid mappings from target process"},
OIOpt{'s', "script", required_argument, nullptr, "</path/to/script.oid>"},
OIOpt{'S', "script-source", required_argument, nullptr, "type:symbol:arg"},
OIOpt{'t', "timeout", required_argument, "<seconds>",
"How long to probe the target process for"},
OIOpt{'k', "custom-code-file", required_argument, nullptr,
"</path/to/code.cpp>\n"
"Use your own CPP file instead of CodeGen"},
OIOpt{'e', "compile-and-exit", no_argument, nullptr,
"Compile only then exit"},
OIOpt{'o', "cache-path", required_argument, "<path>",
"Enable caching using the provided directory"},
OIOpt{'u', "cache-remote", required_argument, nullptr,
"Enable upload/download of cache files\n"
"Pick from {both,upload,download}"},
OIOpt{'i', "debug-path", required_argument, nullptr,
"</path/to/binary>\n"
"Run oid on a executable with debug infos instead of a running "
"process"},
// Optional arguments are pretty nasty - it will only work as
// "--dump-json=PATH" and not "--dump-json PATH". Try and make this take a
// required argument at a later point
OIOpt{'J', "dump-json", optional_argument, "[oid_out.json]",
"File to dump the results to, as JSON\n"
"(in addition to the default RocksDB output)"},
OIOpt{
'B', "dump-data-segment", no_argument, nullptr,
"Dump the data segment's content, before TreeBuilder processes it\n"
"Each argument gets its own dump file: 'dataseg.<oid-pid>.<arg>.dump'"},
OIOpt{'j', "generate-jit-debug", no_argument, nullptr,
"Output debug info for the generated JIT code"},
OIOpt{'n', "chase-raw-pointers", no_argument, nullptr,
"Generate probe for raw pointers"},
OIOpt{'a', "log-all-structs", no_argument, nullptr, "Log all structures"},
OIOpt{'z', "disable-packed-structs", no_argument, nullptr,
"Disable appending packed attributes to the definition of structs"},
OIOpt{'w', "disable-padding-hunter", no_argument, nullptr,
"Disable Padding Hunter\n"
"Padded structs will be written to file called PADDING"},
OIOpt{'T', "capture-thrift-isset", no_argument, nullptr,
"Capture the isset value for Thrift fields"},
// main() currently accepts only the value "prod" for this option
OIOpt{'m', "mode", required_argument, "[prod]",
"Allows to specify a mode of operation/group of settings"},
};
/*
 * Print the usage banner, the option table and the support contact blurb to
 * stdout, then flush the stream.
 */
void usage() {
  static constexpr const char *supportBlurb =
      "\n\tFor problem reporting, questions and general comments "
      "please pop along"
      "\n\tto the Object Introspection Workplace group at "
      "https://fburl.com/oid.\n";

  std::cout << "usage: oid ...\n";
  std::cout << opts;
  std::cout << supportBlurb << std::endl;
}
/*
 * Signal handler installed by installSigHandlers().
 *
 * Asks the registered debugger (if there is one) to stop; otherwise logs an
 * error and terminates the process with ExitStatus::OidObjectError.
 *
 * This handler currently isn't completely async-signal-safe. It's mostly
 * all in the segment removal code and is commented in appropriate places.
 * The error messages are obviously not safe either.
 */
void sigIntHandler(int sigNum) {
  VLOG(1) << "Received SIGNAL " << sigNum;

  auto debugger = weak_oid.lock();
  if (!debugger) {
    /*
     * A small window exists between installing a handler and creating the
     * main debugger object.
     */
    LOG(ERROR) << "Failed to find oid object when handling signal";
    exit(ExitStatus::OidObjectError);
  }

  debugger->stopAll();
}
/*
 * Install sigIntHandler as the handler for SIGINT and SIGALRM.
 *
 * SIGINT is left untouched when its current disposition is SIG_IGN, so an
 * inherited "ignore" setting is respected. SIGALRM is always hooked because
 * it is how the '-t' timeout (armed with alarm()) interrupts trap processing.
 */
void installSigHandlers(void) {
  struct sigaction nact {};
  struct sigaction oact {};

  nact.sa_handler = sigIntHandler;
  sigemptyset(&nact.sa_mask);
  // sigIntHandler has the one-argument `sa_handler` signature, so SA_SIGINFO
  // must NOT be set: with that flag the handler is taken from `sa_sigaction`
  // and invoked with three arguments.
  nact.sa_flags = 0;

  sigaction(SIGINT, nullptr, &oact);
  if (oact.sa_handler != SIG_IGN) {
    sigaction(SIGINT, &nact, nullptr);
  }

  /* Also stop on SIGALRM, for handling timeout */
  sigaction(SIGALRM, &nact, nullptr);
}
/*
 * Parse a base-10 integer with an optional single-letter binary unit suffix.
 *
 * Accepted suffixes are K, M, G, T, P and E; each step up the list scales the
 * value by a further factor of 1024 (K = 1024, M = 1024^2, ..., E = 1024^6).
 * Nothing may follow the suffix.
 *
 * Returns std::nullopt when `str` is null, holds no digits, carries an
 * unknown suffix or trailing characters, or when the value (before or after
 * scaling) does not fit in a `long`.
 */
std::optional<long> strunittol(const char *str) {
  if (str == nullptr) {
    return std::nullopt;
  }

  errno = 0;
  char *strend = nullptr;
  long retval = strtol(str, &strend, 10);
  // strend == str means strtol() consumed no digits at all ("" or "K").
  if (errno != 0 || strend == str) {
    return std::nullopt;
  }

  // How many times the parsed value has to be multiplied by 1024.
  int scaleSteps = 0;
  switch (*strend) {
    case 'E':
      scaleSteps = 6;
      break;
    case 'P':
      scaleSteps = 5;
      break;
    case 'T':
      scaleSteps = 4;
      break;
    case 'G':
      scaleSteps = 3;
      break;
    case 'M':
      scaleSteps = 2;
      break;
    case 'K':
      scaleSteps = 1;
      break;
    case '\0':
      return retval;
    default:
      return std::nullopt;
  }

  // The unit suffix must be the last character of the string.
  if (*(strend + 1) != '\0') {
    return std::nullopt;
  }

  constexpr long unit = 1024;
  constexpr long maxBeforeScale = std::numeric_limits<long>::max() / unit;
  constexpr long minBeforeScale = std::numeric_limits<long>::min() / unit;
  for (; scaleSteps > 0; --scaleSteps) {
    // Signed overflow is undefined behaviour: refuse instead of wrapping.
    if (retval > maxBeforeScale || retval < minBeforeScale) {
      return std::nullopt;
    }
    retval *= unit;
  }

  return retval;
}
namespace Oid {
/*
 * Settings for one oid run. main() fills this in from the command line and
 * runScript() consumes it.
 *
 * Every scalar member carries a default initialiser so that a
 * default-constructed Config is fully defined; the defaults match what
 * value-initialisation (`Oid::Config cfg = {}`) produces.
 */
struct Config {
  // Target process id ('-p'); 0 means "no process given"
  pid_t pid = 0;
  // Binary with debug info to use instead of a live process ('-i')
  std::string debugInfoFile;
  // Path to the oid TOML config ('-c')
  std::string configFile;
  // Cache directory ('-o'); empty leaves the debugger's cache path unset
  fs::path cacheBasePath;
  // User supplied source file used instead of generated code ('-k')
  fs::path customCodeFile;
  // Requested data segment size in bytes ('-x'); 0 keeps the debugger default
  size_t dataSegSize = 0;
  // Probe timeout in seconds ('-t'); values <= 0 disable the alarm
  int timeout_s = 0;
  // Remote cache directions ('-u')
  bool cacheRemoteUpload = false;
  bool cacheRemoteDownload = false;
  // Enable the JIT's own logging ('-l')
  bool enableJitLogging = false;
  // Remove existing oid mappings from the target and exit ('-r')
  bool removeMappings = false;
  // Emit debug info for the JIT code ('-j')
  bool generateJitDebug = false;
  // Stop after compilation ('-e', implied by '-i')
  bool compAndExit = false;
  // Cleared by '-w' to disable Padding Hunter
  bool genPaddingStats = true;
  // Cleared by '-i': there is no live process to stop or patch
  bool attachToProcess = true;
  // Set by '--mode prod'; forwarded to OIDebugger::setHardDisableDrgn()
  bool hardDisableDrgn = false;
};
}  // namespace Oid
/*
 * Run a single oid script from start to finish.
 *
 * fileName      - name used in log output only; may be empty when the script
 *                 text came from '-S'
 * script        - stream holding the script text, handed to parseScript()
 * oidConfig     - run settings gathered by main()
 * codeGenConfig - forwarded to the OIDebugger constructor
 * tbConfig      - forwarded to the OIDebugger constructor
 *
 * Steps, in order: create the debugger and register it in `weak_oid`, parse
 * the script, (when attaching) stop the target and either remove or set up
 * the oid segments, compile the probe code, then either finish after
 * compilation or patch the target, service traps until done or interrupted,
 * restore the target and process the collected data. The cache upload runs
 * last on both paths.
 *
 * Returns ExitStatus::Success, or the ExitStatus of the first step that
 * failed.
 */
static ExitStatus::ExitStatus runScript(const std::string &fileName,
std::istream &script,
const Oid::Config &oidConfig,
const OICodeGen::Config &codeGenConfig,
const TreeBuilder::Config &tbConfig) {
if (!fileName.empty()) {
VLOG(1) << "SCR FILE: " << fileName;
}
auto progStart = time_hr::now();
std::shared_ptr<OIDebugger> oid; // share oid with the global signal handler
// A non-zero pid selects the live-process constructor; otherwise the
// debugger is built from the debug info file given with '-i'.
if (oidConfig.pid != 0) {
oid = std::make_shared<OIDebugger>(oidConfig.pid, oidConfig.configFile,
codeGenConfig, tbConfig);
} else {
oid = std::make_shared<OIDebugger>(
oidConfig.debugInfoFile, oidConfig.configFile, codeGenConfig, tbConfig);
}
weak_oid = oid; // set the weak_ptr for signal handlers
if (!oidConfig.cacheBasePath.empty()) {
oid->setCacheBasePath(oidConfig.cacheBasePath);
}
oid->setCacheRemoteEnabled(oidConfig.cacheRemoteUpload,
oidConfig.cacheRemoteDownload);
oid->setCustomCodeFile(oidConfig.customCodeFile);
oid->setEnableJitLogging(oidConfig.enableJitLogging);
oid->setGenerateJitDebugInfo(oidConfig.generateJitDebug);
oid->setHardDisableDrgn(oidConfig.hardDisableDrgn);
VLOG(1) << "OIDebugger constructor took " << std::dec
<< time_ns(time_hr::now() - progStart) << " nsecs";
LOG(INFO) << "Script file: " << fileName;
if (!oid->parseScript(script)) {
LOG(ERROR) << "Error parsing input file '" << fileName << "'";
return ExitStatus::ScriptParsingError;
}
// The target is only stopped when working against a live process.
if (oidConfig.attachToProcess && !oid->stopTarget()) {
LOG(ERROR) << "Couldn't stop target process with PID " << oidConfig.pid;
return ExitStatus::StopTargetError;
}
auto initStart = time_hr::now();
/*
* Remove any existing mappings if the '-r' flag is used or if any of the
* segments have been explicitly changed on the command line. It's a bit of
* a heavy hammer to remove both text and data if only one of the relevant
* parameters have been set but that can always be modified in the future
* if necessary.
*/
if (oidConfig.attachToProcess) {
if (oidConfig.removeMappings) {
if (!oid->segConfigExists()) {
LOG(INFO) << "No config exists for pid " << oidConfig.pid
<< " : cannot remove mappings";
} else if (!oid->unmapSegments(true)) {
LOG(ERROR) << "Failed to remove segments in target process with PID "
<< oidConfig.pid;
return ExitStatus::SegmentRemovalError;
}
// '-r' is a terminal action: nothing is compiled or patched afterwards.
return ExitStatus::Success;
}
// 0 means '-x' was not given, so the debugger keeps its own default size.
if (oidConfig.dataSegSize > 0) {
oid->setDataSegmentSize(oidConfig.dataSegSize);
}
if (!oid->segmentInit()) {
// Resume the target before bailing out so it is not left stopped.
oid->contTargetThread();
LOG(ERROR) << "Failed to initialise segments in target process with PID "
<< oidConfig.pid;
return ExitStatus::SegmentInitError;
}
// continue and detach main thread
oid->contTargetThread();
}
VLOG(1) << "init took " << std::dec << time_ns(time_hr::now() - initStart)
<< " nsecs\n"
<< "Compilation Started";
auto compileStart = time_hr::now();
if (!oid->compileCode()) {
LOG(ERROR) << "Compilation failed";
return ExitStatus::CompilationError;
}
VLOG(1) << "Compilation Finished (" << std::dec
<< time_ns(time_hr::now() - compileStart) << " nsecs)";
if (oidConfig.compAndExit) {
// Ensure the .th cache file also gets created
oid->getTreeBuilderTyping();
if (oidConfig.genPaddingStats) {
PaddingHunter paddingHunter;
paddingHunter.localPaddedStructs = oid->getPaddingInfo();
paddingHunter.processLocalPaddingInfo();
paddingHunter.outputPaddingInfo();
}
} else {
// From here on SIGINT/SIGALRM are routed to sigIntHandler(), which asks
// the debugger to stop.
installSigHandlers();
/*
* Sigh. This is nonsense really and is tied to a single probe enabling.
* This will need re-architecting when we move to multiple enablings.
*/
if (!oid->isGlobalDataProbeEnabled()) {
oid->setMode(OIDebugger::OID_MODE_FUNC);
}
/*
* I think we might be able to just fit the global variable work entirely
* under patchFunctions and therefore leave the shape of the code at
* this level pretty much unaltered.
*/
if (!oid->stopTarget()) {
LOG(ERROR) << "Couldn't stop target process with PID " << oidConfig.pid;
return ExitStatus::StopTargetError;
}
if (!oid->patchFunctions()) {
// Resume the target before reporting the failure.
oid->contTargetThread();
LOG(ERROR) << "Error patching functions";
return ExitStatus::PatchingError;
}
// Continue the target with detach=false (the calls above use the default
// detach=true).
oid->contTargetThread(false);
// Arm the '-t' timeout; SIGALRM is handled by sigIntHandler().
if (oidConfig.timeout_s > 0) {
alarm(oidConfig.timeout_s);
}
// Service traps until the debugger reports OID_DONE or a signal sets the
// interrupted flag.
while (!oid->isInterrupted()) {
if (oid->processTrap(oidConfig.pid) == OIDebugger::OID_DONE) {
break;
}
};
// Disable timeout timer
alarm(0);
// Cleanup all the remaining traps that were injected
if (!oid->removeTraps(0)) {
LOG(ERROR) << "Failed to remove instrumentation...";
}
{ // Resume stopped thread before cleanup
VLOG(1) << "Resuming stopped threads...";
Metrics::Tracing __("resume_threads");
// Keep calling processTrap() (second argument false) for as long as it
// returns OID_CONT.
while (oid->processTrap(oidConfig.pid, false) == OIDebugger::OID_CONT) {
}
}
oid->restoreState();
// Target data is only processed when the run was not interrupted.
if (!oid->isInterrupted() && !oid->processTargetData()) {
LOG(ERROR) << "Problems processing target data";
return ExitStatus::ProcessingTargetDataError;
}
}
// Upload cache artifacts if present
if (!oid->uploadCache()) {
LOG(ERROR) << "cache upload requested and failed";
return ExitStatus::CacheUploadError;
}
std::cout << "SUCCESS " << fileName << std::endl;
VLOG(1) << "Entire process took " << time_ns(time_hr::now() - progStart)
<< " nsecs";
return ExitStatus::Success;
}
/*
 * Entry point of the oid command line tool.
 *
 * Parses the command line into an Oid::Config plus the code generator and
 * TreeBuilder configurations, validates the combination of options and then
 * hands the script ('-s' file or '-S' inline source) to runScript().
 *
 * Returns an ExitStatus value:
 *   - Success            on '-h' or when the run completed
 *   - UsageError         for unknown options or invalid/inconsistent arguments
 *   - FileNotFoundError  when a file named on the command line is missing
 *   - otherwise whatever runScript() reported
 */
int main(int argc, char *argv[]) {
  int debugLevel = 1;
  Oid::Config oidConfig = {};
  std::string scriptFile;
  std::string scriptSource;
  std::optional<fs::path> jsonPath{std::nullopt};

  bool logAllStructs = true;
  bool chaseRawPointers = false;
  bool packStructs = true;
  bool dumpDataSegment = false;
  bool captureThriftIsset = false;

  Metrics::Tracing _("main");

#ifndef OSS_ENABLE
  folly::InitOptions init;
  init.useGFlags(false);
  init.removeFlags(false);
  folly::init(&argc, &argv, init);
#else
  google::InitGoogleLogging(argv[0]);
#endif
  // Until '-d' is seen, only warnings and errors reach stderr.
  google::SetStderrLogging(google::WARNING);

  int c = 0;
  while ((c = getopt_long(argc, argv, opts.shortOpts(), opts.longOpts(),
                          nullptr)) != -1) {
    switch (c) {
      case 'm': {
        if (strcmp("prod", optarg) == 0) {
          // change default settings for prod
          oidConfig.hardDisableDrgn = true;
          oidConfig.cacheRemoteDownload = true;
          oidConfig.cacheBasePath = "/tmp/oid-cache";
          chaseRawPointers = true;
        } else {
          LOG(ERROR) << "Invalid mode: " << optarg << " specified!";
          usage();
          return ExitStatus::UsageError;
        }
        break;
      }
      case 'x': {
        auto dataSegSizeArg = strunittol(optarg);
        if (!dataSegSizeArg.has_value() || dataSegSizeArg.value() <= 0) {
          LOG(ERROR) << "Invalid value specified for data buffer size";
          usage();
          return ExitStatus::UsageError;
        }
        oidConfig.dataSegSize = static_cast<size_t>(dataSegSizeArg.value());
        break;
      }
      case 'p':
        oidConfig.pid = atoi(optarg);
        break;
      case 'd':
        debugLevel = atoi(optarg);
        google::LogToStderr();
        google::SetStderrLogging(google::INFO);
        // Enable debug logging for *only* our project,
        // and not the rest of fbcode
        google::SetVLOGLevel("Common", debugLevel);
        google::SetVLOGLevel("Descs", debugLevel);
        google::SetVLOGLevel("FuncGen", debugLevel);
        google::SetVLOGLevel("GobsService", debugLevel);
        google::SetVLOGLevel("ManifoldCache", debugLevel);
        google::SetVLOGLevel("Metrics", debugLevel);
        google::SetVLOGLevel("OICache", debugLevel);
        google::SetVLOGLevel("OICodeGen", debugLevel);
        google::SetVLOGLevel("OICompiler", debugLevel);
        google::SetVLOGLevel("OID", debugLevel);
        google::SetVLOGLevel("OIDebugger", debugLevel);
        google::SetVLOGLevel("OILexer", debugLevel);
        google::SetVLOGLevel("OILibrary", debugLevel);
        google::SetVLOGLevel("OILibraryImpl", debugLevel);
        google::SetVLOGLevel("OILogging", debugLevel);
        google::SetVLOGLevel("OIOpts", debugLevel);
        google::SetVLOGLevel("OIParser", debugLevel);
        google::SetVLOGLevel("OIUtils", debugLevel);
        google::SetVLOGLevel("PaddingHunter", debugLevel);
        google::SetVLOGLevel("Serialize", debugLevel);
        google::SetVLOGLevel("SymbolService", debugLevel);
        google::SetVLOGLevel("TimeUtils", debugLevel);
        google::SetVLOGLevel("TrapInfo", debugLevel);
        google::SetVLOGLevel("TreeBuilder", debugLevel);
        // Upstream glog defines `GLOG_INFO` as 0 https://fburl.com/ydjajhz0,
        // but internally it's defined as 1 https://fburl.com/code/9fwams75
        gflags::SetCommandLineOption("minloglevel", "0");
        break;
      case 'l':
        oidConfig.enableJitLogging = true;
        break;
      case 'k':
        oidConfig.customCodeFile = optarg;
        if (!fs::exists(oidConfig.customCodeFile)) {
          LOG(ERROR) << "Non existent generated code file: "
                     << oidConfig.customCodeFile;
          usage();
          return ExitStatus::FileNotFoundError;
        }
        if (oidConfig.customCodeFile == "/tmp/tmp_oid_output_2.cpp") {
          LOG(ERROR) << "Cannot use generatedCodePath:"
                     << oidConfig.customCodeFile;
          return ExitStatus::UsageError;
        }
        break;
      case 'e':
        oidConfig.compAndExit = true;
        break;
      case 'j':
        oidConfig.generateJitDebug = true;
        break;
      case 'c':
        oidConfig.configFile = std::string(optarg);
        if (!fs::exists(oidConfig.configFile)) {
          LOG(ERROR) << "Non existent config file: " << oidConfig.configFile;
          usage();
          return ExitStatus::FileNotFoundError;
        }
        break;
      case 'i':
        // Working from a binary instead of a process: there is nothing to
        // attach to, so only compilation can be performed.
        oidConfig.debugInfoFile = std::string(optarg);
        oidConfig.attachToProcess = false;
        oidConfig.compAndExit = true;
        if (!fs::exists(oidConfig.debugInfoFile)) {
          LOG(ERROR) << "Non existent debuginfo file: "
                     << oidConfig.debugInfoFile;
          usage();
          return ExitStatus::FileNotFoundError;
        }
        break;
      case 'o':
        oidConfig.cacheBasePath = optarg;
        break;
      case 'u':
        if (strcmp(optarg, "both") == 0) {
          oidConfig.cacheRemoteUpload = true;
          oidConfig.cacheRemoteDownload = true;
        } else if (strcmp(optarg, "upload") == 0) {
          oidConfig.cacheRemoteUpload = true;
        } else if (strcmp(optarg, "download") == 0) {
          oidConfig.cacheRemoteDownload = true;
        } else {
          LOG(ERROR) << "Invalid download option: " << optarg << " specified!";
          usage();
          return ExitStatus::UsageError;
        }
        break;
      case 'r':
        oidConfig.removeMappings = true;
        break;
      case 'n':
        chaseRawPointers = true;
        break;
      case 'a':
        logAllStructs = true;
        break;
      case 'z':
        packStructs = false;
        break;
      case 'B':
        dumpDataSegment = true;
        break;
      case 's':
        scriptFile = std::string(optarg);
        break;
      case 'S':
        scriptSource = std::string(optarg);
        break;
      case 't':
        oidConfig.timeout_s = atoi(optarg);
        break;
      case 'w':
        oidConfig.genPaddingStats = false;
        break;
      case 'J':
        // optarg is null when '--dump-json' is given without '=PATH'
        jsonPath = optarg != nullptr ? optarg : "oid_out.json";
        break;
      case 'T':
        captureThriftIsset = true;
        break;
      case 'h':
        usage();
        return ExitStatus::Success;
      default:
        // Unknown option or missing argument: getopt_long() has already
        // printed a diagnostic, so report it as a usage error rather than
        // exiting successfully.
        usage();
        return ExitStatus::UsageError;
    }
  }

  if (oidConfig.configFile.empty()) {
    oidConfig.configFile = "/usr/local/share/oi/base.oid.toml";
    if (!fs::exists(oidConfig.configFile)) {
      LOG(ERROR) << "Non existent default config file: "
                 << oidConfig.configFile;
      usage();
      return ExitStatus::FileNotFoundError;
    }
    LOG(INFO) << "Using default config file " << oidConfig.configFile;
  }

  // The diagnostics below are logged at ERROR so they stay visible with the
  // default stderr threshold (WARNING) set above.
  if (oidConfig.pid != 0 && !oidConfig.debugInfoFile.empty()) {
    LOG(ERROR) << "'-p' and '-i' are mutually exclusive";
    usage();
    return ExitStatus::UsageError;
  }

  if ((oidConfig.pid == 0 && oidConfig.debugInfoFile.empty()) ||
      oidConfig.configFile.empty()) {
    usage();
    return ExitStatus::UsageError;
  }

  if (!oidConfig.removeMappings && scriptFile.empty() && scriptSource.empty()) {
    LOG(ERROR) << "One of '-s', '-r' or '-S' must be specified";
    usage();
    return ExitStatus::UsageError;
  }

  OICodeGen::Config codeGenConfig{
      .useDataSegment = true,
      .chaseRawPointers = chaseRawPointers,
      .packStructs = packStructs,
      .genPaddingStats = oidConfig.genPaddingStats,
      .captureThriftIsset = captureThriftIsset,
  };

  TreeBuilder::Config tbConfig{
      .logAllStructs = logAllStructs,
      .chaseRawPointers = chaseRawPointers,
      .genPaddingStats = oidConfig.genPaddingStats,
      .dumpDataSegment = dumpDataSegment,
      .jsonPath = jsonPath,
  };

  // A script file ('-s') takes precedence over inline script source ('-S').
  if (!scriptFile.empty()) {
    if (!std::filesystem::exists(scriptFile)) {
      LOG(ERROR) << "Non-existent script file: " << scriptFile;
      return ExitStatus::FileNotFoundError;
    }
    std::ifstream script(scriptFile);
    auto status =
        runScript(scriptFile, script, oidConfig, codeGenConfig, tbConfig);
    if (status != ExitStatus::Success) {
      return status;
    }
  } else if (!scriptSource.empty()) {
    std::istringstream script(scriptSource);
    auto status =
        runScript(scriptFile, script, oidConfig, codeGenConfig, tbConfig);
    if (status != ExitStatus::Success) {
      return status;
    }
  }

  if (Metrics::Tracing::isEnabled()) {
    LOG(INFO) << "Will write metrics (" << Metrics::Tracing::isEnabled()
              << ") in " << Metrics::Tracing::outputPath();
  } else {
    LOG(INFO) << "Will not write any metric: " << Metrics::Tracing::isEnabled();
  }

  return ExitStatus::Success;
}

2984
src/OIDebugger.cpp Normal file

File diff suppressed because it is too large Load Diff

278
src/OIDebugger.h Normal file
View File

@ -0,0 +1,278 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <fstream>
#include "OICache.h"
#include "OICodeGen.h"
#include "OICompiler.h"
#include "OIParser.h"
#include "SymbolService.h"
#include "TrapInfo.h"
#include "TreeBuilder.h"
#include "X86InstDefs.h"
namespace fs = std::filesystem;
/*
 * Driver object behind the oid tool. oid's runScript() creates one instance
 * per run and uses it to parse the script, stop/continue the target, set up
 * the oid segments, compile and patch the probe code, service traps and
 * finally process the collected data.
 */
class OIDebugger {
// Declared before `public:`, so this constructor is private. It takes the
// arguments the two public constructors share - presumably they delegate to
// it; confirm in OIDebugger.cpp.
OIDebugger(std::string, OICodeGen::Config, TreeBuilder::Config);
public:
// Construct for a live process identified by pid.
OIDebugger(pid_t, std::string, OICodeGen::Config, TreeBuilder::Config);
// Construct from a path (oid passes the '-i' debug info binary).
OIDebugger(fs::path, std::string, OICodeGen::Config, TreeBuilder::Config);
bool segmentInit(void);
bool stopTarget(void);
bool interruptTarget(void);
bool compileCode();
bool processTargetData();
bool executeCode(pid_t);
void setDataSegmentSize(size_t);
void setGenerateJitDebugInfo(bool genJitDebugInfo) {
compilerConfig.generateJitDebugInfo = genJitDebugInfo;
}
void restoreState(void);
// Returns segConfig.existingConfig.
bool segConfigExists(void) const {
return segConfig.existingConfig;
};
enum oidMode { OID_MODE_THREAD, OID_MODE_FUNC };
void setMode(oidMode newMode) {
mode = newMode;
};
bool targetAttach(void);
// Result of handling one trap; oid's main loop stops on OID_DONE and its
// drain loop keeps going while OID_CONT is returned.
enum processTrapRet { OID_ERR, OID_CONT, OID_DONE };
OIDebugger::processTrapRet processTrap(pid_t, bool = true, bool = true);
bool contTargetThread(bool detach = true) const;
bool isGlobalDataProbeEnabled(void) const;
static uint64_t singlestepInst(pid_t, struct user_regs_struct &);
static bool singleStepFunc(pid_t, uint64_t);
bool parseScript(std::istream &script);
bool patchFunctions();
// Called from oid's signal handler.
void stopAll();
bool removeTraps(pid_t);
bool removeTrap(pid_t, const trapInfo &);
void enableDrgn();
bool unmapSegments(bool deleteSegConf = false);
// Returns the oidShouldExit flag; oid's trap loop polls this.
bool isInterrupted(void) const {
return oidShouldExit;
};
// Set the cache directory. The directory is created when it is missing but
// its parent exists; the result of create_directory() is currently ignored
// (see TODO below).
void setCacheBasePath(fs::path basePath) {
if (fs::exists(basePath.parent_path()) && !fs::exists(basePath)) {
// Create cachedir if parent directory exists
// TODO if returning false here, throw an error
fs::create_directory(basePath);
}
cache.basePath = std::move(basePath);
}
// Enable remote cache upload and/or download. abortOnLoadFail is switched
// on only in the download-without-upload configuration.
void setCacheRemoteEnabled(bool upload, bool download) {
cache.enableUpload = upload;
cache.enableDownload = download;
cache.abortOnLoadFail = download && !upload;
}
void setHardDisableDrgn(bool val) {
symbols->setHardDisableDrgn(val);
}
// Call cache.upload() for every (request, argument) pair in the parsed
// script. std::all_of stops at the first upload that returns false, so
// later pairs are not attempted after a failure.
bool uploadCache() {
return std::all_of(
std::begin(pdata), std::end(pdata), [this](const auto &req) {
return std::all_of(
std::begin(req.args), std::end(req.args),
[this, &req](const auto &arg) {
return cache.upload(irequest{req.type, req.func, arg});
});
});
}
// Download counterpart of uploadCache(), with the same early-stop behaviour.
bool downloadCache() {
return std::all_of(
std::begin(pdata), std::end(pdata), [this](const auto &req) {
return std::all_of(
std::begin(req.args), std::end(req.args),
[this, &req](const auto &arg) {
return cache.download(irequest{req.type, req.func, arg});
});
});
};
// Return copies of the root type and type hierarchy recorded in typeInfos
// for the script's request. Asserts that exactly one request was parsed;
// typeInfos.at() throws std::out_of_range if no entry exists for it.
std::pair<RootInfo, TypeHierarchy> getTreeBuilderTyping() {
assert(pdata.numReqs() == 1);
auto [type, th, _] = typeInfos.at(pdata.getReq().getReqForArg());
return {type, th};
};
// Return a copy of the padding info (third tuple element of typeInfos) for
// the script's request; same single-request precondition as above.
std::map<std::string, PaddingInfo> getPaddingInfo() {
assert(pdata.numReqs() == 1);
return std::get<2>(typeInfos.at(pdata.getReq().getReqForArg()));
}
void setCustomCodeFile(fs::path newCCT) {
customCodeFile = std::move(newCCT);
}
void setEnableJitLogging(bool enable) {
enableJitLogging = enable;
}
private:
std::string configFilePath;
bool debug = false;
bool enableJitLogging = false;
pid_t traceePid{};
uint64_t objectAddr{};
oidMode mode{OID_MODE_THREAD};
enum class SegType { text, data };
// NOTE(review): the names look like the task states reported by
// /proc/<pid>/stat - confirm against getTaskState()
enum class StatusType {
sleep,
traced,
running,
zombie,
dead,
diskSleep,
stopped,
other,
bad
};
static OIDebugger::StatusType getTaskState(pid_t pid);
static std::string taskStateToString(OIDebugger::StatusType);
// Default segment sizes: 1 MiB of data, 4 MiB + 1 MiB of text.
size_t dataSegSize{1 << 20};
size_t textSegSize{(1 << 22) + (1 << 20)};
std::vector<pid_t> threadList;
ParseData pdata{};
uint64_t replayInstsCurIdx{};
// Value returned by isInterrupted().
bool oidShouldExit{false};
uint64_t count{};
bool sigIntHandlerActive{false};
const int sizeofInt3 = 1;
const int replayInstSize = 512;
bool trapsRemoved{false};
std::shared_ptr<SymbolService> symbols;
OICache cache{};
/*
* Map address of valid INT3 instruction to metadata for that interrupt.
* It MUST be an ordered map (std::map) to handle overlapping traps.
*/
std::map<uint64_t, std::shared_ptr<trapInfo>> activeTraps;
std::unordered_map<pid_t, std::shared_ptr<trapInfo>> threadTrapState;
std::unordered_map<uintptr_t, uintptr_t> replayInstMap;
// Per-request typing results: root type, type hierarchy and padding info.
// Read by getTreeBuilderTyping() and getPaddingInfo().
std::unordered_map<irequest, std::tuple<RootInfo, TypeHierarchy,
std::map<std::string, PaddingInfo>>>
typeInfos;
template <typename Sys, typename... Args>
std::optional<typename Sys::RetType> remoteSyscall(Args...);
bool setupLogFile(void);
bool cleanupLogFile(void);
using ObjectAddrMap =
std::unordered_map<std::variant<std::shared_ptr<GlobalDesc>,
std::shared_ptr<FuncDesc::TargetObject>>,
uintptr_t>;
ObjectAddrMap remoteObjAddrs{};
bool setupSegment(SegType);
bool unmapSegment(SegType);
bool writeTargetMemory(void *, void *, size_t) const;
bool readTargetMemory(void *, void *, size_t) const;
std::optional<std::pair<OIDebugger::ObjectAddrMap::key_type, uintptr_t>>
locateJitCodeStart(const irequest &,
const OICompiler::RelocResult::SymTable &);
bool writePrologue(const prequest &,
const OICompiler::RelocResult::SymTable &);
bool readInstFromTarget(uintptr_t, uint8_t *, size_t);
void createSegmentConfigFile(void);
void deleteSegmentConfig(bool);
std::optional<std::shared_ptr<trapInfo>> makeTrapInfo(const prequest &,
const trapType,
const uint64_t);
bool functionPatch(const prequest &);
bool canProcessTrapForThread(pid_t) const;
bool replayTrappedInstr(const trapInfo &, pid_t, struct user_regs_struct &,
struct user_fpregs_struct &) const;
bool locateObjectsAddresses(const trapInfo &, struct user_regs_struct &);
processTrapRet processFuncTrap(const trapInfo &, pid_t,
struct user_regs_struct &,
struct user_fpregs_struct &);
processTrapRet processJitCodeRet(const trapInfo &, pid_t);
bool processGlobal(const std::string &);
static void dumpRegs(const char *, pid_t, struct user_regs_struct *);
std::optional<uintptr_t> nextReplayInstrAddr(const trapInfo &);
static int getExtendedWaitEventType(int);
static bool isExtendedWait(int);
void dumpAlltaskStates(void);
std::optional<std::vector<uintptr_t>> findRetLocs(FuncDesc &);
OICompiler::Config compilerConfig{};
OICodeGen::Config generatorConfig{};
TreeBuilder::Config treeBuilderConfig{};
std::optional<std::string> generateCode(const irequest &);
std::fstream segmentConfigFile;
fs::path segConfigFilePath;
fs::path customCodeFile;
// Layout of the oid text and data segments. `existingConfig` is what
// segConfigExists() reports.
struct c {
uintptr_t textSegBase{};
size_t textSegSize{};
uintptr_t constStart{};
uintptr_t jitCodeStart{};
uintptr_t replayInstBase{};
bool existingConfig{false};
uintptr_t dataSegBase{};
size_t dataSegSize{};
uintptr_t cookie{};
int logFile{};
} segConfig{};
/*
* The first 3 words of the data segment contain:
* 1. The OID identifier a.k.a. "magic id", 01DE8 in hex
* 2. A random value (cookie) to make sure that the data
* segment we are reading from was not populated in
* an older run.
* 3. The size of the data segment as written by the JIT-ed
* code.
*/
struct DataHeader {
uintptr_t magicId;
uintptr_t cookie;
uintptr_t size;
/*
* Flexible Array Member are not standard in C++, but this is
* exactly what we need for the `data` field. These pragmas
* disable the pedantic warnings, so the compiler stops yelling at us.
* We want the header to be the size of the fields above. This is
* important for the `decodeTargetData` method, to give the right size
* to `folly::ByteRange range()`.
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
uint8_t data[];
#pragma GCC diagnostic pop
};
bool decodeTargetData(const DataHeader &, std::vector<uint64_t> &) const;
// NOTE(review): presumably byte sizes reserved in the text segment for the
// prologue and the constants area - confirm in OIDebugger.cpp
static constexpr size_t prologueLength = 64;
static constexpr size_t constLength = 64;
};

168
src/OIGenerator.cpp Normal file
View File

@ -0,0 +1,168 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "OIGenerator.h"
#include <glog/logging.h>
#include <fstream>
#include <variant>
#include "DrgnUtils.h"
#include "OIUtils.h"
namespace ObjectIntrospection {
/*
 * Scan every function in `prog` and pick out the instantiations of
 * ObjectIntrospection::getObjectSize<T> that take two parameters and exactly
 * one template parameter.
 *
 * Returns one (T, linkage name) tuple per match, in iteration order.
 * Throws the drgnplusplus::error of any drgn call that fails.
 */
std::vector<std::tuple<drgn_qualified_type, std::string>>
OIGenerator::findOilTypesAndNames(drgnplusplus::program& prog) {
  std::vector<std::tuple<drgn_qualified_type, std::string>> found;

  for (auto& func : drgnplusplus::func_iterator(prog)) {
    // Fetch the function's fully qualified name.
    char* nameChars;
    size_t nameLen;
    if (drgnplusplus::error err(
            drgn_type_fully_qualified_name(func.type, &nameChars, &nameLen));
        err) {
      LOG(ERROR) << "error getting drgn type fully qualified name: " << err;
      throw err;
    }
    const std::string qualifiedName(nameChars, nameLen);

    // Short-circuit evaluation keeps the checks in their original order:
    // name first, then parameter count, then template parameter count.
    const bool isOilEntryPoint =
        qualifiedName.starts_with("ObjectIntrospection::getObjectSize<") &&
        drgn_type_num_parameters(func.type) == 2 &&
        drgn_type_num_template_parameters(func.type) == 1;
    if (!isOilEntryPoint) {
      continue;
    }

    // The single template parameter is the type to introspect.
    drgn_type_template_parameter firstParam =
        drgn_type_template_parameters(func.type)[0];
    drgn_qualified_type paramType;
    if (auto err = drgnplusplus::error(
            drgn_template_parameter_type(&firstParam, &paramType))) {
      LOG(ERROR) << "error getting drgn template parameter type: " << err;
      throw err;
    }
    LOG(INFO) << "found OIL type: " << drgn_type_name(paramType.type);

    char* linkageChars;
    if (auto err = drgnplusplus::error(
            drgn_type_linkage_name(func.type, &linkageChars))) {
      throw err;
    }
    const std::string linkageName = linkageChars;
    LOG(INFO) << "found linkage name: " << linkageName;

    found.emplace_back(paramType, linkageName);
  }

  return found;
}
/*
 * Generate and compile the introspection code for one root type.
 *
 * The generated source is written to `sourceFileDumpPath` when that path is
 * set; otherwise a placeholder file name is handed to the compiler. The
 * compiled output goes to `outputPath`.
 *
 * Returns false when the code generator cannot be created or code generation
 * fails; otherwise returns the result of OICompiler::compile().
 */
bool OIGenerator::generateForType(const OICodeGen::Config& generatorConfig,
                                  const OICompiler::Config& compilerConfig,
                                  const drgn_qualified_type& type,
                                  const std::string& linkageName) {
  auto codegen = OICodeGen::buildFromConfig(generatorConfig);
  if (!codegen) {
    LOG(ERROR) << "failed to initialise codegen";
    return false;
  }

  // The trace code is pulled in via #include and used as the starting text
  // that the generator then extends.
  std::string code =
#include "OITraceCode.cpp"
      ;

  codegen->setRootType(type);
  codegen->setLinkageName(linkageName);
  if (!codegen->generateFunctionsForTypesDrgn(code)) {
    LOG(ERROR) << "failed to generate code";
    return false;
  }

  // This is the path Clang acts as if it has compiled from e.g. for debug
  // information. It does not need to exist.
  std::string sourcePath = "oil_jit.cpp";
  if (!sourceFileDumpPath.empty()) {
    sourcePath = sourceFileDumpPath.string();
    // Scoped so the dump file is closed before compilation starts.
    std::ofstream dumpFile(sourcePath);
    dumpFile << code;
  }

  OICompiler compiler{{}, compilerConfig};
  return compiler.compile(code, sourcePath, outputPath);
}
/*
 * Load debug info from `primaryObject`, find the getObjectSize<T>
 * instantiations in it and generate code for each.
 *
 * Returns 0 when every type was generated, -1 when the config file could not
 * be processed or at least one type failed. Throws the drgnplusplus::error
 * if the debug info cannot be loaded.
 */
int OIGenerator::generate(fs::path& primaryObject) {
  drgnplusplus::program prog;

  // Exactly one object file is passed to drgn.
  const char* objectPath = primaryObject.c_str();
  if (auto err = drgnplusplus::error(drgn_program_load_debug_info(
          prog.get(), &objectPath, 1, false, false))) {
    LOG(ERROR) << "error loading debug info program: " << err;
    throw err;
  }

  const auto oilTypes = findOilTypesAndNames(prog);
  if (oilTypes.size() > 1) {
    LOG(WARNING) << "oilgen can currently only generate for one type per "
                    "compilation unit and we found "
                 << oilTypes.size();
  }

  OICodeGen::Config generatorConfig{};
  OICompiler::Config compilerConfig{};
  if (!OIUtils::processConfigFile(configFilePath, compilerConfig,
                                  generatorConfig)) {
    LOG(ERROR) << "failed to process config file";
    return -1;
  }
  generatorConfig.useDataSegment = false;

  size_t failedCount = 0;
  for (const auto& entry : oilTypes) {
    const auto& rootType = std::get<0>(entry);
    const auto& symbol = std::get<1>(entry);
    if (generateForType(generatorConfig, compilerConfig, rootType, symbol)) {
      continue;
    }
    LOG(WARNING) << "failed to generate for symbol `" << symbol
                 << "`. this is non-fatal but the call will not work.";
    ++failedCount;
  }

  LOG(INFO) << "object introspection generation complete. "
            << oilTypes.size() - failedCount << " successes and "
            << failedCount << " failures.";

  if (failedCount > 0) {
    return -1;
  }
  return 0;
}
} // namespace ObjectIntrospection

56
src/OIGenerator.h Normal file
View File

@ -0,0 +1,56 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include "DrgnUtils.h"
#include "OICodeGen.h"
#include "OICompiler.h"
namespace fs = std::filesystem;
namespace ObjectIntrospection {
// Generates and compiles introspection code for the OIL types found in an
// object file's debug info. Configure the paths with the setters, then call
// generate().
class OIGenerator {
public:
// Loads debug info from `primaryObject`, finds the OIL types in it and
// generates code for each one. Returns 0 if every type succeeded and -1 if
// the config file could not be processed or any type failed; throws the drgn
// error if the debug info cannot be loaded.
int generate(fs::path& primaryObject);
// Path handed to the compiler as the destination of the compiled output.
void setOutputPath(fs::path _outputPath) {
outputPath = std::move(_outputPath);
}
// Path of the config file read by OIUtils::processConfigFile().
void setConfigFilePath(fs::path _configFilePath) {
configFilePath = std::move(_configFilePath);
}
// Optional: when non-empty, the generated source is also written to this
// path before it is compiled.
void setSourceFileDumpPath(fs::path _sourceFileDumpPath) {
sourceFileDumpPath = std::move(_sourceFileDumpPath);
}
private:
fs::path outputPath;
fs::path configFilePath;
fs::path sourceFileDumpPath;
// Returns (type, linkage name) pairs; generate() calls generateForType()
// once per pair.
std::vector<std::tuple<drgn_qualified_type, std::string>>
findOilTypesAndNames(drgnplusplus::program& prog);
// Generates and compiles the code for one type. Returns false on failure.
bool generateForType(const OICodeGen::Config& generatorConfig,
const OICompiler::Config& compilerConfig,
const drgn_qualified_type& type,
const std::string& linkageName);
};
} // namespace ObjectIntrospection

51
src/OILexer.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
/* It looks like the yyFlexLexerOnce method is not the correct way of doing
this but it works for now. Fix it in the future. */
#if !defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif
/* #pragma once
#include <FlexLexer.h> */
#include "OIParser.tab.hh"
#include "location.hh"
namespace ObjectIntrospection {
// Flex-generated scanner adapted to the interface the bison-generated
// OIParser expects: yylex() receives the semantic value and location to fill.
class OIScanner : public yyFlexLexer {
public:
// `in` is forwarded unchanged to the yyFlexLexer base as its input stream.
OIScanner(std::istream *in) : yyFlexLexer(in){};
virtual ~OIScanner(){};
// get rid of override virtual function warning
using FlexLexer::yylex;
// Scanning entry point used by the parser. `lval` receives the token's
// semantic value and `location` is advanced as input is consumed.
virtual int yylex(OIParser::semantic_type *const lval,
OIParser::location_type *location);
// YY_DECL defined in OILexer.l
// Method body created by flex in OILexer.yy.cc
private:
/* yyval ptr */
// Set to `lval` at the start of every yylex() call (see OILexer.l).
OIParser::semantic_type *yylval = nullptr;
};
} // namespace ObjectIntrospection

99
src/OILexer.l Normal file
View File

@ -0,0 +1,99 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
%{
/* C++ string header, for string ops below */
#include <string>
/* Implementation of yyFlexScanner */
#include "OILexer.h"
/*
 * Replace flex's default yylex() signature so that the generated scanner body
 * becomes the definition of OIScanner::yylex(lval, loc) declared in OILexer.h.
 */
#undef YY_DECL
#define YY_DECL \
int ObjectIntrospection::OIScanner::yylex(ObjectIntrospection::OIParser::semantic_type * const lval, \
ObjectIntrospection::OIParser::location_type *loc )
/* typedef to make the returns for the tokens shorter */
using token = ObjectIntrospection::OIParser::token;
/*
 * update location on matching: step() is called first, then the location is
 * advanced by the length of the matched text (yyleng columns).
 */
#define YY_USER_ACTION loc->step(); loc->columns(yyleng);
%}
%option debug
%option nodefault
%option yyclass="ObjectIntrospection::OIScanner"
%option noyywrap
%option c++
%x COMMENT
%%
%{ /** Code executed at the beginning of yylex **/
yylval = lval;
%}
(arg[0-9]|retval|this) {
yylval->emplace<std::list<std::string>>(std::list<std::string>{yytext});
return( token::OI_ARG );
}
/*
* We very much rely on the fact that the probetype rule sits before the
* function matching rule below as they'll both match. In that case lex will
* return the first.
*/
(return|entry|global) {
yylval->emplace<std::string>(yytext);
return( token::OI_PROBETYPE );
}
/* oid uses mangled symbols to specify the function */
[a-zA-Z_0-9.$]+ {
yylval->emplace<std::string>(yytext);
return( token::OI_FUNC );
}
":" {
yylval->emplace<char>(yytext[0]);
return (token::OI_COLON);
}
<*>\n {
// Update line number
loc->lines();
}
, {
yylval->emplace<char>(yytext[0]);
return(token::OI_COMMA);
}
"//"[^\n]* /* skip one-line comments */
"/*" BEGIN(COMMENT); /* skip multi-lines comments */
<COMMENT>[^*\n]* /* skip comment's content */
<COMMENT>"*"+[^*/\n]* /* skip '*' */
<COMMENT>"*"+"/" BEGIN(INITIAL);
[ \t]+ /* skip whitespace */
<<EOF>> {
if (YYSTATE == COMMENT) {
throw ObjectIntrospection::OIParser::syntax_error(
*loc, "unterminated /* comment");
} else {
return( token::OI_EOF );
}
}
%%

63
src/OILibrary.cpp Normal file
View File

@ -0,0 +1,63 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "OILibraryImpl.h"
bool debug = false;
namespace ObjectIntrospection {
// Two option sets are equal when their config, cache and debug file paths,
// debug level and raw-pointer chasing flag all match.
bool operator==(const options& lhs, const options& rhs) {
  // Compare field by field, bailing out on the first mismatch.
  if (!(lhs.configFilePath == rhs.configFilePath)) {
    return false;
  }
  if (!(lhs.cacheDirPath == rhs.cacheDirPath)) {
    return false;
  }
  if (!(lhs.debugFilePath == rhs.debugFilePath)) {
    return false;
  }
  if (!(lhs.debugLevel == rhs.debugLevel)) {
    return false;
  }
  return lhs.chaseRawPointers == rhs.chaseRawPointers;
}
// Exact negation of operator== for options.
bool operator!=(const options& lhs, const options& rhs) {
  const bool same = (lhs == rhs);
  return !same;
}
// Stores a copy of the options and creates the implementation object.
// The implementation is created in the body, after `opts` has been
// initialised, because its constructor reads this object's options.
OILibrary::OILibrary(void* TemplateFunc, options opt) : opts(opt) {
  pimpl_ = new OILibraryImpl(this, TemplateFunc);
}
// Destroys the implementation object allocated by the constructor.
OILibrary::~OILibrary() {
delete pimpl_;
}
// Runs the initialisation steps in order: read the config file, map the text
// segment, configure the compiler, then compile/load the code.
// Returns OIL_BAD_CONFIG_FILE or OIL_SEGMENT_INIT_FAIL if one of the first
// two steps fails, otherwise whatever compileCode() returns.
int OILibrary::init() {
  const bool configOk = pimpl_->processConfigFile();
  if (!configOk) {
    return Response::OIL_BAD_CONFIG_FILE;
  }

  const bool segmentOk = pimpl_->mapSegment();
  if (!segmentOk) {
    return Response::OIL_SEGMENT_INIT_FAIL;
  }

  pimpl_->initCompiler();

  const int status = pimpl_->compileCode();
  return status;
}
// Calls the loaded entry point on `ObjectAddr` and stores the result in
// `*size`. Returns OIL_UNINITIALISED when no entry point has been set,
// otherwise OIL_SUCCESS.
int OILibrary::getObjectSize(void* ObjectAddr, size_t* size) {
  if (!fp) {
    return Response::OIL_UNINITIALISED;
  }

  const size_t measured = fp(ObjectAddr);
  *size = measured;
  return Response::OIL_SUCCESS;
}
} // namespace ObjectIntrospection

298
src/OILibraryImpl.cpp Normal file
View File

@ -0,0 +1,298 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "OILibraryImpl.h"
#include <fcntl.h>
#include <glog/logging.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <boost/format.hpp>
#include <fstream>
#include "OIUtils.h"
extern "C" {
#include <libelf.h>
}
namespace ObjectIntrospection {
/**
* We need a way to identify the BLOB/Object file, but we don't have access
* to the function's name or the introspected type's name without
* initialising drgn.
* Exclusively to OIL, we won't use the type's name, but the templated
* function address. We assume the address of the templated function
* changes at every compilation, so we don't re-use object files that
* are for an older version of the binary.
*/
const std::string function_identifier(uintptr_t functionAddress) {
return (boost::format("%x") % (uint32_t)(functionAddress % UINT32_MAX)).str();
}
// Stores the owning OILibrary and the address of the templated function.
// When the owner's debugLevel is non-zero, glog is switched to stderr and its
// verbosity is raised to that level.
OILibraryImpl::OILibraryImpl(OILibrary *self, void *TemplateFunc)
: _self(self), _TemplateFunc(TemplateFunc) {
if (_self->opts.debugLevel != 0) {
google::LogToStderr();
google::SetStderrLogging(0);
google::SetVLOGLevel("*", _self->opts.debugLevel);
// Upstream glog defines `GLOG_INFO` as 0 https://fburl.com/ydjajhz0,
// but internally it's defined as 1 https://fburl.com/code/9fwams75
//
// We don't want to link gflags in OIL, so setting it via the flags rather
// than with gflags::SetCommandLineOption
FLAGS_minloglevel = 0;
}
}
// Releases the text segment mapping, if there is one. The result of
// unmapSegment() is ignored; a failure is only logged by that function.
OILibraryImpl::~OILibraryImpl() {
unmapSegment();
}
// Maps an anonymous, private, read/write/execute region of
// segConfig.textSegSize bytes and records its base in segConfig.textSegBase.
// Returns false (after logging errno) when the mapping fails.
bool OILibraryImpl::mapSegment() {
  const int protection = PROT_EXEC | PROT_READ | PROT_WRITE;
  const int mapFlags = MAP_PRIVATE | MAP_ANONYMOUS;

  void *mapping =
      mmap(NULL, segConfig.textSegSize, protection, mapFlags, -1, 0);
  if (mapping != MAP_FAILED) {
    segConfig.textSegBase = mapping;
    return true;
  }

  PLOG(ERROR) << "error mapping text segment";
  return false;
}
// Unmaps the text segment created by mapSegment().
//
// Returns true when there was nothing to unmap or the unmap succeeded, and
// false (after logging errno) when munmap() fails. On success the recorded
// base is cleared, so a later call - including the one made by the
// destructor - is a no-op instead of unmapping the same range twice.
bool OILibraryImpl::unmapSegment() {
  if (segConfig.textSegBase == nullptr) {
    return true;
  }

  if (munmap(segConfig.textSegBase, segConfig.textSegSize) != 0) {
    PLOG(ERROR) << "error unmapping text segment";
    return false;
  }

  // Forget the stale address: the range may be reused by another mapping.
  segConfig.textSegBase = nullptr;
  return true;
}
// Creates the symbol service for the current process and copies the
// user-supplied options into the compiler, code generator and cache
// configuration.
void OILibraryImpl::initCompiler() {
// The symbol service is bound to our own pid.
symbols = std::make_shared<SymbolService>(getpid());
_cache.symbols = symbols;
compilerConfig.generateJitDebugInfo = _self->opts.generateJitDebugInfo;
// Code generator settings: only chaseRawPointers comes from the options,
// the other three are fixed here.
generatorConfig.useDataSegment = false;
generatorConfig.chaseRawPointers = _self->opts.chaseRawPointers;
generatorConfig.packStructs = true;
generatorConfig.genPaddingStats = false;
// Cache settings are taken verbatim from the options.
_cache.basePath = _self->opts.cacheDirPath;
_cache.enableUpload = _self->opts.enableUpload;
_cache.enableDownload = _self->opts.enableDownload;
_cache.abortOnLoadFail = _self->opts.abortOnLoadFail;
}
// Populates compilerConfig and generatorConfig from the config file named in
// the owner's options. Returns the result of OIUtils::processConfigFile().
bool OILibraryImpl::processConfigFile() {
return OIUtils::processConfigFile(_self->opts.configFilePath, compilerConfig,
generatorConfig);
}
// Minimal scope guard: calls `cleanupFunc(resource)` exactly once, when the
// guard is destroyed.
//
// T is the resource handle (stored by value) and F is any callable that
// accepts a T. The guard is non-copyable: a copy would run the cleanup a
// second time on the same resource (double close / double free).
// Initialising a guard from a prvalue, e.g. `auto g = Cleanup(fd, close);`,
// still works because no copy is made.
template <class T, class F>
class Cleanup {
  T resource;
  F cleanupFunc;

 public:
  Cleanup(T _resource, F _cleanupFunc)
      : resource{_resource}, cleanupFunc{_cleanupFunc} {
  }

  Cleanup(const Cleanup &) = delete;
  Cleanup &operator=(const Cleanup &) = delete;

  ~Cleanup() {
    cleanupFunc(resource);
  }
};
// Deleter used with std::unique_ptr<std::FILE, ...>: closes the stream.
// The return value of fclose() is discarded.
void close_file(std::FILE *fp) {
std::fclose(fp);
}
static inline void logElfError(const char *message) {
const char *elf_error_message = elf_errmsg(0);
if (elf_error_message)
LOG(ERROR) << message << ": " << elf_error_message;
else
LOG(ERROR) << message;
}
// Produces the object code for the introspected type, relocates it into the
// text segment and publishes its entry point in `_self->fp`.
//
// The object code comes from one of two places:
//  - forceJIT set: the type of `arg0` of the templated function is looked up
//    through drgn, code is generated for it and JIT-compiled;
//  - otherwise: the object code is read from the section of the running
//    executable whose name is OI_SECTION_PREFIX followed by
//    function_identifier() of the templated function's address.
// In both cases the object code goes through an in-memory file (memfd) that
// is passed to the relocation step by its /dev/fd path.
//
// Returns OIL_SUCCESS, OIL_COMPILATION_FAILURE when the object code cannot
// be produced, or OIL_RELOCATION_FAILURE when relocation fails or no
// `_Z7getSize*` symbol is found.
int OILibraryImpl::compileCode() {
  OICompiler compiler{symbols, compilerConfig};

  // memfd_create() reports failure with -1; 0 is a valid file descriptor.
  int objectMemfd = memfd_create("oil_object_code", 0);
  if (objectMemfd == -1) {
    PLOG(ERROR) << "failed to create memfd for object code";
    return Response::OIL_COMPILATION_FAILURE;
  }

  using unique_file_t = std::unique_ptr<std::FILE, decltype(&close_file)>;
  unique_file_t objectStream(fdopen(objectMemfd, "w+"), &close_file);
  if (!objectStream) {
    PLOG(ERROR) << "failed to convert memfd to stream";
    // This only needs to be cleaned up in the error case, as the fclose
    // on the unique_file_t will clean up the underlying fd if it was
    // created successfully.
    close(objectMemfd);
    return Response::OIL_COMPILATION_FAILURE;
  }
  auto objectPath =
      fs::path((boost::format("/dev/fd/%1%") % objectMemfd).str());

  if (_self->opts.forceJIT) {
    struct drgn_program *prog = symbols->getDrgnProgram();
    if (!prog) {
      return Response::OIL_COMPILATION_FAILURE;
    }

    struct drgn_symbol *sym;
    if (auto err = drgn_program_find_symbol_by_address(
            prog, (uintptr_t)_TemplateFunc, &sym)) {
      LOG(ERROR) << "Error when finding symbol by address " << err->code << " "
                 << err->message;
      drgn_error_destroy(err);
      return Response::OIL_COMPILATION_FAILURE;
    }
    // Take a copy of the name before the symbol is destroyed: the pointer
    // returned by drgn_symbol_name() must not be relied on afterwards.
    const std::string name = drgn_symbol_name(sym);
    drgn_symbol_destroy(sym);

    auto rootType =
        OICodeGen::getRootType(*symbols.get(), irequest{"entry", name, "arg0"});
    if (!rootType.has_value()) {
      LOG(ERROR) << "Failed to get type of probe argument";
      return Response::OIL_COMPILATION_FAILURE;
    }

    // OITraceCode.cpp is a raw string literal holding the code prelude.
    std::string code =
#include "OITraceCode.cpp"
        ;

    auto codegen = OICodeGen::buildFromConfig(generatorConfig);
    if (!codegen) {
      return Response::OIL_COMPILATION_FAILURE;
    }

    codegen->setRootType(rootType->type);
    if (!codegen->generate(code)) {
      return Response::OIL_COMPILATION_FAILURE;
    }

    std::string sourcePath = _self->opts.sourceFileDumpPath;
    if (_self->opts.sourceFileDumpPath.empty()) {
      // This is the path Clang acts as if it has compiled from e.g. for debug
      // information. It does not need to exist.
      sourcePath = "oil_jit.cpp";
    } else {
      std::ofstream outputFile(sourcePath);
      outputFile << code;
    }

    if (!compiler.compile(code, sourcePath, objectPath)) {
      return Response::OIL_COMPILATION_FAILURE;
    }
  } else {
    auto executable =
        open(fs::read_symlink("/proc/self/exe").c_str(), O_RDONLY);
    if (executable == -1) {
      PLOG(ERROR) << "Failed to open executable file";
      return Response::OIL_COMPILATION_FAILURE;
    }
    auto executableCleanup = Cleanup(executable, close);

    elf_version(EV_CURRENT);
    auto elf = elf_begin(executable, ELF_C_READ, NULL);
    auto elfCleanup = Cleanup(elf, elf_end);

    GElf_Ehdr ehdr;
    if (!gelf_getehdr(elf, &ehdr)) {
      logElfError("Failed to get ELF object file header");
      return Response::OIL_COMPILATION_FAILURE;
    }

    size_t string_table_index;
    if (elf_getshdrstrndx(elf, &string_table_index) != 0) {
      logElfError("Failed to get index of the section header string table");
      return Response::OIL_COMPILATION_FAILURE;
    }

    Elf_Scn *section = NULL;
    bool done = false;
    const auto identifier = function_identifier((uintptr_t)_TemplateFunc);
    const auto section_name = OI_SECTION_PREFIX.data() + identifier;

    // Walk the sections until the one carrying our object code is found.
    while ((section = elf_nextscn(elf, section))) {
      GElf_Shdr section_header;
      GElf_Shdr *header = gelf_getshdr(section, &section_header);
      if (!header)
        continue;

      const char *name = elf_strptr(elf, string_table_index, header->sh_name);
      if (name && strcmp(name, section_name.c_str()) == 0) {
        Elf_Data *section_data;
        if (!(section_data = elf_getdata(section, NULL))) {
          LOG(ERROR) << "Failed to get data from section '" << name
                     << "': " << elf_errmsg(0);
          return Response::OIL_COMPILATION_FAILURE;
        }
        if (section_data->d_size == 0) {
          LOG(ERROR) << "Section '" << name << "' is empty";
          return Response::OIL_COMPILATION_FAILURE;
        }
        if (fwrite(section_data->d_buf, 1, section_data->d_size,
                   objectStream.get()) < section_data->d_size) {
          PLOG(ERROR)
              << "Failed to write object file contents to temporary file";
          return Response::OIL_COMPILATION_FAILURE;
        }
        done = true;
        break;
      }
    }

    if (!done) {
      LOG(ERROR) << "Did not find section '" << section_name
                 << "' in the executable";
      return Response::OIL_COMPILATION_FAILURE;
    }
    // The relocation step reads the data back through /dev/fd, so push the
    // buffered bytes out to the memfd first.
    fflush(objectStream.get());
  }

  auto relocRes = compiler.applyRelocs(
      reinterpret_cast<uint64_t>(segConfig.textSegBase), {objectPath}, {});
  if (!relocRes.has_value()) {
    return Response::OIL_RELOCATION_FAILURE;
  }

  const auto &[_, segments, jitSymbols] = relocRes.value();

  // Locate the probe's entry point
  _self->fp = nullptr;
  for (const auto &[symName, symAddr] : jitSymbols) {
    if (symName.starts_with("_Z7getSize")) {
      _self->fp = (size_t(*)(void *))symAddr;
      break;
    }
  }
  if (!_self->fp)
    return Response::OIL_RELOCATION_FAILURE;

  // Copy relocated segments in their final destination
  for (const auto &[BaseAddr, RelocAddr, Size] : segments)
    memcpy((void *)RelocAddr, (void *)BaseAddr, Size);

  return Response::OIL_SUCCESS;
}
} // namespace ObjectIntrospection

56
src/OILibraryImpl.h Normal file
View File

@ -0,0 +1,56 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "OICache.h"
#include "OICodeGen.h"
#include "OICompiler.h"
#include "ObjectIntrospection.h"
#include "SymbolService.h"
namespace ObjectIntrospection {
const std::string function_identifier(uintptr_t);
// Implementation object behind OILibrary: holds the configuration, the
// symbol service and the executable text segment, and provides the steps
// that OILibrary::init() runs in sequence.
class OILibraryImpl {
public:
// Stores the owning OILibrary and the address of the templated function.
OILibraryImpl(OILibrary *, void *);
// Unmaps the text segment if one is mapped.
~OILibraryImpl();
// Create / release the anonymous mapping described by `segConfig`.
// Both return false on failure.
bool mapSegment();
bool unmapSegment();
// Copies the owner's options into the compiler, generator and cache configs.
void initCompiler();
// Produces, relocates and installs the code; returns a Response code.
int compileCode();
// Reads the config file named in the owner's options.
bool processConfigFile();
void enableLayoutAnalysis();
private:
class OILibrary *_self;
void *_TemplateFunc;
OICompiler::Config compilerConfig{};
OICodeGen::Config generatorConfig{};
std::shared_ptr<SymbolService> symbols{};
OICache _cache{};
// Text segment that receives the relocated code.
struct c {
// Base of the mapping; stays nullptr until mapSegment() succeeds.
void *textSegBase = nullptr;
// Size of the mapping in bytes: 1u << 22 == 4 MiB.
size_t textSegSize = 1u << 22;
} segConfig;
};
} // namespace ObjectIntrospection

114
src/OIOpts.h Normal file
View File

@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstring>
#include <iomanip>
#include <ostream>
#include <string>
#include <string_view>
#include <utility>
extern "C" {
#include <getopt.h>
}
// Description of one command-line option, used to build both the getopt
// tables and the usage text.
struct OIOpt {
// Single-character option name; also used as the `val` of the long option.
char shortName;
// Long option name, printed after "--" in the usage text.
const char *longName;
// Copied into getopt's `has_arg` field; also the number of ':' characters
// appended after `shortName` in the short-options string.
int has_arg;
// Name of the argument shown in the usage text; may be null.
const char *argName;
// Help text; every embedded '\n' starts a new, indented usage line.
const char *usage;
};
// Fixed-size table of N options that derives, at construction time, the
// short-options string and the long-options array in the form getopt_long()
// takes.
template <size_t N>
class OIOpts {
public:
// Builds the table from N OIOpt values (N is deduced by the guide below).
template <typename... Opts>
constexpr explicit OIOpts(Opts &&...options)
: _opts{std::forward<decltype(options)>(options)...} {
// Create the short opts string
size_t shortOptIndex = 0;
for (const auto &opt : _opts) {
_shortOpts[shortOptIndex++] = opt.shortName;
// One ':' per unit of has_arg.
// NOTE(review): the buffer allows at most 3 characters per option, so
// this presumably expects has_arg <= 2 - confirm.
for (int i = 0; i < opt.has_arg; ++i)
_shortOpts[shortOptIndex++] = ':';
}
// Pad the remaining with NULL bytes
while (shortOptIndex < _shortOpts.size())
_shortOpts[shortOptIndex++] = '\0';
// Create the array of long opts
for (size_t i = 0; i < _opts.size(); ++i) {
const auto &opt = _opts[i];
_longOpts[i] = {opt.longName, opt.has_arg, nullptr, opt.shortName};
}
// Add empty record to mark the end of long opts
_longOpts[_opts.size()] = {nullptr, no_argument, nullptr, '\0'};
}
// NUL-terminated short-options string.
constexpr const char *shortOpts() const {
return _shortOpts.data();
}
// Long-options array, terminated by an all-null record.
constexpr const struct option *longOpts() const {
return _longOpts.data();
}
// Prints one usage entry per option.
template <size_t M>
friend std::ostream &operator<<(std::ostream &os, const OIOpts<M> &opts);
private:
std::array<OIOpt, N> _opts;
// Up to three characters per option plus the terminating NUL.
std::array<char, 3 * N + 1> _shortOpts{};
// One record per option plus the terminating empty record.
std::array<struct option, N + 1> _longOpts{};
};
// Prints the usage text for an option table, one entry per option:
//
//   " -s,--long-name ARG  usage text"
//
// The long-name column is padded to the widest "longName[ argName]" of the
// table, and every line break inside a usage string is followed by enough
// padding to line the continuation up with the usage column.
//
// The stream's formatting flags are restored before returning, so the
// left-alignment used for the name column does not leak into the caller's
// later output.
template <size_t M>
std::ostream &operator<<(std::ostream &os, const OIOpts<M> &opts) {
  // Width of the widest "longName[ argName]" entry.
  size_t widestName = 0;
  for (const auto &opt : opts._opts) {
    size_t longNameWidth = std::strlen(opt.longName);
    if (opt.argName)
      longNameWidth += 1 + std::strlen(opt.argName);
    widestName = std::max(widestName, longNameWidth);
  }
  const int maxLongName = static_cast<int>(widestName);

  // std::left is sticky; remember the caller's flags so they can be restored.
  const std::ios_base::fmtflags savedFlags = os.flags();

  for (const auto &opt : opts._opts) {
    auto fullName = std::string(opt.longName);
    if (opt.argName) {
      fullName += ' ';
      fullName += opt.argName;
    }

    os << "  -" << opt.shortName << ",--";
    os << std::setw(maxLongName) << std::left;
    os << fullName << "  ";

    std::string_view usage = opt.usage;
    std::string_view::size_type old_pos = 0, new_pos = 0;
    while ((new_pos = usage.find('\n', old_pos)) != std::string_view::npos) {
      // Emit the line including its '\n', then indent the continuation.
      os << usage.substr(old_pos, new_pos - old_pos + 1);
      os << std::setw(maxLongName + 9) << ' ';
      old_pos = new_pos + 1;
    }
    os << usage.substr(old_pos) << '\n';
  }

  os.flags(savedFlags);
  return os;
}
template <typename... Opts>
OIOpts(Opts... opts) -> OIOpts<sizeof...(opts)>;

120
src/OIParser.h Normal file
View File

@ -0,0 +1,120 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cassert>
#include <list>
#include <memory>
#include <string>
#include <vector>
// A single introspection request: probe type, function and one argument.
struct irequest {
  irequest(std::string t, std::string f, std::string a) noexcept
      : type(std::move(t)), func(std::move(f)), arg(std::move(a)) {
  }

  const std::string type{}, func{}, arg{};

  // True for a request on the return value of a "return" probe.
  [[nodiscard]] bool isReturnRetVal() const noexcept {
    return type == "return" && arg == "retval";
  }

  // Renders the request as "type:func:arg".
  [[nodiscard]] const std::string toString() const {
    return type + ":" + func + ":" + arg;
  }
};

namespace std {

// Hash over all three fields.
//
// The field hashes are chained through an order-dependent mixing step rather
// than XORed together: with a plain XOR two equal fields cancel each other
// out (every request with func == arg would hash to hash(type)) and requests
// that merely swap two fields would always collide.
template <>
struct hash<irequest> {
  std::size_t operator()(const irequest &req) const noexcept {
    const hash<std::string> h{};
    std::size_t seed = h(req.type);
    seed = mix(seed, h(req.func));
    seed = mix(seed, h(req.arg));
    return seed;
  }

 private:
  // Folds `value` into `seed`; the result depends on the order of the calls.
  static std::size_t mix(std::size_t seed, std::size_t value) noexcept {
    return seed ^ (value + static_cast<std::size_t>(0x9e3779b97f4a7c15ULL) +
                   (seed << 6) + (seed >> 2));
  }
};

// Field-wise equality, consistent with the hash above.
template <>
struct equal_to<irequest> {
  bool operator()(const irequest &lhs, const irequest &rhs) const noexcept {
    return lhs.type == rhs.type && lhs.func == rhs.func && lhs.arg == rhs.arg;
  }
};

}  // namespace std
// A parsed request: probe type, function and the list of arguments named
// for it.
struct prequest {
prequest(std::string t, std::string f, std::vector<std::string> as) noexcept
: type(std::move(t)), func(std::move(f)), args(std::move(as)) {
}
const std::string type{}, func{};
const std::vector<std::string> args{};
// Builds the single-argument request for args[idx]. For a "global" request
// the argument is always empty and `idx` is ignored. `idx` is only
// range-checked by assert(), so it must be valid when asserts are disabled.
[[nodiscard]] irequest getReqForArg(size_t idx = 0) const {
if (type == "global")
return {type, func, ""};
assert(idx < args.size());
return {type, func, args[idx]};
}
};
// Ordered collection of the requests added through addReq(), iterable like
// a container.
class ParseData {
private:
using RequestVector = std::vector<prequest>;
RequestVector reqs{};
public:
// Appends a request; the argument strings are moved out of `args`.
void addReq(std::string type, std::string func, std::list<std::string> args) {
// Convert the args std::list into a more efficient std::vector
reqs.emplace_back(std::move(type), std::move(func),
std::vector(std::make_move_iterator(args.begin()),
std::make_move_iterator(args.end())));
}
// Number of requests added so far.
size_t numReqs() const noexcept {
return reqs.size();
}
// Returns the request at `idx`. The index is only range-checked by
// assert(), so it must be valid when asserts are disabled.
[[nodiscard]] const prequest &getReq(size_t idx = 0) const noexcept {
assert(idx < reqs.size());
return reqs[idx];
}
/* Delegate iterator to the RequestVector */
using iterator = RequestVector::iterator;
using const_iterator = RequestVector::const_iterator;
iterator begin() noexcept {
return reqs.begin();
}
const_iterator begin() const noexcept {
return reqs.begin();
}
const_iterator cbegin() const noexcept {
return reqs.begin();
}
iterator end() noexcept {
return reqs.end();
}
const_iterator end() const noexcept {
return reqs.end();
}
const_iterator cend() const noexcept {
return reqs.end();
}
};

106
src/OIParser.yy Normal file
View File

@ -0,0 +1,106 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is the bison grammar responsible for generating the ObjectIntrospection::OIParser class.
* This class gives us a number of things worth calling out here if only to
* remind me later :-):
*
* - A variant interface that replaces the C union interface for the
* parser's semantic values. Enabled by setting 'api.value.type variant' below.
*/
%skeleton "lalr1.cc"
%defines
%define api.namespace {ObjectIntrospection}
%define api.parser.class {OIParser}
%define parse.trace
%define parse.error verbose
%define parse.lac full
%code requires{
#include <list>
namespace ObjectIntrospection {
class OIScanner;
}
class ParseData;
}
/*
* ObjectIntrospection::OIParser constructor parameters. The scanner object is
* produced by flex and is derived from the yyFlexLexer class. The parser calls
* its yylex() implementation to generate input tokens. The ParseData
* object is populated by the parser with the introspection requests
* specified in the input file.
*/
%parse-param { OIScanner &scanner }
%parse-param { ParseData &pdata }
%code{
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <string>
#include <glog/logging.h>
#include "OIParser.h"
#include "OILexer.h"
#undef yylex
#define yylex scanner.yylex
}
%define api.value.type variant
%define parse.assert
%locations
%token <char>OI_COLON
%token <std::string>OI_PROBETYPE
%token <std::string>OI_FUNC
%token <std::list<std::string>>OI_ARG
%token <char>OI_COMMA
%token OI_EOF 0
%type <std::list<std::string>> oi_args
%%
script: oi_blocks OI_EOF
oi_blocks: oi_block | oi_blocks oi_block
oi_args: OI_ARG OI_COMMA oi_args
{
$$ = std::move($3);
$$.push_front(std::move($1.front()));
}
| OI_ARG;
oi_block: OI_PROBETYPE OI_COLON OI_FUNC OI_COLON oi_args
{
pdata.addReq(std::move($1), std::move($3), std::move($5));
}
| OI_PROBETYPE OI_COLON OI_FUNC
{
pdata.addReq(std::move($1), std::move($3), {});
}
;
%%
/*
 * Error callback of the generated parser: logs the message together with the
 * source location at ERROR level. Nothing is thrown or returned.
 */
void
ObjectIntrospection::OIParser::error(const location_type &l, const std::string &err_message)
{
LOG(ERROR) << "OI Parse Error: " << err_message << " at " << l;
}

157
src/OITraceCode.cpp Normal file
View File

@ -0,0 +1,157 @@
R"(
#define NDEBUG 1
// Required for compatibility with new glibc headers
#define __malloc__(x, y) __malloc__
#if !__has_builtin(__builtin_free)
#define __builtin_free(x) free(x)
#endif
#pragma clang diagnostic ignored "-Wunknown-attributes"
// clang-format off
// The header xmmintrin.h must come first. Otherwise it results in errors
// jemalloc during JIT compilation
#include <xmmintrin.h>
#include <cstdint>
#include <utility>
#include <unistd.h>
// clang-format on
#define C10_USING_CUSTOM_GENERATED_MACROS
// These globals are set by oid, see end of OIDebugger::compileCode()
extern uintptr_t dataBase;
extern size_t dataSize;
extern uintptr_t cookieValue;
extern int logFile;
constexpr int oidMagicId = 0x01DE8;
#include <array>
namespace {
class {
private:
// 1 MiB of pointers
std::array<uintptr_t, (1 << 20) / sizeof(uintptr_t)> data;
// twang_mix64 hash function, taken from Folly where it is used
// as the default hash function for 64-bit integers
constexpr static uint64_t twang_mix64(uint64_t key) noexcept {
key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1;
key = key ^ (key >> 24);
key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8)
key = key ^ (key >> 14);
key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4)
key = key ^ (key >> 28);
key = key + (key << 31); // key *= 1 + (1 << 31)
return key;
}
public:
void initialize() noexcept { data.fill(0); }
// Adds the pointer to the set.
// Returns `true` if the value was newly added,
// or `false` if the value was already present.
bool add(uintptr_t pointer) noexcept {
__builtin_assume(pointer > 0);
uint64_t index = twang_mix64(pointer) % data.size();
while (true) {
uintptr_t entry = data[index];
if (entry == 0) {
data[index] = pointer;
return true;
}
if (entry == pointer) {
return false;
}
index = (index + 1) % data.size();
}
}
} static pointers;
void __jlogptr(uintptr_t ptr) {
static constexpr char hexdigits[] = "0123456789abcdef";
static constexpr size_t ptrlen = 2 * sizeof(ptr);
static char hexstr[ptrlen + 1] = {};
size_t i = ptrlen;
while (i--) {
hexstr[i] = hexdigits[ptr & 0xf];
ptr = ptr >> 4;
}
hexstr[ptrlen] = '\n';
write(logFile, hexstr, sizeof(hexstr));
}
} // namespace
// Unforunately, this is a hack for AdFilterData.
class PredictorInterface;
class PredictionCompositionNode;
constexpr size_t kGEMMLOWPCacheLineSize = 64;
template <typename T>
struct AllocAligned {
// Allocate a T aligned at an `align` byte address
template <typename... Args>
static T* alloc(Args&&... args) {
void* p = nullptr;
#if defined(__ANDROID__)
p = memalign(kGEMMLOWPCacheLineSize, sizeof(T));
#elif defined(_MSC_VER)
p = _aligned_malloc(sizeof(T), kGEMMLOWPCacheLineSize);
#else
posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T));
#endif
if (p) {
return new (p) T(std::forward<Args>(args)...);
}
return nullptr;
}
// Free a T previously allocated via AllocAligned<T>::alloc()
static void release(T* p) {
if (p) {
p->~T();
#if defined(_MSC_VER)
_aligned_free((void*)p);
#else
free((void*)p);
#endif
}
}
};
// Deleter object for unique_ptr for an aligned object
template <typename T>
struct AlignedDeleter {
void operator()(T* p) const { AllocAligned<T>::release(p); }
};
// alignas(0) is ignored according to docs so can be default
template <unsigned int N, unsigned int align = 0>
struct alignas(align) DummySizedOperator {
char c[N];
};
// The empty class specialization is, unfortunately, necessary. When this operator
// is passed as a template parameter to something like unordered_map, even though
// an empty class and a class with a single character have size one, there is some
// empty class optimization that changes the static size of the container if an
// empty class is passed.
// DummySizedOperator<0,0> also collapses to this
template <>
struct DummySizedOperator<0> {
};
)"

157
src/OIUtils.cpp Normal file
View File

@ -0,0 +1,157 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <glog/logging.h>
#include <toml++/toml.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/property_tree/ini_parser.hpp>
#include <boost/property_tree/ptree.hpp>
#include "OICodeGen.h"
#include "OICompiler.h"
namespace OIUtils {
using namespace std::literals;
// Parses `configFilePath` as TOML and appends what it finds to the two
// configs. Every table and key is optional; array elements that are not
// strings are skipped.
//
// Keys read:
//   [types]    containers          -> generatorConfig.containerConfigPaths
//   [headers]  user_paths          -> compilerConfig.userHeaderPaths
//              system_paths        -> compilerConfig.sysHeaderPaths
//   [codegen]  default_headers     -> generatorConfig.defaultHeaders
//              default_namespaces  -> generatorConfig.defaultNamespaces
//              ignore              -> generatorConfig.membersToStub
//
// Returns false if the file fails to parse or an `ignore` entry has no
// string `type`. In the latter case the configs may already have been
// partially filled from the earlier sections.
bool processConfigFileToml(const std::string& configFilePath,
OICompiler::Config& compilerConfig,
OICodeGen::Config& generatorConfig) {
toml::table config;
try {
config = toml::parse_file(configFilePath);
} catch (const toml::parse_error& ex) {
LOG(ERROR) << "processConfigFileToml: " << configFilePath << " : "
<< ex.description();
return false;
}
if (toml::table* types = config["types"].as_table()) {
if (toml::array* arr = (*types)["containers"].as_array()) {
arr->for_each([&](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
generatorConfig.containerConfigPaths.emplace(std::string(el));
}
});
}
}
if (toml::table* headers = config["headers"].as_table()) {
if (toml::array* arr = (*headers)["user_paths"].as_array()) {
arr->for_each([&](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
compilerConfig.userHeaderPaths.emplace_back(el);
}
});
}
if (toml::array* arr = (*headers)["system_paths"].as_array()) {
arr->for_each([&](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
compilerConfig.sysHeaderPaths.emplace_back(el);
}
});
}
}
if (toml::table* codegen = config["codegen"].as_table()) {
if (toml::array* arr = (*codegen)["default_headers"].as_array()) {
arr->for_each([&](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
generatorConfig.defaultHeaders.emplace(el);
}
});
}
if (toml::array* arr = (*codegen)["default_namespaces"].as_array()) {
arr->for_each([&](auto&& el) {
if constexpr (toml::is_string<decltype(el)>) {
generatorConfig.defaultNamespaces.emplace(el);
}
});
}
// Each `ignore` entry is a table with a mandatory `type` and an optional
// `members` array. Entries that are not tables are skipped.
if (toml::array* arr = (*codegen)["ignore"].as_array()) {
for (auto&& el : *arr) {
if (toml::table* ignore = el.as_table()) {
auto* type = (*ignore)["type"].as_string();
if (!type) {
LOG(ERROR) << "Config entry 'ignore' must specify a type";
return false;
}
auto* members = (*ignore)["members"].as_array();
if (!members) {
// No member list: record the type with the member name "*".
generatorConfig.membersToStub.emplace_back(type->value_or(""sv),
"*"sv);
} else {
// One (type, member) pair per listed member; a member whose value
// cannot be read as a string is recorded as "".
for (auto&& member : *members) {
generatorConfig.membersToStub.emplace_back(type->value_or(""sv),
member.value_or(""sv));
}
}
}
}
}
}
return true;
}
/*
 * Legacy loader for INI formatted config files.
 *
 * Reads the optional keys `headers.userPath`, `headers.systemPath`,
 * `codegen.defaultHeaders` (all ':' separated) and
 * `codegen.defaultNamespaces` ('+' separated) and stores the resulting lists
 * in `compilerConfig` / `generatorConfig`.
 *
 * @param[in]  configFilePath  - path of the INI file to read
 * @param[out] compilerConfig  - receives the user/system header paths
 * @param[out] generatorConfig - receives the default headers/namespaces
 * @return false if the file cannot be parsed as INI, true otherwise
 */
bool processConfigFileIni(const std::string& configFilePath,
                          OICompiler::Config& compilerConfig,
                          OICodeGen::Config& generatorConfig) {
  boost::property_tree::ptree pt;
  try {
    boost::property_tree::ini_parser::read_ini(configFilePath, pt);
  } catch (const boost::property_tree::ini_parser_error& ex) {
    LOG(ERROR) << "processConfigFileIni: " << configFilePath << " : "
               << ex.message();
    return false;
  }

  // Splitting an empty (or absent) value still yields one empty token, and
  // stray separators yield more of them. Strip those so that we never hand
  // an empty header search path to the compiler.
  auto dropEmptyEntries = [](auto& entries) {
    for (auto it = entries.begin(); it != entries.end();) {
      if (it->empty()) {
        it = entries.erase(it);
      } else {
        ++it;
      }
    }
  };

  // None of the fields are required: a missing key is read as "".
  auto userHeaderPaths = pt.get<std::string>("headers.userPath", "");
  boost::split(compilerConfig.userHeaderPaths, userHeaderPaths,
               boost::is_any_of(":"));
  dropEmptyEntries(compilerConfig.userHeaderPaths);

  auto systemHeaderPaths = pt.get<std::string>("headers.systemPath", "");
  boost::split(compilerConfig.sysHeaderPaths, systemHeaderPaths,
               boost::is_any_of(":"));
  dropEmptyEntries(compilerConfig.sysHeaderPaths);

  std::string configHeaders = pt.get<std::string>("codegen.defaultHeaders", "");
  boost::algorithm::split(generatorConfig.defaultHeaders, configHeaders,
                          boost::algorithm::is_any_of(":"));
  generatorConfig.defaultHeaders.erase("");

  std::string configNamespaces =
      pt.get<std::string>("codegen.defaultNamespaces", "");
  boost::algorithm::split(generatorConfig.defaultNamespaces, configNamespaces,
                          boost::algorithm::is_any_of("+"));
  generatorConfig.defaultNamespaces.erase("");

  return true;
}
/*
 * Loads the config file at `configFilePath`, trying the TOML parser first
 * and only falling back to the legacy INI parser when TOML loading fails.
 *
 * @return true as soon as one of the two loaders succeeds
 */
bool processConfigFile(const std::string& configFilePath,
                       OICompiler::Config& compilerConfig,
                       OICodeGen::Config& generatorConfig) {
  if (processConfigFileToml(configFilePath, compilerConfig, generatorConfig)) {
    return true;
  }

  // TODO: remove the option to parse as INI entirely
  return processConfigFileIni(configFilePath, compilerConfig, generatorConfig);
}
} // namespace OIUtils

25
src/OIUtils.h Normal file
View File

@ -0,0 +1,25 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "OICodeGen.h"
#include "OICompiler.h"
namespace OIUtils {
// Reads the config file at `configFilePath` and fills in `compilerConfig`
// and `generatorConfig` from its contents.
// Returns true when the file was loaded, false when it could not be parsed.
bool processConfigFile(const std::string& configFilePath,
OICompiler::Config& compilerConfig,
OICodeGen::Config& generatorConfig);
}

72
src/PaddingHunter.cpp Normal file
View File

@ -0,0 +1,72 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "PaddingHunter.h"
#include <algorithm>
#include <fstream>
/*
 * Folds `localPaddedStructs` into `paddedStructs`. Structs seen for the
 * first time are copied over as-is; for structs that are already known only
 * the instance count is updated, keeping the larger of the two values.
 */
void PaddingHunter::processLocalPaddingInfo() {
  for (const auto &[name, localInfo] : localPaddedStructs) {
    auto [known, inserted] = paddedStructs.try_emplace(name, localInfo);
    if (inserted) {
      continue;
    }
    if (localInfo.instancesCnt > known->second.instancesCnt) {
      known->second.instancesCnt = localInfo.instancesCnt;
    }
  }
}
void PaddingHunter::outputPaddingInfo() {
std::ofstream paddingStatsFile;
paddingStatsFile.open(paddingStatsFileName);
uint64_t sum = 0;
std::vector<std::pair<std::string, PaddingInfo>> paddedStructsVec;
for (auto &paddedStruct : paddedStructs) {
paddedStructsVec.push_back({paddedStruct.first, paddedStruct.second});
}
for (auto &paddedStruct : paddedStructsVec) {
sum += paddedStruct.second.paddingSize * paddedStruct.second.instancesCnt;
}
paddingStatsFile << "Total Saving Opportunity: " << sum << "\n\n\n";
std::sort(paddedStructsVec.begin(), paddedStructsVec.end(),
[](const std::pair<std::string, PaddingInfo> &left,
const std::pair<std::string, PaddingInfo> &right) {
return left.second.instancesCnt * left.second.savingSize >
right.second.instancesCnt * right.second.savingSize;
});
for (auto &paddedStruct : paddedStructsVec) {
paddingStatsFile << "Name: " << paddedStruct.first
<< ", object size: " << paddedStruct.second.structSize
<< ", saving size: " << paddedStruct.second.savingSize
<< ", padding size: " << paddedStruct.second.paddingSize
<< ", isSet size: " << paddedStruct.second.isSetSize
<< ", instance_cnt: " << paddedStruct.second.instancesCnt
<< "\nSaving opportunity: "
<< paddedStruct.second.savingSize *
paddedStruct.second.instancesCnt
<< " bytes\n\n"
<< paddedStruct.second.definition << "\n\n\n";
}
paddingStatsFile.close();
}

88
src/PaddingHunter.h Normal file
View File

@ -0,0 +1,88 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <map>
#include <string>
#include <vector>
/*
 * Padding statistics gathered for a single struct type.
 */
struct PaddingInfo {
 public:
  PaddingInfo() = default;

  /*
   * Builds an entry for a non-Thrift struct: the alignment requirement is
   * fixed at 8, the isset offset at 0 and `paddings` starts out empty.
   */
  PaddingInfo(size_t strSize, int saveSz, size_t paddSz, size_t issetSz,
              std::string def, size_t instCnt)
      : structSize{strSize},
        alignmentRequirement{8},
        savingSize{static_cast<size_t>(saveSz)},
        paddingSize{paddSz},
        isSetSize{issetSz},
        isSetOffset{0},
        definition{def},
        instancesCnt{instCnt},
        isThriftStruct{false} {
  }

  size_t structSize;
  size_t alignmentRequirement;
  size_t savingSize;
  size_t paddingSize;
  size_t isSetSize;
  size_t isSetOffset;
  std::string definition;
  size_t instancesCnt;
  bool isThriftStruct;
  std::vector<size_t> paddings;

  /*
   * Difference between `isSetSize` and `isSetSize` divided by 8, rounded up.
   */
  size_t savingFromPacking() const {
    const size_t packed = (isSetSize + 7) / 8;
    return isSetSize - packed;
  }

  /*
   * Recomputes `savingSize`.
   *
   * For anything but a Thrift struct the saving is simply `paddingSize`.
   * For Thrift structs the saving from packing the isset area is added, and
   * the padding that would still be required to reach the next multiple of
   * `alignmentRequirement` is subtracted. All arithmetic is unsigned.
   */
  void computeSaving() {
    if (!isThriftStruct) {
      savingSize = paddingSize;
      return;
    }

    /* Sum of members whose size is not multiple of alignment */
    size_t unalignedSum = 0;
    for (const size_t padding : paddings) {
      unalignedSum += alignmentRequirement - padding / 8;
    }

    size_t packingGain = 0;
    if (isSetSize != 0) {
      packingGain = savingFromPacking();
      unalignedSum += isSetOffset - packingGain;
    }

    const size_t remainingPadding =
        (alignmentRequirement - unalignedSum % alignmentRequirement) %
        alignmentRequirement;
    savingSize = packingGain + (paddingSize - remainingPadding);
  }
};
// Collects per-struct padding statistics and writes them out as a report.
class PaddingHunter {
public:
// Merged statistics, keyed by struct name; this is what gets reported.
std::map<std::string, PaddingInfo> paddedStructs;
// Statistics waiting to be merged by `processLocalPaddingInfo()`.
std::map<std::string, PaddingInfo> localPaddedStructs;
// Path of the file written by `outputPaddingInfo()`.
std::string paddingStatsFileName = "PADDING";
// we do a max reduction on instance count across the probe points
void processLocalPaddingInfo();
// Writes the total and the per-struct details to `paddingStatsFileName`.
void outputPaddingInfo();
};

386
src/Serialize.cpp Normal file
View File

@ -0,0 +1,386 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Serialize.h"
#include <boost/format.hpp>
#include <boost/serialization/serialization.hpp>
#include <boost/serialization/version.hpp>
#include <stdexcept>
#include "OICodeGen.h"
namespace boost::serialization {
template <typename T>
void verify_version(const unsigned int version) {
const auto expected_version = boost::serialization::version<T>::value;
if (expected_version != version) {
auto error = (boost::format("Failed to serialize type `%1%`, as the class "
"version did not match "
"(cache had version %2%"
", but OID expected version %3%)") %
typeid(T).name() % version % expected_version)
.str();
throw std::runtime_error(error);
}
}
using iarchive = boost::archive::text_iarchive;
using oarchive = boost::archive::text_oarchive;
// Explicitly instantiates `serialize` for `Type` with both archive kinds
// used in this file (`iarchive` for loading, `oarchive` for saving).
//
// The default value for `boost::serialization::version` for a class is 0
// if it is not specified via `BOOST_CLASS_VERSION`. Therefore the
// `static_assert` in the below macro prevents us from accidentally
// serializing a new class without explicitly setting a version for it.
#define INSTANCIATE_SERIALIZE(Type) \
static_assert( \
boost::serialization::version<Type>::value > 0, \
"No class version was defined for type `" #Type \
"`, please add an invocation of `DEFINE_TYPE_VERSION` for this " \
"type."); \
template void serialize(iarchive &, Type &, const unsigned int); \
template void serialize(oarchive &, Type &, const unsigned int);
// Archives the struct size, padding size, definition, instance count and
// saving size of a `PaddingInfo`, in that order. No other member is written
// to or restored from the archive.
template <class Archive>
void serialize(Archive &ar, PaddingInfo &p, const unsigned int version) {
  verify_version<PaddingInfo>(version);
  ar & p.structSize & p.paddingSize & p.definition & p.instancesCnt &
      p.savingSize;
}
INSTANCIATE_SERIALIZE(PaddingInfo)
// Archives a `ContainerInfo`: type name, optional template parameter count,
// ctype, header and namespace list, in that order.
template <class Archive>
void serialize(Archive &ar, ContainerInfo &info, const unsigned int version) {
  verify_version<ContainerInfo>(version);

  ar & info.typeName;

  // Unfortunately boost serialization doesn't support `std::optional`,
  // so an absent value is stored as the largest possible `size_t`.
  constexpr size_t kAbsent = std::numeric_limits<size_t>::max();
  size_t paramCount = 0;
  if (Archive::is_saving::value) {
    paramCount = info.numTemplateParams.value_or(kAbsent);
  }
  ar & paramCount;
  if (Archive::is_loading::value) {
    if (paramCount != kAbsent) {
      info.numTemplateParams = paramCount;
    } else {
      info.numTemplateParams = std::nullopt;
    }
  }

  ar & info.ctype & info.header & info.ns;
}
INSTANCIATE_SERIALIZE(ContainerInfo)
// Archives a `drgn_location_description`: start, end, the expression size
// and finally the raw expression bytes.
//
// When loading, the expression buffer is allocated here and handed over to
// the struct. Throws `std::runtime_error` if that allocation fails.
template <class Archive>
void serialize(Archive &ar, struct drgn_location_description &location,
               const unsigned int version) {
  verify_version<struct drgn_location_description>(version);
  ar & location.start;
  ar & location.end;
  // The size must be read before the buffer can be allocated.
  ar & location.expr_size;
  if (Archive::is_loading::value) {
    // It is important to call `malloc` here instead of allocating with `new`
    // since these structs are usually allocated and deallocated directly by
    // `drgn`, which is written in C.
    location.expr =
        (const char *)malloc(sizeof(*location.expr) * location.expr_size);
    // A NULL result is only an error when we actually asked for memory;
    // `malloc(0)` is allowed to return NULL.
    if (location.expr == nullptr && location.expr_size > 0) {
      throw std::runtime_error(
          "Failed to allocate memory for a drgn location expression");
    }
  }
  ar & make_array<char>(const_cast<char *>(location.expr), location.expr_size);
}
INSTANCIATE_SERIALIZE(struct drgn_location_description)
template <class Archive>
void serialize(Archive &ar, struct drgn_object_locator &locator,
const unsigned int version) {
verify_version<struct drgn_object_locator>(version);
ar &locator.module_start;
ar &locator.module_end;
ar &locator.module_bias;
ar &locator.locations_size;
ar &locator.frame_base_locations_size;
if (Archive::is_loading::value) {
// It is important to call `malloc` here instead of allocating with `new`
// since these structs are usually allocated and deallocated directly by
// `drgn`, which is written in C.
locator.locations = (struct drgn_location_description *)malloc(
sizeof(*locator.locations) * locator.locations_size);
locator.frame_base_locations = (struct drgn_location_description *)malloc(
sizeof(*locator.frame_base_locations) *
locator.frame_base_locations_size);
}
ar &make_array<struct drgn_location_description>(locator.locations,
locator.locations_size);
ar &make_array<struct drgn_location_description>(
locator.frame_base_locations, locator.frame_base_locations_size);
ar &locator.qualified_type;
}
INSTANCIATE_SERIALIZE(struct drgn_object_locator)
// Archives a function argument: type name, validity flag and object locator,
// in that order.
template <class Archive>
void serialize(Archive &ar, FuncDesc::Arg &arg, const unsigned int version) {
  verify_version<FuncDesc::Arg>(version);
  ar & arg.typeName & arg.valid & arg.locator;
}
INSTANCIATE_SERIALIZE(FuncDesc::Arg)
// Archives a function return value descriptor: type name, then validity flag.
template <class Archive>
void serialize(Archive &ar, FuncDesc::Retval &retval,
               const unsigned int version) {
  verify_version<FuncDesc::Retval>(version);
  ar & retval.typeName & retval.valid;
}
INSTANCIATE_SERIALIZE(FuncDesc::Retval)
// Archives one address range of a function: start, then end.
template <class Archive>
void serialize(Archive &ar, FuncDesc::Range &range,
               const unsigned int version) {
  verify_version<FuncDesc::Range>(version);
  ar & range.start & range.end;
}
INSTANCIATE_SERIALIZE(FuncDesc::Range)
// Archives a function descriptor: symbol name, address ranges, method flag,
// arguments and return value, in that order.
template <class Archive>
void serialize(Archive &ar, FuncDesc &fd, const unsigned int version) {
  verify_version<FuncDesc>(version);
  ar & fd.symName & fd.ranges & fd.isMethod & fd.arguments & fd.retval;
}
INSTANCIATE_SERIALIZE(FuncDesc)
// Archives a global variable descriptor: symbol name, type name and base
// address, in that order.
template <class Archive>
void serialize(Archive &ar, GlobalDesc &gd, const unsigned int version) {
  verify_version<GlobalDesc>(version);
  ar & gd.symName & gd.typeName & gd.baseAddr;
}
INSTANCIATE_SERIALIZE(GlobalDesc)
// Archives a C string as its length followed by the characters including the
// terminating NUL. A NULL or empty string is stored as length 0 with no
// character data, and is always restored as NULL.
//
// When loading, the buffer is allocated with `malloc` and stored in
// `*string`. Throws `std::runtime_error` if that allocation fails.
template <class Archive>
static void serialize_c_string(Archive &ar, char **string) {
  // Always overwritten: computed when saving, read from the archive when
  // loading. Initialised so that it can never be read while indeterminate.
  size_t length = 0;
  if (Archive::is_saving::value) {
    length = *string ? strlen(*string) : 0;
  }
  ar & length;
  if (Archive::is_loading::value) {
    *string = length ? (char *)malloc(sizeof(char) * (length + 1)) : NULL;
    if (length > 0 && *string == NULL) {
      throw std::runtime_error(
          "Failed to allocate memory for a serialized C string");
    }
  }
  if (length > 0) {
    // `length + 1` so that the terminating NUL round-trips as well
    ar & make_array<char>(*string, length + 1);
  }
}
// ################################# CAUTION #################################
// The below code is *very* defensive and *very* precisely structured. Please
// DO NOT modify it without careful consideration and deliberation, as you
// are liable to break things otherwise. Something as simple as changing the
// order of two lines can cause cache corruption, so please make sure you know
// what you're doing (or ask someone who does) before touching anything.
// ###########################################################################
// Shallow (de)serialization of `struct drgn_type`.
//
// Archived, in order: kind, is_complete, primitive, qualifiers, the name or
// tag string, one member of the size union, little_endian (when the kind has
// it), die_addr, oi_size, oi_name and, for arrays only, the element type.
// When loading, everything else is left zeroed by the initial `memset`,
// except `language` which is always set to `drgn_language_cpp`.
template <class Archive>
void serialize(Archive &ar, struct drgn_type &type,
const unsigned int version) {
// Compile-time check that two members of `type._private` share an offset,
// i.e. that they are members of the same anonymous union.
#define assert_in_same_union(member_1, member_2) \
do { \
_Pragma("GCC diagnostic push"); \
_Pragma("GCC diagnostic ignored \"-Wpedantic\""); \
static_assert(offsetof(typeof(type._private), member_1) == \
offsetof(typeof(type._private), member_2)); \
_Pragma("GCC diagnostic pop"); \
} while (0)
verify_version<struct drgn_type>(version);
// We want to ensure that if the definition of `struct drgn_type` is changed
// at any point in the future that our code stops compiling, instead of us
// silently ignoring any newly added fields.
static_assert(sizeof(type) == 120);
// Ensure any unused fields are zeroed out for safety, as to avoid subtle
// bugs resulting from mistakenly unserialized fields containing garbage.
if (Archive::is_loading::value) {
memset(&type, 0, sizeof(type));
}
// For the most part, we serialize fields in the order they are declared to
// make it easy to visually confirm that we haven't missed anything.
//
// `.kind` MUST be serialized first, not just because it's declared first,
// but because all of the `drgn_type_has_*` functions rely on the value of
// `.kind`
ar &type._private.kind;
ar &type._private.is_complete;
ar &type._private.primitive;
ar &type._private.qualifiers;
// `.program` is NULL, per the initial `memset`
if (Archive::is_loading::value) {
type._private.language = &drgn_language_cpp;
}
// AVOIDING OVERSERIALIZATION:
// Many drgn structures contain pointers to `struct drgn_type`. We avoid
// serializing these structures (and therefore avoid recursively serializing
// `struct drgn_type`s) in order to avoid serializing massive amounts of data.
// In other words, our serialization of `struct drgn_type` is shallow.
// First union: `name`, `tag`, `num_parameters`
assert_in_same_union(name, tag);
assert_in_same_union(name, num_parameters);
if (drgn_type_has_name(&type)) {
serialize_c_string(ar, const_cast<char **>(&type._private.name));
} else if (drgn_type_has_tag(&type)) {
serialize_c_string(ar, const_cast<char **>(&type._private.tag));
} else if (drgn_type_has_parameters(&type)) {
// Leave `num_parameters` set to 0 per the initial `memset`,
// see "AVOIDING OVERSERIALIZATION" comment above
}
// Second union: `size`, `length`, `num_enumerators`, `is_variadic`
assert_in_same_union(size, length);
assert_in_same_union(size, num_enumerators);
assert_in_same_union(size, is_variadic);
if (drgn_type_has_size(&type)) {
ar &type._private.size;
} else if (drgn_type_has_length(&type)) {
ar &type._private.length;
} else if (drgn_type_has_enumerators(&type)) {
ar &type._private.num_enumerators;
} else if (drgn_type_has_is_variadic(&type)) {
ar &type._private.is_variadic;
}
// Third union: `little_endian`, `members`, `enumerators`, `parameters`
assert_in_same_union(little_endian, members);
assert_in_same_union(little_endian, enumerators);
assert_in_same_union(little_endian, parameters);
if (drgn_type_has_little_endian(&type)) {
ar &type._private.little_endian;
} else if (drgn_type_has_members(&type)) {
// Leave `members` set to NULL per the initial `memset`,
// see "AVOIDING OVERSERIALIZATION" comment above
} else if (drgn_type_has_enumerators(&type)) {
// Leave `enumerators` set to NULL per the initial `memset`,
// see "AVOIDING OVERSERIALIZATION" comment above
} else if (drgn_type_has_parameters(&type)) {
// Leave `parameters` set to NULL per the initial `memset`,
// see "AVOIDING OVERSERIALIZATION" comment above
}
// Leave `template_parameters`, `parents`, `num_template_parameters`,
// and `num_parents` set to NULL/0 per the initial `memset`, see
// "AVOIDING OVERSERIALIZATION" comment above
ar &type._private.die_addr;
// `.module` is NULL, per the initial `memset`
// When saving, compute the size of the type up front and store it in
// `oi_size`. If drgn cannot compute it, the largest representable value is
// stored instead.
if (Archive::is_saving::value) {
struct drgn_error *err = drgn_type_sizeof(&type, &type._private.oi_size);
if (err) {
drgn_error_destroy(err);
type._private.oi_size =
std::numeric_limits<decltype(type._private.oi_size)>::max();
}
}
ar &type._private.oi_size;
// It's important that `oi_name` is declared here and not inside the
// if statement so that its data isn't freed when we call
// `serialize_c_string`.
std::string oi_name;
if (Archive::is_saving::value) {
oi_name = OICodeGen::typeToName(&type);
type._private.oi_name = oi_name.c_str();
}
serialize_c_string(ar, const_cast<char **>(&type._private.oi_name));
// The element type is the only `struct drgn_type` pointer that is followed,
// and only for array types.
if (drgn_type_kind(&type) == DRGN_TYPE_ARRAY) {
ar &type._private.type;
}
#undef assert_in_same_union
}
INSTANCIATE_SERIALIZE(struct drgn_type)
// Archives a class member descriptor: type, member name, bit offset and bit
// field size, in that order.
template <class Archive>
void serialize(Archive &ar, struct DrgnClassMemberInfo &m,
               const unsigned int version) {
  verify_version<struct DrgnClassMemberInfo>(version);
  ar & m.type & m.member_name & m.bit_offset & m.bit_field_size;
}
INSTANCIATE_SERIALIZE(DrgnClassMemberInfo)
// Archives a qualified type: the underlying type, then its qualifiers.
template <class Archive>
void serialize(Archive &ar, struct drgn_qualified_type &type,
               const unsigned int version) {
  verify_version<struct drgn_qualified_type>(version);
  ar & type.type & type.qualifiers;
}
INSTANCIATE_SERIALIZE(struct drgn_qualified_type)
// Archives a root descriptor: variable name, then its type.
template <class Archive>
void serialize(Archive &ar, RootInfo &rootInfo, const unsigned int version) {
  verify_version<RootInfo>(version);
  ar & rootInfo.varName & rootInfo.type;
}
INSTANCIATE_SERIALIZE(RootInfo)
// Archives every map and list of a `TypeHierarchy`, in the order: class
// members, container types, typedefs, sizes, known dummy types, pointer
// targets and Thrift isset struct types.
template <class Archive>
void serialize(Archive &ar, struct TypeHierarchy &th,
               const unsigned int version) {
  verify_version<TypeHierarchy>(version);
  ar & th.classMembersMap & th.containerTypeMap & th.typedefMap & th.sizeMap &
      th.knownDummyTypeList & th.pointerToTypeMap & th.thriftIssetStructTypes;
}
INSTANCIATE_SERIALIZE(struct TypeHierarchy)
// INSTANCIATE_SERIALIZE(std::map<struct drgn_type *, struct drgn_type *>)
} // namespace boost::serialization

80
src/Serialize.h Normal file
View File

@ -0,0 +1,80 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/serialization/map.hpp>
#include <boost/serialization/set.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <boost/serialization/string.hpp>
#include <boost/serialization/unordered_map.hpp>
#include <boost/serialization/vector.hpp>
#include "Common.h"
#include "ContainerInfo.h"
#include "PaddingHunter.h"
#include "SymbolService.h"
// Registers `version` as the Boost serialization class version of `Type` and
// pins the expected `sizeof(Type)` to `size`. If the size of a type changes,
// compilation fails with a reminder to update `size` and bump `version`
// here.
#define DEFINE_TYPE_VERSION(Type, size, version) \
static_assert( \
sizeof(Type) == size, \
"Type `" #Type \
"` has changed, please update the `size` parameter and increment the " \
"`version` parameter of the corresponding invocation " \
"of `DEFINE_TYPE_VERSION` in " __FILE__); \
BOOST_CLASS_VERSION(Type, version)
// One entry per serialized type: (type, expected size in bytes, version).
DEFINE_TYPE_VERSION(PaddingInfo, 120, 3)
DEFINE_TYPE_VERSION(ContainerInfo, 168, 4)
DEFINE_TYPE_VERSION(struct drgn_location_description, 32, 2)
DEFINE_TYPE_VERSION(struct drgn_object_locator, 72, 2)
DEFINE_TYPE_VERSION(FuncDesc::Arg, 128, 2)
DEFINE_TYPE_VERSION(FuncDesc::Retval, 56, 2)
DEFINE_TYPE_VERSION(FuncDesc::Range, 16, 2)
DEFINE_TYPE_VERSION(FuncDesc, 104, 4)
DEFINE_TYPE_VERSION(GlobalDesc, 72, 4)
DEFINE_TYPE_VERSION(struct drgn_type, 120, 3)
DEFINE_TYPE_VERSION(DrgnClassMemberInfo, 64, 3)
DEFINE_TYPE_VERSION(struct drgn_qualified_type, 16, 2)
DEFINE_TYPE_VERSION(RootInfo, 48, 2)
DEFINE_TYPE_VERSION(TypeHierarchy, 336, 5)
#undef DEFINE_TYPE_VERSION
namespace boost::serialization {
// Declares the `serialize` function template overload for `Type`. Only
// declarations are emitted here; no function bodies appear in this header.
#define DECL_SERIALIZE(Type) \
template <class Archive> \
void serialize(Archive &, Type &, const unsigned int)
DECL_SERIALIZE(PaddingInfo);
DECL_SERIALIZE(ContainerInfo);
DECL_SERIALIZE(FuncDesc::Arg);
DECL_SERIALIZE(FuncDesc);
DECL_SERIALIZE(GlobalDesc);
DECL_SERIALIZE(struct drgn_type);
DECL_SERIALIZE(struct drgn_qualified_type);
DECL_SERIALIZE(RootInfo);
DECL_SERIALIZE(DrgnClassMemberInfo);
DECL_SERIALIZE(TypeHierarchy);
#undef DECL_SERIALIZE
} // namespace boost::serialization

731
src/SymbolService.cpp Normal file
View File

@ -0,0 +1,731 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "SymbolService.h"
#include <glog/logging.h>
#include <algorithm>
#include <boost/scope_exit.hpp>
#include <cassert>
#include <cstring>
#include <fstream>
#include "OICodeGen.h"
#include "OIParser.h"
extern "C" {
#include <elfutils/known-dwarf.h>
#include <elfutils/libdwfl.h>
#include "drgn.h"
#include "dwarf.h"
}
/**
 * Collect the executable mappings of a process from /proc/<pid>/maps.
 *
 * Every mapping whose permission field has the 'x' bit set is appended to
 * 'exeAddrs' as a (start, end) pair, in the order the lines are read.
 *
 * @param[in]  pid      - process whose memory map is read
 * @param[out] exeAddrs - receives the executable address ranges
 * @return false if a line cannot be parsed, true otherwise. Note that true
 *         is also returned when the maps file cannot be opened, in which
 *         case 'exeAddrs' is left untouched.
 */
static bool LoadExecutableAddressRange(
    pid_t pid, std::vector<std::pair<uint64_t, uint64_t>> &exeAddrs) {
  std::ifstream f("/proc/" + std::to_string(pid) + "/maps");
  if (f.is_open()) {
    std::string line;
    uint64_t start = 0;
    uint64_t end = 0;
    uint64_t offset = 0;
    uint64_t inode = 0;
    unsigned int dmajor = 0;
    unsigned int dminor = 0;
    int nread = -1;
    constexpr int permissionsLen = 4;
    char perm[permissionsLen + 1] = {};

    while (std::getline(f, line)) {
      // The field width on %4s must match permissionsLen: it guarantees
      // that sscanf never writes past the end of 'perm'.
      if (sscanf(line.c_str(),
                 "%" SCNx64 "-%" SCNx64 " %4s %" SCNx64 " %x:%x %" SCNu64 " %n",
                 &start, &end, perm, &offset, &dmajor, &dminor, &inode,
                 &nread) < 7 ||
          nread <= 0) {
        return false;
      }

      if (strlen(perm) != permissionsLen) {
        return false;
      }

      // Permissions are laid out as "rwxp"
      if (perm[2] == 'x') {
        exeAddrs.emplace_back(start, end);
      }
    }
  }

  return true;
}
#undef PREMISSIONS_LEN
/**
 * Tell whether 'addr' falls inside one of the half-open ranges
 * [start, end) listed in 'exeAddrs'. The list is expected to be sorted.
 */
static bool isExecutableAddr(
    uint64_t addr, const std::vector<std::pair<uint64_t, uint64_t>> &exeAddrs) {
  assert(std::is_sorted(exeAddrs.begin(), exeAddrs.end()));

  // Binary search for the first range that ends after addr...
  const auto candidate = std::upper_bound(
      exeAddrs.begin(), exeAddrs.end(), addr,
      [](uint64_t value, const auto &range) { return value < range.second; });
  if (candidate == exeAddrs.end()) {
    return false;
  }

  // ...which contains addr only if it doesn't start after it.
  return candidate->first <= addr;
}
/**
 * Create a symbol service for either a running process (pid) or an
 * executable on disk (path).
 */
SymbolService::SymbolService(std::variant<pid_t, fs::path> newTarget) {
  target = std::move(newTarget);

  // Only a process has a memory map to load the executable ranges from
  if (const pid_t *pid = std::get_if<pid_t>(&target)) {
    LoadExecutableAddressRange(*pid, executableAddrs);
  }
}
/**
 * Release the drgn program, if one was ever created.
 */
SymbolService::~SymbolService() {
  if (prog == nullptr) {
    return;
  }
  drgn_program_destroy(prog);
}
// Input/output state shared between `SymbolService::locateSymbol` and
// `moduleCallback` through the callback's opaque `arg` pointer.
struct ModParams {
// Name of the symbol to look for; reset to nullptr once it has been found.
const char *st;
// ELF symbol entry of the most recently inspected symbol.
GElf_Sym s;
// Address of the symbol when found, 0 otherwise.
GElf_Addr value;
// Executable address ranges used to validate non-object symbols.
std::vector<std::pair<uint64_t, uint64_t>> &exeAddrs;
};
/**
 * Callback for dwfl_getmodules(). Walks the symbol table of the given
 * module looking for the symbol requested through 'arg', which points to a
 * ModParams.
 *
 * On a match, ModParams::s and ModParams::value hold the symbol's ELF entry
 * and address, ModParams::st is reset to nullptr and the module iteration is
 * aborted. Otherwise ModParams::value is set to 0 and the iteration carries
 * on with the next module.
 *
 * @param[in]     mod  - The module to search.
 * @param[in]     name - The module's name.
 * @param[in,out] arg  - The ModParams describing the lookup.
 */
static int moduleCallback(Dwfl_Module *mod, void ** /* userData */,
                          const char *name, Dwarf_Addr /* start */, void *arg) {
  auto *params = static_cast<ModParams *>(arg);

  const int symCount = dwfl_module_getsymtab(mod);
  VLOG(1) << "mod name: " << name << " "
          << "nsyms " << symCount;

  // FIXME: There's surely a better way to distinguish debuginfo modules from
  // actual code modules.
  static const char kDebugSuffix[] = ".debuginfo";
  const size_t suffixLen = sizeof(kDebugSuffix) - 1;
  const size_t nameLen = strlen(name);
  if (nameLen >= suffixLen &&
      strncmp(name + (nameLen - suffixLen), kDebugSuffix, suffixLen) == 0) {
    VLOG(1) << "Skipping debuginfo module";
    params->value = 0;
    return DWARF_CB_OK;
  }

  /* I think the first entry is always UNDEF */
  for (int idx = 1; idx < symCount; ++idx) {
    Elf *elf = nullptr;
    GElf_Word sectionIdx = 0;
    const char *symName = dwfl_module_getsym_info(
        mod, idx, &params->s, &params->value, &sectionIdx, &elf, nullptr);
    if (symName == nullptr || *symName == '\0') {
      continue;
    }

    const auto symType = GELF_ST_TYPE(params->s.st_info);
    if (symType == STT_SECTION || symType == STT_FILE || symType == STT_TLS ||
        symType == STT_NOTYPE) {
      continue;
    }

    const bool isRequestedSymbol = sectionIdx != SHN_UNDEF &&
                                   params->st != nullptr &&
                                   strcmp(symName, params->st) == 0;
    if (!isRequestedSymbol) {
      continue;
    }

    /*
     * Data objects are accepted on a name match alone. Every other kind of
     * symbol must additionally resolve to an executable address.
     */
    if (symType != STT_OBJECT &&
        !isExecutableAddr(params->value, params->exeAddrs)) {
      continue;
    }

    VLOG(1) << "Symbol lookup successful for " << symName << " in module "
            << name;
    params->st = nullptr;
    return DWARF_CB_ABORT;
  }

  // Set value to 0 if symbol is not found
  params->value = 0;
  return DWARF_CB_OK;
}
/**
* Resolve a symbol to its location in the target ELF binary.
*
* The target is either reported to libdwfl as a live process (pid) or as an
* offline executable (path); every reported module is then searched with
* moduleCallback().
*
* @param[in] symName - symbol to resolve
* @return - A std::optional with the symbol's information, or std::nullopt
* if libdwfl could not be set up or the symbol was not found
*/
std::optional<SymbolInfo> SymbolService::locateSymbol(
const std::string &symName) {
// Never assigned here, so it stays NULL; only its address is handed to
// libdwfl through the callbacks table.
static char *debuginfo_path;
static const Dwfl_Callbacks proc_callbacks{
.find_elf = dwfl_linux_proc_find_elf,
.find_debuginfo = dwfl_standard_find_debuginfo,
.section_address = dwfl_offline_section_address,
.debuginfo_path = &debuginfo_path,
};
Dwfl *dwfl = dwfl_begin(&proc_callbacks);
if (dwfl == nullptr) {
LOG(ERROR) << "dwfl_begin: " << dwfl_errmsg(dwfl_errno());
return std::nullopt;
}
// Release the session on every exit path below
BOOST_SCOPE_EXIT_ALL(&) {
dwfl_end(dwfl);
};
switch (target.index()) {
// Target is a pid: report the modules of the live process
case 0: {
auto pid = std::get<pid_t>(target);
if (int err = dwfl_linux_proc_report(dwfl, pid)) {
LOG(ERROR) << "dwfl_linux_proc_report: " << dwfl_errmsg(err);
return std::nullopt;
}
break;
}
// Target is a path: report the executable as an offline module
case 1: {
const auto &exe = std::get<fs::path>(target);
Dwfl_Module *mod =
dwfl_report_offline(dwfl, exe.c_str(), exe.c_str(), -1);
if (mod == nullptr) {
LOG(ERROR) << "dwfl_report_offline: " << dwfl_errmsg(dwfl_errno());
return std::nullopt;
}
Dwarf_Addr start = 0;
Dwarf_Addr end = 0;
if (dwfl_module_info(mod, nullptr, &start, &end, nullptr, nullptr,
nullptr, nullptr) == nullptr) {
LOG(ERROR) << "dwfl_module_info: " << dwfl_errmsg(dwfl_errno());
return std::nullopt;
}
VLOG(1) << "Module info for " << exe << ": start= " << std::hex << start
<< ", end=" << end;
// Add module's boundary to executableAddrs
// (this replaces whatever executableAddrs held before)
executableAddrs = {{start, end}};
break;
}
}
// A failure here is only logged; the lookup is still attempted
if (dwfl_report_end(dwfl, nullptr, nullptr) != 0) {
LOG(ERROR) << "dwfl_report_end: " << dwfl_errmsg(-1);
}
ModParams m = {
.st = symName.c_str(), .s = {}, .value = 0, .exeAddrs = executableAddrs};
dwfl_getmodules(dwfl, moduleCallback, (void *)&m, 0);
// moduleCallback leaves value at 0 when the symbol was not found
if (m.value == 0) {
return std::nullopt;
}
return SymbolInfo{m.value, m.s.st_size};
}
/**
 * Format a byte buffer as a lowercase hexadecimal string, two characters
 * per byte.
 *
 * @param[in] bytes   - buffer to format
 * @param[in] nbbytes - number of bytes in the buffer
 * @return the hexadecimal string, empty if 'bytes' is null or 'nbbytes' is
 *         not positive
 */
static std::string bytesToHexString(const unsigned char *bytes, int nbbytes) {
  static const char characters[] = "0123456789abcdef";

  // A negative count must not be turned into a huge unsigned length
  if (bytes == nullptr || nbbytes <= 0) {
    return {};
  }

  std::string ret;
  ret.reserve(static_cast<size_t>(nbbytes) * 2);
  for (int i = 0; i < nbbytes; ++i) {
    ret.push_back(characters[bytes[i] >> 4]);
    ret.push_back(characters[bytes[i] & 0x0F]);
  }

  return ret;
}
/**
 * Callback for dwfl_getmodules(). Looks up the build ID of the provided
 * module and hands it back, hex encoded, through 'arg', which points to a
 * std::optional<std::string>.
 * We expect the target program to always be the first module passed
 * to this callback. So we always return DWARF_CB_ABORT, as this is
 * the only build ID we are interested in.
 */
static int buildIDCallback(Dwfl_Module *mod, void ** /* userData */,
                           const char *name, Dwarf_Addr /* start */,
                           void *arg) {
  auto *result = static_cast<std::optional<std::string> *>(arg);

  // We must call dwfl_module_getelf before using dwfl_module_build_id
  GElf_Addr bias = 0;
  if (dwfl_module_getelf(mod, &bias) == nullptr) {
    LOG(ERROR) << "Failed to getelf for " << name << ": " << dwfl_errmsg(-1);
    return DWARF_CB_ABORT;
  }

  const unsigned char *idBytes = nullptr;
  GElf_Addr idAddr = 0;
  const int idLen = dwfl_module_build_id(mod, &idBytes, &idAddr);
  if (idLen > 0) {
    *result = bytesToHexString(idBytes, idLen);
    VLOG(1) << "Build ID lookup successful for " << name << ": "
            << result->value();
  } else {
    *result = std::nullopt;
    LOG(ERROR) << "Build ID not found for " << name;
  }

  return DWARF_CB_ABORT;
}
/**
* Look up the build ID of the target.
*
* The target is reported to libdwfl either as a live process (pid) or as an
* offline executable (path), then buildIDCallback() is run on the reported
* modules.
*
* @return - The build ID as set by buildIDCallback(), or std::nullopt if it
* could not be determined
*/
std::optional<std::string> SymbolService::locateBuildID() {
// Never assigned here, so it stays NULL; only its address is handed to
// libdwfl through the callbacks table.
static char *debuginfoPath;
static const Dwfl_Callbacks procCallbacks = {
.find_elf = dwfl_linux_proc_find_elf,
.find_debuginfo = dwfl_standard_find_debuginfo,
.section_address = dwfl_offline_section_address,
.debuginfo_path = &debuginfoPath,
};
Dwfl *dwfl = dwfl_begin(&procCallbacks);
if (dwfl == nullptr) {
LOG(ERROR) << "dwfl_begin: " << dwfl_errmsg(dwfl_errno());
return std::nullopt;
}
// Release the session on every exit path below
BOOST_SCOPE_EXIT_ALL(&) {
dwfl_end(dwfl);
};
switch (target.index()) {
// Target is a pid: report the modules of the live process
case 0: {
auto pid = std::get<pid_t>(target);
// A failure here is only logged; the lookup is still attempted
if (auto err = dwfl_linux_proc_report(dwfl, pid)) {
LOG(ERROR) << "dwfl_linux_proc_report: " << dwfl_errmsg(err);
}
break;
}
// Target is a path: report the executable as an offline module
case 1: {
const auto &exe = std::get<fs::path>(target);
if (dwfl_report_offline(dwfl, exe.c_str(), exe.c_str(), -1) == nullptr) {
LOG(ERROR) << "dwfl_report_offline: " << dwfl_errmsg(dwfl_errno());
return std::nullopt;
}
break;
}
}
// A failure here is only logged; the lookup is still attempted
if (dwfl_report_end(dwfl, nullptr, nullptr) != 0) {
LOG(ERROR) << "dwfl_report_end: " << dwfl_errmsg(-1);
}
// Stays empty unless buildIDCallback stores a value in it
std::optional<std::string> buildID;
dwfl_getmodules(dwfl, buildIDCallback, (void *)&buildID, 0);
return buildID;
}
/*
 * Lazily create the drgn program for the target and cache it in `prog`.
 *
 * For a pid target the program is created from the live process and the
 * debug info of /proc/<pid>/exe is loaded; for a path target an empty
 * program is created and the debug info of that file is loaded.
 *
 * Returns the cached/new program, or nullptr if drgn is hard-disabled or
 * initialisation failed. On failure `prog` is always left as nullptr so that
 * a later call retries instead of handing out a half-initialised program.
 */
struct drgn_program *SymbolService::getDrgnProgram() {
  if (hardDisableDrgn) {
    LOG(ERROR) << "drgn is disabled, refusing to initialize";
    return nullptr;
  }

  if (prog != nullptr) {
    return prog;
  }

  LOG(INFO) << "Initialising drgn. This might take a while";
  switch (target.index()) {
    case 0: {
      if (auto *err = drgn_program_from_pid(std::get<pid_t>(target), &prog)) {
        LOG(ERROR) << "Failed to initialize drgn: " << err->code << " "
                   << err->message;
        return nullptr;
      }
      auto executable = fs::read_symlink(
          "/proc/" + std::to_string(std::get<pid_t>(target)) + "/exe");
      const auto *executableCStr = executable.c_str();
      if (auto *err = drgn_program_load_debug_info(prog, &executableCStr, 1,
                                                   false, false)) {
        LOG(ERROR) << "Error loading debug info: " << err->message;
        // Same cleanup as the path case below: without it `prog` stays
        // non-null and the next call would return a program that has no
        // debug info loaded.
        drgn_program_destroy(prog);
        prog = nullptr;
        return nullptr;
      }
      break;
    }
    case 1: {
      if (auto *err = drgn_program_create(nullptr, &prog)) {
        LOG(ERROR) << "Failed to create empty drgn program: " << err->code
                   << " " << err->message;
        return nullptr;
      }
      const char *path = std::get<fs::path>(target).c_str();
      if (auto *err =
              drgn_program_load_debug_info(prog, &path, 1, false, false)) {
        LOG(ERROR) << "Failed to read debug info: " << err->code << " "
                   << err->message;
        drgn_program_destroy(prog);
        prog = nullptr;
        return prog;
      }
      LOG(INFO) << "Successfully read debug info";
      break;
    }
  }

  return prog;
}
/*
 * Although 'parseFormalParam' has an all-encompassing sounding name, its sole
 * task is to extract the location information for this parameter if any exist.
 *
 * A new argument slot is always appended to `fd`; `valid` on that slot tells
 * whether the locator could be initialised. If the parameter is named "this"
 * the function descriptor is flagged as a method.
 */
static void parseFormalParam(Dwarf_Die &param, struct drgn_module *module,
                             struct drgn_program *prog, Dwarf_Die &funcDie,
                             std::shared_ptr<FuncDesc> &fd) {
  /*
   * NOTE: It is vital that the function descriptors list of arguments
   * are in order and that an entry exists for each argument position
   * even if something goes wrong here when extracting the formal parameter.
   * We *must* pay careful attention to that especially when introducing
   * any new error handling.
   */
  auto farg = fd->addArgument();
  auto *err =
      drgn_object_locator_init(prog, module, &funcDie, &param, &farg->locator);
  if (err) {
    LOG(ERROR) << "Could not initialize drgn_object_locator for parameter: "
               << err->code << ", " << err->message;
    farg->valid = false;
    return;
  }

  const char *name = nullptr;
  Dwarf_Attribute attr;
  if (dwarf_attr_integrate(&param, DW_AT_name, &attr)) {
    if (!(name = dwarf_formstring(&attr))) {
      LOG(ERROR) << "DW_AT_name exists but no name extracted";
    }
  } else {
    VLOG(1) << "Parameter has no DW_AT_name attribute!";
  }

  if (name && !strcmp(name, "this")) {
    VLOG(1) << "'this' pointer found";
    fd->isMethod = true;
  }

  farg->typeName =
      SymbolService::getTypeName(farg->locator.qualified_type.type);
  // `name` is still nullptr for unnamed parameters; inserting a null
  // `const char *` into an ostream is not allowed, so substitute a marker.
  VLOG(1) << "Type of argument '" << (name ? name : "<unnamed>")
          << "': " << farg->typeName;
  farg->valid = true;
  VLOG(1) << "Adding function arg address: " << farg;
}
static bool handleInlinedFunction(const irequest &request,
std::shared_ptr<FuncDesc> funcDesc,
struct drgn_qualified_type &funcType,
Dwarf_Die &funcDie,
struct drgn_module *&module) {
VLOG(1) << "Function '" << funcDesc->symName << "' has been inlined";
struct drgn_type_inlined_instances_iterator *iter = nullptr;
auto *err = drgn_type_inlined_instances_iterator_init(funcType.type, &iter);
if (err) {
LOG(ERROR) << "Error creating inlined instances iterator: " << err->message;
return false;
}
if (strcmp(drgn_type_parameters(funcType.type)[0].name, "this") == 0) {
funcDesc->isMethod = true;
}
auto index = funcDesc->getArgumentIndex(request.arg, false);
if (!index.has_value()) {
return false;
}
auto *argumentName = drgn_type_parameters(funcType.type)[index.value()].name;
struct drgn_type *inlinedInstance = nullptr;
bool foundInstance = false;
// The index at which the parameter was actually found in the inlined
// instance. This may differ from the index of the parameter in the function
// definition, as oftentimes as the result of compiler optimizations, some
// parameters will be omitted altogether from inlined instances.
size_t foundIndex = 0;
while (!foundInstance) {
err = drgn_type_inlined_instances_iterator_next(iter, &inlinedInstance);
if (err) {
LOG(ERROR) << "Error advancing inlined instances iterator: "
<< err->message;
return false;
}
if (!inlinedInstance) {
LOG(ERROR) << "Could not find an inlined instance of this function "
"with the argument '"
<< argumentName << "'";
return false;
}
auto numParameters = drgn_type_num_parameters(inlinedInstance);
auto *parameters = drgn_type_parameters(inlinedInstance);
for (size_t i = 0; i < numParameters; i++) {
if (strcmp(argumentName, parameters[i].name) == 0) {
foundInstance = true;
foundIndex = i;
break;
}
}
}
if (foundIndex != index) {
// We patch the parameters of `inlinedInstance` such that
// each parameter is found at the index one would expect from
// the function definition, matching the representation of the
// abstract root.
auto targetParameter = drgn_type_parameters(inlinedInstance)[foundIndex];
inlinedInstance->_private.num_parameters =
drgn_type_num_parameters(funcType.type);
// Allocating with `calloc` since `drgn` manages the lifetimes of its
// own structures, and it is written in C.
inlinedInstance->_private.parameters = (struct drgn_type_parameter *)calloc(
inlinedInstance->_private.num_parameters,
sizeof(*inlinedInstance->_private.parameters));
inlinedInstance->_private.parameters[index.value()] = targetParameter;
}
err = drgn_type_dwarf_die(inlinedInstance, &funcDie);
if (err) {
LOG(ERROR) << "Error obtaining DWARF DIE from type: " << err->message;
return false;
}
funcType.type = inlinedInstance;
module = inlinedInstance->_private.module;
return true;
}
static std::optional<std::shared_ptr<FuncDesc>> createFuncDesc(
struct drgn_program *prog, const irequest &request) {
VLOG(1) << "Creating function description for: " << request.func;
Dwarf_Die funcDie;
struct drgn_qualified_type ft {};
struct drgn_module *module = nullptr;
if (auto *err = drgn_program_find_type_by_symbol_name(
prog, request.func.c_str(), &ft, &funcDie, &module)) {
LOG(ERROR) << "Error when finding type by symbol: " << err->code << " "
<< err->message;
return std::nullopt;
}
if (drgn_type_kind(ft.type) != DRGN_TYPE_FUNCTION) {
LOG(ERROR) << "Type corresponding to symbol '" << request.func
<< "' is not a function";
return std::nullopt;
}
auto fd = std::make_shared<FuncDesc>(request.func);
if (dwarf_func_inline(&funcDie) == 1) {
if (!handleInlinedFunction(request, fd, ft, funcDie, module)) {
return std::nullopt;
}
}
ptrdiff_t offset = 0;
uintptr_t base = 0;
uintptr_t start = 0;
uintptr_t end = 0;
while ((offset = dwarf_ranges(&funcDie, offset, &base, &start, &end)) > 0) {
fd->ranges.emplace_back(start, end);
}
if (offset < 0) {
LOG(ERROR) << "Error while finding ranges of function: "
<< dwarf_errmsg(dwarf_errno());
return std::nullopt;
}
auto retType = drgn_type_type(ft.type);
auto retTypeName = SymbolService::getTypeName(retType.type);
VLOG(1) << "Retval has type: " << retTypeName;
if (!retTypeName.empty() && retTypeName != "void") {
/*
* I really can't figure out at the minute how to deduce from the DWARF
* which register is used for the return value. I don't think we can just
* assume it's 'rax' as according to the AMD64 ABI V1.0 Section 12.1.3 we
* can use 'rax', 'rdi, and I think it may be more complex than that. More
* investigation required.
* Moreover, we must fabricate a pointer type to the return type for the
* locator code to properly interpret the register's content. This WILL
* break for return-by-value instead of return-by-reference. But this kind
* of assumption is in-line we what we need to improve about return-value
* locating, so this will be good-enough for now.
*
* For now, fabricate a 'Retval' object for rax.
*/
fd->retval = std::make_shared<FuncDesc::Retval>();
fd->retval->typeName = std::move(retTypeName);
fd->retval->valid = true;
}
// Add params
bool isVariadic = false;
fd->arguments.reserve(drgn_type_num_parameters(ft.type));
Dwarf_Die child;
int r = dwarf_child(&funcDie, &child);
while (r == 0) {
switch (dwarf_tag(&child)) {
case DW_TAG_formal_parameter:
if (isVariadic) {
LOG(WARNING) << "Formal parameter after unspecified "
"parameters tag!";
}
parseFormalParam(child, module, prog, funcDie, fd);
break;
case DW_TAG_unspecified_parameters:
if (isVariadic) {
VLOG(1) << "Multiple variadic parameters!";
}
VLOG(1) << "Unspecified parameters tag";
isVariadic = true;
break;
default:
break;
}
r = dwarf_siblingof(&child, &child);
}
if (r == -1) {
LOG(ERROR) << "Couldn't parse DIE children";
}
return fd;
}
/*
* Locate the function descriptor from the function descriptor cache or create
* one if it doesn't exist. Just take the
* up front hit of looking everything up now.
*/
std::shared_ptr<FuncDesc> SymbolService::findFuncDesc(const irequest &request) {
if (auto it = funcDescs.find(request.func); it != end(funcDescs)) {
VLOG(1) << "Found funcDesc for " << request.func;
return it->second;
}
struct drgn_program *drgnProg = getDrgnProgram();
if (drgnProg == nullptr) {
return nullptr;
}
auto fd = createFuncDesc(drgnProg, request);
if (!fd.has_value()) {
LOG(ERROR) << "Failed to create FuncDesc for " << request.func;
return nullptr;
}
VLOG(1) << "findFuncDesc returning " << std::hex << fd.value()->symName;
funcDescs.emplace(request.func, fd.value());
return fd.value();
}
/*
 * Return the descriptor for the global variable `global`, serving it from the
 * `globalDescs` cache when present. Otherwise the symbol's address is located,
 * its type name is resolved through drgn and the new descriptor is cached.
 *
 * Returns nullptr if the symbol can't be located, drgn is unavailable, or
 * drgn can't find the object.
 */
std::shared_ptr<GlobalDesc> SymbolService::findGlobalDesc(
    const std::string &global) {
  const auto cached = globalDescs.find(global);
  if (cached != globalDescs.end()) {
    VLOG(1) << "Found globalDesc for " << global;
    return cached->second;
  }

  auto sym = locateSymbol(global);
  if (!sym) {
    LOG(ERROR) << "Failed to get address for global " << global;
    return nullptr;
  }
  VLOG(1) << "locateGlobal: address of " << global << " " << std::hex
          << sym->addr;

  struct drgn_program *drgnProg = getDrgnProgram();
  if (!drgnProg) {
    return nullptr;
  }

  auto gd = std::make_shared<GlobalDesc>(global, sym->addr);

  // The drgn object must be deinitialised on every exit path below.
  struct drgn_object globalObj {};
  drgn_object_init(&globalObj, drgnProg);
  BOOST_SCOPE_EXIT_ALL(&) {
    drgn_object_deinit(&globalObj);
  };

  auto *findErr = drgn_program_find_object(
      drgnProg, global.c_str(), nullptr, DRGN_FIND_OBJECT_ANY, &globalObj);
  if (findErr != nullptr) {
    LOG(ERROR) << "Failed to lookup global variable '" << global
               << "': " << findErr->code << " " << findErr->message;
    return nullptr;
  }

  gd->typeName = getTypeName(drgn_object_qualified_type(&globalObj).type);

  VLOG(1) << "findGlobalDesc returning " << std::hex << gd;
  globalDescs.emplace(global, gd);
  return gd;
}
/*
 * Name of `type` as produced by OICodeGen::typeToName. A pointer type is
 * replaced by its pointee first (one level only).
 */
std::string SymbolService::getTypeName(struct drgn_type *type) {
  const bool isPointer = drgn_type_kind(type) == DRGN_TYPE_POINTER;
  struct drgn_type *named = isPointer ? drgn_type_type(type).type : type;
  return OICodeGen::typeToName(named);
}

65
src/SymbolService.h Normal file
View File

@ -0,0 +1,65 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <filesystem>
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
#include "Descs.h"
namespace fs = std::filesystem;
struct drgn_program;
struct irequest;
/* Address and size of a symbol, as returned by SymbolService::locateSymbol. */
struct SymbolInfo {
/* Address of the symbol. */
uint64_t addr;
/* Size of the symbol (presumably in bytes - TODO confirm against locateSymbol). */
uint64_t size;
};
/*
 * Symbol and debug-info lookups for a single target, which is either a live
 * process (pid_t) or an executable on disk (fs::path).
 */
class SymbolService {
public:
/* Construct a service for the given target (process id or executable path). */
SymbolService(std::variant<pid_t, fs::path>);
~SymbolService();
/*
 * Lazily create and cache the drgn program for the target.
 * Returns nullptr when drgn is hard-disabled or initialisation fails.
 */
struct drgn_program *getDrgnProgram();
/* Build ID of the target as a hex string, or std::nullopt if not found. */
std::optional<std::string> locateBuildID();
/* Address/size of the named symbol, or std::nullopt if it can't be found. */
std::optional<SymbolInfo> locateSymbol(const std::string &);
/*
 * Descriptor of the function named in the request; cached in `funcDescs`.
 * Returns nullptr on failure.
 */
std::shared_ptr<FuncDesc> findFuncDesc(const irequest &);
/*
 * Descriptor of the named global variable; cached in `globalDescs`.
 * Returns nullptr on failure.
 */
std::shared_ptr<GlobalDesc> findGlobalDesc(const std::string &);
/* Type name of `type`; pointer types are named after their pointee. */
static std::string getTypeName(struct drgn_type *);
/* Caches filled by findFuncDesc / findGlobalDesc, keyed by symbol name. */
std::unordered_map<std::string, std::shared_ptr<FuncDesc>> funcDescs;
std::unordered_map<std::string, std::shared_ptr<GlobalDesc>> globalDescs;
/* When set to true, getDrgnProgram refuses to initialise drgn. */
void setHardDisableDrgn(bool val) {
hardDisableDrgn = val;
}
private:
/* The target: index 0 is a pid, index 1 is a path to an executable. */
std::variant<pid_t, fs::path> target{0};
/* Cached drgn program; nullptr until getDrgnProgram succeeds. */
struct drgn_program *prog{nullptr};
/* NOTE(review): presumably [start, end) executable address ranges - not used in the code visible here. */
std::vector<std::pair<uint64_t, uint64_t>> executableAddrs{};
bool hardDisableDrgn = false;
};

75
src/Syscall.h Normal file
View File

@ -0,0 +1,75 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
extern "C" {
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/types.h>
}
namespace {
template <size_t N>
struct StringLiteral {
constexpr StringLiteral(const char (&str)[N]) {
std::copy_n(str, N, value);
}
char value[N];
};
} // namespace
/*
* The Syscall structure describes a `syscall(2)` in a generic manner.
* The struct is used by `OIDebugger::remoteSyscall` to define the return type
* and statically check the number of arguments being passed. Currently, I don't
* know how to use the _Args to also statically check the type of the arguments.
* In the meantime, I can use the size of _Args to do a simple count check.
*/
template <StringLiteral _Name, unsigned long _SysNum, typename _RetType,
typename... _Args>
struct Syscall {
/* User friendly syscall name */
static constexpr auto Name = _Name.value;
/* The syscall's number (see <sys/syscall.h>) */
static constexpr unsigned long SysNum = _SysNum;
/* The syscall's return type */
using RetType = _RetType;
/* The number of arguments the syscall takes */
static constexpr size_t ArgsCount = sizeof...(_Args);
static_assert(ArgsCount <= 6,
"X64 syscalls support a maximum of 6 arguments");
};
/*
 * The list of syscalls we want to be able to use on the remote process.
 * The types passed to `struct Syscall` come directly from `man 2 <SYSCALL>`:
 * first the name, then the syscall number, the return type and finally the
 * argument types in call order.
 * Don't hesitate to expand this list if you need more syscalls!
 */
using SysOpen = Syscall<"open", SYS_open, int, const char *, int, mode_t>;
using SysClose = Syscall<"close", SYS_close, int, int>;
using SysFsync = Syscall<"fsync", SYS_fsync, int, int>;
using SysMmap =
Syscall<"mmap", SYS_mmap, void *, void *, size_t, int, int, int, off_t>;
using SysMunmap = Syscall<"munmap", SYS_munmap, int, void *, size_t>;

24
src/TimeUtils.h Normal file
View File

@ -0,0 +1,24 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <chrono>
/* Clock used for high resolution timing. */
using time_hr = std::chrono::high_resolution_clock;

/* Convert any std::chrono duration to a count of whole nanoseconds. */
template <typename Duration>
auto time_ns(Duration const &d) {
  using std::chrono::nanoseconds;
  const nanoseconds asNanos = std::chrono::duration_cast<nanoseconds>(d);
  return asNanos.count();
}

134
src/TrapInfo.h Normal file
View File

@ -0,0 +1,134 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdint>
#include <string>
#include "Metrics.h"
#include "OICompiler.h"
extern "C" {
#include <sys/user.h>
}
/*
* Breakpoint traps (INT3) instructions are the primary mechanism used to
* transfer control from the traced process to the debugger. There are several
* variants of traps:
* OID_TRAP_JITCODERET: transfers control from JIT'd code sequences and
* other setup operations.
* OID_TRAP_VECT_ENTRY: transfers control from function entry sites.
* OID_TRAP_VECT_ENTRYRET: transfers control from function entry sites as
* part of function return tracing. Used to capture
* function argument parameters for use in function
* return introspection.
* OID_TRAP_VECT_RET: transfers control from function return sites.
*
* The differing types of re-vectoring operations share a lot of state but
* differ in a few ways. For example, we don't need to stash the original
* instructions for a OID_TRAP_JITCODERET sequence.
*
* Note that we have two maps of trapInfo objects:
* 'activeTraps': a mapping of active breakpoints in the target process
* 'threadTrapState': a mapping of thread to breakpoint. Used when a thread
* is executing a OID_TRAP_JITCODERET sequence as a result of a OID_TRAP_VECT
* breakpoint (i.e., executing JIT'd measurement code as a result of a
* thread vectoring in from an instrumented application).
*
* The enumerator values are also used as indices into the description table
* of `operator<<(std::ostream &, const trapInfo &)`, so keep the two in sync.
*/
enum trapType {
OID_TRAP_JITCODERET = 0,
OID_TRAP_VECT_ENTRY = 1,
OID_TRAP_VECT_ENTRYRET = 2,
OID_TRAP_VECT_RET = 3
};
/*
* NOTE(review): presumably a sentinel trap address used for global variable
* probes, which have no real text address - usage is not visible here.
*/
const uint64_t GLOBAL_VARIABLE_TRAP_ADDR = 0xfeedfacefeedface;
/*
* State associated with a single breakpoint trap: its kind and address, the
* original/patched text, the saved registers of the interrupted thread and
* the arguments captured for it.
*/
class trapInfo {
public:
/* Which variant of trap this is; defaults to OID_TRAP_JITCODERET. */
trapType trapKind{OID_TRAP_JITCODERET};
/* The text address of the breakpoint trap (used for id of trap) */
uintptr_t trapAddr{};
/*
* Relocated memory location in prologue of target object - only used for
* OID_TRAP_VECT* traps.
*/
uintptr_t prologueObjAddr{};
/*
* If this is a OID_TRAP_JITCODERET trap and this is true then vector the
* thread back.
*/
bool fromVect{false};
/*
* For function entry traps we need to stash the first 8 bytes of text.
* (NOTE: we actually only need 1 but ptrace() minimum unit is 8 bytes.)
* The union gives both word and per-byte access to the same 8 bytes.
*/
union {
unsigned long origText{0};
uint8_t origTextBytes[8];
};
/*
* For OID_TRAP_VECT_ENTRYRET traps we construct the patched version
* of all traps before enabling them. The following 8 bytes just make
* the code a bit cleaner for that case and are not used when processing
* traps.
*/
union {
unsigned long patchedText{0};
uint8_t patchedTextBytes[8];
};
/*
* Populated with registers of interrupted thread on entry to trap.
* NOTE(review): has no default initialiser, so it is indeterminate until
* explicitly filled in.
*/
struct user_regs_struct savedRegs;
/* Floating point registers (also without a default initialiser) */
struct user_fpregs_struct savedFPregs;
/* This is just temp while we implement proper register/argument support */
std::vector<std::shared_ptr<FuncDesc::TargetObject>> args;
/*
* Instructions that have been patched over must be replayed so that the
* effects are observable in the thread. To do this we stash the original
* instruction in the target process at this address which is used as a
* single step target for execution.
*/
uintptr_t replayInstAddr{};
/* Tracing span named "trap", constructed together with this object. */
ObjectIntrospection::Metrics::Tracing lifetime{"trap"};
trapInfo() = default;
/*
* t: trap kind, ta: trap (text) address, po: prologue object address,
* fv: whether the trap originates from a vectored thread (see fromVect).
*/
trapInfo(trapType t, uint64_t ta, uint64_t po = 0, bool fv = false)
: trapKind{t}, trapAddr{ta}, prologueObjAddr{po}, fromVect{fv} {
}
};
/*
 * Print a short human readable description of a trap:
 * "Trap <kind description> @<trap address>".
 */
inline std::ostream &operator<<(std::ostream &out, const trapInfo &t) {
  // Indexed by trapType, so the order must follow the enum's values.
  static const char *trapTypeDescs[] = {
      "JIT Code Return",      // OID_TRAP_JITCODERET
      "Vector Entry",         // OID_TRAP_VECT_ENTRY
      "Vector Entry Return",  // OID_TRAP_VECT_ENTRYRET
      "Vector Return",        // OID_TRAP_VECT_RET
  };

  const char *kindDesc = trapTypeDescs[t.trapKind];
  out << "Trap " << kindDesc << " @" << (void *)t.trapAddr;
  return out;
}

894
src/TreeBuilder.cpp Normal file
View File

@ -0,0 +1,894 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "TreeBuilder.h"
#include <glog/logging.h>
#include <boost/algorithm/string/regex.hpp>
#include <boost/scope_exit.hpp>
#include <fstream>
#include <iostream>
#include <limits>
#include <msgpack.hpp>
#include <stdexcept>
#include "ContainerInfo.h"
#include "Metrics.h"
#include "OICodeGen.h"
#include "PaddingHunter.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
extern "C" {
#include <drgn.h>
#include <sys/types.h>
}
/*
 * Set up the serialization buffer and a fresh RocksDB database under
 * /tmp/testdb_<pid>. Any database already at that path is destroyed first.
 * Failure to destroy or open the database is fatal.
 */
TreeBuilder::TreeBuilder(Config c) : config{std::move(c)} {
  buffer = std::make_unique<msgpack::sbuffer>();

  const std::string testdbPath = "/tmp/testdb_" + std::to_string(getpid());

  const auto destroyStatus = rocksdb::DestroyDB(testdbPath, {});
  if (!destroyStatus.ok()) {
    LOG(FATAL) << "RocksDB error while destroying database: "
               << destroyStatus.ToString();
  }

  constexpr int statsDumpPeriodSec = 120;  // two minutes

  rocksdb::Options options;
  options.compression = rocksdb::kZSTD;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  options.stats_dump_period_sec = statsDumpPeriodSec;
  // Keep these two tuning calls last and in this order.
  options.PrepareForBulkLoad();
  options.OptimizeForSmallDb();

  const auto openStatus = rocksdb::DB::Open(options, testdbPath, &db);
  if (!openStatus.ok()) {
    LOG(FATAL) << "RocksDB error while opening database: "
               << openStatus.ToString();
  }
}
/**
* Description of one value to be processed into a Node.
* NOTE: instances are aggregate-initialised positionally (see `process`), so
* the order of the members must not change.
*/
struct TreeBuilder::Variable {
/** The drgn type of the value. */
struct drgn_type *type;
/** Name copied into the resulting Node's `name`; may be empty. */
std::string_view name;
/** Copied into the resulting Node's `typePath`. */
std::string typePath;
/** Isset flag copied into the Node's `isset`; nullopt when none was read. */
std::optional<bool> isset = std::nullopt;
/** When true, `process` records the node but does not descend into it. */
bool isStubbed = false;
};
/**
* Header record of the database. It is written by the TreeBuilder destructor
* under the key `ROOT_NODE_ID`, once all nodes have been inserted.
*/
struct TreeBuilder::DBHeader {
/**
* Version of the database schema. See TreeBuilder.h for more info.
*/
Version version;
/**
* List of IDs corresponding to the root of the probed objects.
*/
std::vector<NodeID> rootIDs;
/** Serialized as a msgpack array in the order: version, rootIDs. */
MSGPACK_DEFINE_ARRAY(version, rootIDs)
};
/**
* One node of the object tree. Each node is serialized with msgpack and stored
* in RocksDB under its `id`.
*/
struct TreeBuilder::Node {
struct ContainerStats {
/**
* The number of elements currently present in the container
* (e.g. `std::vector::size()`).
*/
size_t length;
/**
* The maximum number of elements the container can
* currently hold (e.g. `std::vector::capacity()`).
*/
size_t capacity;
/**
* The static size (see comment for `staticSize` below for clarification on
* what this means) of each element in a container. For example, if this
* node corresponds to a `std::vector<int>` then `elementStaticSize`
* would be `sizeof(int)`.
*/
size_t elementStaticSize;
MSGPACK_DEFINE_ARRAY(length, capacity, elementStaticSize)
};
/**
* The unique identifier for this node, used as the key for this
* node's entry in RocksDB.
*/
NodeID id;
/**
* Roughly corresponds to the name you would use to refer to this node in
* the code (e.g. variable name, member name, etc.). In some cases there is
* no meaningful name (e.g. the elements of a vector, the node behind a
* `typedef`) and this is left empty.
*/
std::string_view name{};
/**
* The type of this node, as it would be written in the code
* (e.g. `std::vector<int>`, `float`, `MyStruct`).
*/
std::string typeName{};
/**
* Copied from `Variable::typePath`: the argument name for root nodes, the
* member name for struct/class members, and empty for the node behind a
* pointer or `typedef`.
*/
std::string typePath{};
/**
* True when this node is a `typedef` that gets expanded. Typedefs of the
* well-known standard integer types are not expanded and leave this false.
*/
bool isTypedef{};
/**
* The compile-time-determinable size (i.e. memory footprint measured in
* bytes) of this node, essentially corresponding to `sizeof(TYPE)`.
* Just like the semantics of `sizeof`, this is inherently inclusive of the
* type's members (if it is a `struct`, `class`, or `union`).
*/
size_t staticSize{};
/**
* The size (i.e. memory usage measured in bytes) of the dynamically
* allocated data used by this node (e.g. the heap-allocated memory
* associated with a `std::vector`). This includes the `dynamicSize` of all
* children (whether they be `struct`/`class` members, or the elements of a
* container).
*/
size_t dynamicSize{};
/**
* Set from the matching entry of the padded structs map (its `savingSize`)
* when padding statistics are enabled and the type name has an entry.
*/
std::optional<size_t> paddingSavingsSize{std::nullopt};
/**
* The pointer value read from the data stream for pointer nodes whose
* pointee is not a function; unset otherwise.
*/
std::optional<uintptr_t> pointer{std::nullopt};
/**
* Container statistics; presumably only set for container nodes
* (filled in outside of this struct - TODO confirm in processContainer).
*/
std::optional<ContainerStats> containerStats{std::nullopt};
/**
* Range of this node's children (start is inclusive, end is exclusive)
*
* If this node represents a container, `children` contains all
* of the container's elements.
* If this node represents a `struct` or `class`, `children`
* contains all of its members.
* If this node is a `typedef` or a pointer, `children` should contain a
* single entry corresponding to the referenced type.
*/
std::optional<std::pair<NodeID, NodeID>> children{std::nullopt};
/**
* Thrift isset value recorded for this member, if one was captured.
*/
std::optional<bool> isset{std::nullopt};
/**
* NOTE: the serialized field order is not the declaration order
* (`containerStats` is written before `pointer`).
*/
MSGPACK_DEFINE_ARRAY(id, name, typeName, typePath, isTypedef, staticSize,
dynamicSize, paddingSavingsSize, containerStats, pointer,
children, isset)
};
/*
 * Finalise the database: write the DBHeader (schema version plus the list of
 * successfully built roots), then close and delete the RocksDB handle.
 * Errors are logged but never thrown.
 */
TreeBuilder::~TreeBuilder() {
  /* FB: Remove error IDs, Strobelight doesn't handle them yet */
  std::erase(rootIDs, ERROR_NODE_ID);

  /*
   * All the Nodes are in the DB by now, so the DBHeader can be inserted
   * with the final list of rootIDs.
   */
  const DBHeader header{.version = VERSION, .rootIDs = std::move(rootIDs)};
  auto serializedHeader = serialize(header);

  rocksdb::WriteOptions writeOpts{};
  writeOpts.disableWAL = true;

  const auto putStatus =
      db->Put(writeOpts, std::to_string(ROOT_NODE_ID), serializedHeader);
  if (!putStatus.ok()) {
    LOG(ERROR) << "RocksDB error while writing DBHeader: "
               << putStatus.ToString();
  }

  const auto closeStatus = db->Close();
  if (!closeStatus.ok()) {
    LOG(ERROR) << "RocksDB error while closing database: "
               << closeStatus.ToString();
  }

  delete db;
}
/*
 * True when no root was built successfully, i.e. every recorded root ID is
 * ERROR_NODE_ID (which includes the case of no roots at all).
 */
bool TreeBuilder::emptyOutput() const {
  for (const auto &id : rootIDs) {
    if (id != ERROR_NODE_ID) {
      return false;
    }
  }
  return true;
}
/*
 * Build the object tree for one probed object and store it in the database.
 *
 * data:          raw values produced for the object; consumed via `next()`
 * argName:       name (and type path) given to the root node
 * type:          drgn type of the root object
 * typeHierarchy: type information used while walking the object
 *
 * A new root ID is always appended to `rootIDs`. If processing throws, that
 * entry is replaced by ERROR_NODE_ID and the exception is rethrown. In every
 * case `th` and `oidData` are reset before control leaves this function, as
 * they point at objects owned by the caller.
 */
void TreeBuilder::build(const std::vector<uint64_t> &data,
                        const std::string &argName, struct drgn_type *type,
                        const TypeHierarchy &typeHierarchy) {
  th = &typeHierarchy;
  oidData = &data;

  pointers.clear();
  oidDataIndex = 3;  // HACK: OID's first 3 outputs are dummy 0s

  ObjectIntrospection::Metrics::Tracing _("build_tree");
  VLOG(1) << "Building tree...";

  {
    auto &rootID = rootIDs.emplace_back(nextNodeID++);

    try {
      // The first value is the address of the root object
      pointers.insert(next());
      process(rootID, {.type = type, .name = argName, .typePath = argName});
    } catch (...) {
      // Mark the failure using the error node ID
      rootID = ERROR_NODE_ID;
      // Don't keep pointers to the caller's arguments past this call
      th = nullptr;
      oidData = nullptr;
      throw;
    }
  }

  VLOG(1) << "Finished building tree";

  rocksdb::CompactRangeOptions opts;
  rocksdb::Status s = db->CompactRange(opts, nullptr, nullptr);
  if (!s.ok()) {
    LOG(FATAL) << "RocksDB error while compacting: " << s.ToString();
  }

  VLOG(1) << "Finished compacting db";

  // Were all object sizes consumed?
  if (oidDataIndex != oidData->size()) {
    LOG(WARNING) << "WARNING: some object sizes not consumed; "
                 << "object tree may be inaccurate. "
                 << "reported: " << oidData->size() << " consumed "
                 << oidDataIndex;
  } else {
    VLOG(1) << "Consumed all object sizes: " << oidDataIndex;
  }

  th = nullptr;
  oidData = nullptr;
}
void TreeBuilder::dumpJson() {
if (!config.jsonPath.has_value()) {
LOG(ERROR) << "No output path was provided for JSON";
return;
}
std::ofstream output(*config.jsonPath);
output << '[';
for (auto rootID : rootIDs) {
if (rootID == ERROR_NODE_ID) {
// On error, output an empty object to maintain offsets
output << "{},";
} else {
JSON(rootID, output);
output << ',';
}
}
/* Remove the trailing comma */
if (!rootIDs.empty()) {
output.seekp(-1, std::ios_base::cur);
}
output << "]\n"; // Text files should end with a newline per POSIX
VLOG(1) << "Finished writing JSON to disk";
}
void TreeBuilder::setPaddedStructs(
std::map<std::string, PaddingInfo> *_paddedStructs) {
this->paddedStructs = _paddedStructs;
}
/*
 * Name of `type`. Types without an owning drgn program use their `oi_name`
 * (or an empty string if that is unset); all others go through
 * OICodeGen::typeToName.
 */
static std::string drgnTypeToName(struct drgn_type *type) {
  if (type->_private.program == nullptr) {
    const char *oiName = type->_private.oi_name;
    return oiName != nullptr ? oiName : "";
  }
  return OICodeGen::typeToName(type);
}
static struct drgn_error *drgnTypeSizeof(struct drgn_type *type,
uint64_t *ret) {
static struct drgn_error incompleteTypeError = {
.code = DRGN_ERROR_TYPE,
.needs_destroy = false,
.errnum = 0,
.path = NULL,
.address = 0,
.message = (char *)"cannot get size of incomplete type",
};
if (drgn_type_kind(type) == DRGN_TYPE_FUNCTION) {
*ret = sizeof(uintptr_t);
return nullptr;
}
if (type->_private.program != nullptr) {
return drgn_type_sizeof(type, ret);
}
// If type has no size, report an error to trigger a sizeMap lookup
if (type->_private.oi_size ==
std::numeric_limits<decltype(type->_private.size)>::max()) {
return &incompleteTypeError;
}
*ret = type->_private.oi_size;
return nullptr;
}
uint64_t TreeBuilder::getDrgnTypeSize(struct drgn_type *type) {
uint64_t size = 0;
struct drgn_error *err = drgnTypeSizeof(type, &size);
BOOST_SCOPE_EXIT(err) {
drgn_error_destroy(err);
}
BOOST_SCOPE_EXIT_END
if (err == nullptr) {
return size;
}
std::string typeName = drgnTypeToName(type);
for (auto &[typeName2, size2] : th->sizeMap)
if (typeName.starts_with(typeName2))
return size2;
if (typeName.starts_with("basic_string<char, std::char_traits<char>, "
"std::allocator<char> >")) {
return sizeof(std::string);
}
throw std::runtime_error("Failed to get size: " + std::to_string(err->code) +
" " + err->message);
}
/*
 * Consume and return the next value of the data being processed.
 * Throws std::runtime_error when all values have already been consumed.
 */
uint64_t TreeBuilder::next() {
  if (oidDataIndex < oidData->size()) {
    const uint64_t value = (*oidData)[oidDataIndex];
    VLOG(3) << "next = " << (void *)value;
    ++oidDataIndex;
    return value;
  }
  throw std::runtime_error("Unexpected end of data");
}
/*
 * A variable is treated as a container when its type is listed in the type
 * hierarchy's `containerTypeMap`, or when it is an array with a non-zero
 * length.
 */
bool TreeBuilder::isContainer(const Variable &variable) {
  if (th->containerTypeMap.contains(variable.type)) {
    return true;
  }
  const bool isArray = drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY;
  return isArray && drgn_type_length(variable.type) > 0;
}
/*
 * Whether `type` is a drgn primitive type once typedefs have been resolved
 * through `typedefMap`. A typedef that is missing from the map is reported as
 * not primitive.
 */
bool TreeBuilder::isPrimitive(struct drgn_type *type) {
  struct drgn_type *current = type;
  for (;;) {
    if (drgn_type_kind(current) != DRGN_TYPE_TYPEDEF) {
      return drgn_type_primitive(current) != DRGN_NOT_PRIMITIVE_TYPE;
    }
    const auto underlying = th->typedefMap.find(current);
    if (underlying == th->typedefMap.end()) {
      return false;
    }
    current = underlying->second;
  }
}
/*
 * Decide whether the object behind `pointer` still has to be processed.
 * Null pointers are never processed; any other pointer is recorded in
 * `pointers` and processed only the first time it is seen.
 */
bool TreeBuilder::shouldProcess(uintptr_t pointer) {
  // emplace(...).second is true only when the pointer was newly inserted
  return pointer != 0U && pointers.emplace(pointer).second;
}
/*
 * The kind of `type` as reported by OICodeGen::drgnKindStr, with the leading
 * "DRGN_TYPE_" stripped.
 */
static std::string_view drgnKindStr(struct drgn_type *type) {
  // sizeof counts the null terminator, hence the -1
  constexpr auto prefixLength = sizeof("DRGN_TYPE_") - 1;
  auto kind = OICodeGen::drgnKindStr(type);
  kind.remove_prefix(prefixLength);
  return kind;
}
/**
* Build the Node for `variable`, recursively process its children, and store
* the node in RocksDB under `id`.
*
* Values are consumed from the data stream with `next()` in a fixed order
* while descending, and child IDs are allocated from `nextNodeID`; the order
* of the statements below therefore matters.
*
* Stubbed variables are recorded with their static size only and are not
* descended into.
*
* Returns the node that was stored. Throws std::runtime_error if the node
* can't be written to RocksDB; exceptions from `next()` and
* `getDrgnTypeSize()` propagate as well.
*/
TreeBuilder::Node TreeBuilder::process(NodeID id, Variable variable) {
Node node{
.id = id,
.name = variable.name,
.typeName = drgnTypeToName(variable.type),
.typePath = std::move(variable.typePath),
.staticSize = getDrgnTypeSize(variable.type),
.isset = variable.isset,
};
VLOG(2) << "Processing node [" << id << "] (name: '" << variable.name
<< "', typeName: '" << node.typeName
<< "', kind: " << drgnKindStr(variable.type) << ")"
<< (variable.isStubbed ? " STUBBED" : "")
<< (th->knownDummyTypeList.contains(variable.type) ? " DUMMY" : "");
if (!variable.isStubbed) {
switch (drgn_type_kind(variable.type)) {
case DRGN_TYPE_POINTER:
// Raw pointers are only followed when explicitly enabled in the config.
if (config.chaseRawPointers) {
// Pointers to incomplete types are stubbed out
// See OICodeGen::enumeratePointerType
if (th->knownDummyTypeList.contains(variable.type)) {
break;
}
auto entry = th->pointerToTypeMap.find(variable.type);
if (entry != th->pointerToTypeMap.end()) {
auto innerTypeKind = drgn_type_kind(entry->second);
// No pointer value is consumed from the data for pointers to functions.
if (innerTypeKind != DRGN_TYPE_FUNCTION) {
node.pointer = next();
// Stop at void pointees, null pointers and pointers seen before.
if (innerTypeKind == DRGN_TYPE_VOID ||
!shouldProcess(*node.pointer)) {
break;
}
}
auto childID = nextNodeID++;
auto child = process(childID, Variable{entry->second, "", ""});
node.children = {childID, childID + 1};
// The whole pointee counts as dynamically allocated for this node.
node.dynamicSize = child.staticSize + child.dynamicSize;
}
}
break;
case DRGN_TYPE_TYPEDEF: {
const static boost::regex standardIntegerRegex{
"((s?size)|(u?int(_fast|_least)?(8|16|32|64|128|ptr))|(ptrdiff))_"
"t"};
// We don't expand typedefs for well-known integer types from `stdint.h`
// to prevent our output from being extremely verbose. We treat them as
// if they are primitives directly (hence this check coming *before* we
// set `node.isTypedef`).
if (boost::regex_match(node.typeName, standardIntegerRegex)) {
break;
}
node.isTypedef = true;
auto entry = th->typedefMap.find(variable.type);
if (entry != th->typedefMap.end()) {
auto childID = nextNodeID++;
auto child = process(childID, Variable{entry->second, "", ""});
node.children = {childID, childID + 1};
// Only the dynamic size is taken from the aliased type's node.
node.dynamicSize = child.dynamicSize;
}
} break;
case DRGN_TYPE_CLASS:
case DRGN_TYPE_STRUCT:
case DRGN_TYPE_ARRAY:
if (th->knownDummyTypeList.contains(variable.type)) {
break;
} else if (isContainer(variable)) {
processContainer(variable, node);
} else {
auto entry = th->classMembersMap.find(variable.type);
if (entry == th->classMembersMap.end() || entry->second.empty()) {
break;
}
const auto &members = entry->second;
// Reserve a contiguous range of IDs for the members up front, so that the
// IDs allocated while recursing into a member come after that range.
node.children = {nextNodeID, nextNodeID + members.size()};
nextNodeID += members.size();
auto childID = node.children->first;
bool captureThriftIsset =
th->thriftIssetStructTypes.contains(variable.type);
for (std::size_t i = 0; i < members.size(); i++) {
std::optional<bool> isset;
if (captureThriftIsset && i < members.size() - 1) {
// Retrieve isset value for each member variable, except Thrift's
// __isset field, which we assume comes last.
// A value of -1 indicates a non-optional field for which we
// don't record an isset value.
auto val = next();
if (val != (uint64_t)-1) {
isset = val;
}
}
const auto &member = members[i];
auto child =
process(childID++,
Variable{member.type, member.member_name,
member.member_name, isset, member.isStubbed});
node.dynamicSize += child.dynamicSize;
}
}
break;
default:
// The remaining types are all described entirely by their static size,
// and hence need no special handling.
break;
}
// Count this instance against its padded struct entry, if there is one.
// NOTE(review): `paddedStructs` is dereferenced without a null check -
// presumably setPaddedStructs is always called first when genPaddingStats
// is enabled; confirm against the callers.
if (config.genPaddingStats) {
auto entry = paddedStructs->find(node.typeName);
if (entry != paddedStructs->end()) {
entry->second.instancesCnt++;
node.paddingSavingsSize = entry->second.savingSize;
}
}
}
// Persist the node (write-ahead log disabled) under its ID.
rocksdb::WriteOptions options{};
options.disableWAL = true;
auto status = db->Put(options, std::to_string(node.id), serialize(node));
if (!status.ok()) {
throw std::runtime_error("RocksDB error while inserting node [" +
std::to_string(node.id) +
"]: " + status.ToString());
}
return node;
}
void TreeBuilder::processContainer(const Variable &variable, Node &node) {
VLOG(1) << "Processing container [" << node.id << "] of type '"
<< node.typeName << "'";
ContainerTypeEnum kind = UNKNOWN_TYPE;
std::vector<struct drgn_qualified_type> elementTypes;
if (drgn_type_kind(variable.type) == DRGN_TYPE_ARRAY) {
kind = ARRAY_TYPE;
struct drgn_type *arrayElementType = nullptr;
size_t numElems = 0;
OICodeGen::getDrgnArrayElementType(variable.type, &arrayElementType,
numElems);
assert(numElems > 0);
elementTypes.push_back(
drgn_qualified_type{arrayElementType, (enum drgn_qualifiers)(0)});
} else {
auto entry = th->containerTypeMap.find(variable.type);
if (entry == th->containerTypeMap.end()) {
throw std::runtime_error(
"Could not find container information for type with name '" +
node.typeName + "'");
}
auto &[containerInfo, templateTypes] = entry->second;
kind = containerInfo.ctype;
for (const auto &tt : templateTypes) {
elementTypes.push_back(tt);
}
}
/**
* Some containers (conditionally) store their contents *directly* inside
* themselves (as opposed to having a pointer to heap-allocated memory).
* `std::pair` and `std::array` are two trivial examples, but some types vary
* whether their contents are stored inline or externally depending on
* runtime conditions (usually the number of elements currently present in
* the container).
*/
bool contentsStoredInline = false;
// Initialize, then take a reference to the underlying value for convenience
// so that we don't have to dereference the optional every time we want to use
// it.
auto &containerStats =
node.containerStats.emplace(Node::ContainerStats{0, 0, 0});
for (auto &type : elementTypes) {
containerStats.elementStaticSize += getDrgnTypeSize(type.type);
}
switch (kind) {
case OPTIONAL_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
if (next() == 0U) {
containerStats.length = 0;
return;
}
break;
case FOLLY_OPTIONAL_TYPE:
// TODO: Not sure why we are capturing pointer for folly::Optional but
// not std::optional. Both are supposed to store data inline.
contentsStoredInline = true;
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
if (*node.pointer == 0) {
containerStats.length = 0;
return;
}
break;
case SHRD_PTR_TYPE:
case UNIQ_PTR_TYPE:
node.pointer = next();
containerStats.length = *node.pointer ? 1 : 0;
containerStats.capacity = 1;
if (!shouldProcess(*node.pointer)) {
return;
}
break;
case TRY_TYPE:
case REF_WRAPPER_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 1;
if (!shouldProcess(*node.pointer)) {
return;
}
break;
case CONTAINER_ADAPTER_TYPE: {
node.pointer = next();
// Copy the underlying container's sizes and stats directly into this
// container adapter
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
// elementTypes is only populated with the underlying container type for
// container adapters
auto containerType = elementTypes[0];
auto child = process(
childID++, {.type = containerType.type,
.name = "",
.typePath = drgnTypeToName(containerType.type) + "[]"});
node.dynamicSize = child.dynamicSize;
node.containerStats = child.containerStats;
return;
}
case STD_VARIANT_TYPE: {
containerStats.length = containerStats.capacity = 1;
containerStats.elementStaticSize = 0;
for (auto &type : elementTypes) {
auto paramSize = getDrgnTypeSize(type.type);
containerStats.elementStaticSize =
std::max(containerStats.elementStaticSize, paramSize);
}
node.dynamicSize = 0;
// When a std::variant is valueless_by_exception, its index will be
// std::variant_npos (i.e. 0xffffffffffffffff).
//
// libstdc++ and libc++ both optimise the storage required for#
// std::variant's index value by using fewer than 8-bytes when possible.
// e.g. for a std::variant<A, B>, only three index values are required:
// one each for A and B and one for variant_npos. variant_npos may be
// represented internally by 0xff and only converted back to
// 0xffffffffffffffff when index() is called.
//
// However, this conversion may be optimised away in the target process,
// so we need to treat any invalid index as variant_npos.
if (auto index = next(); index < elementTypes.size()) {
// Recurse only into the type of the template parameter which
// is currently stored in this variant
node.children = {nextNodeID, nextNodeID + 1};
nextNodeID += 1;
auto childID = node.children->first;
auto elementType = elementTypes[index];
auto child = process(
childID++, {.type = elementType.type,
.name = "",
.typePath = drgnTypeToName(elementType.type) + "[]"});
node.dynamicSize = child.dynamicSize;
}
return;
}
case PAIR_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = 1;
break;
case SEQ_TYPE:
case MICROLIST_TYPE:
case SORTED_VEC_SET_TYPE:
case FEED_QUICK_HASH_SET:
case FEED_QUICK_HASH_MAP:
case FB_HASH_MAP_TYPE:
case FB_HASH_SET_TYPE:
case MAP_SEQ_TYPE:
case FOLLY_SMALL_HEAP_VECTOR_MAP:
case REPEATED_FIELD_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
break;
case LIST_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = next();
break;
case FOLLY_IOBUFQUEUE_TYPE:
node.pointer = next();
containerStats.length = containerStats.capacity = 0;
if (!shouldProcess(*node.pointer)) {
return;
}
// Fallthrough to the IOBuf data if we have a valid pointer
[[fallthrough]];
case FOLLY_IOBUF_TYPE:
containerStats.capacity = next();
containerStats.length = next();
break;
case FB_STRING_TYPE:
node.pointer = next();
containerStats.capacity = next();
containerStats.length = next();
// If this string's data is potentially shared (cutoff for sharing is 255)
if (containerStats.capacity >= 255) {
// Contents aren't actually stored inline in this case,
// but we set this to `true` so that we don't double-count
// this string's data if we have seen it before.
contentsStoredInline = !shouldProcess(*node.pointer);
} else {
// No sense in recording the pointer value if the string isn't
// potentially shared
node.pointer.reset();
// Account for Small String Optimization (SSO)
const int fbStringSsoCutOff = 23;
constexpr size_t ssoCutoff = fbStringSsoCutOff;
contentsStoredInline = containerStats.capacity <= ssoCutoff;
}
break;
case STRING_TYPE:
containerStats.capacity = next();
containerStats.length = next();
// Account for Small String Optimization (SSO)
// LLVM libc++: sizeof(string) = 24, SSO cutoff = 22
// GNU libstdc++: sizeof(string) = 32, SSO cutoff = 15
{
const int llvmSizeOf = 24;
const int llvmSsoCutOff = 22;
[[maybe_unused]] const int gnuSizeOf = 32;
const int gnuSsoCutOff = 15;
assert(node.staticSize == llvmSizeOf || node.staticSize == gnuSizeOf);
size_t ssoCutoff =
node.staticSize == llvmSizeOf ? llvmSsoCutOff : gnuSsoCutOff;
contentsStoredInline = containerStats.capacity <= ssoCutoff;
}
break;
case CAFFE2_BLOB_TYPE:
// This is a weird one, need to ask why we just overwite size like this
node.dynamicSize = next();
return;
case ARRAY_TYPE:
contentsStoredInline = true;
containerStats.length = containerStats.capacity = next();
break;
case SMALL_VEC_TYPE: {
size_t maxInline = next();
containerStats.capacity = next();
containerStats.length = next();
contentsStoredInline = containerStats.capacity <= maxInline;
} break;
case BOOST_BIMAP_TYPE:
// TODO: Hard to know the overhead of boost bimap. It isn't documented in
// the boost docs. Need to look closer at the implementation.
containerStats.length = containerStats.capacity = next();
break;
case SET_TYPE:
case STD_MAP_TYPE:
// Account for node overhead
containerStats.elementStaticSize += next();
containerStats.length = containerStats.capacity = next();
break;
case UNORDERED_SET_TYPE:
case STD_UNORDERED_MAP_TYPE: {
// Account for node overhead
containerStats.elementStaticSize += next();
size_t bucketCount = next();
// Both libc++ and libstdc++ define buckets as an array of raw pointers
node.dynamicSize += bucketCount * sizeof(void *);
containerStats.length = containerStats.capacity = next();
} break;
case F14_MAP:
case F14_SET:
// F14 maps/sets don't actually store their contents inline, but the
// intention of setting this to `true` is to skip the usual calculation
// performed to determine `node.dynamicSize`, since F14 maps very
// conveniently provide a `getAllocatedMemorySize()` method which we can
// use instead.
contentsStoredInline = true;
node.dynamicSize += next();
containerStats.capacity = next();
containerStats.length = next();
break;
case RADIX_TREE_TYPE:
case MULTI_MAP_TYPE:
case BY_MULTI_QRT_TYPE:
containerStats.length = containerStats.capacity = next();
break;
case THRIFT_ISSET_TYPE:
// Dummy container
containerStats.elementStaticSize = 0;
break;
default:
throw std::runtime_error("Unknown container (type was 0x" +
std::to_string(kind) + ")");
break;
}
if (!contentsStoredInline) {
node.dynamicSize +=
containerStats.elementStaticSize * containerStats.capacity;
}
// A cutoff value used to sanity-check our results. If a container
// is larger than this, chances are that we've read uninitialized data,
// or there's a bug in Codegen.
constexpr size_t CONTAINER_SIZE_THRESHOLD = 1ULL << 38;
if (containerStats.elementStaticSize * containerStats.capacity >=
CONTAINER_SIZE_THRESHOLD) {
throw std::runtime_error(
"Container size exceeds threshold, this is likely due to reading "
"uninitialized data in the target process");
}
if (std::ranges::all_of(
elementTypes.cbegin(), elementTypes.cend(),
[this](auto &type) { return isPrimitive(type.type); })) {
VLOG(1)
<< "Container [" << node.id
<< "] contains only primitive types, skipping processing its members";
return;
}
auto numChildren = containerStats.length * elementTypes.size();
if (numChildren == 0) {
VLOG(1) << "Container [" << node.id << "] has no children";
return;
}
node.children = {nextNodeID, nextNodeID + numChildren};
VLOG(1) << "Container [" << node.id << "]'s children cover range ["
<< node.children->first << ", " << node.children->second << ")";
nextNodeID += numChildren;
auto childID = node.children->first;
for (size_t i = 0; i < containerStats.length; i++) {
for (auto &type : elementTypes) {
auto child =
process(childID++, {.type = type.type,
.name = "",
.typePath = drgnTypeToName(type.type) + "[]"});
node.dynamicSize += child.dynamicSize;
}
}
}
/**
 * Packs `data` with msgpack into the builder's reusable `buffer` and returns
 * a view over the packed bytes.
 *
 * The returned view aliases `buffer`, whose previous contents are discarded
 * on every call.
 */
template <class T>
std::string_view TreeBuilder::serialize(const T &data) {
  auto &packed = *buffer;
  packed.clear();
  msgpack::pack(packed, data);

  // The length must be passed explicitly: the packed bytes may legitimately
  // contain null bytes, so a NUL-terminated construction would truncate them.
  const auto *bytes = packed.data();
  const auto length = packed.size();
  return std::string_view(bytes, length);
}
/**
 * Writes the node stored under `id`, followed recursively by all of its
 * children, to `output` as a single JSON object.
 *
 * @param id      Key of the node to load from the database.
 * @param output  Stream receiving the JSON text.
 * @throws std::runtime_error if the node cannot be read from RocksDB.
 */
void TreeBuilder::JSON(NodeID id, std::ofstream &output) {
  std::string raw;
  auto status = db->Get(rocksdb::ReadOptions(), std::to_string(id), &raw);
  if (!status.ok()) {
    throw std::runtime_error("RocksDB error while reading node [" +
                             std::to_string(id) + "]: " + status.ToString());
  }

  Node node;
  msgpack::unpack(raw.data(), raw.size()).get().convert(node);

  // Backslashes would be read as (possibly invalid) escape sequences by a
  // JSON parser, so they are blanked out in the type strings.
  auto blankBackslashes = [](auto &text) {
    for (auto &ch : text) {
      if (ch == '\\') {
        ch = ' ';
      }
    }
  };
  blankBackslashes(node.typePath);
  blankBackslashes(node.typeName);

  // Mandatory fields.
  output << "{"
         << "\"name\":\"" << node.name << "\","
         << "\"typePath\":\"" << node.typePath << "\","
         << "\"typeName\":\"" << node.typeName << "\","
         << "\"isTypedef\":" << (node.isTypedef ? "true" : "false") << ","
         << "\"staticSize\":" << node.staticSize << ","
         << "\"dynamicSize\":" << node.dynamicSize;

  // Optional fields: each one emits its own leading comma so that the object
  // never ends up with a trailing one.
  if (node.paddingSavingsSize.has_value()) {
    output << ",\"paddingSavingsSize\":" << *node.paddingSavingsSize;
  }
  if (node.pointer.has_value()) {
    output << ",\"pointer\":" << *node.pointer;
  }
  if (node.containerStats.has_value()) {
    const auto &stats = *node.containerStats;
    output << ",\"length\":" << stats.length << ","
           << "\"capacity\":" << stats.capacity << ","
           << "\"elementStaticSize\":" << stats.elementStaticSize;
  }
  if (node.isset.has_value()) {
    output << ",\"isset\":" << (*node.isset ? "true" : "false");
  }

  if (node.children.has_value()) {
    output << ",\"members\":[";
    auto [firstChild, endChild] = *node.children;
    assert(firstChild < endChild);

    // JSON forbids trailing commas: emit the first child on its own, then
    // prefix every following child with a comma.
    JSON(firstChild, output);
    auto childID = firstChild + 1;
    while (childID < endChild) {
      output << ",";
      JSON(childID, output);
      childID++;
    }
    output << "]";
  }

  output << "}";
}

114
src/TreeBuilder.h Normal file
View File

@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <map>
#include <memory>
#include <msgpack/sbuffer_decl.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>
#include "Common.h"
// The rocksdb includes are extremely heavy and bloat compile times,
// so we just forward-declare `DB` to avoid making other compile units
// pay the cost of including the relevant headers.
namespace rocksdb {
class DB;
}
// Forward declared, comes from PaddingInfo.h
struct PaddingInfo;
/*
 * Builds a tree of `Node`s out of a flat buffer of 64-bit values and a type
 * hierarchy, stores the nodes in a RocksDB database and can dump the tree as
 * JSON.
 */
class TreeBuilder {
 public:
  struct Config {
    // Don't set default values for the config so the user gets
    // an "uninitialized field" warning if they miss any.
    bool logAllStructs;
    bool chaseRawPointers;
    bool genPaddingStats;
    bool dumpDataSegment;
    std::optional<std::string> jsonPath;
  };

  TreeBuilder(Config);
  ~TreeBuilder();

  void build(const std::vector<uint64_t> &, const std::string &,
             struct drgn_type *, const TypeHierarchy &);
  void dumpJson();
  // Stores a non-owning pointer to the padding information consulted when
  // `Config::genPaddingStats` is enabled.
  void setPaddedStructs(std::map<std::string, PaddingInfo> *paddedStructs);
  bool emptyOutput() const;

 private:
  using Version = uint64_t;
  using NodeID = uint64_t;

  struct DBHeader;
  struct Node;
  struct Variable;

  // Non-owning pointers to the inputs of the current build.
  const TypeHierarchy *th = nullptr;
  const std::vector<uint64_t> *oidData = nullptr;
  std::map<std::string, PaddingInfo> *paddedStructs = nullptr;

  /*
   * The RocksDB output needs versioning so they are imported correctly in
   * Scuba. Version 1 had no concept of versioning and no header.
   * We currently are at version 2.1:
   *   - Introduce the Error ID at index 1023, but don't output it
   * Changelog v2:
   *   - Introduce the DBHeader at index 0
   *   - Introduce the versioning
   *   - Handle multiple root_ids, to import multiple objects in Scuba
   */
  static constexpr Version VERSION = 2;
  static constexpr NodeID ROOT_NODE_ID = 0;
  static constexpr NodeID ERROR_NODE_ID = 1023;
  static constexpr NodeID FIRST_NODE_ID = 1024;

  /*
   * The first 1024 IDs are reserved for future use.
   * ID 0: DBHeader
   * ID 1023: Error - an error occurred while TreeBuilding
   */
  NodeID nextNodeID = FIRST_NODE_ID;

  const Config config{};
  // Given a default so that it never holds an indeterminate value, like
  // every other scalar member of this class.
  // NOTE(review): presumably the read position of `next()` within `oidData`
  // - confirm against the implementation.
  size_t oidDataIndex = 0;

  std::vector<NodeID> rootIDs{};

  /*
   * Used exclusively by `TreeBuilder::serialize()` to avoid having
   * to allocate a new buffer every time we serialize a `Node`.
   */
  std::unique_ptr<msgpack::sbuffer> buffer;
  rocksdb::DB *db = nullptr;
  // NOTE(review): presumably the set of pointers already visited, consulted
  // by `shouldProcess()` - confirm against the implementation.
  std::unordered_set<uintptr_t> pointers{};

  uint64_t getDrgnTypeSize(struct drgn_type *type);
  uint64_t next();
  bool isContainer(const Variable &variable);
  bool isPrimitive(struct drgn_type *type);
  bool shouldProcess(uintptr_t pointer);
  Node process(NodeID id, Variable variable);
  void processContainer(const Variable &variable, Node &node);
  template <class T>
  std::string_view serialize(const T &);
  void JSON(NodeID id, std::ofstream &output);
};

26
src/X86InstDefs.h Normal file
View File

@ -0,0 +1,26 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// Raw x86-64 machine-code bytes, named after the instruction they encode.

// INT3: one-byte breakpoint trap.
static constexpr uint8_t int3Inst = 0xCC;
// NOP: one-byte no-operation.
static constexpr uint8_t nopInst = 0x90;
// The two opcode bytes (REX.W prefix, then B8+reg) of a 64-bit immediate
// move; the 8-byte immediate that follows them is not part of the constants.
static constexpr uint8_t movabsrdi0Inst = 0x48; /* movabs %rdi */
static constexpr uint8_t movabsrdi1Inst = 0xbf;
static constexpr uint8_t movabsrax0Inst = 0x48; /* movabs %rax */
static constexpr uint8_t movabsrax1Inst = 0xb8;
// FF D0: indirect call through %rax.
static constexpr uint8_t callRaxInst0Inst = 0xff;
static constexpr uint8_t callRaxInst1Inst = 0xd0;
// One 8-byte word which, laid out in little-endian byte order, reads
// CC 0F 05 90 90 90 90 90: int3, syscall, then five nops.
// The literal does not fit in a signed 64-bit `long`, so the stored value is
// the same bit pattern reinterpreted as a negative number.
// NOTE(review): presumably `long` was chosen to match the word type used when
// writing into the target process - confirm at the use site.
static constexpr long syscallInsts = 0x9090909090050fcc;

65
test/CMakeLists.txt Normal file
View File

@ -0,0 +1,65 @@
# Locate GoogleTest/GoogleMock and expose the libraries every unit test must
# link against through GMOCK_MAIN_LIBS.
find_package(GTest REQUIRED)
if (TARGET GTest::gmock_main)
# Only modern versions of GTest define the imported gmock_main target.
set(GMOCK_MAIN_LIBS GTest::gmock_main)
else()
# To support older versions of the library, manually locate the static
# gmock_main and gmock archives and link them together with GTest::GTest.
find_library(GMOCK_MAIN_LIB NAMES libgmock_main.a REQUIRED)
find_library(GMOCK_LIB NAMES libgmock.a REQUIRED)
set(GMOCK_MAIN_LIBS ${GMOCK_MAIN_LIB} ${GMOCK_LIB} GTest::GTest)
endif()
message(STATUS "Using GMockMain: ${GMOCK_MAIN_LIBS}")
# Enable CTest and load the module providing gtest_discover_tests().
enable_testing()
include(GoogleTest)
# cpp_unittest(NAME <name> SRCS <src>... [DEPS <lib>...] [PROPERTIES <prop>...])
#
# Defines the test executable <name> built from SRCS, links it against
# GoogleMock's main library, glog and DEPS, reuses the precompiled headers of
# `oicore`, and registers the GoogleTest cases it contains with CTest,
# forwarding PROPERTIES to every discovered test.
function(cpp_unittest)
  cmake_parse_arguments(PARSE_ARGV 0 TEST "" "NAME" "SRCS;DEPS;PROPERTIES")

  add_executable(${TEST_NAME} ${TEST_SRCS})
  target_link_libraries(${TEST_NAME} ${GMOCK_MAIN_LIBS} glog ${TEST_DEPS})
  target_precompile_headers(${TEST_NAME} REUSE_FROM oicore)

  gtest_discover_tests(${TEST_NAME} PROPERTIES ${TEST_PROPERTIES})
endfunction()
# Integration tests
# These tests can't be run in parallel with the other tests: doing so
# triggers a conflict that shows up as the following error:
# dwfl_linux_proc_report: bzip2 decompression failed
# The integration tests are therefore triggered by the main Makefile instead,
# and the CTest registration below is kept disabled.
#add_test(
# NAME integration
# COMMAND make test-integration
# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
#)
# Unit tests
cpp_unittest(
NAME test_parser
SRCS test_parser.cpp
DEPS oid_parser
)
cpp_unittest(
NAME test_compiler
SRCS test_compiler.cpp
DEPS oicore
)
# Add drgn's build directory to the include path before descending into the
# integration tests.
# NOTE(review): presumably needed for headers generated by the drgn build -
# confirm.
include_directories("${PROJECT_SOURCE_DIR}/extern/drgn/build")
add_subdirectory("integration")

14
test/Makefile Normal file
View File

@ -0,0 +1,14 @@
# Builds the standalone test targets (through make's implicit rules) and runs
# the Python integration test-suite.
#
# Variables:
#   FILTER  Optional test-name filter forwarded to `integration.py -k`.

CXX=clang++
CXXFLAGS=-g -fdebug-types-section -I../extern/folly -O2 -pthread -no-pie

FILTER ?=

TARGETS=integration_mttest integration_sleepy integration_packed integration_cycles mapiter mttest1 mttest2 mttest2_inline mttest3 mttest4 tester userDef1 vector inlined_test

# These targets are commands, not files: declare them phony so that a stray
# file named `all`, `clean` or `test-integration` can never make them appear
# up to date.
.PHONY: all clean test-integration

# Target-specific flag: only mttest2_inline is built with this define.
mttest2_inline: CXXFLAGS+=-DMTTEST2_INLINE_DO_STUFF

all: $(TARGETS)

clean:
	rm -f $(TARGETS)

test-integration: integration_mttest integration_sleepy
	sudo python3 integration.py -k '$(FILTER)'

31
test/TARGETS Normal file
View File

@ -0,0 +1,31 @@
load("@fbcode_macros//build_defs:cpp_binary.bzl", "cpp_binary")

# Single-source test binaries without any dependency.
cpp_binary(
    name = "mapiter",
    srcs = ["mapiter.cpp"],
    deps = [],
)

cpp_binary(
    name = "vector",
    srcs = ["vector.cpp"],
    deps = [],
)

cpp_binary(
    name = "userDef1",
    srcs = ["userDef1.cpp"],
    deps = [],
)

# Multi-threaded test binary: additionally links pthread from glibc.
cpp_binary(
    name = "mttest1",
    srcs = ["mttest1.cpp"],
    deps = ["//common/init:init"],
    external_deps = [("glibc", None, "pthread")],
)

58
test/ci.oid.toml Normal file
View File

@ -0,0 +1,58 @@
# Configuration used when running on CI.
# Every path below is absolute and assumes the repository is checked out at
# /home/circleci/project.

# Container type definitions to load, one TOML file per container type.
[types]
containers = [
"/home/circleci/project/types/array_type.toml",
"/home/circleci/project/types/string_type.toml",
"/home/circleci/project/types/cxx11_string_type.toml",
"/home/circleci/project/types/folly_iobuf_type.toml",
"/home/circleci/project/types/folly_iobuf_queue_type.toml",
"/home/circleci/project/types/set_type.toml",
"/home/circleci/project/types/unordered_set_type.toml",
"/home/circleci/project/types/seq_type.toml",
"/home/circleci/project/types/list_type.toml",
"/home/circleci/project/types/cxx11_list_type.toml",
"/home/circleci/project/types/deque_list_type.toml",
"/home/circleci/project/types/shrd_ptr_type.toml",
"/home/circleci/project/types/uniq_ptr_type.toml",
"/home/circleci/project/types/std_map_type.toml",
"/home/circleci/project/types/std_unordered_map_type.toml",
"/home/circleci/project/types/pair_type.toml",
"/home/circleci/project/types/stack_container_adapter_type.toml",
"/home/circleci/project/types/queue_container_adapter_type.toml",
"/home/circleci/project/types/priority_queue_container_adapter_type.toml",
"/home/circleci/project/types/ref_wrapper_type.toml",
"/home/circleci/project/types/multi_map_type.toml",
"/home/circleci/project/types/folly_small_heap_vector_map.toml",
"/home/circleci/project/types/folly_optional_type.toml",
"/home/circleci/project/types/optional_type.toml",
"/home/circleci/project/types/try_type.toml",
"/home/circleci/project/types/fb_string_type.toml",
"/home/circleci/project/types/small_vec_type.toml",
"/home/circleci/project/types/f14_fast_map.toml",
"/home/circleci/project/types/f14_node_map.toml",
"/home/circleci/project/types/f14_vector_map.toml",
"/home/circleci/project/types/f14_fast_set.toml",
"/home/circleci/project/types/f14_node_set.toml",
"/home/circleci/project/types/f14_vector_set.toml",
"/home/circleci/project/types/sorted_vec_set_type.toml",
"/home/circleci/project/types/map_seq_type.toml",
"/home/circleci/project/types/boost_bimap_type.toml",
"/home/circleci/project/types/repeated_field_type.toml",
"/home/circleci/project/types/repeated_ptr_field_type.toml",
"/home/circleci/project/types/caffe2_blob_type.toml",
"/home/circleci/project/types/std_variant.toml",
"/home/circleci/project/types/thrift_isset_type.toml",
]
# Header search paths.
[headers]
# Project-provided headers (the vendored folly checkout).
user_paths = [
"/home/circleci/project/extern/folly",
]
# Toolchain and system headers: the C++ 11 standard library directories and
# clang 12's builtin headers. These are tied to the compiler versions
# installed on the CI image.
system_paths = [
"/usr/include/c++/11",
"/usr/include/x86_64-linux-gnu/c++/11",
"/usr/include/c++/11/backward",
"/usr/local/include",
"/usr/lib/llvm-12/lib/clang/12.0.1/include",
"/usr/include/x86_64-linux-gnu",
"/usr/include",
]

18
test/convert_to_junit.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
#
# Converts the CTest XML report found under <tests_dir>/Testing into a JUnit
# report written to build/results/ctest/results.xml (relative to the current
# working directory), using the ctest_to_junit.xsl stylesheet located next to
# this script.
#
# Usage: convert_to_junit.sh <tests_dir>

set -e
set -u

# Fail with a usage message rather than bash's generic "unbound variable"
# error when the argument is missing.
if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <tests_dir>" >&2
  exit 1
fi

# All expansions are quoted so that paths containing spaces keep working.
ctest2junit_xsl="$(readlink -f "$(dirname "${BASH_SOURCE[0]}")")/ctest_to_junit.xsl"
tests_dir="$1"

if [ ! -d "$tests_dir" ]; then
  echo "ERROR! $tests_dir is not directory!" >&2
  exit 1
fi

# The first line of Testing/TAG names the sub-directory holding the results
# of the most recent CTest run.
tag="$(head -n 1 "$tests_dir/Testing/TAG")"

# Make sure the destination directory exists before xsltproc writes into it.
mkdir -p build/results/ctest
xsltproc --output build/results/ctest/results.xml "$ctest2junit_xsl" "$tests_dir/Testing/$tag/Test.xml"

echo "Test report converted successfully"

129
test/ctest_to_junit.xsl Normal file
View File

@ -0,0 +1,129 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Transforms a CTest Test.xml report (root element: Site) into a JUnit
     style testsuite document. -->
<!-- Prefix prepended to the classname of every emitted testcase. -->
<xsl:param name="pass" select="'this'"/>
<xsl:output method="xml" indent="yes"/>
<!-- Root template: emits the single testsuite element. Its attributes are
     computed from the Testing subtree, its properties and system-out are
     copied from the attributes of the Site element. -->
<xsl:template match="/Site">
<xsl:variable name="SuiteName"><xsl:value-of select="@Name"/></xsl:variable>
<xsl:variable name="StartDate"><xsl:value-of select="Testing/StartDateTime"/></xsl:variable>
<xsl:variable name="DoTime"><xsl:value-of select="Testing/EndTestTime - Testing/StartTestTime"/></xsl:variable>
<xsl:variable name="TotalTests"><xsl:value-of select="count(Testing/Test)"/></xsl:variable>
<xsl:variable name="Skipped"><xsl:value-of select="count(Testing/Test[@Status='notrun'])"/></xsl:variable>
<xsl:variable name="Disabled"><xsl:value-of select="count(Testing/Test/Results/Measurement[Value='Disabled'])"/></xsl:variable>
<xsl:variable name="Failed"><xsl:value-of select="count(Testing/Test[@Status='failed'])"/></xsl:variable>
<testsuite name="{$SuiteName}" time="{$DoTime}" timestamp="{$StartDate}" tests="{$TotalTests}" failures="{$Failed}" skipped="{$Skipped}" disabled="{$Disabled}">
<xsl:variable name="BuildName"><xsl:value-of select="@BuildName"/></xsl:variable>
<xsl:variable name="BuildStamp"><xsl:value-of select="@BuildStamp"/></xsl:variable>
<xsl:variable name="Name"><xsl:value-of select="@Name"/></xsl:variable>
<xsl:variable name="Generator"><xsl:value-of select="@Generator"/></xsl:variable>
<xsl:variable name="CompilerName"><xsl:value-of select="@CompilerName"/></xsl:variable>
<xsl:variable name="OSName"><xsl:value-of select="@OSName"/></xsl:variable>
<xsl:variable name="Hostname"><xsl:value-of select="@Hostname"/></xsl:variable>
<xsl:variable name="OSRelease"><xsl:value-of select="@OSRelease"/></xsl:variable>
<xsl:variable name="OSVersion"><xsl:value-of select="@OSVersion"/></xsl:variable>
<xsl:variable name="OSPlatform"><xsl:value-of select="@OSPlatform"/></xsl:variable>
<xsl:variable name="Is64Bits"><xsl:value-of select="@Is64Bits"/></xsl:variable>
<xsl:variable name="VendorString"><xsl:value-of select="@VendorString"/></xsl:variable>
<xsl:variable name="VendorID"><xsl:value-of select="@VendorID"/></xsl:variable>
<xsl:variable name="FamilyID"><xsl:value-of select="@FamilyID"/></xsl:variable>
<xsl:variable name="ModelID"><xsl:value-of select="@ModelID"/></xsl:variable>
<xsl:variable name="ProcessorCacheSize"><xsl:value-of select="@ProcessorCacheSize"/></xsl:variable>
<xsl:variable name="NumberOfLogicalCPU"><xsl:value-of select="@NumberOfLogicalCPU"/></xsl:variable>
<xsl:variable name="NumberOfPhysicalCPU"><xsl:value-of select="@NumberOfPhysicalCPU"/></xsl:variable>
<xsl:variable name="TotalVirtualMemory"><xsl:value-of select="@TotalVirtualMemory"/></xsl:variable>
<xsl:variable name="TotalPhysicalMemory"><xsl:value-of select="@TotalPhysicalMemory"/></xsl:variable>
<xsl:variable name="LogicalProcessorsPerPhysical"><xsl:value-of select="@LogicalProcessorsPerPhysical"/></xsl:variable>
<xsl:variable name="ProcessorClockFrequency"><xsl:value-of select="@ProcessorClockFrequency"/></xsl:variable>
<properties>
<property name="BuildName" value="{$BuildName} {$DoTime}" />
<property name="BuildStamp" value="{$BuildStamp}" />
<property name="Name" value="{$Name}" />
<property name="Generator" value="{$Generator}" />
<property name="CompilerName" value="{$CompilerName}" />
<property name="OSName" value="{$OSName}" />
<property name="Hostname" value="{$Hostname}" />
<property name="OSRelease" value="{$OSRelease}" />
<property name="OSVersion" value="{$OSVersion}" />
<property name="OSPlatform" value="{$OSPlatform}" />
<property name="Is64Bits" value="{$Is64Bits}" />
<property name="VendorString" value="{$VendorString}" />
<property name="VendorID" value="{$VendorID}" />
<property name="FamilyID" value="{$FamilyID}" />
<property name="ModelID" value="{$ModelID}" />
<property name="ProcessorCacheSize" value="{$ProcessorCacheSize}" />
<property name="NumberOfLogicalCPU" value="{$NumberOfLogicalCPU}" />
<property name="NumberOfPhysicalCPU" value="{$NumberOfPhysicalCPU}" />
<property name="TotalVirtualMemory" value="{$TotalVirtualMemory}" />
<property name="TotalPhysicalMemory" value="{$TotalPhysicalMemory}" />
<property name="LogicalProcessorsPerPhysical" value="{$LogicalProcessorsPerPhysical}" />
<property name="ProcessorClockFrequency" value="{$ProcessorClockFrequency}" />
</properties>
<xsl:apply-templates select="Testing/Test"/>
<system-out>
BuildName: <xsl:value-of select="$BuildName" />
BuildStamp: <xsl:value-of select="$BuildStamp" />
Name: <xsl:value-of select="$Name" />
Generator: <xsl:value-of select="$Generator" />
CompilerName: <xsl:value-of select="$CompilerName" />
OSName: <xsl:value-of select="$OSName" />
Hostname: <xsl:value-of select="$Hostname" />
OSRelease: <xsl:value-of select="$OSRelease" />
OSVersion: <xsl:value-of select="$OSVersion" />
OSPlatform: <xsl:value-of select="$OSPlatform" />
Is64Bits: <xsl:value-of select="$Is64Bits" />
VendorString: <xsl:value-of select="$VendorString" />
VendorID: <xsl:value-of select="$VendorID" />
FamilyID: <xsl:value-of select="$FamilyID" />
ModelID: <xsl:value-of select="$ModelID" />
ProcessorCacheSize: <xsl:value-of select="$ProcessorCacheSize" />
NumberOfLogicalCPU: <xsl:value-of select="$NumberOfLogicalCPU" />
NumberOfPhysicalCPU: <xsl:value-of select="$NumberOfPhysicalCPU" />
TotalVirtualMemory: <xsl:value-of select="$TotalVirtualMemory" />
TotalPhysicalMemory: <xsl:value-of select="$TotalPhysicalMemory" />
LogicalProcessorsPerPhysical: <xsl:value-of select="$LogicalProcessorsPerPhysical" />
ProcessorClockFrequency: <xsl:value-of select="$ProcessorClockFrequency" />
</system-out>
</testsuite>
</xsl:template>
<!-- Per-test template: emits one testcase element. The classname is the
     value of the pass parameter followed by the test Path without its first
     character; the time is the 'Execution Time' named measurement. Passed
     tests carry their captured output in system-out, failed tests in a
     failure element whose message combines the 'Exit Code' and 'Exit Value'
     named measurements, and tests that did not run in a skipped element. -->
<xsl:template match="Testing/Test">
<xsl:variable name="testcasename"><xsl:value-of select="Name"/></xsl:variable>
<xsl:variable name="testclassname"><xsl:value-of select="concat($pass, substring(Path,2))"/></xsl:variable>
<xsl:variable name="exectime">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name='Execution Time'">
<xsl:value-of select="Value"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<testcase name="{$testcasename}" classname="{$testclassname}" time="{$exectime}">
<xsl:if test="@Status = 'passed'">
<system-out><xsl:value-of select="Results/Measurement/Value/text()"/></system-out>
</xsl:if>
<xsl:if test="@Status = 'failed'">
<xsl:variable name="failtype">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name = 'Exit Code'">
<xsl:value-of select="Value"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<xsl:variable name="failcode">
<xsl:for-each select="Results/NamedMeasurement">
<xsl:if test="@name = 'Exit Value'">
<xsl:value-of select="Value"/>
</xsl:if>
</xsl:for-each>
</xsl:variable>
<failure message="{$failtype} ({$failcode})"><xsl:value-of select="Results/Measurement/Value/text()" /></failure>
</xsl:if>
<xsl:if test="@Status = 'notrun'">
<skipped><xsl:value-of select="Results/Measurement/Value/text()" /></skipped>
</xsl:if>
</testcase>
</xsl:template>
</xsl:stylesheet>

70
test/inlined_test.cpp Normal file
View File

@ -0,0 +1,70 @@
#include <unistd.h>
#include <cstdlib>
#include <vector>
// Marks a function as file-local and forces the compiler to inline it.
#define INLINE static inline __attribute__((always_inline))

// Returns a new vector holding every element of `x` followed by every
// element of `y`. The storage for both inputs is reserved up front.
template <class T>
INLINE std::vector<T> combine(const std::vector<T>& x,
                              const std::vector<T>& y) {
  std::vector<T> merged;
  merged.reserve(x.size() + y.size());
  merged.insert(merged.end(), x.begin(), x.end());
  merged.insert(merged.end(), y.begin(), y.end());
  return merged;
}
// Concatenates all inner vectors of `vec` into a single vector.
// Each inner vector is placed *in front of* what has been accumulated so far,
// so the inner vectors end up in reverse order in the result.
template <class T>
INLINE std::vector<T> flatten(const std::vector<std::vector<T>>& vec) {
  std::vector<T> accumulated;
  for (auto inner = vec.begin(); inner != vec.end(); ++inner) {
    accumulated = combine(*inner, accumulated);
  }
  return accumulated;
}
// Flattens `x` and `y` independently, then returns the flattened `x`
// followed by the flattened `y`.
template <class T>
INLINE std::vector<T> flatten_combine(const std::vector<std::vector<T>>& x,
                                      const std::vector<std::vector<T>>& y) {
  const std::vector<T> flat_x = flatten(x);
  const std::vector<T> flat_y = flatten(y);
  return combine(flat_x, flat_y);
}
#define MAX_SIZE 256

// Replaces the contents of `vec` with (n % MAX_SIZE) values drawn from rand().
void fill(std::vector<int>& vec, int n) {
  const int count = n % MAX_SIZE;
  vec.clear();
  vec.reserve(count);
  int remaining = count;
  while (remaining-- > 0) {
    vec.push_back(rand());
  }
}
void fill_vec(std::vector<std::vector<int>>& vec, int n) {
n %= MAX_SIZE;
vec.clear();
vec.reserve(n);
for (int i = 0; i < n; i++) {
vec.emplace_back();
auto& last = vec.back();
fill(last, rand());
}
}
// Test target: 100 times, builds two random vectors of vectors, runs them
// through the always-inlined flatten_combine() and then sleeps for a second.
// The exit code is an accumulation of rand()-derived values (truncated to int).
int main() {
  size_t exit_code = 0;
  for (int i = 0; i < 100; i++) {
    std::vector<std::vector<int>> x;
    std::vector<std::vector<int>> y;
    fill_vec(x, rand());
    fill_vec(y, rand());
    auto result = flatten_combine(x, y);
    for (auto value : result) {
      // `value` comes from rand() and may equal RAND_MAX, which can be
      // INT_MAX; widen before adding 1 so the modulus never overflows int.
      exit_code += rand() % (static_cast<long long>(value) + 1);
    }
    sleep(1);
  }
  return (int)exit_code;
}

View File

@ -0,0 +1 @@
entry:_ZL7flattenIiESt6vectorIT_SaIS1_EERKS0_IS3_SaIS3_EE:arg0

View File

@ -0,0 +1 @@
entry:_ZL15flatten_combineIiESt6vectorIT_SaIS1_EERKS0_IS3_SaIS3_EES7_:arg0

300
test/integration.py Normal file
View File

@ -0,0 +1,300 @@
import glob
import json
import os.path
import shutil
import subprocess
import tempfile
import unittest
from contextlib import contextmanager
from enum import Enum
# Exit codes returned by oid, numbered consecutively from 0 in the order
# listed here.
_EXIT_STATUS_NAMES = [
    "SUCCESS",
    "USAGE_ERROR",
    "FILE_NOT_FOUND_ERROR",
    "CONFIG_GENERATION_ERROR",
    "SCRIPT_PARSING_ERROR",
    "STOP_TARGET_ERROR",
    "SEGMENT_REMOVAL_ERROR",
    "SEGMENT_INIT_ERROR",
    "COMPILATION_ERROR",
    "PATCHING_ERROR",
    "PROCESSING_TARGET_DATA_ERROR",
    "OID_OBJECT_ERROR",
]
ExitStatus = Enum("ExitStatus", _EXIT_STATUS_NAMES, start=0)
OUTPUT_PATH = "oid_out.json"
def copy_file(from_path, to_path):
    """
    Copies a file from `from_path` to `to_path`, preserving its permissions.

    `shutil.copy2` copies the file contents together with its metadata,
    including the permission bits, so no separate `copymode` call is needed.
    """
    shutil.copy2(from_path, to_path)
class OIDebuggerTestCase(unittest.TestCase):
def setUp(self):
# Store PWD before moving out of it
self.original_working_directory = os.getcwd()
# Store OI's source directory before moving out of it
self.oid_source_dir = os.path.dirname(
os.path.dirname(os.path.abspath(__file__))
)
# This needs to be a class variable, otherwise it won't be referenced
# by any object alive by the end of this class method's execution and
# and the directory will be automatically removed before executing the
# tests themselves.
self.temp = tempfile.TemporaryDirectory(
dir=os.path.join(self.oid_source_dir, "build/")
)
os.chdir(self.temp.name)
self.oid = os.path.join(self.oid_source_dir, "build/oid")
self.oid_conf = os.path.join(self.oid_source_dir, "build/testing.oid.toml")
self.binary_path = os.path.join(
self.oid_source_dir, "test", "integration_mttest"
)
self.sleepy_binary_path = os.path.join(
self.oid_source_dir, "test", "integration_sleepy"
)
self.custom_generated_code_file = os.path.join(
self.temp.name, "custom_oid_output.cpp"
)
self.custom_generated_code_file = f"{self.temp.name}/custom_oid_output.cpp"
self.script_packed = f"{self.temp.name}/integration_packed_arg0.oid"
self.default_script = "integration_entry_doStuff_arg0.oid"
def tearDown(self):
os.chdir(self.original_working_directory)
self.temp.cleanup()
@contextmanager
def spawn_oid(self, script_path, test_cmd=None, oid_opt=""):
"""
Spawns a test binary and oid with the specified oid binary path, script
and test command.
Will take care of cleaning up the process properly.
"""
if test_cmd is None:
test_cmd = self.binary_path + " 100"
debuggee_proc = subprocess.Popen(
test_cmd,
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
debuggee_pid = debuggee_proc.pid
cmd = f"OID_METRICS_TRACE=time {self.oid} --dump-json --config-file {self.oid_conf} --script {script_path} -t60 --pid {debuggee_pid} {oid_opt}"
proc = subprocess.run(
cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
try:
yield proc
finally:
debuggee_proc.terminate()
outs, _errs = debuggee_proc.communicate()
def script(self, script_name=None):
return os.path.join(
self.oid_source_dir, "test", script_name or self.default_script
)
def expectReturncode(self, proc, returncode):
if proc.returncode != returncode.value:
print()
print(proc.stdout.decode("utf-8"))
print(proc.stderr.decode("utf-8"))
self.assertEqual(proc.returncode, returncode.value)
def test_help_works(self):
proc = subprocess.run(f"{self.oid} --help", shell=True, stdout=subprocess.PIPE)
self.expectReturncode(proc, ExitStatus.SUCCESS)
self.assertIn(b"usage: ", proc.stdout)
def test_attach_more_than_once_works(self):
with subprocess.Popen(
f"{self.binary_path} 100",
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
) as debuggee_proc:
debuggee_pid = debuggee_proc.pid
for i in range(2):
proc = subprocess.run(
f"{self.oid} --dump-json --config-file {self.oid_conf} --script {self.script()} -t60 --pid {debuggee_pid}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.expectReturncode(proc, ExitStatus.SUCCESS)
debuggee_proc.terminate()
with open(OUTPUT_PATH, "r") as f:
output = json.loads(f.read())
self.assertEqual(output[0]["typeName"], "Foo")
self.assertEqual(output[0]["staticSize"], 2176)
self.assertEqual(output[0]["dynamicSize"], 76)
self.assertEqual(len(output[0]["members"]), 25)
def test_data_segment_size_change(self):
with subprocess.Popen(
f"{self.binary_path} 1000",
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
) as debuggee_proc:
debuggee_pid = debuggee_proc.pid
for data_segment_size in ("1M", "2M", "1M"):
proc = subprocess.run(
f"{self.oid} --dump-json --config-file {self.oid_conf} --script {self.script()} -t60 --pid {debuggee_pid} -x {data_segment_size}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.expectReturncode(proc, ExitStatus.SUCCESS)
debuggee_proc.terminate()
with open(OUTPUT_PATH, "r") as f:
output = json.loads(f.read())
self.assertEqual(output[0]["typeName"], "Foo")
self.assertEqual(output[0]["staticSize"], 2176)
self.assertEqual(output[0]["dynamicSize"], 76)
self.assertEqual(len(output[0]["members"]), 25)
def test_custom_generated_file(self):
with subprocess.Popen(
f"{self.binary_path} 100",
shell=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
) as debuggee_proc:
debuggee_pid = debuggee_proc.pid
proc = subprocess.run(
f"{self.oid} --script {self.script()} --config-file {self.oid_conf} -t60 --pid {debuggee_pid}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.expectReturncode(proc, ExitStatus.SUCCESS)
copy_file(
"/tmp/tmp_oid_output_2.cpp",
self.custom_generated_code_file,
)
proc = subprocess.run(
f"{self.oid} --script {self.script()} --config-file {self.oid_conf} --pid {debuggee_pid} -t60 --generated-code-file {self.custom_generated_code_file}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.expectReturncode(proc, ExitStatus.SUCCESS)
debuggee_proc.terminate()
# with open("oid_out.json", "r") as f:
# output = json.loads(f.read())
# self.assertEqual(output["typeName"], "Foo")
# self.assertEqual(output["staticSize"] + output["dynamicSize"], 2062)
# self.assertEqual(len(output["members"]), 9)
# debuggee_proc.terminate()
# outs, _errs = debuggee_proc.communicate()
def test_symbol_not_found_in_binary_fails(self):
with self.spawn_oid(self.script(), test_cmd="/bin/sleep 100") as proc:
self.expectReturncode(proc, ExitStatus.COMPILATION_ERROR)
self.assertIn(
b"Failed to create FuncDesc for",
proc.stderr,
)
def test_non_existant_script_fails(self):
with self.spawn_oid("not_there.oid") as proc:
self.expectReturncode(proc, ExitStatus.FILE_NOT_FOUND_ERROR)
def test_no_args_shows_help(self):
proc = subprocess.run(
self.oid,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
self.expectReturncode(proc, ExitStatus.USAGE_ERROR)
self.assertIn(b"usage: ", proc.stdout)
def test_metrics_data_is_generated(self):
with self.spawn_oid(self.script()) as proc:
self.expectReturncode(proc, ExitStatus.SUCCESS)
# Just checking that the file exists and it's valid JSON
with open("oid_metrics.json", "r") as f:
json.loads(f.read())
# Ensure removeTrap properly repatch small functions
def test_probe_return_arg0_small_fun(self):
# Check function incN is smaller than the POKETEXT window (8 bytes)
readIncNSize = subprocess.run(
f"readelf -s {self.binary_path} | grep _ZN3Foo4incNEi | awk '{{print $3}}'",
capture_output=True,
shell=True,
check=True,
)
incNSize = int(readIncNSize.stdout.decode("ascii"))
self.assertLessEqual(incNSize, 8)
with self.spawn_oid(self.script("integration_return_incN_arg0.oid")) as proc:
self.expectReturncode(proc, ExitStatus.SUCCESS)
with open(OUTPUT_PATH, "r") as f:
output = json.loads(f.read())
self.assertEqual(output[0]["typeName"], "int")
self.assertEqual(output[0]["staticSize"] + output[0]["dynamicSize"], 4)
self.assertNotIn("members", output[0])
def test_error_function_no_this(self):
with self.spawn_oid(self.script("integration_entry_doStuff_this.oid")) as proc:
self.expectReturncode(proc, ExitStatus.COMPILATION_ERROR)
self.assertIn(
b"has no 'this' parameter",
proc.stderr,
)
def test_error_method_no_arg0(self):
with self.spawn_oid(self.script("integration_entry_inc_arg0.oid")) as proc:
self.expectReturncode(proc, ExitStatus.COMPILATION_ERROR)
self.assertIn(
b"Argument index 0 too large. Args count: 0",
proc.stderr,
)
def test_error_timeout(self):
with self.spawn_oid(
self.script(),
test_cmd=f"{self.sleepy_binary_path} 100",
oid_opt="-d2 --timeout=1",
) as proc:
self.expectReturncode(proc, ExitStatus.SUCCESS)
self.assertIn(b"Received SIGNAL 14", proc.stderr)
self.assertIn(b"processTrap: Error in waitpid", proc.stderr)
self.assertIn(b"Interrupted system call", proc.stderr)
if __name__ == "__main__":
print("[debug] Running OI's integration tests")
unittest.main(verbosity=2)

View File

@ -0,0 +1,130 @@
# Add new test definition files to this list:
set(INTEGRATION_TEST_CONFIGS
anonymous.toml
container_enums.toml
cycles.toml
multi_arg.toml
namespaces.toml
packed.toml
padding.toml
pointers.toml
pointers_function.toml
pointers_incomplete.toml
primitives.toml
references.toml
simple_multiple_multilevel_inheritance.toml
simple_struct.toml
std_array.toml
std_deque_del_allocator.toml
std_list_del_allocator.toml
std_deque.toml
std_map_custom_comparator.toml
std_optional.toml
std_pair.toml
std_queue.toml
std_reference_wrapper.toml
std_priority_queue.toml
std_set_custom_comparator.toml
std_smart_ptr.toml
std_stack.toml
std_string.toml
std_unordered_map.toml
std_unordered_map_custom_operator.toml
std_unordered_set_custom_operator.toml
std_variant.toml
std_vector.toml
std_vector_del_allocator.toml
typedefed_parent.toml
)
# Thrift-based tests are optional: their definition files are only appended to
# INTEGRATION_TEST_CONFIGS when find_package reports Thrift as found.
find_package(Thrift)
if (${THRIFT_FOUND})
# Add test definition files requiring the Thrift compiler to this list:
set(THRIFT_TEST_CONFIGS
thrift_isset.toml
thrift_isset_missing.toml
thrift_namespaces.toml
)
list(APPEND INTEGRATION_TEST_CONFIGS ${THRIFT_TEST_CONFIGS})
endif()
# Prefix every definition file with this directory so the list holds paths
# into the source tree. THRIFT_TEST_CONFIGS itself keeps the bare file names.
list(TRANSFORM INTEGRATION_TEST_CONFIGS PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")
# disable position independent executables that oid can't yet handle
# todo: update to more modern cmake syntax
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -no-pie")
# Names of the C++ sources produced by gen_tests.py.
set(INTEGRATION_TEST_TARGET_SRC integration_test_target.cpp)
set(INTEGRATION_TEST_RUNNER_SRC integration_test_runner.cpp)
find_program(PYTHON_CMD NAMES python3.6 python3)
# Derive the Thrift test names (config file name without ".toml") and the
# matching ".thrift" file names. Both lists are empty when Thrift was not found.
set(THRIFT_TESTS ${THRIFT_TEST_CONFIGS})
list(TRANSFORM THRIFT_TESTS REPLACE ".toml$" "")
set(INTEGRATION_TEST_THRIFT_SRCS ${THRIFT_TESTS})
list(TRANSFORM INTEGRATION_TEST_THRIFT_SRCS APPEND ".thrift")
# Run gen_tests.py with the two output source names followed by every test
# definition file. The command is re-run when gen_tests.py or any definition
# file changes. The .thrift files are declared as outputs as well, so
# gen_tests.py is presumably what writes them -- confirm in gen_tests.py.
add_custom_command(
OUTPUT
${INTEGRATION_TEST_TARGET_SRC}
${INTEGRATION_TEST_RUNNER_SRC}
${INTEGRATION_TEST_THRIFT_SRCS}
COMMAND ${PYTHON_CMD}
${CMAKE_CURRENT_SOURCE_DIR}/gen_tests.py
${INTEGRATION_TEST_TARGET_SRC}
${INTEGRATION_TEST_RUNNER_SRC}
${INTEGRATION_TEST_CONFIGS}
MAIN_DEPENDENCY gen_tests.py
DEPENDS ${INTEGRATION_TEST_CONFIGS})
# The target program: built from the generated source at -O1 and linked
# against oil and Boost.
add_executable(integration_test_target ${INTEGRATION_TEST_TARGET_SRC})
target_compile_options(integration_test_target PRIVATE -O1)
target_link_libraries(integration_test_target PRIVATE oil Boost::headers ${Boost_LIBRARIES})
# The test runner: the generated runner source plus the hand-written
# runner_common.cpp from this directory.
add_executable(integration_test_runner ${INTEGRATION_TEST_RUNNER_SRC} runner_common.cpp)
target_include_directories(integration_test_runner PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
# GMOCK_MAIN_LIBS is set in test/CMakeLists.txt
target_link_libraries(integration_test_runner PRIVATE
${GMOCK_MAIN_LIBS}
Boost::headers
${Boost_LIBRARIES})
# Compile the locations of the target executable, the oid executable and the
# testing config file into the runner as string macros.
target_compile_definitions(integration_test_runner PRIVATE
TARGET_EXE_PATH="${CMAKE_CURRENT_BINARY_DIR}/integration_test_target"
OID_EXE_PATH="$<TARGET_FILE:oid>"
CONFIG_FILE_PATH="${CMAKE_BINARY_DIR}/testing.oid.toml")
# When Thrift is available, compile each Thrift test's .thrift file with the
# Thrift compiler and build the generated C++ into the target program.
if (${THRIFT_FOUND})
foreach(THRIFT_TEST IN LISTS THRIFT_TESTS)
set(THRIFT_SRC "${THRIFT_TEST}.thrift")
set(THRIFT_TYPES_H "thrift/annotation/gen-cpp2/${THRIFT_TEST}_types.h")
set(THRIFT_DATA_CPP "thrift/annotation/gen-cpp2/${THRIFT_TEST}_data.cpp")
# Generate the *_types.h / *_data.cpp pair from the .thrift source.
add_custom_command(
OUTPUT
${THRIFT_TYPES_H}
${THRIFT_DATA_CPP}
COMMAND
${THRIFT_COMPILER}
-r
--gen mstch_cpp2
-o thrift/annotation/
-I ${THRIFT_INCLUDE_DIRS}
${THRIFT_SRC}
MAIN_DEPENDENCY ${THRIFT_SRC})
# A per-test custom target depending on the generated header, which the
# target program is made to depend on so the header is generated first.
add_custom_target(integration_test_thrift_sources_${THRIFT_TEST} DEPENDS ${THRIFT_TYPES_H})
add_dependencies(integration_test_target integration_test_thrift_sources_${THRIFT_TEST})
target_sources(integration_test_target PRIVATE ${THRIFT_DATA_CPP})
endforeach()
# The generated headers are written below the current binary directory.
target_include_directories(integration_test_target PRIVATE
${THRIFT_INCLUDE_DIRS}
${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(integration_test_target PRIVATE glog::glog)
endif()
# Register the runner's tests with CTest. When the CI environment variable is
# defined, the runner is additionally passed --verbose and
# --preserve-on-failure.
if (DEFINED ENV{CI})
gtest_discover_tests(integration_test_runner EXTRA_ARGS "--verbose" "--preserve-on-failure")
else()
gtest_discover_tests(integration_test_runner)
endif()

273
test/integration/README.md Normal file
View File

@ -0,0 +1,273 @@
# Integration Tests
This directory contains test definition files for OI's integration tests.
## Running tests
There are a number of ways to run these integration tests.
1. Run the `integration_test_runner` executable directly. This provides some
additional options to aid debugging:
- `--verbose` Verbose output
- `--preserve` Do not clean up files generated by OID after tests are finished
- `--force` Run tests that have been marked as "skipped"
1. Run a number of the integration tests in parallel:
```ctest --test-dir build/test/integration -j$(nproc) [--tests-regex <regex>]```
1. Run them together with all other tests using either of:
```
make test-devel
make test-static
```
## Adding tests
1. Create a new test definition file in this directory and populate it as needed. See [Test Definition Format](#test-definition-format) for details.
1. Add your new definition file to the `INTEGRATION_TEST_CONFIGS` list in [`CMakeLists.txt`](CMakeLists.txt)
## Test Definition Format
Test definitions are stored in the [TOML](https://toml.io/) file format.
Example:
```toml
includes = ["vector", "unordered_map"]
definitions = '''
struct Foo {
std::vector<int> v;
};
using Bar = std::unordered_map<int, int>;
'''
[cases]
[cases.my_first_test_case]
param_types = ["const Foo&", "const Bar&"]
setup = '''
Foo foo;
foo.v = {4,5,6};
Bar bar;
bar[2] = 3;
return {foo, bar};
'''
expect_json = '{"staticSize":4,"dynamicSize":32}'
[cases.another_test_case]
param_types = ["int"]
setup = 'return 123;'
```
### Details
- `includes`
Header files required for this test.
Example:
```
includes = ["vector", "unordered_map"]
```
- `definitions`
C++ type definitions required for a test can be defined here.
Anything defined in this section will be automatically wrapped in a namespace
and will be private to this test.
Example:
```
definitions = '''
struct Foo {
std::vector<int> v;
};
using Bar = std::unordered_map<int, int>;
'''
```
- `thrift_definitions`
Thrift type definitions can be specified here. These will be passed to the
Thrift compiler which will generate C++ code from them.
**CAUTION**: Generated Thrift types are not wrapped in a namespace, so type
names must be globally unique between all tests.
Example:
```
thrift_definitions = '''
struct MyThriftStruct {
1: optional i32 a;
2: optional i32 b;
}
'''
```
- `raw_definitions`
This section allows specifying arbitrary C++ code which will be directly
copied into the target program's source without being wrapped in a namespace.
It should not be used for most tests. The purpose is to allow defining code
required for a specific test to compile, avoiding the need to add new
dependencies to the build system for one-off tests.
- `cases` **Required**
A list of individual test cases, each with their own setup, OI probe
definition and expected results, but sharing any type definitions created in
this test file.
Test cases should be grouped into related areas and put into shared test files.
- `param_types` **Required**
Parameter types of the function to probe.
oid does not have complete support for probing pass-by-value parameters, so
it is recommended to define all parameters as reference or pointer types.
Example:
```
param_types = ["const std::vector<int>&", "const Foo&"]
```
- `arg_types`
Types of the arguments being passed to the probed function. Defaults to
`param_types` with const, volatile and references removed.
It is only necessary to specify `arg_types` when they will differ from the
parameter types expected by the probed function. This can be useful for
testing inheritance.
Example:
```
param_types = ["BaseClass *"]
arg_types = ["DerivedClass"]
```
- `setup` **Required**
A snippet of C++ code which creates and returns values to be passed to the
function being probed as a part of this test case. The returned value should
be a tuple of `param_types`, although the curly brackets/braces can be
omitted in most cases when there is only a single value in the tuple.
Example:
```
setup = '''
std::vector<int> ret = {1,2,3};
return {ret, Foo(1)};
'''
```
- `type`
OI probe type. Defaults to `entry`.
Example:
```
type = "return"
```
- `args`
Comma separated list of arguments to introspect. Defaults to `arg0`.
Example:
```
args = "arg0,arg1"
```
- `cli_options`
Additional command line arguments passed to oid.
Example:
```
cli_options = ["--chase-raw-pointers"]
```
- `oid_skip`, `oil_skip`
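Skip running this test. Defaults to false. The value may also be a string giving the reason for skipping, e.g. `oil_skip = "oil can't chase raw pointers safely"`.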
Example:
```
oid_skip = true
```
- `expect_oid_exit_code`, `expect_oil_exit_code`
Exit code expected from OI. Defaults to 0.
Example:
```
expect_oid_exit_code = 6
```
- `expect_json`
JSON expected to match results from OI.
Only keys which appear in these expected results are used for comparison.
This means that irrelevant or non-reproducible keys can be omitted and they
will be ignored. Missing keys in the actual results will still cause test
failures.
Example:
```
expect_json = '{"staticSize":4,"dynamicSize":0}'
```
To ensure that a given key does not appear in the results, the special
**NOT** key can be used, with the value set to the undesired key's name.
This example checks that the JSON result does not contain the key "members":
```
expect_json = '{"NOT":"members"}'
```
The **NOT** key can also be used to check that a given key's value is not
equal to some expected value.
This example checks that the result has a key named `pointer`, but that its
value is not equal to 0:
```
expect_json = '{"NOT":{"pointer":0}}'
```
- `expect_stdout`
Regex expected to match OI's stdout.
Example:
```
expect_stdout = ".*SUCCESS.*"
```
- `expect_stderr`
Regex expected to match OI's stderr.
Example:
```
expect_stderr = ".*Successfully detached from pid.*"
```
- `expect_not_stdout`
Regex expected to not match OI's stdout.
Example:
```
expect_not_stdout = "ABC"
```
- `expect_not_stderr`
Regex expected to not match OI's stderr.
Example:
```
expect_not_stderr = ".*ERROR.*"
```

View File

@ -0,0 +1,312 @@
definitions = '''
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnested-anon-types"
struct Node {
int a, b, c;
};
struct DynNode {
std::vector<Node> nodes;
};
typedef struct {
struct Node *node;
} AnonStruct;
struct AnonStructContainer {
struct { struct Node *node; } anon;
};
struct AnonStructPtrContainer {
struct { struct Node *node; } *anon;
};
struct AnonTypedefContainer {
AnonStruct anon;
};
struct AnonUnionContainer {
union {char c; short int d;};
union {int a; double b;};
int e;
};
struct NestedAnonContainer {
struct {
union {
char c;
struct {
int x;
double y;
} d;
};
struct {
int a, b, c;
union {
union { char x; int y; };
long z;
};
AnonStruct as;
} v;
} m;
union {
union { int x1; char y1; };
union { double z1; long w1; };
struct {
long a, b, c;
} s1;
};
union {
union { int x2; char y2; };
union { double z2; long w2; };
struct {
char a, b, c;
} s2;
};
};
/* This test is disabled due to GCC not supporting it
struct AnonArrayContainer {
struct {
float *x;
DynNode ns[4];
};
};
*/
#pragma clang diagnostic pop
'''
[cases]
[cases.regular_struct]
param_types = ["const Node&"]
setup = "return Node{1, 2, 3};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":12,
"dynamicSize":0,
"members":[
{"name":"a", "staticSize":4, "dynamicSize":0},
{"name":"b", "staticSize":4, "dynamicSize":0},
{"name":"c", "staticSize":4, "dynamicSize":0}
]}]'''
[cases.anon_struct]
oil_skip = "oil can't chase raw pointers safely"
param_types = ["const AnonStructContainer&"]
setup = '''
return AnonStructContainer{
.anon = {
.node = new Node{1, 2, 3}
}
};
'''
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "anon",
"typeName": "__anon_struct_0",
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "node",
"typeName": "struct Node *",
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"typeName": "Node",
"staticSize": 12,
"dynamicSize": 0,
"members": [
{ "name": "a", "staticSize": 4, "dynamicSize": 0 },
{ "name": "b", "staticSize": 4, "dynamicSize": 0 },
{ "name": "c", "staticSize": 4, "dynamicSize": 0 }
]
}]
}]
}]
}]'''
[cases.anon_struct_ptr]
skip = "We don't support pointer to anon-structs yet"
param_types = ["const AnonStructPtrContainer&"]
setup = '''
return AnonStructPtrContainer{
.anon = (decltype(AnonStructPtrContainer::anon))new (Node*){
new Node{1, 2, 3}
}
};'''
cli_options = ["--chase-raw-pointers"]
[cases.anon_typedef]
oil_skip = "oil can't chase raw pointers safely"
param_types = ["const AnonTypedefContainer&"]
setup = '''
return AnonTypedefContainer{
.anon = {
.node = new Node{1, 2, 3}
}
};'''
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "anon",
"typeName": "AnonStruct",
"isTypedef": true,
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "",
"typeName": "__anon_struct_0",
"isTypedef": false,
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "node",
"typeName": "struct Node *",
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"typeName": "Node",
"staticSize": 12,
"members": [
{ "name": "a", "staticSize": 4, "dynamicSize": 0 },
{ "name": "b", "staticSize": 4, "dynamicSize": 0 },
{ "name": "c", "staticSize": 4, "dynamicSize": 0 }
]
}]
}]
}]
}]
}]'''
[cases.anon_union]
param_types = ["const AnonUnionContainer&"]
setup = 'return AnonUnionContainer{ .a = 3 };'
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize": 24,
"dynamicSize": 0,
"members": [
{"name":"__anon_member_0", "staticSize":2, "dynamicSize":0},
{"name":"__anon_member_1", "staticSize":8, "dynamicSize":0},
{"name":"e", "staticSize":4, "dynamicSize":0, "typeName":"int"}
]
}]'''
[cases.nested_anon_struct]
oil_skip = "oil can't chase raw pointers safely"
param_types = ["const NestedAnonContainer&"]
setup = 'return NestedAnonContainer{.m = { .v = {.as = {new Node{1, 2, 3}}}}};'
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize": 80,
"dynamicSize": 12,
"members": [{
"name": "m",
"staticSize": 48,
"dynamicSize": 12,
"members": [
{ "staticSize": 16, "dynamicSize": 0 },
{ "name": "v",
"staticSize": 32,
"dynamicSize": 12,
"paddingSavingsSize": 4,
"members": [
{ "name": "a", "staticSize": 4, "dynamicSize": 0 },
{ "name": "b", "staticSize": 4, "dynamicSize": 0 },
{ "name": "c", "staticSize": 4, "dynamicSize": 0 },
{ "staticSize": 8, "dynamicSize": 0 },
{ "name": "as",
"typeName": "AnonStruct",
"isTypedef": true,
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"isTypedef": false,
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"name": "node",
"staticSize": 8,
"dynamicSize": 12,
"members": [{
"staticSize": 12,
"dynamicSize": 0,
"members": [
{ "name": "a", "staticSize": 4, "dynamicSize": 0 },
{ "name": "b", "staticSize": 4, "dynamicSize": 0 },
{ "name": "c", "staticSize": 4, "dynamicSize": 0 }
]
}]
}]
}]
}]
}]
}, {
"staticSize": 24,
"dynamicSize": 0
}, {
"staticSize": 8,
"dynamicSize": 0
}]
}]'''
# This test is disabled due to GCC not supporting it
# [cases.anon_array]
# param_types = ["const AnonArrayContainer&"]
# setup = '''
# auto result = AnonArrayContainer{
# .x = new float{123.456},
# .ns = {
# DynNode{ .nodes = { Node{1, 2, 3}, Node{4, 5, 6}, Node{7, 8, 9} } },
# DynNode{},
# DynNode{ .nodes = std::vector<Node>(1, Node{0, 0, 0}) },
# DynNode{ .nodes = std::vector<Node>(42, Node{1, 1, 1}) },
# },
# };
# result.ns[3].nodes.resize(22);
# return result;
# '''
# cli_options = ["--chase-raw-pointers"]
# expect_json = '''[{
# "staticSize": 104,
# "dynamicSize": 556,
# "members": [{
# "staticSize": 104,
# "dynamicSize": 556,
# "members": [{
# "name": "x",
# "staticSize": 8,
# "dynamicSize": 4,
# "members": [{ "staticSize": 4, "dynamicSize": 0 }]
# }, {
# "name": "ns",
# "staticSize": 96,
# "dynamicSize": 552,
# "length": 4,
# "capacity": 4,
# "elementStaticSize": 24,
# "members": [{
# "dynamicSize": 36,
# "members": [{ "length": 3, "capacity": 3, "elementStaticSize": 12 }]
# }, {
# "dynamicSize": 0,
# "members": [{ "length": 0, "capacity": 0, "elementStaticSize": 12 }]
# }, {
# "dynamicSize": 12,
# "members": [{ "length": 1, "capacity": 1, "elementStaticSize": 12 }]
# }, {
# "dynamicSize": 504,
# "members": [{ "length": 22, "capacity": 42, "elementStaticSize": 12 }]
# }]
# }]
# }]
# }]'''

View File

@ -0,0 +1,32 @@
# "array" is needed by the std::array cases below; list it here rather than
# relying on another test suite to pull the header in.
includes = ["array", "vector"]
definitions = '''
namespace MyNS {
enum class ScopedEnum {
Zero = 0,
One = 1,
Two = 2,
};
enum UNSCOPED_ENUM {
ZERO = 0,
ONE = 1,
TWO = 2,
};
} // MyNS
'''
[cases]
[cases.scoped_enum_type]
param_types = ["const std::vector<MyNS::ScopedEnum>&"]
setup = "return {};"
[cases.scoped_enum_val]
param_types = ["const std::array<int, static_cast<size_t>(MyNS::ScopedEnum::Two)>&"]
setup = "return {};"
expect_json = '[{"staticSize":8, "dynamicSize":0, "length":2, "capacity":2, "elementStaticSize":4}]'
[cases.unscoped_enum_type]
param_types = ["const std::vector<MyNS::UNSCOPED_ENUM>&"]
setup = "return {};"
[cases.unscoped_enum_val]
param_types = ["const std::array<int, MyNS::ONE>&"]
setup = "return {};"
expect_json = '[{"staticSize":4, "dynamicSize":0, "length":1, "capacity":1, "elementStaticSize":4}]'

View File

@ -0,0 +1,134 @@
includes = ["memory"]
definitions = '''
struct RawNode {
uint64_t value;
struct RawNode* next;
};
struct UniqueNode {
uint64_t value;
std::unique_ptr<struct UniqueNode> next;
};
struct SharedNode {
uint64_t value;
std::shared_ptr<struct SharedNode> next;
};
'''
[cases]
[cases.raw_ptr]
oil_skip = "oil can't chase pointers safely"
param_types = ["RawNode*"]
setup = '''
RawNode *first = new RawNode{1, nullptr};
RawNode *second = new RawNode{2, nullptr};
RawNode *third = new RawNode{3, nullptr};
first->next = second;
second->next = third;
third->next = first;
return first;
'''
cli_options = ["--chase-raw-pointers"]
expect_json = '''
[{
"typeName": "RawNode",
"isTypedef": false,
"staticSize": 16,
"dynamicSize": 32,
"members": [
{
"name": "value",
"typePath": "value",
"typeName": "uint64_t",
"staticSize": 8,
"dynamicSize": 0
},
{
"name": "next",
"typePath": "next",
"typeName": "struct RawNode *",
"isTypedef": false,
"staticSize": 8,
"dynamicSize": 32,
"members": [
{
"name": "",
"typePath": "",
"typeName": "RawNode",
"isTypedef": false,
"staticSize": 16,
"dynamicSize": 16,
"members": [
{
"name": "value",
"typePath": "value",
"typeName": "uint64_t",
"staticSize": 8,
"dynamicSize": 0
},
{
"name": "next",
"typePath": "next",
"typeName": "struct RawNode *",
"isTypedef": false,
"staticSize": 8,
"dynamicSize": 16,
"members": [
{
"name": "",
"typePath": "",
"typeName": "RawNode",
"isTypedef": false,
"staticSize": 16,
"dynamicSize": 0,
"members": [
{
"name": "value",
"typePath": "value",
"typeName": "uint64_t",
"staticSize": 8,
"dynamicSize": 0
},
{
"name": "next",
"typePath": "next",
"typeName": "struct RawNode *",
"isTypedef": false,
"staticSize": 8,
"dynamicSize": 0
}
]
}
]
}
]
}
]
}
]
}]
'''
[cases.unique_ptr]
param_types = ["UniqueNode*"]
setup = '''
auto first = std::make_unique<UniqueNode>();
auto firstPtr = first.get();
first->next = std::make_unique<UniqueNode>();
first->next->next = std::make_unique<UniqueNode>();
first->next->next->next = std::move(first);
return firstPtr;
'''
# TODO check JSON
[cases.shared_ptr]
param_types = ["SharedNode*"]
setup = '''
auto first = std::make_shared<SharedNode>();
auto firstPtr = first.get();
first->next = std::make_shared<SharedNode>();
first->next->next = std::make_shared<SharedNode>();
first->next->next->next = first;
return firstPtr;
'''
# TODO check JSON

View File

@ -0,0 +1,452 @@
import json
import os
import pathlib
import sys
import toml
def is_thrift_test(config):
    """Returns True when the test config contains a `thrift_definitions` key."""
    return "thrift_definitions" in config


def get_case_name(test_suite, test_case):
    """Returns the full test case name: `<suite>_<case>`."""
    return "{}_{}".format(test_suite, test_case)


def get_target_oid_func_name(test_suite, test_case):
    """Returns the name of the target function probed by oid for this case."""
    return "oid_test_case_" + get_case_name(test_suite, test_case)


def get_target_oil_func_name(test_suite, test_case):
    """Returns the name of the target function exercising oil for this case."""
    return "oil_test_case_" + get_case_name(test_suite, test_case)


def get_namespace(test_suite):
    """Returns the C++ namespace name used for a suite's definitions."""
    return "ns_{}".format(test_suite)
def add_headers(f, custom_headers, thrift_headers):
    """
    Writes the `#include` section of the generated target program to `f`.

    A fixed set of headers needed by the generated boilerplate is written
    first, followed by `custom_headers` (emitted as `#include <...>`) and
    `thrift_headers` (emitted as `#include "..."`).

    The fixed set names every standard header the generated code uses
    directly (`std::istringstream`, `std::string`, `std::getenv`,
    `uintptr_t`, `std::remove_cvref_t`) instead of relying on them being
    pulled in transitively by other headers.
    """
    f.write(
        """
#include <boost/current_function.hpp>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
#include <tuple>
#include <type_traits>
#include <ObjectIntrospection.h>
"""
    )
    for header in custom_headers:
        f.write(f"#include <{header}>\n")
    for header in thrift_headers:
        f.write(f'#include "{header}"\n')
def add_test_setup(f, config):
    """
    Writes the C++ definitions for one test suite to `f`.

    The suite's `raw_definitions` are emitted verbatim, followed by a
    namespace holding its `definitions` and, for every case: a `get_<case>()`
    function returning the case's arguments as a tuple, an `extern "C"`
    noinline function for oid to probe, and one which runs oil on each
    argument and prints the results as JSON.
    """
    ns = get_namespace(config["suite"])

    # fmt: off
    f.write(
        f"\n"
        f'{config.get("raw_definitions", "")}\n'
        f"namespace {ns} {{\n"
        f'{config.get("definitions", "")}\n'
    )
    # fmt: on

    def define_traceable_func(name, params, body):
        # Wraps `body` in an extern "C", noinline function definition.
        pieces = [
            f"\n",
            f' extern "C" {{\n',
            f" void __attribute__((noinline)) {name}({params}) {{\n",
            f"{body}",
            f" }}\n",
            f" }}\n",
        ]
        return "".join(pieces)

    for case_name, case in config["cases"].items():
        # generate getter for an object of this type
        param_types = ", ".join(
            f"std::remove_cvref_t<{param}>" for param in case["param_types"]
        )
        # Explicit arg_types override the cv/ref-stripped parameter types.
        arg_types = ", ".join(case["arg_types"]) if "arg_types" in case else param_types

        f.write(
            f"\n"
            f" std::tuple<{arg_types}> get_{case_name}() {{\n"
            f'{case["setup"]}\n'
            f" }}\n"
        )

        # generate oid and oil targets
        params_str = ", ".join(
            f"{param} a{i}" for i, param in enumerate(case["param_types"])
        )
        param_count = len(case["param_types"])

        # oid target: print the address of every argument, then the
        # function's own signature.
        oid_lines = [
            f" std::cout << (uintptr_t)(&a{i}) << std::endl;\n"
            for i in range(param_count)
        ]
        oid_lines.append(" std::cout << BOOST_CURRENT_FUNCTION << std::endl;\n")
        oid_func_body = "".join(oid_lines)

        f.write(
            define_traceable_func(
                get_target_oid_func_name(config["suite"], case_name),
                params_str,
                oid_func_body,
            )
        )

        # oil target: one getObjectSize call per argument, with the printed
        # results separated by commas inside a JSON "results" array.
        oil_options = (
            f"\n"
            f"ObjectIntrospection::options opts{{\n"
            f' .configFilePath = std::getenv("CONFIG_FILE_PATH"),\n'
            f" .debugLevel = 3,\n"
            f' .sourceFileDumpPath = "oil_jit_code.cpp",\n'
            f" .forceJIT = true,\n"
            f"}};"
        )
        per_arg_blocks = [
            f" size_t size{i} = 0;\n"
            f" auto ret{i} = ObjectIntrospection::getObjectSize(&a{i}, &size{i}, opts);\n"
            f' std::cout << "{{\\"ret\\": " << ret{i} << ", \\"size\\": " << size{i} << "}}" << std::endl;\n'
            for i in range(param_count)
        ]
        separator = ' std::cout << "," << std::endl;\n'
        oil_func_body = "".join(
            [
                oil_options,
                ' std::cout << "{\\"results\\": [" << std::endl;\n',
                separator.join(per_arg_blocks),
                ' std::cout << "]}" << std::endl;\n',
            ]
        )

        f.write(
            define_traceable_func(
                get_target_oil_func_name(config["suite"], case_name),
                params_str,
                oil_func_body,
            )
        )

    f.write(f"}} // namespace {ns}\n")
def add_common_code(f):
    """
    Writes the opening of the generated `main()` to `f`: argument count
    validation, and parsing of the mode, the test case name and the optional
    iteration count (1000 when absent or unparsable).

    The function body is left open; the dispatch code and footer follow.
    """
    main_prologue = """
int main(int argc, char *argv[]) {
if (argc < 3 || argc > 4) {
std::cerr << "usage: " << argv[0] << " oid/oil CASE [ITER]" << std::endl;
return -1;
}
std::string mode = argv[1];
std::string test_case = argv[2];
int iterations = 1000;
if (argc == 4) {
std::istringstream iss(argv[3]);
iss >> iterations;
if (iss.fail())
iterations = 1000;
}
"""
    f.write(main_prologue)
def add_dispatch_code(f, config):
    """Emit one `if (test_case == ...)` dispatch branch per case in `config`.

    Each generated branch builds the case's arguments once through its
    `get_<case>()` getter, then loops `iterations` times: it applies the OIL
    target when `mode` is "oil" and the OID target for any other mode, sleeps
    100ms after every call, and finally returns 0 from `main()`.

    Args:
        f: writable text stream receiving the generated C++.
        config: suite configuration; its "suite" and "cases" entries are read.
    """
    suite = config["suite"]
    namespace = get_namespace(suite)
    for case in config["cases"]:
        qualified_case = get_case_name(suite, case)
        oil_target = get_target_oil_func_name(suite, case)
        oid_target = get_target_oid_func_name(suite, case)
        branch = [
            f' if (test_case == "{qualified_case}") {{\n',
            f" auto val = {namespace}::get_{case}();\n",
            " for (int i=0; i<iterations; i++) {\n",
            ' if (mode == "oil") {\n',
            f" std::apply({namespace}::{oil_target}, val);\n",
            " } else {\n",
            f" std::apply({namespace}::{oid_target}, val);\n",
            " }\n",
            " std::this_thread::sleep_for(std::chrono::milliseconds(100));\n",
            " }\n",
            " return 0;\n",
            " }\n",
        ]
        f.write("".join(branch))
def add_footer(f):
    """Write the fall-through tail of the generated C++ `main()` to `f`.

    The emitted code reports the unrecognised mode/test-case pair on stderr,
    returns -1, and closes the function body.
    """
    unknown_case_report = (
        'std::cerr << "Unknown test case: " << argv[1] << " " << argv[2] << std::endl;'
    )
    footer_lines = (unknown_case_report, "return -1;", "}")
    # Leading blank line, then each footer line terminated by a newline.
    f.write("\n" + "\n".join(footer_lines) + "\n")
def gen_target(output_target_name, test_configs):
    """Generate the C++ target program covering every suite in `test_configs`.

    The output file is assembled in order: headers (the sorted union of every
    config's "includes", plus one generated Thrift types header per Thrift
    suite), the per-suite test setup, the common `main()` prologue, the
    per-suite dispatch branches, and the footer.

    Args:
        output_target_name: path of the file to (over)write.
        test_configs: list of suite configuration dicts.
    """
    with open(output_target_name, "w") as out:
        include_names = set()
        thrift_includes = []
        for cfg in test_configs:
            include_names.update(cfg.get("includes", []))
            if is_thrift_test(cfg):
                thrift_includes.append(
                    f"thrift/annotation/gen-cpp2/{cfg['suite']}_types.h"
                )
        add_headers(out, sorted(include_names), thrift_includes)

        for cfg in test_configs:
            add_test_setup(out, cfg)

        add_common_code(out)

        for cfg in test_configs:
            add_dispatch_code(out, cfg)

        add_footer(out)
def get_probe_name(probe_type, test_suite, test_case, args):
    """Build the probe string `<probe_type>:<oid target function>:<args>`.

    The middle component is the OID target function name for the given suite
    and case.
    """
    target_func = get_target_oid_func_name(test_suite, test_case)
    return ":".join((probe_type, target_func, args))
def add_tests(f, config):
    """Emit the OID test followed by the OIL test for every case in `config`.

    Args:
        f: writable text stream receiving the generated C++.
        config: suite configuration; its "cases" mapping is iterated in order.
    """
    suite_cases = config["cases"]
    for name in suite_cases:
        definition = suite_cases[name]
        add_oid_integration_test(f, config, name, definition)
        add_oil_integration_test(f, config, name, definition)
def add_oid_integration_test(f, config, case_name, case):
    """Emit the `OidIntegration` gtest case for one test definition.

    The generated test runs OID against the target process (started in "oid"
    mode for this case) using the case's probe, CLI options and extra config,
    asserts OID's exit code and that the target is still running, and then
    appends whichever optional checks the case defines:

    * `expect_json`: compared against the contents of "oid_out.json".
    * `expect_stdout` / `expect_stderr`: regex the stream must match.
    * `expect_not_stdout` / `expect_not_stderr`: regex the stream must not
      match.

    Case keys read, with defaults: "type" ("entry"), "args" ("arg0"),
    "expect_oid_exit_code" (0), "cli_options" (none), "config" ("").

    Exits the generator with status 1 if `expect_json` is not valid JSON.
    """
    suite = config["suite"]
    probe = get_probe_name(
        case.get("type", "entry"), suite, case_name, case.get("args", "arg0")
    )
    target_case = get_case_name(suite, case_name)
    expected_exit = case.get("expect_oid_exit_code", 0)
    # Render the CLI options as a C++ braced initializer list of string literals.
    quoted_options = [f'"{option}"' for option in case.get("cli_options", ())]
    cli_initializer = "{" + ", ".join(quoted_options) + "}"
    extra_config = case.get("config", "")
    skip_guard = generate_skip(case, "oid")

    f.write(
        "\n"
        f"TEST_F(OidIntegration, {suite}_{case_name}) {{\n"
        f"{skip_guard}"
        ' std::string configOptions = R"--(\n'
        f"{extra_config}\n"
        ' )--";\n'
        " ba::io_context ctx;\n"
        " auto [target, oid] = runOidOnProcess(\n"
        " {\n"
        " .ctx = ctx,\n"
        f' .targetArgs = "oid {target_case}",\n'
        f' .scriptSource = "{probe}",\n'
        " },\n"
        f" {cli_initializer},\n"
        " std::move(configOptions));\n"
        f" ASSERT_EQ(exit_code(oid), {expected_exit});\n"
        " EXPECT_EQ(target.proc.running(), true);\n"
    )

    if "expect_json" in case:
        expected_json = case["expect_json"]
        # Validate at generation time so a malformed expectation is reported
        # here, naming the offending suite and case.
        try:
            json.loads(expected_json)
        except json.decoder.JSONDecodeError as error:
            message = (
                f"`expect_json` value for test case {suite}.{case_name}"
                f" was invalid JSON: {error}"
            )
            # Wrapped in ANSI escapes so the message is printed in red.
            print(f"\x1b[31m{message}\x1b[0m", file=sys.stderr)
            sys.exit(1)

        f.write(
            "\n"
            " std::stringstream expected_json_ss;\n"
            f' expected_json_ss << R"--({expected_json})--";\n'
            " bpt::ptree expected_json, actual_json;\n"
            " bpt::read_json(expected_json_ss, expected_json);\n"
            ' bpt::read_json("oid_out.json", actual_json);\n'
            " compare_json(expected_json, actual_json);\n"
        )

    # (case key, captured stream variable, whether the match is negated)
    regex_expectations = (
        ("expect_stdout", "stdout_", False),
        ("expect_stderr", "stderr_", False),
        ("expect_not_stdout", "stdout_", True),
        ("expect_not_stderr", "stderr_", True),
    )
    for key, stream, negated in regex_expectations:
        if key not in case:
            continue
        # "expect_not_stdout" -> "not_stdout_regex", etc.
        regex_var = key[len("expect_"):] + "_regex"
        matcher = f"MatchesRegex({regex_var})"
        if negated:
            matcher = f"Not({matcher})"
        f.write(
            f' std::string {regex_var} = R"--({case[key]})--";\n'
            f" EXPECT_THAT({stream}, {matcher});\n"
        )

    f.write("}\n")
def add_oil_integration_test(f, config, case_name, case):
    """Emit the `OilIntegration` gtest case for one test definition.

    The generated test runs the target in "oil" mode for a single iteration,
    asserts its exit code (`expect_oil_exit_code`, default 0), parses the
    captured stdout as JSON, asserts every result's "ret" is 0 and collects
    each result's "size". When the case defines `expect_json`, every expected
    entry's `staticSize + dynamicSize` is compared with the reported size at
    the same position.

    Exits the generator with status 1 if `expect_json` is not valid JSON.

    Args:
        f: writable text stream receiving the generated C++.
        config: suite configuration; its "suite" entry is read.
        case_name: name of the case within the suite.
        case: the case definition dict.
    """
    case_str = get_case_name(config["suite"], case_name)
    exit_code = case.get("expect_oil_exit_code", 0)
    f.write(
        f"\n"
        f'TEST_F(OilIntegration, {config["suite"]}_{case_name}) {{\n'
        f"{generate_skip(case, 'oil')}"
        f" ba::io_context ctx;\n"
        f" auto target = runOilTarget({{\n"
        f" .ctx = ctx,\n"
        f' .targetArgs = "oil {case_str} 1",\n'
        f" }});\n\n"
        f" ASSERT_EQ(exit_code(target), {exit_code});\n"
        f"\n"
        f" bpt::ptree result_json;\n"
        f" auto json_ss = std::stringstream(stdout_);\n"
        f" bpt::read_json(json_ss, result_json);\n"
        f" std::vector<size_t> sizes;\n"
        f' for (const auto& each : result_json.get_child("results")) {{\n'
        f" const auto& result = each.second;\n"
        f' int oilResult = result.get<int>("ret");\n'
        f' size_t oilSize = result.get<size_t>("size");\n'
        f" ASSERT_EQ(oilResult, 0);\n"
        f" sizes.push_back(oilSize);\n"
        # Terminate the loop's closing brace with a newline so it is not fused
        # with whatever is emitted next.
        f" }}\n"
    )
    if "expect_json" in case:
        # Validate at generation time so a malformed expectation is reported
        # here, naming the offending suite and case.
        try:
            json.loads(case["expect_json"])
        except json.decoder.JSONDecodeError as error:
            print(
                f"\x1b[31m`expect_json` value for test case {config['suite']}.{case_name} was invalid JSON: {error}\x1b[0m",
                file=sys.stderr,
            )
            sys.exit(1)
        f.write(
            f"\n"
            f" std::stringstream expected_json_ss;\n"
            f' expected_json_ss << R"--({case["expect_json"]})--";\n'
            f" bpt::ptree expected_json;\n"
            f" bpt::read_json(expected_json_ss, expected_json);\n"
            # The loop below advances `sizes_it` in lock-step with the expected
            # entries; without this guard, more expected entries than reported
            # sizes would dereference past `sizes.end()`.
            f" ASSERT_LE(expected_json.size(), sizes.size());\n"
            f" auto sizes_it = sizes.begin();\n"
            f" for (auto it = expected_json.begin(); it != expected_json.end(); ++it, ++sizes_it) {{\n"
            f" auto node = it->second;\n"
            f' size_t expected_size = node.get<size_t>("staticSize");\n'
            f' expected_size += node.get<size_t>("dynamicSize");\n'
            f" EXPECT_EQ(*sizes_it, expected_size);\n"
            f" }}\n"
        )
    f.write(f"}}\n")
def generate_skip(case, specific):
    """Return the C++ skip guard for a test case, or "" if it is not skipped.

    A case is skipped when either its general "skip" key or its tool-specific
    "<specific>_skip" key is truthy. The guard only skips while
    `run_skipped_tests` is false. If a non-empty reason string is available it
    is streamed into `GTEST_SKIP()`; the tool-specific reason takes precedence
    over the general one.

    Args:
        case: the case definition dict.
        specific: tool name used to build the specific key (e.g. "oid", "oil").

    Returns:
        The generated C++ snippet as a string (empty when nothing is skipped).
    """
    general_reason = case.get("skip", False)
    specific_reason = case.get(f"{specific}_skip", False)
    if not (specific_reason or general_reason):
        return ""

    # Only non-empty strings count as a reason: a boolean flag or an empty
    # specific reason must not hide a real general reason.
    reason = next(
        (
            candidate
            for candidate in (specific_reason, general_reason)
            if isinstance(candidate, str) and candidate
        ),
        None,
    )

    skip_stmt = " GTEST_SKIP()"
    if reason is not None:
        # The reason lands inside a C++ string literal, so escape the
        # characters that would otherwise terminate or break it.
        escaped = (
            reason.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
        )
        skip_stmt += f' << "{escaped}"'

    return " if (!run_skipped_tests) {\n" + skip_stmt + ";\n" + " }\n"
def gen_runner(output_runner_name, test_configs):
    """Generate the gtest runner source for every suite in `test_configs`.

    Writes the shared C++ preamble (includes, namespace aliases, matcher
    using-declarations and the `run_skipped_tests` extern declaration) to
    `output_runner_name`, then appends each config's tests via `add_tests`.

    Args:
        output_runner_name: path of the file to (over)write.
        test_configs: list of suite configuration dicts.
    """
    with open(output_runner_name, "w") as f:
        f.write(
            "#include <boost/property_tree/json_parser.hpp>\n"
            "#include <boost/property_tree/ptree.hpp>\n"
            "#include <fstream>\n"
            "#include <gmock/gmock.h>\n"
            "#include <gtest/gtest.h>\n"
            "#include <string>\n"
            "#include <sstream>\n"
            '#include "runner_common.h"\n'
            "\n"
            "namespace ba = boost::asio;\n"
            "namespace bpt = boost::property_tree;\n"
            "\n"
            "using ::testing::MatchesRegex;\n"
            # Cases with `expect_not_stdout` / `expect_not_stderr` generate an
            # unqualified `Not(MatchesRegex(...))`, so `Not` must be visible too.
            "using ::testing::Not;\n"
            "\n"
            "extern bool run_skipped_tests;\n"
        )
        for config in test_configs:
            add_tests(f, config)
def gen_thrift(test_configs):
    """Write a `<suite>.thrift` file for every Thrift suite in `test_configs`.

    Non-Thrift configs are ignored. Each file is created relative to the
    current working directory, filled with the config's "thrift_definitions",
    and its name is printed once written.
    """
    for cfg in (candidate for candidate in test_configs if is_thrift_test(candidate)):
        thrift_path = f"{cfg['suite']}.thrift"
        with open(thrift_path, "w") as out:
            out.write(cfg["thrift_definitions"])
        print(f"Thrift out: {thrift_path}")
def main():
    """Command-line entry point: load test definitions and generate outputs.

    Usage: gen_tests.py OUTPUT_TARGET OUTPUT_RUNNER INPUT1 [INPUT2 ...]

    Each input is either a `.toml` definition file or a directory that is
    walked recursively for `.toml` files. The file stem becomes the suite
    name. After loading, the target program, the test runner and any Thrift
    definition files are generated.

    Raises:
        SystemExit: with status 1 when fewer than three arguments are given.
        Exception: when a suite name occurs more than once, or an input is
            neither a `.toml` file nor a directory.
    """
    if len(sys.argv) < 4:
        print("Usage: gen_tests.py OUTPUT_TARGET OUTPUT_RUNNER INPUT1 [INPUT2 ...]")
        # `sys.exit` rather than the `exit` builtin, which is only installed by
        # the `site` module and is intended for interactive use.
        sys.exit(1)

    output_target = sys.argv[1]
    output_runner = sys.argv[2]
    inputs = sys.argv[3:]

    print(f"Output target: {output_target}")
    print(f"Output runner: {output_runner}")
    print(f"Input files: {inputs}")

    test_configs = []
    test_suites = set()
    # `inputs` doubles as a work list: directories push the definition files
    # they contain back onto it.
    while inputs:
        test_path = inputs.pop()
        if test_path.endswith(".toml"):
            test_suite = pathlib.Path(test_path).stem
            if test_suite in test_suites:
                raise Exception(f"Test suite {test_suite} is defined multiple times")
            test_suites.add(test_suite)
            config = toml.load(test_path)
            config["suite"] = test_suite
            test_configs.append(config)
        elif os.path.isdir(test_path):
            for root, _dirs, files in os.walk(test_path):
                for name in files:
                    if name.endswith(".toml"):
                        path = os.path.join(root, name)
                        print(f"Found definition file at {path}")
                        inputs.append(path)
        else:
            raise Exception(
                "Test definition inputs must have the '.toml' extension or be a directory"
            )

    gen_target(output_target, test_configs)
    gen_runner(output_runner, test_configs)
    gen_thrift(test_configs)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,38 @@
definitions = '''
struct Foo {
int a, b, c;
};
struct Bar {
std::string a, b, c;
};
'''
[cases]
[cases.a]
oil_skip = "OIL doesn't support the 'codegen.ignore' config yet"
param_types = ["const Bar&"]
setup = """
return Bar{
"The first member of the struct Bar",
"The second member of the struct Bar",
"The 3rd member of the struct Bar"
};
"""
config = """
[[codegen.ignore]]
type = "Foo"
members = ["a"]
[[codegen.ignore]]
type = "Bar"
members = ["b"]
"""
expect_json = '''[{
"staticSize":96,
"dynamicSize":66,
"members":[
{"name":"a", "staticSize":32, "dynamicSize":34},
{"name":"b", "staticSize":32, "dynamicSize":0},
{"name":"c", "staticSize":32, "dynamicSize":32}
]}]'''

View File

@ -0,0 +1,49 @@
definitions = '''
struct NodeA {
int x, y, z;
};
// Structure that mimics the inside of a std::string,
// so we can create bogus std::strings and test TreeBuilder failures
struct StringInner {
uintptr_t buffer, size, capacity, extra;
};
'''
[cases]
[cases.a]
param_types = ["int", "double"]
args = "arg0,arg1"
setup = "return {1,2.0};"
expect_json = '[{"staticSize":4, "dynamicSize":0},{"staticSize":8, "dynamicSize":0}]'
# TODO separate sizes for each argument?
# Test that TreeBuilder failing to run on the first arg doesn't impact the second arg
[cases.tb_fail_first_arg]
oil_skip = "oil doesn't handle invalid strings"
param_types = ["const std::string&", "const NodeA&"]
args = "arg0,arg1"
setup = """
// Create a string with an invalid size/capacity to trip TreeBuilder
StringInner strIn{0, (uintptr_t)-1, (uintptr_t)-1, 0};
std::string *str = (std::string*)&strIn;
return { std::move(*str), NodeA{4, 5, 6} };
"""
expect_json = '[{},{"staticSize":12, "dynamicSize":0}]'
[cases.tb_all_fail_crashes]
oil_skip = "oil doesn't handle invalid strings"
param_types = ["const std::string&", "const std::string&"]
args = "arg0,arg1"
setup = """
// Create a string with an invalid size/capacity to trip TreeBuilder
StringInner strIn1{0, (uintptr_t)-1, (uintptr_t)-1, 0};
std::string *str1 = (std::string*)&strIn1;
StringInner strIn2{0, (uintptr_t)-1, (uintptr_t)-1, 0};
std::string *str2 = (std::string*)&strIn2;
return { std::move(*str1), std::move(*str2) };
"""
expect_oid_exit_code = 6
expect_stderr = ".*Nothing to output: failed to run TreeBuilder on any argument.*"

View File

@ -0,0 +1,31 @@
# This test checks that we can correctly distinguish between types with the same
# name in different namespaces.
includes = ["queue", "stack"]
definitions = '''
namespace nsA {
struct Foo {
int x;
};
} // namespace nsA
namespace nsB {
struct Foo {
int y;
int z;
};
} // namespace nsB
'''
[cases]
[cases.queue]
param_types = ["const std::queue<std::pair<nsA::Foo, nsB::Foo>>&"]
setup = "return std::queue<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>>({{ns_namespaces::nsA::Foo(), ns_namespaces::nsB::Foo()}});"
expect_json = '''[{
"typeName": "queue<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>, std::deque<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>, std::allocator<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo> > > >",
"staticSize": 80, "dynamicSize": 12, "length": 1, "capacity": 1, "elementStaticSize": 12
}]'''
[cases.stack]
param_types = ["const std::stack<std::pair<nsA::Foo, nsB::Foo>>&"]
setup = "return std::stack<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>>({{ns_namespaces::nsA::Foo(), ns_namespaces::nsB::Foo()}});"
expect_json = '''[{
"typeName": "stack<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>, std::deque<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo>, std::allocator<std::pair<ns_namespaces::nsA::Foo, ns_namespaces::nsB::Foo> > > >",
"staticSize": 80, "dynamicSize": 12, "length": 1, "capacity": 1, "elementStaticSize": 12
}]'''

View File

@ -0,0 +1,19 @@
definitions = '''
struct __attribute__((__packed__)) Foo {
char *p; /* 8 bytes */
char c; /* 1 byte */
long x; /* 8 bytes */
};
'''
[cases]
[cases.a]
param_types = ["const Foo&"]
setup = "return {};"
expect_json = '''[{
"staticSize":17,
"dynamicSize":0,
"members":[
{"name":"p", "staticSize":8, "dynamicSize":0},
{"name":"c", "staticSize":1, "dynamicSize":0},
{"name":"x", "staticSize":8, "dynamicSize":0}
]}]'''

View File

@ -0,0 +1,102 @@
definitions = '''
struct Foo {
int *a;
bool b;
long c;
};
struct Bar {
int *a;
bool b;
long c;
Foo d;
};
/* The names generated for parent's padding use their own index,
* which can conflict with the child's generated name.
* We must ensure there are no such conflicts, even across multiple parents.
*/
struct PaddedGrandParentA {
bool x;
short y;
};
struct PaddedGrandParentB {
bool x;
int y;
};
struct PaddedParentA : public PaddedGrandParentA {
bool a;
int b;
short c;
};
struct PaddedParentB : public PaddedGrandParentA, public PaddedGrandParentB {
bool a;
long b;
};
/* Create lots of padding holes so there is a collision between the child's and
 * its parents' generated padding names.
*/
struct PaddedChild : public PaddedParentA, public PaddedParentB {
bool a;
long long b;
bool c; short d;
bool e; short f;
bool g; short h;
bool i; short j;
bool k; short l;
bool m; short n;
bool o; short p;
bool q; short r;
bool s; short t;
bool u; short v;
bool w; short x;
bool y; short z;
};
'''
[cases]
[cases.bool_padding]
param_types = ["const Foo&"]
setup = "return Foo{0, false, 0};"
expect_json = '''[{
"staticSize":24,
"dynamicSize":0,
"members":[
{ "name":"a", "staticSize":8, "dynamicSize":0 },
{ "name":"b", "staticSize":1, "dynamicSize":0 },
{ "name":"c", "staticSize":8, "dynamicSize":0 }
]}]'''
[cases.nested_padding]
param_types = ["const Bar&"]
setup = "return Bar{0, false, 0, Foo { 0, false, 0 }};"
expect_json = '''[{
"staticSize":48,
"dynamicSize":0,
"members":[
{ "name":"a", "staticSize":8, "dynamicSize":0 },
{ "name":"b", "staticSize":1, "dynamicSize":0 },
{ "name":"c", "staticSize":8, "dynamicSize":0 },
{
"name":"d",
"staticSize":24,
"dynamicSize":0,
"members": [
{ "name":"a", "staticSize":8, "dynamicSize":0 },
{ "name":"b", "staticSize":1, "dynamicSize":0 },
{ "name":"c", "staticSize":8, "dynamicSize":0 }
]}
]}]'''
[cases.parent_padding]
param_types = ["const PaddedChild&"]
setup = "return PaddedChild{};"
expect_json = '''[{
"staticSize": 104,
"dynamicSize": 0,
"paddingSavingsSize": 19
}]'''

View File

@ -0,0 +1,238 @@
includes = ["vector"]
definitions = '''
struct PrimitivePtrs {
int a;
int *b;
void *c; // No dynamic size, we can't know what it points to!
};
struct VectorPtr {
std::vector<int> *vec;
};
'''
[cases]
[cases.int]
skip = "top-level pointers are skipped over"
oil_skip = "oil can't chase pointers safely"
param_types = ["int*"]
setup = "return new int(1);"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"typeName": "int *",
"staticSize": 8,
"dynamicSize": 4,
"NOT": {"pointer": 0},
"members": [
{
"typeName": "int",
"staticSize": 4,
"dynamicSize": 0
}
]
}]'''
[cases.int_no_follow]
skip = "top-level pointers are skipped over"
param_types = ["int*"]
setup = "return new int(1);"
expect_json = '''[{
"typeName": "int *",
"staticSize": 8,
"dynamicSize": 0,
"NOT": {"pointer": 0},
"NOT": "members"
}]'''
[cases.int_null]
skip = "top-level pointers are skipped over"
param_types = ["int*"]
setup = "return nullptr;"
expect_json = '''[{
"typeName": "int *",
"staticSize": 8,
"dynamicSize": 0,
"pointer": 0,
"NOT": "members"
}]'''
[cases.void]
skip = "top-level pointers are skipped over"
param_types = ["void*"]
setup = "return new int(1);"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"typeName": "void *",
"staticSize": 8,
"dynamicSize": 0,
"NOT": {"pointer": 0},
"NOT": "members"
}]'''
[cases.void_no_follow]
skip = "top-level pointers are skipped over"
param_types = ["void*"]
setup = "return new int(1);"
expect_json = '''[{
"typeName": "void *",
"staticSize": 8,
"dynamicSize": 0,
"NOT": {"pointer": 0},
"NOT": "members"
}]'''
[cases.void_null]
skip = "top-level pointers are skipped over"
param_types = ["void*"]
setup = "return nullptr;"
expect_json = '''[{
"typeName": "void *",
"staticSize": 8,
"dynamicSize": 0,
"pointer": 0,
"NOT": "members"
}]'''
[cases.vector]
skip = "top-level pointers are skipped over"
oil_skip = "oil can't chase pointers safely"
param_types = ["std::vector<int>*"]
setup = "return new std::vector<int>{1,2,3};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"typeName": "std::vector<int> *",
"staticSize": 8,
"dynamicSize": 36,
"NOT": {"pointer": 0},
"members": [
{
"typeName": "std::vector<int>",
"staticSize": 24,
"dynamicSize": 12
}
]
}]'''
[cases.vector_no_follow]
skip = "top-level pointers are skipped over"
param_types = ["std::vector<int>*"]
setup = "return new std::vector<int>{1,2,3};"
expect_json = '''[{
"typeName": "std::vector<int> *",
"staticSize": 8,
"dynamicSize": 0,
"NOT": {"pointer": 0},
"NOT": "members"
}]'''
[cases.vector_null]
skip = "BAD DATA SEGMENT!!! top-level pointers are skipped over"
param_types = ["std::vector<int>*"]
setup = "return nullptr;"
expect_json = '''[{
"typeName": "std::vector<int> *",
"staticSize": 8,
"dynamicSize": 0,
"pointer": 0,
"NOT": "members"
}]'''
[cases.struct_primitive_ptrs]
oil_skip = "oil can't chase pointers safely"
param_types = ["const PrimitivePtrs&"]
setup = "return PrimitivePtrs{0, new int(0), new int(0)};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":24,
"dynamicSize":4,
"members":[
{"name":"a", "staticSize":4, "dynamicSize":0},
{"name":"b", "staticSize":8, "dynamicSize":4},
{"name":"c", "staticSize":8, "dynamicSize":0}
]}]'''
[cases.struct_primitive_ptrs_no_follow]
param_types = ["const PrimitivePtrs&"]
setup = "return PrimitivePtrs{0, new int(0), new int(0)};"
expect_json = '''[{
"staticSize":24,
"dynamicSize":0,
"members":[
{"name":"a", "staticSize":4, "dynamicSize":0},
{"name":"b", "staticSize":8, "dynamicSize":0},
{"name":"c", "staticSize":8, "dynamicSize":0}
]}]'''
[cases.struct_primitive_ptrs_null]
param_types = ["const PrimitivePtrs&"]
setup = "return PrimitivePtrs{0, nullptr, nullptr};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":24,
"dynamicSize":0,
"members":[
{"name":"a", "staticSize":4, "dynamicSize":0},
{"name":"b", "staticSize":8, "dynamicSize":0},
{"name":"c", "staticSize":8, "dynamicSize":0}
]}]'''
[cases.struct_vector_ptr]
oil_skip = "oil can't chase pointers safely"
param_types = ["const VectorPtr&"]
setup = "return VectorPtr{new std::vector<int>{1,2,3}};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":8,
"dynamicSize":36,
"members":[
{"name":"vec", "staticSize":8, "dynamicSize":36}
]}]'''
[cases.struct_vector_ptr_no_follow]
param_types = ["const VectorPtr&"]
setup = "return VectorPtr{new std::vector<int>{1,2,3}};"
expect_json = '''[{
"staticSize":8,
"dynamicSize":0,
"members":[
{"name":"vec", "staticSize":8, "dynamicSize":0}
]}]'''
[cases.struct_vector_ptr_null]
param_types = ["const VectorPtr&"]
setup = "return VectorPtr{nullptr};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":8,
"dynamicSize":0,
"members":[
{"name":"vec", "staticSize":8, "dynamicSize":0}
]}]'''
[cases.vector_of_pointers]
oil_skip = "oil can't chase pointers safely"
param_types = ["const std::vector<int*>&"]
setup = "return {{new int(1), nullptr, new int(3)}};"
cli_options = ["--chase-raw-pointers"]
expect_json = '''[{
"staticSize":24,
"dynamicSize":32,
"length":3,
"capacity":3,
"elementStaticSize":8,
"members":[
{"staticSize":8, "dynamicSize":4, "NOT": {"pointer":0}},
{"staticSize":8, "dynamicSize":0, "pointer":0},
{"staticSize":8, "dynamicSize":4, "NOT": {"pointer":0}}
]}]'''
[cases.vector_of_pointers_no_follow]
oid_skip = "pointer field is missing from results"
param_types = ["const std::vector<int*>&"]
setup = "return {{new int(1), nullptr, new int(3)}};"
expect_json = '''[{
"staticSize":24,
"dynamicSize":24,
"length":3,
"capacity":3,
"elementStaticSize":8,
"members":[
{"staticSize":8, "dynamicSize":0, "NOT": {"pointer":0}},
{"staticSize":8, "dynamicSize":0, "pointer":0},
{"staticSize":8, "dynamicSize":0, "NOT": {"pointer":0}}
]}]'''

Some files were not shown because too many files have changed in this diff Show More