container_info: switch to boost::regex (#465)

Summary:

OI was previously using `std::regex_match` to match container names. This was bad because `libstdc++`'s implementation of regex is awful. In the case of limited inlining it was causing a stack overflow when running CodeGen for large types (I think types with large names but I never got to the bottom of it).

Replace this with the competent `boost::regex_match` that we already have a dependency on.

Reviewed By: ajor

Differential Revision: D53002752
This commit is contained in:
Jake Hillion 2024-01-23 10:15:43 -08:00 committed by Facebook GitHub Bot
parent 7de35863f5
commit 978352aac1
8 changed files with 36 additions and 33 deletions

View File

@ -34,6 +34,8 @@ add_library(container_info
) )
target_link_libraries(container_info target_link_libraries(container_info
features features
Boost::regex
glog::glog glog::glog
toml toml
) )

View File

@ -75,12 +75,12 @@ const char* containerTypeEnumToStr(ContainerTypeEnum ty) {
return nullptr; return nullptr;
} }
std::regex matcher; boost::regex matcher;
if (std::optional<std::string> str = if (std::optional<std::string> str =
(*info)["matcher"].value<std::string>()) { (*info)["matcher"].value<std::string>()) {
matcher = std::regex(*str, std::regex_constants::grep); matcher = boost::regex(*str, boost::regex_constants::grep);
} else { } else {
matcher = std::regex("^" + typeName, std::regex_constants::grep); matcher = boost::regex("^" + typeName, boost::regex_constants::grep);
} }
std::optional<size_t> numTemplateParams = std::optional<size_t> numTemplateParams =
@ -194,8 +194,9 @@ namespace {
* *
* The type name "name" should match "name" and "name<xxx>". * The type name "name" should match "name" and "name<xxx>".
*/ */
std::regex getMatcher(const std::string& typeName) { boost::regex getMatcher(const std::string& typeName) {
return std::regex("^" + typeName + "$|^" + typeName + "<.*>$"); return boost::regex("^" + typeName + "$|^" + typeName + "<.*>$",
boost::regex_constants::extended);
} }
} // namespace } // namespace
@ -225,7 +226,7 @@ ContainerInfo::ContainerInfo(const fs::path& path) {
throw ContainerInfoError(path, "`info.type_name` is a required field"); throw ContainerInfoError(path, "`info.type_name` is a required field");
} }
matcher = getMatcher(typeName); matcher_ = getMatcher(typeName);
if (std::optional<std::string> str = info["ctype"].value<std::string>()) { if (std::optional<std::string> str = info["ctype"].value<std::string>()) {
ctype = containerTypeEnumFromStr(*str); ctype = containerTypeEnumFromStr(*str);
@ -334,9 +335,9 @@ ContainerInfo::ContainerInfo(std::string typeName_,
ContainerTypeEnum ctype_, ContainerTypeEnum ctype_,
std::string header_) std::string header_)
: typeName(std::move(typeName_)), : typeName(std::move(typeName_)),
matcher(getMatcher(typeName)),
ctype(ctype_), ctype(ctype_),
header(std::move(header_)), header(std::move(header_)),
codegen(Codegen{ codegen(Codegen{
"// DummyDecl %1%\n", "// DummyFunc %1%\n", "// DummyFunc\n"}) { "// DummyDecl %1%\n", "// DummyFunc %1%\n", "// DummyFunc\n"}),
matcher_(getMatcher(typeName)) {
} }

View File

@ -14,9 +14,9 @@
* limitations under the License. * limitations under the License.
*/ */
#pragma once #pragma once
#include <boost/regex.hpp>
#include <filesystem> #include <filesystem>
#include <optional> #include <optional>
#include <regex>
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
@ -49,7 +49,7 @@ struct ContainerInfo {
// Old ctors, remove with OICodeGen: // Old ctors, remove with OICodeGen:
ContainerInfo() = default; ContainerInfo() = default;
ContainerInfo(std::string typeName_, ContainerInfo(std::string typeName_,
std::regex matcher_, boost::regex matcher,
std::optional<size_t> numTemplateParams_, std::optional<size_t> numTemplateParams_,
ContainerTypeEnum ctype_, ContainerTypeEnum ctype_,
std::string header_, std::string header_,
@ -61,7 +61,6 @@ struct ContainerInfo {
oi::detail::FeatureSet requiredFeatures, oi::detail::FeatureSet requiredFeatures,
ContainerInfo::Codegen codegen_) ContainerInfo::Codegen codegen_)
: typeName(std::move(typeName_)), : typeName(std::move(typeName_)),
matcher(std::move(matcher_)),
numTemplateParams(numTemplateParams_), numTemplateParams(numTemplateParams_),
ctype(ctype_), ctype(ctype_),
header(std::move(header_)), header(std::move(header_)),
@ -71,7 +70,8 @@ struct ContainerInfo {
underlyingContainerIndex(underlyingContainerIndex_), underlyingContainerIndex(underlyingContainerIndex_),
stubTemplateParams(std::move(stubTemplateParams_)), stubTemplateParams(std::move(stubTemplateParams_)),
requiredFeatures(requiredFeatures), requiredFeatures(requiredFeatures),
codegen(std::move(codegen_)) { codegen(std::move(codegen_)),
matcher_(std::move(matcher)) {
} }
ContainerInfo(ContainerInfo&&) = default; ContainerInfo(ContainerInfo&&) = default;
@ -83,8 +83,11 @@ struct ContainerInfo {
return copy; return copy;
} }
bool matches(std::string_view sv) const {
return boost::regex_search(sv.begin(), sv.end(), matcher_);
}
std::string typeName; std::string typeName;
std::regex matcher;
std::optional<size_t> numTemplateParams; std::optional<size_t> numTemplateParams;
ContainerTypeEnum ctype = UNKNOWN_TYPE; ContainerTypeEnum ctype = UNKNOWN_TYPE;
std::string header; std::string header;
@ -110,6 +113,8 @@ struct ContainerInfo {
private: private:
ContainerInfo(const ContainerInfo&) = default; ContainerInfo(const ContainerInfo&) = default;
ContainerInfo& operator=(const ContainerInfo& other) = default; ContainerInfo& operator=(const ContainerInfo& other) = default;
boost::regex matcher_;
}; };
class ContainerInfoError : public std::runtime_error { class ContainerInfoError : public std::runtime_error {

View File

@ -150,7 +150,7 @@ OICodeGen::getContainerInfo(drgn_type* type) {
for (auto it = containerInfoList.rbegin(); it != containerInfoList.rend(); for (auto it = containerInfoList.rbegin(); it != containerInfoList.rend();
++it) { ++it) {
const ContainerInfo& info = **it; const ContainerInfo& info = **it;
if (std::regex_search(nameStr, info.matcher)) { if (info.matches(nameStr)) {
return info; return info;
} }
} }

View File

@ -24,7 +24,6 @@
#include <clang/Sema/Sema.h> #include <clang/Sema/Sema.h>
#include <glog/logging.h> #include <glog/logging.h>
#include <regex>
#include <stdexcept> #include <stdexcept>
#include "oi/type_graph/Types.h" #include "oi/type_graph/Types.h"
@ -423,7 +422,7 @@ bool ClangTypeParser::chasePointer() const {
ContainerInfo* ClangTypeParser::getContainerInfo( ContainerInfo* ClangTypeParser::getContainerInfo(
const std::string& fqName) const { const std::string& fqName) const {
for (const auto& containerInfo : containers_) { for (const auto& containerInfo : containers_) {
if (std::regex_search(fqName, containerInfo->matcher)) { if (containerInfo->matches(fqName)) {
return containerInfo.get(); return containerInfo.get();
} }
} }

View File

@ -15,8 +15,6 @@
*/ */
#include "IdentifyContainers.h" #include "IdentifyContainers.h"
#include <regex>
#include "TypeGraph.h" #include "TypeGraph.h"
#include "oi/ContainerInfo.h" #include "oi/ContainerInfo.h"
@ -53,7 +51,7 @@ Type& IdentifyContainers::mutate(Type& type) {
Type& IdentifyContainers::visit(Class& c) { Type& IdentifyContainers::visit(Class& c) {
for (const auto& containerInfo : containers_) { for (const auto& containerInfo : containers_) {
if (!std::regex_search(c.fqName(), containerInfo->matcher)) { if (!containerInfo->matches(c.fqName())) {
continue; continue;
} }

View File

@ -71,7 +71,7 @@ void TypeIdentifier::visit(Container& c) {
if (Class* paramClass = dynamic_cast<Class*>(&param.type())) { if (Class* paramClass = dynamic_cast<Class*>(&param.type())) {
bool replaced = false; bool replaced = false;
for (const auto& info : passThroughTypes_) { for (const auto& info : passThroughTypes_) {
if (std::regex_search(paramClass->fqName(), info.matcher)) { if (info.matches(paramClass->fqName())) {
// Create dummy containers. Use a map so previously deduplicated nodes // Create dummy containers. Use a map so previously deduplicated nodes
// remain deduplicated. // remain deduplicated.
Container* dummy; Container* dummy;

View File

@ -5,19 +5,17 @@
TEST(ContainerInfoTest, matcher) { TEST(ContainerInfoTest, matcher) {
ContainerInfo info{"std::vector", SEQ_TYPE, "vector"}; ContainerInfo info{"std::vector", SEQ_TYPE, "vector"};
EXPECT_TRUE(std::regex_search("std::vector<int>", info.matcher)); EXPECT_TRUE(info.matches("std::vector<int>"));
EXPECT_TRUE(std::regex_search("std::vector<std::list<int>>", info.matcher)); EXPECT_TRUE(info.matches("std::vector<std::list<int>>"));
EXPECT_TRUE(std::regex_search("std::vector", info.matcher)); EXPECT_TRUE(info.matches("std::vector"));
EXPECT_FALSE(std::regex_search("vector", info.matcher)); EXPECT_FALSE(info.matches("vector"));
EXPECT_FALSE(std::regex_search("non_std::vector<int>", info.matcher)); EXPECT_FALSE(info.matches("non_std::vector<int>"));
EXPECT_FALSE(std::regex_search("std::vector_other<int>", info.matcher)); EXPECT_FALSE(info.matches("std::vector_other<int>"));
EXPECT_FALSE(std::regex_search("std::list<std::vector<int>>", info.matcher)); EXPECT_FALSE(info.matches("std::list<std::vector<int>>"));
EXPECT_FALSE(std::regex_search("std::vector::value_type", info.matcher)); EXPECT_FALSE(info.matches("std::vector::value_type"));
EXPECT_FALSE(std::regex_search("std::vector<int>::value_type", info.matcher)); EXPECT_FALSE(info.matches("std::vector<int>::value_type"));
EXPECT_FALSE(std::regex_search("std::vector<std::vector<int>>::value_type", EXPECT_FALSE(info.matches("std::vector<std::vector<int>>::value_type"));
info.matcher));
// Uh-oh, here's a case that I don't think regexes are powerful enough to // Uh-oh, here's a case that I don't think regexes are powerful enough to
// match: EXPECT_FALSE(std::regex_search("std::vector<int>::subtype<bool>", // match: EXPECT_FALSE(info.matches("std::vector<int>::subtype<bool>"));
// info.matcher));
} }