New Upstream Release - re2
Ready changes
Summary
Merged new upstream version: 20230601 (was: 20230301).
Diff
diff --git a/.bazelrc b/.bazelrc
new file mode 100644
index 0000000..8141639
--- /dev/null
+++ b/.bazelrc
@@ -0,0 +1,16 @@
+# Copyright 2022 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Abseil requires C++14 at minimum.
+# Previously, the flag was set via `BAZEL_CXXOPTS`. On macOS, we also had to set
+# `BAZEL_USE_CPP_ONLY_TOOLCHAIN` since Bazel wouldn't respect the former without
+# the latter. However, the latter stopped Bazel from using Xcode and `-framework
+# Foundation`, which CCTZ (vendored into Abseil) requires.
+build --enable_platform_specific_config
+build:linux --cxxopt=-std=c++14
+build:macos --cxxopt=-std=c++14
+build:windows --cxxopt=/std:c++14
+
+# Print test logs for failed tests.
+test --test_output=errors
diff --git a/.github/bazel.sh b/.github/bazel.sh
index fbe92e6..7295ec6 100755
--- a/.github/bazel.sh
+++ b/.github/bazel.sh
@@ -3,7 +3,7 @@ set -eux
bazel clean
bazel build --compilation_mode=dbg -- //:all
-bazel test --compilation_mode=dbg --test_output=errors -- //:all \
+bazel test --compilation_mode=dbg -- //:all \
-//:dfa_test \
-//:exhaustive1_test \
-//:exhaustive2_test \
@@ -13,7 +13,7 @@ bazel test --compilation_mode=dbg --test_output=errors -- //:all \
bazel clean
bazel build --compilation_mode=opt -- //:all
-bazel test --compilation_mode=opt --test_output=errors -- //:all \
+bazel test --compilation_mode=opt -- //:all \
-//:dfa_test \
-//:exhaustive1_test \
-//:exhaustive2_test \
diff --git a/.github/cmake.sh b/.github/cmake.sh
index 145a843..782334e 100755
--- a/.github/cmake.sh
+++ b/.github/cmake.sh
@@ -1,11 +1,11 @@
#!/bin/bash
set -eux
-cmake . -D CMAKE_BUILD_TYPE=Debug
+cmake . -D CMAKE_BUILD_TYPE=Debug -D RE2_BUILD_TESTING=ON "$@"
cmake --build . --config Debug --clean-first
ctest -C Debug --output-on-failure -E 'dfa|exhaustive|random'
-cmake . -D CMAKE_BUILD_TYPE=Release
+cmake . -D CMAKE_BUILD_TYPE=Release -D RE2_BUILD_TESTING=ON "$@"
cmake --build . --config Release --clean-first
ctest -C Release --output-on-failure -E 'dfa|exhaustive|random'
diff --git a/.github/workflows/ci-cmake.yml b/.github/workflows/ci-cmake.yml
index e8cd380..cd73561 100644
--- a/.github/workflows/ci-cmake.yml
+++ b/.github/workflows/ci-cmake.yml
@@ -3,13 +3,54 @@ on:
push:
branches: [main]
jobs:
- build:
- runs-on: ${{ matrix.os }}
+ # GitHub Actions doesn't provide GCC 13, so we use a Docker container instead.
+ # (Also, the Benchmark package on Ubuntu 22.04 LTS is problematic whereas this
+ # Docker container is based on Debian bookworm and so it has a newer version.)
+ build-linux:
+ runs-on: ubuntu-latest
+ container: gcc:13
strategy:
fail-fast: false
- matrix:
- os: [macos-latest, ubuntu-latest, windows-latest]
steps:
- uses: actions/checkout@v3
+ - name: Install CMake
+ run: |
+ apt update -y
+ apt install -y cmake
+ shell: bash
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ apt update -y
+ apt install -y libabsl-dev libgtest-dev libbenchmark-dev
+ shell: bash
- run: .github/cmake.sh
shell: bash
+ build-macos:
+ runs-on: macos-latest
+ strategy:
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v3
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ brew update
+ brew install abseil googletest google-benchmark
+ shell: bash
+ - run: .github/cmake.sh -D RE2_BUILD_FRAMEWORK=ON
+ shell: bash
+ build-windows:
+ runs-on: windows-latest
+ strategy:
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v3
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ # TODO: Remove this in September 2023, which is when the default triplet for
+ # vcpkg libraries will change from x86-windows to the detected host triplet.
+ export VCPKG_DEFAULT_TRIPLET=x64-windows
+ vcpkg update
+ vcpkg install abseil gtest benchmark
+ shell: bash
+ - run: .github/cmake.sh -D CMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake
+ shell: bash
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 807a5ca..4067c61 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,7 +8,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- ver: [11, 14, 17, 20]
+ ver: [17, 20]
env:
CC: clang
CXX: clang++
@@ -20,6 +20,11 @@ jobs:
CXXFLAGS: -O3 -g -std=c++${{ matrix.ver }}
steps:
- uses: actions/checkout@v3
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ brew update
+ brew install abseil googletest google-benchmark
+ shell: bash
- run: make && make test
shell: bash
build-clang:
@@ -27,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- ver: [13, 14, 15]
+ ver: [14, 15, 16]
env:
CC: clang-${{ matrix.ver }}
CXX: clang++-${{ matrix.ver }}
@@ -39,19 +44,46 @@ jobs:
chmod +x ./llvm.sh
sudo ./llvm.sh ${{ matrix.ver }}
shell: bash
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ sudo apt update -y
+ sudo apt install -y libabsl-dev libgtest-dev libbenchmark-dev
+ shell: bash
- run: make && make test
shell: bash
build-gcc:
runs-on: ubuntu-latest
- container: gcc:${{ matrix.ver }}
strategy:
fail-fast: false
matrix:
- ver: [10, 11, 12]
+ ver: [11, 12]
+ env:
+ CC: gcc-${{ matrix.ver }}
+ CXX: g++-${{ matrix.ver }}
+ steps:
+ - uses: actions/checkout@v3
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ sudo apt update -y
+ sudo apt install -y libabsl-dev libgtest-dev libbenchmark-dev
+ shell: bash
+ - run: make && make test
+ shell: bash
+ # GitHub Actions doesn't provide GCC 13, so we use a Docker container instead.
+ build-gcc-13:
+ runs-on: ubuntu-latest
+ container: gcc:13
+ strategy:
+ fail-fast: false
env:
CC: gcc
CXX: g++
steps:
- uses: actions/checkout@v3
+ - name: Install Abseil, GoogleTest and Benchmark
+ run: |
+ apt update -y
+ apt install -y libabsl-dev libgtest-dev libbenchmark-dev
+ shell: bash
- run: make && make test
shell: bash
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index e3c9d07..808832f 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -6,8 +6,180 @@ on:
required: true
type: number
jobs:
- no-op:
+ bdist_wheel-linux:
+ name: Linux ${{ matrix.os }}, ${{ matrix.arch.name }}, Python ${{ matrix.ver }}
+ runs-on: ${{ matrix.arch.runs-on }}
+ container:
+ image: quay.io/pypa/${{ matrix.os }}_${{ matrix.arch.python-name }}
+ options: --init
+ strategy:
+ fail-fast: false
+ matrix:
+ arch:
+ - { name: X64, python-name: x86_64, runs-on: [ubuntu-latest] }
+ - { name: ARM64, python-name: aarch64, runs-on: [self-hosted, linux, arm64] }
+ os: [manylinux2014, manylinux_2_28]
+ ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+ env:
+ BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ steps:
+ - uses: actions/checkout@v3
+ # TODO(junyer): Use `v2` whenever a new release is tagged.
+ - uses: bazelbuild/setup-bazelisk@6244971d4f7ba9aca943c2f3ede2bbd813fcca51
+ - name: Prepare Python ${{ matrix.ver }} environment
+ run: |
+ ln -sf /usr/local/bin/python${{ matrix.ver }} /usr/local/bin/python
+ ln -sf /usr/local/bin/python${{ matrix.ver }} /usr/local/bin/python3
+ python -m pip install --upgrade pip
+ python -m pip install --upgrade wheel auditwheel
+ python -m pip install --upgrade absl-py
+ shell: bash
+ - name: Build wheel
+ run: |
+ python setup.py bdist_wheel
+ python -m auditwheel repair --wheel-dir=. dist/*
+ shell: bash
+ working-directory: python
+ - name: Test wheel
+ run: |
+ python -m pip install google_re2-*.whl
+ python re2_test.py
+ shell: bash
+ working-directory: python
+ - uses: actions/upload-artifact@v3
+ with:
+ name: ${{ hashFiles('python/google_re2-*.whl') }}
+ path: python/google_re2-*.whl
+ retention-days: 1
+ bdist_wheel-macos:
+ name: macOS ${{ matrix.os }}, ${{ matrix.arch.name }}, Python ${{ matrix.ver }}
+ runs-on: macos-${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ arch:
+ - { name: X64, bazel-name: x86_64, python-name: x86_64 }
+ - { name: ARM64, bazel-name: arm64, python-name: arm64 }
+ os: [11, 12]
+ ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+ env:
+ BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ BAZEL_CPU: darwin_${{ matrix.arch.bazel-name }}
+ # Stop macOS from reporting the system version as 10.x.
+ # Otherwise, Python refuses to install the built wheel!
+ SYSTEM_VERSION_COMPAT: 0
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.ver }}
+ - name: Prepare Python ${{ matrix.ver }} environment
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install --upgrade wheel delocate
+ python -m pip install --upgrade absl-py
+ shell: bash
+ - name: Build wheel
+ run: |
+ python setup.py bdist_wheel \
+ --plat-name=macosx-${{ matrix.os }}.0-${{ matrix.arch.python-name }}
+ python -m delocate.cmd.delocate_wheel --wheel-dir=. dist/*
+ shell: bash
+ working-directory: python
+ - if: matrix.arch.name == runner.arch
+ name: Test wheel
+ run: |
+ python -m pip install google_re2-*.whl
+ python re2_test.py
+ shell: bash
+ working-directory: python
+ - uses: actions/upload-artifact@v3
+ with:
+ name: ${{ hashFiles('python/google_re2-*.whl') }}
+ path: python/google_re2-*.whl
+ retention-days: 1
+ bdist_wheel-windows:
+ name: Windows, ${{ matrix.arch.name }}, Python ${{ matrix.ver }}
+ runs-on: windows-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ arch:
+ - { name: X64, bazel-name: x64, python-name: amd64 }
+ # FIXME: Compiling succeeds, but linking fails with an error like
+ # "LINK : fatal error LNK1104: cannot open file 'python311.lib'".
+ # Maybe we will need GitHub-hosted runners for Windows on ARM64?!
+ # - { name: ARM64, bazel-name: arm64, python-name: arm64 }
+ ver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+ env:
+ BAZELISK_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ BAZEL_CPU: ${{ matrix.arch.bazel-name }}_windows
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.ver }}
+ - name: Prepare Python ${{ matrix.ver }} environment
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install --upgrade wheel
+ python -m pip install --upgrade absl-py
+ shell: bash
+ - name: Build wheel
+ run: |
+ python setup.py bdist_wheel \
+ --plat-name=win_${{ matrix.arch.python-name }}
+ cp dist/* .
+ shell: bash
+ working-directory: python
+ - if: matrix.arch.name == runner.arch
+ name: Test wheel
+ run: |
+ python -m pip install google_re2-*.whl
+ python re2_test.py
+ shell: bash
+ working-directory: python
+ - uses: actions/upload-artifact@v3
+ with:
+ name: ${{ hashFiles('python/google_re2-*.whl') }}
+ path: python/google_re2-*.whl
+ retention-days: 1
+ publish:
+ needs:
+ - bdist_wheel-linux
+ - bdist_wheel-macos
+ - bdist_wheel-windows
runs-on: ubuntu-latest
steps:
- - run: echo "These aren't the droids you're looking for."
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ with:
+ python-version: '3.x'
+ - name: Prepare Python 3.x environment
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install --upgrade wheel
+ shell: bash
+ - if: inputs.build == 1
+ name: Build source
+ run: |
+ python setup.py sdist
+ shell: bash
+ working-directory: python
+ - uses: actions/download-artifact@v3
+ with:
+ path: python
+ - name: Set build number to ${{ inputs.build }}
+ run: |
+ mkdir -p dist
+ for WHL in */google_re2-*.whl; do
+ python -m wheel unpack ${WHL}
+ python -m wheel pack --dest-dir=dist --build-number=${{ inputs.build }} google_re2-*
+ rm -rf google_re2-*
+ done
shell: bash
+ working-directory: python
+ - uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ password: ${{ secrets.PYPI_API_TOKEN }}
+ packages_dir: python/dist
diff --git a/BUILD b/BUILD.bazel
similarity index 85%
rename from BUILD
rename to BUILD.bazel
index 446c8b2..3f6dafe 100644
--- a/BUILD
+++ b/BUILD.bazel
@@ -36,7 +36,6 @@ cc_library(
"re2/simplify.cc",
"re2/sparse_array.h",
"re2/sparse_set.h",
- "re2/stringpiece.cc",
"re2/tostring.cc",
"re2/unicode_casefold.cc",
"re2/unicode_casefold.h",
@@ -44,13 +43,10 @@ cc_library(
"re2/unicode_groups.h",
"re2/walker-inl.h",
"util/logging.h",
- "util/mix.h",
- "util/mutex.h",
"util/rune.cc",
"util/strutil.cc",
"util/strutil.h",
"util/utf.h",
- "util/util.h",
],
hdrs = [
"re2/filtered_re2.h",
@@ -73,6 +69,19 @@ cc_library(
"//conditions:default": ["-pthread"],
}),
visibility = ["//visibility:public"],
+ deps = [
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/container:fixed_array",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/container:inlined_vector",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/types:optional",
+ "@com_google_absl//absl/types:span",
+ ],
)
cc_library(
@@ -93,20 +102,24 @@ cc_library(
"re2/testing/regexp_generator.h",
"re2/testing/string_generator.h",
"re2/testing/tester.h",
- "util/benchmark.h",
- "util/flags.h",
"util/malloc_counter.h",
"util/pcre.h",
- "util/test.h",
],
- deps = [":re2"],
+ deps = [
+ ":re2",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_googletest//:gtest",
+ ],
)
cc_library(
name = "test",
testonly = 1,
- srcs = ["util/test.cc"],
- deps = [":testing"],
+ srcs = [],
+ deps = [
+ ":testing",
+ "@com_google_googletest//:gtest_main",
+ ],
)
cc_test(
@@ -252,8 +265,11 @@ cc_test(
cc_library(
name = "benchmark",
testonly = 1,
- srcs = ["util/benchmark.cc"],
- deps = [":testing"],
+ srcs = [],
+ deps = [
+ ":testing",
+ "@com_github_google_benchmark//:benchmark_main",
+ ],
)
cc_binary(
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ebd1223..5312252 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,24 +24,20 @@ option(RE2_BUILD_FRAMEWORK "build RE2 as a framework" OFF)
# CMake seems to have no way to enable/disable testing per subproject,
# so we provide an option similar to BUILD_TESTING, but just for RE2.
-option(RE2_BUILD_TESTING "enable testing for RE2" ON)
+option(RE2_BUILD_TESTING "enable testing for RE2" OFF)
# The pkg-config Requires: field.
set(REQUIRES)
# ABI version
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
-set(SONAME 10)
+set(SONAME 11)
set(EXTRA_TARGET_LINK_LIBRARIES)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- if(MSVC_VERSION LESS 1900)
- message(FATAL_ERROR "you need Visual Studio 2015 or later")
- endif()
- if(BUILD_SHARED_LIBS)
- # See http://www.kitware.com/blog/home/post/939 for details.
- set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
+ if(MSVC_VERSION LESS 1920)
+ message(FATAL_ERROR "you need Visual Studio 2019 or later")
endif()
# CMake defaults to /W3, but some users like /W4 (or /Wall) and /WX,
# so we disable various warnings that aren't particularly helpful.
@@ -61,8 +57,33 @@ if(UNIX)
find_package(Threads REQUIRED)
endif()
+set(ABSL_DEPS
+ absl_base
+ absl_core_headers
+ absl_fixed_array
+ absl_flags
+ absl_flat_hash_map
+ absl_flat_hash_set
+ absl_inlined_vector
+ absl_optional
+ absl_span
+ absl_str_format
+ absl_strings
+ absl_synchronization
+ )
+
+# If a top-level project has called add_subdirectory(abseil-cpp) already (possibly
+# indirectly), let that take precedence over any copy of Abseil that might have
+# been installed on the system. And likewise for ICU, GoogleTest and Benchmark.
+if(NOT TARGET absl::base)
+ find_package(absl REQUIRED)
+endif()
+list(APPEND REQUIRES ${ABSL_DEPS})
+
if(RE2_USE_ICU)
- find_package(ICU REQUIRED COMPONENTS uc)
+ if(NOT TARGET ICU::uc)
+ find_package(ICU REQUIRED COMPONENTS uc)
+ endif()
add_definitions(-DRE2_USE_ICU)
list(APPEND REQUIRES icu-uc)
endif()
@@ -72,6 +93,12 @@ if(USEPCRE)
list(APPEND EXTRA_TARGET_LINK_LIBRARIES pcre)
endif()
+# TODO(junyer): Use string(JOIN " " ...) whenever CMake 3.12 (or newer) becomes
+# the minimum required: that will make this hack slightly less filthy. For now,
+# CMake does the same thing as string(CONCAT ...), basically, if we don't quote
+# ${REQUIRES}, so quote it despite prevailing style.
+string(REPLACE ";" " " REQUIRES "${REQUIRES}")
+
set(RE2_SOURCES
re2/bitmap256.cc
re2/bitstate.cc
@@ -90,7 +117,6 @@ set(RE2_SOURCES
re2/regexp.cc
re2/set.cc
re2/simplify.cc
- re2/stringpiece.cc
re2/tostring.cc
re2/unicode_casefold.cc
re2/unicode_groups.cc
@@ -106,7 +132,7 @@ set(RE2_HEADERS
)
add_library(re2 ${RE2_SOURCES})
-target_compile_features(re2 PUBLIC cxx_std_11)
+target_compile_features(re2 PUBLIC cxx_std_14)
target_include_directories(re2 PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
# CMake gives "set_target_properties called with incorrect number of arguments."
# errors if we don't quote ${RE2_HEADERS}, so quote it despite prevailing style.
@@ -114,6 +140,11 @@ set_target_properties(re2 PROPERTIES PUBLIC_HEADER "${RE2_HEADERS}")
set_target_properties(re2 PROPERTIES SOVERSION ${SONAME} VERSION ${SONAME}.0.0)
add_library(re2::re2 ALIAS re2)
+if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" AND BUILD_SHARED_LIBS)
+ set_target_properties(re2 PROPERTIES
+ WINDOWS_EXPORT_ALL_SYMBOLS ON)
+endif()
+
if(APPLE AND RE2_BUILD_FRAMEWORK)
set_target_properties(re2 PROPERTIES
FRAMEWORK TRUE
@@ -125,11 +156,23 @@ if(UNIX)
target_link_libraries(re2 PUBLIC Threads::Threads)
endif()
+foreach(dep ${ABSL_DEPS})
+ string(REGEX REPLACE "^absl_" "absl::" dep ${dep})
+ target_link_libraries(re2 PUBLIC ${dep})
+endforeach()
+
if(RE2_USE_ICU)
target_link_libraries(re2 PUBLIC ICU::uc)
endif()
if(RE2_BUILD_TESTING)
+ if(NOT TARGET GTest::gtest)
+ find_package(GTest REQUIRED)
+ endif()
+ if(NOT TARGET benchmark::benchmark)
+ find_package(benchmark REQUIRED)
+ endif()
+
set(TESTING_SOURCES
re2/testing/backtrack.cc
re2/testing/dump.cc
@@ -141,9 +184,9 @@ if(RE2_BUILD_TESTING)
util/pcre.cc
)
- add_library(testing STATIC ${TESTING_SOURCES})
- target_compile_features(testing PUBLIC cxx_std_11)
- target_link_libraries(testing PUBLIC re2)
+ add_library(testing ${TESTING_SOURCES})
+ target_compile_features(testing PUBLIC cxx_std_14)
+ target_link_libraries(testing PUBLIC re2 GTest::gtest)
set(TEST_TARGETS
charclass_test
@@ -174,16 +217,16 @@ if(RE2_BUILD_TESTING)
)
foreach(target ${TEST_TARGETS})
- add_executable(${target} re2/testing/${target}.cc util/test.cc)
- target_compile_features(${target} PUBLIC cxx_std_11)
- target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
+ add_executable(${target} re2/testing/${target}.cc)
+ target_compile_features(${target} PUBLIC cxx_std_14)
+ target_link_libraries(${target} PUBLIC testing GTest::gtest_main ${EXTRA_TARGET_LINK_LIBRARIES})
add_test(NAME ${target} COMMAND ${target})
endforeach()
foreach(target ${BENCHMARK_TARGETS})
- add_executable(${target} re2/testing/${target}.cc util/benchmark.cc)
- target_compile_features(${target} PUBLIC cxx_std_11)
- target_link_libraries(${target} testing ${EXTRA_TARGET_LINK_LIBRARIES})
+ add_executable(${target} re2/testing/${target}.cc)
+ target_compile_features(${target} PUBLIC cxx_std_14)
+ target_link_libraries(${target} PUBLIC testing benchmark::benchmark_main ${EXTRA_TARGET_LINK_LIBRARIES})
endforeach()
endif()
diff --git a/Makefile b/Makefile
index 34230ca..705d7fa 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,26 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
+# Build against Abseil.
+ABSL_DEPS=\
+ absl_base\
+ absl_core_headers\
+ absl_fixed_array\
+ absl_flags\
+ absl_flat_hash_map\
+ absl_flat_hash_set\
+ absl_inlined_vector\
+ absl_optional\
+ absl_span\
+ absl_str_format\
+ absl_strings\
+ absl_synchronization\
+
+CCABSL=$(shell pkg-config $(ABSL_DEPS) --cflags)
+# GCC barfs on `-Wl` whereas Clang doesn't mind, but it's unclear what
+# causes it to manifest on Ubuntu 22.04 LTS, so filter it out for now.
+LDABSL=$(shell pkg-config $(ABSL_DEPS) --libs | sed -e 's/-Wl / /g')
+
# To build against ICU for full Unicode properties support,
# uncomment the next two lines:
# CCICU=$(shell pkg-config icu-uc --cflags) -DRE2_USE_ICU
@@ -17,8 +37,8 @@ CXX?=g++
CXXFLAGS?=-O3 -g
LDFLAGS?=
# required
-RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCICU) $(CCPCRE)
-RE2_LDFLAGS?=-pthread $(LDICU) $(LDPCRE)
+RE2_CXXFLAGS?=-pthread -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -I. $(CCABSL) $(CCICU) $(CCPCRE)
+RE2_LDFLAGS?=-pthread $(LDABSL) $(LDICU) $(LDPCRE)
AR?=ar
ARFLAGS?=rsc
NM?=nm
@@ -43,14 +63,14 @@ SED_INPLACE=sed -i
endif
# The pkg-config Requires: field.
-REQUIRES=
+REQUIRES=$(ABSL_DEPS)
ifdef LDICU
REQUIRES+=icu-uc
endif
# ABI version
# http://tldp.org/HOWTO/Program-Library-HOWTO/shared-libraries.html
-SONAME=10
+SONAME=11
# To rebuild the Tables generated by Perl and Python scripts (requires Internet
# access for Unicode data), uncomment the following line:
@@ -84,17 +104,11 @@ INSTALL_HFILES=\
re2/stringpiece.h\
HFILES=\
- util/benchmark.h\
- util/flags.h\
util/logging.h\
util/malloc_counter.h\
- util/mix.h\
- util/mutex.h\
util/pcre.h\
util/strutil.h\
- util/test.h\
util/utf.h\
- util/util.h\
re2/bitmap256.h\
re2/filtered_re2.h\
re2/pod_array.h\
@@ -135,7 +149,6 @@ OFILES=\
obj/re2/regexp.o\
obj/re2/set.o\
obj/re2/simplify.o\
- obj/re2/stringpiece.o\
obj/re2/tostring.o\
obj/re2/unicode_casefold.o\
obj/re2/unicode_groups.o\
@@ -216,25 +229,24 @@ obj/so/libre2.$(SOEXT): $(SOFILES) libre2.symbols libre2.symbols.darwin
ln -sf libre2.$(SOEXTVER) $@
.PRECIOUS: obj/dbg/test/%
-obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES) obj/dbg/util/test.o
+obj/dbg/test/%: obj/dbg/libre2.a obj/dbg/re2/testing/%.o $(DTESTOFILES)
@mkdir -p obj/dbg/test
- $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) obj/dbg/util/test.o obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+ $(CXX) -o $@ obj/dbg/re2/testing/$*.o $(DTESTOFILES) -lgtest -lgtest_main obj/dbg/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
.PRECIOUS: obj/test/%
-obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
+obj/test/%: obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
@mkdir -p obj/test
- $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+ $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
# Test the shared lib, falling back to the static lib for private symbols
.PRECIOUS: obj/so/test/%
-obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES) obj/util/test.o
+obj/so/test/%: obj/so/libre2.$(SOEXT) obj/libre2.a obj/re2/testing/%.o $(TESTOFILES)
@mkdir -p obj/so/test
- $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) obj/util/test.o -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+ $(CXX) -o $@ obj/re2/testing/$*.o $(TESTOFILES) -lgtest -lgtest_main -Lobj/so -lre2 obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
-# Filter out dump.o because testing::TempDir() isn't available for it.
-obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o
+obj/test/regexp_benchmark: obj/libre2.a obj/re2/testing/regexp_benchmark.o $(TESTOFILES)
@mkdir -p obj/test
- $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(filter-out obj/re2/testing/dump.o, $(TESTOFILES)) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+ $(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) -lgtest -lbenchmark -lbenchmark_main obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. This fake fuzzing
# is simply a way to check that the target builds and then to run it against a
@@ -346,7 +358,7 @@ else
@cp testinstall.cc obj/static-testinstall.cc
(cd obj && export PKG_CONFIG_PATH=$(DESTDIR)$(libdir)/pkgconfig; \
$(CXX) static-testinstall.cc -o static-testinstall $(CXXFLAGS) $(LDFLAGS) \
- $$(pkg-config re2 --cflags --libs | sed -e "s#-lre2#-l:libre2.a#"))
+ $$(pkg-config re2 --cflags --libs | sed -e 's/-lre2/-l:libre2.a/'))
obj/static-testinstall
endif
diff --git a/README b/README
index caee6af..469d6f3 100644
--- a/README
+++ b/README
@@ -10,6 +10,11 @@ make test
make install
make testinstall
+Building RE2 requires Abseil (https://github.com/abseil/abseil-cpp)
+to be installed on your system. Building the tests for RE2 requires
+GoogleTest (https://github.com/google/googletest) and Benchmark
+(https://github.com/google/benchmark) to be installed as well.
+
There is a fair amount of documentation (including code snippets) in
the re2.h header file.
diff --git a/WORKSPACE b/WORKSPACE
deleted file mode 100644
index b35619c..0000000
--- a/WORKSPACE
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright 2009 The RE2 Authors. All Rights Reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-# Bazel (http://bazel.io/) WORKSPACE file for RE2.
-
-workspace(name = "com_googlesource_code_re2")
diff --git a/WORKSPACE.bazel b/WORKSPACE.bazel
new file mode 100644
index 0000000..c89e979
--- /dev/null
+++ b/WORKSPACE.bazel
@@ -0,0 +1,62 @@
+# Copyright 2009 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Bazel (http://bazel.io/) WORKSPACE file for RE2.
+
+workspace(name = "com_googlesource_code_re2")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+ name = "com_google_absl",
+ strip_prefix = "abseil-cpp-master",
+ urls = ["https://github.com/abseil/abseil-cpp/archive/master.zip"],
+)
+
+http_archive(
+ name = "bazel_skylib",
+ strip_prefix = "bazel-skylib-main",
+ urls = ["https://github.com/bazelbuild/bazel-skylib/archive/main.zip"],
+)
+
+http_archive(
+ name = "com_github_google_benchmark",
+ strip_prefix = "benchmark-main",
+ urls = ["https://github.com/google/benchmark/archive/main.zip"],
+)
+
+http_archive(
+ name = "com_google_googletest",
+ strip_prefix = "googletest-main",
+ urls = ["https://github.com/google/googletest/archive/main.zip"],
+)
+
+http_archive(
+ name = "rules_python",
+ strip_prefix = "rules_python-main",
+ urls = ["https://github.com/bazelbuild/rules_python/archive/main.zip"],
+)
+
+http_archive(
+ name = "io_abseil_py",
+ strip_prefix = "abseil-py-main",
+ urls = ["https://github.com/abseil/abseil-py/archive/main.zip"],
+)
+
+http_archive(
+ name = "pybind11_bazel",
+ strip_prefix = "pybind11_bazel-master",
+ urls = ["https://github.com/pybind/pybind11_bazel/archive/master.zip"],
+)
+
+http_archive(
+ name = "pybind11",
+ build_file = "@pybind11_bazel//:pybind11.BUILD",
+ strip_prefix = "pybind11-master",
+ urls = ["https://github.com/pybind/pybind11/archive/master.zip"],
+)
+
+load("@pybind11_bazel//:python_configure.bzl", "python_configure")
+
+python_configure(name = "local_config_python")
diff --git a/app/BUILD.bazel b/app/BUILD.bazel
new file mode 100644
index 0000000..01ff8d4
--- /dev/null
+++ b/app/BUILD.bazel
@@ -0,0 +1,19 @@
+# Copyright 2009 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Bazel (http://bazel.io/) BUILD file for RE2 app.
+
+cc_binary(
+ name = "_re2.js",
+ srcs = ["_re2.cc"],
+ linkopts = [
+ "--bind",
+ "-sENVIRONMENT=web",
+ "-sMODULARIZE=1",
+ "-sEXPORT_ES6=1",
+ "-sEXPORT_NAME=loadModule",
+ "-sUSE_PTHREADS=0",
+ ],
+ deps = ["//:re2"],
+)
diff --git a/app/_re2.cc b/app/_re2.cc
new file mode 100644
index 0000000..a63313e
--- /dev/null
+++ b/app/_re2.cc
@@ -0,0 +1,94 @@
+// Copyright 2022 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <memory>
+#include <string>
+
+#include <emscripten/bind.h>
+#include "re2/prog.h"
+#include "re2/re2.h"
+#include "re2/regexp.h"
+
+namespace re2_app {
+
+struct Info {
+ std::string pattern;
+ std::string error;
+ std::string prefix;
+ bool prefix_foldcase = false;
+ std::string accel_prefix;
+ bool accel_prefix_foldcase = false;
+ int num_captures;
+ bool is_one_pass;
+ bool can_bit_state;
+ std::string bytecode;
+ std::string bytemap;
+};
+
+Info GetInfo(const std::string& pattern) {
+ Info info;
+ info.pattern = pattern;
+
+ RE2::Options options;
+ re2::RegexpStatus status;
+ re2::Regexp* regexp = re2::Regexp::Parse(
+ pattern, static_cast<re2::Regexp::ParseFlags>(options.ParseFlags()),
+ &status);
+ if (regexp == nullptr) {
+ info.error = "failed to parse pattern: " + status.Text();
+ return info;
+ }
+
+ std::string prefix;
+ bool prefix_foldcase;
+ re2::Regexp* suffix;
+ if (regexp->RequiredPrefix(&prefix, &prefix_foldcase, &suffix)) {
+ info.prefix = prefix;
+ info.prefix_foldcase = prefix_foldcase;
+ } else {
+ suffix = regexp->Incref();
+ }
+
+ std::unique_ptr<re2::Prog> prog(suffix->CompileToProg(options.max_mem()));
+ if (prog == nullptr) {
+ info.error = "failed to compile forward Prog";
+ suffix->Decref();
+ regexp->Decref();
+ return info;
+ }
+
+ if (regexp->RequiredPrefixForAccel(&prefix, &prefix_foldcase)) {
+ info.accel_prefix = prefix;
+ info.accel_prefix_foldcase = prefix_foldcase;
+ }
+
+ info.num_captures = suffix->NumCaptures();
+ info.is_one_pass = prog->IsOnePass();
+ info.can_bit_state = prog->CanBitState();
+ info.bytecode = prog->Dump();
+ info.bytemap = prog->DumpByteMap();
+
+ suffix->Decref();
+ regexp->Decref();
+ return info;
+}
+
+EMSCRIPTEN_BINDINGS(_re2) {
+ emscripten::value_object<Info>("Info")
+ .field("pattern", &Info::pattern)
+ .field("error", &Info::error)
+ .field("prefix", &Info::prefix)
+ .field("prefix_foldcase", &Info::prefix_foldcase)
+ .field("accel_prefix", &Info::accel_prefix)
+ .field("accel_prefix_foldcase", &Info::accel_prefix_foldcase)
+ .field("num_captures", &Info::num_captures)
+ .field("is_one_pass", &Info::is_one_pass)
+ .field("can_bit_state", &Info::can_bit_state)
+ .field("bytecode", &Info::bytecode)
+ .field("bytemap", &Info::bytemap);
+
+ emscripten::function("getInfo", &GetInfo);
+}
+
+} // namespace re2_app
diff --git a/app/_re2.d.ts b/app/_re2.d.ts
new file mode 100644
index 0000000..dff5e49
--- /dev/null
+++ b/app/_re2.d.ts
@@ -0,0 +1,23 @@
+// Copyright 2022 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+export type Info = {
+ pattern: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+ error: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+ prefix: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+ prefix_foldcase: boolean,
+ accel_prefix: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+ accel_prefix_foldcase: boolean,
+ num_captures: number,
+ is_one_pass: boolean,
+ can_bit_state: boolean,
+ bytecode: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+ bytemap: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string,
+};
+
+export interface MainModule {
+ getInfo(pattern: ArrayBuffer|Uint8Array|Uint8ClampedArray|Int8Array|string): Info;
+}
+
+export default function loadModule(): Promise<MainModule>;
diff --git a/app/app.ts b/app/app.ts
new file mode 100644
index 0000000..4b9e7bd
--- /dev/null
+++ b/app/app.ts
@@ -0,0 +1,111 @@
+// Copyright 2022 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+import {css, html, LitElement, render} from 'lit';
+import {customElement} from 'lit/decorators.js';
+
+import /*default*/ loadModule from './_re2';
+import {Info, MainModule} from './_re2';
+
+// Assigned once the module promise resolves. The <re2-dev> element is only
+// rendered after that assignment, so the element can use _re2 directly.
+var _re2: MainModule;
+loadModule().then((module: MainModule) => {
+ _re2 = module;
+ render(html`<title>re2-dev</title><re2-dev></re2-dev>`, document.body);
+});
+
+// <re2-dev>: a text input for a pattern plus a dump of the Info object that
+// _re2.getInfo() reports for it. The pattern is mirrored in the URL fragment.
+@customElement('re2-dev')
+export class RE2Dev extends LitElement {
+ private _pattern: string = '';
+ private _info: Info|null = null;
+
+ // Seeds the pattern from the URL fragment (everything after the '#'), which
+ // is where _onChange stores it. An empty pattern leaves _info as null.
+ constructor() {
+ super();
+ this._pattern = decodeURIComponent(window.location.hash.slice(1));
+ this._info = this._pattern ? _re2.getInfo(this._pattern) : null;
+ this.requestUpdate();
+ }
+
+ // Handler for the input's change event: recomputes _info for the new
+ // pattern, requests a re-render and writes the pattern to the URL fragment.
+ private _onChange = (e: Event) => {
+ this._pattern = (e.target as HTMLInputElement).value;
+ this._info = this._pattern ? _re2.getInfo(this._pattern) : null;
+ this.requestUpdate();
+ window.location.hash = '#' + encodeURIComponent(this._pattern);
+ };
+
+ static override styles = css`
+.code {
+ font-family: monospace;
+ white-space: pre-line;
+}
+`;
+
+ // Renders the input, followed by one of: nothing (no pattern), the error
+ // text (when _info.error is non-empty), or every field of _info.
+ override render() {
+ var fragments = [];
+ fragments.push(html`
+<div>
+ <input type="text" size="48" @change=${this._onChange} .value=${this._pattern}>
+</div>
+`);
+
+ // No pattern entered yet: show only the input.
+ if (this._info === null) {
+ return html`${fragments}`;
+ }
+
+ // An error was reported: show it and skip the remaining fields.
+ if (this._info.error) {
+ fragments.push(html`
+<br>
+<div>
+ error:
+ <span class="code">${this._info.error}</span>
+</div>
+`);
+ return html`${fragments}`;
+ }
+
+ fragments.push(html`
+<br>
+<div>
+ pattern:
+ <span class="code">${this._info.pattern}</span>
+ <br>
+ prefix:
+ <span class="code">${this._info.prefix}</span>
+ ·
+ _foldcase:
+ <span class="code">${this._info.prefix_foldcase}</span>
+ <br>
+ accel_prefix:
+ <span class="code">${this._info.accel_prefix}</span>
+ ·
+ _foldcase:
+ <span class="code">${this._info.accel_prefix_foldcase}</span>
+ <br>
+ num_captures:
+ <span class="code">${this._info.num_captures}</span>
+ <br>
+ is_one_pass:
+ <span class="code">${this._info.is_one_pass}</span>
+ <br>
+ can_bit_state:
+ <span class="code">${this._info.can_bit_state}</span>
+ <br>
+ <br>
+ bytecode:
+ <br>
+ <span class="code">${this._info.bytecode}</span>
+ <br>
+ bytemap:
+ <br>
+ <span class="code">${this._info.bytemap}</span>
+</div>
+`);
+ return html`${fragments}`;
+ }
+}
+
+// Maps the 're2-dev' tag name to the RE2Dev class for the type checker.
+declare global {
+ interface HTMLElementTagNameMap {
+ 're2-dev': RE2Dev;
+ }
+}
diff --git a/app/build.sh b/app/build.sh
new file mode 100755
index 0000000..c272bf5
--- /dev/null
+++ b/app/build.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Builds the app and leaves the bundled output in ./deploy.
+#
+# When run as a non-root user, this script creates ./deploy and re-runs itself
+# inside an emscripten/emsdk Docker container (source tree mounted at /src, the
+# current directory at /dst). When run with UID 0 -- presumably the
+# in-container invocation -- it performs the actual Bazel/emcc/npm build.
+set -eux
+
+SRCDIR=$(readlink --canonicalize $(dirname $0))
+DSTDIR=$(mktemp --directory --tmpdir $(basename $0).XXXXXXXXXX)
+
+BAZEL=/tmp/bazel
+BAZELISK_RELEASE=v1.16.0
+
+if [[ ${UID} -ne 0 ]]; then
+ # Outer invocation: refuse to overwrite an existing deploy directory.
+ if [[ -d deploy ]]; then
+ echo -e '\033[1;31m' "** The ${PWD}/deploy directory exists! Refusing to clobber it! **" '\033[0m'
+ exit 1
+ fi
+ mkdir deploy
+ sudo docker run -i -t --pull always --rm -v ${SRCDIR}/..:/src -v ${PWD}:/dst emscripten/emsdk /src/app/$(basename $0)
+ ls -l deploy
+else
+ # Download Bazelisk and use it as the Bazel binary.
+ wget -O ${BAZEL} https://github.com/bazelbuild/bazelisk/releases/download/${BAZELISK_RELEASE}/bazelisk-linux-amd64
+ chmod +x ${BAZEL}
+
+ cd ${SRCDIR}
+ # Emscripten doesn't support `-fstack-protector`.
+ AR=emar CC=emcc \
+ ${BAZEL} build --compilation_mode=opt \
+ --copt=-fno-stack-protector \
+ -- :all
+ # Bazel doesn't retain the `_re2.wasm` artifact;
+ # we have to redo the link command to obtain it.
+ pushd ..
+ emcc @bazel-bin/app/_re2.js-2.params
+ cd bazel-bin/app
+ cp _re2.js _re2.wasm ${DSTDIR}
+ popd
+ # Clean up the sundry Bazel output directories.
+ ${BAZEL} clean --expunge
+ cp app.ts index.html _re2.d.ts ${DSTDIR}
+ cp package.json rollup.config.js tsconfig.json ${DSTDIR}
+
+ # Compile and bundle in the temporary directory, then move the bundle to
+ # the mounted /dst/deploy.
+ cd ${DSTDIR}
+ npm install
+ npx tsc
+ npx rollup -c rollup.config.js -d deploy
+ mv deploy/* /dst/deploy
+fi
+
+# Both branches end by removing the temporary directory.
+cd ${SRCDIR}
+rm -rf ${DSTDIR}
+
+exit 0
diff --git a/app/index.html b/app/index.html
new file mode 100644
index 0000000..d229e56
--- /dev/null
+++ b/app/index.html
@@ -0,0 +1,5 @@
+<!DOCTYPE html>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<style>:root { color-scheme: dark light; }</style>
+<script type="module" src="app.js"></script>
diff --git a/app/package.json b/app/package.json
new file mode 100644
index 0000000..e702789
--- /dev/null
+++ b/app/package.json
@@ -0,0 +1,14 @@
+{
+ "dependencies": {
+ "lit": "*"
+ },
+ "devDependencies": {
+ "@rollup/plugin-node-resolve": "*",
+ "@rollup/plugin-terser": "*",
+ "@web/rollup-plugin-html": "*",
+ "@web/rollup-plugin-import-meta-assets": "*",
+ "rollup": "~2",
+ "tslib": "*",
+ "typescript": "*"
+ }
+}
diff --git a/app/rollup.config.js b/app/rollup.config.js
new file mode 100644
index 0000000..3a20e66
--- /dev/null
+++ b/app/rollup.config.js
@@ -0,0 +1,28 @@
+// Copyright 2022 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+import nodeResolve from '@rollup/plugin-node-resolve';
+import terser from '@rollup/plugin-terser';
+import html from '@web/rollup-plugin-html';
+import {importMetaAssets} from '@web/rollup-plugin-import-meta-assets';
+
+// Rollup configuration: bundles starting from index.html and emits ES modules
+// whose entry, chunk and asset file names are hash-based.
+export default {
+ input: 'index.html',
+ output: {
+ entryFileNames: '[hash].js',
+ chunkFileNames: '[hash].js',
+ assetFileNames: '[hash][extname]',
+ format: 'es',
+ },
+ preserveEntrySignatures: false,
+ plugins:
+ [
+ html({
+ minify: true,
+ }),
+ nodeResolve(),
+ terser(),
+ importMetaAssets(),
+ ],
+};
diff --git a/app/tsconfig.json b/app/tsconfig.json
new file mode 100644
index 0000000..86cc302
--- /dev/null
+++ b/app/tsconfig.json
@@ -0,0 +1,17 @@
+{
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "esnext",
+ "moduleResolution": "node",
+ "noEmitOnError": true,
+ "lib": ["esnext", "dom"],
+ "strict": true,
+ "esModuleInterop": false,
+ "allowSyntheticDefaultImports": true,
+ "experimentalDecorators": true,
+ "importHelpers": true,
+ "sourceMap": true,
+ "inlineSources": true,
+ "incremental": true
+ }
+}
diff --git a/debian/changelog b/debian/changelog
index b19f501..4a29ec7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+re2 (20230601-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- Debian Janitor <janitor@jelmer.uk> Thu, 01 Jun 2023 02:59:58 -0000
+
re2 (20230301-1) experimental; urgency=medium
* New upstream release.
diff --git a/debian/patches/debian-unicode-data.patch b/debian/patches/debian-unicode-data.patch
index 80da869..51f560d 100644
--- a/debian/patches/debian-unicode-data.patch
+++ b/debian/patches/debian-unicode-data.patch
@@ -10,10 +10,10 @@ Forwarded: not-needed
re2/unicode.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/re2/unicode.py b/re2/unicode.py
-index 1b68cbe..4ffcfcf 100644
---- a/re2/unicode.py
-+++ b/re2/unicode.py
+Index: re2.git/re2/unicode.py
+===================================================================
+--- re2.git.orig/re2/unicode.py
++++ re2.git/re2/unicode.py
@@ -13,7 +13,7 @@ import re
import urllib.request
diff --git a/debian/patches/parallel-test-runner.patch b/debian/patches/parallel-test-runner.patch
index 0dd4658..c1d7dad 100644
--- a/debian/patches/parallel-test-runner.patch
+++ b/debian/patches/parallel-test-runner.patch
@@ -13,11 +13,11 @@ Forwarded: not-needed
Makefile | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
-diff --git a/Makefile b/Makefile
-index 34230ca..bafe910 100644
---- a/Makefile
-+++ b/Makefile
-@@ -268,7 +268,13 @@ clean:
+Index: re2.git/Makefile
+===================================================================
+--- re2.git.orig/Makefile
++++ re2.git/Makefile
+@@ -280,7 +280,13 @@ clean:
testofiles: $(TESTOFILES)
.PHONY: test
diff --git a/python/BUILD.bazel b/python/BUILD.bazel
new file mode 100644
index 0000000..73547c7
--- /dev/null
+++ b/python/BUILD.bazel
@@ -0,0 +1,36 @@
+# Copyright 2009 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Bazel (http://bazel.io/) BUILD file for RE2 Python.
+
+load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
+load("@rules_python//python:defs.bzl", "py_library", "py_test")
+
+# The native extension module, built from _re2.cc against the RE2 library.
+pybind_extension(
+ name = "_re2",
+ srcs = ["_re2.cc"],
+ deps = [
+ "//:re2",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+# The pure-Python wrapper; the extension's shared object is attached as data.
+py_library(
+ name = "re2",
+ srcs = ["re2.py"],
+ data = [":_re2.so"],
+ imports = ["."],
+ visibility = ["//visibility:public"],
+)
+
+# Tests for the wrapper, using absltest and its parameterized support.
+py_test(
+ name = "re2_test",
+ size = "small",
+ srcs = ["re2_test.py"],
+ deps = [
+ ":re2",
+ "@io_abseil_py//absl/testing:absltest",
+ "@io_abseil_py//absl/testing:parameterized",
+ ],
+)
diff --git a/python/LICENSE b/python/LICENSE
new file mode 120000
index 0000000..ea5b606
--- /dev/null
+++ b/python/LICENSE
@@ -0,0 +1 @@
+../LICENSE
\ No newline at end of file
diff --git a/python/README b/python/README
new file mode 100644
index 0000000..782378f
--- /dev/null
+++ b/python/README
@@ -0,0 +1 @@
+Building requires Python 3 and pybind11 to be installed on your system.
diff --git a/python/_re2.cc b/python/_re2.cc
new file mode 100644
index 0000000..8564f8a
--- /dev/null
+++ b/python/_re2.cc
@@ -0,0 +1,338 @@
+// Copyright 2019 The RE2 Authors. All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include "absl/strings/string_view.h"
+#include "re2/filtered_re2.h"
+#include "re2/re2.h"
+#include "re2/set.h"
+
+#ifdef _WIN32
+#include <basetsd.h>
+#define ssize_t SSIZE_T
+#endif
+
+namespace re2_python {
+
+// This is conventional.
+namespace py = pybind11;
+
+// In terms of the pybind11 API, a py::buffer is merely a py::object that
+// supports the buffer interface/protocol and you must explicitly request
+// a py::buffer_info in order to access the actual bytes. Under the hood,
+// the py::buffer_info manages a reference count to the py::buffer, so it
+// must be constructed and subsequently destructed while holding the GIL.
+//
+// Wraps the memory described by bytes in an absl::string_view without copying
+// it: the view is built directly from bytes.ptr and bytes.size.
+static inline absl::string_view FromBytes(const py::buffer_info& bytes) {
+ char* data = reinterpret_cast<char*>(bytes.ptr);
+ ssize_t size = bytes.size;
+ return absl::string_view(data, size);
+}
+
+// Returns the length in bytes (1 to 4) attributed to the byte at *ptr, using a
+// 16-entry table indexed by the byte's high nibble: 0x0-0xB -> 1,
+// 0xC-0xD -> 2, 0xE -> 3, 0xF -> 4. Intended to be called on the first byte
+// of a UTF-8 sequence; note that high nibbles 0x8-0xB also map to 1.
+static inline int OneCharLen(const char* ptr) {
+ return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"[(*ptr & 0xFF) >> 4];
+}
+
+// Helper function for when Python encodes str to bytes and then needs to
+// convert str offsets to bytes offsets. Assumes that text is valid UTF-8.
+//
+// Starting at byte offset pos, steps over up to len characters and returns
+// the number of bytes stepped over. Stepping stops early once the end of the
+// buffer is reached.
+ssize_t CharLenToBytes(py::buffer buffer, ssize_t pos, ssize_t len) {
+ auto bytes = buffer.request();
+ auto text = FromBytes(bytes);
+ auto ptr = text.data() + pos;
+ auto end = text.data() + text.size();
+ while (ptr < end && len > 0) {
+ ptr += OneCharLen(ptr);
+ --len;
+ }
+ return ptr - (text.data() + pos);
+}
+
+// Helper function for when Python decodes bytes to str and then needs to
+// convert bytes offsets to str offsets. Assumes that text is valid UTF-8.
+//
+// Returns the number of characters counted while stepping from byte offset
+// pos up to byte offset endpos.
+ssize_t BytesToCharLen(py::buffer buffer, ssize_t pos, ssize_t endpos) {
+ auto bytes = buffer.request();
+ auto text = FromBytes(bytes);
+ auto ptr = text.data() + pos;
+ auto end = text.data() + endpos;
+ ssize_t len = 0;
+ while (ptr < end) {
+ ptr += OneCharLen(ptr);
+ ++len;
+ }
+ return len;
+}
+
+// Constructor shim for the Python RE2 class: builds an RE2 from the bytes in
+// buffer and the given options. The result's ok() and error() are exposed
+// separately in the module bindings.
+std::unique_ptr<RE2> RE2InitShim(py::buffer buffer,
+ const RE2::Options& options) {
+ auto bytes = buffer.request();
+ auto pattern = FromBytes(bytes);
+ return std::make_unique<RE2>(pattern, options);
+}
+
+// Returns self.error() to Python as a bytes object.
+py::bytes RE2ErrorShim(const RE2& self) {
+ // Return std::string as bytes. That is, without decoding to str.
+ return self.error();
+}
+
+// Returns the entries of self.NamedCapturingGroups() as a vector of
+// (name, index) pairs, with each name as bytes rather than str.
+std::vector<std::pair<py::bytes, int>> RE2NamedCapturingGroupsShim(
+ const RE2& self) {
+ // The total number of capturing groups is used as the reserved capacity.
+ const int num_groups = self.NumberOfCapturingGroups();
+ std::vector<std::pair<py::bytes, int>> groups;
+ groups.reserve(num_groups);
+ for (const auto& it : self.NamedCapturingGroups()) {
+ groups.emplace_back(it.first, it.second);
+ }
+ return groups;
+}
+
+// Returns the histogram that self.ProgramFanout() fills in, by value.
+std::vector<int> RE2ProgramFanoutShim(const RE2& self) {
+ std::vector<int> histogram;
+ self.ProgramFanout(&histogram);
+ return histogram;
+}
+
+// Returns the histogram that self.ReverseProgramFanout() fills in, by value.
+std::vector<int> RE2ReverseProgramFanoutShim(const RE2& self) {
+ std::vector<int> histogram;
+ self.ReverseProgramFanout(&histogram);
+ return histogram;
+}
+
+// Returns (ok, min, max): ok is the result of self.PossibleMatchRange() and
+// min/max are the strings it filled in, passed to Python as bytes.
+std::tuple<bool, py::bytes, py::bytes> RE2PossibleMatchRangeShim(
+ const RE2& self, int maxlen) {
+ std::string min, max;
+ // Return std::string as bytes. That is, without decoding to str.
+ return {self.PossibleMatchRange(&min, &max, maxlen), min, max};
+}
+
+// Runs self.Match() on the bytes of buffer between pos and endpos and returns
+// one (begin, end) pair per group, group 0 (the whole match) first. Offsets
+// are relative to the start of the buffer; a group with no match is (-1, -1),
+// and every entry is (-1, -1) when the match as a whole fails.
+std::vector<std::pair<ssize_t, ssize_t>> RE2MatchShim(const RE2& self,
+ RE2::Anchor anchor,
+ py::buffer buffer,
+ ssize_t pos,
+ ssize_t endpos) {
+ auto bytes = buffer.request();
+ auto text = FromBytes(bytes);
+ const int num_groups = self.NumberOfCapturingGroups() + 1; // need $0
+ std::vector<absl::string_view> groups;
+ groups.resize(num_groups);
+ // The GIL is released for the rest of the function; only C++ values are
+ // touched from here on.
+ py::gil_scoped_release release_gil;
+ if (!self.Match(text, pos, endpos, anchor, groups.data(), groups.size())) {
+ // Ensure that groups are null before converting to spans!
+ for (auto& it : groups) {
+ it = absl::string_view();
+ }
+ }
+ std::vector<std::pair<ssize_t, ssize_t>> spans;
+ spans.reserve(num_groups);
+ for (const auto& it : groups) {
+ if (it.data() == NULL) {
+ spans.emplace_back(-1, -1);
+ } else {
+ spans.emplace_back(it.data() - text.data(),
+ it.data() - text.data() + it.size());
+ }
+ }
+ return spans;
+}
+
+// Returns RE2::QuoteMeta() applied to the bytes in buffer, as bytes.
+py::bytes RE2QuoteMetaShim(py::buffer buffer) {
+ auto bytes = buffer.request();
+ auto pattern = FromBytes(bytes);
+ // Return std::string as bytes. That is, without decoding to str.
+ return RE2::QuoteMeta(pattern);
+}
+
+// Thin wrapper around RE2::Set whose methods take Python buffers.
+class Set {
+ public:
+ Set(RE2::Anchor anchor, const RE2::Options& options)
+ : set_(options, anchor) {}
+
+ ~Set() = default;
+
+ // Not copyable or movable.
+ Set(const Set&) = delete;
+ Set& operator=(const Set&) = delete;
+
+ // Adds the pattern held in buffer and returns the value of RE2::Set::Add().
+ // No error string is requested from RE2.
+ int Add(py::buffer buffer) {
+ auto bytes = buffer.request();
+ auto pattern = FromBytes(bytes);
+ int index = set_.Add(pattern, /*error=*/NULL); // -1 on error
+ return index;
+ }
+
+ bool Compile() {
+ // Compiling can fail.
+ return set_.Compile();
+ }
+
+ // Returns the indices that RE2::Set::Match() reports for the bytes in
+ // buffer. The GIL is released while matching.
+ std::vector<int> Match(py::buffer buffer) const {
+ auto bytes = buffer.request();
+ auto text = FromBytes(bytes);
+ std::vector<int> matches;
+ py::gil_scoped_release release_gil;
+ set_.Match(text, &matches);
+ return matches;
+ }
+
+ private:
+ RE2::Set set_;
+};
+
+// Wrapper around re2::FilteredRE2 that also owns the RE2::Set used to find
+// which atoms occur in a text. The set is only created by Compile(), so
+// Match() must not be called before Compile().
+class Filter {
+ public:
+  Filter() = default;
+  ~Filter() = default;
+
+  // Not copyable or movable.
+  Filter(const Filter&) = delete;
+  Filter& operator=(const Filter&) = delete;
+
+  // Adds the pattern held in buffer with the given options. Returns the index
+  // assigned by FilteredRE2::Add(), or -1 if that call did not set one.
+  int Add(py::buffer buffer, const RE2::Options& options) {
+    auto bytes = buffer.request();
+    auto pattern = FromBytes(bytes);
+    int index = -1;  // not clobbered on error
+    filter_.Add(pattern, options, &index);
+    return index;
+  }
+
+  // Compiles the filter, then builds a literal, case-insensitive, unanchored
+  // RE2::Set containing one entry per atom reported by the filter. Returns
+  // whether compiling that set succeeded.
+  bool Compile() {
+    std::vector<std::string> atoms;
+    filter_.Compile(&atoms);
+    RE2::Options options;
+    options.set_literal(true);
+    options.set_case_sensitive(false);
+    set_ = std::make_unique<RE2::Set>(options, RE2::UNANCHORED);
+    for (int i = 0; i < static_cast<int>(atoms.size()); ++i) {
+      if (set_->Add(atoms[i], /*error=*/NULL) != i) {
+        // Should never happen: the atom is a literal!
+        py::pybind11_fail("set_->Add() failed");
+      }
+    }
+    // Compiling can fail.
+    return set_->Compile();
+  }
+
+  // Matches the bytes in buffer against the atom set and returns the indices
+  // of the regexps that FilteredRE2 reports: AllPotentials() when potential
+  // is true, AllMatches() otherwise. The GIL is released while matching.
+  // Fails (via py::pybind11_fail) if Compile() has not been called.
+  std::vector<int> Match(py::buffer buffer, bool potential) const {
+    if (set_ == nullptr) {
+      // set_ only exists after Compile(); fail loudly here instead of
+      // dereferencing a null pointer below.
+      py::pybind11_fail("Match() called before compiling");
+    }
+    auto bytes = buffer.request();
+    auto text = FromBytes(bytes);
+    std::vector<int> atoms;
+    py::gil_scoped_release release_gil;
+    set_->Match(text, &atoms);
+    std::vector<int> matches;
+    if (potential) {
+      filter_.AllPotentials(atoms, &matches);
+    } else {
+      filter_.AllMatches(text, atoms, &matches);
+    }
+    return matches;
+  }
+
+  // Returns the compiled RE2 stored in the filter at the given index.
+  const RE2& GetRE2(int index) const {
+    return filter_.GetRE2(index);
+  }
+
+ private:
+  re2::FilteredRE2 filter_;
+  std::unique_ptr<RE2::Set> set_;  // null until Compile() is called
+};
+
+// Module definition for _re2: two free helper functions, the RE2 class with
+// its nested Anchor enum and Options class (which nests the Encoding enum),
+// and the Set and Filter wrapper classes.
+PYBIND11_MODULE(_re2, module) {
+ module.def("CharLenToBytes", &CharLenToBytes);
+ module.def("BytesToCharLen", &BytesToCharLen);
+
+ // CLASSES
+ // class RE2
+ // enum Anchor
+ // class Options
+ // enum Encoding
+ // class Set
+ // class Filter
+ py::class_<RE2> re2(module, "RE2");
+ py::enum_<RE2::Anchor> anchor(re2, "Anchor");
+ py::class_<RE2::Options> options(re2, "Options");
+ py::enum_<RE2::Options::Encoding> encoding(options, "Encoding");
+ py::class_<Set> set(module, "Set");
+ py::class_<Filter> filter(module, "Filter");
+
+ anchor.value("UNANCHORED", RE2::Anchor::UNANCHORED);
+ anchor.value("ANCHOR_START", RE2::Anchor::ANCHOR_START);
+ anchor.value("ANCHOR_BOTH", RE2::Anchor::ANCHOR_BOTH);
+
+ encoding.value("UTF8", RE2::Options::Encoding::EncodingUTF8);
+ encoding.value("LATIN1", RE2::Options::Encoding::EncodingLatin1);
+
+ // Each option is exposed as a read/write property backed by the matching
+ // getter and set_* method of RE2::Options.
+ options.def(py::init<>())
+ .def_property("max_mem", //
+ &RE2::Options::max_mem, //
+ &RE2::Options::set_max_mem) //
+ .def_property("encoding", //
+ &RE2::Options::encoding, //
+ &RE2::Options::set_encoding) //
+ .def_property("posix_syntax", //
+ &RE2::Options::posix_syntax, //
+ &RE2::Options::set_posix_syntax) //
+ .def_property("longest_match", //
+ &RE2::Options::longest_match, //
+ &RE2::Options::set_longest_match) //
+ .def_property("log_errors", //
+ &RE2::Options::log_errors, //
+ &RE2::Options::set_log_errors) //
+ .def_property("literal", //
+ &RE2::Options::literal, //
+ &RE2::Options::set_literal) //
+ .def_property("never_nl", //
+ &RE2::Options::never_nl, //
+ &RE2::Options::set_never_nl) //
+ .def_property("dot_nl", //
+ &RE2::Options::dot_nl, //
+ &RE2::Options::set_dot_nl) //
+ .def_property("never_capture", //
+ &RE2::Options::never_capture, //
+ &RE2::Options::set_never_capture) //
+ .def_property("case_sensitive", //
+ &RE2::Options::case_sensitive, //
+ &RE2::Options::set_case_sensitive) //
+ .def_property("perl_classes", //
+ &RE2::Options::perl_classes, //
+ &RE2::Options::set_perl_classes) //
+ .def_property("word_boundary", //
+ &RE2::Options::word_boundary, //
+ &RE2::Options::set_word_boundary) //
+ .def_property("one_line", //
+ &RE2::Options::one_line, //
+ &RE2::Options::set_one_line); //
+
+ // Methods bound to *Shim functions go through the adapters defined above;
+ // the others bind RE2 member functions directly.
+ re2.def(py::init(&RE2InitShim))
+ .def("ok", &RE2::ok)
+ .def("error", &RE2ErrorShim)
+ .def("options", &RE2::options)
+ .def("NumberOfCapturingGroups", &RE2::NumberOfCapturingGroups)
+ .def("NamedCapturingGroups", &RE2NamedCapturingGroupsShim)
+ .def("ProgramSize", &RE2::ProgramSize)
+ .def("ReverseProgramSize", &RE2::ReverseProgramSize)
+ .def("ProgramFanout", &RE2ProgramFanoutShim)
+ .def("ReverseProgramFanout", &RE2ReverseProgramFanoutShim)
+ .def("PossibleMatchRange", &RE2PossibleMatchRangeShim)
+ .def("Match", &RE2MatchShim)
+ .def_static("QuoteMeta", &RE2QuoteMetaShim);
+
+ set.def(py::init<RE2::Anchor, const RE2::Options&>())
+ .def("Add", &Set::Add)
+ .def("Compile", &Set::Compile)
+ .def("Match", &Set::Match);
+
+ // GetRE2 returns a reference into the Filter, hence reference_internal.
+ filter.def(py::init<>())
+ .def("Add", &Filter::Add)
+ .def("Compile", &Filter::Compile)
+ .def("Match", &Filter::Match)
+ .def("GetRE2", &Filter::GetRE2,
+ py::return_value_policy::reference_internal);
+}
+
+} // namespace re2_python
diff --git a/python/re2.py b/python/re2.py
new file mode 100644
index 0000000..8a6d985
--- /dev/null
+++ b/python/re2.py
@@ -0,0 +1,582 @@
+# Copyright 2019 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+r"""A drop-in replacement for the re module.
+
+It uses RE2 under the hood, of course, so various PCRE features
+(e.g. backreferences, look-around assertions) are not supported.
+See https://github.com/google/re2/wiki/Syntax for the canonical
+reference, but known syntactic "gotchas" relative to Python are:
+
+ * PCRE supports \Z and \z; RE2 supports \z; Python supports \z,
+ but calls it \Z. You must rewrite \Z to \z in pattern strings.
+
+Known differences between this module's API and the re module's API:
+
+ * The error class does not provide any error information as attributes.
+ * The Options class replaces the re module's flags with RE2's options as
+ gettable/settable properties. Please see re2.h for their documentation.
+ * The pattern string and the input string do not have to be the same type.
+ Any str will be encoded to UTF-8.
+ * The pattern string cannot be str if the options specify Latin-1 encoding.
+
+This module's LRU cache contains a maximum of 128 regular expression objects.
+Each regular expression object's underlying RE2 object uses a maximum of 8MiB
+of memory (by default). Hence, this module's LRU cache uses a maximum of 1GiB
+of memory (by default), but in most cases, it should use much less than that.
+"""
+
+import codecs
+import functools
+import itertools
+
+import _re2
+
+
+class error(Exception):
+ """Exception raised by this module; it carries only its message."""
+ pass
+
+
+class Options(_re2.RE2.Options):
+ """RE2 options exposed as gettable/settable properties.
+
+ NAMES lists the property names in a fixed order; compile() and
+ _Regexp._make() use that order to convert options to and from a tuple.
+ """
+
+ __slots__ = ()
+
+ NAMES = (
+ 'max_mem',
+ 'encoding',
+ 'posix_syntax',
+ 'longest_match',
+ 'log_errors',
+ 'literal',
+ 'never_nl',
+ 'dot_nl',
+ 'never_capture',
+ 'case_sensitive',
+ 'perl_classes',
+ 'word_boundary',
+ 'one_line',
+ )
+
+
+def compile(pattern, options=None):
+  """Returns a compiled regular expression object for pattern.
+
+  Args:
+    pattern: a pattern string, or an already compiled regular expression
+      object.
+    options: an Options instance, or None to use the default Options().
+
+  Returns:
+    A _Regexp. An already compiled pattern is returned unchanged, so that it
+    keeps the options it was compiled with; otherwise the object comes from
+    the _Regexp._make cache.
+
+  Raises:
+    error: if pattern is already compiled and options is also given, or if
+      compiling the pattern fails.
+  """
+  if isinstance(pattern, _Regexp):
+    if options:
+      raise error('pattern is already compiled, so '
+                  'options may not be specified')
+    # Recompiling from pattern._pattern with default options would silently
+    # drop any non-default options of the compiled object.
+    return pattern
+  options = options or Options()
+  # The cache is keyed on plain option values rather than the Options object.
+  values = tuple(getattr(options, name) for name in Options.NAMES)
+  return _Regexp._make(pattern, values)
+
+
+def search(pattern, text, options=None):
+ """Compiles pattern with options and returns the result of its search(text)."""
+ return compile(pattern, options=options).search(text)
+
+
+def match(pattern, text, options=None):
+ """Compiles pattern with options and returns the result of its match(text)."""
+ return compile(pattern, options=options).match(text)
+
+
+def fullmatch(pattern, text, options=None):
+ """Compiles pattern with options and returns the result of its fullmatch(text)."""
+ return compile(pattern, options=options).fullmatch(text)
+
+
+def finditer(pattern, text, options=None):
+ """Compiles pattern with options and returns the result of its finditer(text)."""
+ return compile(pattern, options=options).finditer(text)
+
+
+def findall(pattern, text, options=None):
+ """Compiles pattern with options and returns the result of its findall(text)."""
+ return compile(pattern, options=options).findall(text)
+
+
+def split(pattern, text, maxsplit=0, options=None):
+ """Compiles pattern with options and returns its split(text, maxsplit)."""
+ return compile(pattern, options=options).split(text, maxsplit)
+
+
+def subn(pattern, repl, text, count=0, options=None):
+ """Compiles pattern with options and returns its subn(repl, text, count)."""
+ return compile(pattern, options=options).subn(repl, text, count)
+
+
+def sub(pattern, repl, text, count=0, options=None):
+ """Compiles pattern with options and returns its sub(repl, text, count)."""
+ return compile(pattern, options=options).sub(repl, text, count)
+
+
+def _encode(t):
+ """Encodes the str t to UTF-8 bytes."""
+ return t.encode(encoding='utf-8')
+
+
+def _decode(b):
+ """Decodes the UTF-8 bytes b to str."""
+ return b.decode(encoding='utf-8')
+
+
+def escape(pattern):
+ """Returns pattern as quoted by RE2's QuoteMeta, in the same type as given.
+
+ A str pattern is encoded to UTF-8, quoted and decoded back to str; any other
+ pattern is passed to QuoteMeta directly and the bytes result is returned.
+ """
+ if isinstance(pattern, str):
+ encoded_pattern = _encode(pattern)
+ escaped = _re2.RE2.QuoteMeta(encoded_pattern)
+ decoded_escaped = _decode(escaped)
+ return decoded_escaped
+ else:
+ escaped = _re2.RE2.QuoteMeta(pattern)
+ return escaped
+
+
+def purge():
+ """Clears the LRU cache of compiled regular expression objects."""
+ return _Regexp._make.cache_clear()
+
+
+_Anchor = _re2.RE2.Anchor
+_NULL_SPAN = (-1, -1)
+
+
+class _Regexp(object):
+ """Compiled regular expression object wrapping an _re2.RE2 instance.
+
+ _pattern holds the pattern exactly as it was given (str or otherwise) and
+ _regexp holds the underlying _re2.RE2 object.
+ """
+
+ __slots__ = ('_pattern', '_regexp')
+
+ @classmethod
+ @functools.lru_cache(typed=True)
+ def _make(cls, pattern, values):
+ """Builds a _Regexp from pattern and a tuple of option values; LRU-cached.
+
+ values are positional and correspond, in order, to Options.NAMES.
+ """
+ options = Options()
+ for name, value in zip(Options.NAMES, values):
+ setattr(options, name, value)
+ return cls(pattern, options)
+
+ def __init__(self, pattern, options):
+ """Compiles pattern with options.
+
+ A str pattern is encoded to UTF-8 first. Raises error if a str pattern is
+ combined with LATIN1 encoding, or if RE2 reports that compiling failed.
+ """
+ self._pattern = pattern
+ if isinstance(self._pattern, str):
+ if options.encoding == Options.Encoding.LATIN1:
+ raise error('string type of pattern is str, but '
+ 'encoding specified in options is LATIN1')
+ encoded_pattern = _encode(self._pattern)
+ self._regexp = _re2.RE2(encoded_pattern, options)
+ else:
+ self._regexp = _re2.RE2(self._pattern, options)
+ if not self._regexp.ok():
+ raise error(self._regexp.error())
+
+ def __getstate__(self):
+ """Returns (pattern, {option name: value}) for pickling."""
+ options = {name: getattr(self.options, name) for name in Options.NAMES}
+ return self._pattern, options
+
+ def __setstate__(self, state):
+ """Restores from __getstate__ output, reusing the _make cache."""
+ pattern, options = state
+ values = tuple(options[name] for name in Options.NAMES)
+ other = _Regexp._make(pattern, values)
+ self._pattern = other._pattern
+ self._regexp = other._regexp
+
+ def _match(self, anchor, text, pos=None, endpos=None):
+ """Generator yielding a _Match for each successive match in text.
+
+ pos and endpos default to the start and end of text and are clamped to
+ [0, len(text)]; nothing is yielded when pos > endpos. A str text is
+ encoded to UTF-8, matched using bytes offsets, and the resulting spans are
+ converted back to str offsets before being put in the _Match.
+ """
+ pos = 0 if pos is None else max(0, min(pos, len(text)))
+ endpos = len(text) if endpos is None else max(0, min(endpos, len(text)))
+ if pos > endpos:
+ return
+ if isinstance(text, str):
+ encoded_text = _encode(text)
+ encoded_pos = _re2.CharLenToBytes(encoded_text, 0, pos)
+ if endpos == len(text):
+ # This is the common case.
+ encoded_endpos = len(encoded_text)
+ else:
+ encoded_endpos = encoded_pos + _re2.CharLenToBytes(
+ encoded_text, encoded_pos, endpos - pos)
+ # Maps bytes offsets in encoded_text to str offsets in text.
+ decoded_offsets = {0: 0}
+ last_offset = 0
+ while True:
+ spans = self._regexp.Match(anchor, encoded_text, encoded_pos,
+ encoded_endpos)
+ if spans[0] == _NULL_SPAN:
+ break
+
+ # This algorithm is linear in the length of encoded_text. Specifically,
+ # no matter how many groups there are for a given regular expression or
+ # how many iterations through the loop there are for a given generator,
+ # this algorithm uses a single, straightforward pass over encoded_text.
+ offsets = sorted(set(itertools.chain(*spans)))
+ if offsets[0] == -1:
+ offsets = offsets[1:]
+ # Discard the rest of the items because they are useless now - and we
+ # could accumulate one item per str offset in the pathological case!
+ decoded_offsets = {last_offset: decoded_offsets[last_offset]}
+ for offset in offsets:
+ decoded_offsets[offset] = (
+ decoded_offsets[last_offset] +
+ _re2.BytesToCharLen(encoded_text, last_offset, offset))
+ last_offset = offset
+
+ def decode(span):
+ if span == _NULL_SPAN:
+ return span
+ return decoded_offsets[span[0]], decoded_offsets[span[1]]
+
+ decoded_spans = [decode(span) for span in spans]
+ yield _Match(self, text, pos, endpos, decoded_spans)
+ if encoded_pos == encoded_endpos:
+ break
+ elif encoded_pos == spans[0][1]:
+ # We matched the empty string at encoded_pos and would be stuck, so
+ # in order to make forward progress, increment the str offset.
+ encoded_pos += _re2.CharLenToBytes(encoded_text, encoded_pos, 1)
+ else:
+ encoded_pos = spans[0][1]
+ else:
+ while True:
+ spans = self._regexp.Match(anchor, text, pos, endpos)
+ if spans[0] == _NULL_SPAN:
+ break
+ yield _Match(self, text, pos, endpos, spans)
+ if pos == endpos:
+ break
+ elif pos == spans[0][1]:
+ # We matched the empty string at pos and would be stuck, so in order
+ # to make forward progress, increment the bytes offset.
+ pos += 1
+ else:
+ pos = spans[0][1]
+
+ def search(self, text, pos=None, endpos=None):
+ """Returns the first unanchored match in text, or None."""
+ return next(self._match(_Anchor.UNANCHORED, text, pos, endpos), None)
+
+ def match(self, text, pos=None, endpos=None):
+ """Returns the first match anchored at the start, or None."""
+ return next(self._match(_Anchor.ANCHOR_START, text, pos, endpos), None)
+
+ def fullmatch(self, text, pos=None, endpos=None):
+ """Returns the first match anchored at both ends, or None."""
+ return next(self._match(_Anchor.ANCHOR_BOTH, text, pos, endpos), None)
+
+ def finditer(self, text, pos=None, endpos=None):
+ """Returns a generator over all unanchored matches in text."""
+ return self._match(_Anchor.UNANCHORED, text, pos, endpos)
+
+ def findall(self, text, pos=None, endpos=None):
+ """Returns a list with one item per match in text.
+
+ The item is the whole match when the pattern has no groups, the single
+ group when it has exactly one, and a tuple of all groups otherwise. Groups
+ that did not participate are replaced by an empty value of text's type.
+ """
+ empty = type(text)()
+ items = []
+ for match in self.finditer(text, pos, endpos):
+ if not self.groups:
+ item = match.group()
+ elif self.groups == 1:
+ item = match.groups(default=empty)[0]
+ else:
+ item = match.groups(default=empty)
+ items.append(item)
+ return items
+
+ def _split(self, cb, text, maxsplit=0):
+ """Cuts text at each match and returns (pieces, number of matches used).
+
+ pieces alternates between the text before each match and the items of
+ cb(match), and ends with the text after the last match used. At most
+ maxsplit matches are used when maxsplit > 0, all of them when it is 0, and
+ none (returning ([text], 0)) when it is negative.
+ """
+ if maxsplit < 0:
+ return [text], 0
+ elif maxsplit > 0:
+ matchiter = itertools.islice(self.finditer(text), maxsplit)
+ else:
+ matchiter = self.finditer(text)
+ pieces = []
+ end = 0
+ numsplit = 0
+ for match in matchiter:
+ pieces.append(text[end:match.start()])
+ pieces.extend(cb(match))
+ end = match.end()
+ numsplit += 1
+ pieces.append(text[end:])
+ return pieces, numsplit
+
+ def split(self, text, maxsplit=0):
+ """Splits text at matches; each match contributes its groups to the list."""
+ cb = lambda match: [match[group] for group in range(1, self.groups + 1)]
+ pieces, _ = self._split(cb, text, maxsplit)
+ return pieces
+
+ def subn(self, repl, text, count=0):
+ """Replaces matches in text and returns (new text, number replaced).
+
+ repl is either a callable taking the match, or a template passed to
+ match.expand().
+ """
+ cb = lambda match: [repl(match) if callable(repl) else match.expand(repl)]
+ empty = type(text)()
+ pieces, numsplit = self._split(cb, text, count)
+ joined_pieces = empty.join(pieces)
+ return joined_pieces, numsplit
+
+ def sub(self, repl, text, count=0):
+ """Like subn(), but returns only the new text."""
+ joined_pieces, _ = self.subn(repl, text, count)
+ return joined_pieces
+
+ @property
+ def pattern(self):
+ """The pattern as originally given."""
+ return self._pattern
+
+ @property
+ def options(self):
+ """The options reported by the underlying RE2 object."""
+ return self._regexp.options()
+
+ @property
+ def groups(self):
+ """The number of capturing groups."""
+ return self._regexp.NumberOfCapturingGroups()
+
+ @property
+ def groupindex(self):
+ """A dict mapping group names to group indices.
+
+ Names are decoded to str when the pattern was given as str; otherwise they
+ are left as returned by the underlying RE2 object.
+ """
+ groups = self._regexp.NamedCapturingGroups()
+ if isinstance(self._pattern, str):
+ decoded_groups = [(_decode(group), index) for group, index in groups]
+ return dict(decoded_groups)
+ else:
+ return dict(groups)
+
+ @property
+ def programsize(self):
+ """The result of the underlying RE2 object's ProgramSize()."""
+ return self._regexp.ProgramSize()
+
+ @property
+ def reverseprogramsize(self):
+ """The result of the underlying RE2 object's ReverseProgramSize()."""
+ return self._regexp.ReverseProgramSize()
+
+ @property
+ def programfanout(self):
+ """The result of the underlying RE2 object's ProgramFanout()."""
+ return self._regexp.ProgramFanout()
+
+ @property
+ def reverseprogramfanout(self):
+ """The result of the underlying RE2 object's ReverseProgramFanout()."""
+ return self._regexp.ReverseProgramFanout()
+
+ def possiblematchrange(self, maxlen):
+ """Returns (min, max) from PossibleMatchRange(maxlen); raises error on failure."""
+ ok, min, max = self._regexp.PossibleMatchRange(maxlen)
+ if not ok:
+ raise error('failed to compute match range')
+ return min, max
+
+
+class _Match(object):
+
+ __slots__ = ('_regexp', '_text', '_pos', '_endpos', '_spans')
+
+ def __init__(self, regexp, text, pos, endpos, spans):
+ self._regexp = regexp
+ self._text = text
+ self._pos = pos
+ self._endpos = endpos
+ self._spans = spans
+
+ # Python prioritises three-digit octal numbers over group escapes.
+ # For example, \100 should not be handled the same way as \g<10>0.
+ _OCTAL_RE = compile('\\\\[0-7][0-7][0-7]')
+
+ # Python supports \1 through \99 (inclusive) and \g<...> syntax.
+ _GROUP_RE = compile('\\\\[1-9][0-9]?|\\\\g<\\w+>')
+
+ @classmethod
+ @functools.lru_cache(typed=True)
+ def _split(cls, template):
+ if isinstance(template, str):
+ backslash = '\\'
+ else:
+ backslash = b'\\'
+ empty = type(template)()
+ pieces = [empty]
+ index = template.find(backslash)
+ while index != -1:
+ piece, template = template[:index], template[index:]
+ pieces[-1] += piece
+ octal_match = cls._OCTAL_RE.match(template)
+ group_match = cls._GROUP_RE.match(template)
+ if (not octal_match) and group_match:
+ index = group_match.end()
+ piece, template = template[:index], template[index:]
+ pieces.extend((piece, empty))
+ else:
+ # 2 isn't enough for \o, \x, \N, \u and \U escapes, but none of those
+ # should contain backslashes, so break them here and then fix them at
+ # the beginning of the next loop iteration or right before returning.
+ index = 2
+ piece, template = template[:index], template[index:]
+ pieces[-1] += piece
+ index = template.find(backslash)
+ pieces[-1] += template
+ return pieces
+
+ def expand(self, template):
+ if isinstance(template, str):
+ unescape = codecs.unicode_escape_decode
+ else:
+ unescape = codecs.escape_decode
+ empty = type(template)()
+ # Make a copy so that we don't clobber the cached pieces!
+ pieces = list(self._split(template))
+ for index, piece in enumerate(pieces):
+ if not index % 2:
+ pieces[index], _ = unescape(piece)
+ else:
+ if len(piece) <= 3: # \1 through \99 (inclusive)
+ group = int(piece[1:])
+ else: # \g<...>
+ group = piece[3:-1]
+ try:
+ group = int(group)
+ except ValueError:
+ pass
+ pieces[index] = self.__getitem__(group) or empty
+ joined_pieces = empty.join(pieces)
+ return joined_pieces
+
+ def __getitem__(self, group):
+ if not isinstance(group, int):
+ try:
+ group = self._regexp.groupindex[group]
+ except KeyError:
+ raise IndexError('bad group name')
+ if not 0 <= group <= self._regexp.groups:
+ raise IndexError('bad group index')
+ span = self._spans[group]
+ if span == _NULL_SPAN:
+ return None
+ return self._text[span[0]:span[1]]
+
+ def group(self, *groups):
+ if not groups:
+ groups = (0,)
+ items = (self.__getitem__(group) for group in groups)
+ return next(items) if len(groups) == 1 else tuple(items)
+
+ def groups(self, default=None):
+ items = []
+ for group in range(1, self._regexp.groups + 1):
+ item = self.__getitem__(group)
+ items.append(default if item is None else item)
+ return tuple(items)
+
+ def groupdict(self, default=None):
+ items = []
+ for group, index in self._regexp.groupindex.items():
+ item = self.__getitem__(index)
+ items.append((group, default) if item is None else (group, item))
+ return dict(items)
+
+ def start(self, group=0):
+ if not 0 <= group <= self._regexp.groups:
+ raise IndexError('bad group index')
+ return self._spans[group][0]
+
+ def end(self, group=0):
+ if not 0 <= group <= self._regexp.groups:
+ raise IndexError('bad group index')
+ return self._spans[group][1]
+
+ def span(self, group=0):
+ if not 0 <= group <= self._regexp.groups:
+ raise IndexError('bad group index')
+ return self._spans[group]
+
+ @property
+ def re(self):
+ """The regexp object of this match; it supplies groups and groupindex."""
+ return self._regexp
+
+ @property
+ def string(self):
+ """The text that the group spans of this match index into."""
+ return self._text
+
+ @property
+ def pos(self):
+ """The stored pos value (presumably the search start; set outside this view)."""
+ return self._pos
+
+ @property
+ def endpos(self):
+ """The stored endpos value (presumably the search end; set outside this view)."""
+ return self._endpos
+
+ @property
+ def lastindex(self):
+ max_end = -1
+ max_group = None
+ # We look for the rightmost right parenthesis by keeping the first group
+ # that ends at max_end because that is the leftmost/outermost group when
+ # there are nested groups!
+ for group in range(1, self._regexp.groups + 1):
+ end = self._spans[group][1]
+ if max_end < end:
+ max_end = end
+ max_group = group
+ return max_group
+
+ @property
+ def lastgroup(self):
+ max_group = self.lastindex
+ if not max_group:
+ return None
+ for group, index in self._regexp.groupindex.items():
+ if max_group == index:
+ return group
+ return None
+
+
+class Set(object):
+ """A Pythonic wrapper around RE2::Set."""
+
+ __slots__ = ('_set')
+
+ def __init__(self, anchor, options=None):
+ options = options or Options()
+ self._set = _re2.Set(anchor, options)
+
+ @classmethod
+ def SearchSet(cls, options=None):
+ return cls(_Anchor.UNANCHORED, options=options)
+
+ @classmethod
+ def MatchSet(cls, options=None):
+ return cls(_Anchor.ANCHOR_START, options=options)
+
+ @classmethod
+ def FullMatchSet(cls, options=None):
+ return cls(_Anchor.ANCHOR_BOTH, options=options)
+
+ def Add(self, pattern):
+ if isinstance(pattern, str):
+ encoded_pattern = _encode(pattern)
+ index = self._set.Add(encoded_pattern)
+ else:
+ index = self._set.Add(pattern)
+ if index == -1:
+ raise error('failed to add %r to Set' % pattern)
+ return index
+
+ def Compile(self):
+ if not self._set.Compile():
+ raise error('failed to compile Set')
+
+ def Match(self, text):
+ if isinstance(text, str):
+ encoded_text = _encode(text)
+ matches = self._set.Match(encoded_text)
+ else:
+ matches = self._set.Match(text)
+ return matches or None
+
+
+class Filter(object):
+ """A Pythonic wrapper around FilteredRE2."""
+
+ __slots__ = ('_filter', '_patterns')
+
+ def __init__(self):
+ self._filter = _re2.Filter()
+ self._patterns = []
+
+ def Add(self, pattern, options=None):
+ options = options or Options()
+ if isinstance(pattern, str):
+ encoded_pattern = _encode(pattern)
+ index = self._filter.Add(encoded_pattern, options)
+ else:
+ index = self._filter.Add(pattern, options)
+ if index == -1:
+ raise error('failed to add %r to Filter' % pattern)
+ self._patterns.append(pattern)
+ return index
+
+ def Compile(self):
+ if not self._filter.Compile():
+ raise error('failed to compile Filter')
+
+ def Match(self, text, potential=False):
+ if isinstance(text, str):
+ encoded_text = _encode(text)
+ matches = self._filter.Match(encoded_text, potential)
+ else:
+ matches = self._filter.Match(text, potential)
+ return matches or None
+
+ def re(self, index):
+ if not 0 <= index < len(self._patterns):
+ raise IndexError('bad index')
+ proxy = object.__new__(_Regexp)
+ proxy._pattern = self._patterns[index]
+ proxy._regexp = self._filter.GetRE2(index)
+ return proxy
diff --git a/python/re2_test.py b/python/re2_test.py
new file mode 100644
index 0000000..86aa9ae
--- /dev/null
+++ b/python/re2_test.py
@@ -0,0 +1,482 @@
+# Copyright 2019 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+"""Tests for google3.third_party.re2.python.re2."""
+
+import collections
+import pickle
+import re
+
+from absl.testing import absltest
+from absl.testing import parameterized
+import re2
+
+
+class OptionsTest(parameterized.TestCase):
+ """Checks that every option named in re2.Options.NAMES can be set and read."""
+
+ @parameterized.parameters(*re2.Options.NAMES)
+ def test_option(self, name):
+ options = re2.Options()
+ value = getattr(options, name)
+ # Pick a value that differs from the current one, based on its type.
+ if isinstance(value, re2.Options.Encoding):
+ value = next(v for v in type(value).__members__.values() if v != value)
+ elif isinstance(value, bool):
+ value = not value
+ elif isinstance(value, int):
+ value = value + 1
+ else:
+ raise TypeError('option {!r}: {!r} {!r}'.format(name, type(value), value))
+ # The new value must read back unchanged.
+ setattr(options, name, value)
+ self.assertEqual(value, getattr(options, name))
+
+
+class Re2CompileTest(parameterized.TestCase):
+ """Contains tests that apply to the re2 module only.
+
+ We disagree with Python on the string types of group names,
+ so there is no point attempting to verify consistency.
+ """
+
+ # Each case is (pattern, expected group count, expected groupindex items).
+ @parameterized.parameters(
+ (u'(foo*)(?P<bar>qux+)', 2, [(u'bar', 2)]),
+ (b'(foo*)(?P<bar>qux+)', 2, [(b'bar', 2)]),
+ (u'(foo*)(?P<中文>qux+)', 2, [(u'中文', 2)]),
+ )
+ def test_compile(self, pattern, expected_groups, expected_groupindex):
+ regexp = re2.compile(pattern)
+ self.assertIs(regexp, re2.compile(pattern)) # cached
+ self.assertIs(regexp, re2.compile(regexp)) # cached
+ # Options may not accompany an already compiled regexp.
+ with self.assertRaisesRegex(re2.error,
+ ('pattern is already compiled, so '
+ 'options may not be specified')):
+ options = re2.Options()
+ options.log_errors = not options.log_errors
+ re2.compile(regexp, options=options)
+ self.assertIsNotNone(regexp.options)
+ self.assertEqual(expected_groups, regexp.groups)
+ self.assertDictEqual(dict(expected_groupindex), regexp.groupindex)
+
+ # A max_mem of 100 is expected to make '.{1000}' fail to compile.
+ def test_compile_with_options(self):
+ options = re2.Options()
+ options.max_mem = 100
+ with self.assertRaisesRegex(re2.error, 'pattern too large'):
+ re2.compile('.{1000}', options=options)
+
+ # Pins the forward and reverse program sizes reported for 'a+b'.
+ def test_programsize_reverseprogramsize(self):
+ regexp = re2.compile('a+b')
+ self.assertEqual(7, regexp.programsize)
+ self.assertEqual(7, regexp.reverseprogramsize)
+
+ # Pins the forward and reverse program fanout lists reported for 'a+b'.
+ def test_programfanout_reverseprogramfanout(self):
+ regexp = re2.compile('a+b')
+ self.assertListEqual([1, 1], regexp.programfanout)
+ self.assertListEqual([3], regexp.reverseprogramfanout)
+
+ # Each case is (pattern, maxlen, expected (min, max) pair); None means that
+ # possiblematchrange() is expected to raise re2.error instead.
+ @parameterized.parameters(
+ (u'abc', 0, None),
+ (b'abc', 0, None),
+ (u'abc', 10, (b'abc', b'abc')),
+ (b'abc', 10, (b'abc', b'abc')),
+ (u'ab*c', 10, (b'ab', b'ac')),
+ (b'ab*c', 10, (b'ab', b'ac')),
+ (u'ab+c', 10, (b'abb', b'abc')),
+ (b'ab+c', 10, (b'abb', b'abc')),
+ (u'ab?c', 10, (b'abc', b'ac')),
+ (b'ab?c', 10, (b'abc', b'ac')),
+ (u'.*', 10, (b'', b'\xf4\xbf\xbf\xc0')),
+ (b'.*', 10, None),
+ (u'\\C*', 10, None),
+ (b'\\C*', 10, None),
+ )
+ def test_possiblematchrange(self, pattern, maxlen, expected_min_max):
+ # For brevity, the string type of pattern determines the encoding.
+ # It would otherwise be possible to have bytes with UTF8, but as per
+ # the module docstring, it isn't permitted to have str with LATIN1.
+ options = re2.Options()
+ if isinstance(pattern, str):
+ options.encoding = re2.Options.Encoding.UTF8
+ else:
+ options.encoding = re2.Options.Encoding.LATIN1
+ regexp = re2.compile(pattern, options=options)
+ if expected_min_max:
+ self.assertEqual(expected_min_max, regexp.possiblematchrange(maxlen))
+ else:
+ with self.assertRaisesRegex(re2.error, 'failed to compute match range'):
+ regexp.possiblematchrange(maxlen)
+
+
+# One shared test case: `spans` lists the expected span of group 0 and of each
+# numbered group; `search`, `match` and `fullmatch` say whether that function
+# is expected to find a match (the spans are only checked when it is).
+Params = collections.namedtuple(
+ 'Params', ('pattern', 'text', 'spans', 'search', 'match', 'fullmatch'))
+
+# Every case appears twice: once with str and once with bytes arguments.
+PARAMS = [
+ Params(u'\\d+', u'Hello, world.', None, False, False, False),
+ Params(b'\\d+', b'Hello, world.', None, False, False, False),
+ Params(u'\\s+', u'Hello, world.', [(6, 7)], True, False, False),
+ Params(b'\\s+', b'Hello, world.', [(6, 7)], True, False, False),
+ Params(u'\\w+', u'Hello, world.', [(0, 5)], True, True, False),
+ Params(b'\\w+', b'Hello, world.', [(0, 5)], True, True, False),
+ Params(u'(\\d+)?', u'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
+ Params(b'(\\d+)?', b'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
+ Params(u'youtube(_device|_md|_gaia|_multiday|_multiday_gaia)?',
+ u'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
+ Params(b'youtube(_device|_md|_gaia|_multiday|_multiday_gaia)?',
+ b'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
+]
+
+
+def upper(match):
+ return match.group().upper()
+
+
+class ReRegexpTest(parameterized.TestCase):
+ """Contains tests that apply to the re and re2 modules."""
+
+ # The module under test; a subclass overrides this to rerun every test.
+ MODULE = re
+
+ # A compiled regexp must survive a pickle round trip with its pattern intact.
+ @parameterized.parameters((p.pattern,) for p in PARAMS)
+ def test_pickle(self, pattern):
+ regexp = self.MODULE.compile(pattern)
+ rick = pickle.loads(pickle.dumps(regexp))
+ self.assertEqual(regexp.pattern, rick.pattern)
+
+ # Expected spans are only supplied for cases flagged as matching for search().
+ @parameterized.parameters(
+ (p.pattern, p.text, (p.spans if p.search else None)) for p in PARAMS)
+ def test_search(self, pattern, text, expected_spans):
+ match = self.MODULE.search(pattern, text)
+ if expected_spans is None:
+ self.assertIsNone(match)
+ else:
+ spans = [match.span(group) for group in range(match.re.groups + 1)]
+ self.assertListEqual(expected_spans, spans)
+
+ # Tries every (pos, endpos) window of the text.
+ def test_search_with_pos_and_endpos(self):
+ regexp = self.MODULE.compile(u'.+') # empty string NOT allowed
+ text = u'I \u2665 RE2!'
+ # Note that len(text) is the position of the empty string at the end of
+ # text, so range() stops at len(text) + 1 in order to include len(text).
+ for pos in range(len(text) + 1):
+ for endpos in range(pos, len(text) + 1):
+ match = regexp.search(text, pos=pos, endpos=endpos)
+ if pos == endpos:
+ self.assertIsNone(match)
+ else:
+ self.assertEqual(pos, match.pos)
+ self.assertEqual(endpos, match.endpos)
+ self.assertEqual(pos, match.start())
+ self.assertEqual(endpos, match.end())
+ self.assertTupleEqual((pos, endpos), match.span())
+
+ # Out-of-range pos/endpos values are expected to be clamped to [0, len(text)].
+ def test_search_with_bogus_pos_and_endpos(self):
+ regexp = self.MODULE.compile(u'.*') # empty string allowed
+ text = u'I \u2665 RE2!'
+
+ match = regexp.search(text, pos=-100)
+ self.assertEqual(0, match.pos)
+ match = regexp.search(text, pos=100)
+ self.assertEqual(8, match.pos)
+
+ match = regexp.search(text, endpos=-100)
+ self.assertEqual(0, match.endpos)
+ match = regexp.search(text, endpos=100)
+ self.assertEqual(8, match.endpos)
+
+ match = regexp.search(text, pos=100, endpos=-100)
+ self.assertIsNone(match)
+
+ # Expected spans are only supplied for cases flagged as matching for match().
+ @parameterized.parameters(
+ (p.pattern, p.text, (p.spans if p.match else None)) for p in PARAMS)
+ def test_match(self, pattern, text, expected_spans):
+ match = self.MODULE.match(pattern, text)
+ if expected_spans is None:
+ self.assertIsNone(match)
+ else:
+ spans = [match.span(group) for group in range(match.re.groups + 1)]
+ self.assertListEqual(expected_spans, spans)
+
+ # Expected spans are only supplied for cases flagged as matching for
+ # fullmatch().
+ @parameterized.parameters(
+ (p.pattern, p.text, (p.spans if p.fullmatch else None)) for p in PARAMS)
+ def test_fullmatch(self, pattern, text, expected_spans):
+ match = self.MODULE.fullmatch(pattern, text)
+ if expected_spans is None:
+ self.assertIsNone(match)
+ else:
+ spans = [match.span(group) for group in range(match.re.groups + 1)]
+ self.assertListEqual(expected_spans, spans)
+
+ # Each case is (pattern, text, expected list of whole-match spans).
+ @parameterized.parameters(
+ (u'', u'', [(0, 0)]),
+ (b'', b'', [(0, 0)]),
+ (u'', u'x', [(0, 0), (1, 1)]),
+ (b'', b'x', [(0, 0), (1, 1)]),
+ (u'', u'xy', [(0, 0), (1, 1), (2, 2)]),
+ (b'', b'xy', [(0, 0), (1, 1), (2, 2)]),
+ (u'.', u'xy', [(0, 1), (1, 2)]),
+ (b'.', b'xy', [(0, 1), (1, 2)]),
+ (u'x', u'xy', [(0, 1)]),
+ (b'x', b'xy', [(0, 1)]),
+ (u'y', u'xy', [(1, 2)]),
+ (b'y', b'xy', [(1, 2)]),
+ (u'z', u'xy', []),
+ (b'z', b'xy', []),
+ (u'\\w*', u'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
+ (13, 13)]),
+ (b'\\w*', b'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
+ (13, 13)]),
+ )
+ def test_finditer(self, pattern, text, expected_matches):
+ matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
+ self.assertListEqual(expected_matches, matches)
+
+ # Each case is (pattern, text, expected findall() result).
+ @parameterized.parameters(
+ (u'\\w\\w+', u'Hello, world.', [u'Hello', u'world']),
+ (b'\\w\\w+', b'Hello, world.', [b'Hello', b'world']),
+ (u'(\\w)\\w+', u'Hello, world.', [u'H', u'w']),
+ (b'(\\w)\\w+', b'Hello, world.', [b'H', b'w']),
+ (u'(\\w)(\\w+)', u'Hello, world.', [(u'H', u'ello'), (u'w', u'orld')]),
+ (b'(\\w)(\\w+)', b'Hello, world.', [(b'H', b'ello'), (b'w', b'orld')]),
+ (u'(\\w)(\\w+)?', u'Hello, w.', [(u'H', u'ello'), (u'w', u'')]),
+ (b'(\\w)(\\w+)?', b'Hello, w.', [(b'H', b'ello'), (b'w', b'')]),
+ )
+ def test_findall(self, pattern, text, expected_matches):
+ matches = self.MODULE.findall(pattern, text)
+ self.assertListEqual(expected_matches, matches)
+
+ # Each case is (pattern, text, maxsplit, expected pieces).
+ @parameterized.parameters(
+ (u'\\W+', u'Hello, world.', -1, [u'Hello, world.']),
+ (b'\\W+', b'Hello, world.', -1, [b'Hello, world.']),
+ (u'\\W+', u'Hello, world.', 0, [u'Hello', u'world', u'']),
+ (b'\\W+', b'Hello, world.', 0, [b'Hello', b'world', b'']),
+ (u'\\W+', u'Hello, world.', 1, [u'Hello', u'world.']),
+ (b'\\W+', b'Hello, world.', 1, [b'Hello', b'world.']),
+ (u'(\\W+)', u'Hello, world.', -1, [u'Hello, world.']),
+ (b'(\\W+)', b'Hello, world.', -1, [b'Hello, world.']),
+ (u'(\\W+)', u'Hello, world.', 0, [u'Hello', u', ', u'world', u'.', u'']),
+ (b'(\\W+)', b'Hello, world.', 0, [b'Hello', b', ', b'world', b'.', b'']),
+ (u'(\\W+)', u'Hello, world.', 1, [u'Hello', u', ', u'world.']),
+ (b'(\\W+)', b'Hello, world.', 1, [b'Hello', b', ', b'world.']),
+ )
+ def test_split(self, pattern, text, maxsplit, expected_pieces):
+ pieces = self.MODULE.split(pattern, text, maxsplit)
+ self.assertListEqual(expected_pieces, pieces)
+
+ # Each case is (pattern, repl, text, count, expected result, expected number
+ # of substitutions); repl is either a callable or a template.
+ @parameterized.parameters(
+ (u'\\w+', upper, u'Hello, world.', -1, u'Hello, world.', 0),
+ (b'\\w+', upper, b'Hello, world.', -1, b'Hello, world.', 0),
+ (u'\\w+', upper, u'Hello, world.', 0, u'HELLO, WORLD.', 2),
+ (b'\\w+', upper, b'Hello, world.', 0, b'HELLO, WORLD.', 2),
+ (u'\\w+', upper, u'Hello, world.', 1, u'HELLO, world.', 1),
+ (b'\\w+', upper, b'Hello, world.', 1, b'HELLO, world.', 1),
+ (u'\\w+', u'MEEP', u'Hello, world.', -1, u'Hello, world.', 0),
+ (b'\\w+', b'MEEP', b'Hello, world.', -1, b'Hello, world.', 0),
+ (u'\\w+', u'MEEP', u'Hello, world.', 0, u'MEEP, MEEP.', 2),
+ (b'\\w+', b'MEEP', b'Hello, world.', 0, b'MEEP, MEEP.', 2),
+ (u'\\w+', u'MEEP', u'Hello, world.', 1, u'MEEP, world.', 1),
+ (b'\\w+', b'MEEP', b'Hello, world.', 1, b'MEEP, world.', 1),
+ (u'\\\\', u'\\\\\\\\', u'Hello,\\world.', 0, u'Hello,\\\\world.', 1),
+ (b'\\\\', b'\\\\\\\\', b'Hello,\\world.', 0, b'Hello,\\\\world.', 1),
+ )
+ def test_subn_sub(self, pattern, repl, text, count, expected_joined_pieces,
+ expected_numsplit):
+ joined_pieces, numsplit = self.MODULE.subn(pattern, repl, text, count)
+ self.assertEqual(expected_joined_pieces, joined_pieces)
+ self.assertEqual(expected_numsplit, numsplit)
+
+ # sub() must produce the same text as subn().
+ joined_pieces = self.MODULE.sub(pattern, repl, text, count)
+ self.assertEqual(expected_joined_pieces, joined_pieces)
+
+
+class Re2RegexpTest(ReRegexpTest):
+ """Contains tests that apply to the re2 module only."""
+
+ # Reruns every inherited test against re2 instead of re.
+ MODULE = re2
+
+ # A str pattern combined with LATIN1 encoding must be rejected.
+ def test_compile_with_latin1_encoding(self):
+ options = re2.Options()
+ options.encoding = re2.Options.Encoding.LATIN1
+ with self.assertRaisesRegex(re2.error,
+ ('string type of pattern is str, but '
+ 'encoding specified in options is LATIN1')):
+ re2.compile(u'.?', options=options)
+
+ # ... whereas this is fine, of course.
+ re2.compile(b'.?', options=options)
+
+ # The str case expects spans of width 1 per character, while the bytes case
+ # expects spans of width 3 for the same character encoded as UTF-8.
+ @parameterized.parameters(
+ (u'\\p{Lo}', u'\u0ca0_\u0ca0', [(0, 1), (2, 3)]),
+ (b'\\p{Lo}', b'\xe0\xb2\xa0_\xe0\xb2\xa0', [(0, 3), (4, 7)]),
+ )
+ def test_finditer_with_utf8(self, pattern, text, expected_matches):
+ matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
+ self.assertListEqual(expected_matches, matches)
+
+ # purge() must leave the cache behind re2._Regexp._make empty.
+ def test_purge(self):
+ re2.compile('Goodbye, world.')
+ self.assertGreater(re2._Regexp._make.cache_info().currsize, 0)
+ re2.purge()
+ self.assertEqual(re2._Regexp._make.cache_info().currsize, 0)
+
+
+class Re2EscapeTest(parameterized.TestCase):
+ """Contains tests that apply to the re2 module only.
+
+ We disagree with Python on the escaping of some characters,
+ so there is no point attempting to verify consistency.
+ """
+
+ # Each case is (pattern, expected escaped pattern), for str and for bytes.
+ @parameterized.parameters(
+ (u'a*b+c?', u'a\\*b\\+c\\?'),
+ (b'a*b+c?', b'a\\*b\\+c\\?'),
+ )
+ def test_escape(self, pattern, expected_escaped):
+ escaped = re2.escape(pattern)
+ self.assertEqual(expected_escaped, escaped)
+
+
+class ReMatchTest(parameterized.TestCase):
+ """Contains tests that apply to the re and re2 modules."""
+
+ # The module under test; a subclass overrides this to rerun every test.
+ MODULE = re
+
+ # Group references by number and by name, plus escaped backslashes.
+ def test_expand(self):
+ pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
+ text = u'I \u2665 RE2!\n'
+ match = self.MODULE.search(pattern, text)
+
+ self.assertEqual(u'\u2665\n!', match.expand(u'\\1\\n\\2'))
+ self.assertEqual(u'\u2665\n!', match.expand(u'\\g<1>\\n\\g<2>'))
+ self.assertEqual(u'\u2665\n!', match.expand(u'\\g<S>\\n\\g<P>'))
+ self.assertEqual(u'\\1\\2\n\u2665!', match.expand(u'\\\\1\\\\2\\n\\1\\2'))
+
+ # Ten groups are used so that one- and two-digit group references can be
+ # told apart from octal escapes.
+ def test_expand_with_octal(self):
+ pattern = u'()()()()()()()()()(\\w+)'
+ text = u'Hello, world.'
+ match = self.MODULE.search(pattern, text)
+
+ self.assertEqual(u'Hello\n', match.expand(u'\\g<0>\\n'))
+ self.assertEqual(u'Hello\n', match.expand(u'\\g<10>\\n'))
+
+ # A leading zero is expected to be read as an octal escape.
+ self.assertEqual(u'\x00\n', match.expand(u'\\0\\n'))
+ self.assertEqual(u'\x00\n', match.expand(u'\\00\\n'))
+ self.assertEqual(u'\x00\n', match.expand(u'\\000\\n'))
+ self.assertEqual(u'\x000\n', match.expand(u'\\0000\\n'))
+
+ # One or two digits name a group; three octal digits are an octal escape.
+ self.assertEqual(u'\n', match.expand(u'\\1\\n'))
+ self.assertEqual(u'Hello\n', match.expand(u'\\10\\n'))
+ self.assertEqual(u'@\n', match.expand(u'\\100\\n'))
+ self.assertEqual(u'@0\n', match.expand(u'\\1000\\n'))
+
+ # Indexing, group(), groups() and groupdict() must agree with one another.
+ def test_getitem_group_groups_groupdict(self):
+ pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
+ text = u'Hello, world.\nI \u2665 RE2!\nGoodbye, world.\n'
+ match = self.MODULE.search(pattern, text)
+
+ self.assertEqual(u'\u2665 RE2!', match[0])
+ self.assertEqual(u'\u2665', match[1])
+ self.assertEqual(u'!', match[2])
+ self.assertEqual(u'\u2665', match[u'S'])
+ self.assertEqual(u'!', match[u'P'])
+
+ self.assertEqual(u'\u2665 RE2!', match.group())
+ self.assertEqual(u'\u2665 RE2!', match.group(0))
+ self.assertEqual(u'\u2665', match.group(1))
+ self.assertEqual(u'!', match.group(2))
+ self.assertEqual(u'\u2665', match.group(u'S'))
+ self.assertEqual(u'!', match.group(u'P'))
+
+ self.assertTupleEqual((u'\u2665', u'!'), match.group(1, 2))
+ self.assertTupleEqual((u'\u2665', u'!'), match.group(u'S', u'P'))
+ self.assertTupleEqual((u'\u2665', u'!'), match.groups())
+ self.assertDictEqual({u'S': u'\u2665', u'P': u'!'}, match.groupdict())
+
+ # Out-of-range indices and unknown names must raise IndexError.
+ def test_bogus_group_start_end_and_span(self):
+ pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
+ text = u'I \u2665 RE2!\n'
+ match = self.MODULE.search(pattern, text)
+
+ self.assertRaises(IndexError, match.group, -1)
+ self.assertRaises(IndexError, match.group, 3)
+ self.assertRaises(IndexError, match.group, 'X')
+
+ self.assertRaises(IndexError, match.start, -1)
+ self.assertRaises(IndexError, match.start, 3)
+
+ self.assertRaises(IndexError, match.end, -1)
+ self.assertRaises(IndexError, match.end, 3)
+
+ self.assertRaises(IndexError, match.span, -1)
+ self.assertRaises(IndexError, match.span, 3)
+
+ # Each case is (pattern, text, expected lastindex, expected lastgroup); an
+ # expected lastindex of None means that no match is expected at all.
+ @parameterized.parameters(
+ (u'((a)(b))((c)(d))', u'foo bar qux', None, None),
+ (u'(?P<one>(a)(b))((c)(d))', u'foo abcd qux', 4, None),
+ (u'(?P<one>(a)(b))(?P<four>(c)(d))', u'foo abcd qux', 4, 'four'),
+ )
+ def test_lastindex_lastgroup(self, pattern, text, expected_lastindex,
+ expected_lastgroup):
+ match = self.MODULE.search(pattern, text)
+ if expected_lastindex is None:
+ self.assertIsNone(match)
+ else:
+ self.assertEqual(expected_lastindex, match.lastindex)
+ self.assertEqual(expected_lastgroup, match.lastgroup)
+
+
+class Re2MatchTest(ReMatchTest):
+ """Contains tests that apply to the re2 module only."""
+
+ # Reruns every inherited test against re2 instead of re.
+ MODULE = re2
+
+
+class SetTest(absltest.TestCase):
+
+ def test_search(self):
+ s = re2.Set.SearchSet()
+ self.assertEqual(0, s.Add('\\d+'))
+ self.assertEqual(1, s.Add('\\s+'))
+ self.assertEqual(2, s.Add('\\w+'))
+ self.assertRaises(re2.error, s.Add, '(MEEP')
+ s.Compile()
+ self.assertItemsEqual([1, 2], s.Match('Hello, world.'))
+
+ def test_match(self):
+ s = re2.Set.MatchSet()
+ self.assertEqual(0, s.Add('\\d+'))
+ self.assertEqual(1, s.Add('\\s+'))
+ self.assertEqual(2, s.Add('\\w+'))
+ self.assertRaises(re2.error, s.Add, '(MEEP')
+ s.Compile()
+ self.assertItemsEqual([2], s.Match('Hello, world.'))
+
+ def test_fullmatch(self):
+ s = re2.Set.FullMatchSet()
+ self.assertEqual(0, s.Add('\\d+'))
+ self.assertEqual(1, s.Add('\\s+'))
+ self.assertEqual(2, s.Add('\\w+'))
+ self.assertRaises(re2.error, s.Add, '(MEEP')
+ s.Compile()
+ self.assertIsNone(s.Match('Hello, world.'))
+
+
+class FilterTest(absltest.TestCase):
+
+ def test_match(self):
+ f = re2.Filter()
+ self.assertEqual(0, f.Add('Hello, \\w+\\.'))
+ self.assertEqual(1, f.Add('\\w+, world\\.'))
+ self.assertEqual(2, f.Add('Goodbye, \\w+\\.'))
+ self.assertRaises(re2.error, f.Add, '(MEEP')
+ f.Compile()
+ self.assertItemsEqual([0, 1], f.Match('Hello, world.', potential=True))
+ self.assertItemsEqual([0, 1], f.Match('HELLO, WORLD.', potential=True))
+ self.assertItemsEqual([0, 1], f.Match('Hello, world.'))
+ self.assertIsNone(f.Match('HELLO, WORLD.'))
+
+ self.assertRaises(IndexError, f.re, -1)
+ self.assertRaises(IndexError, f.re, 3)
+ self.assertEqual('Goodbye, \\w+\\.', f.re(2).pattern)
+ # Verify whether the underlying RE2 object is usable.
+ self.assertEqual(0, f.re(2).groups)
+
+
+# Hand control to absltest when this file is executed as a script.
+if __name__ == '__main__':
+ absltest.main()
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..b0cbd5b
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,105 @@
+# Copyright 2019 The RE2 Authors. All Rights Reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+import os
+import setuptools
+import setuptools.command.build_ext
+import shutil
+import sys
+
+# Package description handed to setuptools.setup() below with a content type of
+# text/plain. It is a raw string, so the backslashes in it are literal.
+long_description = r"""A drop-in replacement for the re module.
+
+It uses RE2 under the hood, of course, so various PCRE features
+(e.g. backreferences, look-around assertions) are not supported.
+See https://github.com/google/re2/wiki/Syntax for the canonical
+reference, but known syntactic "gotchas" relative to Python are:
+
+ * PCRE supports \Z and \z; RE2 supports \z; Python supports \z,
+ but calls it \Z. You must rewrite \Z to \z in pattern strings.
+
+Known differences between this module's API and the re module's API:
+
+ * The error class does not provide any error information as attributes.
+ * The Options class replaces the re module's flags with RE2's options as
+ gettable/settable properties. Please see re2.h for their documentation.
+ * The pattern string and the input string do not have to be the same type.
+ Any str will be encoded to UTF-8.
+ * The pattern string cannot be str if the options specify Latin-1 encoding.
+
+Known issues with regard to building the C++ extension:
+
+ * Building requires RE2 to be installed on your system.
+ On Debian, for example, install the libre2-dev package.
+ * Building requires pybind11 to be installed on your system OR venv.
+ On Debian, for example, install the pybind11-dev package.
+ For a venv, install the pybind11 package from PyPI.
+ * Building on macOS is known to work, but has been known to fail.
+ For example, the system Python may not know which compiler flags
+ to set when building bindings for software installed by Homebrew;
+ see https://docs.brew.sh/Homebrew-and-Python#brewed-python-modules.
+ * Building on Windows has not been tested yet and will probably fail.
+"""
+
+
+class BuildExt(setuptools.command.build_ext.build_ext):
+ """build_ext command that builds the extension with Bazel on GitHub Actions.
+
+ Outside GitHub Actions (no GITHUB_ACTIONS environment variable) it defers
+ to the standard setuptools build.
+ """
+
+ def build_extension(self, ext):
+ if 'GITHUB_ACTIONS' not in os.environ:
+ return super().build_extension(ext)
+
+ # For @pybind11_bazel's `python_configure()`.
+ os.environ['PYTHON_BIN_PATH'] = sys.executable
+
+ # Start from a clean Bazel state.
+ cmd = ['bazel', 'clean', '--expunge']
+ self.spawn(cmd)
+
+ # Build every target in optimized mode; BAZEL_CPU, when set, is lower-cased
+ # and passed as --cpu.
+ cmd = ['bazel', 'build']
+ if 'BAZEL_CPU' in os.environ:
+ cmd.append(f'--cpu={os.environ["BAZEL_CPU"].lower()}')
+ cmd += ['--compilation_mode=opt', '--', ':all']
+ self.spawn(cmd)
+
+ # This ensures that f'_re2.{importlib.machinery.EXTENSION_SUFFIXES[0]}'
+ # is the filename in the destination directory, which is what's needed.
+ # NOTE(review): the source name is hard-coded as _re2.so; confirm that this
+ # holds on every platform this path runs on.
+ shutil.copyfile('../bazel-bin/python/_re2.so',
+ self.get_ext_fullpath(ext.name))
+
+
+def include_dirs():
+ """Yields pybind11's include directory when pybind11 can be imported.
+
+ Yields nothing when the import fails with ModuleNotFoundError.
+ """
+ try:
+ import pybind11
+ yield pybind11.get_include()
+ except ModuleNotFoundError:
+ pass
+
+
+# The _re2 extension: built from _re2.cc, linked against the re2 library, and
+# compiled with hidden symbol visibility.
+ext_module = setuptools.Extension(
+ name='_re2',
+ sources=['_re2.cc'],
+ include_dirs=list(include_dirs()),
+ libraries=['re2'],
+ extra_compile_args=['-fvisibility=hidden'],
+)
+
+# Package metadata: ships the pure-Python re2 module together with the _re2
+# extension, and routes build_ext through the BuildExt command defined above.
+setuptools.setup(
+ name='google-re2',
+ version='1.0',
+ description='RE2 Python bindings',
+ long_description=long_description,
+ long_description_content_type='text/plain',
+ author='The RE2 Authors',
+ author_email='re2-dev@googlegroups.com',
+ url='https://github.com/google/re2',
+ py_modules=['re2'],
+ ext_modules=[ext_module],
+ classifiers=[
+ 'Development Status :: 5 - Production/Stable',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: BSD License',
+ 'Programming Language :: C++',
+ 'Programming Language :: Python :: 3.7',
+ ],
+ cmdclass={'build_ext': BuildExt},
+ python_requires='~=3.7',
+)
diff --git a/re2/bitmap256.cc b/re2/bitmap256.cc
index 1509909..f6fbca3 100644
--- a/re2/bitmap256.cc
+++ b/re2/bitmap256.cc
@@ -6,7 +6,7 @@
#include <stdint.h>
-#include "util/util.h"
+#include "absl/base/macros.h"
#include "util/logging.h"
namespace re2 {
@@ -27,15 +27,15 @@ int Bitmap256::FindNextSetBit(int c) const {
case 1:
if (words_[1] != 0)
return (1 * 64) + FindLSBSet(words_[1]);
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case 2:
if (words_[2] != 0)
return (2 * 64) + FindLSBSet(words_[2]);
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case 3:
if (words_[3] != 0)
return (3 * 64) + FindLSBSet(words_[3]);
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
default:
return -1;
}
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 877e548..38a0b87 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -42,9 +42,8 @@ class BitState {
// The usual Search prototype.
// Can only call Search once per BitState.
- bool Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch);
+ bool Search(absl::string_view text, absl::string_view context, bool anchored,
+ bool longest, absl::string_view* submatch, int nsubmatch);
private:
inline bool ShouldVisit(int id, const char* p);
@@ -53,14 +52,14 @@ class BitState {
bool TrySearch(int id, const char* p);
// Search parameters
- Prog* prog_; // program being run
- StringPiece text_; // text being searched
- StringPiece context_; // greater context of text being searched
- bool anchored_; // whether search is anchored at text.begin()
- bool longest_; // whether search wants leftmost-longest match
- bool endmatch_; // whether match must end at text.end()
- StringPiece* submatch_; // submatches to fill in
- int nsubmatch_; // # of submatches to fill in
+ Prog* prog_; // program being run
+ absl::string_view text_; // text being searched
+ absl::string_view context_; // greater context of text being searched
+ bool anchored_; // whether search is anchored at text.begin()
+ bool longest_; // whether search wants leftmost-longest match
+ bool endmatch_; // whether match must end at text.end()
+ absl::string_view* submatch_; // submatches to fill in
+ int nsubmatch_; // # of submatches to fill in
// Search state
static constexpr int kVisitedBits = 64;
@@ -256,9 +255,9 @@ bool BitState::TrySearch(int id0, const char* p0) {
if (submatch_[0].data() == NULL ||
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] =
- StringPiece(cap_[2 * i],
- static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
+ submatch_[i] = absl::string_view(
+ cap_[2 * i],
+ static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
}
// If going for first match, we're done.
@@ -285,9 +284,9 @@ bool BitState::TrySearch(int id0, const char* p0) {
}
// Search text (within context) for prog_.
-bool BitState::Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch) {
+bool BitState::Search(absl::string_view text, absl::string_view context,
+ bool anchored, bool longest, absl::string_view* submatch,
+ int nsubmatch) {
// Search parameters.
text_ = text;
context_ = context;
@@ -303,7 +302,7 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
submatch_ = submatch;
nsubmatch_ = nsubmatch;
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece();
+ submatch_[i] = absl::string_view();
// Allocate scratch space.
int nvisited = prog_->list_count() * static_cast<int>(text.size()+1);
@@ -353,16 +352,13 @@ bool BitState::Search(const StringPiece& text, const StringPiece& context,
}
// Bit-state search.
-bool Prog::SearchBitState(const StringPiece& text,
- const StringPiece& context,
- Anchor anchor,
- MatchKind kind,
- StringPiece* match,
- int nmatch) {
+bool Prog::SearchBitState(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind,
+ absl::string_view* match, int nmatch) {
// If full match, we ask for an anchored longest match
// and then check that match[0] == text.
// So make sure match[0] exists.
- StringPiece sp0;
+ absl::string_view sp0;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch < 1) {
diff --git a/re2/compile.cc b/re2/compile.cc
index 03bffab..aa79887 100644
--- a/re2/compile.cc
+++ b/re2/compile.cc
@@ -10,9 +10,10 @@
#include <stdint.h>
#include <string.h>
-#include <unordered_map>
#include <utility>
+#include "absl/base/macros.h"
+#include "absl/container/flat_hash_map.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/pod_array.h"
@@ -211,7 +212,7 @@ class Compiler : public Regexp::Walker<Frag> {
int64_t max_mem_; // Total memory budget.
- std::unordered_map<uint64_t, int> rune_cache_;
+ absl::flat_hash_map<uint64_t, int> rune_cache_;
Frag rune_range_;
RE2::Anchor anchor_; // anchor mode for RE2::Set
@@ -478,7 +479,7 @@ static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase,
int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
int next) {
uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
- std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
+ absl::flat_hash_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
if (it != rune_cache_.end())
return it->second;
int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
@@ -1243,7 +1244,7 @@ Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
// Make sure DFA has enough memory to operate,
// since we're not going to fall back to the NFA.
bool dfa_failed = false;
- StringPiece sp = "hello, world";
+ absl::string_view sp = "hello, world";
prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
NULL, &dfa_failed, NULL);
if (dfa_failed) {
diff --git a/re2/dfa.cc b/re2/dfa.cc
index 55def2b..a177596 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -28,23 +28,25 @@
#include <algorithm>
#include <atomic>
#include <deque>
-#include <mutex>
#include <new>
#include <string>
-#include <unordered_map>
-#include <unordered_set>
#include <utility>
#include <vector>
+#include "absl/base/call_once.h"
+#include "absl/base/macros.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/str_format.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/types/span.h"
#include "util/logging.h"
-#include "util/mix.h"
-#include "util/mutex.h"
#include "util/strutil.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/sparse_set.h"
-#include "re2/stringpiece.h"
// Silence "zero-sized array in struct/union" warning for DFA::State::next_.
#ifdef _MSC_VER
@@ -88,9 +90,9 @@ class DFA {
// returning the leftmost end of the match instead of the rightmost one.
// If the DFA cannot complete the search (for example, if it is out of
// memory), it sets *failed and returns false.
- bool Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool want_earliest_match, bool run_forward,
- bool* failed, const char** ep, SparseSet* matches);
+ bool Search(absl::string_view text, absl::string_view context, bool anchored,
+ bool want_earliest_match, bool run_forward, bool* failed,
+ const char** ep, SparseSet* matches);
// Builds out all states for the entire DFA.
// If cb is not empty, it receives one callback per state built.
@@ -114,6 +116,18 @@ class DFA {
struct State {
inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
+ template <typename H>
+ friend H AbslHashValue(H h, const State& a) {
+ const absl::Span<const int> ainst(a.inst_, a.ninst_);
+ return H::combine(std::move(h), a.flag_, ainst);
+ }
+
+ friend bool operator==(const State& a, const State& b) {
+ const absl::Span<const int> ainst(a.inst_, a.ninst_);
+ const absl::Span<const int> binst(b.inst_, b.ninst_);
+ return &a == &b || (a.flag_ == b.flag_ && ainst == binst);
+ }
+
int* inst_; // Instruction pointers in the state.
int ninst_; // # of inst_ pointers.
uint32_t flag_; // Empty string bitfield flags in effect on the way
@@ -124,11 +138,11 @@ class DFA {
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
std::atomic<State*> next_[0]; // Outgoing arrows from State,
+ // one per input byte class
#else
std::atomic<State*> next_[]; // Outgoing arrows from State,
+ // one per input byte class
#endif
-
- // one per input byte class
};
enum {
@@ -143,11 +157,7 @@ class DFA {
struct StateHash {
size_t operator()(const State* a) const {
DCHECK(a != NULL);
- HashMix mix(a->flag_);
- for (int i = 0; i < a->ninst_; i++)
- mix.Mix(a->inst_[i]);
- mix.Mix(0);
- return mix.get();
+ return absl::Hash<State>()(*a);
}
};
@@ -155,24 +165,15 @@ class DFA {
bool operator()(const State* a, const State* b) const {
DCHECK(a != NULL);
DCHECK(b != NULL);
- if (a == b)
- return true;
- if (a->flag_ != b->flag_)
- return false;
- if (a->ninst_ != b->ninst_)
- return false;
- for (int i = 0; i < a->ninst_; i++)
- if (a->inst_[i] != b->inst_[i])
- return false;
- return true;
+ return *a == *b;
}
};
- typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
+ typedef absl::flat_hash_set<State*, StateHash, StateEqual> StateSet;
private:
// Make it easier to swap in a scalable reader-writer mutex.
- using CacheMutex = Mutex;
+ using CacheMutex = absl::Mutex;
enum {
// Indices into start_ for unanchored searches.
@@ -238,7 +239,7 @@ class DFA {
// Search parameters
struct SearchParams {
- SearchParams(const StringPiece& text, const StringPiece& context,
+ SearchParams(absl::string_view text, absl::string_view context,
RWLocker* cache_lock)
: text(text),
context(context),
@@ -252,8 +253,8 @@ class DFA {
ep(NULL),
matches(NULL) {}
- StringPiece text;
- StringPiece context;
+ absl::string_view text;
+ absl::string_view context;
bool anchored;
bool can_prefix_accel;
bool want_earliest_match;
@@ -325,7 +326,7 @@ class DFA {
Prog::MatchKind kind_; // The kind of DFA.
bool init_failed_; // initialization failed (out of memory)
- Mutex mutex_; // mutex_ >= cache_mutex_.r
+ absl::Mutex mutex_; // mutex_ >= cache_mutex_.r
// Scratch areas, protected by mutex_.
Workq* q0_; // Two pre-allocated work queues.
@@ -428,7 +429,7 @@ DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
q1_(NULL),
mem_budget_(max_mem) {
if (ExtraDebug)
- fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
+ absl::FPrintF(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored());
int nmark = 0;
if (kind_ == Prog::kLongestMatch)
nmark = prog_->size();
@@ -498,7 +499,7 @@ std::string DFA::DumpWorkq(Workq* q) {
s += "|";
sep = "";
} else {
- s += StringPrintf("%s%d", sep, *it);
+ s += absl::StrFormat("%s%d", sep, *it);
sep = ",";
}
}
@@ -515,7 +516,7 @@ std::string DFA::DumpState(State* state) {
return "*";
std::string s;
const char* sep = "";
- s += StringPrintf("(%p)", state);
+ s += absl::StrFormat("(%p)", state);
for (int i = 0; i < state->ninst_; i++) {
if (state->inst_[i] == Mark) {
s += "|";
@@ -524,11 +525,11 @@ std::string DFA::DumpState(State* state) {
s += "||";
sep = "";
} else {
- s += StringPrintf("%s%d", sep, state->inst_[i]);
+ s += absl::StrFormat("%s%d", sep, state->inst_[i]);
sep = ",";
}
}
- s += StringPrintf(" flag=%#x", state->flag_);
+ s += absl::StrFormat(" flag=%#x", state->flag_);
return s;
}
@@ -596,16 +597,35 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
//mutex_.AssertHeld();
// Construct array of instruction ids for the new state.
- // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
- // those are the only operators with any effect in
- // RunWorkqOnEmptyString or RunWorkqOnByte.
+ // In some cases, kInstAltMatch may trigger an upgrade to FullMatchState.
+ // Otherwise, "compress" q down to list heads for storage; StateToWorkq()
+ // will "decompress" it for computation by exploring from each list head.
+ //
+ // Historically, only kInstByteRange, kInstEmptyWidth and kInstMatch were
+ // useful to keep, but it turned out that kInstAlt was necessary to keep:
+ //
+ // > [*] kInstAlt would seem useless to record in a state, since
+ // > we've already followed both its arrows and saved all the
+ // > interesting states we can reach from there. The problem
+ // > is that one of the empty-width instructions might lead
+ // > back to the same kInstAlt (if an empty-width operator is starred),
+ // > producing a different evaluation order depending on whether
+ // > we keep the kInstAlt to begin with. Sigh.
+ // > A specific case that this affects is /(^|a)+/ matching "a".
+ // > If we don't save the kInstAlt, we will match the whole "a" (0,1)
+ // > but in fact the correct leftmost-first match is the leading "" (0,0).
+ //
+ // Recall that flattening transformed the Prog from "tree" form to "list"
+ // form: in the former, kInstAlt existed explicitly... and abundantly; in
+ // the latter, it's implied between the instructions that compose a list.
+ // Thus, because the information wasn't lost, the bug doesn't remanifest.
PODArray<int> inst(q->size());
int n = 0;
uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
bool sawmatch = false; // whether queue contains guaranteed kInstMatch
bool sawmark = false; // whether queue contains a Mark
if (ExtraDebug)
- fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
+ absl::FPrintF(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q), flag);
for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
int id = *it;
if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
@@ -630,10 +650,10 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
(kind_ != Prog::kLongestMatch || !sawmark) &&
(flag & kFlagMatch)) {
if (ExtraDebug)
- fprintf(stderr, " -> FullMatchState\n");
+ absl::FPrintF(stderr, " -> FullMatchState\n");
return FullMatchState;
}
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
default:
// Record iff id is the head of its list, which must
// be the case if id-1 is the last of *its* list. :)
@@ -676,7 +696,7 @@ DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
// if the state is *not* a matching state.
if (n == 0 && flag == 0) {
if (ExtraDebug)
- fprintf(stderr, " -> DeadState\n");
+ absl::FPrintF(stderr, " -> DeadState\n");
return DeadState;
}
@@ -740,25 +760,29 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
StateSet::iterator it = state_cache_.find(&state);
if (it != state_cache_.end()) {
if (ExtraDebug)
- fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
+ absl::FPrintF(stderr, " -cached-> %s\n", DumpState(*it));
return *it;
}
// Must have enough memory for new state.
// In addition to what we're going to allocate,
- // the state cache hash table seems to incur about 40 bytes per
- // State*, empirically.
- const int kStateCacheOverhead = 40;
+ // the state cache hash table seems to incur about 18 bytes per
+ // State*. Worst case for non-small sets is it being half full, where each
+ // value present takes up 1 byte hash sample plus the pointer itself.
+ const int kStateCacheOverhead = 18;
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
- ninst*sizeof(int);
- if (mem_budget_ < mem + kStateCacheOverhead) {
+ int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>);
+ int instmem = ninst*sizeof(int);
+ if (mem_budget_ < mem + instmem + kStateCacheOverhead) {
mem_budget_ = -1;
return NULL;
}
- mem_budget_ -= mem + kStateCacheOverhead;
+ mem_budget_ -= mem + instmem + kStateCacheOverhead;
// Allocate new state along with room for next_ and inst_.
+ // inst_ is stored separately since it's colder; this also
+ // means that the States for a given DFA are the same size
+ // class, so the allocator can hopefully pack them better.
char* space = std::allocator<char>().allocate(mem);
State* s = new (space) State;
(void) new (s->next_) std::atomic<State*>[nnext];
@@ -766,12 +790,13 @@ DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
for (int i = 0; i < nnext; i++)
(void) new (s->next_ + i) std::atomic<State*>(NULL);
- s->inst_ = new (s->next_ + nnext) int[ninst];
- memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
+ s->inst_ = std::allocator<int>().allocate(ninst);
+ (void) new (s->inst_) int[ninst];
+ memmove(s->inst_, inst, instmem);
s->ninst_ = ninst;
s->flag_ = flag;
if (ExtraDebug)
- fprintf(stderr, " -> %s\n", DumpState(s).c_str());
+ absl::FPrintF(stderr, " -> %s\n", DumpState(s));
// Put state in cache and return it.
state_cache_.insert(s);
@@ -785,12 +810,12 @@ void DFA::ClearCache() {
while (begin != end) {
StateSet::iterator tmp = begin;
++begin;
+ // Deallocate the instruction array, which is stored separately as above.
+ std::allocator<int>().deallocate((*tmp)->inst_, (*tmp)->ninst_);
// Deallocate the blob of memory that we allocated in DFA::CachedState().
// We recompute mem in order to benefit from sized delete where possible.
- int ninst = (*tmp)->ninst_;
int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
- int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
- ninst*sizeof(int);
+ int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>);
std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
}
state_cache_.clear();
@@ -985,8 +1010,8 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
}
if (ExtraDebug)
- fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
- DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
+ absl::FPrintF(stderr, "%s on %d[%#x] -> %s [%d]\n",
+ DumpWorkq(oldq), c, flag, DumpWorkq(newq), *ismatch);
}
// Processes input byte c in state, returning new state.
@@ -994,7 +1019,7 @@ void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
// Keep only one RunStateOnByte going
// even if the DFA is being run by multiple threads.
- MutexLock l(&mutex_);
+ absl::MutexLock l(&mutex_);
return RunStateOnByte(state, c);
}
@@ -1134,9 +1159,9 @@ DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
mu_->ReaderLock();
}
-// This function is marked as NO_THREAD_SAFETY_ANALYSIS because
+// This function is marked as ABSL_NO_THREAD_SAFETY_ANALYSIS because
// the annotations don't support lock upgrade.
-void DFA::RWLocker::LockForWriting() NO_THREAD_SAFETY_ANALYSIS {
+void DFA::RWLocker::LockForWriting() ABSL_NO_THREAD_SAFETY_ANALYSIS {
if (!writing_) {
mu_->ReaderUnlock();
mu_->WriterLock();
@@ -1246,7 +1271,7 @@ DFA::StateSaver::~StateSaver() {
DFA::State* DFA::StateSaver::Restore() {
if (is_special_)
return special_;
- MutexLock l(&dfa_->mutex_);
+ absl::MutexLock l(&dfa_->mutex_);
State* s = dfa_->CachedState(inst_, ninst_, flag_);
if (s == NULL)
LOG(DFATAL) << "StateSaver failed to restore state.";
@@ -1342,13 +1367,13 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
State* s = start;
if (ExtraDebug)
- fprintf(stderr, "@stx: %s\n", DumpState(s).c_str());
+ absl::FPrintF(stderr, "@stx: %s\n", DumpState(s));
if (s->IsMatch()) {
matched = true;
lastmatch = p;
if (ExtraDebug)
- fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
+ absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
@@ -1365,7 +1390,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
while (p != ep) {
if (ExtraDebug)
- fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
+ absl::FPrintF(stderr, "@%d: %s\n", p - bp, DumpState(s));
if (can_prefix_accel && s == start) {
// In start state, only way out is to find the prefix,
@@ -1465,7 +1490,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
else
lastmatch = p + 1;
if (ExtraDebug)
- fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
+ absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
@@ -1484,7 +1509,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
// Process one more byte to see if it triggers a match.
// (Remember, matches are delayed one byte.)
if (ExtraDebug)
- fprintf(stderr, "@etx: %s\n", DumpState(s).c_str());
+ absl::FPrintF(stderr, "@etx: %s\n", DumpState(s));
int lastbyte;
if (run_forward) {
@@ -1532,7 +1557,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
matched = true;
lastmatch = p;
if (ExtraDebug)
- fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
+ absl::FPrintF(stderr, "match @etx! [%s]\n", DumpState(s));
if (params->matches != NULL && kind_ == Prog::kManyMatch) {
for (int i = s->ninst_ - 1; i >= 0; i--) {
int id = s->inst_[i];
@@ -1623,8 +1648,8 @@ bool DFA::FastSearchLoop(SearchParams* params) {
// state for the DFA search loop. Fills in params and returns true on success.
// Returns false on failure.
bool DFA::AnalyzeSearch(SearchParams* params) {
- const StringPiece& text = params->text;
- const StringPiece& context = params->context;
+ absl::string_view text = params->text;
+ absl::string_view context = params->context;
// Sanity check: make sure that text lies within context.
if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
@@ -1694,9 +1719,9 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
params->can_prefix_accel = true;
if (ExtraDebug)
- fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
- params->anchored, params->run_forward, flags,
- DumpState(params->start).c_str(), params->can_prefix_accel);
+ absl::FPrintF(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
+ params->anchored, params->run_forward, flags,
+ DumpState(params->start), params->can_prefix_accel);
return true;
}
@@ -1709,7 +1734,7 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
if (start != NULL)
return true;
- MutexLock l(&mutex_);
+ absl::MutexLock l(&mutex_);
start = info->start.load(std::memory_order_relaxed);
if (start != NULL)
return true;
@@ -1728,14 +1753,9 @@ bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
}
// The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
-bool DFA::Search(const StringPiece& text,
- const StringPiece& context,
- bool anchored,
- bool want_earliest_match,
- bool run_forward,
- bool* failed,
- const char** epp,
- SparseSet* matches) {
+bool DFA::Search(absl::string_view text, absl::string_view context,
+ bool anchored, bool want_earliest_match, bool run_forward,
+ bool* failed, const char** epp, SparseSet* matches) {
*epp = NULL;
if (!ok()) {
*failed = true;
@@ -1744,9 +1764,9 @@ bool DFA::Search(const StringPiece& text,
*failed = false;
if (ExtraDebug) {
- fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
- fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
- std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
+ absl::FPrintF(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored());
+ absl::FPrintF(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
+ text, anchored, want_earliest_match, run_forward, kind_);
}
RWLocker l(&cache_mutex_);
@@ -1770,7 +1790,7 @@ bool DFA::Search(const StringPiece& text,
return true;
}
if (ExtraDebug)
- fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
+ absl::FPrintF(stderr, "start %s\n", DumpState(params.start));
bool ret = FastSearchLoop(&params);
if (params.failed) {
*failed = true;
@@ -1789,17 +1809,17 @@ DFA* Prog::GetDFA(MatchKind kind) {
// "longest match" DFA, because RE2 never does reverse
// "first match" searches.
if (kind == kFirstMatch) {
- std::call_once(dfa_first_once_, [](Prog* prog) {
+ absl::call_once(dfa_first_once_, [](Prog* prog) {
prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
}, this);
return dfa_first_;
} else if (kind == kManyMatch) {
- std::call_once(dfa_first_once_, [](Prog* prog) {
+ absl::call_once(dfa_first_once_, [](Prog* prog) {
prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
}, this);
return dfa_first_;
} else {
- std::call_once(dfa_longest_once_, [](Prog* prog) {
+ absl::call_once(dfa_longest_once_, [](Prog* prog) {
if (!prog->reversed_)
prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
else
@@ -1823,12 +1843,11 @@ void Prog::DeleteDFA(DFA* dfa) {
//
// This is the only external interface (class DFA only exists in this file).
//
-bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
- Anchor anchor, MatchKind kind, StringPiece* match0,
+bool Prog::SearchDFA(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match0,
bool* failed, SparseSet* matches) {
*failed = false;
- StringPiece context = const_context;
if (context.data() == NULL)
context = text;
bool caret = anchor_start();
@@ -1889,10 +1908,10 @@ bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
if (match0) {
if (reversed_)
*match0 =
- StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
+ absl::string_view(ep, static_cast<size_t>(text.data() + text.size() - ep));
else
*match0 =
- StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
+ absl::string_view(text.data(), static_cast<size_t>(ep - text.data()));
}
return true;
}
@@ -1905,7 +1924,7 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
// Pick out start state for unanchored search
// at beginning of text.
RWLocker l(&cache_mutex_);
- SearchParams params(StringPiece(), StringPiece(), &l);
+ SearchParams params(absl::string_view(), absl::string_view(), &l);
params.anchored = false;
if (!AnalyzeSearch(&params) ||
params.start == NULL ||
@@ -1915,7 +1934,7 @@ int DFA::BuildAllStates(const Prog::DFAStateCallback& cb) {
// Add start state to work queue.
// Note that any State* that we handle here must point into the cache,
// so we can simply depend on pointer-as-a-number hashing and equality.
- std::unordered_map<State*, int> m;
+ absl::flat_hash_map<State*, int> m;
std::deque<State*> q;
m.emplace(params.start, static_cast<int>(m.size()));
q.push_back(params.start);
@@ -1989,11 +2008,11 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Also note that previously_visited_states[UnseenStatePtr] will, in the STL
// tradition, implicitly insert a '0' value at first use. We take advantage
// of that property below.
- std::unordered_map<State*, int> previously_visited_states;
+ absl::flat_hash_map<State*, int> previously_visited_states;
// Pick out start state for anchored search at beginning of text.
RWLocker l(&cache_mutex_);
- SearchParams params(StringPiece(), StringPiece(), &l);
+ SearchParams params(absl::string_view(), absl::string_view(), &l);
params.anchored = true;
if (!AnalyzeSearch(&params))
return false;
@@ -2033,7 +2052,7 @@ bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
// Build minimum prefix.
State* s = params.start;
min->clear();
- MutexLock lock(&mutex_);
+ absl::MutexLock lock(&mutex_);
for (int i = 0; i < maxlen; i++) {
if (previously_visited_states[s] > kMaxEltRepetitions)
break;
diff --git a/re2/filtered_re2.cc b/re2/filtered_re2.cc
index 5df9745..49cf686 100644
--- a/re2/filtered_re2.cc
+++ b/re2/filtered_re2.cc
@@ -8,7 +8,6 @@
#include <string>
#include <utility>
-#include "util/util.h"
#include "util/logging.h"
#include "re2/prefilter.h"
#include "re2/prefilter_tree.h"
@@ -46,7 +45,7 @@ FilteredRE2& FilteredRE2::operator=(FilteredRE2&& other) {
return *this;
}
-RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
+RE2::ErrorCode FilteredRE2::Add(absl::string_view pattern,
const RE2::Options& options, int* id) {
RE2* re = new RE2(pattern, options);
RE2::ErrorCode code = re->error_code();
@@ -85,14 +84,14 @@ void FilteredRE2::Compile(std::vector<std::string>* atoms) {
compiled_ = true;
}
-int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
+int FilteredRE2::SlowFirstMatch(absl::string_view text) const {
for (size_t i = 0; i < re2_vec_.size(); i++)
if (RE2::PartialMatch(text, *re2_vec_[i]))
return static_cast<int>(i);
return -1;
}
-int FilteredRE2::FirstMatch(const StringPiece& text,
+int FilteredRE2::FirstMatch(absl::string_view text,
const std::vector<int>& atoms) const {
if (!compiled_) {
LOG(DFATAL) << "FirstMatch called before Compile.";
@@ -106,10 +105,9 @@ int FilteredRE2::FirstMatch(const StringPiece& text,
return -1;
}
-bool FilteredRE2::AllMatches(
- const StringPiece& text,
- const std::vector<int>& atoms,
- std::vector<int>* matching_regexps) const {
+bool FilteredRE2::AllMatches(absl::string_view text,
+ const std::vector<int>& atoms,
+ std::vector<int>* matching_regexps) const {
matching_regexps->clear();
std::vector<int> regexps;
prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
@@ -119,9 +117,8 @@ bool FilteredRE2::AllMatches(
return !matching_regexps->empty();
}
-void FilteredRE2::AllPotentials(
- const std::vector<int>& atoms,
- std::vector<int>* potential_regexps) const {
+void FilteredRE2::AllPotentials(const std::vector<int>& atoms,
+ std::vector<int>* potential_regexps) const {
prefilter_tree_->RegexpsGivenStrings(atoms, potential_regexps);
}
diff --git a/re2/filtered_re2.h b/re2/filtered_re2.h
index dd618c7..a9abd69 100644
--- a/re2/filtered_re2.h
+++ b/re2/filtered_re2.h
@@ -25,6 +25,7 @@
#include <string>
#include <vector>
+#include "absl/strings/string_view.h"
#include "re2/re2.h"
namespace re2 {
@@ -47,7 +48,7 @@ class FilteredRE2 {
// Uses RE2 constructor to create a RE2 object (re). Returns
// re->error_code(). If error_code is other than NoError, then re is
// deleted and not added to re2_vec_.
- RE2::ErrorCode Add(const StringPiece& pattern,
+ RE2::ErrorCode Add(absl::string_view pattern,
const RE2::Options& options,
int* id);
@@ -63,17 +64,17 @@ class FilteredRE2 {
// Returns -1 on no match. Can be called prior to Compile.
// Does not do any filtering: simply tries to Match the
// regexps in a loop.
- int SlowFirstMatch(const StringPiece& text) const;
+ int SlowFirstMatch(absl::string_view text) const;
// Returns the index of the first matching regexp.
// Returns -1 on no match. Compile has to be called before
// calling this.
- int FirstMatch(const StringPiece& text,
+ int FirstMatch(absl::string_view text,
const std::vector<int>& atoms) const;
// Returns the indices of all matching regexps, after first clearing
// matched_regexps.
- bool AllMatches(const StringPiece& text,
+ bool AllMatches(absl::string_view text,
const std::vector<int>& atoms,
std::vector<int>* matching_regexps) const;
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index b39ea3d..b42db55 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -15,8 +15,6 @@
#include "re2/set.h"
#include "re2/walker-inl.h"
-using re2::StringPiece;
-
// NOT static, NOT signed.
uint8_t dummy = 0;
@@ -97,8 +95,8 @@ class SubstringWalker : public re2::Regexp::Walker<int> {
SubstringWalker& operator=(const SubstringWalker&) = delete;
};
-void TestOneInput(StringPiece pattern, const RE2::Options& options,
- RE2::Anchor anchor, StringPiece text) {
+void TestOneInput(absl::string_view pattern, const RE2::Options& options,
+ RE2::Anchor anchor, absl::string_view text) {
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
// Otherwise, we will waste time on inputs that have long runs of various
// character classes. The fuzzer has shown itself to be easily capable of
@@ -178,7 +176,7 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
if (re.NumberOfCapturingGroups() == 0) {
// Avoid early return due to too many arguments.
- StringPiece sp = text;
+ absl::string_view sp = text;
RE2::FullMatch(sp, re);
RE2::PartialMatch(sp, re);
RE2::Consume(&sp, re);
@@ -187,7 +185,7 @@ void TestOneInput(StringPiece pattern, const RE2::Options& options,
} else {
// Okay, we have at least one capturing group...
// Try conversion for variously typed arguments.
- StringPiece sp = text;
+ absl::string_view sp = text;
short s;
RE2::FullMatch(sp, re, &s);
long l;
diff --git a/re2/mimics_pcre.cc b/re2/mimics_pcre.cc
index b1d6a51..ac0c69d 100644
--- a/re2/mimics_pcre.cc
+++ b/re2/mimics_pcre.cc
@@ -22,7 +22,6 @@
//
// Regexp::MimicsPCRE checks for any of these conditions.
-#include "util/util.h"
#include "util/logging.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
diff --git a/re2/nfa.cc b/re2/nfa.cc
index c7339f8..a655884 100644
--- a/re2/nfa.cc
+++ b/re2/nfa.cc
@@ -32,8 +32,8 @@
#include <utility>
#include <vector>
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -60,9 +60,8 @@ class NFA {
// Submatch[0] is the entire match. When there is a choice in
// which text matches each subexpression, the submatch boundaries
// are chosen to match what a backtracking implementation would choose.
- bool Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch);
+ bool Search(absl::string_view text, absl::string_view context, bool anchored,
+ bool longest, absl::string_view* submatch, int nsubmatch);
private:
struct Thread {
@@ -92,7 +91,7 @@ class NFA {
// Enqueues only the ByteRange instructions that match byte c.
// context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
- void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+ void AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
const char* p, Thread* t0);
// Run runq on byte c, appending new states to nextq.
@@ -102,7 +101,7 @@ class NFA {
// p-1 will be used when processing Match instructions.
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
- int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+ int Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
const char* p);
// Returns text version of capture information, for debugging.
@@ -192,7 +191,7 @@ void NFA::Decref(Thread* t) {
// Enqueues only the ByteRange instructions that match byte c.
// context is used (with p) for evaluating empty-width specials.
// p is the current input position, and t0 is the current thread.
-void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
+void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
const char* p, Thread* t0) {
if (id0 == 0)
return;
@@ -225,7 +224,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
continue;
if (q->has_index(id)) {
if (ExtraDebug)
- fprintf(stderr, " [%d%s]\n", id, FormatCapture(t0->capture).c_str());
+ absl::FPrintF(stderr, " [%d%s]\n", id, FormatCapture(t0->capture));
continue;
}
@@ -288,7 +287,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
t = Incref(t0);
*tp = t;
if (ExtraDebug)
- fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
+ absl::FPrintF(stderr, " + %d%s\n", id, FormatCapture(t0->capture));
if (ip->hint() == 0)
break;
@@ -300,7 +299,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
t = Incref(t0);
*tp = t;
if (ExtraDebug)
- fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
+ absl::FPrintF(stderr, " ! %d%s\n", id, FormatCapture(t0->capture));
Next:
if (ip->last())
@@ -328,7 +327,7 @@ void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
// p-1 will be used when processing Match instructions.
// Frees all the threads on runq.
// If there is a shortcut to the end, returns that shortcut.
-int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
+int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
const char* p) {
nextq->clear();
@@ -435,23 +434,22 @@ std::string NFA::FormatCapture(const char** capture) {
if (capture[i] == NULL)
s += "(?,?)";
else if (capture[i+1] == NULL)
- s += StringPrintf("(%td,?)",
- capture[i] - btext_);
+ s += absl::StrFormat("(%d,?)",
+ capture[i] - btext_);
else
- s += StringPrintf("(%td,%td)",
- capture[i] - btext_,
- capture[i+1] - btext_);
+ s += absl::StrFormat("(%d,%d)",
+ capture[i] - btext_,
+ capture[i+1] - btext_);
}
return s;
}
-bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch) {
+bool NFA::Search(absl::string_view text, absl::string_view context,
+ bool anchored, bool longest, absl::string_view* submatch,
+ int nsubmatch) {
if (start_ == 0)
return false;
- StringPiece context = const_context;
if (context.data() == NULL)
context = text;
@@ -497,8 +495,8 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
etext_ = text.data() + text.size();
if (ExtraDebug)
- fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
- std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
+ absl::FPrintF(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
+ text, context, anchored, longest);
// Set up search.
Threadq* runq = &q0_;
@@ -517,14 +515,14 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
else if (p < etext_)
c = p[0] & 0xFF;
- fprintf(stderr, "%c:", c);
+ absl::FPrintF(stderr, "%c:", c);
for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
Thread* t = i->value();
if (t == NULL)
continue;
- fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
+ absl::FPrintF(stderr, " %d%s", i->index(), FormatCapture(t->capture));
}
- fprintf(stderr, "\n");
+ absl::FPrintF(stderr, "\n");
}
// This is a no-op the first time around the loop because runq is empty.
@@ -592,7 +590,7 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
// If all the threads have died, stop early.
if (runq->size() == 0) {
if (ExtraDebug)
- fprintf(stderr, "dead\n");
+ absl::FPrintF(stderr, "dead\n");
break;
}
@@ -616,27 +614,26 @@ bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
if (matched_) {
for (int i = 0; i < nsubmatch; i++)
- submatch[i] =
- StringPiece(match_[2 * i],
- static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
+ submatch[i] = absl::string_view(
+ match_[2 * i],
+ static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
if (ExtraDebug)
- fprintf(stderr, "match (%td,%td)\n",
- match_[0] - btext_,
- match_[1] - btext_);
+ absl::FPrintF(stderr, "match (%d,%d)\n",
+ match_[0] - btext_,
+ match_[1] - btext_);
return true;
}
return false;
}
-bool
-Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch) {
+bool Prog::SearchNFA(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match,
+ int nmatch) {
if (ExtraDebug)
Dump();
NFA nfa(this);
- StringPiece sp;
+ absl::string_view sp;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch == 0) {
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 2639746..7931cf9 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -57,14 +57,14 @@
#include <string>
#include <vector>
-#include "util/util.h"
+#include "absl/container/fixed_array.h"
+#include "absl/container/inlined_vector.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/sparse_set.h"
-#include "re2/stringpiece.h"
// Silence "zero-sized array in struct/union" warning for OneState::action.
#ifdef _MSC_VER
@@ -189,7 +189,7 @@ void OnePass_Checks() {
"kMaxCap disagrees with kMaxOnePassCapture");
}
-static bool Satisfy(uint32_t cond, const StringPiece& context, const char* p) {
+static bool Satisfy(uint32_t cond, absl::string_view context, const char* p) {
uint32_t satisfied = Prog::EmptyFlags(context, p);
if (cond & kEmptyAllFlags & ~satisfied)
return false;
@@ -211,10 +211,9 @@ static inline OneState* IndexToNode(uint8_t* nodes, int statesize,
return reinterpret_cast<OneState*>(nodes + statesize*nodeindex);
}
-bool Prog::SearchOnePass(const StringPiece& text,
- const StringPiece& const_context,
+bool Prog::SearchOnePass(absl::string_view text, absl::string_view context,
Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch) {
+ absl::string_view* match, int nmatch) {
if (anchor != kAnchored && kind != kFullMatch) {
LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
return false;
@@ -234,7 +233,6 @@ bool Prog::SearchOnePass(const StringPiece& text,
for (int i = 0; i < ncap; i++)
matchcap[i] = NULL;
- StringPiece context = const_context;
if (context.data() == NULL)
context = text;
if (anchor_start() && BeginPtr(context) != BeginPtr(text))
@@ -339,13 +337,12 @@ done:
if (!matched)
return false;
for (int i = 0; i < nmatch; i++)
- match[i] =
- StringPiece(matchcap[2 * i],
- static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
+ match[i] = absl::string_view(
+ matchcap[2 * i],
+ static_cast<size_t>(matchcap[2 * i + 1] - matchcap[2 * i]));
return true;
}
-
// Analysis to determine whether a given regexp program is one-pass.
// If ip is not on workq, adds ip to work queue and returns true.
@@ -404,16 +401,17 @@ bool Prog::IsOnePass() {
int stacksize = inst_count(kInstCapture) +
inst_count(kInstEmptyWidth) +
inst_count(kInstNop) + 1; // + 1 for start inst
- PODArray<InstCond> stack(stacksize);
+ absl::FixedArray<InstCond, 64> stack_storage(stacksize);
+ InstCond* stack = stack_storage.data();
int size = this->size();
- PODArray<int> nodebyid(size); // indexed by ip
- memset(nodebyid.data(), 0xFF, size*sizeof nodebyid[0]);
+ absl::FixedArray<int, 128> nodebyid_storage(size, -1); // indexed by ip
+ int* nodebyid = nodebyid_storage.data();
// Originally, nodes was a uint8_t[maxnodes*statesize], but that was
// unnecessarily optimistic: why allocate a large amount of memory
// upfront for a large program when it is unlikely to be one-pass?
- std::vector<uint8_t> nodes;
+ absl::InlinedVector<uint8_t, 2048> nodes;
Instq tovisit(size), workq(size);
AddQ(&tovisit, start());
@@ -462,7 +460,7 @@ bool Prog::IsOnePass() {
if (nextindex == -1) {
if (nalloc >= maxnodes) {
if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ LOG(ERROR) << absl::StrFormat(
"Not OnePass: hit node limit %d >= %d", nalloc, maxnodes);
goto fail;
}
@@ -487,7 +485,7 @@ bool Prog::IsOnePass() {
node->action[b] = newact;
} else if (act != newact) {
if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ LOG(ERROR) << absl::StrFormat(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
@@ -508,7 +506,7 @@ bool Prog::IsOnePass() {
node->action[b] = newact;
} else if (act != newact) {
if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ LOG(ERROR) << absl::StrFormat(
"Not OnePass: conflict on byte %#x at state %d", c, *it);
goto fail;
}
@@ -549,7 +547,7 @@ bool Prog::IsOnePass() {
// If already on work queue, (1) is violated: bail out.
if (!AddQ(&workq, ip->out())) {
if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ LOG(ERROR) << absl::StrFormat(
"Not OnePass: multiple paths %d -> %d", *it, ip->out());
goto fail;
}
@@ -560,7 +558,7 @@ bool Prog::IsOnePass() {
if (matched) {
// (3) is violated
if (ExtraDebug)
- LOG(ERROR) << StringPrintf(
+ LOG(ERROR) << absl::StrFormat(
"Not OnePass: multiple matches from %d", *it);
goto fail;
}
@@ -597,15 +595,15 @@ bool Prog::IsOnePass() {
if (nodeindex == -1)
continue;
OneState* node = IndexToNode(nodes.data(), statesize, nodeindex);
- dump += StringPrintf("node %d id=%d: matchcond=%#x\n",
- nodeindex, id, node->matchcond);
+ dump += absl::StrFormat("node %d id=%d: matchcond=%#x\n",
+ nodeindex, id, node->matchcond);
for (int i = 0; i < bytemap_range_; i++) {
if ((node->action[i] & kImpossible) == kImpossible)
continue;
- dump += StringPrintf(" %d cond %#x -> %d id=%d\n",
- i, node->action[i] & 0xFFFF,
- node->action[i] >> kIndexShift,
- idmap[node->action[i] >> kIndexShift]);
+ dump += absl::StrFormat(" %d cond %#x -> %d id=%d\n",
+ i, node->action[i] & 0xFFFF,
+ node->action[i] >> kIndexShift,
+ idmap[node->action[i] >> kIndexShift]);
}
}
LOG(ERROR) << "nodes:\n" << dump;
diff --git a/re2/parse.cc b/re2/parse.cc
index d7a9fe5..67a4857 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -25,13 +25,11 @@
#include <string>
#include <vector>
-#include "util/util.h"
+#include "absl/base/macros.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
#include "re2/pod_array.h"
#include "re2/regexp.h"
-#include "re2/stringpiece.h"
#include "re2/unicode_casefold.h"
#include "re2/unicode_groups.h"
#include "re2/walker-inl.h"
@@ -70,7 +68,7 @@ void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
class Regexp::ParseState {
public:
- ParseState(ParseFlags flags, const StringPiece& whole_regexp,
+ ParseState(ParseFlags flags, absl::string_view whole_regexp,
RegexpStatus* status);
~ParseState();
@@ -107,18 +105,18 @@ class Regexp::ParseState {
// Pushes a repeat operator regexp onto the stack.
// A valid argument for the operator must already be on the stack.
// s is the name of the operator, for use in error messages.
- bool PushRepeatOp(RegexpOp op, const StringPiece& s, bool nongreedy);
+ bool PushRepeatOp(RegexpOp op, absl::string_view s, bool nongreedy);
// Pushes a repetition regexp onto the stack.
// A valid argument for the operator must already be on the stack.
- bool PushRepetition(int min, int max, const StringPiece& s, bool nongreedy);
+ bool PushRepetition(int min, int max, absl::string_view s, bool nongreedy);
// Checks whether a particular regexp op is a marker.
bool IsMarker(RegexpOp op);
// Processes a left parenthesis in the input.
// Pushes a marker onto the stack.
- bool DoLeftParen(const StringPiece& name);
+ bool DoLeftParen(absl::string_view name);
bool DoLeftParenNoCapture();
// Processes a vertical bar in the input.
@@ -142,24 +140,23 @@ class Regexp::ParseState {
// Parse a character class into *out_re.
// Removes parsed text from s.
- bool ParseCharClass(StringPiece* s, Regexp** out_re,
+ bool ParseCharClass(absl::string_view* s, Regexp** out_re,
RegexpStatus* status);
// Parse a character class character into *rp.
// Removes parsed text from s.
- bool ParseCCCharacter(StringPiece* s, Rune *rp,
- const StringPiece& whole_class,
+ bool ParseCCCharacter(absl::string_view* s, Rune* rp,
+ absl::string_view whole_class,
RegexpStatus* status);
// Parse a character class range into rr.
// Removes parsed text from s.
- bool ParseCCRange(StringPiece* s, RuneRange* rr,
- const StringPiece& whole_class,
+ bool ParseCCRange(absl::string_view* s, RuneRange* rr,
+ absl::string_view whole_class,
RegexpStatus* status);
// Parse a Perl flag set or non-capturing group from s.
- bool ParsePerlFlags(StringPiece* s);
-
+ bool ParsePerlFlags(absl::string_view* s);
// Finishes the current concatenation,
// collapsing it into a single regexp on the stack.
@@ -177,7 +174,7 @@ class Regexp::ParseState {
private:
ParseFlags flags_;
- StringPiece whole_regexp_;
+ absl::string_view whole_regexp_;
RegexpStatus* status_;
Regexp* stacktop_;
int ncap_; // number of capturing parens seen
@@ -192,7 +189,7 @@ const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1);
const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2);
Regexp::ParseState::ParseState(ParseFlags flags,
- const StringPiece& whole_regexp,
+ absl::string_view whole_regexp,
RegexpStatus* status)
: flags_(flags), whole_regexp_(whole_regexp),
status_(status), stacktop_(NULL), ncap_(0) {
@@ -269,7 +266,7 @@ bool Regexp::ParseState::PushRegexp(Regexp* re) {
// Searches the case folding tables and returns the CaseFold* that contains r.
// If there isn't one, returns the CaseFold* with smallest f->lo bigger than r.
// If there isn't one, returns NULL.
-const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
+const CaseFold* LookupCaseFold(const CaseFold* f, int n, Rune r) {
const CaseFold* ef = f + n;
// Binary search for entry containing r.
@@ -297,7 +294,7 @@ const CaseFold* LookupCaseFold(const CaseFold *f, int n, Rune r) {
}
// Returns the result of applying the fold f to the rune r.
-Rune ApplyFold(const CaseFold *f, Rune r) {
+Rune ApplyFold(const CaseFold* f, Rune r) {
switch (f->delta) {
default:
return r + f->delta;
@@ -305,7 +302,7 @@ Rune ApplyFold(const CaseFold *f, Rune r) {
case EvenOddSkip: // even <-> odd but only applies to every other
if ((r - f->lo) % 2)
return r;
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case EvenOdd: // even <-> odd
if (r%2 == 0)
return r + 1;
@@ -314,7 +311,7 @@ Rune ApplyFold(const CaseFold *f, Rune r) {
case OddEvenSkip: // odd <-> even but only applies to every other
if ((r - f->lo) % 2)
return r;
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case OddEven: // odd <-> even
if (r%2 == 1)
return r + 1;
@@ -472,7 +469,7 @@ bool Regexp::ParseState::PushSimpleOp(RegexpOp op) {
// Pushes a repeat operator regexp onto the stack.
// A valid argument for the operator must already be on the stack.
// The char c is the name of the operator, for use in error messages.
-bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,
+bool Regexp::ParseState::PushRepeatOp(RegexpOp op, absl::string_view s,
bool nongreedy) {
if (stacktop_ == NULL || IsMarker(stacktop_->op())) {
status_->set_code(kRegexpRepeatArgument);
@@ -565,8 +562,7 @@ int RepetitionWalker::ShortVisit(Regexp* re, int parent_arg) {
// Pushes a repetition regexp onto the stack.
// A valid argument for the operator must already be on the stack.
-bool Regexp::ParseState::PushRepetition(int min, int max,
- const StringPiece& s,
+bool Regexp::ParseState::PushRepetition(int min, int max, absl::string_view s,
bool nongreedy) {
if ((max != -1 && max < min) ||
min > maximum_repeat_count ||
@@ -609,7 +605,7 @@ bool Regexp::ParseState::IsMarker(RegexpOp op) {
// Processes a left parenthesis in the input.
// Pushes a marker onto the stack.
-bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
+bool Regexp::ParseState::DoLeftParen(absl::string_view name) {
Regexp* re = new Regexp(kLeftParen, flags_);
re->cap_ = ++ncap_;
if (name.data() != NULL)
@@ -774,8 +770,8 @@ Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) {
// Returns the leading string that re starts with.
// The returned Rune* points into a piece of re,
// so it must not be used after the caller calls re->Decref().
-Rune* Regexp::LeadingString(Regexp* re, int *nrune,
- Regexp::ParseFlags *flags) {
+Rune* Regexp::LeadingString(Regexp* re, int* nrune,
+ Regexp::ParseFlags* flags) {
while (re->op() == kRegexpConcat && re->nsub() > 0)
re = re->sub()[0];
@@ -806,7 +802,7 @@ void Regexp::RemoveLeadingString(Regexp* re, int n) {
Regexp* stk[4];
size_t d = 0;
while (re->op() == kRegexpConcat) {
- if (d < arraysize(stk))
+ if (d < ABSL_ARRAYSIZE(stk))
stk[d++] = re;
re = re->sub()[0];
}
@@ -1325,7 +1321,7 @@ bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
// Parses a decimal integer, storing it in *np.
// Sets *s to span the remainder of the string.
-static bool ParseInteger(StringPiece* s, int* np) {
+static bool ParseInteger(absl::string_view* s, int* np) {
if (s->empty() || !isdigit((*s)[0] & 0xFF))
return false;
// Disallow leading zeros.
@@ -1351,10 +1347,10 @@ static bool ParseInteger(StringPiece* s, int* np) {
// sets *hi to -1 to signify this.
// {,2} is NOT a valid suffix.
// The Maybe in the name signifies that the regexp parse
-// doesn't fail even if ParseRepetition does, so the StringPiece
+// doesn't fail even if ParseRepetition does, so the string_view
// s must NOT be edited unless MaybeParseRepetition returns true.
-static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
- StringPiece s = *sp;
+static bool MaybeParseRepetition(absl::string_view* sp, int* lo, int* hi) {
+ absl::string_view s = *sp;
if (s.empty() || s[0] != '{')
return false;
s.remove_prefix(1); // '{'
@@ -1385,12 +1381,13 @@ static bool MaybeParseRepetition(StringPiece* sp, int* lo, int* hi) {
return true;
}
-// Removes the next Rune from the StringPiece and stores it in *r.
+// Removes the next Rune from the string_view and stores it in *r.
// Returns number of bytes removed from sp.
// Behaves as though there is a terminating NUL at the end of sp.
// Argument order is backwards from usual Google style
// but consistent with chartorune.
-static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
+static int StringViewToRune(Rune* r, absl::string_view* sp,
+ RegexpStatus* status) {
// fullrune() takes int, not size_t. However, it just looks
// at the leading byte and treats any length >= 4 the same.
if (fullrune(sp->data(), static_cast<int>(std::min(size_t{4}, sp->size())))) {
@@ -1411,18 +1408,18 @@ static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus* status) {
if (status != NULL) {
status->set_code(kRegexpBadUTF8);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(absl::string_view());
}
return -1;
}
// Returns whether name is valid UTF-8.
// If not, sets status to kRegexpBadUTF8.
-static bool IsValidUTF8(const StringPiece& s, RegexpStatus* status) {
- StringPiece t = s;
+static bool IsValidUTF8(absl::string_view s, RegexpStatus* status) {
+ absl::string_view t = s;
Rune r;
while (!t.empty()) {
- if (StringPieceToRune(&r, &t, status) < 0)
+ if (StringViewToRune(&r, &t, status) < 0)
return false;
}
return true;
@@ -1450,23 +1447,23 @@ static int UnHex(int c) {
// Parse an escape sequence (e.g., \n, \{).
// Sets *s to span the remainder of the string.
// Sets *rp to the named character.
-static bool ParseEscape(StringPiece* s, Rune* rp,
+static bool ParseEscape(absl::string_view* s, Rune* rp,
RegexpStatus* status, int rune_max) {
const char* begin = s->data();
if (s->empty() || (*s)[0] != '\\') {
// Should not happen - caller always checks.
status->set_code(kRegexpInternalError);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(absl::string_view());
return false;
}
if (s->size() == 1) {
status->set_code(kRegexpTrailingBackslash);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(absl::string_view());
return false;
}
Rune c, c1;
s->remove_prefix(1); // backslash
- if (StringPieceToRune(&c, s, status) < 0)
+ if (StringViewToRune(&c, s, status) < 0)
return false;
int code;
switch (c) {
@@ -1492,7 +1489,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Single non-zero octal digit is a backreference; not supported.
if (s->empty() || (*s)[0] < '0' || (*s)[0] > '7')
goto BadEscape;
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case '0':
// consume up to three octal digits; already have one.
code = c - '0';
@@ -1516,7 +1513,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
case 'x':
if (s->empty())
goto BadEscape;
- if (StringPieceToRune(&c, s, status) < 0)
+ if (StringViewToRune(&c, s, status) < 0)
return false;
if (c == '{') {
// Any number of digits in braces.
@@ -1525,7 +1522,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Perl accepts any text at all; it ignores all text
// after the first non-hex digit. We require only hex digits,
// and at least one.
- if (StringPieceToRune(&c, s, status) < 0)
+ if (StringViewToRune(&c, s, status) < 0)
return false;
int nhex = 0;
code = 0;
@@ -1536,7 +1533,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
goto BadEscape;
if (s->empty())
goto BadEscape;
- if (StringPieceToRune(&c, s, status) < 0)
+ if (StringViewToRune(&c, s, status) < 0)
return false;
}
if (c != '}' || nhex == 0)
@@ -1547,7 +1544,7 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
// Easy case: two hex digits.
if (s->empty())
goto BadEscape;
- if (StringPieceToRune(&c1, s, status) < 0)
+ if (StringViewToRune(&c1, s, status) < 0)
return false;
if (!IsHex(c) || !IsHex(c1))
goto BadEscape;
@@ -1593,7 +1590,7 @@ BadEscape:
// Unrecognized escape sequence.
status->set_code(kRegexpBadEscape);
status->set_error_arg(
- StringPiece(begin, static_cast<size_t>(s->data() - begin)));
+ absl::string_view(begin, static_cast<size_t>(s->data() - begin)));
return false;
}
@@ -1621,21 +1618,21 @@ void CharClassBuilder::AddRangeFlags(
}
// Look for a group with the given name.
-static const UGroup* LookupGroup(const StringPiece& name,
- const UGroup *groups, int ngroups) {
+static const UGroup* LookupGroup(absl::string_view name,
+ const UGroup* groups, int ngroups) {
// Simple name lookup.
for (int i = 0; i < ngroups; i++)
- if (StringPiece(groups[i].name) == name)
+ if (absl::string_view(groups[i].name) == name)
return &groups[i];
return NULL;
}
// Look for a POSIX group with the given name (e.g., "[:^alpha:]")
-static const UGroup* LookupPosixGroup(const StringPiece& name) {
+static const UGroup* LookupPosixGroup(absl::string_view name) {
return LookupGroup(name, posix_groups, num_posix_groups);
}
-static const UGroup* LookupPerlGroup(const StringPiece& name) {
+static const UGroup* LookupPerlGroup(absl::string_view name) {
return LookupGroup(name, perl_groups, num_perl_groups);
}
@@ -1646,16 +1643,16 @@ static URange32 any32[] = { { 65536, Runemax } };
static UGroup anygroup = { "Any", +1, any16, 1, any32, 1 };
// Look for a Unicode group with the given name (e.g., "Han")
-static const UGroup* LookupUnicodeGroup(const StringPiece& name) {
+static const UGroup* LookupUnicodeGroup(absl::string_view name) {
// Special case: "Any" means any.
- if (name == StringPiece("Any"))
+ if (name == absl::string_view("Any"))
return &anygroup;
return LookupGroup(name, unicode_groups, num_unicode_groups);
}
#endif
// Add a UGroup or its negation to the character class.
-static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
+static void AddUGroup(CharClassBuilder* cc, const UGroup* g, int sign,
Regexp::ParseFlags parse_flags) {
if (sign == +1) {
for (int i = 0; i < g->nr16; i++) {
@@ -1705,16 +1702,17 @@ static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign,
// not the Perl empty-string classes (\b \B \A \Z \z).
// On success, sets *s to span the remainder of the string
// and returns the corresponding UGroup.
-// The StringPiece must *NOT* be edited unless the call succeeds.
-const UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) {
+// The string_view must *NOT* be edited unless the call succeeds.
+const UGroup* MaybeParsePerlCCEscape(absl::string_view* s,
+ Regexp::ParseFlags parse_flags) {
if (!(parse_flags & Regexp::PerlClasses))
return NULL;
if (s->size() < 2 || (*s)[0] != '\\')
return NULL;
- // Could use StringPieceToRune, but there aren't
+ // Could use StringViewToRune, but there aren't
// any non-ASCII Perl group names.
- StringPiece name(s->data(), 2);
- const UGroup *g = LookupPerlGroup(name);
+ absl::string_view name(s->data(), 2);
+ const UGroup* g = LookupPerlGroup(name);
if (g == NULL)
return NULL;
s->remove_prefix(name.size());
@@ -1729,9 +1727,9 @@ enum ParseStatus {
// Maybe parses a Unicode character group like \p{Han} or \P{Han}
// (the latter is a negated group).
-ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
- CharClassBuilder *cc,
- RegexpStatus* status) {
+ParseStatus ParseUnicodeGroup(absl::string_view* s,
+ Regexp::ParseFlags parse_flags,
+ CharClassBuilder* cc, RegexpStatus* status) {
// Decide whether to parse.
if (!(parse_flags & Regexp::UnicodeGroups))
return kParseNothing;
@@ -1745,34 +1743,34 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
int sign = +1; // -1 = negated char class
if (c == 'P')
sign = -sign;
- StringPiece seq = *s; // \p{Han} or \pL
- StringPiece name; // Han or L
+ absl::string_view seq = *s; // \p{Han} or \pL
+ absl::string_view name; // Han or L
s->remove_prefix(2); // '\\', 'p'
- if (!StringPieceToRune(&c, s, status))
+ if (!StringViewToRune(&c, s, status))
return kParseError;
if (c != '{') {
// Name is the bit of string we just skipped over for c.
const char* p = seq.data() + 2;
- name = StringPiece(p, static_cast<size_t>(s->data() - p));
+ name = absl::string_view(p, static_cast<size_t>(s->data() - p));
} else {
// Name is in braces. Look for closing }
size_t end = s->find('}', 0);
- if (end == StringPiece::npos) {
+ if (end == absl::string_view::npos) {
if (!IsValidUTF8(seq, status))
return kParseError;
status->set_code(kRegexpBadCharRange);
status->set_error_arg(seq);
return kParseError;
}
- name = StringPiece(s->data(), end); // without '}'
+ name = absl::string_view(s->data(), end); // without '}'
s->remove_prefix(end + 1); // with '}'
if (!IsValidUTF8(name, status))
return kParseError;
}
// Chop seq where s now begins.
- seq = StringPiece(seq.data(), static_cast<size_t>(s->data() - seq.data()));
+ seq = absl::string_view(seq.data(), static_cast<size_t>(s->data() - seq.data()));
if (!name.empty() && name[0] == '^') {
sign = -sign;
@@ -1781,7 +1779,7 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
#if !defined(RE2_USE_ICU)
// Look up the group in the RE2 Unicode data.
- const UGroup *g = LookupUnicodeGroup(name);
+ const UGroup* g = LookupUnicodeGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
status->set_error_arg(seq);
@@ -1819,9 +1817,9 @@ ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
// Parses a character class name like [:alnum:].
// Sets *s to span the remainder of the string.
// Adds the ranges corresponding to the class to ranges.
-static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
- CharClassBuilder *cc,
- RegexpStatus* status) {
+static ParseStatus ParseCCName(absl::string_view* s,
+ Regexp::ParseFlags parse_flags,
+ CharClassBuilder* cc, RegexpStatus* status) {
// Check begins with [:
const char* p = s->data();
const char* ep = s->data() + s->size();
@@ -1839,9 +1837,9 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
// Got it. Check that it's valid.
q += 2;
- StringPiece name(p, static_cast<size_t>(q - p));
+ absl::string_view name(p, static_cast<size_t>(q - p));
- const UGroup *g = LookupPosixGroup(name);
+ const UGroup* g = LookupPosixGroup(name);
if (g == NULL) {
status->set_code(kRegexpBadCharRange);
status->set_error_arg(name);
@@ -1857,8 +1855,8 @@ static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
// There are fewer special characters here than in the rest of the regexp.
// Sets *s to span the remainder of the string.
// Sets *rp to the character.
-bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
- const StringPiece& whole_class,
+bool Regexp::ParseState::ParseCCCharacter(absl::string_view* s, Rune* rp,
+ absl::string_view whole_class,
RegexpStatus* status) {
if (s->empty()) {
status->set_code(kRegexpMissingBracket);
@@ -1872,7 +1870,7 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
return ParseEscape(s, rp, status, rune_max_);
// Otherwise take the next rune.
- return StringPieceToRune(rp, s, status) >= 0;
+ return StringViewToRune(rp, s, status) >= 0;
}
// Parses a character class character, or, if the character
@@ -1880,10 +1878,10 @@ bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
// For single characters, rr->lo == rr->hi.
// Sets *s to span the remainder of the string.
// Sets *rp to the character.
-bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
- const StringPiece& whole_class,
+bool Regexp::ParseState::ParseCCRange(absl::string_view* s, RuneRange* rr,
+ absl::string_view whole_class,
RegexpStatus* status) {
- StringPiece os = *s;
+ absl::string_view os = *s;
if (!ParseCCCharacter(s, &rr->lo, whole_class, status))
return false;
// [a-] means (a|-), so check for final ].
@@ -1893,8 +1891,8 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
return false;
if (rr->hi < rr->lo) {
status->set_code(kRegexpBadCharRange);
- status->set_error_arg(
- StringPiece(os.data(), static_cast<size_t>(s->data() - os.data())));
+ status->set_error_arg(absl::string_view(
+ os.data(), static_cast<size_t>(s->data() - os.data())));
return false;
}
} else {
@@ -1906,14 +1904,13 @@ bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
// Parses a possibly-negated character class expression like [^abx-z[:digit:]].
// Sets *s to span the remainder of the string.
// Sets *out_re to the regexp for the class.
-bool Regexp::ParseState::ParseCharClass(StringPiece* s,
- Regexp** out_re,
+bool Regexp::ParseState::ParseCharClass(absl::string_view* s, Regexp** out_re,
RegexpStatus* status) {
- StringPiece whole_class = *s;
+ absl::string_view whole_class = *s;
if (s->empty() || (*s)[0] != '[') {
// Caller checked this.
status->set_code(kRegexpInternalError);
- status->set_error_arg(StringPiece());
+ status->set_error_arg(absl::string_view());
return false;
}
bool negated = false;
@@ -1935,16 +1932,16 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
// Except that Perl allows - anywhere.
if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&
(s->size() == 1 || (*s)[1] != ']')) {
- StringPiece t = *s;
+ absl::string_view t = *s;
t.remove_prefix(1); // '-'
Rune r;
- int n = StringPieceToRune(&r, &t, status);
+ int n = StringViewToRune(&r, &t, status);
if (n < 0) {
re->Decref();
return false;
}
status->set_code(kRegexpBadCharRange);
- status->set_error_arg(StringPiece(s->data(), 1+n));
+ status->set_error_arg(absl::string_view(s->data(), 1+n));
re->Decref();
return false;
}
@@ -1979,7 +1976,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
}
// Look for Perl character class symbols (extension).
- const UGroup *g = MaybeParsePerlCCEscape(s, flags_);
+ const UGroup* g = MaybeParsePerlCCEscape(s, flags_);
if (g != NULL) {
AddUGroup(re->ccb_, g, g->sign, flags_);
continue;
@@ -2014,7 +2011,7 @@ bool Regexp::ParseState::ParseCharClass(StringPiece* s,
}
// Returns whether name is a valid capture name.
-static bool IsValidCaptureName(const StringPiece& name) {
+static bool IsValidCaptureName(absl::string_view name) {
if (name.empty())
return false;
@@ -2028,17 +2025,17 @@ static bool IsValidCaptureName(const StringPiece& name) {
// if they start doing that for capture names, we won't follow suit.
static const CharClass* const cc = []() {
CharClassBuilder ccb;
- for (StringPiece group :
+ for (absl::string_view group :
{"Lu", "Ll", "Lt", "Lm", "Lo", "Nl", "Mn", "Mc", "Nd", "Pc"})
AddUGroup(&ccb, LookupGroup(group, unicode_groups, num_unicode_groups),
+1, Regexp::NoParseFlags);
return ccb.GetCharClass();
}();
- StringPiece t = name;
+ absl::string_view t = name;
Rune r;
while (!t.empty()) {
- if (StringPieceToRune(&r, &t, NULL) < 0)
+ if (StringViewToRune(&r, &t, NULL) < 0)
return false;
if (cc->Contains(r))
continue;
@@ -2052,8 +2049,8 @@ static bool IsValidCaptureName(const StringPiece& name) {
// The caller must check that s begins with "(?".
// Returns true on success. If the Perl flag is not
// well-formed or not supported, sets status_ and returns false.
-bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
- StringPiece t = *s;
+bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
+ absl::string_view t = *s;
// Caller is supposed to check this.
if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
@@ -2082,7 +2079,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
if (t.size() > 2 && t[0] == 'P' && t[1] == '<') {
// Pull out name.
size_t end = t.find('>', 2);
- if (end == StringPiece::npos) {
+ if (end == absl::string_view::npos) {
if (!IsValidUTF8(*s, status_))
return false;
status_->set_code(kRegexpBadNamedCapture);
@@ -2091,8 +2088,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
}
// t is "P<name>...", t[end] == '>'
- StringPiece capture(t.data()-2, end+3); // "(?P<name>"
- StringPiece name(t.data()+2, end-2); // "name"
+ absl::string_view capture(t.data()-2, end+3); // "(?P<name>"
+ absl::string_view name(t.data()+2, end-2); // "name"
if (!IsValidUTF8(name, status_))
return false;
if (!IsValidCaptureName(name)) {
@@ -2118,7 +2115,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
for (bool done = false; !done; ) {
if (t.empty())
goto BadPerlOp;
- if (StringPieceToRune(&c, &t, status_) < 0)
+ if (StringViewToRune(&c, &t, status_) < 0)
return false;
switch (c) {
default:
@@ -2191,7 +2188,7 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
BadPerlOp:
status_->set_code(kRegexpBadPerlOp);
status_->set_error_arg(
- StringPiece(s->data(), static_cast<size_t>(t.data() - s->data())));
+ absl::string_view(s->data(), static_cast<size_t>(t.data() - s->data())));
return false;
}
@@ -2199,7 +2196,7 @@ BadPerlOp:
// into UTF8 encoding in string.
// Can't use EncodingUtils::EncodeLatin1AsUTF8 because it is
// deprecated and because it rejects code points 0x80-0x9F.
-void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
+void ConvertLatin1ToUTF8(absl::string_view latin1, std::string* utf) {
char buf[UTFmax];
utf->clear();
@@ -2214,7 +2211,7 @@ void ConvertLatin1ToUTF8(const StringPiece& latin1, std::string* utf) {
// returning the corresponding Regexp tree.
// The caller must Decref the return value when done with it.
// Returns NULL on error.
-Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
+Regexp* Regexp::Parse(absl::string_view s, ParseFlags global_flags,
RegexpStatus* status) {
// Make status non-NULL (easier on everyone else).
RegexpStatus xstatus;
@@ -2222,7 +2219,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
status = &xstatus;
ParseState ps(global_flags, s, status);
- StringPiece t = s;
+ absl::string_view t = s;
// Convert regexp to UTF-8 (easier on the rest of the parser).
if (global_flags & Latin1) {
@@ -2236,7 +2233,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// Special parse loop for literal string.
while (!t.empty()) {
Rune r;
- if (StringPieceToRune(&r, &t, status) < 0)
+ if (StringViewToRune(&r, &t, status) < 0)
return NULL;
if (!ps.PushLiteral(r))
return NULL;
@@ -2244,13 +2241,13 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
return ps.DoFinish();
}
- StringPiece lastunary = StringPiece();
+ absl::string_view lastunary = absl::string_view();
while (!t.empty()) {
- StringPiece isunary = StringPiece();
+ absl::string_view isunary = absl::string_view();
switch (t[0]) {
default: {
Rune r;
- if (StringPieceToRune(&r, &t, status) < 0)
+ if (StringViewToRune(&r, &t, status) < 0)
return NULL;
if (!ps.PushLiteral(r))
return NULL;
@@ -2269,7 +2266,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (!ps.DoLeftParenNoCapture())
return NULL;
} else {
- if (!ps.DoLeftParen(StringPiece()))
+ if (!ps.DoLeftParen(absl::string_view()))
return NULL;
}
t.remove_prefix(1); // '('
@@ -2325,7 +2322,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
op = kRegexpQuest;
goto Rep;
Rep:
- StringPiece opstr = t;
+ absl::string_view opstr = t;
bool nongreedy = false;
t.remove_prefix(1); // '*' or '+' or '?'
if (ps.flags() & PerlX) {
@@ -2338,14 +2335,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
// a** is a syntax error, not a double-star.
// (and a++ means something else entirely, which we don't support!)
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(
+ status->set_error_arg(absl::string_view(
lastunary.data(),
static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
- opstr = StringPiece(opstr.data(),
- static_cast<size_t>(t.data() - opstr.data()));
+ opstr = absl::string_view(opstr.data(),
+ static_cast<size_t>(t.data() - opstr.data()));
if (!ps.PushRepeatOp(op, opstr, nongreedy))
return NULL;
isunary = opstr;
@@ -2354,7 +2351,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
case '{': { // Counted repetition.
int lo, hi;
- StringPiece opstr = t;
+ absl::string_view opstr = t;
if (!MaybeParseRepetition(&t, &lo, &hi)) {
// Treat like a literal.
if (!ps.PushLiteral('{'))
@@ -2371,14 +2368,14 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
if (!lastunary.empty()) {
// Not allowed to stack repetition operators.
status->set_code(kRegexpRepeatOp);
- status->set_error_arg(StringPiece(
+ status->set_error_arg(absl::string_view(
lastunary.data(),
static_cast<size_t>(t.data() - lastunary.data())));
return NULL;
}
}
- opstr = StringPiece(opstr.data(),
- static_cast<size_t>(t.data() - opstr.data()));
+ opstr = absl::string_view(opstr.data(),
+ static_cast<size_t>(t.data() - opstr.data()));
if (!ps.PushRepetition(lo, hi, opstr, nongreedy))
return NULL;
isunary = opstr;
@@ -2428,7 +2425,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
break;
}
Rune r;
- if (StringPieceToRune(&r, &t, status) < 0)
+ if (StringViewToRune(&r, &t, status) < 0)
return NULL;
if (!ps.PushLiteral(r))
return NULL;
@@ -2454,7 +2451,7 @@ Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
}
}
- const UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());
+ const UGroup* g = MaybeParsePerlCCEscape(&t, ps.flags());
if (g != NULL) {
Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
re->ccb_ = new CharClassBuilder;
diff --git a/re2/prefilter.cc b/re2/prefilter.cc
index 37b0cf8..3c7886f 100644
--- a/re2/prefilter.cc
+++ b/re2/prefilter.cc
@@ -10,9 +10,8 @@
#include <utility>
#include <vector>
-#include "util/util.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
#include "re2/re2.h"
#include "re2/unicode_casefold.h"
@@ -664,7 +663,7 @@ std::string Prefilter::DebugString() const {
switch (op_) {
default:
LOG(DFATAL) << "Bad op in Prefilter::DebugString: " << op_;
- return StringPrintf("op%d", op_);
+ return absl::StrFormat("op%d", op_);
case NONE:
return "*no-matches*";
case ATOM:
diff --git a/re2/prefilter.h b/re2/prefilter.h
index e149e59..b2545e1 100644
--- a/re2/prefilter.h
+++ b/re2/prefilter.h
@@ -13,7 +13,6 @@
#include <string>
#include <vector>
-#include "util/util.h"
#include "util/logging.h"
namespace re2 {
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
index 409794e..41f65a3 100644
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -13,9 +13,8 @@
#include <utility>
#include <vector>
-#include "util/util.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/prefilter.h"
#include "re2/re2.h"
@@ -80,14 +79,14 @@ Prefilter* PrefilterTree::CanonicalNode(NodeMap* nodes, Prefilter* node) {
std::string PrefilterTree::NodeString(Prefilter* node) const {
// Adding the operation disambiguates AND/OR/atom nodes.
- std::string s = StringPrintf("%d", node->op()) + ":";
+ std::string s = absl::StrFormat("%d", node->op()) + ":";
if (node->op() == Prefilter::ATOM) {
s += node->atom();
} else {
for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
s += ',';
- s += StringPrintf("%d", (*node->subs())[i]->unique_id());
+ s += absl::StrFormat("%d", (*node->subs())[i]->unique_id());
}
}
return s;
@@ -380,7 +379,7 @@ std::string PrefilterTree::DebugNodeString(Prefilter* node) const {
for (size_t i = 0; i < node->subs()->size(); i++) {
if (i > 0)
node_string += ',';
- node_string += StringPrintf("%d", (*node->subs())[i]->unique_id());
+ node_string += absl::StrFormat("%d", (*node->subs())[i]->unique_id());
node_string += ":";
node_string += DebugNodeString((*node->subs())[i]);
}
diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
index 6de1c38..3eb8056 100644
--- a/re2/prefilter_tree.h
+++ b/re2/prefilter_tree.h
@@ -20,7 +20,6 @@
#include <string>
#include <vector>
-#include "util/util.h"
#include "re2/prefilter.h"
#include "re2/sparse_array.h"
@@ -59,7 +58,7 @@ class PrefilterTree {
private:
typedef SparseArray<int> IntMap;
- // TODO(junyer): Use std::unordered_set<Prefilter*> instead?
+ // TODO(junyer): Use absl::flat_hash_set<Prefilter*> instead?
// It should be trivial to get rid of the stringification...
typedef std::map<std::string, Prefilter*> NodeMap;
diff --git a/re2/prog.cc b/re2/prog.cc
index 3b9596a..6cadcfa 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -19,11 +19,10 @@
#include <memory>
#include <utility>
-#include "util/util.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/bitmap256.h"
-#include "re2/stringpiece.h"
namespace re2 {
@@ -74,34 +73,34 @@ void Prog::Inst::InitFail() {
std::string Prog::Inst::Dump() {
switch (opcode()) {
default:
- return StringPrintf("opcode %d", static_cast<int>(opcode()));
+ return absl::StrFormat("opcode %d", static_cast<int>(opcode()));
case kInstAlt:
- return StringPrintf("alt -> %d | %d", out(), out1_);
+ return absl::StrFormat("alt -> %d | %d", out(), out1_);
case kInstAltMatch:
- return StringPrintf("altmatch -> %d | %d", out(), out1_);
+ return absl::StrFormat("altmatch -> %d | %d", out(), out1_);
case kInstByteRange:
- return StringPrintf("byte%s [%02x-%02x] %d -> %d",
- foldcase() ? "/i" : "",
- lo_, hi_, hint(), out());
+ return absl::StrFormat("byte%s [%02x-%02x] %d -> %d",
+ foldcase() ? "/i" : "",
+ lo_, hi_, hint(), out());
case kInstCapture:
- return StringPrintf("capture %d -> %d", cap_, out());
+ return absl::StrFormat("capture %d -> %d", cap_, out());
case kInstEmptyWidth:
- return StringPrintf("emptywidth %#x -> %d",
- static_cast<int>(empty_), out());
+ return absl::StrFormat("emptywidth %#x -> %d",
+ static_cast<int>(empty_), out());
case kInstMatch:
- return StringPrintf("match! %d", match_id());
+ return absl::StrFormat("match! %d", match_id());
case kInstNop:
- return StringPrintf("nop -> %d", out());
+ return absl::StrFormat("nop -> %d", out());
case kInstFail:
- return StringPrintf("fail");
+ return absl::StrFormat("fail");
}
}
@@ -143,7 +142,7 @@ static std::string ProgToString(Prog* prog, Workq* q) {
for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
int id = *i;
Prog::Inst* ip = prog->inst(id);
- s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
+ s += absl::StrFormat("%d. %s\n", id, ip->Dump());
AddToQueue(q, ip->out());
if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
AddToQueue(q, ip->out1());
@@ -156,9 +155,9 @@ static std::string FlattenedProgToString(Prog* prog, int start) {
for (int id = start; id < prog->size(); id++) {
Prog::Inst* ip = prog->inst(id);
if (ip->last())
- s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
+ s += absl::StrFormat("%d. %s\n", id, ip->Dump());
else
- s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
+ s += absl::StrFormat("%d+ %s\n", id, ip->Dump());
}
return s;
}
@@ -189,7 +188,7 @@ std::string Prog::DumpByteMap() {
while (c < 256-1 && bytemap_[c+1] == b)
c++;
int hi = c;
- map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
+ map += absl::StrFormat("[%02x-%02x] -> %d\n", lo, hi, b);
}
return map;
}
@@ -284,7 +283,7 @@ void Prog::Optimize() {
}
}
-uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
+uint32_t Prog::EmptyFlags(absl::string_view text, const char* p) {
int flags = 0;
// ^ and \A
@@ -813,7 +812,7 @@ void Prog::EmitList(int root, SparseArray<int>* rootmap,
flat->back().set_opcode(kInstAltMatch);
flat->back().set_out(static_cast<int>(flat->size()));
flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
case kInstAlt:
stk->push_back(ip->out1());
diff --git a/re2/prog.h b/re2/prog.h
index 72c9856..41923f3 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -11,12 +11,12 @@
#include <stdint.h>
#include <functional>
-#include <mutex>
#include <string>
#include <vector>
#include <type_traits>
-#include "util/util.h"
+#include "absl/base/call_once.h"
+#include "absl/strings/string_view.h"
#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/re2.h"
@@ -249,7 +249,7 @@ class Prog {
// Returns the set of kEmpty flags that are in effect at
// position p within context.
- static uint32_t EmptyFlags(const StringPiece& context, const char* p);
+ static uint32_t EmptyFlags(absl::string_view context, const char* p);
// Returns whether byte c is a word character: ASCII only.
// Used by the implementation of \b and \B.
@@ -274,15 +274,15 @@ class Prog {
// If a particular submatch is not matched during the regexp match,
// it is set to NULL.
//
- // Matching text == StringPiece(NULL, 0) is treated as any other empty
+ // Matching text == absl::string_view() is treated as any other empty
// string, but note that on return, it will not be possible to distinguish
// submatches that matched that empty string from submatches that didn't
// match anything. Either way, match[i] == NULL.
// Search using NFA: can find submatches but kind of slow.
- bool SearchNFA(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch);
+ bool SearchNFA(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match,
+ int nmatch);
// Search using DFA: much faster than NFA but only finds
// end of match and can use a lot more memory.
@@ -290,8 +290,8 @@ class Prog {
// If the DFA runs out of memory, sets *failed to true and returns false.
// If matches != NULL and kind == kManyMatch and there is a match,
// SearchDFA fills matches with the match IDs of the final matching state.
- bool SearchDFA(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind, StringPiece* match0,
+ bool SearchDFA(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match0,
bool* failed, SparseSet* matches);
// The callback issued after building each DFA state with BuildEntireDFA().
@@ -321,16 +321,16 @@ class Prog {
// but much faster than NFA (competitive with PCRE)
// for those expressions.
bool IsOnePass();
- bool SearchOnePass(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch);
+ bool SearchOnePass(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match,
+ int nmatch);
// Bit-state backtracking. Fast on small cases but uses memory
// proportional to the product of the list count and the text size.
bool CanBitState() { return list_heads_.data() != NULL; }
- bool SearchBitState(const StringPiece& text, const StringPiece& context,
- Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch);
+ bool SearchBitState(absl::string_view text, absl::string_view context,
+ Anchor anchor, MatchKind kind, absl::string_view* match,
+ int nmatch);
static const int kMaxOnePassCapture = 5; // $0 through $4
@@ -340,10 +340,9 @@ class Prog {
// It is also recursive, so can't use in production (will overflow stacks).
// The name "Unsafe" here is supposed to be a flag that
// you should not be using this function.
- bool UnsafeSearchBacktrack(const StringPiece& text,
- const StringPiece& context,
+ bool UnsafeSearchBacktrack(absl::string_view text, absl::string_view context,
Anchor anchor, MatchKind kind,
- StringPiece* match, int nmatch);
+ absl::string_view* match, int nmatch);
// Computes range for any strings matching regexp. The min and max can in
// some cases be arbitrarily precise, so the caller gets to specify the
@@ -444,8 +443,8 @@ class Prog {
uint8_t bytemap_[256]; // map from input bytes to byte classes
- std::once_flag dfa_first_once_;
- std::once_flag dfa_longest_once_;
+ absl::once_flag dfa_first_once_;
+ absl::once_flag dfa_longest_once_;
Prog(const Prog&) = delete;
Prog& operator=(const Prog&) = delete;
@@ -455,10 +454,10 @@ class Prog {
// that don't allow comparisons between different objects - not even if
// those objects are views into the same string! Thus, we provide these
// conversion functions for convenience.
-static inline const char* BeginPtr(const StringPiece& s) {
+static inline const char* BeginPtr(absl::string_view s) {
return s.data();
}
-static inline const char* EndPtr(const StringPiece& s) {
+static inline const char* EndPtr(absl::string_view s) {
return s.data() + s.size();
}
diff --git a/re2/re2.cc b/re2/re2.cc
index b24c6d6..c0011e9 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -21,12 +21,13 @@
#include <algorithm>
#include <atomic>
#include <iterator>
-#include <mutex>
#include <string>
#include <utility>
#include <vector>
-#include "util/util.h"
+#include "absl/base/macros.h"
+#include "absl/container/fixed_array.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/strutil.h"
#include "util/utf.h"
@@ -129,7 +130,7 @@ static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
return RE2::ErrorInternal;
}
-static std::string trunc(const StringPiece& pattern) {
+static std::string trunc(absl::string_view pattern) {
if (pattern.size() < 100)
return std::string(pattern);
return std::string(pattern.substr(0, 100)) + "...";
@@ -144,11 +145,11 @@ RE2::RE2(const std::string& pattern) {
Init(pattern, DefaultOptions);
}
-RE2::RE2(const StringPiece& pattern) {
+RE2::RE2(absl::string_view pattern) {
Init(pattern, DefaultOptions);
}
-RE2::RE2(const StringPiece& pattern, const Options& options) {
+RE2::RE2(absl::string_view pattern, const Options& options) {
Init(pattern, options);
}
@@ -196,9 +197,9 @@ int RE2::Options::ParseFlags() const {
return flags;
}
-void RE2::Init(const StringPiece& pattern, const Options& options) {
- static std::once_flag empty_once;
- std::call_once(empty_once, []() {
+void RE2::Init(absl::string_view pattern, const Options& options) {
+ static absl::once_flag empty_once;
+ absl::call_once(empty_once, []() {
(void) new (empty_storage) EmptyStorage;
});
@@ -261,7 +262,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
// We used to compute this lazily, but it's used during the
// typical control flow for a match call, so we now compute
- // it eagerly, which avoids the overhead of std::once_flag.
+ // it eagerly, which avoids the overhead of absl::once_flag.
num_captures_ = suffix_regexp_->NumCaptures();
// Could delay this until the first match call that
@@ -274,7 +275,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
// Returns rprog_, computing it if needed.
re2::Prog* RE2::ReverseProg() const {
- std::call_once(rprog_once_, [](const RE2* re) {
+ absl::call_once(rprog_once_, [](const RE2* re) {
re->rprog_ =
re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
if (re->rprog_ == NULL) {
@@ -382,7 +383,7 @@ int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
// Returns named_groups_, computing it if needed.
const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
- std::call_once(named_groups_once_, [](const RE2* re) {
+ absl::call_once(named_groups_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->named_groups_ = re->suffix_regexp_->NamedCaptures();
if (re->named_groups_ == NULL)
@@ -393,7 +394,7 @@ const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
// Returns group_names_, computing it if needed.
const std::map<int, std::string>& RE2::CapturingGroupNames() const {
- std::call_once(group_names_once_, [](const RE2* re) {
+ absl::call_once(group_names_once_, [](const RE2* re) {
if (re->suffix_regexp_ != NULL)
re->group_names_ = re->suffix_regexp_->CaptureNames();
if (re->group_names_ == NULL)
@@ -404,17 +405,17 @@ const std::map<int, std::string>& RE2::CapturingGroupNames() const {
/***** Convenience interfaces *****/
-bool RE2::FullMatchN(const StringPiece& text, const RE2& re,
+bool RE2::FullMatchN(absl::string_view text, const RE2& re,
const Arg* const args[], int n) {
return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
}
-bool RE2::PartialMatchN(const StringPiece& text, const RE2& re,
+bool RE2::PartialMatchN(absl::string_view text, const RE2& re,
const Arg* const args[], int n) {
return re.DoMatch(text, UNANCHORED, NULL, args, n);
}
-bool RE2::ConsumeN(StringPiece* input, const RE2& re,
+bool RE2::ConsumeN(absl::string_view* input, const RE2& re,
const Arg* const args[], int n) {
size_t consumed;
if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
@@ -425,7 +426,7 @@ bool RE2::ConsumeN(StringPiece* input, const RE2& re,
}
}
-bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
+bool RE2::FindAndConsumeN(absl::string_view* input, const RE2& re,
const Arg* const args[], int n) {
size_t consumed;
if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
@@ -438,12 +439,12 @@ bool RE2::FindAndConsumeN(StringPiece* input, const RE2& re,
bool RE2::Replace(std::string* str,
const RE2& re,
- const StringPiece& rewrite) {
- StringPiece vec[kVecSize];
+ absl::string_view rewrite) {
+ absl::string_view vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
+ if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
return false;
if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
return false;
@@ -460,12 +461,12 @@ bool RE2::Replace(std::string* str,
int RE2::GlobalReplace(std::string* str,
const RE2& re,
- const StringPiece& rewrite) {
- StringPiece vec[kVecSize];
+ absl::string_view rewrite) {
+ absl::string_view vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
+ if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
return false;
const char* p = str->data();
@@ -528,15 +529,15 @@ int RE2::GlobalReplace(std::string* str,
return count;
}
-bool RE2::Extract(const StringPiece& text,
+bool RE2::Extract(absl::string_view text,
const RE2& re,
- const StringPiece& rewrite,
+ absl::string_view rewrite,
std::string* out) {
- StringPiece vec[kVecSize];
+ absl::string_view vec[kVecSize];
int nvec = 1 + MaxSubmatch(rewrite);
if (nvec > 1 + re.NumberOfCapturingGroups())
return false;
- if (nvec > static_cast<int>(arraysize(vec)))
+ if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
return false;
if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
return false;
@@ -545,7 +546,7 @@ bool RE2::Extract(const StringPiece& text,
return re.Rewrite(out, rewrite, vec, nvec);
}
-std::string RE2::QuoteMeta(const StringPiece& unquoted) {
+std::string RE2::QuoteMeta(absl::string_view unquoted) {
std::string result;
result.reserve(unquoted.size() << 1);
@@ -644,11 +645,11 @@ static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
/***** Actual matching and rewriting code *****/
-bool RE2::Match(const StringPiece& text,
+bool RE2::Match(absl::string_view text,
size_t startpos,
size_t endpos,
Anchor re_anchor,
- StringPiece* submatch,
+ absl::string_view* submatch,
int nsubmatch) const {
if (!ok()) {
if (options_.log_errors())
@@ -665,7 +666,7 @@ bool RE2::Match(const StringPiece& text,
return false;
}
- StringPiece subtext = text;
+ absl::string_view subtext = text;
subtext.remove_prefix(startpos);
subtext.remove_suffix(text.size() - endpos);
@@ -673,8 +674,8 @@ bool RE2::Match(const StringPiece& text,
// Don't ask for the location if we won't use it.
// SearchDFA can do extra optimizations in that case.
- StringPiece match;
- StringPiece* matchp = &match;
+ absl::string_view match;
+ absl::string_view* matchp = &match;
if (nsubmatch == 0)
matchp = NULL;
@@ -857,7 +858,7 @@ bool RE2::Match(const StringPiece& text,
if (ncap == 1)
submatch[0] = match;
} else {
- StringPiece subtext1;
+ absl::string_view subtext1;
if (skipped_test) {
// DFA ran out of memory or was skipped:
// need to search in entire original text.
@@ -895,17 +896,17 @@ bool RE2::Match(const StringPiece& text,
// Adjust overall match for required prefix that we stripped off.
if (prefixlen > 0 && nsubmatch > 0)
- submatch[0] = StringPiece(submatch[0].data() - prefixlen,
- submatch[0].size() + prefixlen);
+ submatch[0] = absl::string_view(submatch[0].data() - prefixlen,
+ submatch[0].size() + prefixlen);
// Zero submatches that don't exist in the regexp.
for (int i = ncap; i < nsubmatch; i++)
- submatch[i] = StringPiece();
+ submatch[i] = absl::string_view();
return true;
}
-// Internal matcher - like Match() but takes Args not StringPieces.
-bool RE2::DoMatch(const StringPiece& text,
+// Internal matcher - like Match() but takes Args not string_views.
+bool RE2::DoMatch(absl::string_view text,
Anchor re_anchor,
size_t* consumed,
const Arg* const* args,
@@ -928,19 +929,10 @@ bool RE2::DoMatch(const StringPiece& text,
else
nvec = n+1;
- StringPiece* vec;
- StringPiece stkvec[kVecSize];
- StringPiece* heapvec = NULL;
-
- if (nvec <= static_cast<int>(arraysize(stkvec))) {
- vec = stkvec;
- } else {
- vec = new StringPiece[nvec];
- heapvec = vec;
- }
+ absl::FixedArray<absl::string_view, kVecSize> vec_storage(nvec);
+ absl::string_view* vec = vec_storage.data();
if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
- delete[] heapvec;
return false;
}
@@ -949,27 +941,24 @@ bool RE2::DoMatch(const StringPiece& text,
if (n == 0 || args == NULL) {
// We are not interested in results
- delete[] heapvec;
return true;
}
// If we got here, we must have matched the whole pattern.
for (int i = 0; i < n; i++) {
- const StringPiece& s = vec[i+1];
+ absl::string_view s = vec[i+1];
if (!args[i]->Parse(s.data(), s.size())) {
// TODO: Should we indicate what the error was?
- delete[] heapvec;
return false;
}
}
- delete[] heapvec;
return true;
}
// Checks that the rewrite string is well-formed with respect to this
// regular expression.
-bool RE2::CheckRewriteString(const StringPiece& rewrite,
+bool RE2::CheckRewriteString(absl::string_view rewrite,
std::string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
@@ -998,7 +987,7 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite,
}
if (max_token > NumberOfCapturingGroups()) {
- *error = StringPrintf(
+ *error = absl::StrFormat(
"Rewrite schema requests %d matches, but the regexp only has %d "
"parenthesized subexpressions.",
max_token, NumberOfCapturingGroups());
@@ -1009,7 +998,7 @@ bool RE2::CheckRewriteString(const StringPiece& rewrite,
// Returns the maximum submatch needed for the rewrite to be done by Replace().
// E.g. if rewrite == "foo \\2,\\1", returns 2.
-int RE2::MaxSubmatch(const StringPiece& rewrite) {
+int RE2::MaxSubmatch(absl::string_view rewrite) {
int max = 0;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -1029,8 +1018,8 @@ int RE2::MaxSubmatch(const StringPiece& rewrite) {
// Append the "rewrite" string, with backslash subsitutions from "vec",
// to string "out".
bool RE2::Rewrite(std::string* out,
- const StringPiece& rewrite,
- const StringPiece* vec,
+ absl::string_view rewrite,
+ const absl::string_view* vec,
int veclen) const {
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -1049,7 +1038,7 @@ bool RE2::Rewrite(std::string* out,
}
return false;
}
- StringPiece snip = vec[n];
+ absl::string_view snip = vec[n];
if (!snip.empty())
out->append(snip.data(), snip.size());
} else if (c == '\\') {
@@ -1081,9 +1070,9 @@ bool Parse(const char* str, size_t n, std::string* dest) {
}
template <>
-bool Parse(const char* str, size_t n, StringPiece* dest) {
+bool Parse(const char* str, size_t n, absl::string_view* dest) {
if (dest == NULL) return true;
- *dest = StringPiece(str, n);
+ *dest = absl::string_view(str, n);
return true;
}
diff --git a/re2/re2.h b/re2/re2.h
index 449162a..5ac5c46 100644
--- a/re2/re2.h
+++ b/re2/re2.h
@@ -75,7 +75,7 @@
// have succeeded or one conversion has failed.
// On conversion failure, the pointees will be in an indeterminate state
// because the caller has no way of knowing which conversion failed.
-// However, conversion cannot fail for types like string and StringPiece
+// However, conversion cannot fail for types like string and string_view
// that do not inspect the submatch contents. Hence, in the common case
// where all of the pointees are of such types, failure is always due to
// match failure and thus none of the pointees will have been modified.
@@ -140,12 +140,12 @@
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
-// them as they match. This requires use of the "StringPiece" type,
+// them as they match. This requires use of the string_view type,
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
-// std::string contents = ...; // Fill string somehow
-// StringPiece input(contents); // Wrap a StringPiece around it
+// std::string contents = ...; // Fill string somehow
+// absl::string_view input(contents); // Wrap a string_view around it
//
// std::string var;
// int value;
@@ -206,7 +206,6 @@
#include <stdint.h>
#include <algorithm>
#include <map>
-#include <mutex>
#include <string>
#include <type_traits>
#include <vector>
@@ -215,6 +214,9 @@
#include <TargetConditionals.h>
#endif
+#include "absl/base/call_once.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
#include "re2/stringpiece.h"
namespace re2 {
@@ -273,11 +275,11 @@ class RE2 {
// Need to have the const char* and const std::string& forms for implicit
// conversions when passing string literals to FullMatch and PartialMatch.
- // Otherwise the StringPiece form would be sufficient.
+ // Otherwise the absl::string_view form would be sufficient.
RE2(const char* pattern);
RE2(const std::string& pattern);
- RE2(const StringPiece& pattern);
- RE2(const StringPiece& pattern, const Options& options);
+ RE2(absl::string_view pattern);
+ RE2(absl::string_view pattern, const Options& options);
~RE2();
// Not copyable.
@@ -336,13 +338,13 @@ class RE2 {
// the functions whose names are the prefix before the 'N'. It is sometimes
// useful to invoke them directly, but the syntax is awkward, so the 'N'-less
// versions should be preferred.
- static bool FullMatchN(const StringPiece& text, const RE2& re,
+ static bool FullMatchN(absl::string_view text, const RE2& re,
const Arg* const args[], int n);
- static bool PartialMatchN(const StringPiece& text, const RE2& re,
+ static bool PartialMatchN(absl::string_view text, const RE2& re,
const Arg* const args[], int n);
- static bool ConsumeN(StringPiece* input, const RE2& re,
+ static bool ConsumeN(absl::string_view* input, const RE2& re,
const Arg* const args[], int n);
- static bool FindAndConsumeN(StringPiece* input, const RE2& re,
+ static bool FindAndConsumeN(absl::string_view* input, const RE2& re,
const Arg* const args[], int n);
private:
@@ -374,10 +376,10 @@ class RE2 {
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of:
- // std::string (matched piece is copied to string)
- // StringPiece (StringPiece is mutated to point to matched piece)
- // T (where "bool T::ParseFrom(const char*, size_t)" exists)
- // (void*)NULL (the corresponding matched sub-pattern is not copied)
+ // std::string (matched piece is copied to string)
+ // absl::string_view (string_view is mutated to point to matched piece)
+ // T ("bool T::ParseFrom(const char*, size_t)" must exist)
+ // (void*)NULL (the corresponding matched sub-pattern is not copied)
//
// Returns true iff all of the following conditions are satisfied:
// a. "text" matches "re" fully - from the beginning to the end of "text".
@@ -395,7 +397,7 @@ class RE2 {
// int number;
// RE2::FullMatch("abc", "[a-z]+(\\d+)?", &number);
template <typename... A>
- static bool FullMatch(const StringPiece& text, const RE2& re, A&&... a) {
+ static bool FullMatch(absl::string_view text, const RE2& re, A&&... a) {
return Apply(FullMatchN, text, re, Arg(std::forward<A>(a))...);
}
@@ -411,7 +413,7 @@ class RE2 {
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
template <typename... A>
- static bool PartialMatch(const StringPiece& text, const RE2& re, A&&... a) {
+ static bool PartialMatch(absl::string_view text, const RE2& re, A&&... a) {
return Apply(PartialMatchN, text, re, Arg(std::forward<A>(a))...);
}
@@ -429,7 +431,7 @@ class RE2 {
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
template <typename... A>
- static bool Consume(StringPiece* input, const RE2& re, A&&... a) {
+ static bool Consume(absl::string_view* input, const RE2& re, A&&... a) {
return Apply(ConsumeN, input, re, Arg(std::forward<A>(a))...);
}
@@ -447,7 +449,7 @@ class RE2 {
// number of sub-patterns, the "i"th captured sub-pattern is
// ignored.
template <typename... A>
- static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
+ static bool FindAndConsume(absl::string_view* input, const RE2& re, A&&... a) {
return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
}
@@ -466,7 +468,7 @@ class RE2 {
// false otherwise.
static bool Replace(std::string* str,
const RE2& re,
- const StringPiece& rewrite);
+ absl::string_view rewrite);
// Like Replace(), except replaces successive non-overlapping occurrences
// of the pattern in the string with the rewrite. E.g.
@@ -483,7 +485,7 @@ class RE2 {
// Returns the number of replacements made.
static int GlobalReplace(std::string* str,
const RE2& re,
- const StringPiece& rewrite);
+ absl::string_view rewrite);
// Like Replace, except that if the pattern matches, "rewrite"
// is copied into "out" with substitutions. The non-matching
@@ -493,9 +495,9 @@ class RE2 {
// successfully; if no match occurs, the string is left unaffected.
//
// REQUIRES: "text" must not alias any part of "*out".
- static bool Extract(const StringPiece& text,
+ static bool Extract(absl::string_view text,
const RE2& re,
- const StringPiece& rewrite,
+ absl::string_view rewrite,
std::string* out);
// Escapes all potentially meaningful regexp characters in
@@ -504,7 +506,7 @@ class RE2 {
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
- static std::string QuoteMeta(const StringPiece& unquoted);
+ static std::string QuoteMeta(absl::string_view unquoted);
// Computes range for any strings matching regexp. The min and max can in
// some cases be arbitrarily precise, so the caller gets to specify the
@@ -565,15 +567,15 @@ class RE2 {
// Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
// but will be handled correctly.
//
- // Passing text == StringPiece(NULL, 0) will be handled like any other
+ // Passing text == absl::string_view() will be handled like any other
// empty string, but note that on return, it will not be possible to tell
// whether submatch i matched the empty string or did not match:
// either way, submatch[i].data() == NULL.
- bool Match(const StringPiece& text,
+ bool Match(absl::string_view text,
size_t startpos,
size_t endpos,
Anchor re_anchor,
- StringPiece* submatch,
+ absl::string_view* submatch,
int nsubmatch) const;
// Check that the given rewrite string is suitable for use with this
@@ -584,12 +586,12 @@ class RE2 {
// '\' followed by anything other than a digit or '\'.
// A true return value guarantees that Replace() and Extract() won't
// fail because of a bad rewrite string.
- bool CheckRewriteString(const StringPiece& rewrite,
+ bool CheckRewriteString(absl::string_view rewrite,
std::string* error) const;
// Returns the maximum submatch needed for the rewrite to be done by
// Replace(). E.g. if rewrite == "foo \\2,\\1", returns 2.
- static int MaxSubmatch(const StringPiece& rewrite);
+ static int MaxSubmatch(absl::string_view rewrite);
// Append the "rewrite" string, with backslash substitutions from "vec",
// to string "out".
@@ -597,8 +599,8 @@ class RE2 {
// rewrite string. CheckRewriteString guarantees that the rewrite will
// be sucessful.
bool Rewrite(std::string* out,
- const StringPiece& rewrite,
- const StringPiece* vec,
+ absl::string_view rewrite,
+ const absl::string_view* vec,
int veclen) const;
// Constructor options
@@ -757,9 +759,9 @@ class RE2 {
static void FUZZING_ONLY_set_maximum_global_replace_count(int i);
private:
- void Init(const StringPiece& pattern, const Options& options);
+ void Init(absl::string_view pattern, const Options& options);
- bool DoMatch(const StringPiece& text,
+ bool DoMatch(absl::string_view text,
Anchor re_anchor,
size_t* consumed,
const Arg* const args[],
@@ -792,9 +794,9 @@ class RE2 {
// Map from capture indices to names
mutable const std::map<int, std::string>* group_names_;
- mutable std::once_flag rprog_once_;
- mutable std::once_flag named_groups_once_;
- mutable std::once_flag group_names_once_;
+ mutable absl::once_flag rprog_once_;
+ mutable absl::once_flag named_groups_once_;
+ mutable absl::once_flag group_names_once_;
};
/***** Implementation details *****/
@@ -805,7 +807,7 @@ namespace re2_internal {
template <typename T> struct Parse3ary : public std::false_type {};
template <> struct Parse3ary<void> : public std::true_type {};
template <> struct Parse3ary<std::string> : public std::true_type {};
-template <> struct Parse3ary<StringPiece> : public std::true_type {};
+template <> struct Parse3ary<absl::string_view> : public std::true_type {};
template <> struct Parse3ary<char> : public std::true_type {};
template <> struct Parse3ary<signed char> : public std::true_type {};
template <> struct Parse3ary<unsigned char> : public std::true_type {};
@@ -829,6 +831,42 @@ template <> struct Parse4ary<unsigned long long> : public std::true_type {};
template <typename T>
bool Parse(const char* str, size_t n, T* dest, int radix);
+// Support absl::optional<T> for all T with a stock parser.
+template <typename T> struct Parse3ary<absl::optional<T>> : public Parse3ary<T> {};
+template <typename T> struct Parse4ary<absl::optional<T>> : public Parse4ary<T> {};
+
+template <typename T>
+bool Parse(const char* str, size_t n, absl::optional<T>* dest) {
+ if (str == NULL) {
+ if (dest != NULL)
+ dest->reset();
+ return true;
+ }
+ T tmp;
+ if (Parse(str, n, &tmp)) {
+ if (dest != NULL)
+ dest->emplace(std::move(tmp));
+ return true;
+ }
+ return false;
+}
+
+template <typename T>
+bool Parse(const char* str, size_t n, absl::optional<T>* dest, int radix) {
+ if (str == NULL) {
+ if (dest != NULL)
+ dest->reset();
+ return true;
+ }
+ T tmp;
+ if (Parse(str, n, &tmp, radix)) {
+ if (dest != NULL)
+ dest->emplace(std::move(tmp));
+ return true;
+ }
+ return false;
+}
+
} // namespace re2_internal
class RE2::Arg {
@@ -955,7 +993,7 @@ class LazyRE2 {
// Named accessor/initializer:
RE2* get() const {
- std::call_once(once_, &LazyRE2::Init, this);
+ absl::call_once(once_, &LazyRE2::Init, this);
return ptr_;
}
@@ -965,7 +1003,7 @@ class LazyRE2 {
NoArg barrier_against_excess_initializers_;
mutable RE2* ptr_;
- mutable std::once_flag once_;
+ mutable absl::once_flag once_;
private:
static void Init(const LazyRE2* lazy_re2) {
diff --git a/re2/regexp.cc b/re2/regexp.cc
index 74ecb31..3cfb5ae 100644
--- a/re2/regexp.cc
+++ b/re2/regexp.cc
@@ -12,16 +12,15 @@
#include <string.h>
#include <algorithm>
#include <map>
-#include <mutex>
#include <string>
#include <vector>
-#include "util/util.h"
+#include "absl/base/call_once.h"
+#include "absl/base/macros.h"
+#include "absl/synchronization/mutex.h"
#include "util/logging.h"
-#include "util/mutex.h"
#include "util/utf.h"
#include "re2/pod_array.h"
-#include "re2/stringpiece.h"
#include "re2/walker-inl.h"
namespace re2 {
@@ -76,12 +75,12 @@ bool Regexp::QuickDestroy() {
// Similar to EmptyStorage in re2.cc.
struct RefStorage {
- Mutex ref_mutex;
+ absl::Mutex ref_mutex;
std::map<Regexp*, int> ref_map;
};
alignas(RefStorage) static char ref_storage[sizeof(RefStorage)];
-static inline Mutex* ref_mutex() {
+static inline absl::Mutex* ref_mutex() {
return &reinterpret_cast<RefStorage*>(ref_storage)->ref_mutex;
}
@@ -93,20 +92,20 @@ int Regexp::Ref() {
if (ref_ < kMaxRef)
return ref_;
- MutexLock l(ref_mutex());
+ absl::MutexLock l(ref_mutex());
return (*ref_map())[this];
}
// Increments reference count, returns object as convenience.
Regexp* Regexp::Incref() {
if (ref_ >= kMaxRef-1) {
- static std::once_flag ref_once;
- std::call_once(ref_once, []() {
+ static absl::once_flag ref_once;
+ absl::call_once(ref_once, []() {
(void) new (ref_storage) RefStorage;
});
// Store ref count in overflow map.
- MutexLock l(ref_mutex());
+ absl::MutexLock l(ref_mutex());
if (ref_ == kMaxRef) {
// already overflowed
(*ref_map())[this]++;
@@ -126,7 +125,7 @@ Regexp* Regexp::Incref() {
void Regexp::Decref() {
if (ref_ == kMaxRef) {
// Ref count is stored in overflow map.
- MutexLock l(ref_mutex());
+ absl::MutexLock l(ref_mutex());
int r = (*ref_map())[this] - 1;
if (r < kMaxRef) {
ref_ = static_cast<uint16_t>(r);
@@ -519,7 +518,7 @@ static const char *kErrorStrings[] = {
};
std::string RegexpStatus::CodeText(enum RegexpStatusCode code) {
- if (code < 0 || code >= arraysize(kErrorStrings))
+ if (code < 0 || code >= ABSL_ARRAYSIZE(kErrorStrings))
code = kRegexpInternalError;
return kErrorStrings[code];
}
diff --git a/re2/regexp.h b/re2/regexp.h
index b6446f9..df49894 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -92,10 +92,9 @@
#include <set>
#include <string>
-#include "util/util.h"
+#include "absl/strings/string_view.h"
#include "util/logging.h"
#include "util/utf.h"
-#include "re2/stringpiece.h"
namespace re2 {
@@ -195,10 +194,10 @@ class RegexpStatus {
~RegexpStatus() { delete tmp_; }
void set_code(RegexpStatusCode code) { code_ = code; }
- void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
+ void set_error_arg(absl::string_view error_arg) { error_arg_ = error_arg; }
void set_tmp(std::string* tmp) { delete tmp_; tmp_ = tmp; }
RegexpStatusCode code() const { return code_; }
- const StringPiece& error_arg() const { return error_arg_; }
+ absl::string_view error_arg() const { return error_arg_; }
bool ok() const { return code() == kRegexpSuccess; }
// Copies state from status.
@@ -213,9 +212,9 @@ class RegexpStatus {
std::string Text() const;
private:
- RegexpStatusCode code_; // Kind of error
- StringPiece error_arg_; // Piece of regexp containing syntax error.
- std::string* tmp_; // Temporary storage, possibly where error_arg_ is.
+ RegexpStatusCode code_; // Kind of error.
+ absl::string_view error_arg_; // Piece of regexp containing syntax error.
+ std::string* tmp_; // Temporary storage, possibly for error_arg_.
RegexpStatus(const RegexpStatus&) = delete;
RegexpStatus& operator=(const RegexpStatus&) = delete;
@@ -352,7 +351,7 @@ class Regexp {
// Parses string s to produce regular expression, returned.
// Caller must release return value with re->Decref().
// On failure, sets *status (if status != NULL) and returns NULL.
- static Regexp* Parse(const StringPiece& s, ParseFlags flags,
+ static Regexp* Parse(absl::string_view s, ParseFlags flags,
RegexpStatus* status);
// Returns a _new_ simplified version of the current regexp.
@@ -369,7 +368,7 @@ class Regexp {
// Parses the regexp src and then simplifies it and sets *dst to the
// string representation of the simplified form. Returns true on success.
// Returns false and sets *status (if status != NULL) on parse error.
- static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
+ static bool SimplifyRegexp(absl::string_view src, ParseFlags flags,
std::string* dst, RegexpStatus* status);
// Returns the number of capturing groups in the regexp.
@@ -467,7 +466,7 @@ class Regexp {
class ParseState;
friend class ParseState;
- friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
+ friend bool ParseCharClass(absl::string_view* s, Regexp** out_re,
RegexpStatus* status);
// Helper for testing [sic].
diff --git a/re2/set.cc b/re2/set.cc
index fe0ea05..b9c918e 100644
--- a/re2/set.cc
+++ b/re2/set.cc
@@ -9,13 +9,11 @@
#include <memory>
#include <utility>
-#include "util/util.h"
#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
-#include "re2/stringpiece.h"
namespace re2 {
@@ -52,7 +50,7 @@ RE2::Set& RE2::Set::operator=(Set&& other) {
return *this;
}
-int RE2::Set::Add(const StringPiece& pattern, std::string* error) {
+int RE2::Set::Add(absl::string_view pattern, std::string* error) {
if (compiled_) {
LOG(DFATAL) << "RE2::Set::Add() called after compiling";
return -1;
@@ -121,11 +119,11 @@ bool RE2::Set::Compile() {
return prog_ != nullptr;
}
-bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
+bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const {
return Match(text, v, NULL);
}
-bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
+bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
ErrorInfo* error_info) const {
if (!compiled_) {
if (error_info != NULL)
diff --git a/re2/set.h b/re2/set.h
index 8d64f30..3fe419b 100644
--- a/re2/set.h
+++ b/re2/set.h
@@ -10,6 +10,7 @@
#include <utility>
#include <vector>
+#include "absl/strings/string_view.h"
#include "re2/re2.h"
namespace re2 {
@@ -50,7 +51,7 @@ class RE2::Set {
// Indices are assigned in sequential order starting from 0.
// Errors do not increment the index; if error is not NULL, *error will hold
// the error message from the parser.
- int Add(const StringPiece& pattern, std::string* error);
+ int Add(absl::string_view pattern, std::string* error);
// Compiles the set in preparation for matching.
// Returns false if the compiler runs out of memory.
@@ -61,12 +62,12 @@ class RE2::Set {
// Returns true if text matches at least one of the regexps in the set.
// Fills v (if not NULL) with the indices of the matching regexps.
// Callers must not expect v to be sorted.
- bool Match(const StringPiece& text, std::vector<int>* v) const;
+ bool Match(absl::string_view text, std::vector<int>* v) const;
// As above, but populates error_info (if not NULL) when none of the regexps
// in the set matched. This can inform callers when DFA execution fails, for
// example, because they might wish to handle that case differently.
- bool Match(const StringPiece& text, std::vector<int>* v,
+ bool Match(absl::string_view text, std::vector<int>* v,
ErrorInfo* error_info) const;
private:
diff --git a/re2/simplify.cc b/re2/simplify.cc
index 0df9051..8cd10cf 100644
--- a/re2/simplify.cc
+++ b/re2/simplify.cc
@@ -8,7 +8,6 @@
#include <string>
-#include "util/util.h"
#include "util/logging.h"
#include "util/utf.h"
#include "re2/pod_array.h"
@@ -20,7 +19,7 @@ namespace re2 {
// Parses the regexp src and then simplifies it and sets *dst to the
// string representation of the simplified form. Returns true on success.
// Returns false and sets *error (if error != NULL) on error.
-bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
+bool Regexp::SimplifyRegexp(absl::string_view src, ParseFlags flags,
std::string* dst, RegexpStatus* status) {
Regexp* re = Parse(src, flags, status);
if (re == NULL)
diff --git a/re2/stringpiece.cc b/re2/stringpiece.cc
deleted file mode 100644
index ef2e287..0000000
--- a/re2/stringpiece.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2004 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "re2/stringpiece.h"
-
-#include <ostream>
-
-#include "util/util.h"
-
-namespace re2 {
-
-const StringPiece::size_type StringPiece::npos; // initialized in stringpiece.h
-
-StringPiece::size_type StringPiece::copy(char* buf, size_type n,
- size_type pos) const {
- size_type ret = std::min(size_ - pos, n);
- memcpy(buf, data_ + pos, ret);
- return ret;
-}
-
-StringPiece StringPiece::substr(size_type pos, size_type n) const {
- if (pos > size_) pos = size_;
- if (n > size_ - pos) n = size_ - pos;
- return StringPiece(data_ + pos, n);
-}
-
-StringPiece::size_type StringPiece::find(const StringPiece& s,
- size_type pos) const {
- if (pos > size_) return npos;
- const_pointer result = std::search(data_ + pos, data_ + size_,
- s.data_, s.data_ + s.size_);
- size_type xpos = result - data_;
- return xpos + s.size_ <= size_ ? xpos : npos;
-}
-
-StringPiece::size_type StringPiece::find(char c, size_type pos) const {
- if (size_ <= 0 || pos >= size_) return npos;
- const_pointer result = std::find(data_ + pos, data_ + size_, c);
- return result != data_ + size_ ? result - data_ : npos;
-}
-
-StringPiece::size_type StringPiece::rfind(const StringPiece& s,
- size_type pos) const {
- if (size_ < s.size_) return npos;
- if (s.size_ == 0) return std::min(size_, pos);
- const_pointer last = data_ + std::min(size_ - s.size_, pos) + s.size_;
- const_pointer result = std::find_end(data_, last, s.data_, s.data_ + s.size_);
- return result != last ? result - data_ : npos;
-}
-
-StringPiece::size_type StringPiece::rfind(char c, size_type pos) const {
- if (size_ <= 0) return npos;
- for (size_t i = std::min(pos + 1, size_); i != 0;) {
- if (data_[--i] == c) return i;
- }
- return npos;
-}
-
-std::ostream& operator<<(std::ostream& o, const StringPiece& p) {
- o.write(p.data(), p.size());
- return o;
-}
-
-} // namespace re2
diff --git a/re2/stringpiece.h b/re2/stringpiece.h
index b9d6661..6d11d16 100644
--- a/re2/stringpiece.h
+++ b/re2/stringpiece.h
@@ -1,212 +1,23 @@
-// Copyright 2001-2010 The RE2 Authors. All Rights Reserved.
+// Copyright 2022 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef RE2_STRINGPIECE_H_
#define RE2_STRINGPIECE_H_
-// A string-like object that points to a sized piece of memory.
-//
-// Functions or methods may use const StringPiece& parameters to accept either
-// a "const char*" or a "string" value that will be implicitly converted to
-// a StringPiece. The implicit conversion means that it is often appropriate
-// to include this .h file in other files rather than forward-declaring
-// StringPiece as would be appropriate for most other Google classes.
-//
-// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
-// conversions from "const char*" to "string" and back again.
-//
-//
-// Arghh! I wish C++ literals were "string".
-
-#include <stddef.h>
-#include <string.h>
-#include <algorithm>
-#include <iosfwd>
-#include <iterator>
-#include <string>
-#ifdef __cpp_lib_string_view
-#include <string_view>
-#endif
+#include "absl/strings/string_view.h"
namespace re2 {
-class StringPiece {
- public:
- typedef std::char_traits<char> traits_type;
- typedef char value_type;
- typedef char* pointer;
- typedef const char* const_pointer;
- typedef char& reference;
- typedef const char& const_reference;
- typedef const char* const_iterator;
- typedef const_iterator iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef const_reverse_iterator reverse_iterator;
- typedef size_t size_type;
- typedef ptrdiff_t difference_type;
- static const size_type npos = static_cast<size_type>(-1);
-
- // We provide non-explicit singleton constructors so users can pass
- // in a "const char*" or a "string" wherever a "StringPiece" is
- // expected.
- StringPiece()
- : data_(NULL), size_(0) {}
-#ifdef __cpp_lib_string_view
- StringPiece(const std::string_view& str)
- : data_(str.data()), size_(str.size()) {}
-#endif
- StringPiece(const std::string& str)
- : data_(str.data()), size_(str.size()) {}
- StringPiece(const char* str)
- : data_(str), size_(str == NULL ? 0 : strlen(str)) {}
- StringPiece(const char* str, size_type len)
- : data_(str), size_(len) {}
-
- const_iterator begin() const { return data_; }
- const_iterator end() const { return data_ + size_; }
- const_reverse_iterator rbegin() const {
- return const_reverse_iterator(data_ + size_);
- }
- const_reverse_iterator rend() const {
- return const_reverse_iterator(data_);
- }
-
- size_type size() const { return size_; }
- size_type length() const { return size_; }
- bool empty() const { return size_ == 0; }
-
- const_reference operator[](size_type i) const { return data_[i]; }
- const_pointer data() const { return data_; }
-
- void remove_prefix(size_type n) {
- data_ += n;
- size_ -= n;
- }
-
- void remove_suffix(size_type n) {
- size_ -= n;
- }
-
- void set(const char* str) {
- data_ = str;
- size_ = str == NULL ? 0 : strlen(str);
- }
-
- void set(const char* str, size_type len) {
- data_ = str;
- size_ = len;
- }
-
-#ifdef __cpp_lib_string_view
- // Converts to `std::basic_string_view`.
- operator std::basic_string_view<char, traits_type>() const {
- if (!data_) return {};
- return std::basic_string_view<char, traits_type>(data_, size_);
- }
-#endif
-
- // Converts to `std::basic_string`.
- template <typename A>
- explicit operator std::basic_string<char, traits_type, A>() const {
- if (!data_) return {};
- return std::basic_string<char, traits_type, A>(data_, size_);
- }
-
- std::string as_string() const {
- return std::string(data_, size_);
- }
-
- // We also define ToString() here, since many other string-like
- // interfaces name the routine that converts to a C++ string
- // "ToString", and it's confusing to have the method that does that
- // for a StringPiece be called "as_string()". We also leave the
- // "as_string()" method defined here for existing code.
- std::string ToString() const {
- return std::string(data_, size_);
- }
-
- void CopyToString(std::string* target) const {
- target->assign(data_, size_);
- }
-
- void AppendToString(std::string* target) const {
- target->append(data_, size_);
- }
-
- size_type copy(char* buf, size_type n, size_type pos = 0) const;
- StringPiece substr(size_type pos = 0, size_type n = npos) const;
-
- int compare(const StringPiece& x) const {
- size_type min_size = std::min(size(), x.size());
- if (min_size > 0) {
- int r = memcmp(data(), x.data(), min_size);
- if (r < 0) return -1;
- if (r > 0) return 1;
- }
- if (size() < x.size()) return -1;
- if (size() > x.size()) return 1;
- return 0;
- }
-
- // Does "this" start with "x"?
- bool starts_with(const StringPiece& x) const {
- return x.empty() ||
- (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
- }
-
- // Does "this" end with "x"?
- bool ends_with(const StringPiece& x) const {
- return x.empty() ||
- (size() >= x.size() &&
- memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
- }
-
- bool contains(const StringPiece& s) const {
- return find(s) != npos;
- }
-
- size_type find(const StringPiece& s, size_type pos = 0) const;
- size_type find(char c, size_type pos = 0) const;
- size_type rfind(const StringPiece& s, size_type pos = npos) const;
- size_type rfind(char c, size_type pos = npos) const;
-
- private:
- const_pointer data_;
- size_type size_;
-};
-
-inline bool operator==(const StringPiece& x, const StringPiece& y) {
- StringPiece::size_type len = x.size();
- if (len != y.size()) return false;
- return x.data() == y.data() || len == 0 ||
- memcmp(x.data(), y.data(), len) == 0;
-}
-
-inline bool operator!=(const StringPiece& x, const StringPiece& y) {
- return !(x == y);
-}
-
-inline bool operator<(const StringPiece& x, const StringPiece& y) {
- StringPiece::size_type min_size = std::min(x.size(), y.size());
- int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
- return (r < 0) || (r == 0 && x.size() < y.size());
-}
-
-inline bool operator>(const StringPiece& x, const StringPiece& y) {
- return y < x;
-}
-
-inline bool operator<=(const StringPiece& x, const StringPiece& y) {
- return !(x > y);
-}
-
-inline bool operator>=(const StringPiece& x, const StringPiece& y) {
- return !(x < y);
-}
-
-// Allow StringPiece to be logged.
-std::ostream& operator<<(std::ostream& o, const StringPiece& p);
+// RE2 has two versions: "sans Abseil" in the main branch; and "avec Abseil" in
+// the abseil branch. This has led to a diamond dependency problem for projects
+// like Envoy: as per https://github.com/google/re2/issues/388, GoogleTest took
+// a dependency on RE2 avec Abseil, but other things depend on RE2 sans Abseil.
+// To resolve this conflict until both versions can migrate to std::string_view
+// (C++17), those other things must be able to #include "re2/stringpiece.h" and
+// use re2::StringPiece. (This is a hack, obviously, but it beats telling every
+// project in this situation that they have to perform source transformations.)
+using StringPiece = absl::string_view;
} // namespace re2
diff --git a/re2/testing/backtrack.cc b/re2/testing/backtrack.cc
index 920a453..90071bb 100644
--- a/re2/testing/backtrack.cc
+++ b/re2/testing/backtrack.cc
@@ -27,7 +27,7 @@
#include <stdint.h>
#include <string.h>
-#include "util/util.h"
+#include "absl/base/macros.h"
#include "util/logging.h"
#include "re2/pod_array.h"
#include "re2/prog.h"
@@ -55,9 +55,8 @@ class Backtracker {
public:
explicit Backtracker(Prog* prog);
- bool Search(const StringPiece& text, const StringPiece& context,
- bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch);
+ bool Search(absl::string_view text, absl::string_view context, bool anchored,
+ bool longest, absl::string_view* submatch, int nsubmatch);
private:
// Explores from instruction id at string position p looking for a match.
@@ -69,14 +68,14 @@ class Backtracker {
bool Try(int id, const char* p);
// Search parameters
- Prog* prog_; // program being run
- StringPiece text_; // text being searched
- StringPiece context_; // greater context of text being searched
- bool anchored_; // whether search is anchored at text.begin()
- bool longest_; // whether search wants leftmost-longest match
- bool endmatch_; // whether search must end at text.end()
- StringPiece *submatch_; // submatches to fill in
- int nsubmatch_; // # of submatches to fill in
+ Prog* prog_; // program being run
+ absl::string_view text_; // text being searched
+ absl::string_view context_; // greater context of text being searched
+ bool anchored_; // whether search is anchored at text.begin()
+ bool longest_; // whether search wants leftmost-longest match
+ bool endmatch_; // whether search must end at text.end()
+ absl::string_view* submatch_; // submatches to fill in
+ int nsubmatch_; // # of submatches to fill in
// Search state
const char* cap_[64]; // capture registers
@@ -96,9 +95,9 @@ Backtracker::Backtracker(Prog* prog)
}
// Runs a backtracking search.
-bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
+bool Backtracker::Search(absl::string_view text, absl::string_view context,
bool anchored, bool longest,
- StringPiece* submatch, int nsubmatch) {
+ absl::string_view* submatch, int nsubmatch) {
text_ = text;
context_ = context;
if (context_.data() == NULL)
@@ -112,17 +111,17 @@ bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
endmatch_ = prog_->anchor_end();
submatch_ = submatch;
nsubmatch_ = nsubmatch;
- CHECK_LT(2*nsubmatch_, static_cast<int>(arraysize(cap_)));
+ CHECK_LT(2*nsubmatch_, static_cast<int>(ABSL_ARRAYSIZE(cap_)));
memset(cap_, 0, sizeof cap_);
// We use submatch_[0] for our own bookkeeping,
// so it had better exist.
- StringPiece sp0;
+ absl::string_view sp0;
if (nsubmatch < 1) {
submatch_ = &sp0;
nsubmatch_ = 1;
}
- submatch_[0] = StringPiece();
+ submatch_[0] = absl::string_view();
// Allocate new visited_ bitmap -- size is proportional
// to text, so have to reallocate on each call to Search.
@@ -203,7 +202,7 @@ bool Backtracker::Try(int id, const char* p) {
case kInstCapture:
if (0 <= ip->cap() &&
- ip->cap() < static_cast<int>(arraysize(cap_))) {
+ ip->cap() < static_cast<int>(ABSL_ARRAYSIZE(cap_))) {
// Capture p to register, but save old value.
const char* q = cap_[ip->cap()];
cap_[ip->cap()] = p;
@@ -232,7 +231,7 @@ bool Backtracker::Try(int id, const char* p) {
(longest_ && p > submatch_[0].data() + submatch_[0].size())) {
// First match so far - or better match.
for (int i = 0; i < nsubmatch_; i++)
- submatch_[i] = StringPiece(
+ submatch_[i] = absl::string_view(
cap_[2 * i], static_cast<size_t>(cap_[2 * i + 1] - cap_[2 * i]));
}
return true;
@@ -243,16 +242,14 @@ bool Backtracker::Try(int id, const char* p) {
}
// Runs a backtracking search.
-bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
- const StringPiece& context,
- Anchor anchor,
- MatchKind kind,
- StringPiece* match,
+bool Prog::UnsafeSearchBacktrack(absl::string_view text,
+ absl::string_view context, Anchor anchor,
+ MatchKind kind, absl::string_view* match,
int nmatch) {
// If full match, we ask for an anchored longest match
// and then check that match[0] == text.
// So make sure match[0] exists.
- StringPiece sp0;
+ absl::string_view sp0;
if (kind == kFullMatch) {
anchor = kAnchored;
if (nmatch < 1) {
diff --git a/re2/testing/charclass_test.cc b/re2/testing/charclass_test.cc
index 9c2a32f..ad95d6c 100644
--- a/re2/testing/charclass_test.cc
+++ b/re2/testing/charclass_test.cc
@@ -6,7 +6,9 @@
#include <stdio.h>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/regexp.h"
@@ -88,25 +90,25 @@ static CCTest tests[] = {
template <typename CharClass>
static void Broke(const char *desc, const CCTest* t, CharClass* cc) {
if (t == NULL) {
- printf("\t%s:", desc);
+ absl::PrintF("\t%s:", desc);
} else {
- printf("\n");
- printf("CharClass added: [%s]", desc);
+ absl::PrintF("\n");
+ absl::PrintF("CharClass added: [%s]", desc);
for (int k = 0; t->add[k].lo >= 0; k++)
- printf(" %d-%d", t->add[k].lo, t->add[k].hi);
- printf("\n");
+ absl::PrintF(" %d-%d", t->add[k].lo, t->add[k].hi);
+ absl::PrintF("\n");
if (t->remove >= 0)
- printf("Removed > %d\n", t->remove);
- printf("\twant:");
+ absl::PrintF("Removed > %d\n", t->remove);
+ absl::PrintF("\twant:");
for (int k = 0; t->final[k].lo >= 0; k++)
- printf(" %d-%d", t->final[k].lo, t->final[k].hi);
- printf("\n");
- printf("\thave:");
+ absl::PrintF(" %d-%d", t->final[k].lo, t->final[k].hi);
+ absl::PrintF("\n");
+ absl::PrintF("\thave:");
}
for (typename CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
- printf(" %d-%d", it->lo, it->hi);
- printf("\n");
+ absl::PrintF(" %d-%d", it->lo, it->hi);
+ absl::PrintF("\n");
}
bool ShouldContain(CCTest *t, int x) {
@@ -155,7 +157,7 @@ bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
}
if (cc->size() != size) {
Broke(desc, t, cc);
- printf("wrong size: want %d have %d\n", size, cc->size());
+ absl::PrintF("wrong size: want %d have %d\n", size, cc->size());
return false;
}
@@ -164,8 +166,8 @@ bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
j = Runemax;
if (ShouldContain(t, j) != cc->Contains(j)) {
Broke(desc, t, cc);
- printf("want contains(%d)=%d, got %d\n",
- j, ShouldContain(t, j), cc->Contains(j));
+ absl::PrintF("want contains(%d)=%d, got %d\n",
+ j, ShouldContain(t, j), cc->Contains(j));
return false;
}
}
@@ -177,16 +179,16 @@ bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
if (ShouldContain(t, j) == ncc->Contains(j)) {
Broke(desc, t, cc);
Broke("ncc", NULL, ncc);
- printf("want ncc contains(%d)!=%d, got %d\n",
- j, ShouldContain(t, j), ncc->Contains(j));
+ absl::PrintF("want ncc contains(%d)!=%d, got %d\n",
+ j, ShouldContain(t, j), ncc->Contains(j));
Delete(ncc);
return false;
}
if (ncc->size() != Runemax+1 - cc->size()) {
Broke(desc, t, cc);
Broke("ncc", NULL, ncc);
- printf("ncc size should be %d is %d\n",
- Runemax+1 - cc->size(), ncc->size());
+ absl::PrintF("ncc size should be %d is %d\n",
+ Runemax+1 - cc->size(), ncc->size());
Delete(ncc);
return false;
}
@@ -197,7 +199,7 @@ bool CorrectCC(CharClass *cc, CCTest *t, const char *desc) {
TEST(TestCharClassBuilder, Adds) {
int nfail = 0;
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
CharClassBuilder ccb;
CCTest* t = &tests[i];
for (int j = 0; t->add[j].lo >= 0; j++)
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index 4718830..f6899d3 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -6,7 +6,8 @@
#include <string>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
#include "re2/prog.h"
@@ -127,7 +128,7 @@ static Test tests[] = {
TEST(TestRegexpCompileToProg, Simple) {
int failed = 0;
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
const re2::Test& t = tests[i];
Regexp* re = Regexp::Parse(t.regexp, Regexp::PerlX|Regexp::Latin1, NULL);
if (re == NULL) {
@@ -156,7 +157,7 @@ TEST(TestRegexpCompileToProg, Simple) {
EXPECT_EQ(failed, 0);
}
-static void DumpByteMap(StringPiece pattern, Regexp::ParseFlags flags,
+static void DumpByteMap(absl::string_view pattern, Regexp::ParseFlags flags,
std::string* bytemap) {
Regexp* re = Regexp::Parse(pattern, flags, NULL);
EXPECT_TRUE(re != NULL);
@@ -257,7 +258,7 @@ TEST(TestCompile, InsufficientMemory) {
re->Decref();
}
-static void Dump(StringPiece pattern, Regexp::ParseFlags flags,
+static void Dump(absl::string_view pattern, Regexp::ParseFlags flags,
std::string* forward, std::string* reverse) {
Regexp* re = Regexp::Parse(pattern, flags, NULL);
EXPECT_TRUE(re != NULL);
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 842daaf..b0759f7 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -7,11 +7,12 @@
#include <thread>
#include <vector>
-#include "util/test.h"
-#include "util/flags.h"
+#include "absl/base/macros.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "util/malloc_counter.h"
-#include "util/strutil.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
@@ -20,9 +21,9 @@
static const bool UsingMallocCounter = false;
-DEFINE_FLAG(int, size, 8, "log2(number of DFA nodes)");
-DEFINE_FLAG(int, repeat, 2, "Repetition count.");
-DEFINE_FLAG(int, threads, 4, "number of threads");
+ABSL_FLAG(int, size, 8, "log2(number of DFA nodes)");
+ABSL_FLAG(int, repeat, 2, "Repetition count.");
+ABSL_FLAG(int, threads, 4, "number of threads");
namespace re2 {
@@ -50,7 +51,7 @@ static void DoBuild(Prog* prog) {
TEST(Multithreaded, BuildEntireDFA) {
// Create regexp with 2^FLAGS_size states in DFA.
std::string s = "a";
- for (int i = 0; i < GetFlag(FLAGS_size); i++)
+ for (int i = 0; i < absl::GetFlag(FLAGS_size); i++)
s += "[ab]";
s += "b";
Regexp* re = Regexp::Parse(s, Regexp::LikePerl, NULL);
@@ -68,14 +69,14 @@ TEST(Multithreaded, BuildEntireDFA) {
}
// Build the DFA simultaneously in a bunch of threads.
- for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
+ for (int i = 0; i < absl::GetFlag(FLAGS_repeat); i++) {
Prog* prog = re->CompileToProg(0);
ASSERT_TRUE(prog != NULL);
std::vector<std::thread> threads;
- for (int j = 0; j < GetFlag(FLAGS_threads); j++)
+ for (int j = 0; j < absl::GetFlag(FLAGS_threads); j++)
threads.emplace_back(DoBuild, prog);
- for (int j = 0; j < GetFlag(FLAGS_threads); j++)
+ for (int j = 0; j < absl::GetFlag(FLAGS_threads); j++)
threads[j].join();
// One more compile, to make sure everything is okay.
@@ -154,7 +155,7 @@ TEST(SingleThreaded, SearchDFA) {
// Empirically, n = 18 is a good compromise between the two.
const int n = 18;
- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
+ Regexp* re = Regexp::Parse(absl::StrFormat("0[01]{%d}$", n),
Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
@@ -172,12 +173,14 @@ TEST(SingleThreaded, SearchDFA) {
for (int i = 0; i < 10; i++) {
bool matched = false;
bool failed = false;
- matched = prog->SearchDFA(match, StringPiece(), Prog::kUnanchored,
- Prog::kFirstMatch, NULL, &failed, NULL);
+ matched =
+ prog->SearchDFA(match, absl::string_view(), Prog::kUnanchored,
+ Prog::kFirstMatch, NULL, &failed, NULL);
ASSERT_FALSE(failed);
ASSERT_TRUE(matched);
- matched = prog->SearchDFA(no_match, StringPiece(), Prog::kUnanchored,
- Prog::kFirstMatch, NULL, &failed, NULL);
+ matched =
+ prog->SearchDFA(no_match, absl::string_view(), Prog::kUnanchored,
+ Prog::kFirstMatch, NULL, &failed, NULL);
ASSERT_FALSE(failed);
ASSERT_FALSE(matched);
}
@@ -201,17 +204,19 @@ TEST(SingleThreaded, SearchDFA) {
// Helper function: searches for match, which should match,
// and no_match, which should not.
-static void DoSearch(Prog* prog, const StringPiece& match,
- const StringPiece& no_match) {
+static void DoSearch(Prog* prog, absl::string_view match,
+ absl::string_view no_match) {
for (int i = 0; i < 2; i++) {
bool matched = false;
bool failed = false;
- matched = prog->SearchDFA(match, StringPiece(), Prog::kUnanchored,
- Prog::kFirstMatch, NULL, &failed, NULL);
+ matched =
+ prog->SearchDFA(match, absl::string_view(), Prog::kUnanchored,
+ Prog::kFirstMatch, NULL, &failed, NULL);
ASSERT_FALSE(failed);
ASSERT_TRUE(matched);
- matched = prog->SearchDFA(no_match, StringPiece(), Prog::kUnanchored,
- Prog::kFirstMatch, NULL, &failed, NULL);
+ matched =
+ prog->SearchDFA(no_match, absl::string_view(), Prog::kUnanchored,
+ Prog::kFirstMatch, NULL, &failed, NULL);
ASSERT_FALSE(failed);
ASSERT_FALSE(matched);
}
@@ -224,7 +229,7 @@ TEST(Multithreaded, SearchDFA) {
// Same as single-threaded test above.
const int n = 18;
- Regexp* re = Regexp::Parse(StringPrintf("0[01]{%d}$", n),
+ Regexp* re = Regexp::Parse(absl::StrFormat("0[01]{%d}$", n),
Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
std::string no_match = DeBruijnString(n);
@@ -243,14 +248,14 @@ TEST(Multithreaded, SearchDFA) {
// Run the search simultaneously in a bunch of threads.
// Reuse same flags for Multithreaded.BuildDFA above.
- for (int i = 0; i < GetFlag(FLAGS_repeat); i++) {
+ for (int i = 0; i < absl::GetFlag(FLAGS_repeat); i++) {
Prog* prog = re->CompileToProg(1<<n);
ASSERT_TRUE(prog != NULL);
std::vector<std::thread> threads;
- for (int j = 0; j < GetFlag(FLAGS_threads); j++)
+ for (int j = 0; j < absl::GetFlag(FLAGS_threads); j++)
threads.emplace_back(DoSearch, prog, match, no_match);
- for (int j = 0; j < GetFlag(FLAGS_threads); j++)
+ for (int j = 0; j < absl::GetFlag(FLAGS_threads); j++)
threads[j].join();
delete prog;
@@ -281,15 +286,16 @@ ReverseTest reverse_tests[] = {
TEST(DFA, ReverseMatch) {
int nfail = 0;
- for (size_t i = 0; i < arraysize(reverse_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(reverse_tests); i++) {
const ReverseTest& t = reverse_tests[i];
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
Prog* prog = re->CompileToReverseProg(0);
ASSERT_TRUE(prog != NULL);
bool failed = false;
- bool matched = prog->SearchDFA(t.text, StringPiece(), Prog::kUnanchored,
- Prog::kFirstMatch, NULL, &failed, NULL);
+ bool matched =
+ prog->SearchDFA(t.text, absl::string_view(), Prog::kUnanchored,
+ Prog::kFirstMatch, NULL, &failed, NULL);
if (matched != t.match) {
LOG(ERROR) << t.regexp << " on " << t.text << ": want " << t.match;
nfail++;
@@ -336,7 +342,7 @@ CallbackTest callback_tests[] = {
TEST(DFA, Callback) {
int nfail = 0;
- for (size_t i = 0; i < arraysize(callback_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(callback_tests); i++) {
const CallbackTest& t = callback_tests[i];
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
@@ -349,7 +355,7 @@ TEST(DFA, Callback) {
dump += " ";
dump += match ? "[[" : "[";
for (int b = 0; b < prog->bytemap_range() + 1; b++)
- dump += StringPrintf("%d,", next[b]);
+ dump += absl::StrFormat("%d,", next[b]);
dump.pop_back();
dump += match ? "]]" : "]";
});
diff --git a/re2/testing/dump.cc b/re2/testing/dump.cc
index cad0910..5cddd23 100644
--- a/re2/testing/dump.cc
+++ b/re2/testing/dump.cc
@@ -18,11 +18,11 @@
#include <string>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
-#include "re2/stringpiece.h"
#include "re2/regexp.h"
namespace re2 {
@@ -55,8 +55,8 @@ static const char* kOpcodeNames[] = {
// Create string representation of regexp with explicit structure.
// Nothing pretty, just for testing.
static void DumpRegexpAppending(Regexp* re, std::string* s) {
- if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
- *s += StringPrintf("op%d", re->op());
+ if (re->op() < 0 || re->op() >= ABSL_ARRAYSIZE(kOpcodeNames)) {
+ *s += absl::StrFormat("op%d", re->op());
} else {
switch (re->op()) {
default:
@@ -129,7 +129,7 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpRepeat:
- s->append(StringPrintf("%d,%d ", re->min(), re->max()));
+ s->append(absl::StrFormat("%d,%d ", re->min(), re->max()));
DumpRegexpAppending(re->sub()[0], s);
break;
case kRegexpCharClass: {
@@ -139,9 +139,9 @@ static void DumpRegexpAppending(Regexp* re, std::string* s) {
RuneRange rr = *it;
s->append(sep);
if (rr.lo == rr.hi)
- s->append(StringPrintf("%#x", rr.lo));
+ s->append(absl::StrFormat("%#x", rr.lo));
else
- s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
+ s->append(absl::StrFormat("%#x-%#x", rr.lo, rr.hi));
sep = " ";
}
break;
diff --git a/re2/testing/exhaustive1_test.cc b/re2/testing/exhaustive1_test.cc
index eef2dae..9337989 100644
--- a/re2/testing/exhaustive1_test.cc
+++ b/re2/testing/exhaustive1_test.cc
@@ -7,7 +7,7 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
namespace re2 {
diff --git a/re2/testing/exhaustive2_test.cc b/re2/testing/exhaustive2_test.cc
index ae89ece..14f629d 100644
--- a/re2/testing/exhaustive2_test.cc
+++ b/re2/testing/exhaustive2_test.cc
@@ -9,7 +9,7 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
namespace re2 {
diff --git a/re2/testing/exhaustive3_test.cc b/re2/testing/exhaustive3_test.cc
index 1fe46b6..de703c0 100644
--- a/re2/testing/exhaustive3_test.cc
+++ b/re2/testing/exhaustive3_test.cc
@@ -9,7 +9,7 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/testing/exhaustive_tester.h"
diff --git a/re2/testing/exhaustive_test.cc b/re2/testing/exhaustive_test.cc
index 514fd90..5e586f1 100644
--- a/re2/testing/exhaustive_test.cc
+++ b/re2/testing/exhaustive_test.cc
@@ -4,7 +4,7 @@
// Exhaustive testing of regular expression matching.
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
namespace re2 {
diff --git a/re2/testing/exhaustive_tester.cc b/re2/testing/exhaustive_tester.cc
index b0409c3..a57f700 100644
--- a/re2/testing/exhaustive_tester.cc
+++ b/re2/testing/exhaustive_tester.cc
@@ -13,10 +13,11 @@
#include <stdio.h>
-#include "util/test.h"
-#include "util/flags.h"
+#include "absl/base/macros.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/testing/exhaustive_tester.h"
#include "re2/testing/tester.h"
@@ -25,15 +26,15 @@
#define LOGGING 0
#endif
-DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
+ABSL_FLAG(bool, show_regexps, false, "show regexps during testing");
-DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
- "Stop testing a regular expression after finding this many "
- "strings that break it.");
+ABSL_FLAG(int, max_bad_regexp_inputs, 1,
+ "Stop testing a regular expression after finding this many "
+ "strings that break it.");
namespace re2 {
-static char* escape(const StringPiece& sp) {
+static char* escape(absl::string_view sp) {
static char buf[512];
char* p = buf;
*p++ = '\"';
@@ -55,20 +56,21 @@ static char* escape(const StringPiece& sp) {
return buf;
}
-static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
+static void PrintResult(const RE2& re, absl::string_view input,
+ RE2::Anchor anchor, absl::string_view* m, int n) {
if (!re.Match(input, 0, input.size(), anchor, m, n)) {
- printf("-");
+ absl::PrintF("-");
return;
}
for (int i = 0; i < n; i++) {
if (i > 0)
- printf(" ");
+ absl::PrintF(" ");
if (m[i].data() == NULL)
- printf("-");
+ absl::PrintF("-");
else
- printf("%td-%td",
- BeginPtr(m[i]) - BeginPtr(input),
- EndPtr(m[i]) - BeginPtr(input));
+ absl::PrintF("%d-%d",
+ BeginPtr(m[i]) - BeginPtr(input),
+ EndPtr(m[i]) - BeginPtr(input));
}
}
@@ -79,11 +81,13 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
regexps_++;
std::string regexp = const_regexp;
if (!topwrapper_.empty()) {
- regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
+ auto fmt = absl::ParsedFormat<'s'>::New(topwrapper_);
+ CHECK(fmt != nullptr);
+ regexp = absl::StrFormat(*fmt, regexp);
}
- if (GetFlag(FLAGS_show_regexps)) {
- printf("\r%s", regexp.c_str());
+ if (absl::GetFlag(FLAGS_show_regexps)) {
+ absl::PrintF("\r%s", regexp);
fflush(stdout);
}
@@ -93,32 +97,32 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
if (randomstrings_)
LOG(ERROR) << "Cannot log with random strings.";
if (regexps_ == 1) { // first
- printf("strings\n");
+ absl::PrintF("strings\n");
strgen_.Reset();
while (strgen_.HasNext())
- printf("%s\n", escape(strgen_.Next()));
- printf("regexps\n");
+ absl::PrintF("%s\n", escape(strgen_.Next()));
+ absl::PrintF("regexps\n");
}
- printf("%s\n", escape(regexp));
+ absl::PrintF("%s\n", escape(regexp));
RE2 re(regexp);
RE2::Options longest;
longest.set_longest_match(true);
RE2 relongest(regexp, longest);
int ngroup = re.NumberOfCapturingGroups()+1;
- StringPiece* group = new StringPiece[ngroup];
+ absl::string_view* group = new absl::string_view[ngroup];
strgen_.Reset();
while (strgen_.HasNext()) {
- StringPiece input = strgen_.Next();
+ absl::string_view input = strgen_.Next();
PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
- printf(";");
+ absl::PrintF(";");
PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
- printf(";");
+ absl::PrintF(";");
PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
- printf(";");
+ absl::PrintF(";");
PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
- printf("\n");
+ absl::PrintF("\n");
}
delete[] group;
return;
@@ -137,7 +141,7 @@ void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
tests_++;
if (!tester.TestInput(strgen_.Next())) {
failures_++;
- if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
+ if (++bad_inputs >= absl::GetFlag(FLAGS_max_bad_regexp_inputs))
break;
}
}
@@ -164,8 +168,8 @@ void ExhaustiveTest(int maxatoms, int maxops,
topwrapper);
t.Generate();
if (!LOGGING) {
- printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
- t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
+ absl::PrintF("%d regexps, %d tests, %d failures [%d/%d str]\n",
+ t.regexps(), t.tests(), t.failures(), maxstrlen, stralphabet.size());
}
EXPECT_EQ(0, t.failures());
}
@@ -177,7 +181,7 @@ void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
const std::string& wrapper) {
const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
- for (size_t i = 0; i < arraysize(tops); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tops); i++) {
ExhaustiveTest(maxatoms, maxops,
Split("", alphabet),
RegexpGenerator::EgrepOps(),
diff --git a/re2/testing/exhaustive_tester.h b/re2/testing/exhaustive_tester.h
index 3a14282..906be0c 100644
--- a/re2/testing/exhaustive_tester.h
+++ b/re2/testing/exhaustive_tester.h
@@ -9,7 +9,6 @@
#include <string>
#include <vector>
-#include "util/util.h"
#include "re2/testing/regexp_generator.h"
#include "re2/testing/string_generator.h"
diff --git a/re2/testing/filtered_re2_test.cc b/re2/testing/filtered_re2_test.cc
index add935e..a8d2dfc 100644
--- a/re2/testing/filtered_re2_test.cc
+++ b/re2/testing/filtered_re2_test.cc
@@ -9,7 +9,8 @@
#include <vector>
#include <utility>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/filtered_re2.h"
#include "re2/re2.h"
@@ -185,14 +186,14 @@ bool CheckExpectedAtoms(const char* atoms[],
TEST(FilteredRE2Test, AtomTests) {
int nfail = 0;
- for (size_t i = 0; i < arraysize(atom_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(atom_tests); i++) {
FilterTestVars v;
AtomTest* t = &atom_tests[i];
size_t nregexp, natom;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ for (nregexp = 0; nregexp < ABSL_ARRAYSIZE(t->regexps); nregexp++)
if (t->regexps[nregexp] == NULL)
break;
- for (natom = 0; natom < arraysize(t->atoms); natom++)
+ for (natom = 0; natom < ABSL_ARRAYSIZE(t->atoms); natom++)
if (t->atoms[natom] == NULL)
break;
AddRegexpsAndCompile(t->regexps, nregexp, &v);
@@ -224,7 +225,7 @@ TEST(FilteredRE2Test, MatchEmptyPattern) {
// the index we use for the test is for the correct test.
EXPECT_EQ("CheckEmptyPattern", std::string(t->testname));
size_t nregexp;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ for (nregexp = 0; nregexp < ABSL_ARRAYSIZE(t->regexps); nregexp++)
if (t->regexps[nregexp] == NULL)
break;
AddRegexpsAndCompile(t->regexps, nregexp, &v);
@@ -241,7 +242,7 @@ TEST(FilteredRE2Test, MatchTests) {
// for this test.
EXPECT_EQ("SubstrAtomRemovesSuperStrInOr", std::string(t->testname));
size_t nregexp;
- for (nregexp = 0; nregexp < arraysize(t->regexps); nregexp++)
+ for (nregexp = 0; nregexp < ABSL_ARRAYSIZE(t->regexps); nregexp++)
if (t->regexps[nregexp] == NULL)
break;
AddRegexpsAndCompile(t->regexps, nregexp, &v);
@@ -288,8 +289,8 @@ TEST(FilteredRE2Test, EmptyStringInStringSetBug) {
FilterTestVars v(0); // override the minimum atom length
const char* regexps[] = {"-R.+(|ADD=;AA){12}}"};
const char* atoms[] = {"", "-r", "add=;aa", "}"};
- AddRegexpsAndCompile(regexps, arraysize(regexps), &v);
- EXPECT_TRUE(CheckExpectedAtoms(atoms, arraysize(atoms),
+ AddRegexpsAndCompile(regexps, ABSL_ARRAYSIZE(regexps), &v);
+ EXPECT_TRUE(CheckExpectedAtoms(atoms, ABSL_ARRAYSIZE(atoms),
"EmptyStringInStringSetBug", &v));
}
diff --git a/re2/testing/mimics_pcre_test.cc b/re2/testing/mimics_pcre_test.cc
index 01ab41e..829659d 100644
--- a/re2/testing/mimics_pcre_test.cc
+++ b/re2/testing/mimics_pcre_test.cc
@@ -2,7 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -58,7 +59,7 @@ static PCRETest tests[] = {
};
TEST(MimicsPCRE, SimpleTests) {
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
const PCRETest& t = tests[i];
for (size_t j = 0; j < 2; j++) {
Regexp::ParseFlags flags = Regexp::LikePerl;
diff --git a/re2/testing/null_walker.cc b/re2/testing/null_walker.cc
index 2bdea02..745364b 100644
--- a/re2/testing/null_walker.cc
+++ b/re2/testing/null_walker.cc
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
diff --git a/re2/testing/parse_test.cc b/re2/testing/parse_test.cc
index e571127..9d39544 100644
--- a/re2/testing/parse_test.cc
+++ b/re2/testing/parse_test.cc
@@ -6,7 +6,8 @@
#include <string>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
@@ -262,7 +263,7 @@ void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
// Test that regexps parse to expected structures.
TEST(TestParse, SimpleRegexps) {
- TestParse(tests, arraysize(tests), kTestFlags, "simple");
+ TestParse(tests, ABSL_ARRAYSIZE(tests), kTestFlags, "simple");
}
Test foldcase_tests[] = {
@@ -279,7 +280,7 @@ Test foldcase_tests[] = {
// Test that parsing with FoldCase works.
TEST(TestParse, FoldCase) {
- TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
+ TestParse(foldcase_tests, ABSL_ARRAYSIZE(foldcase_tests), Regexp::FoldCase, "foldcase");
}
Test literal_tests[] = {
@@ -288,7 +289,7 @@ Test literal_tests[] = {
// Test that parsing with Literal works.
TEST(TestParse, Literal) {
- TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
+ TestParse(literal_tests, ABSL_ARRAYSIZE(literal_tests), Regexp::Literal, "literal");
}
Test matchnl_tests[] = {
@@ -301,7 +302,7 @@ Test matchnl_tests[] = {
// Test that parsing with MatchNL works.
// (Also tested above during simple cases.)
TEST(TestParse, MatchNL) {
- TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
+ TestParse(matchnl_tests, ABSL_ARRAYSIZE(matchnl_tests), Regexp::MatchNL, "with MatchNL");
}
Test nomatchnl_tests[] = {
@@ -313,7 +314,7 @@ Test nomatchnl_tests[] = {
// Test that parsing without MatchNL works.
TEST(TestParse, NoMatchNL) {
- TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
+ TestParse(nomatchnl_tests, ABSL_ARRAYSIZE(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
}
Test prefix_tests[] = {
@@ -357,7 +358,7 @@ Test prefix_tests[] = {
// Test that prefix factoring works.
TEST(TestParse, Prefix) {
- TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
+ TestParse(prefix_tests, ABSL_ARRAYSIZE(prefix_tests), Regexp::PerlX, "prefix");
}
Test nested_tests[] = {
@@ -373,7 +374,7 @@ Test nested_tests[] = {
// Test that nested repetition works.
TEST(TestParse, Nested) {
- TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested");
+ TestParse(nested_tests, ABSL_ARRAYSIZE(nested_tests), Regexp::PerlX, "nested");
}
// Invalid regular expressions
@@ -428,20 +429,20 @@ const char* only_posix[] = {
// Test that parser rejects bad regexps.
TEST(TestParse, InvalidRegexps) {
- for (size_t i = 0; i < arraysize(badtests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(badtests); i++) {
ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL)
<< " " << badtests[i];
ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL)
<< " " << badtests[i];
}
- for (size_t i = 0; i < arraysize(only_posix); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(only_posix); i++) {
ASSERT_TRUE(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL)
<< " " << only_posix[i];
Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL);
ASSERT_TRUE(re != NULL) << " " << only_posix[i];
re->Decref();
}
- for (size_t i = 0; i < arraysize(only_perl); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(only_perl); i++) {
ASSERT_TRUE(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL)
<< " " << only_perl[i];
Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL);
@@ -452,7 +453,7 @@ TEST(TestParse, InvalidRegexps) {
// Test that ToString produces original regexp or equivalent one.
TEST(TestToString, EquivalentParse) {
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
RegexpStatus status;
Regexp::ParseFlags f = kTestFlags;
if (tests[i].flags != 0) {
diff --git a/re2/testing/possible_match_test.cc b/re2/testing/possible_match_test.cc
index 0ec90ae..fe199c6 100644
--- a/re2/testing/possible_match_test.cc
+++ b/re2/testing/possible_match_test.cc
@@ -6,9 +6,10 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "absl/strings/escaping.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
@@ -107,12 +108,12 @@ static PrefixTest tests[] = {
};
TEST(PossibleMatchRange, HandWritten) {
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
for (size_t j = 0; j < 2; j++) {
const PrefixTest& t = tests[i];
std::string min, max;
if (j == 0) {
- LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
+ LOG(INFO) << "Checking regexp=" << absl::CEscape(t.regexp);
Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
Prog* prog = re->CompileToProg(0);
@@ -142,26 +143,26 @@ TEST(PossibleMatchRange, Failures) {
// are no valid UTF-8 strings beginning with byte 0xFF.
EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
EXPECT_FALSE(RE2(".*", RE2::Latin1).
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
EXPECT_FALSE(RE2("\\C*").
PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
// Fails because it's a malformed regexp.
EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
- << "min=" << CEscape(min) << ", max=" << CEscape(max);
+ << "min=" << absl::CEscape(min) << ", max=" << absl::CEscape(max);
}
// Exhaustive test: generate all regexps within parameters,
@@ -201,7 +202,7 @@ class PossibleMatchTester : public RegexpGenerator {
void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
regexps_++;
- VLOG(3) << CEscape(regexp);
+ VLOG(3) << absl::CEscape(regexp);
RE2 re(regexp, RE2::Latin1);
ASSERT_EQ(re.error(), "");
@@ -213,12 +214,12 @@ void PossibleMatchTester::HandleRegexp(const std::string& regexp) {
// complicated expressions.
if(strstr(regexp.c_str(), "\\C*"))
return;
- LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
+ LOG(QFATAL) << "PossibleMatchRange failed on: " << absl::CEscape(regexp);
}
strgen_.Reset();
while (strgen_.HasNext()) {
- const StringPiece& s = strgen_.Next();
+ absl::string_view s = strgen_.Next();
tests_++;
if (!RE2::FullMatch(s, re))
continue;
diff --git a/re2/testing/random_test.cc b/re2/testing/random_test.cc
index 44712eb..d076b39 100644
--- a/re2/testing/random_test.cc
+++ b/re2/testing/random_test.cc
@@ -8,14 +8,15 @@
#include <string>
#include <vector>
-#include "util/test.h"
-#include "util/flags.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "re2/testing/exhaustive_tester.h"
-DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed.");
-DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
-DEFINE_FLAG(int, stringseed, 200, "Random string seed.");
-DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate.");
+ABSL_FLAG(int, regexpseed, 404, "Random regexp seed.");
+ABSL_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
+ABSL_FLAG(int, stringseed, 200, "Random string seed.");
+ABSL_FLAG(int, stringcount, 100, "How many random strings to generate.");
namespace re2 {
@@ -38,12 +39,12 @@ static void RandomTest(int maxatoms, int maxops,
ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
maxstrlen, stralphabet, wrapper, "");
- t.RandomStrings(GetFlag(FLAGS_stringseed),
- GetFlag(FLAGS_stringcount));
- t.GenerateRandom(GetFlag(FLAGS_regexpseed),
- GetFlag(FLAGS_regexpcount));
- printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
- t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
+ t.RandomStrings(absl::GetFlag(FLAGS_stringseed),
+ absl::GetFlag(FLAGS_stringcount));
+ t.GenerateRandom(absl::GetFlag(FLAGS_regexpseed),
+ absl::GetFlag(FLAGS_regexpcount));
+ absl::PrintF("%d regexps, %d tests, %d failures [%d/%d str]\n",
+ t.regexps(), t.tests(), t.failures(), maxstrlen, stralphabet.size());
EXPECT_EQ(0, t.failures());
}
diff --git a/re2/testing/re2_arg_test.cc b/re2/testing/re2_arg_test.cc
index f62e17c..4b00be3 100644
--- a/re2/testing/re2_arg_test.cc
+++ b/re2/testing/re2_arg_test.cc
@@ -10,7 +10,8 @@
#include <stdint.h>
#include <string.h>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/re2.h"
@@ -87,7 +88,7 @@ const SuccessTable kSuccessTable[] = {
{ "18446744073709551616", 0, { false, false, false, false, false, false }},
};
-const int kNumStrings = arraysize(kSuccessTable);
+const int kNumStrings = ABSL_ARRAYSIZE(kSuccessTable);
// It's ugly to use a macro, but we apparently can't use the EXPECT_EQ
// macro outside of a TEST block and this seems to be the only way to
@@ -157,4 +158,26 @@ TEST(RE2ArgTest, ParseFromTest) {
#endif
}
+TEST(RE2ArgTest, OptionalDoubleTest) {
+ absl::optional<double> opt;
+ RE2::Arg arg(&opt);
+ EXPECT_TRUE(arg.Parse(NULL, 0));
+ EXPECT_FALSE(opt.has_value());
+ EXPECT_FALSE(arg.Parse("", 0));
+ EXPECT_TRUE(arg.Parse("28.30", 5));
+ EXPECT_TRUE(opt.has_value());
+ EXPECT_EQ(*opt, 28.30);
+}
+
+TEST(RE2ArgTest, OptionalIntWithCRadixTest) {
+ absl::optional<int> opt;
+ RE2::Arg arg = RE2::CRadix(&opt);
+ EXPECT_TRUE(arg.Parse(NULL, 0));
+ EXPECT_FALSE(opt.has_value());
+ EXPECT_FALSE(arg.Parse("", 0));
+ EXPECT_TRUE(arg.Parse("0xb0e", 5));
+ EXPECT_TRUE(opt.has_value());
+ EXPECT_EQ(*opt, 2830);
+}
+
} // namespace re2
diff --git a/re2/testing/re2_test.cc b/re2/testing/re2_test.cc
index b1f7d73..151525f 100644
--- a/re2/testing/re2_test.cc
+++ b/re2/testing/re2_test.cc
@@ -18,9 +18,10 @@
#include <unistd.h> /* for sysconf */
#endif
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/re2.h"
#include "re2/regexp.h"
@@ -238,7 +239,7 @@ TEST(RE2, Consume) {
std::string word;
std::string s(" aaa b!@#$@#$cccc");
- StringPiece input(s);
+ absl::string_view input(s);
ASSERT_TRUE(RE2::Consume(&input, r, &word));
ASSERT_EQ(word, "aaa") << " input: " << input;
@@ -249,7 +250,7 @@ TEST(RE2, Consume) {
TEST(RE2, ConsumeN) {
const std::string s(" one two three 4");
- StringPiece input(s);
+ absl::string_view input(s);
RE2::Arg argv[2];
const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
@@ -276,7 +277,7 @@ TEST(RE2, FindAndConsume) {
std::string word;
std::string s(" aaa b!@#$@#$cccc");
- StringPiece input(s);
+ absl::string_view input(s);
ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
ASSERT_EQ(word, "aaa");
@@ -296,7 +297,7 @@ TEST(RE2, FindAndConsume) {
TEST(RE2, FindAndConsumeN) {
const std::string s(" one two three 4");
- StringPiece input(s);
+ absl::string_view input(s);
RE2::Arg argv[2];
const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
@@ -345,17 +346,17 @@ TEST(RE2, MatchNumberPeculiarity) {
TEST(RE2, Match) {
RE2 re("((\\w+):([0-9]+))"); // extracts host and port
- StringPiece group[4];
+ absl::string_view group[4];
// No match.
- StringPiece s = "zyzzyva";
+ absl::string_view s = "zyzzyva";
ASSERT_FALSE(
- re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+ re.Match(s, 0, s.size(), RE2::UNANCHORED, group, ABSL_ARRAYSIZE(group)));
// Matches and extracts.
s = "a chrisr:9000 here";
ASSERT_TRUE(
- re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
+ re.Match(s, 0, s.size(), RE2::UNANCHORED, group, ABSL_ARRAYSIZE(group)));
ASSERT_EQ(group[0], "chrisr:9000");
ASSERT_EQ(group[1], "chrisr:9000");
ASSERT_EQ(group[2], "chrisr");
@@ -528,7 +529,7 @@ TEST(EmptyCharset, Fuzz) {
"[^\\D\\d]",
"[^\\D[:digit:]]"
};
- for (size_t i = 0; i < arraysize(empties); i++)
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(empties); i++)
ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
}
@@ -542,8 +543,8 @@ TEST(EmptyCharset, BitstateAssumptions) {
"((((()))))" "([^\\S\\s]|[^\\S\\s])?",
"((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
};
- StringPiece group[6];
- for (size_t i = 0; i < arraysize(nop_empties); i++)
+ absl::string_view group[6];
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(nop_empties); i++)
ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
}
@@ -672,15 +673,15 @@ TEST(RE2, FullMatchIntegerArg) {
TEST(RE2, FullMatchStringArg) {
std::string s;
- // String-arg
+ // string-arg
ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
ASSERT_EQ(s, std::string("ell"));
}
-TEST(RE2, FullMatchStringPieceArg) {
+TEST(RE2, FullMatchStringViewArg) {
int i;
- // StringPiece-arg
- StringPiece sp;
+ absl::string_view sp;
+ // string_view-arg
ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
ASSERT_EQ(sp.size(), 4);
ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
@@ -742,7 +743,7 @@ TEST(RE2, FullMatchTypedNullArg) {
// Ignore non-void* NULL arg
ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
- ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
+ ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (absl::string_view*)NULL));
ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
@@ -777,7 +778,8 @@ TEST(RE2, NULTerminated) {
v[pagesize - 1] = '1';
x = 0;
- ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
+ ASSERT_TRUE(
+ RE2::FullMatch(absl::string_view(v + pagesize - 1, 1), "(.*)", &x));
ASSERT_EQ(x, 1);
#endif
}
@@ -914,10 +916,10 @@ TEST(RE2, FloatingPointFullMatchTypes) {
// implementation of strtof(3). And apparently MSVC too. Sigh.
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
- ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
+ ASSERT_EQ(v, 0.1f) << absl::StrFormat("%.8g != %.8g", v, 0.1f);
ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
ASSERT_EQ(v, 6700000000081920.1f)
- << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
+ << absl::StrFormat("%.8g != %.8g", v, 6700000000081920.1f);
#endif
}
{
@@ -929,10 +931,10 @@ TEST(RE2, FloatingPointFullMatchTypes) {
ASSERT_EQ(v, double(1e23));
ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
- ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
+ ASSERT_EQ(v, 0.1) << absl::StrFormat("%.17g != %.17g", v, 0.1);
ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
ASSERT_EQ(v, 1.0000000596046448)
- << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
+ << absl::StrFormat("%.17g != %.17g", v, 1.0000000596046448);
}
}
@@ -1242,21 +1244,21 @@ TEST(RE2, DeepRecursion) {
// not implementing case-folding.
TEST(CaseInsensitive, MatchAndConsume) {
std::string text = "A fish named *Wanda*";
- StringPiece sp(text);
- StringPiece result;
+ absl::string_view sp(text);
+ absl::string_view result;
EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
}
-// RE2 should permit implicit conversions from string, StringPiece, const char*,
+// RE2 should permit implicit conversions from string, string_view, const char*,
// and C string literals.
TEST(RE2, ImplicitConversions) {
std::string re_string(".");
- StringPiece re_stringpiece(".");
- const char* re_cstring = ".";
+ absl::string_view re_string_view(".");
+ const char* re_c_string = ".";
EXPECT_TRUE(RE2::PartialMatch("e", re_string));
- EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
- EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
+ EXPECT_TRUE(RE2::PartialMatch("e", re_string_view));
+ EXPECT_TRUE(RE2::PartialMatch("e", re_c_string));
EXPECT_TRUE(RE2::PartialMatch("e", "."));
}
@@ -1309,7 +1311,7 @@ static struct ErrorTest {
{ "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
};
TEST(RE2, ErrorCodeAndArg) {
- for (size_t i = 0; i < arraysize(error_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(error_tests); i++) {
RE2 re(error_tests[i].regexp, RE2::Quiet);
EXPECT_FALSE(re.ok());
EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
@@ -1332,13 +1334,13 @@ static struct NeverTest {
TEST(RE2, NeverNewline) {
RE2::Options opt;
opt.set_never_nl(true);
- for (size_t i = 0; i < arraysize(never_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(never_tests); i++) {
const NeverTest& t = never_tests[i];
RE2 re(t.regexp, opt);
if (t.match == NULL) {
EXPECT_FALSE(re.PartialMatch(t.text, re));
} else {
- StringPiece m;
+ absl::string_view m;
EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
EXPECT_EQ(m, t.match);
}
@@ -1371,7 +1373,7 @@ TEST(RE2, BitstateCaptureBug) {
RE2::Options opt;
opt.set_max_mem(20000);
RE2 re("(_________$)", opt);
- StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
+ absl::string_view s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
}
@@ -1450,10 +1452,10 @@ TEST(RE2, NullVsEmptyString) {
RE2 re(".*");
EXPECT_TRUE(re.ok());
- StringPiece null;
+ absl::string_view null;
EXPECT_TRUE(RE2::FullMatch(null, re));
- StringPiece empty("");
+ absl::string_view empty("");
EXPECT_TRUE(RE2::FullMatch(empty, re));
}
@@ -1465,25 +1467,25 @@ TEST(RE2, NullVsEmptyStringSubmatches) {
EXPECT_TRUE(re.ok());
// matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
- StringPiece matches[4];
+ absl::string_view matches[4];
- for (size_t i = 0; i < arraysize(matches); i++)
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(matches); i++)
matches[i] = "bar";
- StringPiece null;
+ absl::string_view null;
EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
- matches, arraysize(matches)));
- for (size_t i = 0; i < arraysize(matches); i++) {
+ matches, ABSL_ARRAYSIZE(matches)));
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(matches); i++) {
EXPECT_TRUE(matches[i].data() == NULL); // always null
EXPECT_TRUE(matches[i].empty());
}
- for (size_t i = 0; i < arraysize(matches); i++)
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(matches); i++)
matches[i] = "bar";
- StringPiece empty("");
+ absl::string_view empty("");
EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
- matches, arraysize(matches)));
+ matches, ABSL_ARRAYSIZE(matches)));
EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
EXPECT_TRUE(matches[0].empty());
EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
@@ -1497,7 +1499,7 @@ TEST(RE2, NullVsEmptyStringSubmatches) {
// Issue 1816809
TEST(RE2, Bug1816809) {
RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
- StringPiece piece("llx-3;llx4");
+ absl::string_view piece("llx-3;llx4");
std::string x;
EXPECT_TRUE(RE2::Consume(&piece, re, &x));
}
@@ -1615,7 +1617,7 @@ TEST(RE2, Bug26356109) {
ASSERT_TRUE(re.ok());
std::string s = "abc";
- StringPiece m;
+ absl::string_view m;
ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
@@ -1645,7 +1647,7 @@ TEST(RE2, Issue310) {
// (?:|a)* matched more text than (?:|a)+ did.
std::string s = "aaa";
- StringPiece m;
+ absl::string_view m;
RE2 star("(?:|a)*");
ASSERT_TRUE(star.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
diff --git a/re2/testing/regexp_benchmark.cc b/re2/testing/regexp_benchmark.cc
index 3eeb098..5352b31 100644
--- a/re2/testing/regexp_benchmark.cc
+++ b/re2/testing/regexp_benchmark.cc
@@ -9,19 +9,18 @@
#include <stdlib.h>
#include <string>
#include <thread>
-#include <unordered_map>
#include <utility>
-#include "util/benchmark.h"
-#include "util/test.h"
-#include "util/flags.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_format.h"
+#include "absl/synchronization/mutex.h"
+#include "benchmark/benchmark.h"
#include "util/logging.h"
#include "util/malloc_counter.h"
-#include "util/strutil.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
-#include "util/mutex.h"
#include "util/pcre.h"
namespace re2 {
@@ -41,7 +40,7 @@ void Test() {
CHECK(prog->IsOnePass());
CHECK(prog->CanBitState());
const char* text = "650-253-0001";
- StringPiece sp[4];
+ absl::string_view sp[4];
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
CHECK_EQ(sp[0], "650-253-0001");
CHECK_EQ(sp[1], "650");
@@ -61,22 +60,22 @@ void MemoryUsage() {
CHECK(re);
// Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
// because LOG(INFO) might do a big allocation before they get evaluated.
- fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "Regexp: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
mc.Reset();
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
CHECK(prog->CanBitState());
- fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "Prog: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
mc.Reset();
- StringPiece sp[4];
+ absl::string_view sp[4];
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
- fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "Search: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
delete prog;
re->Decref();
}
@@ -85,22 +84,22 @@ void MemoryUsage() {
MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
PCRE re(regexp, PCRE::UTF8);
- fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "RE: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
PCRE::FullMatch(text, re);
- fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "RE: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
}
{
MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
PCRE* re = new PCRE(regexp, PCRE::UTF8);
- fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "PCRE*: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
PCRE::FullMatch(text, *re);
- fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "PCRE*: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
delete re;
}
@@ -108,15 +107,15 @@ void MemoryUsage() {
MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
RE2 re(regexp);
- fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "RE2: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
RE2::FullMatch(text, re);
- fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
- mc.HeapGrowth(), mc.PeakHeapGrowth());
+ absl::FPrintF(stderr, "RE2: %7d bytes (peak=%d)\n",
+ mc.HeapGrowth(), mc.PeakHeapGrowth());
}
- fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
- sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
+ absl::FPrintF(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
+ sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
}
int NumCPUs() {
@@ -128,7 +127,7 @@ int NumCPUs() {
// and not interesting.
typedef void SearchImpl(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match);
SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
@@ -136,7 +135,7 @@ SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
typedef void ParseImpl(benchmark::State& state, const char* regexp,
- const StringPiece& text);
+ absl::string_view text);
ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
@@ -318,8 +317,8 @@ void FindAndConsume(benchmark::State& state) {
s.append("Hello World");
RE2 re("((Hello World))");
for (auto _ : state) {
- StringPiece t = s;
- StringPiece u;
+ absl::string_view t = s;
+ absl::string_view u;
CHECK(RE2::FindAndConsume(&t, re, &u));
CHECK_EQ(u, "Hello World");
}
@@ -442,7 +441,7 @@ BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCP
// Benchmark: use regexp to find phone number.
void SearchDigits(benchmark::State& state, SearchImpl* search) {
- StringPiece s("650-253-0001");
+ absl::string_view s("650-253-0001");
search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
state.SetItemsProcessed(state.iterations());
}
@@ -467,7 +466,7 @@ BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
void Parse3Digits(benchmark::State& state,
void (*parse3)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
state.SetItemsProcessed(state.iterations());
}
@@ -506,7 +505,7 @@ BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
void Parse3DigitDs(benchmark::State& state,
void (*parse3)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
state.SetItemsProcessed(state.iterations());
}
@@ -547,7 +546,7 @@ BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
void Parse1Split(benchmark::State& state,
void (*parse1)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
parse1(state, "[0-9]+-(.*)", "650-253-0001");
state.SetItemsProcessed(state.iterations());
}
@@ -584,7 +583,7 @@ BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
void Parse1SplitHard(benchmark::State& state,
void (*run)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
run(state, "[0-9]+.(.*)", "650-253-0001");
state.SetItemsProcessed(state.iterations());
}
@@ -619,7 +618,7 @@ BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
void Parse1SplitBig1(benchmark::State& state,
void (*run)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
std::string s;
s.append(100000, 'x');
s.append("650-253-0001");
@@ -639,7 +638,7 @@ BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
void Parse1SplitBig2(benchmark::State& state,
void (*run)(benchmark::State&, const char*,
- const StringPiece&)) {
+ absl::string_view)) {
std::string s;
s.append("650-253-");
s.append(100000, '0');
@@ -756,20 +755,20 @@ void RunBuild(benchmark::State& state, const std::string& regexp,
} // namespace re2
-DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
- "regexp for compile benchmarks");
+ABSL_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
+ "regexp for compile benchmarks");
namespace re2 {
-void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
-void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
-void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
-void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
-void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
-void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
-void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
-void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
-void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
+void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), CompilePCRE); }
+void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), ParseRegexp); }
+void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
+void BM_CompileToProg(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), CompileToProg); }
+void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), CompileByteMap); }
+void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), CompileRegexp); }
+void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
+void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
+void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, absl::GetFlag(FLAGS_compile_regexp), CompileRE2); }
#ifdef USEPCRE
BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
@@ -859,7 +858,7 @@ DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
// Anchored says whether to run an anchored search.
void SearchDFA(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
@@ -867,8 +866,8 @@ void SearchDFA(benchmark::State& state, const char* regexp,
Prog* prog = re->CompileToProg(0);
CHECK(prog);
bool failed = false;
- CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
- NULL, &failed, NULL),
+ CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
+ Prog::kFirstMatch, NULL, &failed, NULL),
expect_match);
CHECK(!failed);
delete prog;
@@ -877,15 +876,15 @@ void SearchDFA(benchmark::State& state, const char* regexp,
}
void SearchNFA(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
- CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
- NULL, 0),
+ CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
+ Prog::kFirstMatch, NULL, 0),
expect_match);
delete prog;
re->Decref();
@@ -893,7 +892,7 @@ void SearchNFA(benchmark::State& state, const char* regexp,
}
void SearchOnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
@@ -909,7 +908,7 @@ void SearchOnePass(benchmark::State& state, const char* regexp,
}
void SearchBitState(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
@@ -925,7 +924,7 @@ void SearchBitState(benchmark::State& state, const char* regexp,
}
void SearchPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
@@ -938,7 +937,7 @@ void SearchPCRE(benchmark::State& state, const char* regexp,
}
void SearchRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
for (auto _ : state) {
RE2 re(regexp);
@@ -955,9 +954,9 @@ void SearchRE2(benchmark::State& state, const char* regexp,
// search time without the per-regexp overhead.
Prog* GetCachedProg(const char* regexp) {
- static auto& mutex = *new Mutex;
- MutexLock lock(&mutex);
- static auto& cache = *new std::unordered_map<std::string, Prog*>;
+ static auto& mutex = *new absl::Mutex;
+ absl::MutexLock lock(&mutex);
+ static auto& cache = *new absl::flat_hash_map<std::string, Prog*>;
Prog* prog = cache[regexp];
if (prog == NULL) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
@@ -973,9 +972,9 @@ Prog* GetCachedProg(const char* regexp) {
}
PCRE* GetCachedPCRE(const char* regexp) {
- static auto& mutex = *new Mutex;
- MutexLock lock(&mutex);
- static auto& cache = *new std::unordered_map<std::string, PCRE*>;
+ static auto& mutex = *new absl::Mutex;
+ absl::MutexLock lock(&mutex);
+ static auto& cache = *new absl::flat_hash_map<std::string, PCRE*>;
PCRE* re = cache[regexp];
if (re == NULL) {
re = new PCRE(regexp, PCRE::UTF8);
@@ -986,9 +985,9 @@ PCRE* GetCachedPCRE(const char* regexp) {
}
RE2* GetCachedRE2(const char* regexp) {
- static auto& mutex = *new Mutex;
- MutexLock lock(&mutex);
- static auto& cache = *new std::unordered_map<std::string, RE2*>;
+ static auto& mutex = *new absl::Mutex;
+ absl::MutexLock lock(&mutex);
+ static auto& cache = *new absl::flat_hash_map<std::string, RE2*>;
RE2* re = cache[regexp];
if (re == NULL) {
re = new RE2(regexp);
@@ -999,31 +998,31 @@ RE2* GetCachedRE2(const char* regexp) {
}
void SearchCachedDFA(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
for (auto _ : state) {
bool failed = false;
- CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
- NULL, &failed, NULL),
+ CHECK_EQ(prog->SearchDFA(text, absl::string_view(), anchor,
+ Prog::kFirstMatch, NULL, &failed, NULL),
expect_match);
CHECK(!failed);
}
}
void SearchCachedNFA(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
for (auto _ : state) {
- CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
- NULL, 0),
+ CHECK_EQ(prog->SearchNFA(text, absl::string_view(), anchor,
+ Prog::kFirstMatch, NULL, 0),
expect_match);
}
}
void SearchCachedOnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
@@ -1034,7 +1033,7 @@ void SearchCachedOnePass(benchmark::State& state, const char* regexp,
}
void SearchCachedBitState(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
@@ -1045,7 +1044,7 @@ void SearchCachedBitState(benchmark::State& state, const char* regexp,
}
void SearchCachedPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
@@ -1057,7 +1056,7 @@ void SearchCachedPCRE(benchmark::State& state, const char* regexp,
}
void SearchCachedRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text, Prog::Anchor anchor,
+ absl::string_view text, Prog::Anchor anchor,
bool expect_match) {
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
@@ -1072,14 +1071,14 @@ void SearchCachedRE2(benchmark::State& state, const char* regexp,
// extracting three submatches. Expects match always.
void Parse3NFA(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
- StringPiece sp[4]; // 4 because sp[0] is whole match.
- CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
+ CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
@@ -1087,14 +1086,14 @@ void Parse3NFA(benchmark::State& state, const char* regexp,
}
void Parse3OnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
@@ -1102,14 +1101,14 @@ void Parse3OnePass(benchmark::State& state, const char* regexp,
}
void Parse3BitState(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->CanBitState());
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
@@ -1117,13 +1116,13 @@ void Parse3BitState(benchmark::State& state, const char* regexp,
}
void Parse3Backtrack(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
delete prog;
re->Decref();
@@ -1131,77 +1130,77 @@ void Parse3Backtrack(benchmark::State& state, const char* regexp,
}
void Parse3PCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
- StringPiece sp1, sp2, sp3;
+ absl::string_view sp1, sp2, sp3;
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
void Parse3RE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
- StringPiece sp1, sp2, sp3;
+ absl::string_view sp1, sp2, sp3;
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
void Parse3CachedNFA(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
- CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+ CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 4));
}
}
void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
}
}
void Parse3CachedBitState(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
}
}
void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
- StringPiece sp[4]; // 4 because sp[0] is whole match.
+ absl::string_view sp[4]; // 4 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
}
}
void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
PCRE& re = *GetCachedPCRE(regexp);
- StringPiece sp1, sp2, sp3;
+ absl::string_view sp1, sp2, sp3;
for (auto _ : state) {
CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
}
}
void Parse3CachedRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
RE2& re = *GetCachedRE2(regexp);
- StringPiece sp1, sp2, sp3;
+ absl::string_view sp1, sp2, sp3;
for (auto _ : state) {
CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
}
@@ -1211,14 +1210,14 @@ void Parse3CachedRE2(benchmark::State& state, const char* regexp,
// extracting three submatches. Expects match always.
void Parse1NFA(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
- StringPiece sp[2]; // 2 because sp[0] is whole match.
- CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
+ CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
@@ -1226,14 +1225,14 @@ void Parse1NFA(benchmark::State& state, const char* regexp,
}
void Parse1OnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->IsOnePass());
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
@@ -1241,14 +1240,14 @@ void Parse1OnePass(benchmark::State& state, const char* regexp,
}
void Parse1BitState(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
CHECK(re);
Prog* prog = re->CompileToProg(0);
CHECK(prog);
CHECK(prog->CanBitState());
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
delete prog;
re->Decref();
@@ -1256,114 +1255,114 @@ void Parse1BitState(benchmark::State& state, const char* regexp,
}
void Parse1PCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
PCRE re(regexp, PCRE::UTF8);
CHECK_EQ(re.error(), "");
- StringPiece sp1;
+ absl::string_view sp1;
CHECK(PCRE::FullMatch(text, re, &sp1));
}
}
void Parse1RE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
for (auto _ : state) {
RE2 re(regexp);
CHECK_EQ(re.error(), "");
- StringPiece sp1;
+ absl::string_view sp1;
CHECK(RE2::FullMatch(text, re, &sp1));
}
}
void Parse1CachedNFA(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
- CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
+ CHECK(prog->SearchNFA(text, absl::string_view(), Prog::kAnchored,
Prog::kFullMatch, sp, 2));
}
}
void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->IsOnePass());
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
}
}
void Parse1CachedBitState(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
CHECK(prog->CanBitState());
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
}
}
void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
Prog* prog = GetCachedProg(regexp);
- StringPiece sp[2]; // 2 because sp[0] is whole match.
+ absl::string_view sp[2]; // 2 because sp[0] is whole match.
for (auto _ : state) {
CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
}
}
void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
PCRE& re = *GetCachedPCRE(regexp);
- StringPiece sp1;
+ absl::string_view sp1;
for (auto _ : state) {
CHECK(PCRE::FullMatch(text, re, &sp1));
}
}
void Parse1CachedRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
RE2& re = *GetCachedRE2(regexp);
- StringPiece sp1;
+ absl::string_view sp1;
for (auto _ : state) {
CHECK(RE2::FullMatch(text, re, &sp1));
}
}
void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
- StringPiece sp1, sp2;
+ absl::string_view sp1, sp2;
CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
}
}
void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
- StringPiece sp1, sp2;
+ absl::string_view sp1, sp2;
CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
}
}
void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
PCRE& re = *GetCachedPCRE(regexp);
for (auto _ : state) {
- StringPiece sp1;
+ absl::string_view sp1;
CHECK(PCRE::PartialMatch(text, re, &sp1));
}
}
void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
- const StringPiece& text) {
+ absl::string_view text) {
RE2& re = *GetCachedRE2(regexp);
for (auto _ : state) {
- StringPiece sp1;
+ absl::string_view sp1;
CHECK(RE2::PartialMatch(text, re, &sp1));
}
}
@@ -1409,7 +1408,7 @@ static std::string http_text =
"alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
void HTTPPartialMatchPCRE(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (auto _ : state) {
PCRE::PartialMatch(http_text, re, &a);
@@ -1417,7 +1416,7 @@ void HTTPPartialMatchPCRE(benchmark::State& state) {
}
void HTTPPartialMatchRE2(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (auto _ : state) {
RE2::PartialMatch(http_text, re, &a);
@@ -1433,7 +1432,7 @@ static std::string smallhttp_text =
"GET /abc HTTP/1.1";
void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (auto _ : state) {
PCRE::PartialMatch(smallhttp_text, re, &a);
@@ -1441,7 +1440,7 @@ void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
}
void SmallHTTPPartialMatchRE2(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
for (auto _ : state) {
RE2::PartialMatch(smallhttp_text, re, &a);
@@ -1454,7 +1453,7 @@ BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
void DotMatchPCRE(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
PCRE re("(?-s)^(.+)");
for (auto _ : state) {
PCRE::PartialMatch(http_text, re, &a);
@@ -1462,7 +1461,7 @@ void DotMatchPCRE(benchmark::State& state) {
}
void DotMatchRE2(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
RE2 re("(?-s)^(.+)");
for (auto _ : state) {
RE2::PartialMatch(http_text, re, &a);
@@ -1475,7 +1474,7 @@ BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
void ASCIIMatchPCRE(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
PCRE re("(?-s)^([ -~]+)");
for (auto _ : state) {
PCRE::PartialMatch(http_text, re, &a);
@@ -1483,7 +1482,7 @@ void ASCIIMatchPCRE(benchmark::State& state) {
}
void ASCIIMatchRE2(benchmark::State& state) {
- StringPiece a;
+ absl::string_view a;
RE2 re("(?-s)^([ -~]+)");
for (auto _ : state) {
RE2::PartialMatch(http_text, re, &a);
diff --git a/re2/testing/regexp_generator.cc b/re2/testing/regexp_generator.cc
index 3eeda25..b1761ed 100644
--- a/re2/testing/regexp_generator.cc
+++ b/re2/testing/regexp_generator.cc
@@ -29,9 +29,11 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_format.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
#include "re2/testing/regexp_generator.h"
@@ -47,7 +49,7 @@ const std::vector<std::string>& RegexpGenerator::EgrepOps() {
"%s?",
"%s\\C*",
};
- static std::vector<std::string> v(ops, ops + arraysize(ops));
+ static std::vector<std::string> v(ops, ops + ABSL_ARRAYSIZE(ops));
return v;
}
@@ -199,19 +201,21 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
regexps.push(post[i]);
break;
case 1: {
+ auto fmt = absl::ParsedFormat<'s'>::New(post[i]);
+ CHECK(fmt != nullptr);
std::string a = regexps.top();
regexps.pop();
- regexps.push("(?:" + StringPrintf(post[i].c_str(), a.c_str()) + ")");
+ regexps.push("(?:" + absl::StrFormat(*fmt, a) + ")");
break;
}
case 2: {
+ auto fmt = absl::ParsedFormat<'s', 's'>::New(post[i]);
+ CHECK(fmt != nullptr);
std::string b = regexps.top();
regexps.pop();
std::string a = regexps.top();
regexps.pop();
- regexps.push("(?:" +
- StringPrintf(post[i].c_str(), a.c_str(), b.c_str()) +
- ")");
+ regexps.push("(?:" + absl::StrFormat(*fmt, a, b) + ")");
break;
}
}
@@ -219,13 +223,13 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
if (regexps.size() != 1) {
// Internal error - should never happen.
- printf("Bad regexp program:\n");
+ absl::PrintF("Bad regexp program:\n");
for (size_t i = 0; i < post.size(); i++) {
- printf(" %s\n", CEscape(post[i]).c_str());
+ absl::PrintF(" %s\n", absl::CEscape(post[i]));
}
- printf("Stack after running program:\n");
+ absl::PrintF("Stack after running program:\n");
while (!regexps.empty()) {
- printf(" %s\n", CEscape(regexps.top()).c_str());
+ absl::PrintF(" %s\n", absl::CEscape(regexps.top()));
regexps.pop();
}
LOG(FATAL) << "Bad regexp program.";
@@ -238,7 +242,7 @@ void RegexpGenerator::RunPostfix(const std::vector<std::string>& post) {
}
// Split s into an vector of strings, one for each UTF-8 character.
-std::vector<std::string> Explode(const StringPiece& s) {
+std::vector<std::string> Explode(absl::string_view s) {
std::vector<std::string> v;
for (const char *q = s.data(); q < s.data() + s.size(); ) {
@@ -253,7 +257,7 @@ std::vector<std::string> Explode(const StringPiece& s) {
// Split string everywhere a substring is found, returning
// vector of pieces.
-std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
+std::vector<std::string> Split(absl::string_view sep, absl::string_view s) {
std::vector<std::string> v;
if (sep.empty())
@@ -261,7 +265,7 @@ std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s) {
const char *p = s.data();
for (const char *q = s.data(); q + sep.size() <= s.data() + s.size(); q++) {
- if (StringPiece(q, sep.size()) == sep) {
+ if (absl::string_view(q, sep.size()) == sep) {
v.push_back(std::string(p, q - p));
p = q + sep.size();
q = p - 1; // -1 for ++ in loop
diff --git a/re2/testing/regexp_generator.h b/re2/testing/regexp_generator.h
index 7d72aff..e1be1a9 100644
--- a/re2/testing/regexp_generator.h
+++ b/re2/testing/regexp_generator.h
@@ -13,8 +13,7 @@
#include <string>
#include <vector>
-#include "util/util.h"
-#include "re2/stringpiece.h"
+#include "absl/strings/string_view.h"
namespace re2 {
@@ -66,11 +65,11 @@ class RegexpGenerator {
// Helpers for preparing arguments to RegexpGenerator constructor.
// Returns one string for each character in s.
-std::vector<std::string> Explode(const StringPiece& s);
+std::vector<std::string> Explode(absl::string_view s);
// Splits string everywhere sep is found, returning
// vector of pieces.
-std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s);
+std::vector<std::string> Split(absl::string_view sep, absl::string_view s);
} // namespace re2
diff --git a/re2/testing/regexp_test.cc b/re2/testing/regexp_test.cc
index f7e7e92..ef8f59d 100644
--- a/re2/testing/regexp_test.cc
+++ b/re2/testing/regexp_test.cc
@@ -9,7 +9,7 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
diff --git a/re2/testing/required_prefix_test.cc b/re2/testing/required_prefix_test.cc
index 60a11f8..231fd34 100644
--- a/re2/testing/required_prefix_test.cc
+++ b/re2/testing/required_prefix_test.cc
@@ -4,7 +4,8 @@
#include <string>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"
@@ -44,7 +45,7 @@ static PrefixTest tests[] = {
};
TEST(RequiredPrefix, SimpleTests) {
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
const PrefixTest& t = tests[i];
for (size_t j = 0; j < 2; j++) {
Regexp::ParseFlags flags = Regexp::LikePerl;
@@ -106,7 +107,7 @@ static PrefixTest for_accel_tests[] = {
};
TEST(RequiredPrefixForAccel, SimpleTests) {
- for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(for_accel_tests); i++) {
const PrefixTest& t = for_accel_tests[i];
for (size_t j = 0; j < 2; j++) {
Regexp::ParseFlags flags = Regexp::LikePerl;
@@ -171,7 +172,7 @@ static const char* prefix_accel_tests[] = {
};
TEST(PrefixAccel, SimpleTests) {
- for (size_t i = 0; i < arraysize(prefix_accel_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(prefix_accel_tests); i++) {
const char* pattern = prefix_accel_tests[i];
Regexp* re = Regexp::Parse(pattern, Regexp::LikePerl, NULL);
ASSERT_TRUE(re != NULL);
diff --git a/re2/testing/search_test.cc b/re2/testing/search_test.cc
index 5d86dbf..166652a 100644
--- a/re2/testing/search_test.cc
+++ b/re2/testing/search_test.cc
@@ -2,7 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/testing/tester.h"
@@ -314,7 +315,7 @@ RegexpTest simple_tests[] = {
TEST(Regexp, SearchTests) {
int failures = 0;
- for (size_t i = 0; i < arraysize(simple_tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(simple_tests); i++) {
const RegexpTest& t = simple_tests[i];
if (!TestRegexpOnText(t.regexp, t.text))
failures++;
diff --git a/re2/testing/set_test.cc b/re2/testing/set_test.cc
index 5a760c4..fdbc0b2 100644
--- a/re2/testing/set_test.cc
+++ b/re2/testing/set_test.cc
@@ -7,7 +7,7 @@
#include <vector>
#include <utility>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/re2.h"
#include "re2/set.h"
diff --git a/re2/testing/simplify_test.cc b/re2/testing/simplify_test.cc
index 9dcd4ac..d2c136a 100644
--- a/re2/testing/simplify_test.cc
+++ b/re2/testing/simplify_test.cc
@@ -7,7 +7,8 @@
#include <string.h>
#include <string>
-#include "util/test.h"
+#include "absl/base/macros.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/regexp.h"
@@ -245,7 +246,7 @@ static Test tests[] = {
};
TEST(TestSimplify, SimpleRegexps) {
- for (size_t i = 0; i < arraysize(tests); i++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(tests); i++) {
RegexpStatus status;
VLOG(1) << "Testing " << tests[i].regexp;
Regexp* re = Regexp::Parse(tests[i].regexp,
diff --git a/re2/testing/string_generator.cc b/re2/testing/string_generator.cc
index 44837fe..1891b14 100644
--- a/re2/testing/string_generator.cc
+++ b/re2/testing/string_generator.cc
@@ -11,7 +11,7 @@
#include <string>
#include <vector>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/logging.h"
#include "re2/testing/string_generator.h"
@@ -81,11 +81,11 @@ bool StringGenerator::RandomDigits() {
// currently described by digits_. Calls IncrementDigits
// after computing the string, so that it knows the answer
// for subsequent HasNext() calls.
-const StringPiece& StringGenerator::Next() {
+absl::string_view StringGenerator::Next() {
CHECK(hasnext_);
if (generate_null_) {
generate_null_ = false;
- sp_ = StringPiece();
+ sp_ = absl::string_view();
return sp_;
}
s_.clear();
diff --git a/re2/testing/string_generator.h b/re2/testing/string_generator.h
index 73fbb51..0d6f5fc 100644
--- a/re2/testing/string_generator.h
+++ b/re2/testing/string_generator.h
@@ -14,8 +14,7 @@
#include <string>
#include <vector>
-#include "util/util.h"
-#include "re2/stringpiece.h"
+#include "absl/strings/string_view.h"
namespace re2 {
@@ -24,7 +23,7 @@ class StringGenerator {
StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
~StringGenerator() {}
- const StringPiece& Next();
+ absl::string_view Next();
bool HasNext() { return hasnext_; }
// Resets generator to start sequence over.
@@ -45,11 +44,11 @@ class StringGenerator {
std::vector<std::string> alphabet_; // Alphabet, one string per letter.
// Iteration state.
- StringPiece sp_; // Last StringPiece returned by Next().
- std::string s_; // String data in last StringPiece returned by Next().
+ absl::string_view sp_; // Last string_view returned by Next().
+ std::string s_; // String data in last string_view returned by Next().
bool hasnext_; // Whether Next() can be called again.
std::vector<int> digits_; // Alphabet indices for next string.
- bool generate_null_; // Whether to generate a NULL StringPiece next.
+ bool generate_null_; // Whether to generate a NULL string_view next.
bool random_; // Whether generated strings are random.
int nrandom_; // Number of random strings left to generate.
std::minstd_rand0 rng_; // Random number generator.
diff --git a/re2/testing/string_generator_test.cc b/re2/testing/string_generator_test.cc
index d0f84f4..b1273d9 100644
--- a/re2/testing/string_generator_test.cc
+++ b/re2/testing/string_generator_test.cc
@@ -7,7 +7,7 @@
#include <stdint.h>
#include <string>
-#include "util/test.h"
+#include "gtest/gtest.h"
#include "util/utf.h"
#include "re2/testing/string_generator.h"
#include "re2/testing/regexp_generator.h"
@@ -41,7 +41,7 @@ static void RunTest(int len, const std::string& alphabet, bool donull) {
if (donull) {
g.GenerateNULL();
EXPECT_TRUE(g.HasNext());
- StringPiece sp = g.Next();
+ absl::string_view sp = g.Next();
EXPECT_EQ(sp.data(), static_cast<const char*>(NULL));
EXPECT_EQ(sp.size(), 0);
}
diff --git a/re2/testing/tester.cc b/re2/testing/tester.cc
index b0c22f2..a094cb4 100644
--- a/re2/testing/tester.cc
+++ b/re2/testing/tester.cc
@@ -9,24 +9,25 @@
#include <string.h>
#include <string>
-#include "util/util.h"
-#include "util/flags.h"
+#include "absl/base/macros.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "re2/testing/tester.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
-DEFINE_FLAG(bool, dump_prog, false, "dump regexp program");
-DEFINE_FLAG(bool, log_okay, false, "log successful runs");
-DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
+ABSL_FLAG(bool, dump_prog, false, "dump regexp program");
+ABSL_FLAG(bool, log_okay, false, "log successful runs");
+ABSL_FLAG(bool, dump_rprog, false, "dump reversed regexp program");
-DEFINE_FLAG(int, max_regexp_failures, 100,
- "maximum number of regexp test failures (-1 = unlimited)");
+ABSL_FLAG(int, max_regexp_failures, 100,
+ "maximum number of regexp test failures (-1 = unlimited)");
-DEFINE_FLAG(std::string, regexp_engines, "",
- "pattern to select regexp engines to test");
+ABSL_FLAG(std::string, regexp_engines, "",
+ "pattern to select regexp engines to test");
namespace re2 {
@@ -50,7 +51,7 @@ const char* engine_names[kEngineMax] = {
// Returns the name of the engine.
static const char* EngineName(Engine e) {
CHECK_GE(e, 0);
- CHECK_LT(e, arraysize(engine_names));
+ CHECK_LT(e, ABSL_ARRAYSIZE(engine_names));
CHECK(engine_names[e] != NULL);
return engine_names[e];
}
@@ -63,11 +64,11 @@ static uint32_t Engines() {
if (did_parse)
return cached_engines;
- if (GetFlag(FLAGS_regexp_engines).empty()) {
+ if (absl::GetFlag(FLAGS_regexp_engines).empty()) {
cached_engines = ~0;
} else {
for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
- if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
+ if (absl::GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos)
cached_engines |= 1<<i;
}
@@ -97,7 +98,7 @@ struct TestInstance::Result {
void ClearSubmatch() {
for (int i = 0; i < kMaxSubmatch; i++)
- submatch[i] = StringPiece();
+ submatch[i] = absl::string_view();
}
bool skipped; // test skipped: wasn't applicable
@@ -105,24 +106,24 @@ struct TestInstance::Result {
bool untrusted; // don't really trust the answer
bool have_submatch; // computed all submatch info
bool have_submatch0; // computed just submatch[0]
- StringPiece submatch[kMaxSubmatch];
+ absl::string_view submatch[kMaxSubmatch];
};
typedef TestInstance::Result Result;
// Formats a single capture range s in text in the form (a,b)
// where a and b are the starting and ending offsets of s in text.
-static std::string FormatCapture(const StringPiece& text,
- const StringPiece& s) {
+static std::string FormatCapture(absl::string_view text,
+ absl::string_view s) {
if (s.data() == NULL)
return "(?,?)";
- return StringPrintf("(%td,%td)",
- BeginPtr(s) - BeginPtr(text),
- EndPtr(s) - BeginPtr(text));
+ return absl::StrFormat("(%d,%d)",
+ BeginPtr(s) - BeginPtr(text),
+ EndPtr(s) - BeginPtr(text));
}
// Returns whether text contains non-ASCII (>= 0x80) bytes.
-static bool NonASCII(const StringPiece& text) {
+static bool NonASCII(absl::string_view text) {
for (size_t i = 0; i < text.size(); i++)
if ((uint8_t)text[i] >= 0x80)
return true;
@@ -174,15 +175,15 @@ static ParseMode parse_modes[] = {
};
static std::string FormatMode(Regexp::ParseFlags flags) {
- for (size_t i = 0; i < arraysize(parse_modes); i++)
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(parse_modes); i++)
if (parse_modes[i].parse_flags == flags)
return parse_modes[i].desc;
- return StringPrintf("%#x", static_cast<uint32_t>(flags));
+ return absl::StrFormat("%#x", static_cast<uint32_t>(flags));
}
// Constructs and saves all the matching engines that
// will be required for the given tests.
-TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
+TestInstance::TestInstance(absl::string_view regexp_str, Prog::MatchKind kind,
Regexp::ParseFlags flags)
: regexp_str_(regexp_str),
kind_(kind),
@@ -195,14 +196,14 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
re_(NULL),
re2_(NULL) {
- VLOG(1) << CEscape(regexp_str);
+ VLOG(1) << absl::CEscape(regexp_str);
// Compile regexp to prog.
// Always required - needed for backtracking (reference implementation).
RegexpStatus status;
regexp_ = Regexp::Parse(regexp_str, flags, &status);
if (regexp_ == NULL) {
- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
+ LOG(INFO) << "Cannot parse: " << absl::CEscape(regexp_str_)
<< " mode: " << FormatMode(flags);
error_ = true;
return;
@@ -210,14 +211,14 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
num_captures_ = regexp_->NumCaptures();
prog_ = regexp_->CompileToProg(0);
if (prog_ == NULL) {
- LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
+ LOG(INFO) << "Cannot compile: " << absl::CEscape(regexp_str_);
error_ = true;
return;
}
- if (GetFlag(FLAGS_dump_prog)) {
+ if (absl::GetFlag(FLAGS_dump_prog)) {
LOG(INFO) << "Prog for "
<< " regexp "
- << CEscape(regexp_str_)
+ << absl::CEscape(regexp_str_)
<< " (" << FormatKind(kind_)
<< ", " << FormatMode(flags_)
<< ")\n"
@@ -228,11 +229,11 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
rprog_ = regexp_->CompileToReverseProg(0);
if (rprog_ == NULL) {
- LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
+ LOG(INFO) << "Cannot reverse compile: " << absl::CEscape(regexp_str_);
error_ = true;
return;
}
- if (GetFlag(FLAGS_dump_rprog))
+ if (absl::GetFlag(FLAGS_dump_rprog))
LOG(INFO) << rprog_->Dump();
}
@@ -256,7 +257,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
options.set_longest_match(true);
re2_ = new RE2(re, options);
if (!re2_->error().empty()) {
- LOG(INFO) << "Cannot RE2: " << CEscape(re);
+ LOG(INFO) << "Cannot RE2: " << absl::CEscape(re);
error_ = true;
return;
}
@@ -282,7 +283,7 @@ TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
// add one more layer of parens.
re_ = new PCRE("("+re+")", o);
if (!re_->error().empty()) {
- LOG(INFO) << "Cannot PCRE: " << CEscape(re);
+ LOG(INFO) << "Cannot PCRE: " << absl::CEscape(re);
error_ = true;
return;
}
@@ -301,11 +302,9 @@ TestInstance::~TestInstance() {
// Runs a single search using the named engine type.
// This interface hides all the irregularities of the various
// engine interfaces from the rest of this file.
-void TestInstance::RunSearch(Engine type,
- const StringPiece& orig_text,
- const StringPiece& orig_context,
- Prog::Anchor anchor,
- Result* result) {
+void TestInstance::RunSearch(Engine type, absl::string_view orig_text,
+ absl::string_view orig_context,
+ Prog::Anchor anchor, Result* result) {
if (regexp_ == NULL) {
result->skipped = true;
return;
@@ -314,8 +313,8 @@ void TestInstance::RunSearch(Engine type,
if (nsubmatch > kMaxSubmatch)
nsubmatch = kMaxSubmatch;
- StringPiece text = orig_text;
- StringPiece context = orig_context;
+ absl::string_view text = orig_text;
+ absl::string_view context = orig_context;
switch (type) {
default:
@@ -368,8 +367,8 @@ void TestInstance::RunSearch(Engine type,
result->submatch,
&result->skipped, NULL)) {
LOG(ERROR) << "Reverse DFA inconsistency: "
- << CEscape(regexp_str_)
- << " on " << CEscape(text);
+ << absl::CEscape(regexp_str_)
+ << " on " << absl::CEscape(text);
result->matched = false;
}
}
@@ -438,19 +437,19 @@ void TestInstance::RunSearch(Engine type,
// whitespace, not just vertical tab. Regexp::MimicsPCRE() is
// unable to handle all cases of this, unfortunately, so just
// catch them here. :(
- if (regexp_str_.find("\\v") != StringPiece::npos &&
- (text.find('\n') != StringPiece::npos ||
- text.find('\f') != StringPiece::npos ||
- text.find('\r') != StringPiece::npos)) {
+ if (regexp_str_.find("\\v") != absl::string_view::npos &&
+ (text.find('\n') != absl::string_view::npos ||
+ text.find('\f') != absl::string_view::npos ||
+ text.find('\r') != absl::string_view::npos)) {
result->skipped = true;
break;
}
// PCRE 8.34 or so started allowing vertical tab to match \s,
// following a change made in Perl 5.18. RE2 does not.
- if ((regexp_str_.find("\\s") != StringPiece::npos ||
- regexp_str_.find("\\S") != StringPiece::npos) &&
- text.find('\v') != StringPiece::npos) {
+ if ((regexp_str_.find("\\s") != absl::string_view::npos ||
+ regexp_str_.find("\\S") != absl::string_view::npos) &&
+ text.find('\v') != absl::string_view::npos) {
result->skipped = true;
break;
}
@@ -513,7 +512,7 @@ static bool ResultOkay(const Result& r, const Result& correct) {
}
// Runs a single test.
-bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
+bool TestInstance::RunCase(absl::string_view text, absl::string_view context,
Prog::Anchor anchor) {
// Backtracking is the gold standard.
Result correct;
@@ -521,12 +520,12 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
if (correct.skipped) {
if (regexp_ == NULL)
return true;
- LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
+ LOG(ERROR) << "Skipped backtracking! " << absl::CEscape(regexp_str_)
<< " " << FormatMode(flags_);
return false;
}
- VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
- << " text " << CEscape(text)
+ VLOG(1) << "Try: regexp " << absl::CEscape(regexp_str_)
+ << " text " << absl::CEscape(text)
<< " (" << FormatKind(kind_)
<< ", " << FormatAnchor(anchor)
<< ", " << FormatMode(flags_)
@@ -541,7 +540,7 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
Result r;
RunSearch(i, text, context, anchor, &r);
if (ResultOkay(r, correct)) {
- if (GetFlag(FLAGS_log_okay))
+ if (absl::GetFlag(FLAGS_log_okay))
LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
continue;
}
@@ -571,14 +570,14 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
if (r.submatch[i].data() != correct.submatch[i].data() ||
r.submatch[i].size() != correct.submatch[i].size()) {
LOG(INFO) <<
- StringPrintf(" $%d: should be %s is %s",
- i,
- FormatCapture(text, correct.submatch[i]).c_str(),
- FormatCapture(text, r.submatch[i]).c_str());
+ absl::StrFormat(" $%d: should be %s is %s",
+ i,
+ FormatCapture(text, correct.submatch[i]),
+ FormatCapture(text, r.submatch[i]));
} else {
LOG(INFO) <<
- StringPrintf(" $%d: %s ok", i,
- FormatCapture(text, r.submatch[i]).c_str());
+ absl::StrFormat(" $%d: %s ok", i,
+ FormatCapture(text, r.submatch[i]));
}
}
}
@@ -586,7 +585,7 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
if (!all_okay) {
// This will be initialised once (after flags have been initialised)
// and that is desirable because we want to enforce a global limit.
- static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures);
+ static int max_regexp_failures = absl::GetFlag(FLAGS_max_regexp_failures);
if (max_regexp_failures > 0 && --max_regexp_failures == 0)
LOG(QFATAL) << "Too many regexp failures.";
}
@@ -595,22 +594,22 @@ bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
}
void TestInstance::LogMatch(const char* prefix, Engine e,
- const StringPiece& text, const StringPiece& context,
+ absl::string_view text, absl::string_view context,
Prog::Anchor anchor) {
LOG(INFO) << prefix
<< EngineName(e)
<< " regexp "
- << CEscape(regexp_str_)
+ << absl::CEscape(regexp_str_)
<< " "
- << CEscape(regexp_->ToString())
+ << absl::CEscape(regexp_->ToString())
<< " text "
- << CEscape(text)
+ << absl::CEscape(text)
<< " ("
<< BeginPtr(text) - BeginPtr(context)
<< ","
<< EndPtr(text) - BeginPtr(context)
<< ") of context "
- << CEscape(context)
+ << absl::CEscape(context)
<< " (" << FormatKind(kind_)
<< ", " << FormatAnchor(anchor)
<< ", " << FormatMode(flags_)
@@ -624,10 +623,10 @@ static Prog::MatchKind kinds[] = {
};
// Test all possible match kinds and parse modes.
-Tester::Tester(const StringPiece& regexp) {
+Tester::Tester(absl::string_view regexp) {
error_ = false;
- for (size_t i = 0; i < arraysize(kinds); i++) {
- for (size_t j = 0; j < arraysize(parse_modes); j++) {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(kinds); i++) {
+ for (size_t j = 0; j < ABSL_ARRAYSIZE(parse_modes); j++) {
TestInstance* t = new TestInstance(regexp, kinds[i],
parse_modes[j].parse_flags);
error_ |= t->error();
@@ -641,8 +640,8 @@ Tester::~Tester() {
delete v_[i];
}
-bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor) {
+bool Tester::TestCase(absl::string_view text, absl::string_view context,
+ Prog::Anchor anchor) {
bool okay = true;
for (size_t i = 0; i < v_.size(); i++)
okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
@@ -654,10 +653,10 @@ static Prog::Anchor anchors[] = {
Prog::kUnanchored
};
-bool Tester::TestInput(const StringPiece& text) {
+bool Tester::TestInput(absl::string_view text) {
bool okay = TestInputInContext(text, text);
if (!text.empty()) {
- StringPiece sp;
+ absl::string_view sp;
sp = text;
sp.remove_prefix(1);
okay &= TestInputInContext(sp, text);
@@ -668,16 +667,16 @@ bool Tester::TestInput(const StringPiece& text) {
return okay;
}
-bool Tester::TestInputInContext(const StringPiece& text,
- const StringPiece& context) {
+bool Tester::TestInputInContext(absl::string_view text,
+ absl::string_view context) {
bool okay = true;
- for (size_t i = 0; i < arraysize(anchors); i++)
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(anchors); i++)
okay &= TestCase(text, context, anchors[i]);
return okay;
}
-bool TestRegexpOnText(const StringPiece& regexp,
- const StringPiece& text) {
+bool TestRegexpOnText(absl::string_view regexp,
+ absl::string_view text) {
Tester t(regexp);
return t.TestInput(text);
}
diff --git a/re2/testing/tester.h b/re2/testing/tester.h
index 47d0c43..59be5ea 100644
--- a/re2/testing/tester.h
+++ b/re2/testing/tester.h
@@ -10,7 +10,7 @@
#include <vector>
-#include "re2/stringpiece.h"
+#include "absl/strings/string_view.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
@@ -51,7 +51,7 @@ class TestInstance {
public:
struct Result;
- TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
+ TestInstance(absl::string_view regexp, Prog::MatchKind kind,
Regexp::ParseFlags flags);
~TestInstance();
Regexp::ParseFlags flags() { return flags_; }
@@ -59,20 +59,18 @@ class TestInstance {
// Runs a single test case: search in text, which is in context,
// using the given anchoring.
- bool RunCase(const StringPiece& text, const StringPiece& context,
+ bool RunCase(absl::string_view text, absl::string_view context,
Prog::Anchor anchor);
private:
// Runs a single search using the named engine type.
- void RunSearch(Engine type,
- const StringPiece& text, const StringPiece& context,
- Prog::Anchor anchor,
- Result *result);
+ void RunSearch(Engine type, absl::string_view text, absl::string_view context,
+ Prog::Anchor anchor, Result* result);
- void LogMatch(const char* prefix, Engine e, const StringPiece& text,
- const StringPiece& context, Prog::Anchor anchor);
+ void LogMatch(const char* prefix, Engine e, absl::string_view text,
+ absl::string_view context, Prog::Anchor anchor);
- const StringPiece regexp_str_; // regexp being tested
+ absl::string_view regexp_str_; // regexp being tested
Prog::MatchKind kind_; // kind of match
Regexp::ParseFlags flags_; // flags for parsing regexp_str_
bool error_; // error during constructor?
@@ -91,21 +89,21 @@ class TestInstance {
// A group of TestInstances for all possible configurations.
class Tester {
public:
- explicit Tester(const StringPiece& regexp);
+ explicit Tester(absl::string_view regexp);
~Tester();
bool error() { return error_; }
// Runs a single test case: search in text, which is in context,
// using the given anchoring.
- bool TestCase(const StringPiece& text, const StringPiece& context,
+ bool TestCase(absl::string_view text, absl::string_view context,
Prog::Anchor anchor);
// Run TestCase(text, text, anchor) for all anchoring modes.
- bool TestInput(const StringPiece& text);
+ bool TestInput(absl::string_view text);
// Run TestCase(text, context, anchor) for all anchoring modes.
- bool TestInputInContext(const StringPiece& text, const StringPiece& context);
+ bool TestInputInContext(absl::string_view text, absl::string_view context);
private:
bool error_;
@@ -116,7 +114,7 @@ class Tester {
};
// Run all possible tests using regexp and text.
-bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
+bool TestRegexpOnText(absl::string_view regexp, absl::string_view text);
} // namespace re2
diff --git a/re2/tostring.cc b/re2/tostring.cc
index 9c1c038..33179fd 100644
--- a/re2/tostring.cc
+++ b/re2/tostring.cc
@@ -8,9 +8,8 @@
#include <string.h>
#include <string>
-#include "util/util.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
-#include "util/strutil.h"
#include "util/utf.h"
#include "re2/regexp.h"
#include "re2/walker-inl.h"
@@ -216,11 +215,11 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
case kRegexpRepeat:
if (re->max() == -1)
- t_->append(StringPrintf("{%d,}", re->min()));
+ t_->append(absl::StrFormat("{%d,}", re->min()));
else if (re->min() == re->max())
- t_->append(StringPrintf("{%d}", re->min()));
+ t_->append(absl::StrFormat("{%d}", re->min()));
else
- t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
+ t_->append(absl::StrFormat("{%d,%d}", re->min(), re->max()));
if (re->parse_flags() & Regexp::NonGreedy)
t_->append("?");
if (prec < PrecUnary)
@@ -291,7 +290,7 @@ int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
// There's no syntax accepted by the parser to generate
// this node (it is generated by RE2::Set) so make something
// up that is readable but won't compile.
- t_->append(StringPrintf("(?HaveMatch:%d)", re->match_id()));
+ t_->append(absl::StrFormat("(?HaveMatch:%d)", re->match_id()));
break;
}
@@ -332,10 +331,10 @@ static void AppendCCChar(std::string* t, Rune r) {
}
if (r < 0x100) {
- *t += StringPrintf("\\x%02x", static_cast<int>(r));
+ *t += absl::StrFormat("\\x%02x", static_cast<int>(r));
return;
}
- *t += StringPrintf("\\x{%x}", static_cast<int>(r));
+ *t += absl::StrFormat("\\x{%x}", static_cast<int>(r));
}
static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
diff --git a/re2/unicode_casefold.h b/re2/unicode_casefold.h
index 8bdbb42..4acad68 100644
--- a/re2/unicode_casefold.h
+++ b/re2/unicode_casefold.h
@@ -41,7 +41,6 @@
#include <stdint.h>
-#include "util/util.h"
#include "util/utf.h"
namespace re2 {
diff --git a/re2/unicode_groups.h b/re2/unicode_groups.h
index 75f55da..6dc6532 100644
--- a/re2/unicode_groups.h
+++ b/re2/unicode_groups.h
@@ -20,7 +20,6 @@
#include <stdint.h>
-#include "util/util.h"
#include "util/utf.h"
namespace re2 {
diff --git a/re2/walker-inl.h b/re2/walker-inl.h
index 4d064a0..45763a7 100644
--- a/re2/walker-inl.h
+++ b/re2/walker-inl.h
@@ -15,6 +15,7 @@
#include <stack>
+#include "absl/base/macros.h"
#include "util/logging.h"
#include "re2/regexp.h"
@@ -190,7 +191,7 @@ template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
s->child_args = &s->child_arg;
else if (re->nsub_ > 1)
s->child_args = new T[re->nsub_];
- FALLTHROUGH_INTENDED;
+ ABSL_FALLTHROUGH_INTENDED;
}
default: {
if (re->nsub_ > 0) {
diff --git a/re2Config.cmake.in b/re2Config.cmake.in
index 1ea3ff0..6a177c6 100644
--- a/re2Config.cmake.in
+++ b/re2Config.cmake.in
@@ -13,6 +13,8 @@ if(UNIX)
find_dependency(Threads REQUIRED)
endif()
+find_dependency(absl REQUIRED)
+
if(@RE2_USE_ICU@)
find_dependency(ICU REQUIRED COMPONENTS uc)
endif()
diff --git a/util/benchmark.cc b/util/benchmark.cc
deleted file mode 100644
index e39c334..0000000
--- a/util/benchmark.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <algorithm>
-#include <chrono>
-
-#include "util/benchmark.h"
-#include "util/flags.h"
-#include "re2/re2.h"
-
-#ifdef _WIN32
-#define snprintf _snprintf
-#endif
-
-using ::testing::Benchmark;
-
-static Benchmark* benchmarks[10000];
-static int nbenchmarks;
-
-void Benchmark::Register() {
- lo_ = std::max(1, lo_);
- hi_ = std::max(lo_, hi_);
- benchmarks[nbenchmarks++] = this;
-}
-
-static int64_t nsec() {
- return std::chrono::duration_cast<std::chrono::nanoseconds>(
- std::chrono::steady_clock::now().time_since_epoch())
- .count();
-}
-
-static int64_t t0;
-static int64_t ns;
-static int64_t bytes;
-static int64_t items;
-
-void StartBenchmarkTiming() {
- if (t0 == 0) {
- t0 = nsec();
- }
-}
-
-void StopBenchmarkTiming() {
- if (t0 != 0) {
- ns += nsec() - t0;
- t0 = 0;
- }
-}
-
-void SetBenchmarkBytesProcessed(int64_t b) { bytes = b; }
-
-void SetBenchmarkItemsProcessed(int64_t i) { items = i; }
-
-static void RunFunc(Benchmark* b, int iters, int arg) {
- t0 = nsec();
- ns = 0;
- bytes = 0;
- items = 0;
- b->func()(iters, arg);
- StopBenchmarkTiming();
-}
-
-static int round(int n) {
- int base = 1;
- while (base * 10 < n) base *= 10;
- if (n < 2 * base) return 2 * base;
- if (n < 5 * base) return 5 * base;
- return 10 * base;
-}
-
-static void RunBench(Benchmark* b, int arg) {
- int iters, last;
-
- // Run once just in case it's expensive.
- iters = 1;
- RunFunc(b, iters, arg);
- while (ns < (int)1e9 && iters < (int)1e9) {
- last = iters;
- if (ns / iters == 0) {
- iters = (int)1e9;
- } else {
- iters = (int)1e9 / static_cast<int>(ns / iters);
- }
- iters = std::max(last + 1, std::min(iters + iters / 2, 100 * last));
- iters = round(iters);
- RunFunc(b, iters, arg);
- }
-
- char mb[100];
- char suf[100];
- mb[0] = '\0';
- suf[0] = '\0';
- if (ns > 0 && bytes > 0)
- snprintf(mb, sizeof mb, "\t%7.2f MB/s",
- ((double)bytes / 1e6) / ((double)ns / 1e9));
- if (b->has_arg()) {
- if (arg >= (1 << 20)) {
- snprintf(suf, sizeof suf, "/%dM", arg / (1 << 20));
- } else if (arg >= (1 << 10)) {
- snprintf(suf, sizeof suf, "/%dK", arg / (1 << 10));
- } else {
- snprintf(suf, sizeof suf, "/%d", arg);
- }
- }
- printf("%s%s\t%8d\t%10lld ns/op%s\n", b->name(), suf, iters,
- (long long)ns / iters, mb);
- fflush(stdout);
-}
-
-static bool WantBench(const char* name, int argc, const char** argv) {
- if (argc == 1) return true;
- for (int i = 1; i < argc; i++) {
- if (RE2::PartialMatch(name, argv[i]))
- return true;
- }
- return false;
-}
-
-int main(int argc, const char** argv) {
- for (int i = 0; i < nbenchmarks; i++) {
- Benchmark* b = benchmarks[i];
- if (!WantBench(b->name(), argc, argv))
- continue;
- for (int arg = b->lo(); arg <= b->hi(); arg <<= 1)
- RunBench(b, arg);
- }
-}
diff --git a/util/benchmark.h b/util/benchmark.h
deleted file mode 100644
index d97b49e..0000000
--- a/util/benchmark.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_BENCHMARK_H_
-#define UTIL_BENCHMARK_H_
-
-#include <stdint.h>
-#include <functional>
-
-#include "util/logging.h"
-#include "util/util.h"
-
-// Globals for the old benchmark API.
-void StartBenchmarkTiming();
-void StopBenchmarkTiming();
-void SetBenchmarkBytesProcessed(int64_t b);
-void SetBenchmarkItemsProcessed(int64_t i);
-
-namespace benchmark {
-
-// The new benchmark API implemented as a layer over the old benchmark API.
-// (Please refer to https://github.com/google/benchmark for documentation.)
-class State {
- private:
- class Iterator {
- public:
- // Benchmark code looks like this:
- //
- // for (auto _ : state) {
- // // ...
- // }
- //
- // We try to avoid compiler warnings about such variables being unused.
- struct ATTRIBUTE_UNUSED Value {};
-
- explicit Iterator(int64_t iters) : iters_(iters) {}
-
- bool operator!=(const Iterator& that) const {
- if (iters_ != that.iters_) {
- return true;
- } else {
- // We are about to stop the loop, so stop timing.
- StopBenchmarkTiming();
- return false;
- }
- }
-
- Value operator*() const {
- return Value();
- }
-
- Iterator& operator++() {
- --iters_;
- return *this;
- }
-
- private:
- int64_t iters_;
- };
-
- public:
- explicit State(int64_t iters)
- : iters_(iters), arg_(0), has_arg_(false) {}
-
- State(int64_t iters, int64_t arg)
- : iters_(iters), arg_(arg), has_arg_(true) {}
-
- Iterator begin() {
- // We are about to start the loop, so start timing.
- StartBenchmarkTiming();
- return Iterator(iters_);
- }
-
- Iterator end() {
- return Iterator(0);
- }
-
- void SetBytesProcessed(int64_t b) { SetBenchmarkBytesProcessed(b); }
- void SetItemsProcessed(int64_t i) { SetBenchmarkItemsProcessed(i); }
- int64_t iterations() const { return iters_; }
- // Pretend to support multiple arguments.
- int64_t range(int pos) const { CHECK(has_arg_); return arg_; }
-
- private:
- int64_t iters_;
- int64_t arg_;
- bool has_arg_;
-
- State(const State&) = delete;
- State& operator=(const State&) = delete;
-};
-
-} // namespace benchmark
-
-namespace testing {
-
-class Benchmark {
- public:
- Benchmark(const char* name, void (*func)(benchmark::State&))
- : name_(name),
- func_([func](int iters, int arg) {
- benchmark::State state(iters);
- func(state);
- }),
- lo_(0),
- hi_(0),
- has_arg_(false) {
- Register();
- }
-
- Benchmark(const char* name, void (*func)(benchmark::State&), int lo, int hi)
- : name_(name),
- func_([func](int iters, int arg) {
- benchmark::State state(iters, arg);
- func(state);
- }),
- lo_(lo),
- hi_(hi),
- has_arg_(true) {
- Register();
- }
-
- // Pretend to support multiple threads.
- Benchmark* ThreadRange(int lo, int hi) { return this; }
-
- const char* name() const { return name_; }
- const std::function<void(int, int)>& func() const { return func_; }
- int lo() const { return lo_; }
- int hi() const { return hi_; }
- bool has_arg() const { return has_arg_; }
-
- private:
- void Register();
-
- const char* name_;
- std::function<void(int, int)> func_;
- int lo_;
- int hi_;
- bool has_arg_;
-
- Benchmark(const Benchmark&) = delete;
- Benchmark& operator=(const Benchmark&) = delete;
-};
-
-} // namespace testing
-
-#define BENCHMARK(f) \
- ::testing::Benchmark* _benchmark_##f = \
- (new ::testing::Benchmark(#f, f))
-
-#define BENCHMARK_RANGE(f, lo, hi) \
- ::testing::Benchmark* _benchmark_##f = \
- (new ::testing::Benchmark(#f, f, lo, hi))
-
-#endif // UTIL_BENCHMARK_H_
diff --git a/util/flags.h b/util/flags.h
deleted file mode 100644
index 3386b72..0000000
--- a/util/flags.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_FLAGS_H_
-#define UTIL_FLAGS_H_
-
-// Simplified version of Google's command line flags.
-// Does not support parsing the command line.
-// If you want to do that, see
-// https://gflags.github.io/gflags/
-
-#define DEFINE_FLAG(type, name, deflt, desc) \
- namespace re2 { type FLAGS_##name = deflt; }
-
-#define DECLARE_FLAG(type, name) \
- namespace re2 { extern type FLAGS_##name; }
-
-namespace re2 {
-template <typename T>
-T GetFlag(const T& flag) {
- return flag;
-}
-} // namespace re2
-
-#endif // UTIL_FLAGS_H_
diff --git a/util/logging.h b/util/logging.h
index 5b2217f..946962b 100644
--- a/util/logging.h
+++ b/util/logging.h
@@ -13,7 +13,7 @@
#include <ostream>
#include <sstream>
-#include "util/util.h"
+#include "absl/base/attributes.h"
// Debug-only checking.
#define DCHECK(condition) assert(condition)
@@ -93,7 +93,7 @@ class LogMessageFatal : public LogMessage {
public:
LogMessageFatal(const char* file, int line)
: LogMessage(file, line) {}
- ATTRIBUTE_NORETURN ~LogMessageFatal() {
+ ABSL_ATTRIBUTE_NORETURN ~LogMessageFatal() {
Flush();
abort();
}
diff --git a/util/mix.h b/util/mix.h
deleted file mode 100644
index d85c172..0000000
--- a/util/mix.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2016 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_MIX_H_
-#define UTIL_MIX_H_
-
-#include <stddef.h>
-#include <limits>
-
-namespace re2 {
-
-// Silence "truncation of constant value" warning for kMul in 32-bit mode.
-// Since this is a header file, push and then pop to limit the scope.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4309)
-#endif
-
-class HashMix {
- public:
- HashMix() : hash_(1) {}
- explicit HashMix(size_t val) : hash_(val + 83) {}
- void Mix(size_t val) {
- static const size_t kMul = static_cast<size_t>(0xdc3eb94af8ab4c93ULL);
- hash_ *= kMul;
- hash_ = ((hash_ << 19) |
- (hash_ >> (std::numeric_limits<size_t>::digits - 19))) + val;
- }
- size_t get() const { return hash_; }
- private:
- size_t hash_;
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-} // namespace re2
-
-#endif // UTIL_MIX_H_
diff --git a/util/mutex.h b/util/mutex.h
deleted file mode 100644
index 57c5732..0000000
--- a/util/mutex.h
+++ /dev/null
@@ -1,164 +0,0 @@
-// Copyright 2007 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_MUTEX_H_
-#define UTIL_MUTEX_H_
-
-/*
- * A simple mutex wrapper, supporting locks and read-write locks.
- * You should assume the locks are *not* re-entrant.
- */
-
-#ifdef RE2_NO_THREADS
-#include <assert.h>
-#define MUTEX_IS_LOCK_COUNTER
-#else
-#ifdef _WIN32
-// Requires Windows Vista or Windows Server 2008 at minimum.
-#include <windows.h>
-#if defined(WINVER) && WINVER >= 0x0600
-#define MUTEX_IS_WIN32_SRWLOCK
-#endif
-#else
-#ifndef _POSIX_C_SOURCE
-#define _POSIX_C_SOURCE 200809L
-#endif
-#include <unistd.h>
-#if defined(_POSIX_READER_WRITER_LOCKS) && _POSIX_READER_WRITER_LOCKS > 0
-#define MUTEX_IS_PTHREAD_RWLOCK
-#endif
-#endif
-#endif
-
-#if defined(MUTEX_IS_LOCK_COUNTER)
-typedef int MutexType;
-#elif defined(MUTEX_IS_WIN32_SRWLOCK)
-typedef SRWLOCK MutexType;
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-#include <pthread.h>
-#include <stdlib.h>
-typedef pthread_rwlock_t MutexType;
-#else
-#include <shared_mutex>
-typedef std::shared_mutex MutexType;
-#endif
-
-namespace re2 {
-
-class Mutex {
- public:
- inline Mutex();
- inline ~Mutex();
- inline void Lock(); // Block if needed until free then acquire exclusively
- inline void Unlock(); // Release a lock acquired via Lock()
- // Note that on systems that don't support read-write locks, these may
- // be implemented as synonyms to Lock() and Unlock(). So you can use
- // these for efficiency, but don't use them anyplace where being able
- // to do shared reads is necessary to avoid deadlock.
- inline void ReaderLock(); // Block until free or shared then acquire a share
- inline void ReaderUnlock(); // Release a read share of this Mutex
- inline void WriterLock() { Lock(); } // Acquire an exclusive lock
- inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock()
-
- private:
- MutexType mutex_;
-
- // Catch the error of writing Mutex when intending MutexLock.
- Mutex(Mutex *ignored);
-
- Mutex(const Mutex&) = delete;
- Mutex& operator=(const Mutex&) = delete;
-};
-
-#if defined(MUTEX_IS_LOCK_COUNTER)
-
-Mutex::Mutex() : mutex_(0) { }
-Mutex::~Mutex() { assert(mutex_ == 0); }
-void Mutex::Lock() { assert(--mutex_ == -1); }
-void Mutex::Unlock() { assert(mutex_++ == -1); }
-void Mutex::ReaderLock() { assert(++mutex_ > 0); }
-void Mutex::ReaderUnlock() { assert(mutex_-- > 0); }
-
-#elif defined(MUTEX_IS_WIN32_SRWLOCK)
-
-Mutex::Mutex() : mutex_(SRWLOCK_INIT) { }
-Mutex::~Mutex() { }
-void Mutex::Lock() { AcquireSRWLockExclusive(&mutex_); }
-void Mutex::Unlock() { ReleaseSRWLockExclusive(&mutex_); }
-void Mutex::ReaderLock() { AcquireSRWLockShared(&mutex_); }
-void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
-
-#elif defined(MUTEX_IS_PTHREAD_RWLOCK)
-
-#define SAFE_PTHREAD(fncall) \
- do { \
- if ((fncall) != 0) abort(); \
- } while (0)
-
-Mutex::Mutex() { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
-Mutex::~Mutex() { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
-void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock(&mutex_)); }
-void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
-void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock(&mutex_)); }
-void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock(&mutex_)); }
-
-#undef SAFE_PTHREAD
-
-#else
-
-Mutex::Mutex() { }
-Mutex::~Mutex() { }
-void Mutex::Lock() { mutex_.lock(); }
-void Mutex::Unlock() { mutex_.unlock(); }
-void Mutex::ReaderLock() { mutex_.lock_shared(); }
-void Mutex::ReaderUnlock() { mutex_.unlock_shared(); }
-
-#endif
-
-// --------------------------------------------------------------------------
-// Some helper classes
-
-// MutexLock(mu) acquires mu when constructed and releases it when destroyed.
-class MutexLock {
- public:
- explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); }
- ~MutexLock() { mu_->Unlock(); }
- private:
- Mutex * const mu_;
-
- MutexLock(const MutexLock&) = delete;
- MutexLock& operator=(const MutexLock&) = delete;
-};
-
-// ReaderMutexLock and WriterMutexLock do the same, for rwlocks
-class ReaderMutexLock {
- public:
- explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); }
- ~ReaderMutexLock() { mu_->ReaderUnlock(); }
- private:
- Mutex * const mu_;
-
- ReaderMutexLock(const ReaderMutexLock&) = delete;
- ReaderMutexLock& operator=(const ReaderMutexLock&) = delete;
-};
-
-class WriterMutexLock {
- public:
- explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); }
- ~WriterMutexLock() { mu_->WriterUnlock(); }
- private:
- Mutex * const mu_;
-
- WriterMutexLock(const WriterMutexLock&) = delete;
- WriterMutexLock& operator=(const WriterMutexLock&) = delete;
-};
-
-// Catch bug where variable name is omitted, e.g. MutexLock (&mu);
-#define MutexLock(x) static_assert(false, "MutexLock declaration missing variable name")
-#define ReaderMutexLock(x) static_assert(false, "ReaderMutexLock declaration missing variable name")
-#define WriterMutexLock(x) static_assert(false, "WriterMutexLock declaration missing variable name")
-
-} // namespace re2
-
-#endif // UTIL_MUTEX_H_
diff --git a/util/pcre.cc b/util/pcre.cc
index b689851..82b4f59 100644
--- a/util/pcre.cc
+++ b/util/pcre.cc
@@ -15,11 +15,10 @@
#include <string>
#include <utility>
-#include "util/util.h"
-#include "util/flags.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/str_format.h"
#include "util/logging.h"
#include "util/pcre.h"
-#include "util/strutil.h"
// Silence warnings about the wacky formatting in the operator() functions.
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
@@ -33,10 +32,10 @@
// not exceed main thread stacks. Note that other threads
// often have smaller stacks, and therefore tightening
// regexp_stack_limit may frequently be necessary.
-DEFINE_FLAG(int, regexp_stack_limit, 256 << 10,
- "default PCRE stack limit (bytes)");
-DEFINE_FLAG(int, regexp_match_limit, 1000000,
- "default PCRE match limit (function calls)");
+ABSL_FLAG(int, regexp_stack_limit, 256 << 10,
+ "default PCRE stack limit (bytes)");
+ABSL_FLAG(int, regexp_match_limit, 1000000,
+ "default PCRE match limit (function calls)");
#ifndef USEPCRE
@@ -191,24 +190,11 @@ pcre* PCRE::Compile(Anchor anchor) {
/***** Convenience interfaces *****/
-bool PCRE::FullMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
+bool PCRE::FullMatchFunctor::operator()(
+ absl::string_view text, const PCRE& re, const Arg& a0, const Arg& a1,
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, const Arg& a6,
+ const Arg& a7, const Arg& a8, const Arg& a9, const Arg& a10, const Arg& a11,
+ const Arg& a12, const Arg& a13, const Arg& a14, const Arg& a15) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&a0 == &no_more_args) goto done; args[n++] = &a0;
@@ -234,24 +220,11 @@ done:
return re.DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
}
-bool PCRE::PartialMatchFunctor::operator ()(const StringPiece& text,
- const PCRE& re,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
+bool PCRE::PartialMatchFunctor::operator()(
+ absl::string_view text, const PCRE& re, const Arg& a0, const Arg& a1,
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, const Arg& a6,
+ const Arg& a7, const Arg& a8, const Arg& a9, const Arg& a10, const Arg& a11,
+ const Arg& a12, const Arg& a13, const Arg& a14, const Arg& a15) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&a0 == &no_more_args) goto done; args[n++] = &a0;
@@ -277,24 +250,11 @@ done:
return re.DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
}
-bool PCRE::ConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
+bool PCRE::ConsumeFunctor::operator()(
+ absl::string_view* input, const PCRE& pattern, const Arg& a0, const Arg& a1,
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, const Arg& a6,
+ const Arg& a7, const Arg& a8, const Arg& a9, const Arg& a10, const Arg& a11,
+ const Arg& a12, const Arg& a13, const Arg& a14, const Arg& a15) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&a0 == &no_more_args) goto done; args[n++] = &a0;
@@ -326,24 +286,11 @@ done:
}
}
-bool PCRE::FindAndConsumeFunctor::operator ()(StringPiece* input,
- const PCRE& pattern,
- const Arg& a0,
- const Arg& a1,
- const Arg& a2,
- const Arg& a3,
- const Arg& a4,
- const Arg& a5,
- const Arg& a6,
- const Arg& a7,
- const Arg& a8,
- const Arg& a9,
- const Arg& a10,
- const Arg& a11,
- const Arg& a12,
- const Arg& a13,
- const Arg& a14,
- const Arg& a15) const {
+bool PCRE::FindAndConsumeFunctor::operator()(
+ absl::string_view* input, const PCRE& pattern, const Arg& a0, const Arg& a1,
+ const Arg& a2, const Arg& a3, const Arg& a4, const Arg& a5, const Arg& a6,
+ const Arg& a7, const Arg& a8, const Arg& a9, const Arg& a10, const Arg& a11,
+ const Arg& a12, const Arg& a13, const Arg& a14, const Arg& a15) const {
const Arg* args[kMaxArgs];
int n = 0;
if (&a0 == &no_more_args) goto done; args[n++] = &a0;
@@ -375,9 +322,8 @@ done:
}
}
-bool PCRE::Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
+bool PCRE::Replace(std::string* str, const PCRE& pattern,
+ absl::string_view rewrite) {
int vec[kVecSize] = {};
int matches = pattern.TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
if (matches == 0)
@@ -393,9 +339,8 @@ bool PCRE::Replace(std::string *str,
return true;
}
-int PCRE::GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite) {
+int PCRE::GlobalReplace(std::string* str, const PCRE& pattern,
+ absl::string_view rewrite) {
int count = 0;
int vec[kVecSize] = {};
std::string out;
@@ -451,10 +396,8 @@ int PCRE::GlobalReplace(std::string *str,
return count;
}
-bool PCRE::Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out) {
+bool PCRE::Extract(absl::string_view text, const PCRE& pattern,
+ absl::string_view rewrite, std::string* out) {
int vec[kVecSize] = {};
int matches = pattern.TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
if (matches == 0)
@@ -463,7 +406,7 @@ bool PCRE::Extract(const StringPiece &text,
return pattern.Rewrite(out, rewrite, text, vec, matches);
}
-std::string PCRE::QuoteMeta(const StringPiece& unquoted) {
+std::string PCRE::QuoteMeta(absl::string_view unquoted) {
std::string result;
result.reserve(unquoted.size() << 1);
@@ -508,12 +451,8 @@ void PCRE::ClearHitLimit() {
hit_limit_ = 0;
}
-int PCRE::TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const {
+int PCRE::TryMatch(absl::string_view text, size_t startpos, Anchor anchor,
+ bool empty_ok, int* vec, int vecsize) const {
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
if (re == NULL) {
PCREPORT(ERROR) << "Matching against invalid re: " << *error_;
@@ -522,12 +461,12 @@ int PCRE::TryMatch(const StringPiece& text,
int match_limit = match_limit_;
if (match_limit <= 0) {
- match_limit = GetFlag(FLAGS_regexp_match_limit);
+ match_limit = absl::GetFlag(FLAGS_regexp_match_limit);
}
int stack_limit = stack_limit_;
if (stack_limit <= 0) {
- stack_limit = GetFlag(FLAGS_regexp_stack_limit);
+ stack_limit = absl::GetFlag(FLAGS_regexp_stack_limit);
}
pcre_extra extra = { 0 };
@@ -604,12 +543,8 @@ int PCRE::TryMatch(const StringPiece& text,
return rc;
}
-bool PCRE::DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const* args,
- int n,
- int* vec,
+bool PCRE::DoMatchImpl(absl::string_view text, Anchor anchor, size_t* consumed,
+ const Arg* const* args, int n, int* vec,
int vecsize) const {
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
if (NumberOfCapturingGroups() < n) {
@@ -654,11 +589,8 @@ bool PCRE::DoMatchImpl(const StringPiece& text,
return true;
}
-bool PCRE::DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n) const {
+bool PCRE::DoMatch(absl::string_view text, Anchor anchor, size_t* consumed,
+ const Arg* const args[], int n) const {
assert(n >= 0);
const int vecsize = (1 + n) * 3; // results + PCRE workspace
// (as for kVecSize)
@@ -668,8 +600,8 @@ bool PCRE::DoMatch(const StringPiece& text,
return b;
}
-bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
- const StringPiece &text, int *vec, int veclen) const {
+bool PCRE::Rewrite(std::string* out, absl::string_view rewrite,
+ absl::string_view text, int* vec, int veclen) const {
int number_of_capturing_groups = NumberOfCapturingGroups();
for (const char *s = rewrite.data(), *end = s + rewrite.size();
s < end; s++) {
@@ -704,7 +636,7 @@ bool PCRE::Rewrite(std::string *out, const StringPiece &rewrite,
return true;
}
-bool PCRE::CheckRewriteString(const StringPiece& rewrite,
+bool PCRE::CheckRewriteString(absl::string_view rewrite,
std::string* error) const {
int max_token = -1;
for (const char *s = rewrite.data(), *end = s + rewrite.size();
@@ -733,7 +665,7 @@ bool PCRE::CheckRewriteString(const StringPiece& rewrite,
}
if (max_token > NumberOfCapturingGroups()) {
- *error = StringPrintf(
+ *error = absl::StrFormat(
"Rewrite schema requests %d matches, but the regexp only has %d "
"parenthesized subexpressions.",
max_token, NumberOfCapturingGroups());
@@ -742,7 +674,6 @@ bool PCRE::CheckRewriteString(const StringPiece& rewrite,
return true;
}
-
// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction.
int PCRE::NumberOfCapturingGroups() const {
@@ -774,9 +705,9 @@ bool PCRE::Arg::parse_string(const char* str, size_t n, void* dest) {
return true;
}
-bool PCRE::Arg::parse_stringpiece(const char* str, size_t n, void* dest) {
+bool PCRE::Arg::parse_string_view(const char* str, size_t n, void* dest) {
if (dest == NULL) return true;
- *(reinterpret_cast<StringPiece*>(dest)) = StringPiece(str, n);
+ *(reinterpret_cast<absl::string_view*>(dest)) = absl::string_view(str, n);
return true;
}
diff --git a/util/pcre.h b/util/pcre.h
index f0a7180..c270942 100644
--- a/util/pcre.h
+++ b/util/pcre.h
@@ -120,12 +120,12 @@
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
-// them as they match. This requires use of the "StringPiece" type,
+// them as they match. This requires use of the string_view type,
// which represents a sub-range of a real string.
//
// Example: read lines of the form "var = value" from a string.
-// std::string contents = ...; // Fill string somehow
-// StringPiece input(contents); // Wrap a StringPiece around it
+// std::string contents = ...; // Fill string somehow
+// absl::string_view input(contents); // Wrap a string_view around it
//
// std::string var;
// int value;
@@ -161,8 +161,7 @@
// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d));
// will leave 64 in a, b, c, and d.
-#include "util/util.h"
-#include "re2/stringpiece.h"
+#include "absl/strings/string_view.h"
#ifdef USEPCRE
#include <pcre.h>
@@ -246,10 +245,10 @@ class PCRE {
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of:
- // std::string (matched piece is copied to string)
- // StringPiece (StringPiece is mutated to point to matched piece)
- // T (where "bool T::ParseFrom(const char*, size_t)" exists)
- // (void*)NULL (the corresponding matched sub-pattern is not copied)
+ // std::string (matched piece is copied to string)
+ // absl::string_view (string_view is mutated to point to matched piece)
+ // T ("bool T::ParseFrom(const char*, size_t)" must exist)
+ // (void*)NULL (the corresponding matched sub-pattern is not copied)
//
// Returns true iff all of the following conditions are satisfied:
// a. "text" matches "pattern" exactly
@@ -267,7 +266,7 @@ class PCRE {
// int number;
// PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
struct FullMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ bool operator ()(absl::string_view text, const PCRE& re, // 3..16 args
const Arg& ptr1 = no_more_args,
const Arg& ptr2 = no_more_args,
const Arg& ptr3 = no_more_args,
@@ -291,7 +290,7 @@ class PCRE {
// Exactly like FullMatch(), except that "pattern" is allowed to match
// a substring of "text".
struct PartialMatchFunctor {
- bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args
+ bool operator ()(absl::string_view text, const PCRE& re, // 3..16 args
const Arg& ptr1 = no_more_args,
const Arg& ptr2 = no_more_args,
const Arg& ptr3 = no_more_args,
@@ -316,7 +315,7 @@ class PCRE {
// match a prefix of "text", and "input" is advanced past the matched
// text. Note: "input" is modified iff this routine returns true.
struct ConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args
+ bool operator ()(absl::string_view* input, const PCRE& pattern, // 3..16 args
const Arg& ptr1 = no_more_args,
const Arg& ptr2 = no_more_args,
const Arg& ptr3 = no_more_args,
@@ -342,7 +341,7 @@ class PCRE {
// "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next
// word in "s" and stores it in "word".
struct FindAndConsumeFunctor {
- bool operator ()(StringPiece* input, const PCRE& pattern,
+ bool operator ()(absl::string_view* input, const PCRE& pattern, // 3..16 args
const Arg& ptr1 = no_more_args,
const Arg& ptr2 = no_more_args,
const Arg& ptr3 = no_more_args,
@@ -376,9 +375,8 @@ class PCRE {
//
// Returns true if the pattern matches and a replacement occurs,
// false otherwise.
- static bool Replace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
+ static bool Replace(std::string* str, const PCRE& pattern,
+ absl::string_view rewrite);
// Like Replace(), except replaces all occurrences of the pattern in
// the string with the rewrite. Replacements are not subject to
@@ -390,9 +388,8 @@ class PCRE {
// will leave "s" containing "yada dada doo"
//
// Returns the number of replacements made.
- static int GlobalReplace(std::string *str,
- const PCRE& pattern,
- const StringPiece& rewrite);
+ static int GlobalReplace(std::string* str, const PCRE& pattern,
+ absl::string_view rewrite);
// Like Replace, except that if the pattern matches, "rewrite"
// is copied into "out" with substitutions. The non-matching
@@ -400,10 +397,8 @@ class PCRE {
//
// Returns true iff a match occurred and the extraction happened
// successfully; if no match occurs, the string is left unaffected.
- static bool Extract(const StringPiece &text,
- const PCRE& pattern,
- const StringPiece &rewrite,
- std::string *out);
+ static bool Extract(absl::string_view text, const PCRE& pattern,
+ absl::string_view rewrite, std::string* out);
// Check that the given @p rewrite string is suitable for use with
// this PCRE. It checks that:
@@ -418,8 +413,7 @@ class PCRE {
// @param error An error message is recorded here, iff we return false.
// Otherwise, it is unchanged.
// @return true, iff @p rewrite is suitable for use with the PCRE.
- bool CheckRewriteString(const StringPiece& rewrite,
- std::string* error) const;
+ bool CheckRewriteString(absl::string_view rewrite, std::string* error) const;
// Returns a copy of 'unquoted' with all potentially meaningful
// regexp characters backslash-escaped. The returned string, used
@@ -428,7 +422,7 @@ class PCRE {
// 1.5-2.0?
// becomes:
// 1\.5\-2\.0\?
- static std::string QuoteMeta(const StringPiece& unquoted);
+ static std::string QuoteMeta(absl::string_view unquoted);
/***** Generic matching interface (not so nice to use) *****/
@@ -441,9 +435,7 @@ class PCRE {
// General matching routine. Stores the length of the match in
// "*consumed" if successful.
- bool DoMatch(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
+ bool DoMatch(absl::string_view text, Anchor anchor, size_t* consumed,
const Arg* const* args, int n) const;
// Return the number of capturing subpatterns, or -1 if the
@@ -465,29 +457,17 @@ class PCRE {
// against "foo", "bar", and "baz" respectively.
// When matching PCRE("(foo)|hello") against "hello", it will return 1.
// But the values for all subpattern are filled in into "vec".
- int TryMatch(const StringPiece& text,
- size_t startpos,
- Anchor anchor,
- bool empty_ok,
- int *vec,
- int vecsize) const;
+ int TryMatch(absl::string_view text, size_t startpos, Anchor anchor,
+ bool empty_ok, int* vec, int vecsize) const;
// Append the "rewrite" string, with backslash subsitutions from "text"
// and "vec", to string "out".
- bool Rewrite(std::string *out,
- const StringPiece &rewrite,
- const StringPiece &text,
- int *vec,
- int veclen) const;
+ bool Rewrite(std::string* out, absl::string_view rewrite,
+ absl::string_view text, int* vec, int veclen) const;
// internal implementation for DoMatch
- bool DoMatchImpl(const StringPiece& text,
- Anchor anchor,
- size_t* consumed,
- const Arg* const args[],
- int n,
- int* vec,
- int vecsize) const;
+ bool DoMatchImpl(absl::string_view text, Anchor anchor, size_t* consumed,
+ const Arg* const args[], int n, int* vec, int vecsize) const;
// Compile the regexp for the specified anchoring mode
pcre* Compile(Anchor anchor);
@@ -586,7 +566,7 @@ class PCRE::Arg {
MAKE_PARSER(float, parse_float);
MAKE_PARSER(double, parse_double);
MAKE_PARSER(std::string, parse_string);
- MAKE_PARSER(StringPiece, parse_stringpiece);
+ MAKE_PARSER(absl::string_view, parse_string_view);
MAKE_PARSER(short, parse_short);
MAKE_PARSER(unsigned short, parse_ushort);
@@ -613,14 +593,14 @@ class PCRE::Arg {
void* arg_;
Parser parser_;
- static bool parse_null (const char* str, size_t n, void* dest);
- static bool parse_char (const char* str, size_t n, void* dest);
- static bool parse_schar (const char* str, size_t n, void* dest);
- static bool parse_uchar (const char* str, size_t n, void* dest);
- static bool parse_float (const char* str, size_t n, void* dest);
- static bool parse_double (const char* str, size_t n, void* dest);
- static bool parse_string (const char* str, size_t n, void* dest);
- static bool parse_stringpiece (const char* str, size_t n, void* dest);
+ static bool parse_null (const char* str, size_t n, void* dest);
+ static bool parse_char (const char* str, size_t n, void* dest);
+ static bool parse_schar (const char* str, size_t n, void* dest);
+ static bool parse_uchar (const char* str, size_t n, void* dest);
+ static bool parse_float (const char* str, size_t n, void* dest);
+ static bool parse_double (const char* str, size_t n, void* dest);
+ static bool parse_string (const char* str, size_t n, void* dest);
+ static bool parse_string_view (const char* str, size_t n, void* dest);
#define DECLARE_INTEGER_PARSER(name) \
private: \
diff --git a/util/strutil.cc b/util/strutil.cc
index fb7e6b1..da06f85 100644
--- a/util/strutil.cc
+++ b/util/strutil.cc
@@ -2,79 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-#include <stdarg.h>
-#include <stdio.h>
-
#include "util/strutil.h"
-#ifdef _WIN32
-#define snprintf _snprintf
-#define vsnprintf _vsnprintf
-#endif
-
namespace re2 {
-// ----------------------------------------------------------------------
-// CEscapeString()
-// Copies 'src' to 'dest', escaping dangerous characters using
-// C-style escape sequences. 'src' and 'dest' should not overlap.
-// Returns the number of bytes written to 'dest' (not including the \0)
-// or (size_t)-1 if there was insufficient space.
-// ----------------------------------------------------------------------
-static size_t CEscapeString(const char* src, size_t src_len,
- char* dest, size_t dest_len) {
- const char* src_end = src + src_len;
- size_t used = 0;
-
- for (; src < src_end; src++) {
- if (dest_len - used < 2) // space for two-character escape
- return (size_t)-1;
-
- unsigned char c = *src;
- switch (c) {
- case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break;
- case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break;
- case '\t': dest[used++] = '\\'; dest[used++] = 't'; break;
- case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
- case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
- case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
- default:
- // Note that if we emit \xNN and the src character after that is a hex
- // digit then that digit must be escaped too to prevent it being
- // interpreted as part of the character code by C.
- if (c < ' ' || c > '~') {
- if (dest_len - used < 5) // space for four-character escape + \0
- return (size_t)-1;
- snprintf(dest + used, 5, "\\%03o", c);
- used += 4;
- } else {
- dest[used++] = c; break;
- }
- }
- }
-
- if (dest_len - used < 1) // make sure that there is room for \0
- return (size_t)-1;
-
- dest[used] = '\0'; // doesn't count towards return value though
- return used;
-}
-
-// ----------------------------------------------------------------------
-// CEscape()
-// Copies 'src' to result, escaping dangerous characters using
-// C-style escape sequences. 'src' and 'dest' should not overlap.
-// ----------------------------------------------------------------------
-std::string CEscape(const StringPiece& src) {
- const size_t dest_len = src.size() * 4 + 1; // Maximum possible expansion
- char* dest = new char[dest_len];
- const size_t used = CEscapeString(src.data(), src.size(),
- dest, dest_len);
- std::string s = std::string(dest, used);
- delete[] dest;
- return s;
-}
-
void PrefixSuccessor(std::string* prefix) {
// We can increment the last character in the string and be done
// unless that character is 255, in which case we have to erase the
@@ -92,58 +23,4 @@ void PrefixSuccessor(std::string* prefix) {
}
}
-static void StringAppendV(std::string* dst, const char* format, va_list ap) {
- // First try with a small fixed size buffer
- char space[1024];
-
- // It's possible for methods that use a va_list to invalidate
- // the data in it upon use. The fix is to make a copy
- // of the structure before using it and use that copy instead.
- va_list backup_ap;
- va_copy(backup_ap, ap);
- int result = vsnprintf(space, sizeof(space), format, backup_ap);
- va_end(backup_ap);
-
- if ((result >= 0) && (static_cast<size_t>(result) < sizeof(space))) {
- // It fit
- dst->append(space, result);
- return;
- }
-
- // Repeatedly increase buffer size until it fits
- int length = sizeof(space);
- while (true) {
- if (result < 0) {
- // Older behavior: just try doubling the buffer size
- length *= 2;
- } else {
- // We need exactly "result+1" characters
- length = result+1;
- }
- char* buf = new char[length];
-
- // Restore the va_list before we use it again
- va_copy(backup_ap, ap);
- result = vsnprintf(buf, length, format, backup_ap);
- va_end(backup_ap);
-
- if ((result >= 0) && (result < length)) {
- // It fit
- dst->append(buf, result);
- delete[] buf;
- return;
- }
- delete[] buf;
- }
-}
-
-std::string StringPrintf(const char* format, ...) {
- va_list ap;
- va_start(ap, format);
- std::string result;
- StringAppendV(&result, format, ap);
- va_end(ap);
- return result;
-}
-
} // namespace re2
diff --git a/util/strutil.h b/util/strutil.h
index a69908a..f5d87a5 100644
--- a/util/strutil.h
+++ b/util/strutil.h
@@ -7,14 +7,9 @@
#include <string>
-#include "re2/stringpiece.h"
-#include "util/util.h"
-
namespace re2 {
-std::string CEscape(const StringPiece& src);
void PrefixSuccessor(std::string* prefix);
-std::string StringPrintf(const char* format, ...);
} // namespace re2
diff --git a/util/test.cc b/util/test.cc
deleted file mode 100644
index 028616b..0000000
--- a/util/test.cc
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <stdio.h>
-#include <string>
-
-#include "util/test.h"
-
-namespace testing {
-std::string TempDir() { return "/tmp/"; }
-} // namespace testing
-
-struct Test {
- void (*fn)(void);
- const char *name;
-};
-
-static Test tests[10000];
-static int ntests;
-
-void RegisterTest(void (*fn)(void), const char *name) {
- tests[ntests].fn = fn;
- tests[ntests++].name = name;
-}
-
-int main(int argc, char** argv) {
- for (int i = 0; i < ntests; i++) {
- printf("%s\n", tests[i].name);
- tests[i].fn();
- }
- printf("PASS\n");
- return 0;
-}
diff --git a/util/test.h b/util/test.h
deleted file mode 100644
index 54e6f8f..0000000
--- a/util/test.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_TEST_H_
-#define UTIL_TEST_H_
-
-#include "util/util.h"
-#include "util/logging.h"
-
-namespace testing {
-std::string TempDir();
-} // namespace testing
-
-#define TEST(x, y) \
- void x##y(void); \
- TestRegisterer r##x##y(x##y, # x "." # y); \
- void x##y(void)
-
-void RegisterTest(void (*)(void), const char*);
-
-class TestRegisterer {
- public:
- TestRegisterer(void (*fn)(void), const char *s) {
- RegisterTest(fn, s);
- }
-};
-
-// fatal assertions
-#define ASSERT_TRUE CHECK
-#define ASSERT_FALSE(x) CHECK(!(x))
-#define ASSERT_EQ CHECK_EQ
-#define ASSERT_NE CHECK_NE
-#define ASSERT_LT CHECK_LT
-#define ASSERT_LE CHECK_LE
-#define ASSERT_GT CHECK_GT
-#define ASSERT_GE CHECK_GE
-
-// nonfatal assertions
-// TODO(rsc): Do a better job?
-#define EXPECT_TRUE CHECK
-#define EXPECT_FALSE(x) CHECK(!(x))
-#define EXPECT_EQ CHECK_EQ
-#define EXPECT_NE CHECK_NE
-#define EXPECT_LT CHECK_LT
-#define EXPECT_LE CHECK_LE
-#define EXPECT_GT CHECK_GT
-#define EXPECT_GE CHECK_GE
-
-#endif // UTIL_TEST_H_
diff --git a/util/util.h b/util/util.h
deleted file mode 100644
index 56e46c1..0000000
--- a/util/util.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2009 The RE2 Authors. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef UTIL_UTIL_H_
-#define UTIL_UTIL_H_
-
-#define arraysize(array) (sizeof(array)/sizeof((array)[0]))
-
-#ifndef ATTRIBUTE_NORETURN
-#if defined(__GNUC__)
-#define ATTRIBUTE_NORETURN __attribute__((noreturn))
-#elif defined(_MSC_VER)
-#define ATTRIBUTE_NORETURN __declspec(noreturn)
-#else
-#define ATTRIBUTE_NORETURN
-#endif
-#endif
-
-#ifndef ATTRIBUTE_UNUSED
-#if defined(__GNUC__)
-#define ATTRIBUTE_UNUSED __attribute__((unused))
-#else
-#define ATTRIBUTE_UNUSED
-#endif
-#endif
-
-#ifndef FALLTHROUGH_INTENDED
-#if defined(__clang__)
-#define FALLTHROUGH_INTENDED [[clang::fallthrough]]
-#elif defined(__GNUC__) && __GNUC__ >= 7
-#define FALLTHROUGH_INTENDED [[gnu::fallthrough]]
-#else
-#define FALLTHROUGH_INTENDED do {} while (0)
-#endif
-#endif
-
-#ifndef NO_THREAD_SAFETY_ANALYSIS
-#define NO_THREAD_SAFETY_ANALYSIS
-#endif
-
-#endif // UTIL_UTIL_H_