Skip to content

Commit

Permalink
vre: Migrate to pcre2
Browse files Browse the repository at this point in the history
Now that VRE is the only regular expression API we use, we can migrate
its backend to pcre2. The existing 'pcre_*' parameters are also renamed
to reflect this migration, and 'pcre_match_limit_recursion' gets special
treatment and is renamed to pcre2_depth_limit.

This creates an additional API breakage in VRE: the `match_recursion`
field in `struct vre_limits` is also renamed. One last breakage is the
removal of `VRE_has_jit` used by only one undocumented varnishtest
feature, and the pcre_jit feature is only used by one test case that no
longer fails.

The pcre jit compilation feature was broken anyway: sealing it at
compile time will not reflect what VRE actually links to. Once we have
a test case needing the jit feature, we can introduce a better API for
that check.

There is one outstanding performance problem, the ovector that was
previously allocated on the stack now needs to be allocated from the
heap. It might be possible to implement a pcre2 context to fix that or
maybe pool them, but for now we have heap allocations on the critical
path. The VRE_sub() function makes sure to make a single ovector
allocation (technically a pcre2_match_data allocation) since it's the
only one guaranteed to loop on a single regular expression for the
`regsuball()` use case.

Closes #3616
Closes #3559
  • Loading branch information
dridi committed Jul 5, 2021
1 parent 9219b0d commit 0436ea6
Show file tree
Hide file tree
Showing 19 changed files with 179 additions and 242 deletions.
2 changes: 1 addition & 1 deletion .circleci/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ RUN set -e;\
libtool \
libunwind-devel \
make \
pcre-devel \
pcre2-devel \
python3 \
python-sphinx
2 changes: 1 addition & 1 deletion .circleci/Dockerfile.alpine
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ RUN set -e; \
libtool \
libunwind-dev \
linux-headers \
pcre-dev \
pcre2-dev \
py-docutils \
py3-sphinx \
tar
2 changes: 1 addition & 1 deletion .circleci/Dockerfile.archlinux
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ RUN set -e; \
libtool \
libunwind \
linux-headers \
pcre \
pcre2 \
python-docutils \
python-sphinx \
tar
2 changes: 1 addition & 1 deletion .circleci/Dockerfile.ubuntu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN set -e; \
libedit-dev \
libjemalloc-dev \
libncurses-dev \
libpcre3-dev \
libpcre2-dev \
libtool \
libunwind-dev \
pkg-config \
Expand Down
10 changes: 5 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
libtool \
libunwind-devel \
make \
pcre-devel \
pcre2-devel \
python3 \
python-sphinx
- checkout
Expand Down Expand Up @@ -201,7 +201,7 @@ jobs:
libtool \
libunwind-devel \
make \
pcre-devel \
pcre2-devel \
python3 \
sudo
elif [ << parameters.dist >> = debian -o << parameters.dist >> = ubuntu ]; then
Expand All @@ -219,7 +219,7 @@ jobs:
libedit-dev \
libjemalloc-dev \
libncurses-dev \
libpcre3-dev \
libpcre2-dev \
libtool \
libunwind-dev \
pkg-config \
Expand All @@ -239,7 +239,7 @@ jobs:
libtool \
libunwind-dev \
linux-headers \
pcre-dev \
pcre2-dev \
py-docutils \
py3-sphinx \
tar \
Expand All @@ -258,7 +258,7 @@ jobs:
libtool \
libunwind \
linux-headers \
pcre \
pcre2 \
python-docutils \
python-sphinx \
tar
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
- python3-docutils
- python3-sphinx
- libunwind-dev
- libpcre3-dev
- libpcre2-dev
before_script:
- ./autogen.sh
- ./configure --enable-maintainer-mode --with-unwind
Expand Down
2 changes: 1 addition & 1 deletion bin/varnishd/cache/cache_vrt_re.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ VPI_re_init(vre_t **rep, const char *re)

/* This was already check-compiled by the VCL compiler */
t = VRE_compile(re, 0, &error, &erroroffset,
cache_param->pcre_jit_compilation);
cache_param->pcre2_jit_compilation);
AN(t);
*rep = t;
}
Expand Down
2 changes: 1 addition & 1 deletion bin/varnishd/flint.lnt
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
-emacro(835, O_LARGEFILE) // Info 835: A zero has been given as left argument to operator '<<'

-emacro(845, HTTPH) // Info 845: The left argument to operator '&&' is certain to be 0
-esym(773, PCRE_DATE) // Expression-like macro '___' not parenthesized
-esym(773, PCRE2_DATE) // Expression-like macro '___' not parenthesized

//////////////
// Macros defined differently in each VMOD
Expand Down
52 changes: 0 additions & 52 deletions bin/varnishtest/tests/r01576.vtc

This file was deleted.

4 changes: 2 additions & 2 deletions bin/varnishtest/tests/r01644.vtc
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ varnish v1 -vcl+backend {

} -start

varnish v1 -cliok "param.set pcre_match_limit 100"
varnish v1 -cliok "param.set pcre2_match_limit 100"

client c1 {
txreq
rxresp
expect resp.http.foo == 100
} -run

varnish v1 -cliok "param.set pcre_match_limit 200"
varnish v1 -cliok "param.set pcre2_match_limit 200"

client c1 {
txreq
Expand Down
2 changes: 1 addition & 1 deletion bin/varnishtest/tests/u00006.vtc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ shell -err -expect {-I: "foo" matches zero tags} \
"varnishlog -I foo:bar"
shell -err -expect {-I: "Resp" is ambiguous} \
"varnishlog -I Resp:bar"
shell -err -expect {-I: Regex error at position 4 (pcre error 14)} \
shell -err -expect {-I: Regex error at position 4 (missing closing parenthesis)} \
{varnishlog -I "(foo"}
shell -err -expect "-t: Invalid argument" \
"varnishlog -t -1"
Expand Down
2 changes: 1 addition & 1 deletion bin/varnishtest/tests/v00004.vtc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ feature cmd {test -z "$GCOVPROG"}
# - 2019 header madness
# - 5 ESI levels down
# - 10 VCL subs down
# - PCRE regsub
# - PCRE2 regsub

server s1 {
rxreq
Expand Down
1 change: 0 additions & 1 deletion bin/varnishtest/vtc_misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,6 @@ cmd_feature(CMD_ARGS)

FEATURE("ipv4", ipvx_works("127.0.0.1"));
FEATURE("ipv6", ipvx_works("[::1]"));
FEATURE("pcre_jit", VRE_has_jit);
FEATURE("64bit", sizeof(void*) == 8);
FEATURE("disable_aslr", addr_no_randomize_works());
FEATURE("dns", dns_works());
Expand Down
85 changes: 34 additions & 51 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -111,67 +111,50 @@ AC_SUBST(LIBM)
m4_ifndef([PKG_PROG_PKG_CONFIG], [m4_fatal([pkg.m4 missing, please install pkg-config])])
PKG_PROG_PKG_CONFIG
if test -n $PKG_CONFIG; then
PKG_CHECK_MODULES([PCRE], [libpcre])
PKG_CHECK_MODULES([PCRE2], [libpcre2-8])
else
AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config)
AC_ARG_WITH(pcre-config,
AS_HELP_STRING([--with-pcre-config=PATH],
[Location of PCRE pcre-config (auto)]),
[pcre_config="$withval"],
[pcre_config=""])

if test "x$pcre_config" != "x" ; then
AC_MSG_CHECKING(for $pcre_config)

if test -f $pcre_config ; then
PCRE_CONFIG=$pcre_config
AC_CHECK_PROG(PCRE2_CONFIG, pcre2-config, pcre2-config)
AC_ARG_WITH(pcre2-config,
AS_HELP_STRING([--with-pcre2-config=PATH],
[Location of PCRE2 pcre2-config (auto)]),
[pcre2_config="$withval"],
[pcre2_config=""])

if test "x$pcre2_config" != "x" ; then
AC_MSG_CHECKING(for $pcre2_config)

if test -f $pcre2_config ; then
PCRE2_CONFIG=$pcre2_config
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(no - searching PATH)
fi
fi
if test "x$PCRE_CONFIG" = "x"; then
AC_CHECK_PROGS(PCRE_CONFIG, pcre-config)
if test "x$PCRE2_CONFIG" = "x"; then
AC_CHECK_PROGS(PCRE2_CONFIG, pcre2-config)
fi
PCRE_CFLAGS=`$PCRE_CONFIG --cflags`
PCRE_LIBS=`$PCRE_CONFIG --libs`
PCRE2_CFLAGS=`$PCRE2_CONFIG --cflags`
PCRE2_LIBS=`$PCRE2_CONFIG --libs8`
fi
AC_SUBST(PCRE_CFLAGS)
AC_SUBST(PCRE_LIBS)
AC_SUBST(PCRE2_CFLAGS)
AC_SUBST(PCRE2_LIBS)
AC_DEFINE([PCRE2_CODE_UNIT_WIDTH], [8], [Work with 8-bit characters for PCRE2])

# --enable-pcre-jit
AC_ARG_ENABLE(pcre-jit,
AS_HELP_STRING([--enable-pcre-jit],
[use the PCRE JIT compiler (default is YES)]),
save_LIBS="${LIBS}"
LIBS="${LIBS} ${PCRE2_LIBS}"
AC_CHECK_FUNCS([pcre2_set_depth_limit_8], [
AC_DEFINE([HAVE_PCRE2_SET_DEPTH_LIMIT], [1], [Use pcre2_set_depth_limit()])
])
LIBS="${save_LIBS}"

# --enable-pcre2-jit
AC_ARG_ENABLE(pcre2-jit,
AS_HELP_STRING([--enable-pcre2-jit],
[use the PCRE2 JIT compiler (default is YES)]),
[],
[enable_pcre_jit=yes])
if test "$enable_pcre_jit" = yes; then
AC_MSG_CHECKING(for PCRE JIT usability)
save_CFLAGS="${CFLAGS}"
CFLAGS="${CFLAGS} ${PCRE_CFLAGS}"
save_LIBS="${LIBS}"
LIBS="${LIBS} ${PCRE_LIBS}"
AC_RUN_IFELSE(
[AC_LANG_PROGRAM([[
#include <pcre.h>
#if PCRE_MAJOR != 8 || PCRE_MINOR < 32
#error no jit
#endif
]],[[
const char *error;
pcre *re;
int erroroffset;
re = pcre_compile(".", 0, &error, &erroroffset, NULL);
if (!pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error))
return (1);
]])],
[AC_MSG_RESULT(yes)
AC_DEFINE([USE_PCRE_JIT], [1], [Use the PCRE JIT compiler])
],
[AC_MSG_RESULT(no)]
)
CFLAGS="${save_CFLAGS}"
LIBS="${save_LIBS}"
[enable_pcre2_jit=yes])
if test "$enable_pcre2_jit" = yes; then
AC_DEFINE([USE_PCRE2_JIT], [1], [Use the PCRE2 JIT compiler])
fi


Expand Down
37 changes: 18 additions & 19 deletions include/tbl/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -707,14 +707,14 @@ PARAM_SIMPLE(
)

PARAM_SIMPLE(
/* name */ pcre_jit_compilation,
/* name */ pcre2_jit_compilation,
/* type */ boolean,
/* min */ NULL,
/* max */ NULL,
/* def */ "on",
/* units */ "bool",
/* descr */
"Use the pcre JIT compiler if available."
"Use the pcre2 JIT compiler if available."
)

PARAM_SIMPLE(
Expand Down Expand Up @@ -1615,47 +1615,46 @@ PARAM_VCC(
)

/*--------------------------------------------------------------------
* PCRE parameters
* PCRE2 parameters
*/

# define PARAM_PCRE(nm, pv, min, def, descr) \
# define PARAM_PCRE2(nm, pv, min, def, descr) \
PARAM(, , nm, tweak_uint, &mgt_param.vre_limits.pv, \
min, NULL, def, NULL, descr)

PARAM_PCRE(
/* name */ pcre_match_limit,
PARAM_PCRE2(
/* name */ pcre2_match_limit,
/* priv */ match,
/* min */ "1",
/* def */ "10000",
/* descr */
"The limit for the number of calls to the internal match()"
" function in pcre_exec().\n\n"
"(See: PCRE_EXTRA_MATCH_LIMIT in pcre docs.)\n\n"
"The limit for the number of calls to the internal match"
" logic in pcre2_match().\n\n"
"(See: pcre2_set_match_limit() in pcre2 docs.)\n\n"
"This parameter limits how much CPU time"
" regular expression matching can soak up."
)

PARAM_PCRE(
/* name */ pcre_match_limit_recursion,
/* priv */ match_recursion,
PARAM_PCRE2(
/* name */ pcre2_depth_limit,
/* priv */ depth,
/* min */ "1",
/* def */ "20",
/* descr */
"The recursion depth-limit for the internal match() function"
" in a pcre_exec().\n\n"
"(See: PCRE_EXTRA_MATCH_LIMIT_RECURSION in pcre docs.)\n\n"
"The recursion depth-limit for the internal match logic"
" in a pcre2_match().\n\n"
"(See: pcre2_set_depth_limit() in pcre2 docs.)\n\n"
"This puts an upper limit on the amount of stack used"
" by PCRE for certain classes of regular expressions.\n\n"
" by PCRE2 for certain classes of regular expressions.\n\n"
"We have set the default value low in order to"
" prevent crashes, at the cost of possible regexp"
" matching failures.\n\n"
"Matching failures will show up in the log as VCL_Error"
" messages with regexp errors -27 or -21.\n\n"
"Testcase r01576 can be useful when tuning this parameter."
" messages with regexp errors -27 or -21."
)

# undef PARAM_ALL
# undef PARAM_PCRE
# undef PARAM_PCRE2
# undef PARAM_STRING
# undef PARAM_VCC
#endif /* defined(PARAM_ALL) */
Expand Down
11 changes: 5 additions & 6 deletions include/vre.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
*
* Regular expression support
*
* We wrap PCRE in VRE to make to make it feasible to use something else
* We wrap PCRE2 in VRE to make to make it feasible to use something else
* without hunting down stuff through out the Varnish source code.
*
*/
Expand All @@ -44,16 +44,15 @@ struct vsb;

struct vre_limits {
unsigned match;
unsigned match_recursion;
unsigned depth;
};

typedef struct vre vre_t;

/* This maps to PCRE error codes */
#define VRE_ERROR_NOMATCH (-1)
/* This maps to PCRE2 error codes */
extern const int VRE_ERROR_NOMATCH;

/* And those to PCRE options */
extern const unsigned VRE_has_jit;
/* And those to PCRE2 options */
extern const unsigned VRE_CASELESS;

vre_t *VRE_compile(const char *, unsigned, int *, int *, unsigned);
Expand Down
4 changes: 2 additions & 2 deletions lib/libvarnish/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_builddir)/include \
@PCRE_CFLAGS@
@PCRE2_CFLAGS@

AM_CFLAGS = $(AM_LT_CFLAGS) @SAN_CFLAGS@
AM_LDFLAGS = $(AM_LT_LDFLAGS) @SAN_LDFLAGS@
Expand Down Expand Up @@ -44,7 +44,7 @@ libvarnish_la_SOURCES = \
vtim.c \
vus.c

libvarnish_la_LIBADD = @PCRE_LIBS@
libvarnish_la_LIBADD = @PCRE2_LIBS@

TESTS = vav_test vjsn_test vnum_c_test vbh_test vsb_test

Expand Down

0 comments on commit 0436ea6

Please sign in to comment.