Skip to content
Permalink
Browse files Browse the repository at this point in the history
CVE-2021-41115: Use re2 for user-supplied linkifier patterns.
Zulip attempts to validate that the regular expressions that admins
enter for linkifiers are well-formatted, and only contain a specific
subset of regex grammar.  The process of checking these
properties (via a regex!) can cause denial-of-service via
backtracking.

Furthermore, this validation itself does not prevent the creation of
linkifiers which themselves cause denial-of-service when they are
executed.  As the validator accepts literally anything inside of a
`(?P<word>...)` block, any quadratic backtracking expression can be
hidden therein.

Switch user-provided linkifier patterns to be matched in the Markdown
processor by the `re2` library, which is guaranteed constant-time.
This somewhat limits the possible features of the regular
expression (notably, look-head and -behind, and back-references);
however, these features had never been advertised as working in the
context of linkifiers.

A migration removes any existing linkifiers which would not function
under re2, after printing them for posterity during the upgrade; they
are unlikely to be common, and are impossible to fix automatically.

The denial-of-service in the linkifier validator was discovered by
@erik-krogh and @yoff, as GHSL-2021-118.
  • Loading branch information
alexmv committed Oct 4, 2021
1 parent d3091a6 commit e2d303c
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 43 deletions.
10 changes: 5 additions & 5 deletions frontend_tests/puppeteer_tests/realm-linkifier.ts
Expand Up @@ -42,15 +42,15 @@ async function test_delete_linkifier(page: Page): Promise<void> {
async function test_add_invalid_linkifier_pattern(page: Page): Promise<void> {
await page.waitForSelector(".admin-linkifier-form", {visible: true});
await common.fill_form(page, "form.admin-linkifier-form", {
pattern: "a$",
pattern: "(foo",
url_format_string: "https://trac.example.com/ticket/%(id)s",
});
await page.click("form.admin-linkifier-form button.button");

await page.waitForSelector("div#admin-linkifier-pattern-status", {visible: true});
assert.strictEqual(
await common.get_text_from_selector(page, "div#admin-linkifier-pattern-status"),
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].",
"Failed: Bad regular expression: missing ): (foo",
);
}

Expand Down Expand Up @@ -83,8 +83,8 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
await page.click(".linkifier_row .edit");
await page.waitForFunction(() => document.activeElement === $("#linkifier-edit-form-modal")[0]);
await common.fill_form(page, "form.linkifier-edit-form", {
pattern: "####",
url_format_string: "####",
pattern: "#(?P<id>d????)",
url_format_string: "????",
});
await page.click(".submit-linkifier-info-change");

Expand All @@ -96,7 +96,7 @@ async function test_edit_invalid_linkifier(page: Page): Promise<void> {
);
assert.strictEqual(
edit_linkifier_pattern_status,
"Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].",
"Failed: Bad regular expression: bad repetition operator: ????",
);

const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status";
Expand Down
43 changes: 34 additions & 9 deletions zerver/lib/markdown/__init__.py
Expand Up @@ -37,6 +37,7 @@
import markdown.postprocessors
import markdown.treeprocessors
import markdown.util
import re2
import requests
from django.conf import settings
from django.db.models import Q
Expand Down Expand Up @@ -1779,39 +1780,55 @@ def run(self, lines: List[str]) -> List[str]:
# Name for the outer capture group we use to separate whitespace and
# other delimiters from the actual content. This value won't be an
# option in user-entered capture groups.
BEFORE_CAPTURE_GROUP = "linkifier_before_match"
OUTER_CAPTURE_GROUP = "linkifier_actual_match"
AFTER_CAPTURE_GROUP = "linkifier_after_match"


def prepare_linkifier_pattern(source: str) -> str:
"""Augment a linkifier so it only matches after start-of-string,
whitespace, or opening delimiters, won't match if there are word
characters directly after, and saves what was matched as
OUTER_CAPTURE_GROUP."""
return fr"""(?<![^\s'"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?!\w)"""
return fr"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])"""


# Given a regular expression pattern, linkifies groups that match it
# using the provided format string to construct the URL.
class LinkifierPattern(markdown.inlinepatterns.Pattern):
class LinkifierPattern(markdown.inlinepatterns.InlineProcessor):
"""Applied a given linkifier to the input"""

def __init__(
self,
source_pattern: str,
format_string: str,
markdown_instance: Optional[markdown.Markdown] = None,
md: markdown.Markdown,
) -> None:
self.pattern = prepare_linkifier_pattern(source_pattern)
# Do not write errors to stderr (this still raises exceptions)
options = re2.Options()
options.log_errors = False

self.md = md
self.compiled_re = re2.compile(prepare_linkifier_pattern(source_pattern), options=options)
self.format_string = format_string
markdown.inlinepatterns.Pattern.__init__(self, self.pattern, markdown_instance)

def handleMatch(self, m: Match[str]) -> Union[Element, str]:
def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype
self, m: Match[str], data: str
) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]:
db_data = self.md.zulip_db_data
return url_to_a(
url = url_to_a(
db_data,
self.format_string % m.groupdict(),
markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)),
)
if isinstance(url, str):
return None, None, None

return (
url,
m.start(2),
m.end(2),
)


class UserMentionPattern(markdown.inlinepatterns.InlineProcessor):
Expand Down Expand Up @@ -2336,11 +2353,19 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]:
matches: List[Dict[str, Union[str, int]]] = []
linkifiers = linkifiers_for_realm(linkifiers_key)

options = re2.Options()
options.log_errors = False
for linkifier in linkifiers:
raw_pattern = linkifier["pattern"]
url_format_string = linkifier["url_format"]
pattern = prepare_linkifier_pattern(raw_pattern)
for m in re.finditer(pattern, topic_name):
try:
pattern = re2.compile(prepare_linkifier_pattern(raw_pattern), options=options)
except re2.error:
# An invalid regex shouldn't be possible here, and logging
# here on an invalid regex would spam the logs with every
# message sent; simply move on.
continue
for m in pattern.finditer(topic_name):
match_details = m.groupdict()
match_text = match_details["linkifier_actual_match"]
# We format the linkifier's url string using the matched text.
Expand Down
39 changes: 39 additions & 0 deletions zerver/migrations/0359_re2_linkifiers.py
@@ -0,0 +1,39 @@
import re2
from django.db import migrations
from django.db.backends.postgresql.schema import DatabaseSchemaEditor
from django.db.migrations.state import StateApps


def delete_re2_invalid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None:
options = re2.Options()
options.log_errors = False

RealmFilter = apps.get_model("zerver", "RealmFilter")
found_errors = False
for linkifier in RealmFilter.objects.all():
try:
re2.compile(linkifier.pattern, options=options)
except re2.error:
if not found_errors:
print()
found_errors = True
print(
f"Deleting linkifier {linkifier.id} in realm {linkifier.realm.string_id} which is not compatible with new re2 engine:"
)
print(f" {linkifier.pattern} -> {linkifier.url_format_string}")
linkifier.delete()


class Migration(migrations.Migration):

dependencies = [
("zerver", "0325_alter_realmplayground_unique_together"),
]

operations = [
migrations.RunPython(
delete_re2_invalid,
reverse_code=migrations.RunPython.noop,
elidable=True,
)
]
25 changes: 13 additions & 12 deletions zerver/models.py
Expand Up @@ -19,6 +19,7 @@
)

import django.contrib.auth
import re2
from bitfield import BitField
from bitfield.types import BitHandler
from django.conf import settings
Expand Down Expand Up @@ -906,19 +907,19 @@ def flush_realm_emoji(sender: Any, **kwargs: Any) -> None:


def filter_pattern_validator(value: str) -> None:
regex = re.compile(r"^(?:(?:[\w\-#_= /:]*|[+]|[!])(\(\?P<\w+>.+\)))+$")
error_msg = _("Invalid linkifier pattern. Valid characters are {}.").format(
"[ a-zA-Z_#=/:+!-]",
)

if not regex.match(str(value)):
raise ValidationError(error_msg)

try:
re.compile(value)
except re.error:
# Regex is invalid
raise ValidationError(error_msg)
# Do not write errors to stderr (this still raises exceptions)
options = re2.Options()
options.log_errors = False

re2.compile(value, options=options)
except re2.error as e:
if len(e.args) >= 1:
if isinstance(e.args[0], str): # nocoverage
raise ValidationError(_("Bad regular expression: {}").format(e.args[0]))
if isinstance(e.args[0], bytes):
raise ValidationError(_("Bad regular expression: {}").format(e.args[0].decode()))
raise ValidationError(_("Unknown regular expression error")) # nocoverage


def filter_format_validator(value: str) -> None:
Expand Down
9 changes: 4 additions & 5 deletions zerver/tests/test_markdown.py
Expand Up @@ -1401,26 +1401,25 @@ def test_multiple_matching_realm_patterns(self) -> None:
url_format_string = r"https://trac.example.com/ticket/%(id)s"
linkifier_1 = RealmFilter(
realm=realm,
pattern=r"(?P<id>ABC\-[0-9]+)(?![A-Z0-9-])",
pattern=r"(?P<id>ABC\-[0-9]+)",
url_format_string=url_format_string,
)
linkifier_1.save()
self.assertEqual(
linkifier_1.__str__(),
r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+)(?![A-Z0-9-])"
" https://trac.example.com/ticket/%(id)s>",
r"<RealmFilter(zulip): (?P<id>ABC\-[0-9]+) https://trac.example.com/ticket/%(id)s>",
)

url_format_string = r"https://other-trac.example.com/ticket/%(id)s"
linkifier_2 = RealmFilter(
realm=realm,
pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])",
pattern=r"(?P<id>[A-Z][A-Z0-9]*\-[0-9]+)",
url_format_string=url_format_string,
)
linkifier_2.save()
self.assertEqual(
linkifier_2.__str__(),
r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])"
r"<RealmFilter(zulip): (?P<id>[A-Z][A-Z0-9]*\-[0-9]+)"
" https://other-trac.example.com/ticket/%(id)s>",
)

Expand Down
18 changes: 6 additions & 12 deletions zerver/tests/test_realm_linkifiers.py
Expand Up @@ -27,17 +27,13 @@ def test_create(self) -> None:
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(result, "This field cannot be blank.")

data["pattern"] = "$a"
data["pattern"] = "(foo"
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: missing ): (foo")

data["pattern"] = r"ZUL-(?P<id>\d++)"
data["pattern"] = r"ZUL-(?P<id>\d????)"
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")

data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg"
Expand Down Expand Up @@ -154,13 +150,11 @@ def test_update(self) -> None:
)

data = {
"pattern": r"ZUL-(?P<id>\d++)",
"pattern": r"ZUL-(?P<id>\d????)",
"url_format_string": "https://realm.com/my_realm_filter/%(id)s",
}
result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data)
self.assert_json_error(
result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]."
)
self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????")

data["pattern"] = r"ZUL-(?P<id>\d+)"
data["url_format_string"] = "$fgfg"
Expand Down

0 comments on commit e2d303c

Please sign in to comment.