Skip to content
Permalink
Browse files Browse the repository at this point in the history
Fix: unbounded regex backtracking in timeunit parsing
  • Loading branch information
Wanasit Tanakitrungruang committed Mar 13, 2021
1 parent 3f294e0 commit 98815b5
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 29 deletions.
10 changes: 3 additions & 7 deletions src/locales/de/constants.ts
@@ -1,5 +1,5 @@
import { OpUnitType } from "dayjs";
import { matchAnyPattern } from "../../utils/pattern";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
import { findMostLikelyADYear } from "../../calculation/years";
import { TimeUnits } from "../../utils/timeunits";

Expand Down Expand Up @@ -149,14 +149,10 @@ export function parseYear(match: string): number {

//-----------------------------

const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s*(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s*`;
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

const SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE = SINGLE_TIME_UNIT_PATTERN.replace(/\((?!\?)/g, "(?:");

export const TIME_UNITS_PATTERN =
`(?:(?:about|around)\\s*)?` +
`${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE}\\s*(?:,?\\s*${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE})*`;
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN);

export function parseTimeUnits(timeunitText): TimeUnits {
const fragments = {};
Expand Down
10 changes: 3 additions & 7 deletions src/locales/en/constants.ts
@@ -1,5 +1,5 @@
import { OpUnitType } from "dayjs";
import { matchAnyPattern } from "../../utils/pattern";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
import { findMostLikelyADYear } from "../../calculation/years";
import { TimeUnits } from "../../utils/timeunits";

Expand Down Expand Up @@ -223,14 +223,10 @@ export function parseYear(match: string): number {

//-----------------------------

const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s*(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s*`;
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

const SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE = SINGLE_TIME_UNIT_PATTERN.replace(/\((?!\?)/g, "(?:");

export const TIME_UNITS_PATTERN =
`(?:(?:about|around)\\s*)?` +
`${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE}\\s*(?:,?\\s*${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE})*`;
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern(`(?:(?:about|around)\\s*)?`, SINGLE_TIME_UNIT_PATTERN);

export function parseTimeUnits(timeunitText): TimeUnits {
const fragments = {};
Expand Down
8 changes: 3 additions & 5 deletions src/locales/fr/constants.ts
@@ -1,5 +1,5 @@
import { OpUnitType, QUnitType } from "dayjs";
import { matchAnyPattern } from "../../utils/pattern";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";

export const WEEKDAY_DICTIONARY: { [word: string]: number } = {
"dimanche": 0,
Expand Down Expand Up @@ -160,12 +160,10 @@ export function parseYear(match: string): number {

//-----------------------------

const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s*(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s*`;
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

const SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE = SINGLE_TIME_UNIT_PATTERN.replace(/\((?!\?)/g, "(?:");

export const TIME_UNITS_PATTERN = `(?:${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE})+`;
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN);

export function parseTimeUnits(timeunitText): { [c in OpUnitType | QUnitType]?: number } {
const fragments = {};
Expand Down
10 changes: 3 additions & 7 deletions src/locales/nl/constants.ts
@@ -1,5 +1,5 @@
import { OpUnitType } from "dayjs";
import { matchAnyPattern } from "../../utils/pattern";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
import { findMostLikelyADYear } from "../../calculation/years";
import { TimeUnits } from "../../utils/timeunits";

Expand Down Expand Up @@ -207,14 +207,10 @@ export function parseYear(match: string): number {

//-----------------------------

const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s*(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s*`;
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

const SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE = SINGLE_TIME_UNIT_PATTERN.replace(/\((?!\?)/g, "(?:");

export const TIME_UNITS_PATTERN =
`(?:(?:binnen|in)\\s*)?` +
`${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE}\\s*(?:,?\\s*${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE})*`;
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern(`(?:(?:binnen|in)\\s*)?`, SINGLE_TIME_UNIT_PATTERN);

export function parseTimeUnits(timeunitText): TimeUnits {
const fragments = {};
Expand Down
5 changes: 3 additions & 2 deletions src/utils/pattern.ts
@@ -1,7 +1,8 @@
type DictionaryLike = string[] | { [word: string]: unknown } | Map<string, unknown>;

export function patternWithWordBreak(regExp: RegExp): RegExp {
return RegExp("" + regExp.source);
/**
 * Builds a regex source string that matches one time-unit expression followed
 * by up to ten more, optionally separated by a comma.
 *
 * Every capturing group in `singleTimeunitPattern` is rewritten to a
 * non-capturing group (`(?:`), so the returned pattern adds no capture groups
 * of its own beyond whatever `prefix` contains.
 *
 * All whitespace quantifiers emitted here are bounded (`{0,5}`), and the
 * repetition is bounded (`{0,10}`), so a long run of whitespace cannot make
 * the regex engine explore an unbounded number of ways to split it between
 * adjacent quantifiers.
 *
 * @param prefix - Regex source placed verbatim before the first time unit
 *   (pass `""` for none).
 * @param singleTimeunitPattern - Regex source matching a single
 *   "<number> <unit>" expression.
 * @returns The combined regex source string.
 */
export function repeatedTimeunitPattern(prefix: string, singleTimeunitPattern: string): string {
    // Turn "(" into "(?:" unless it already starts a "(?..." construct.
    const singleTimeunitPatternNoCapture = singleTimeunitPattern.replace(/\((?!\?)/g, "(?:");
    // Whitespace between the first unit and the repeated group is capped like every other gap.
    return `${prefix}${singleTimeunitPatternNoCapture}\\s{0,5}(?:,?\\s{0,5}${singleTimeunitPatternNoCapture}){0,10}`;
}

export function extractTerms(dictionary: DictionaryLike): string[] {
Expand Down
27 changes: 27 additions & 0 deletions test/en/en_performance.test.ts
@@ -0,0 +1,27 @@
import * as chrono from "../../src";
import { measureMilliSec } from "../test_util";

// Performance regression test: parses an input in which numbers and time-unit
// words are separated by whitespace-only segments, and checks that parsing
// finishes quickly.
test("Test - Benchmarking against whitespace backtracking", () => {
// Time the whole parse, including the assertion on the result count.
const time = measureMilliSec(() => {
// Input assembled from "<number> <unit>" pieces interleaved with
// whitespace-only string segments.
// NOTE(review): the whitespace runs inside these literals may be shorter here
// than originally authored — confirm against the repository.
const str =
"BGR3 " +
" 186 " +
" days " +
" " +
" " +
" 18 hours " +
" " +
" " +
" 37 minutes " +
" " +
" " +
" 01 " +
" seconds";

const results = chrono.parse(str);
// This input is expected to yield no parsed results.
expect(results.length).toBe(0);
});

// Print the measured duration, then require it to stay below 1000 ms.
console.log(time);
expect(time).toBeLessThan(1000);
});
9 changes: 8 additions & 1 deletion test/test_util.ts
@@ -1,5 +1,5 @@
import { BufferedDebugHandler } from "../src/debugging";
import { ParsedResult, ParsingOption } from "../src";
import { en, ParsedResult, ParsingOption } from "../src";

interface ChronoLike {
parse(text: string, ref?: Date, option?: ParsingOption): ParsedResult[];
Expand Down Expand Up @@ -73,6 +73,13 @@ export function testUnexpectedResult(chrono: ChronoLike, text: string, refDate?:
}
}

/**
 * Runs `block` synchronously and returns how long it took, in milliseconds.
 *
 * Timing is taken from `Date.now()` (milliseconds since the epoch).
 * `Date.prototype.getMilliseconds()` must not be used for this: it returns
 * only the 0–999 millisecond field of the current second, so a difference of
 * two such readings is wrong whenever a second boundary is crossed and can
 * never report a duration of one second or more.
 *
 * @param block - The synchronous code to time.
 * @returns Elapsed wall-clock time in milliseconds.
 */
export function measureMilliSec(block: () => void): number {
    const startTime = Date.now();
    block();
    return Date.now() - startTime;
}

// --------------------------------------------------

declare global {
Expand Down

0 comments on commit 98815b5

Please sign in to comment.