From 371bd61ba1022771797155af079ff1754b78f1f4 Mon Sep 17 00:00:00 2001 From: Odomontois Date: Thu, 10 Feb 2022 19:03:32 +0300 Subject: [PATCH] Fast charIn for ranges like 'A' to 'Z' (#360) --- .../js/src/main/scala/cats/parse/BitSet.scala | 10 ++++++ .../src/main/scala/cats/parse/BitSet.scala | 6 ++++ .../src/main/scala/cats/parse/Parser.scala | 31 +++++++++++++------ .../test/scala/cats/parse/ParserTest.scala | 16 ++++++++++ 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/core/js/src/main/scala/cats/parse/BitSet.scala b/core/js/src/main/scala/cats/parse/BitSet.scala index 1a273d17..3e14f75b 100644 --- a/core/js/src/main/scala/cats/parse/BitSet.scala +++ b/core/js/src/main/scala/cats/parse/BitSet.scala @@ -56,4 +56,14 @@ object BitSetUtil { bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet } + + def bitSetForRange(count: Int): BitSet = { + val bs = new BitSet(count) + var cur = 0 + while (cur < count) { + bs += cur + cur += 1 + } + bs + } } diff --git a/core/jvm/src/main/scala/cats/parse/BitSet.scala b/core/jvm/src/main/scala/cats/parse/BitSet.scala index 476511bb..b84674cb 100644 --- a/core/jvm/src/main/scala/cats/parse/BitSet.scala +++ b/core/jvm/src/main/scala/cats/parse/BitSet.scala @@ -61,4 +61,10 @@ object BitSetUtil { bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet } + + def bitSetForRange(count: Int): BitSet = { + val bs = new BitSet(count) + bs.flip(0, count) + bs + } } diff --git a/core/shared/src/main/scala/cats/parse/Parser.scala b/core/shared/src/main/scala/cats/parse/Parser.scala index eabb67cb..70656240 100644 --- a/core/shared/src/main/scala/cats/parse/Parser.scala +++ b/core/shared/src/main/scala/cats/parse/Parser.scala @@ -28,6 +28,7 @@ import cats.implicits._ import scala.collection.immutable.SortedSet import scala.collection.mutable.ListBuffer import java.util.Arrays +import scala.collection.immutable.NumericRange /** Parser0[A] attempts to extract an `A` value from the given input, potentially moving its offset * forward in the process. @@ -1533,16 +1534,22 @@ object Parser { /** An empty iterable is the same as fail */ def charIn(cs: Iterable[Char]): Parser[Char] = - if (cs.isEmpty) fail - else { - val ary = cs.toArray - Arrays.sort(ary) - rangesFor(ary) match { - case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue => - anyChar - case notAnyChar => - Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar) - } + cs match { + case _ if cs.isEmpty => fail + case Impl.CharsRange(Char.MinValue, Char.MaxValue) => + anyChar + case Impl.CharsRange(start, end) => + val bitSet = BitSetUtil.bitSetForRange(end.toInt - start.toInt + 1) + Impl.CharIn(start.toInt, bitSet, NonEmptyList.one(start -> end)) + case _ => + val ary = cs.toArray + Arrays.sort(ary) + rangesFor(ary) match { + case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue => + anyChar + case notAnyChar => + Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar) + } } /** Parse any single character in a set of characters as lower or upper case @@ -2988,6 +2995,10 @@ object Parser { a } } + object CharsRange { + def unapply(range: NumericRange.Inclusive[Char]): Option[(Char, Char)] = + if (range.step == 1) Some(range.start -> range.end) else None + } } } diff --git a/core/shared/src/test/scala/cats/parse/ParserTest.scala b/core/shared/src/test/scala/cats/parse/ParserTest.scala index 4454b088..8e3e77a8 100644 --- a/core/shared/src/test/scala/cats/parse/ParserTest.scala +++ b/core/shared/src/test/scala/cats/parse/ParserTest.scala @@ -1181,6 +1181,22 @@ class ParserTest extends munit.ScalaCheckSuite { } } + property("charIn range matches charIn list") { + forAll { (c1: Char, c2: Char) => + val start = c1.min(c2) + val end = c1.max(c2) + + val p1 = Parser.charIn(start to end) + val p2 = Parser.charIn((start to end).toList) + + assertEquals(p1, p2) + } + } + + property("charIn full range == anyChar") { + assertEquals(Parser.charIn(Char.MinValue to Char.MaxValue), Parser.anyChar) + } + property("Parser.end gives the right error") { forAll { (str: String) => Parser.end.parse(str) match {