Skip to content

Commit

Permalink
Fast charIn for ranges like 'A' to 'Z' (#360)
Browse files Browse the repository at this point in the history
  • Loading branch information
Odomontois committed Feb 10, 2022
1 parent 82ef549 commit 371bd61
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 10 deletions.
10 changes: 10 additions & 0 deletions core/js/src/main/scala/cats/parse/BitSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,14 @@ object BitSetUtil {

bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet
}

/** Builds a bit set in which every index in `[0, count)` is set.
  *
  * @param count
  *   how many consecutive indices, starting at 0, to mark; also handed to the
  *   BitSet constructor
  * @return
  *   a fresh BitSet containing 0, 1, ..., count - 1 (empty when `count` is 0)
  */
def bitSetForRange(count: Int): BitSet = {
  val bits = new BitSet(count)
  // Add each index in turn; an empty range (count <= 0) adds nothing.
  (0 until count).foreach { idx => bits += idx }
  bits
}
}
6 changes: 6 additions & 0 deletions core/jvm/src/main/scala/cats/parse/BitSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,10 @@ object BitSetUtil {

bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet
}

/** Builds a bit set in which every index in `[0, count)` is set.
  *
  * @param count
  *   how many consecutive indices, starting at 0, to mark; also handed to the
  *   BitSet constructor
  * @return
  *   a fresh BitSet whose bits 0 until count are all set
  */
def bitSetForRange(count: Int): BitSet = {
  val bits = new BitSet(count)
  // On a freshly created (all-clear) set, setting the range is the same as flipping it.
  bits.set(0, count)
  bits
}
}
31 changes: 21 additions & 10 deletions core/shared/src/main/scala/cats/parse/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import cats.implicits._
import scala.collection.immutable.SortedSet
import scala.collection.mutable.ListBuffer
import java.util.Arrays
import scala.collection.immutable.NumericRange

/** Parser0[A] attempts to extract an `A` value from the given input, potentially moving its offset
* forward in the process.
Expand Down Expand Up @@ -1533,16 +1534,22 @@ object Parser {
/** An empty iterable is the same as fail
*/
def charIn(cs: Iterable[Char]): Parser[Char] =
// NOTE(review): this page renders the diff without +/- markers. The ten lines
// from `if (cs.isEmpty) fail` down to the first lone `}` look like the body
// removed by this commit (the hunk header reports 10 deletions) -- confirm
// against the raw diff. That version always copied `cs` to an array and sorted it.
if (cs.isEmpty) fail
else {
val ary = cs.toArray
Arrays.sort(ary)
rangesFor(ary) match {
case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue =>
anyChar
case notAnyChar =>
Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar)
}
// NOTE(review): from here on is presumably the body added by this commit.
cs match {
// An empty iterable is the same as `fail`.
case _ if cs.isEmpty => fail
// A step-1 range spanning every Char is just `anyChar`.
case Impl.CharsRange(Char.MinValue, Char.MaxValue) =>
anyChar
// Any other step-1 inclusive range: build the bit set directly from the
// range length, without copying `cs` to an array or sorting it.
case Impl.CharsRange(start, end) =>
val bitSet = BitSetUtil.bitSetForRange(end.toInt - start.toInt + 1)
Impl.CharIn(start.toInt, bitSet, NonEmptyList.one(start -> end))
// General case: sort the characters and derive the ranges from the array.
case _ =>
val ary = cs.toArray
Arrays.sort(ary)
rangesFor(ary) match {
case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue =>
anyChar
case notAnyChar =>
Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar)
}
}

/** Parse any single character in a set of characters as lower or upper case
Expand Down Expand Up @@ -2988,6 +2995,10 @@ object Parser {
a
}
}
/** Extractor for contiguous, non-empty inclusive `Char` ranges such as `'A' to 'Z'`.
  *
  * Matching succeeds only when the range has step 1 and contains at least one
  * character, so the extracted pair always satisfies `start <= end`.
  */
object CharsRange {

  /** @param range
    *   an inclusive numeric range of characters
    * @return
    *   `Some(start -> end)` for a non-empty step-1 range, `None` otherwise
    */
  def unapply(range: NumericRange.Inclusive[Char]): Option[(Char, Char)] =
    // An empty range (e.g. 'c' to 'a') has start > end; reject it so callers
    // never compute a non-positive length from the extracted bounds.
    if (range.step == 1 && range.nonEmpty) Some(range.start -> range.end) else None
}
}
}

Expand Down
16 changes: 16 additions & 0 deletions core/shared/src/test/scala/cats/parse/ParserTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,22 @@ class ParserTest extends munit.ScalaCheckSuite {
}
}

// Building a parser from an inclusive Char range must give the same parser as
// building it from the equivalent explicit list of characters.
property("charIn range matches charIn list") {
  forAll { (a: Char, b: Char) =>
    // Order the two generated characters so the range is never empty.
    val (lo, hi) = if (a <= b) (a, b) else (b, a)

    val fromRange = Parser.charIn(lo to hi)
    val fromList = Parser.charIn((lo to hi).toList)

    assertEquals(fromRange, fromList)
  }
}

// A range covering every Char value should collapse to the anyChar parser.
property("charIn full range == anyChar") {
  val everyChar = Char.MinValue to Char.MaxValue
  assertEquals(Parser.charIn(everyChar), Parser.anyChar)
}

property("Parser.end gives the right error") {
forAll { (str: String) =>
Parser.end.parse(str) match {
Expand Down

0 comments on commit 371bd61

Please sign in to comment.