Skip to content

Commit

Permalink
Fast charIn for ranges like 'A' to 'Z'
Browse files Browse the repository at this point in the history
  • Loading branch information
Odomontois committed Jan 27, 2022
1 parent 3dda8ab commit 22288d9
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 10 deletions.
11 changes: 11 additions & 0 deletions core/js/src/main/scala/cats/parse/BitSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,15 @@ object BitSetUtil {

bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet
}

/** Build a bit set for the inclusive range `from` to `to`.
  *
  * Bits are stored as offsets from `from`, so the result has bits `0` through `to - from` set.
  */
def bitSetForRange(from: Int, to: Int): BitSet = {
  val size = to - from + 1
  val bits = new BitSet(size)
  // One bit per value in the range; an empty range leaves the set empty.
  (0 until size).foreach { offset =>
    bits += offset
  }
  bits
}
}
7 changes: 7 additions & 0 deletions core/jvm/src/main/scala/cats/parse/BitSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,11 @@ object BitSetUtil {

bs.flatMap { case (m, bs) => toIter(m, bs) }.toSet
}

/** Build a bit set for the inclusive range `from` to `to`.
  *
  * Bits are stored as offsets from `from`, so the result has bits `0` through `to - from` set
  * (`to - from + 1` bits in total).
  */
def bitSetForRange(from: Int, to: Int): BitSet = {
  val count = to - from + 1
  val bs = new BitSet(count)
  // java.util.BitSet range operations take an EXCLUSIVE upper index, so the bound must be
  // `count` (not `count - 1`) for the bit belonging to `to` to be included.
  bs.set(0, count)
  bs
}
}
27 changes: 17 additions & 10 deletions core/shared/src/main/scala/cats/parse/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import cats.implicits._
import scala.collection.immutable.SortedSet
import scala.collection.mutable.ListBuffer
import java.util.Arrays
import scala.collection.immutable.NumericRange

/** Parser0[A] attempts to extract an `A` value from the given input, potentially moving its offset
* forward in the process.
Expand Down Expand Up @@ -1533,16 +1534,22 @@ object Parser {
/** Parse a single character that is contained in `cs`.
  *
  * An empty iterable is the same as fail
  */
def charIn(cs: Iterable[Char]): Parser[Char] =
// NOTE(review): this text is a rendered diff whose +/- markers were lost. The `if`/`else`
// form directly below appears to be the pre-commit body (the 10 deleted lines) and the
// `cs match` form after it the post-commit body -- confirm against the repository before
// reading the two as a single expression.
if (cs.isEmpty) fail
else {
val ary = cs.toArray
Arrays.sort(ary)
rangesFor(ary) match {
case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue =>
anyChar
case notAnyChar =>
Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar)
}
cs match {
// Nothing to match: same as fail.
case _ if cs.isEmpty => fail
// Fast path for an inclusive range with step 1: skips the toArray/sort/rangesFor work of
// the general branch and describes the input as the single range start -> end.
// NOTE(review): the `Char` type argument in this pattern is erased at runtime, so the
// match effectively tests only for NumericRange.Inclusive.
// NOTE(review): unlike the general branch, this path returns Impl.CharIn rather than
// anyChar when the range is Char.MinValue to Char.MaxValue -- confirm that is intended.
case range: NumericRange.Inclusive[Char] if range.step.toInt == 1 =>
val start = range.start.toInt
val end = range.end.toInt
val bitSet = BitSetUtil.bitSetForRange(start, end)
Impl.CharIn(start, bitSet, NonEmptyList.one(range.start -> range.end))
// General path: sort a copy of the characters and hand it to rangesFor.
case _ =>
val ary = cs.toArray
Arrays.sort(ary)
rangesFor(ary) match {
// A single range spanning every Char is replaced by anyChar.
case NonEmptyList((low, high), Nil) if low == Char.MinValue && high == Char.MaxValue =>
anyChar
// Otherwise build a CharIn from the smallest char (ary is sorted) and a bit set of ary.
case notAnyChar =>
Impl.CharIn(ary(0).toInt, BitSetUtil.bitSetFor(ary), notAnyChar)
}
}

/** Parse any single character in a set of characters as lower or upper case
Expand Down
12 changes: 12 additions & 0 deletions core/shared/src/test/scala/cats/parse/ParserTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,18 @@ class ParserTest extends munit.ScalaCheckSuite {
}
}

property("charIn range matches charIn list") {
  forAll { (c1: Char, c2: Char, str: String) =>
    // Order the two generated chars so that `start to end` is always a non-empty range.
    // The upper bound must consider both chars (it was `c2.max(c2)`, which is just `c2`).
    val start = c1.min(c2)
    val end = c1.max(c2)

    // charIn given a range and charIn given the same characters as a list must agree.
    val rangeParser = Parser.charIn(start to end)
    val listParser = Parser.charIn((start to end).toList)

    assertEquals(rangeParser.parse(str), listParser.parse(str))
  }
}

property("Parser.end gives the right error") {
forAll { (str: String) =>
Parser.end.parse(str) match {
Expand Down

0 comments on commit 22288d9

Please sign in to comment.