Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement (Almost) All The Unicode Caseless Matching Systems #232

Draft
wants to merge 11 commits into
base: series/2.x
Choose a base branch
from
5 changes: 4 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,15 @@ lazy val bench = project
.enablePlugins(JmhPlugin)
.settings(
name := "case-insensitive-bench",
libraryDependencies ++= List(
"org.scalacheck" %% "scalacheck" % scalacheckV
),
console / initialCommands := {
fullImports(List("cats", "cats.syntax.all", "org.typelevel.ci"), wildcardImport.value)
},
consoleQuick / initialCommands := ""
)
.dependsOn(core.jvm)
.dependsOn(core.jvm, testing.jvm)

lazy val docs = project
.in(file("site"))
Expand Down
88 changes: 54 additions & 34 deletions core/src/main/scala/org/typelevel/ci/CIString.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,54 +22,68 @@ import java.io.Serializable
import org.typelevel.ci.compat._
import scala.math.Ordered

/** A case-insensitive String.
/** A case insensitive representation of a `String`.
*
* Two CI strings are equal if and only if they are the same length, and each corresponding
* character is equal after calling either `toUpper` or `toLower`.
* There are several different ways to define a case insensitive match with Unicode. According to
* the Unicode standard, this is the "most correct" definition. If you are just looking for a case
* insensitive `String`, you should either use this or [[CanonicalFullCaseFoldedString]].
*
* Ordering is based on a string comparison after folding each character to uppercase and then back
* to lowercase.
* The only difference is whether or not you want to keep track of the original input `String`
* value. If you don't care about that, then [[CanonicalFullCaseFoldedString]] uses less memory and
* is likely ''slightly'' faster for most operations.
*
* All comparisons are insensitive to locales.
* {{{
* scala> CIString("ß")
* val res0: org.typelevel.ci.CIString = ß
*
* @param toString
* The original value the CI String was constructed with.
* scala> CanonicalFullCaseFoldedString("ß")
* val res1: org.typelevel.ci.CanonicalFullCaseFoldedString = ss
*
* scala> res0.asCanonicalFullCaseFoldedString == res1
* val res2: Boolean = true
*
* scala> res0.toString
* val res3: String = ß
*
* scala> res1.toString
* val res4: String = ss
*
* scala> res0.asCanonicalFullCaseFoldedString.toString
* val res5: String = ss
* }}}
*
* @see
* [[https://www.unicode.org/versions/Unicode14.0.0/ch03.pdf#G34145 Unicode Caseless Matching]]
*/
final class CIString private (override val toString: String)
extends Ordered[CIString]
with Serializable {

/** The [[CanonicalFullCaseFoldedString]] representation of this `String`.
*
* This is the input `String`, case folded using full Unicode case folding (without the Turkic
* rules), and normalized for Unicode canonical caseless matching.
*
* Any two Unicode text values are considered canonically caseless equivalent to each other if
* they both produce the same [[CanonicalFullCaseFoldedString]].
*/
lazy val asCanonicalFullCaseFoldedString: CanonicalFullCaseFoldedString =
CanonicalFullCaseFoldedString(this.toString)

override def equals(that: Any): Boolean =
that match {
case that: CIString =>
this.toString.equalsIgnoreCase(that.toString)
asCanonicalFullCaseFoldedString == that.asCanonicalFullCaseFoldedString
case _ => false
}

@transient private[this] var hash = 0
override def hashCode(): Int = {
if (hash == 0)
hash = calculateHash
hash
}

private[this] def calculateHash: Int = {
var h = 17
var i = 0
val len = toString.length
while (i < len) {
// Strings are equal ignoring case if either their uppercase or lowercase
// forms are equal. Equality of one does not imply the other, so we need
// to go in both directions. A character is not guaranteed to make this
// round trip, but it doesn't matter as long as all equal characters
// hash the same.
h = h * 31 + toString.charAt(i).toUpper.toLower
i += 1
}
h
}
override def hashCode(): Int =
this.asCanonicalFullCaseFoldedString.hashCode

override def compare(that: CIString): Int =
this.toString.compareToIgnoreCase(that.toString)
Order[CanonicalFullCaseFoldedString].compare(
asCanonicalFullCaseFoldedString,
that.asCanonicalFullCaseFoldedString)

def transform(f: String => String): CIString = CIString(f(toString))

Expand All @@ -79,15 +93,21 @@ final class CIString private (override val toString: String)

def trim: CIString = transform(_.trim)

def length: Int = toString.length
@deprecated(
message =
"Please use asCanonicalFullCaseFoldedString.length or toString.length, depending on your use case, instead. CIString represents a Unicode canonical caseless string with full case folding. Full case folding can change the length (in terms of number of Char values) of a String. This makes length on CIString confusing to use because it is unclear which length this method refers to. As 1.3.0 it is defined to refer to the length of the full case folded representation of the String, since this will be the same for all input Strings.",
since = "1.3.0")
def length: Int = asCanonicalFullCaseFoldedString.toString.length

@deprecated("Use toString", "0.1.0")
def value: String = toString
}

@suppressUnusedImportWarningForCompat
object CIString {
def apply(value: String): CIString = new CIString(value)

def apply(value: String): CIString =
new CIString(value)

val empty = CIString("")

Expand Down
87 changes: 87 additions & 0 deletions core/src/main/scala/org/typelevel/ci/CIStringCS.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2020 Typelevel
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.typelevel.ci

import cats._
import cats.kernel._
import cats.syntax.all._

/** A `String` paired with its `CanonicalSimpleCaseFoldedString` form.
  *
  * Equality and hashing are determined only by `asCanonicalSimpleCaseFoldedString`; the `toString`
  * value takes no part in either. Two instances holding different `toString` values are therefore
  * equal whenever their folded forms are equal.
  *
  * NOTE(review): judging by the type name, the folded form presumably implements Unicode canonical
  * caseless matching with ''simple'' case folding. Confirm against
  * `CanonicalSimpleCaseFoldedString`.
  *
  * @param toString
  *   the `String` value this instance was constructed with
  * @param asCanonicalSimpleCaseFoldedString
  *   the folded form used for `equals` and `hashCode`
  */
final class CIStringCS private (
    override val toString: String,
    val asCanonicalSimpleCaseFoldedString: CanonicalSimpleCaseFoldedString)
    extends Serializable {

  /** The hash code of the folded form, which keeps it consistent with `equals`. */
  override def hashCode(): Int =
    asCanonicalSimpleCaseFoldedString.hashCode

  /** `true` only when `that` is a [[CIStringCS]] whose folded form equals this one's. */
  override def equals(that: Any): Boolean =
    that match {
      case other: CIStringCS =>
        asCanonicalSimpleCaseFoldedString == other.asCanonicalSimpleCaseFoldedString
      case _ =>
        false
    }
}

/** Constructors and type class instances for [[CIStringCS]]. */
object CIStringCS {

  /** Builds a [[CIStringCS]] that keeps `value` as its `toString` and pairs it with
    * `CanonicalSimpleCaseFoldedString(value)`.
    */
  def apply(value: String): CIStringCS = {
    val folded = CanonicalSimpleCaseFoldedString(value)
    new CIStringCS(value, folded)
  }

  /** The [[CIStringCS]] built from the empty `String`. */
  val empty: CIStringCS = CIStringCS("")

  /** Combined `Hash` and `Order` instance.
    *
    * Hashing uses the instance's own `hashCode`; ordering compares the folded forms.
    */
  implicit val hashAndOrderForCIStringCS: Hash[CIStringCS] with Order[CIStringCS] =
    new Hash[CIStringCS] with Order[CIStringCS] {
      override def compare(x: CIStringCS, y: CIStringCS): Int =
        x.asCanonicalSimpleCaseFoldedString.compare(y.asCanonicalSimpleCaseFoldedString)

      override def hash(x: CIStringCS): Int =
        x.hashCode
    }

  /** Standard library `Ordering`, derived from the `Order` instance. */
  implicit val orderingForCIStringCS: Ordering[CIStringCS] =
    hashAndOrderForCIStringCS.toOrdering

  /** `Show` instance that renders via `toString`. */
  implicit val showForCIStringCS: Show[CIStringCS] =
    Show.fromToString

  /** `LowerBounded` instance whose minimum is [[empty]], ordered by the `Order` instance. */
  implicit val lowerBoundForCIStringCS: LowerBounded[CIStringCS] =
    new LowerBounded[CIStringCS] {
      override val partialOrder: PartialOrder[CIStringCS] =
        hashAndOrderForCIStringCS

      override val minBound: CIStringCS =
        CIStringCS.empty
    }

  /** `Monoid` instance: concatenates the `toString` values and builds a new instance from the
    * result.
    */
  implicit val monoidForCIStringCS: Monoid[CIStringCS] =
    new Monoid[CIStringCS] {
      override val empty: CIStringCS = CIStringCS.empty

      override def combine(x: CIStringCS, y: CIStringCS): CIStringCS = {
        val joined = x.toString + y.toString
        CIStringCS(joined)
      }

      // Accumulate into a single builder so the result is constructed only once,
      // rather than once per pairwise combine.
      override def combineAll(xs: IterableOnce[CIStringCS]): CIStringCS = {
        val builder: StringBuilder = new StringBuilder
        for (piece <- xs.iterator) builder.append(piece.toString)
        CIStringCS(builder.result())
      }
    }
}
87 changes: 87 additions & 0 deletions core/src/main/scala/org/typelevel/ci/CIStringS.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2020 Typelevel
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.typelevel.ci

import cats._
import cats.kernel._
import cats.syntax.all._

/** A `String` paired with its `SimpleCaseFoldedString` form.
  *
  * Equality and hashing are determined only by `asSimpleCaseFoldedString`; the `toString` value
  * takes no part in either. Two instances holding different `toString` values are therefore equal
  * whenever their folded forms are equal.
  *
  * NOTE(review): judging by the type name, the folded form presumably implements Unicode default
  * caseless matching with ''simple'' case folding. Confirm against `SimpleCaseFoldedString`.
  *
  * @param toString
  *   the `String` value this instance was constructed with
  * @param asSimpleCaseFoldedString
  *   the folded form used for `equals` and `hashCode`
  */
final class CIStringS private (
    override val toString: String,
    val asSimpleCaseFoldedString: SimpleCaseFoldedString)
    extends Serializable {

  /** The hash code of the folded form, which keeps it consistent with `equals`. */
  override def hashCode(): Int =
    asSimpleCaseFoldedString.hashCode

  /** `true` only when `that` is a [[CIStringS]] whose folded form equals this one's. */
  override def equals(that: Any): Boolean =
    that match {
      case other: CIStringS =>
        asSimpleCaseFoldedString == other.asSimpleCaseFoldedString
      case _ =>
        false
    }
}

/** Constructors and type class instances for [[CIStringS]]. */
object CIStringS {

  /** Builds a [[CIStringS]] that keeps `value` as its `toString` and pairs it with
    * `SimpleCaseFoldedString(value)`.
    */
  def apply(value: String): CIStringS = {
    val folded = SimpleCaseFoldedString(value)
    new CIStringS(value, folded)
  }

  /** The [[CIStringS]] built from the empty `String`. */
  val empty: CIStringS = CIStringS("")

  /** Combined `Hash` and `Order` instance.
    *
    * Hashing uses the instance's own `hashCode`; ordering compares the folded forms.
    */
  implicit val hashAndOrderForCIStringS: Hash[CIStringS] with Order[CIStringS] =
    new Hash[CIStringS] with Order[CIStringS] {
      override def compare(x: CIStringS, y: CIStringS): Int =
        x.asSimpleCaseFoldedString.compare(y.asSimpleCaseFoldedString)

      override def hash(x: CIStringS): Int =
        x.hashCode
    }

  /** Standard library `Ordering`, derived from the `Order` instance. */
  implicit val orderingForCIStringS: Ordering[CIStringS] =
    hashAndOrderForCIStringS.toOrdering

  /** `Show` instance that renders via `toString`. */
  implicit val showForCIStringS: Show[CIStringS] =
    Show.fromToString

  /** `LowerBounded` instance whose minimum is [[empty]], ordered by the `Order` instance. */
  implicit val lowerBoundForCIStringS: LowerBounded[CIStringS] =
    new LowerBounded[CIStringS] {
      override val partialOrder: PartialOrder[CIStringS] =
        hashAndOrderForCIStringS

      override val minBound: CIStringS =
        CIStringS.empty
    }

  /** `Monoid` instance: concatenates the `toString` values and builds a new instance from the
    * result.
    */
  implicit val monoidForCIStringS: Monoid[CIStringS] =
    new Monoid[CIStringS] {
      override val empty: CIStringS = CIStringS.empty

      override def combine(x: CIStringS, y: CIStringS): CIStringS = {
        val joined = x.toString + y.toString
        CIStringS(joined)
      }

      // Accumulate into a single builder so the result is constructed only once,
      // rather than once per pairwise combine.
      override def combineAll(xs: IterableOnce[CIStringS]): CIStringS = {
        val builder: StringBuilder = new StringBuilder
        for (piece <- xs.iterator) builder.append(piece.toString)
        CIStringS(builder.result())
      }
    }
}
Loading