Permalink
Browse files

Adds scalacheck tests for SizeHint

Put SizeHint into its own file.
  • Loading branch information...
johnynek committed Sep 1, 2012
1 parent fa98fb4 commit 170756c237dcde291cc628d1ff1dd7a4a1c30d8d
View
@@ -10,6 +10,12 @@ scalaVersion := "2.9.2"
resolvers += "Concurrent Maven Repo" at "http://conjars.org/repo"
+// Use ScalaCheck: extra repositories consulted when resolving dependencies
+resolvers ++= Seq(
+ "snapshots" at "http://oss.sonatype.org/content/repositories/snapshots", // Sonatype snapshot builds
+ "releases" at "http://oss.sonatype.org/content/repositories/releases" // Sonatype releases; NOTE(review): both URLs are plain http - confirm https is not required
+)
+
//resolvers += "Twitter Artifactory" at "http://artifactory.local.twitter.com/libs-releases-local"
libraryDependencies += "cascading" % "cascading-core" % "2.0.2"
@@ -30,6 +36,11 @@ libraryDependencies += "commons-lang" % "commons-lang" % "2.4"
libraryDependencies += "org.scala-tools.testing" % "specs_2.8.1" % "1.6.6" % "test"
+libraryDependencies ++= Seq( // test-scoped dependencies added for the property tests
+ "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", // %% selects the artifact built for the configured scalaVersion
+ "org.scala-tools.testing" % "specs_2.9.0-1" % "1.6.8" % "test" // NOTE(review): specs_2.8.1 1.6.6 is still declared just above - confirm both are intended
+)
+
parallelExecution in Test := false
seq(assemblySettings: _*)
@@ -96,51 +96,6 @@ object Matrix {
}
}
-sealed abstract class SizeHint {
- def * (other : SizeHint) : SizeHint
- def + (other : SizeHint) : SizeHint
- def total : Option[Long]
- def setCols(cols : Long) : SizeHint
- def setRows(rows : Long) : SizeHint
- def setColsToRows : SizeHint
- def setRowsToCols : SizeHint
- def transpose : SizeHint
-}
-
-// If we have no idea, we still don't have any idea, this is like NaN
-case object NoClue extends SizeHint {
- def * (other : SizeHint) = NoClue
- def + (other : SizeHint) = NoClue
- def total = None
- def setCols(cols : Long) = FiniteHint(-1L, cols)
- def setRows(rows : Long) = FiniteHint(rows, -1L)
- def setColsToRows = NoClue
- def setRowsToCols = NoClue
- def transpose = NoClue
-}
-
-case class FiniteHint(rows : Long = -1L, cols : Long = -1L) extends SizeHint {
- def *(other : SizeHint) = {
- other match {
- case NoClue => NoClue
- case FiniteHint(orows, ocols) => FiniteHint(rows, ocols)
- }
- }
- def +(other : SizeHint) = {
- other match {
- case NoClue => NoClue
- // In this case, a hint on one side, will overwrite lack of knowledge (-1L)
- case FiniteHint(orows, ocols) => FiniteHint(scala.math.max(rows,orows), scala.math.max(cols,ocols))
- }
- }
- def total = if(rows >= 0 && cols >= 0) { Some(rows * cols) } else None
- def setCols(ncols : Long) = FiniteHint(rows, ncols)
- def setRows(nrows : Long) = FiniteHint(nrows, cols)
- def setColsToRows = FiniteHint(rows, rows)
- def setRowsToCols = FiniteHint(cols, cols)
- def transpose = FiniteHint(cols, rows)
-}
-
// The linear algebra objects (Matrix, *Vector, Scalar) wrap pipes and have some
// common properties. The main common pattern is the desire to write them to sources
// without needless duplication of code.
@@ -30,30 +30,6 @@ import cascading.tuple.Fields
import scala.math.Ordering
import scala.annotation.tailrec
-/** Allows us to sort matrices by approximate type
- */
-object SizeHintOrdering extends Ordering[SizeHint] with java.io.Serializable {
- def compare(left : SizeHint, right : SizeHint) : Int = {
- (left, right) match {
- case (NoClue, FiniteHint(_,_)) => 1
- case (FiniteHint(_,_),NoClue) => -1
- case (NoClue, NoClue) => 0
- // Both have a size:
- case _ => {
- if( left.total.isEmpty ) {
- 1
- }
- else if (right.total.isEmpty) {
- -1
- }
- else {
- left.total.get.compareTo(right.total.get)
- }
- }
- }
- }
-}
-
/** Abstracts the approach taken to join the two matrices
*/
abstract class MatrixJoiner extends java.io.Serializable {
@@ -0,0 +1,127 @@
+/*
+Copyright 2012 Twitter, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package com.twitter.scalding.mathematics
+
+/** Companion of the SizeHint hierarchy; puts a default ordering into implicit scope. */
+object SizeHint {
+  // Annotated explicitly: an un-annotated implicit would expose the singleton
+  // type SizeHintOrdering.type instead of the Ordering[SizeHint] callers ask for.
+  implicit val ordering : Ordering[SizeHint] = SizeHintOrdering
+}
+
+sealed abstract class SizeHint { // base type of all size hints; sealed, so every subclass is declared in this file
+ def * (other : SizeHint) : SizeHint // hint for a product (FiniteHint keeps this.rows and other.cols)
+ def + (other : SizeHint) : SizeHint // hint for a sum (FiniteHint takes the max of each dimension)
+ def total : Option[Long] // size estimate derived from rows * cols; None when it cannot be computed
+ def setCols(cols : Long) : SizeHint // hint with the column count set to cols
+ def setRows(rows : Long) : SizeHint // hint with the row count set to rows
+ def setColsToRows : SizeHint // hint whose cols equal its rows (NoClue stays NoClue)
+ def setRowsToCols : SizeHint // hint whose rows equal its cols (NoClue stays NoClue)
+ def transpose : SizeHint // hint with rows and cols swapped
+}
+
+// NoClue is the "nothing known" hint. Like NaN it is absorbing: combining it
+// with any other hint, squaring it up or transposing it still yields NoClue.
+// Only pinning one dimension explicitly produces a FiniteHint, with the other
+// dimension left at the unknown marker -1.
+case object NoClue extends SizeHint {
+  def * (other : SizeHint) = this
+  def + (other : SizeHint) = this
+  def total = None
+  def setCols(cols : Long) = FiniteHint(cols = cols)
+  def setRows(rows : Long) = FiniteHint(rows = rows)
+  def setColsToRows = this
+  def setRowsToCols = this
+  def transpose = this
+}
+
+// A hint with explicit dimensions. A value of -1 in either position marks that
+// dimension as unknown.
+case class FiniteHint(rows : Long = -1L, cols : Long = -1L) extends SizeHint {
+  // Product: keeps our row count and the other side's column count.
+  def *(other : SizeHint) = other match {
+    case NoClue => NoClue
+    case FiniteHint(_, ocols) => FiniteHint(rows, ocols)
+    // Treat this hint as a fully dense SparseHint and let SparseHint estimate the fill
+    case sp@SparseHint(_,_,_) => SparseHint(1.0, rows, cols) * sp
+  }
+  // Sum: element-wise maximum of the dimensions.
+  def +(other : SizeHint) = other match {
+    case NoClue => NoClue
+    // In this case, a hint on one side, will overwrite lack of knowledge (-1L)
+    case FiniteHint(orows, ocols) => FiniteHint(rows max orows, cols max ocols)
+    case sp@SparseHint(_,_,_) => sp + this
+  }
+  // rows * cols when both dimensions are known, None otherwise.
+  def total = if (rows >= 0 && cols >= 0) {
+    // A plain Long product wraps around for large dimensions and would report a
+    // bogus (possibly negative) size, so compute exactly and saturate at Long.MaxValue.
+    Some((BigInt(rows) * BigInt(cols)).min(BigInt(Long.MaxValue)).toLong)
+  } else None
+  def setCols(ncols : Long) = FiniteHint(rows, ncols)
+  def setRows(nrows : Long) = FiniteHint(nrows, cols)
+  def setColsToRows = FiniteHint(rows, rows)
+  def setRowsToCols = FiniteHint(cols, cols)
+  def transpose = FiniteHint(cols, rows)
+}
+
+// sparsity is the fraction of the rows and columns that are expected to be present
+// rows / cols follow the FiniteHint convention: a negative value means "unknown".
+case class SparseHint(sparsity : Double, rows : Long, cols : Long) extends SizeHint {
+  // Product: result has our rows and the other side's cols, with an estimated fill.
+  def * (other : SizeHint) : SizeHint = other match {
+    case NoClue => NoClue
+    case FiniteHint(r, c) => this * SparseHint(1.0, r, c)
+    case SparseHint(sp, _, c) =>
+      if (cols < 0) {
+        // The inner dimension is unknown, so cols * sp * sparsity would be a
+        // negative "probability" (and later a negative total). Without it the
+        // fill cannot be estimated; fall back to the dense upper bound.
+        FiniteHint(rows, c)
+      }
+      else {
+        // if I occupy a bin with probability p, and you q, then both: pq
+        // There are cols samples of the above, so the probability one is present:
+        // 1-(1-pq)^cols ~ (cols * p * q) min 1.0
+        val newSp = cols * sp * sparsity
+        if (newSp >= 1.0) FiniteHint(rows, c) else SparseHint(newSp, rows, c)
+      }
+  }
+  // Sum: element-wise maximum of the dimensions, with the combined fill.
+  def + (other : SizeHint) : SizeHint = other match {
+    case NoClue => NoClue
+    case FiniteHint(r, c) => this + SparseHint(1.0, r, c)
+    case SparseHint(sp, r, c) =>
+      if ((sparsity == 1.0) || (sp == 1.0)) {
+        // Either side is dense, so the sum is dense too
+        FiniteHint(rows max r, cols max c)
+      }
+      else {
+        // if I occupy a bin with probability p, and you q, then either: p + q - pq
+        val newSp = sparsity + sp - sp * sparsity
+        SparseHint(newSp, rows max r, cols max c)
+      }
+  }
+  // rows * cols * sparsity when both dimensions are known, None otherwise.
+  def total : Option[Long] = {
+    if ((rows >= 0) && (cols >= 0)) {
+      // Multiply in Double: a Long product rows * cols can overflow before the
+      // sparsity factor scales it down; Double-to-Long conversion saturates instead.
+      Some((rows.toDouble * cols * sparsity).toLong)
+    }
+    else
+      None
+  }
+  def setCols(c : Long) : SizeHint = copy(cols = c)
+  def setRows(r : Long) : SizeHint = copy(rows = r)
+  def setColsToRows : SizeHint = copy(cols = rows)
+  def setRowsToCols : SizeHint = copy(rows = cols)
+  def transpose : SizeHint = copy(cols = rows, rows = cols)
+}
+
+/** Orders size hints by their `total`, so matrices can be sorted by approximate size.
+ * A hint whose total is unknown (None) is compared as -1 and therefore sorts
+ * before every hint with a non-negative total.
+ */
+object SizeHintOrdering extends Ordering[SizeHint] with java.io.Serializable {
+  def compare(left : SizeHint, right : SizeHint) : Int = {
+    val leftTotal : Long = left.total.getOrElse(-1L)
+    val rightTotal : Long = right.total.getOrElse(-1L)
+    if (leftTotal < rightTotal) -1
+    else if (leftTotal > rightTotal) 1
+    else 0
+  }
+}
+
@@ -0,0 +1,51 @@
+package com.twitter.scalding.mathematics
+
+import com.twitter.scalding._
+import org.specs._
+
+import org.scalacheck.Arbitrary
+import org.scalacheck.Arbitrary.arbitrary
+import org.scalacheck.Properties
+import org.scalacheck.Prop.forAll
+import org.scalacheck.Gen._
+
+/** ScalaCheck properties for the SizeHint hierarchy (NoClue, FiniteHint, SparseHint). */
+object SizeHintProps extends Properties("SizeHint") {
+
+  val noClueGen = value(NoClue)
+
+  // Dimensions start at -1 so the "unknown dimension" case is exercised too
+  val finiteHintGen = for ( rows <- choose(-1L, 1000000L);
+                            cols <- choose(-1L, 1000000L))
+    yield FiniteHint(rows, cols)
+
+  val sparseHintGen = for ( rows <- choose(-1L, 1000000L);
+                            cols <- choose(-1L, 1000000L);
+                            sparsity <- choose(0.0, 1.0))
+    yield SparseHint(sparsity, rows, cols)
+
+  implicit val finiteArb : Arbitrary[FiniteHint] = Arbitrary { finiteHintGen }
+  implicit val sparseArb : Arbitrary[SparseHint] = Arbitrary { sparseHintGen }
+  implicit val genHint : Arbitrary[SizeHint] = Arbitrary { oneOf(noClueGen, finiteHintGen, sparseHintGen) }
+
+  property("a+b is at least as big as a") = forAll { (a : SizeHint, b : SizeHint) =>
+    // Only checked when both totals are known; otherwise the property holds vacuously
+    val addT = for( ta <- a.total; tsum <- (a+b).total) yield (tsum >= ta)
+    addT.getOrElse(true)
+  }
+
+  property("ordering makes sense") = forAll { (a : SizeHint, b : SizeHint) =>
+    (List(a,b).max.total.getOrElse(-1L) >= a.total.getOrElse(-1L))
+  }
+
+  property("addition increases sparsity fraction") = forAll { (a : SparseHint, b : SparseHint) =>
+    // Match instead of casting: when either operand has sparsity 1.0 the sum is a
+    // dense FiniteHint, and asInstanceOf[SparseHint] would throw ClassCastException.
+    (a + b) match {
+      case SparseHint(sp, _, _) => sp >= a.sparsity
+      // dense result corresponds to sparsity 1.0, the top of the generated range
+      case FiniteHint(_, _) => true
+      // two sparse hints never add up to "no idea"
+      case NoClue => false
+    }
+  }
+
+  property("transpose preserves size") = forAll { (a : SizeHint) =>
+    a.transpose.total == a.total
+  }
+
+  property("squaring a finite hint preserves size") = forAll { (a : FiniteHint) =>
+    val sq = a.setRowsToCols
+    val sq2 = a.setColsToRows
+    (sq.total == (sq * sq).total) && (sq2.total == (sq2 * sq2).total)
+  }
+}

0 comments on commit 170756c

Please sign in to comment.