Skip to content

Commit

Permalink
Merge branch 'develop' into rubanm/generic_lzo_scheme
Browse files Browse the repository at this point in the history
  • Loading branch information
rubanm committed May 15, 2015
2 parents 925291c + 20fb806 commit 92cd15e
Show file tree
Hide file tree
Showing 58 changed files with 1,966 additions and 299 deletions.
26 changes: 13 additions & 13 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,47 @@ script:
matrix:
include:
#BASE TESTS
- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-args scalding-date"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-args scalding-date"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-avro scalding-hraven scalding-commons"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-core"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-core"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-hadoop-test"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-hadoop-test"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-jdbc scalding-json"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-jdbc scalding-json"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-macros"
script: "scripts/run_test.sh"

Expand All @@ -56,27 +56,27 @@ matrix:
script: "scripts/run_test.sh"

# not committed yet
# - scala: 2.10.4
# - scala: 2.10.5
# env: BUILD="base" TEST_TARGET="scalding-commons-macros"
# script: "scripts/run_test.sh"

# - scala: 2.11.5
# env: BUILD="base" TEST_TARGET="scalding-commons-macros"
# script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-parquet scalding-parquet-scrooge"
script: "scripts/run_test.sh"

- scala: 2.11.5
env: BUILD="base" TEST_TARGET="scalding-parquet scalding-parquet-scrooge"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="base" TEST_TARGET="scalding-repl"
script: "scripts/run_test.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
Expand All @@ -88,7 +88,7 @@ matrix:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_tutorials.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test matrix tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding"
Expand All @@ -100,7 +100,7 @@ matrix:
- "scripts/build_assembly_no_test.sh scalding"
- "scripts/test_matrix_tutorials.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test repl and typed tutorials"
script:
- "scripts/build_assembly_no_test.sh scalding-repl"
Expand All @@ -114,7 +114,7 @@ matrix:
- "scripts/build_assembly_no_test.sh scalding-core"
- "scripts/test_typed_tutorials.sh"

- scala: 2.10.4
- scala: 2.10.5
env: BUILD="test execution tutorials"
script:
- "scripts/build_assembly_no_test.sh execution-tutorial"
Expand Down
46 changes: 46 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,51 @@
# Scalding #

### Version 0.14.0 ###
* add .unit to Execution object #1189
* Override hashCode for Args #1190
* Put a value in an exception message #1191
* Add an exclusiveUpper method to DateRange #1194
* Convert LzoTextDelimited to Cascading scheme. #1179
* Remove Travis IRC notifications #1200
* add LookupJoin and LookupJoinTest changes from summingbird #1199
* Add a new ExecutionApp tutorial #1196
* Move main simple example to be the typed API, and put the .'s at the sta... #1193
* Add Execution.withArgs #1205
* Config/Cascading updater #1197
* Remove algebird serializers #1206
* remove warnings in CumulativeSum #1215
* Implicit execution context / easier switching between modes #1113
* add row l1 normalize #1214
* provide Args as an implicit val #1219
* call sourceConfInit when reading from taps in local mode #1228
* Add distinctCount and distinctValues helper methods to KeyedList. #1232
* import hygiene: remove unused imports and remove JavaConversions use #1239
* Swap hash and filename for filename-extension-sensitive code #1243
* Remove more unused imports #1240
* Provide useHdfsLocalMode for an easy switch to mapreduce local mode #1244
* upgrade scalacheck and scalatest #1246
* Optimize string and (hopefully) number comparisons a bit #1241
* Note the active FlowProcess for Joiners #1235
* Make sure Executions are executed at most once #1253
* Fix Config.getUniqueIDs #1254
* Add MustHasReducers trait. #1252
* Make sure the EvalCache thread isDaemon #1255
* Use non-regex split function #1251
* make InputSizeReducerEstimator work for any CompositeTap #1256
* TimePathedSource helper methods #1257
* Fix for reducer estimation not working correctly if withReducers is set to 1 reducer #1263
* Add make(dest) to TypedPipe #1217
* Fix SimpleDateFormat caching by default #1265
* upgrade sbt and sbt launcher script #1270
* Add TypedPipeDiff for comparing typed pipes #1266
* Change separator from \1 to \u0001 #1271
* Disable reducer estimation for map-only steps #1276
* Local sources support multiple paths #1275
* fix the spelling of the cumulativeSumTest file #1281
* Hydrate both sides of sampledCounts in skewJoinWithSmaller #1278
* Bijection 0.8.0, algebird 0.10.0, chill 0.6.0, scala 2.10.5 #1287
* Remove some deprecated items #1288

### Version 0.13.1 ###
* Back out 4 changes to be binary compatible: https://github.com/twitter/scalding/pull/1187
* Use java.util.Random instead of scala.util.Random: https://github.com/twitter/scalding/pull/1186
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Scalding is a Scala library that makes it easy to specify Hadoop MapReduce jobs.

![Scalding Logo](https://raw.github.com/twitter/scalding/develop/logo/scalding.png)

Current version: `0.13.1`
Current version: `0.14.0`

## Word Count

Expand Down
44 changes: 30 additions & 14 deletions project/Build.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,23 @@ object ScaldingBuild extends Build {
}
def isScala210x(scalaVersion: String) = scalaBinaryVersion(scalaVersion) == "2.10"

val algebirdVersion = "0.9.0"
val algebirdVersion = "0.10.0"
val avroVersion = "1.7.4"
val bijectionVersion = "0.7.2"
val bijectionVersion = "0.8.0"
val cascadingAvroVersion = "2.1.2"
val chillVersion = "0.5.2"
val chillVersion = "0.6.0"
val dfsDatastoresVersion = "1.3.4"
val elephantbirdVersion = "4.7"
val elephantbirdVersion = "4.8"
val hadoopLzoVersion = "0.4.16"
val hadoopVersion = "1.2.1"
val hbaseVersion = "0.94.10"
val hravenVersion = "0.9.13"
val jacksonVersion = "2.4.2"
val json4SVersion = "3.2.11"
val paradiseVersion = "2.0.1"
val parquetVersion = "1.6.0rc4"
val protobufVersion = "2.4.1"
val quasiquotesVersion = "2.0.1"
val scalaCheckVersion = "1.12.2"
val scalaTestVersion = "2.2.4"
val scalameterVersion = "0.6"
Expand All @@ -47,9 +49,9 @@ object ScaldingBuild extends Build {
val sharedSettings = Project.defaultSettings ++ assemblySettings ++ scalariformSettings ++ Seq(
organization := "com.twitter",

scalaVersion := "2.10.4",
scalaVersion := "2.10.5",

crossScalaVersions := Seq("2.10.4", "2.11.5"),
crossScalaVersions := Seq("2.10.5", "2.11.5"),

ScalariformKeys.preferences := formattingPreferences,

Expand Down Expand Up @@ -224,7 +226,7 @@ object ScaldingBuild extends Build {
Some(subProj)
.filterNot(unreleasedModules.contains(_))
.map {
s => "com.twitter" % ("scalding-" + s + "_2.10") % "0.13.0"
s => "com.twitter" % ("scalding-" + s + "_2.10") % "0.14.0"
}

def module(name: String) = {
Expand Down Expand Up @@ -270,7 +272,7 @@ object ScaldingBuild extends Build {
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.slf4j" % "slf4j-log4j12" % slf4jVersion % "provided"
)
).dependsOn(scaldingArgs, scaldingDate, maple)
).dependsOn(scaldingArgs, scaldingDate, scaldingSerialization, maple)

lazy val scaldingCommons = module("commons").settings(
libraryDependencies ++= Seq(
Expand Down Expand Up @@ -303,17 +305,20 @@ object ScaldingBuild extends Build {
).dependsOn(scaldingCore)

lazy val scaldingParquet = module("parquet").settings(
libraryDependencies ++= Seq(
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
// see https://issues.apache.org/jira/browse/PARQUET-143 for exclusions
"com.twitter" % "parquet-cascading" % parquetVersion
exclude("com.twitter", "parquet-pig")
exclude("com.twitter.elephantbird", "elephant-bird-pig")
exclude("com.twitter.elephantbird", "elephant-bird-core"),
"org.apache.thrift" % "libthrift" % "0.7.0",
"org.slf4j" % "slf4j-api" % slf4jVersion,
"org.apache.hadoop" % "hadoop-core" % hadoopVersion % "provided"
)
).dependsOn(scaldingCore)
"org.apache.hadoop" % "hadoop-core" % hadoopVersion % "provided",
"org.scala-lang" % "scala-reflect" % scalaVersion,
"com.twitter" %% "bijection-macros" % bijectionVersion
) ++ (if(isScala210x(scalaVersion)) Seq("org.scalamacros" %% "quasiquotes" % quasiquotesVersion) else Seq())
}, addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full))
.dependsOn(scaldingCore, scaldingHadoopTest)

def scaldingParquetScroogeDeps(version: String) = {
if (isScala210x(version))
Expand Down Expand Up @@ -387,6 +392,17 @@ object ScaldingBuild extends Build {
run <<= (run in Unprovided)
)

// zero dependency serialization module
lazy val scaldingSerialization = module("serialization")
lazy val scaldingSerializationMacros = module("serialization-macros").settings(
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
"org.scala-lang" % "scala-library" % scalaVersion,
"org.scala-lang" % "scala-reflect" % scalaVersion
) ++ (if(isScala210x(scalaVersion)) Seq("org.scalamacros" %% "quasiquotes" % "2.0.1") else Seq())
},
addCompilerPlugin("org.scalamacros" % "paradise" % "2.0.1" cross CrossVersion.full)
).dependsOn(scaldingSerialization)

lazy val scaldingJson = module("json").settings(
libraryDependencies <++= (scalaVersion) { scalaVersion => Seq(
"org.apache.hadoop" % "hadoop-core" % hadoopVersion % "provided",
Expand Down Expand Up @@ -423,9 +439,9 @@ object ScaldingBuild extends Build {
"org.scala-lang" % "scala-library" % scalaVersion,
"org.scala-lang" % "scala-reflect" % scalaVersion,
"com.twitter" %% "bijection-macros" % bijectionVersion
) ++ (if(isScala210x(scalaVersion)) Seq("org.scalamacros" %% "quasiquotes" % "2.0.1") else Seq())
) ++ (if(isScala210x(scalaVersion)) Seq("org.scalamacros" %% "quasiquotes" % quasiquotesVersion) else Seq())
},
addCompilerPlugin("org.scalamacros" % "paradise" % "2.0.1" cross CrossVersion.full)
addCompilerPlugin("org.scalamacros" % "paradise" % paradiseVersion cross CrossVersion.full)
).dependsOn(scaldingCore, scaldingHadoopTest)

// This one uses a different naming convention
Expand Down
2 changes: 1 addition & 1 deletion scalding-core/src/main/scala/com/twitter/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ package object scalding {
/**
* Make sure this is in sync with version.sbt
*/
val scaldingVersion: String = "0.13.1"
val scaldingVersion: String = "0.14.0"

object RichPathFilter {
implicit def toRichPathFilter(f: PathFilter) = new RichPathFilter(f)
Expand Down
34 changes: 0 additions & 34 deletions scalding-core/src/main/scala/com/twitter/scalding/Job.scala
Original file line number Diff line number Diff line change
Expand Up @@ -470,40 +470,6 @@ abstract class ExecutionJob[+T](args: Args) extends Job(args) {
}
}

/*
* this allows you to use ExecutionContext style, but wrap it in a job
* val ecFn = { (implicit ec: ExecutionContext) =>
* // do stuff here
* };
* class MyClass(args: Args) extends ExecutionContextJob(args) {
* def job = ecFn
* }
* Now you can run it with Tool as a standard Job-framework style.
* Only use this if you have an existing ExecutionContext style function
* you want to run as a Job
*/
@deprecated("Use ExecutionJob", "2014-07-29")
abstract class ExecutionContextJob[+T](args: Args) extends Job(args) {
/**
* This can be assigned from a Function1:
* def job = (ectxJob: (ExecutionContext => T))
*/
def job: Reader[ExecutionContext, T]
/**
* This is the result of calling the job on the context for this job
* you should NOT call this in the job Reader (or reference this class at all
* in reader
*/
@transient final lazy val result: Try[T] = ec.map(job(_)) // mutate the flowDef with the job

private[this] final def ec: Try[ExecutionContext] =
Config.tryFrom(config).map { conf => ExecutionContext.newContext(conf)(flowDef, mode) }

override def buildFlow: Flow[_] = {
val forcedResult = result.get // make sure we have applied job once
super.buildFlow
}
}

/*
* Run a list of shell commands through bash in the given order. Return success
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,6 @@ trait ReduceOperations[+Self <: ReduceOperations[Self]] extends java.io.Serializ
hyperLogLogMap[T, HLL](f, errPercent) { hll => hll }
}

@deprecated("use of approximateUniqueCount is preferred.", "0.8.3")
def approxUniques(f: (Fields, Fields), errPercent: Double = 1.0) = {
// Legacy (pre-bijection) approximate unique count that uses in.toString.getBytes to
// obtain a long hash code. We specify the kludgy CTuple => Array[Byte] bijection
// explicitly.
implicit def kludgeHasher(in: CTuple) = in.toString.getBytes("UTF-8")
hyperLogLogMap[CTuple, Double](f, errPercent) { _.estimatedSize }
}

private[this] def hyperLogLogMap[T <% Array[Byte]: TupleConverter, U: TupleSetter](f: (Fields, Fields), errPercent: Double = 1.0)(fn: HLL => U) = {
//bits = log(m) == 2 *log(104/errPercent) = 2log(104) - 2*log(errPercent)
def log2(x: Double) = scala.math.log(x) / scala.math.log(2.0)
Expand Down Expand Up @@ -320,17 +311,6 @@ trait ReduceOperations[+Self <: ReduceOperations[Self]] extends java.io.Serializ
def sum[T](fs: Symbol*)(implicit sg: Semigroup[T], tconv: TupleConverter[T], tset: TupleSetter[T]): Self =
sum[T](fs -> fs)(sg, tconv, tset)

@deprecated("Use sum", "0.9.0")
def plus[T](fd: (Fields, Fields))(implicit sg: Semigroup[T], tconv: TupleConverter[T], tset: TupleSetter[T]): Self =
sum[T](fd)(sg, tconv, tset)
/**
* The same as `plus(fs -> fs)`
* Assumed to be a commutative operation. If you don't want that, use .forceToReducers
*/
@deprecated("Use sum", "0.9.0")
def plus[T](fs: Symbol*)(implicit sg: Semigroup[T], tconv: TupleConverter[T], tset: TupleSetter[T]): Self =
sum[T](fs -> fs)(sg, tconv, tset)

/**
* Returns the product of all the items in this grouping
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class KryoHadoop(config: Config) extends KryoInstantiator {
newK.register(classOf[com.twitter.algebird.HyperLogLogMonoid], new HLLMonoidSerializer)
newK.register(classOf[com.twitter.algebird.Moments], new MomentsSerializer)
newK.addDefaultSerializer(classOf[com.twitter.algebird.HLL], new HLLSerializer)
// Don't serialize Boxed instances using Kryo.
newK.addDefaultSerializer(classOf[com.twitter.scalding.serialization.Boxed[_]], new ThrowingSerializer)
/**
* AdaptiveVector is IndexedSeq, which picks up the chill IndexedSeq serializer
* (which is its own bug), force using the fields serializer here
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ import com.esotericsoftware.kryo.io.{ Input, Output }

import com.twitter.scalding._

/**
* This is a runtime check for types we should never be serializing
*/
/**
 * A Kryo serializer that always fails. It is registered as the default
 * serializer for types (e.g. Boxed instances) that must never be routed
 * through Kryo; reaching either method at runtime indicates a
 * serialization configuration bug rather than a recoverable condition.
 */
class ThrowingSerializer[T] extends KSerializer[T] {
  override def write(kryo: Kryo, output: Output, t: T): Unit = {
    sys.error(s"Kryo should never be used to serialize an instance: $t")
  }
  override def read(kryo: Kryo, input: Input, t: Class[T]): T =
    // Fix: the original literal lacked the `s` interpolator, so the message
    // printed the raw text "$t" instead of the offending class.
    sys.error(s"Kryo should never be used to serialize an instance, class: $t")
}

/**
* *
* Below are some serializers for objects in the scalding project.
Expand Down
Loading

0 comments on commit 92cd15e

Please sign in to comment.