Skip to content

Commit

Permalink
Merge pull request #271 from mklich/develop
Browse files Browse the repository at this point in the history
Default Fields of TextLine for Local and Hadoop don't match
  • Loading branch information
johnynek committed Jan 12, 2013
2 parents 78edf58 + e9128bb commit 4fa0735
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/com/twitter/scalding/FileSource.scala
Expand Up @@ -157,7 +157,7 @@ class ScaldingMultiSourceTap(taps : Seq[Tap[JobConf, RecordReader[_,_], OutputCo
trait TextLineScheme extends Mappable[String] {
import Dsl._
override val converter = implicitly[TupleConverter[String]]
override def localScheme = new CLTextLine()
override def localScheme = new CLTextLine(new Fields("offset","line"), Fields.ALL)
override def hdfsScheme = HadoopSchemeInstance(new CHTextLine())
//In textline, 0 is the byte position, the actual text string is in column 1
override def sourceFields = Dsl.intFields(Seq(1))
Expand Down
6 changes: 3 additions & 3 deletions src/test/scala/com/twitter/scalding/CoreTest.scala
Expand Up @@ -725,15 +725,15 @@ class ForceReducersTest extends Specification with TupleConversions {
class ToListJob(args : Args) extends Job(args) {
TextLine(args("in")).read
.flatMap('line -> 'words){l : String => l.split(" ")}
.groupBy('num){ _.toList[String]('words -> 'wordList) }
.groupBy('offset){ _.toList[String]('words -> 'wordList) }
.map('wordList -> 'wordList){w : List[String] => w.mkString(" ")}
.project('num, 'wordList)
.project('offset, 'wordList)
.write(Tsv(args("out")))
}

class NullListJob(args : Args) extends Job(args) {
TextLine(args("in")).read
.groupBy('num){ _.toList[String]('line -> 'lineList).spillThreshold(100) }
.groupBy('offset){ _.toList[String]('line -> 'lineList).spillThreshold(100) }
.map('lineList -> 'lineList) { ll : List[String] => ll.mkString(" ") }
.write(Tsv(args("out")))
}
Expand Down

0 comments on commit 4fa0735

Please sign in to comment.