diff --git a/src/main/scala/com/twitter/scalding/FileSource.scala b/src/main/scala/com/twitter/scalding/FileSource.scala
index 0fe8fcc2c3..3978b8ae18 100644
--- a/src/main/scala/com/twitter/scalding/FileSource.scala
+++ b/src/main/scala/com/twitter/scalding/FileSource.scala
@@ -157,7 +157,7 @@ class ScaldingMultiSourceTap(taps : Seq[Tap[JobConf, RecordReader[_,_], OutputCo
 trait TextLineScheme extends Mappable[String] {
   import Dsl._
   override val converter = implicitly[TupleConverter[String]]
-  override def localScheme = new CLTextLine()
+  override def localScheme = new CLTextLine(new Fields("offset","line"), Fields.ALL)
   override def hdfsScheme = HadoopSchemeInstance(new CHTextLine())
   //In textline, 0 is the byte position, the actual text string is in column 1
   override def sourceFields = Dsl.intFields(Seq(1))
diff --git a/src/test/scala/com/twitter/scalding/CoreTest.scala b/src/test/scala/com/twitter/scalding/CoreTest.scala
index 9850b8b6b9..57934ef49e 100644
--- a/src/test/scala/com/twitter/scalding/CoreTest.scala
+++ b/src/test/scala/com/twitter/scalding/CoreTest.scala
@@ -725,15 +725,15 @@ class ForceReducersTest extends Specification with TupleConversions {
 class ToListJob(args : Args) extends Job(args) {
   TextLine(args("in")).read
     .flatMap('line -> 'words){l : String => l.split(" ")}
-    .groupBy('num){ _.toList[String]('words -> 'wordList) }
+    .groupBy('offset){ _.toList[String]('words -> 'wordList) }
     .map('wordList -> 'wordList){w : List[String] => w.mkString(" ")}
-    .project('num, 'wordList)
+    .project('offset, 'wordList)
     .write(Tsv(args("out")))
 }
 
 class NullListJob(args : Args) extends Job(args) {
   TextLine(args("in")).read
-    .groupBy('num){ _.toList[String]('line -> 'lineList).spillThreshold(100) }
+    .groupBy('offset){ _.toList[String]('line -> 'lineList).spillThreshold(100) }
     .map('lineList -> 'lineList) { ll : List[String] => ll.mkString(" ") }
     .write(Tsv(args("out")))
 }
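
The patch names TextLine's local scheme fields ("offset", "line") and updates the tests to group on 'offset instead of 'num. A minimal sketch (not part of the patch) of a job consuming TextLine with the renamed field, assuming the same scalding Dsl used in the tests above; WordsPerLineJob and the size('wordCount) aggregation are illustrative only:

import com.twitter.scalding._

// Illustrative job: counts words per input line, grouping on the 'offset
// field (the byte position of the line) that TextLine now exposes.
class WordsPerLineJob(args : Args) extends Job(args) {
  TextLine(args("in")).read
    .flatMap('line -> 'word){ l : String => l.split(" ") }
    .groupBy('offset){ _.size('wordCount) }
    .project('offset, 'wordCount)
    .write(Tsv(args("out")))
}

Since 'offset is the byte position of each line, it is unique per line, so grouping on it yields one group per input line.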