Skip to content

Commit

Permalink
Fixed reduce estimator for paths with a glob pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
dieu committed Mar 2, 2017
1 parent 8bebb57 commit 1cbcac6
Showing 1 changed file with 12 additions and 1 deletion.
Expand Up @@ -54,12 +54,23 @@ object Common {
/**
 * Overload for a flow step: converts the step's sources (`step.getSources`) to a Scala `Seq`
 * and delegates to the `unrollTaps` overload that accepts that sequence (defined outside
 * this excerpt).
 *
 * @param step the flow step whose source taps are unrolled
 * @return whatever the delegated `unrollTaps` overload returns for the step's sources
 */
def unrollTaps(step: FlowStep[JobConf]): Seq[Tap[_, _, _]] =
unrollTaps(step.getSources.asScala.toSeq)

/**
 * Compute the size in bytes of each input path configured for the step.
 *
 * An input path may contain a glob pattern, so every path is expanded with
 * `FileSystem.globStatus` and the content-summary lengths of all matches are summed.
 *
 * @param step the flow step whose job configuration lists the input paths
 * @return one `(path, size)` pair per configured input path; the size is 0 when the
 *         path (or pattern) matches nothing
 */
def inputSizes(step: FlowStep[JobConf]): Seq[(Path, Long)] = {
  val conf = step.getConfig

  FileInputFormat
    .getInputPaths(conf)
    .map { path =>
      val fs = path.getFileSystem(conf)
      // globStatus returns null (not an empty array) for a non-glob path that does not
      // exist, so wrap it in Option instead of risking a NullPointerException.
      val size = Option(fs.globStatus(path))
        .fold(0L)(_.map(status => fs.getContentSummary(status.getPath).getLength).sum)

      path -> size
    }
}

/**
 * Total size in bytes across all input paths of the step.
 *
 * @param step the flow step whose inputs are measured
 * @return the sum of the per-path sizes reported by `inputSizes`
 */
def totalInputSize(step: FlowStep[JobConf]): Long =
  inputSizes(step).foldLeft(0L) { case (total, (_, bytes)) => total + bytes }
Expand Down

0 comments on commit 1cbcac6

Please sign in to comment.