readIndex : avoid 2 extra RPCs to HDFS for each file.

commit 8546ace436d342bc3dd99cb1c76a24a2a3f8db38 (1 parent: eecf59a)
authored by @rangadi
Showing with 21 additions and 7 deletions.
  1. +21 −7 src/java/com/hadoop/compression/lzo/LzoIndex.java
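
The "2 extra RPCs" are the fs.exists() and fs.getFileStatus() calls removed in the diff below: on HDFS each of those is a separate NameNode round trip, and the open() that follows is needed anyway, so opening the index file directly and reading until EOF does the same work with a single call. What follows is a minimal standalone sketch of the before/after pattern, not the project's code; the class and method names are illustrative, and the RPC counts assume each FileSystem metadata call maps to one NameNode request.

import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class IndexReadSketch {

  // Old shape: exists() and getFileStatus() each cost a NameNode RPC
  // before the open() that is required in any case.
  static List<Long> readWithThreeRpcs(FileSystem fs, Path indexPath) throws IOException {
    if (!fs.exists(indexPath)) {                      // RPC 1
      return new ArrayList<Long>();                   // empty -> unsplittable fallback
    }
    long len = fs.getFileStatus(indexPath).getLen();  // RPC 2
    int entries = (int) (len / 8);                    // the index is a sequence of 8-byte offsets
    List<Long> offsets = new ArrayList<Long>(entries);
    FSDataInputStream in = fs.open(indexPath);        // RPC 3
    try {
      for (int i = 0; i < entries; i++) {
        offsets.add(in.readLong());
      }
    } finally {
      in.close();
    }
    return offsets;
  }

  // New shape: open() alone; a missing file surfaces as an IOException,
  // and hitting EOF replaces the up-front length lookup.
  static List<Long> readWithOneRpc(FileSystem fs, Path indexPath) throws IOException {
    List<Long> offsets = new ArrayList<Long>();
    FSDataInputStream in;
    try {
      in = fs.open(indexPath);                        // single RPC
    } catch (IOException fileNotFound) {
      return offsets;                                 // empty -> unsplittable fallback
    }
    try {
      while (true) {
        try {
          offsets.add(in.readLong());
        } catch (EOFException eof) {
          break;                                      // end of the index file
        }
      }
    } finally {
      in.close();
    }
    return offsets;
  }
}
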
src/java/com/hadoop/compression/lzo/LzoIndex.java (28 changed lines)
@@ -20,7 +20,9 @@
import java.io.EOFException;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@@ -166,17 +168,29 @@ public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException
FSDataInputStream indexIn = null;
try {
Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
- if (!fs.exists(indexFile)) {
+
+ try {
+ indexIn = fs.open(indexFile);
+ } catch (IOException fileNotFound) {
// return empty index, fall back to the unsplittable mode
return new LzoIndex();
}
- long indexLen = fs.getFileStatus(indexFile).getLen();
- int blocks = (int) (indexLen / 8);
- LzoIndex index = new LzoIndex(blocks);
- indexIn = fs.open(indexFile);
- for (int i = 0; i < blocks; i++) {
- index.set(i, indexIn.readLong());
+ int capacity = 16 * 1024; //number of 256KB lzo blocks in a 4GB file
+ List<Long> blocks = new ArrayList<Long>(capacity);
+
+ // read until EOF
+ while (true) {
+ try {
+ blocks.add(indexIn.readLong());
+ } catch (EOFException e) {
+ break;
+ }
+ }
+
+ LzoIndex index = new LzoIndex(blocks.size());
+ for (int i = 0; i < blocks.size(); i++) {
+ index.set(i, blocks.get(i));
}
return index;
} finally {
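
For reference, a minimal usage sketch of the method touched here; the static readIndex(FileSystem, Path) signature is taken from the hunk header above, while isEmpty() and getNumberOfBlocks() are assumed accessors on LzoIndex and may differ from the actual API.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.hadoop.compression.lzo.LzoIndex;

public class LzoIndexReadExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path lzoFile = new Path(args[0]);                 // e.g. an .lzo file on HDFS
    FileSystem fs = lzoFile.getFileSystem(conf);

    // One open() on the .index file; a missing index comes back as an empty LzoIndex
    // rather than an exception (see the catch block in the diff above).
    LzoIndex index = LzoIndex.readIndex(fs, lzoFile);

    if (index.isEmpty()) {                            // assumed accessor
      System.out.println("No index found; treating the file as a single, unsplittable split.");
    } else {
      System.out.println("Index entries: " + index.getNumberOfBlocks()); // assumed accessor
    }
  }
}
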