Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

readIndex : avoid creating Long objects for each lzo block #39

Merged
merged 1 commit into from

2 participants

@rangadi
Collaborator

This is a follow-up to #37. It reads the index more efficiently and avoids creating a Long object for each lzo block.

@dvryaboy dvryaboy merged commit d01a197 into from
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
Showing with 21 additions and 28 deletions.
  1. +21 −28 src/java/com/hadoop/compression/lzo/LzoIndex.java
View
49 src/java/com/hadoop/compression/lzo/LzoIndex.java
@@ -20,9 +20,8 @@
import java.io.EOFException;
import java.io.IOException;
-import java.util.ArrayList;
+import java.nio.ByteBuffer;
import java.util.Arrays;
-import java.util.List;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@@ -30,6 +29,8 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
@@ -166,38 +167,30 @@ public long alignSliceEndToIndex(long end, long fileSize) {
*/
public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException {
FSDataInputStream indexIn = null;
+ Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
+
try {
- Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
+ indexIn = fs.open(indexFile);
+ } catch (IOException fileNotFound) {
+ // return empty index, fall back to the unsplittable mode
+ return new LzoIndex();
+ }
- try {
- indexIn = fs.open(indexFile);
- } catch (IOException fileNotFound) {
- // return empty index, fall back to the unsplittable mode
- return new LzoIndex();
- }
+ int capacity = 16 * 1024 * 8; //size for a 4GB file (with 256KB lzo blocks)
+ DataOutputBuffer bytes = new DataOutputBuffer(capacity);
- int capacity = 16 * 1024; //number of 256KB lzo blocks in a 4GB file
- List<Long> blocks = new ArrayList<Long>(capacity);
+ // copy indexIn and close it
+ IOUtils.copyBytes(indexIn, bytes, 4*1024, true);
- // read until EOF
- while (true) {
- try {
- blocks.add(indexIn.readLong());
- } catch (EOFException e) {
- break;
- }
- }
+ ByteBuffer bytesIn = ByteBuffer.wrap(bytes.getData(), 0, bytes.getLength());
+ int blocks = bytesIn.remaining()/8;
+ LzoIndex index = new LzoIndex(blocks);
- LzoIndex index = new LzoIndex(blocks.size());
- for (int i = 0; i < blocks.size(); i++) {
- index.set(i, blocks.get(i));
- }
- return index;
- } finally {
- if (indexIn != null) {
- indexIn.close();
- }
+ for (int i = 0; i < blocks; i++) {
+ index.set(i, bytesIn.getLong());
}
+
+ return index;
}
/**
Something went wrong with that request. Please try again.