Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Read index improvement #37

Merged
merged 2 commits into from

2 participants

@rangadi
Collaborator

Dmitriy noticed we were invoking extra FileSystem calls before reading the LZO index. Each of those is an extra RPC to the NameNode and slows down job initialization.

Also fixed LDFLAGS for mac.

@dvryaboy dvryaboy merged commit c3e17d2 into twitter:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
Showing with 27 additions and 8 deletions.
  1. +6 −1 build.xml
  2. +21 −7 src/java/com/hadoop/compression/lzo/LzoIndex.java
View
7 build.xml
@@ -113,6 +113,11 @@
<property name="ivy.publish.pattern" value="hadoop-[revision]-core.[ext]"/>
<property name="jar.name" location="${build.dir}/${final.name}.jar" />
+ <!-- set -no-as-needed for gnu ld -->
+ <condition property="native.ldflags" value="" else="-Wl,--no-as-needed">
+ <os family="mac"/>
+ </condition>
+
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
@@ -247,7 +252,7 @@
<exec dir="${build.native}" executable="sh" failonerror="true">
<env key="OS_NAME" value="${os.name}"/>
<env key="OS_ARCH" value="${os.arch}"/>
- <env key="LDFLAGS" value="-Wl,--no-as-needed"/>
+ <env key="LDFLAGS" value="${native.ldflags}"/>
<env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
<env key="NATIVE_SRCDIR" value="${native.src.dir}"/>
<arg line="${native.src.dir}/configure"/>
View
28 src/java/com/hadoop/compression/lzo/LzoIndex.java
@@ -20,7 +20,9 @@
import java.io.EOFException;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@@ -166,17 +168,29 @@ public static LzoIndex readIndex(FileSystem fs, Path lzoFile) throws IOException
FSDataInputStream indexIn = null;
try {
Path indexFile = lzoFile.suffix(LZO_INDEX_SUFFIX);
- if (!fs.exists(indexFile)) {
+
+ try {
+ indexIn = fs.open(indexFile);
+ } catch (IOException fileNotFound) {
// return empty index, fall back to the unsplittable mode
return new LzoIndex();
}
- long indexLen = fs.getFileStatus(indexFile).getLen();
- int blocks = (int) (indexLen / 8);
- LzoIndex index = new LzoIndex(blocks);
- indexIn = fs.open(indexFile);
- for (int i = 0; i < blocks; i++) {
- index.set(i, indexIn.readLong());
+ int capacity = 16 * 1024; //number of 256KB lzo blocks in a 4GB file
+ List<Long> blocks = new ArrayList<Long>(capacity);
+
+ // read until EOF
+ while (true) {
+ try {
+ blocks.add(indexIn.readLong());
+ } catch (EOFException e) {
+ break;
+ }
+ }
+
+ LzoIndex index = new LzoIndex(blocks.size());
+ for (int i = 0; i < blocks.size(); i++) {
+ index.set(i, blocks.get(i));
}
return index;
} finally {
Something went wrong with that request. Please try again.