Permalink
Browse files

Merge remote branch 'kw/master'

  • Loading branch information...
2 parents 47d4714 + 3d19b14 commit ceb643f15057b0b5c2664233db33062bf49cb8c6 @toddlipcon toddlipcon committed Sep 5, 2011
View
@@ -0,0 +1 @@
+$Format:%H$
View
@@ -0,0 +1 @@
+.archive-version export-subst
View
@@ -15,11 +15,13 @@
*~
.classpath
+.idea
.project
.settings
.svn
build/
bin/
+out/
*.ipr
*.iml
*.iws
View
@@ -5,7 +5,7 @@ Hadoop-LZO is a project to bring splittable LZO compression to Hadoop. LZO is a
### Origins
-This project builds off the great work done at [code.google.com/p/hadoop-gpl-compression](code.google.com/p/hadoop-gpl-compression). As of issue 41, the differences in this codebase are the following.
+This project builds off the great work done at [http://code.google.com/p/hadoop-gpl-compression](http://code.google.com/p/hadoop-gpl-compression). As of issue 41, the differences in this codebase are the following.
- it fixes a few bugs in hadoop-gpl-compression -- notably, it allows the decompressor to read small or uncompressible lzo files, and also fixes the compressor to follow the lzo standard when compressing small or uncompressible chunks. it also fixes a number of inconsistently caught and thrown exception cases that can occur when the lzo writer gets killed mid-stream, plus some other smaller issues (see commit log).
- it adds the ability to work with Hadoop streaming via the com.apache.hadoop.mapred.DeprecatedLzoTextInputFormat class
View
@@ -28,7 +28,7 @@
<property name="Name" value="Hadoop GPL Compression"/>
<property name="name" value="hadoop-lzo"/>
- <property name="version" value="0.4.10"/>
+ <property name="version" value="0.4.13"/>
<property name="final.name" value="${name}-${version}"/>
<property name="year" value="2008"/>
@@ -1,7 +1,27 @@
-#!/bin/sh
+#!/bin/bash
-if [ -z "$BUILD_REVISION" ]; then
- git rev-parse HEAD
-else
+# Allow user to specify - this is done by packages
+if [ -n "$BUILD_REVISION" ]; then
echo $BUILD_REVISION
+ exit
fi
+
+# If we're in git, use that
+BUILD_REVISION=$(git rev-parse HEAD 2>/dev/null)
+if [ -n "$BUILD_REVISION" ]; then
+ echo "$BUILD_REVISION"
+ exit
+fi
+
+# Otherwise try to use the .archive-version file which
+# is filled in by git exports (e.g. GitHub downloads)
+BIN=$(dirname "${BASH_SOURCE:-$0}")
+BUILD_REVISION=$(cat "$BIN/../.archive-version" 2>/dev/null)
+# Accept it only if non-empty (file may be missing) and not the unexpanded $Format:%H$ placeholder
+if [[ -n "$BUILD_REVISION" && "$BUILD_REVISION" != *Format* ]]; then
+ echo "$BUILD_REVISION"
+ exit
+fi
+
+# Give up
+echo "Unknown build revision"
@@ -46,17 +46,21 @@
public static final String LZO_COMPRESSOR_KEY = "io.compression.codec.lzo.compressor";
public static final String LZO_DECOMPRESSOR_KEY = "io.compression.codec.lzo.decompressor";
+ public static final String LZO_COMPRESSION_LEVEL_KEY = "io.compression.codec.lzo.compression.level";
public static final String LZO_BUFFER_SIZE_KEY = "io.compression.codec.lzo.buffersize";
public static final int DEFAULT_LZO_BUFFER_SIZE = 256 * 1024;
public static final int MAX_BLOCK_SIZE = 64*1024*1024;
+ public static final int UNDEFINED_COMPRESSION_LEVEL = -999; // Constant from LzoCompressor.c
private Configuration conf;
/**
 * Stores the supplied configuration in this codec's <code>conf</code> field.
 *
 * @param conf the configuration to keep; it is stored as-is, without copying
 */
+ @Override
public void setConf(Configuration conf) {
this.conf = conf;
}
/**
 * Returns the configuration currently held in this codec's <code>conf</code> field.
 *
 * @return the stored configuration (the value last assigned by <code>setConf</code>)
 */
+ @Override
public Configuration getConf() {
return conf;
}
@@ -86,6 +90,7 @@ public Configuration getConf() {
* else <code>false</code>
*/
public static boolean isNativeLzoLoaded(Configuration conf) {
+ assert conf != null : "Configuration cannot be null!";
return nativeLzoLoaded && conf.getBoolean("hadoop.native.lib", true);
}
@@ -100,10 +105,12 @@ public static String getRevisionHash() {
}
}
/**
 * Wraps <code>out</code> in a compression stream, using a compressor obtained
 * from <code>createCompressor()</code>.
 *
 * @param out the stream that will receive the compressed bytes
 * @return the result of the two-argument <code>createOutputStream</code>
 * @throws IOException if the two-argument overload throws it
 */
+ @Override
public CompressionOutputStream createOutputStream(OutputStream out) throws IOException {
  final Compressor freshCompressor = createCompressor();
  return createOutputStream(out, freshCompressor);
}
+ @Override
public CompressionOutputStream createOutputStream(OutputStream out,
Compressor compressor) throws IOException {
// Ensure native-lzo library is loaded & initialized
@@ -141,6 +148,7 @@ public CompressionOutputStream createOutputStream(OutputStream out,
compressionOverhead);
}
+ @Override
public Class<? extends Compressor> getCompressorType() {
// Ensure native-lzo library is loaded & initialized
if (!isNativeLzoLoaded(conf)) {
@@ -149,20 +157,24 @@ public CompressionOutputStream createOutputStream(OutputStream out,
return LzoCompressor.class;
}
/**
 * Builds a new <code>LzoCompressor</code> from this codec's configuration.
 *
 * @return a compressor constructed with <code>conf</code>
 * @throws RuntimeException if <code>isNativeLzoLoaded(conf)</code> is false
 */
+ @Override
public Compressor createCompressor() {
+  assert conf != null : "Configuration cannot be null! You must call setConf() before creating a compressor.";
  // Ensure native-lzo library is loaded & initialized before handing out a compressor.
  final boolean nativeAvailable = isNativeLzoLoaded(conf);
  if (nativeAvailable) {
    return new LzoCompressor(conf);
  }
  throw new RuntimeException("native-lzo library not available");
}
/**
 * Wraps <code>in</code> in a decompression stream, using a decompressor obtained
 * from <code>createDecompressor()</code>.
 *
 * @param in the stream supplying compressed bytes
 * @return the result of the two-argument <code>createInputStream</code>
 * @throws IOException if the two-argument overload throws it
 */
+ @Override
public CompressionInputStream createInputStream(InputStream in)
    throws IOException {
  final Decompressor freshDecompressor = createDecompressor();
  return createInputStream(in, freshDecompressor);
}
+ @Override
public CompressionInputStream createInputStream(InputStream in,
Decompressor decompressor)
throws IOException {
@@ -174,6 +186,7 @@ public CompressionInputStream createInputStream(InputStream in,
conf.getInt(LZO_BUFFER_SIZE_KEY, DEFAULT_LZO_BUFFER_SIZE));
}
+ @Override
public Class<? extends Decompressor> getDecompressorType() {
// Ensure native-lzo library is loaded & initialized
if (!isNativeLzoLoaded(conf)) {
@@ -182,6 +195,7 @@ public CompressionInputStream createInputStream(InputStream in,
return LzoDecompressor.class;
}
+ @Override
public Decompressor createDecompressor() {
// Ensure native-lzo library is loaded & initialized
if (!isNativeLzoLoaded(conf)) {
@@ -197,37 +211,54 @@ public Decompressor createDecompressor() {
* Get the default filename extension for this kind of compression.
* @return the extension including the '.'
*/
+ @Override
public String getDefaultExtension() {
return ".lzo_deflate";
}
/**
 * Reads the compressor strategy named under <code>LZO_COMPRESSOR_KEY</code>.
 *
 * @param conf configuration to read from
 * @return the strategy whose enum name is stored under the key, or
 *         <code>LZO1X_1</code> when the key is not set
 */
static LzoCompressor.CompressionStrategy getCompressionStrategy(Configuration conf) {
+  assert conf != null : "Configuration cannot be null!";
  final String defaultName = LzoCompressor.CompressionStrategy.LZO1X_1.name();
  final String strategyName = conf.get(LZO_COMPRESSOR_KEY, defaultName);
  return LzoCompressor.CompressionStrategy.valueOf(strategyName);
}
/**
 * Reads the decompressor strategy named under <code>LZO_DECOMPRESSOR_KEY</code>.
 *
 * @param conf configuration to read from
 * @return the strategy whose enum name is stored under the key, or
 *         <code>LZO1X</code> when the key is not set
 */
static LzoDecompressor.CompressionStrategy getDecompressionStrategy(Configuration conf) {
+  assert conf != null : "Configuration cannot be null!";
  final String defaultName = LzoDecompressor.CompressionStrategy.LZO1X.name();
  final String strategyName = conf.get(LZO_DECOMPRESSOR_KEY, defaultName);
  return LzoDecompressor.CompressionStrategy.valueOf(strategyName);
}
+ static int getCompressionLevel(Configuration conf) {
+ assert conf != null : "Configuration cannot be null!";
+ return conf.getInt(LZO_COMPRESSION_LEVEL_KEY, UNDEFINED_COMPRESSION_LEVEL);
+ }
+
/**
 * Reads the integer stored under <code>LZO_BUFFER_SIZE_KEY</code>.
 *
 * @param conf configuration to read from
 * @return the configured buffer size, or <code>DEFAULT_LZO_BUFFER_SIZE</code>
 *         when the key is not set
 */
static int getBufferSize(Configuration conf) {
+  assert conf != null : "Configuration cannot be null!";
  final int configuredSize = conf.getInt(LZO_BUFFER_SIZE_KEY, DEFAULT_LZO_BUFFER_SIZE);
  return configuredSize;
}
/**
 * Stores the enum name of <code>strategy</code> under <code>LZO_COMPRESSOR_KEY</code>.
 *
 * @param conf configuration to modify
 * @param strategy compressor strategy whose name is written
 */
public static void setCompressionStrategy(Configuration conf,
    LzoCompressor.CompressionStrategy strategy) {
+  assert conf != null : "Configuration cannot be null!";
  final String strategyName = strategy.name();
  conf.set(LZO_COMPRESSOR_KEY, strategyName);
}
/**
 * Stores the enum name of <code>strategy</code> under <code>LZO_DECOMPRESSOR_KEY</code>.
 *
 * @param conf configuration to modify
 * @param strategy decompressor strategy whose name is written
 */
public static void setDecompressionStrategy(Configuration conf,
    LzoDecompressor.CompressionStrategy strategy) {
+  assert conf != null : "Configuration cannot be null!";
  final String strategyName = strategy.name();
  conf.set(LZO_DECOMPRESSOR_KEY, strategyName);
}
+/**
+ * Stores <code>compressionLevel</code> under <code>LZO_COMPRESSION_LEVEL_KEY</code>.
+ *
+ * @param conf configuration to modify
+ * @param compressionLevel the level value to write
+ */
+ public static void setCompressionLevel(Configuration conf, int compressionLevel) {
+   assert conf != null : "Configuration cannot be null!";
+   final int levelToStore = compressionLevel;
+   conf.setInt(LZO_COMPRESSION_LEVEL_KEY, levelToStore);
+ }
+
/**
 * Stores <code>bufferSize</code> under <code>LZO_BUFFER_SIZE_KEY</code>.
 *
 * @param conf configuration to modify
 * @param bufferSize the buffer size value to write
 */
public static void setBufferSize(Configuration conf, int bufferSize) {
+  assert conf != null : "Configuration cannot be null!";
  final int sizeToStore = bufferSize;
  conf.setInt(LZO_BUFFER_SIZE_KEY, sizeToStore);
}
@@ -57,6 +57,7 @@
private int workingMemoryBufLen = 0; // The length of 'working memory' buf.
@SuppressWarnings("unused")
private ByteBuffer workingMemoryBuf; // The 'working memory' for lzo.
+ private int lzoCompressionLevel;
/**
* Used when the user doesn't specify a configuration. We cache a single
@@ -209,9 +210,10 @@ public void reinit(Configuration conf) {
conf = defaultConfiguration;
}
LzoCompressor.CompressionStrategy strategy = LzoCodec.getCompressionStrategy(conf);
+ int compressionLevel = LzoCodec.getCompressionLevel(conf);
int bufferSize = LzoCodec.getBufferSize(conf);
- init(strategy, bufferSize);
+ init(strategy, compressionLevel, bufferSize);
}
/**
@@ -221,7 +223,7 @@ public void reinit(Configuration conf) {
* @param directBufferSize size of the direct buffer to be used.
*/
public LzoCompressor(CompressionStrategy strategy, int directBufferSize) {
- init(strategy, directBufferSize);
+ init(strategy, LzoCodec.UNDEFINED_COMPRESSION_LEVEL, directBufferSize);
}
/**
@@ -254,8 +256,9 @@ private ByteBuffer realloc(ByteBuffer buf, int newSize) {
return ByteBuffer.allocateDirect(newSize);
}
- private void init(CompressionStrategy strategy, int directBufferSize) {
+ private void init(CompressionStrategy strategy, int compressionLevel, int directBufferSize) {
this.strategy = strategy;
+ this.lzoCompressionLevel = compressionLevel;
this.directBufferSize = directBufferSize;
uncompressedDirectBuf = realloc(uncompressedDirectBuf, directBufferSize);
@@ -0,0 +1,75 @@
+/*
+ * This file is part of Hadoop-Gpl-Compression.
+ *
+ * Hadoop-Gpl-Compression is free software: you can redistribute it
+ * and/or modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * Hadoop-Gpl-Compression is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Hadoop-Gpl-Compression. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+package com.hadoop.compression.lzo;
+
+import org.apache.hadoop.conf.Configuration;
+
+import com.hadoop.compression.lzo.LzoIndexer;
+import com.hadoop.compression.lzo.LzopCodec;
+
+/**
+ * Holds the <code>lzo.text.input.format.ignore.nonlzo</code> property key and
+ * its default, the LZO index-file suffix, and filename-suffix checks.
+ */
+public class LzoInputFormatCommon {
+  /**
+   * Key of the boolean property <code>lzo.text.input.format.ignore.nonlzo</code>,
+   * which tells the LZO text input format what to do with non-LZO input files:
+   * when true (the default) such files are silently ignored; when false they
+   * are processed using the standard TextInputFormat.
+   */
+  public static final String IGNORE_NONLZO_KEY = "lzo.text.input.format.ignore.nonlzo";
+
+  /**
+   * Value used for <code>lzo.text.input.format.ignore.nonlzo</code> when the
+   * property is not set.
+   */
+  public static final boolean DEFAULT_IGNORE_NONLZO = true;
+
+  /**
+   * Full extension for LZO index files (".lzo.index"): the LZO extension
+   * followed by the index suffix.
+   */
+  public static final String FULL_LZO_INDEX_SUFFIX =
+      LzopCodec.DEFAULT_LZO_EXTENSION + LzoIndex.LZO_INDEX_SUFFIX;
+
+  /**
+   * Looks up the <code>lzo.text.input.format.ignore.nonlzo</code> property.
+   *
+   * @param conf the Configuration object
+   * @return the property's value in <code>conf</code>, or
+   *         <code>DEFAULT_IGNORE_NONLZO</code> if the property is not set.
+   */
+  public static boolean getIgnoreNonLzoProperty(Configuration conf) {
+    final boolean ignoreNonLzo = conf.getBoolean(IGNORE_NONLZO_KEY, DEFAULT_IGNORE_NONLZO);
+    return ignoreNonLzo;
+  }
+
+  /**
+   * Tests whether a filename carries the LZO extension (".lzo").
+   *
+   * @param filename filename to check.
+   * @return true if the filename ends in ".lzo"
+   */
+  public static boolean isLzoFile(String filename) {
+    final String lzoSuffix = LzopCodec.DEFAULT_LZO_EXTENSION;
+    return filename.endsWith(lzoSuffix);
+  }
+
+  /**
+   * Tests whether a filename carries the full LZO index suffix (".lzo.index").
+   *
+   * @param filename filename to check.
+   * @return true if the filename ends in ".lzo.index"
+   */
+  public static boolean isLzoIndexFile(String filename) {
+    final String indexSuffix = FULL_LZO_INDEX_SUFFIX;
+    return filename.endsWith(indexSuffix);
+  }
+}
@@ -18,6 +18,7 @@
package com.hadoop.compression.lzo;
+import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -42,22 +43,34 @@
public static final int LZOP_VERSION = 0x1010;
/** Latest version of lzop this should be compatible with */
public static final int LZOP_COMPAT_VERSION = 0x0940;
+ public static final String DEFAULT_LZO_EXTENSION = ".lzo";
/**
 * Wraps <code>out</code> in a compression stream, using a compressor obtained
 * from <code>createCompressor()</code>.
 *
 * @param out the stream that will receive the compressed bytes
 * @return the result of the two-argument <code>createOutputStream</code>
 * @throws IOException if the two-argument overload throws it
 */
@Override
public CompressionOutputStream createOutputStream(OutputStream out) throws IOException {
  final Compressor freshCompressor = createCompressor();
  return createOutputStream(out, freshCompressor);
}
+/**
+ * Creates a compression stream over <code>out</code> that is also given
+ * <code>indexOut</code>, using a compressor obtained from
+ * <code>createCompressor()</code>.
+ *
+ * @param out the stream that will receive the compressed bytes
+ * @param indexOut the index stream passed through to the three-argument overload
+ * @return the result of the three-argument <code>createIndexedOutputStream</code>
+ * @throws IOException if the three-argument overload throws it
+ */
+ public CompressionOutputStream createIndexedOutputStream(OutputStream out,
+     DataOutputStream indexOut)
+     throws IOException {
+   final Compressor freshCompressor = createCompressor();
+   return createIndexedOutputStream(out, indexOut, freshCompressor);
+ }
+
/**
 * Creates a compression stream over <code>out</code> with the given compressor
 * by delegating to <code>createIndexedOutputStream</code> with a null index stream.
 *
 * @param out the stream that will receive the compressed bytes
 * @param compressor the compressor handed to the indexed overload
 * @return whatever <code>createIndexedOutputStream(out, null, compressor)</code> returns
 * @throws IOException if the delegate throws it
 */
@Override
public CompressionOutputStream createOutputStream(OutputStream out,
Compressor compressor) throws IOException {
// No index stream is supplied on this path.
+ return createIndexedOutputStream(out, null, compressor);
+ }
+
+/**
+ * Builds an <code>LzopOutputStream</code> over <code>out</code>, passing along
+ * <code>indexOut</code>, the compressor, and the buffer size and compression
+ * strategy read from the configuration.
+ *
+ * @param out the stream that will receive the compressed bytes
+ * @param indexOut index stream handed to <code>LzopOutputStream</code>; the
+ *        two-argument <code>createOutputStream</code> passes null here
+ * @param compressor the compressor handed to <code>LzopOutputStream</code>
+ * @return the newly constructed <code>LzopOutputStream</code>
+ * @throws RuntimeException if <code>isNativeLzoLoaded(getConf())</code> is false
+ * @throws IOException declared by the signature
+ */
+ public CompressionOutputStream createIndexedOutputStream(OutputStream out,
+     DataOutputStream indexOut, Compressor compressor) throws IOException {
  final boolean nativeAvailable = isNativeLzoLoaded(getConf());
  if (!nativeAvailable) {
    throw new RuntimeException("native-lzo library not available");
  }
  // Strategy is looked up by enum name, defaulting to LZO1X_1 when the key is unset.
  final String defaultStrategyName = LzoCompressor.CompressionStrategy.LZO1X_1.name();
  final String strategyName = getConf().get(LZO_COMPRESSOR_KEY, defaultStrategyName);
  final LzoCompressor.CompressionStrategy strategy =
      LzoCompressor.CompressionStrategy.valueOf(strategyName);
  final int bufferSize = getConf().getInt(LZO_BUFFER_SIZE_KEY, DEFAULT_LZO_BUFFER_SIZE);
- return new LzopOutputStream(out, compressor, bufferSize, strategy);
+ return new LzopOutputStream(out, indexOut, compressor, bufferSize, strategy);
}
@Override
@@ -95,6 +108,6 @@ public Decompressor createDecompressor() {
/**
 * Returns the default filename extension for this codec.
 *
 * @return <code>DEFAULT_LZO_EXTENSION</code> (".lzo"), including the leading '.'
 */
@Override
public String getDefaultExtension() {
// The literal was replaced by the shared constant; the value is unchanged.
- return ".lzo";
+ return DEFAULT_LZO_EXTENSION;
}
}
Oops, something went wrong.

0 comments on commit ceb643f

Please sign in to comment.