Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'thrift-reimp' of https://github.com/rangadi/elephant-bird…
… into eb1.2.4
- Loading branch information
Showing
14 changed files
with
897 additions
and
779 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
src/java/com/twitter/elephantbird/pig/store/LzoThriftB64LinePigStorage.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package com.twitter.elephantbird.pig.store; | ||
|
||
import java.io.IOException; | ||
|
||
import org.apache.commons.codec.binary.Base64; | ||
import org.apache.pig.data.Tuple; | ||
import org.apache.thrift.TBase; | ||
|
||
import com.twitter.elephantbird.mapreduce.io.ThriftConverter; | ||
import com.twitter.elephantbird.pig.util.PigToThrift; | ||
import com.twitter.elephantbird.pig.util.PigUtil; | ||
import com.twitter.elephantbird.util.Protobufs; | ||
import com.twitter.elephantbird.util.TypeRef; | ||
|
||
/** | ||
* Serializes Pig Tuples into Base-64 encoded, line-delimited Thrift objects. | ||
* The fields in the pig tuple must correspond exactly to the fields in | ||
* the Thrift object, as no name-matching is performed (names of the tuple | ||
* fields are not currently accessible to a StoreFunc. It will be in 0.7, | ||
* so something more flexible will be possible) | ||
*/ | ||
public class LzoThriftB64LinePigStorage<T extends TBase<?, ?>> extends LzoBaseStoreFunc { | ||
|
||
private TypeRef<T> typeRef; | ||
private Base64 base64 = new Base64(); | ||
private PigToThrift<T> pigToThrift; | ||
private ThriftConverter<T> converter; | ||
|
||
public LzoThriftB64LinePigStorage(String thriftClassName) { | ||
typeRef = PigUtil.getThriftTypeRef(thriftClassName); | ||
pigToThrift = PigToThrift.newInstance(typeRef); | ||
converter = ThriftConverter.newInstance(typeRef); | ||
} | ||
|
||
public void putNext(Tuple f) throws IOException { | ||
if (f == null) return; | ||
T tObj = pigToThrift.getThriftObject(f); | ||
os_.write(base64.encode(converter.toBytes(tObj))); | ||
os_.write(Protobufs.NEWLINE_UTF8_BYTE); | ||
} | ||
} |
52 changes: 52 additions & 0 deletions
52
src/java/com/twitter/elephantbird/pig/store/LzoThriftBlockPigStorage.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package com.twitter.elephantbird.pig.store; | ||
|
||
import java.io.IOException; | ||
|
||
import org.apache.pig.data.Tuple; | ||
import org.apache.thrift.TBase; | ||
|
||
import com.twitter.elephantbird.mapreduce.io.ThriftBlockWriter; | ||
import com.twitter.elephantbird.pig.util.PigToThrift; | ||
import com.twitter.elephantbird.pig.util.PigUtil; | ||
import com.twitter.elephantbird.util.TypeRef; | ||
import java.io.OutputStream; | ||
|
||
/** | ||
* Serializes Pig Tuples into Block encoded Thrift objects. | ||
* The fields in the pig tuple must correspond exactly to the fields in | ||
* Thrift struct, as no name-matching is performed (names of the tuple | ||
* fields are not currently accessible to a StoreFunc. | ||
* It will be in 0.7, so something more flexible will be possible) | ||
*/ | ||
public class LzoThriftBlockPigStorage<T extends TBase<?, ?>> extends LzoBaseStoreFunc { | ||
|
||
private TypeRef<T> typeRef; | ||
private ThriftBlockWriter<T> writer; | ||
private PigToThrift<T> pigToThrift; | ||
private int numRecordsPerBlock = 10000; // is this too high? | ||
|
||
public LzoThriftBlockPigStorage(String thriftClassName) { | ||
typeRef = PigUtil.getThriftTypeRef(thriftClassName); | ||
pigToThrift = PigToThrift.newInstance(typeRef); | ||
} | ||
|
||
@Override | ||
public void bindTo(OutputStream os) throws IOException { | ||
super.bindTo(os); | ||
writer = new ThriftBlockWriter<T>(os_, typeRef.getRawClass(), numRecordsPerBlock); | ||
} | ||
|
||
@Override | ||
public void putNext(Tuple f) throws IOException { | ||
if (f == null) return; | ||
writer.write(pigToThrift.getThriftObject(f)); | ||
} | ||
|
||
@Override | ||
public void finish() throws IOException { | ||
if (writer != null) { | ||
writer.close(); | ||
} | ||
super.finish(); | ||
} | ||
} |
Oops, something went wrong.