diff --git a/src/main/java/org/apache/avro/mapreduce/AvroDeserializer.java b/src/main/java/org/apache/avro/mapreduce/AvroDeserializer.java new file mode 100644 index 0000000..f040f9f --- /dev/null +++ b/src/main/java/org/apache/avro/mapreduce/AvroDeserializer.java @@ -0,0 +1,91 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.mapred.AvroWrapper; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.io.serializer.Deserializer; + +/** + * Deserializes AvroWrapper objects within Hadoop. + * + *

Keys and values containing Avro types are more efficiently serialized outside of the + * WritableSerialization model, so they are wrapped in {@link + * org.apache.avro.mapred.AvroWrapper} objects and deserialization is handled by this + * class.

+ * + *

MapReduce jobs that use AvroWrapper objects as keys or values need to be configured + * with {@link org.apache.avro.mapreduce.AvroSerialization}. Use {@link + * org.apache.avro.mapreduce.AvroJob} to help with Job configuration.

+ * + * @param The type of Avro wrapper. + * @param The Java type of the Avro data being wrapped. + */ +public abstract class AvroDeserializer, D> implements Deserializer { + /** The Avro reader schema for deserializing. */ + private final Schema mReaderSchema; + + /** The Avro datum reader for deserializing. */ + private final DatumReader mAvroDatumReader; + + /** An Avro binary decoder for deserializing. */ + private BinaryDecoder mAvroDecoder; + + /** + * Constructor. + * + * @param readerSchema The Avro reader schema for the data to deserialize. + */ + protected AvroDeserializer(Schema readerSchema) { + mReaderSchema = readerSchema; + mAvroDatumReader = new SpecificDatumReader(readerSchema); + } + + /** + * Gets the reader schema used for deserializing. + * + * @return The reader schema. + */ + public Schema getReaderSchema() { + return mReaderSchema; + } + + /** {@inheritDoc} */ + @Override + public void open(InputStream inputStream) throws IOException { + mAvroDecoder = DecoderFactory.get().directBinaryDecoder(inputStream, mAvroDecoder); + } + + /** {@inheritDoc} */ + @Override + public T deserialize(T avroWrapperToReuse) throws IOException { + // Create a new Avro wrapper if there isn't one to reuse. + if (null == avroWrapperToReuse) { + avroWrapperToReuse = createAvroWrapper(); + } + + // Deserialize the Avro datum from the input stream. + avroWrapperToReuse.datum(mAvroDatumReader.read(avroWrapperToReuse.datum(), mAvroDecoder)); + return avroWrapperToReuse; + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + mAvroDecoder.inputStream().close(); + } + + /** + * Creates a new empty T (extends AvroWrapper) instance. + * + * @return A new empty T instance. 
+ */ + protected abstract T createAvroWrapper(); +} diff --git a/src/main/java/org/apache/avro/mapreduce/AvroKeyComparator.java b/src/main/java/org/apache/avro/mapreduce/AvroKeyComparator.java index 34a07b7..a5220af 100644 --- a/src/main/java/org/apache/avro/mapreduce/AvroKeyComparator.java +++ b/src/main/java/org/apache/avro/mapreduce/AvroKeyComparator.java @@ -1,51 +1,45 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +// (c) Copyright 2011 Odiago, Inc. package org.apache.avro.mapreduce; -import org.apache.hadoop.io.RawComparator; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.conf.Configuration; - import org.apache.avro.Schema; import org.apache.avro.io.BinaryData; import org.apache.avro.mapred.AvroKey; import org.apache.avro.specific.SpecificData; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.RawComparator; -/** The {@link RawComparator} used by jobs configured with {@link AvroJob}. */ +/** + * The {@link org.apache.hadoop.io.RawComparator} used by jobs configured with + * {@link org.apache.avro.mapreduce.AvroJob}. + * + *

Compares AvroKeys output from the map phase for sorting.

+ */ public class AvroKeyComparator extends Configured implements RawComparator> { + /** The schema of the Avro data in the key to compare. */ + private Schema mSchema; - private Schema schema; - + /** {@inheritDoc} */ @Override public void setConf(Configuration conf) { super.setConf(conf); - if (conf != null) { - schema = AvroJob.getMapOutputKeySchema(conf); + if (null != conf) { + // The MapReduce framework will be using this comparator to sort AvroKey objects + // output from the map phase, so use the schema defined for the map output key. + mSchema = AvroJob.getMapOutputKeySchema(conf); } } + /** {@inheritDoc} */ + @Override public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - return BinaryData.compare(b1, s1, b2, s2, schema); + return BinaryData.compare(b1, s1, b2, s2, mSchema); } + /** {@inheritDoc} */ + @Override public int compare(AvroKey x, AvroKey y) { - return SpecificData.get().compare(x.datum(), y.datum(), schema); + return SpecificData.get().compare(x.datum(), y.datum(), mSchema); } - -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/avro/mapreduce/AvroKeyDeserializer.java b/src/main/java/org/apache/avro/mapreduce/AvroKeyDeserializer.java new file mode 100644 index 0000000..235d0cf --- /dev/null +++ b/src/main/java/org/apache/avro/mapreduce/AvroKeyDeserializer.java @@ -0,0 +1,34 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; + +/** + * Deserializes AvroKey objects within Hadoop. + * + * @param The java type of the avro data to deserialize. + * + * @see org.apache.avro.mapreduce.AvroDeserializer + */ +public class AvroKeyDeserializer extends AvroDeserializer, D> { + /** + * Constructor. + * + * @param readerSchema The Avro reader schema for the data to deserialize. + */ + public AvroKeyDeserializer(Schema readerSchema) { + super(readerSchema); + } + + /** + * Creates a new empty AvroKey instance. 
+ * + * @return a new empty AvroKey. + */ + @Override + protected AvroKey createAvroWrapper() { + return new AvroKey(null); + } +} diff --git a/src/main/java/org/apache/avro/mapreduce/AvroSerialization.java b/src/main/java/org/apache/avro/mapreduce/AvroSerialization.java index 77afdb7..20aeec2 100644 --- a/src/main/java/org/apache/avro/mapreduce/AvroSerialization.java +++ b/src/main/java/org/apache/avro/mapreduce/AvroSerialization.java @@ -1,143 +1,60 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +// (c) Copyright 2011 Odiago, Inc. 
package org.apache.avro.mapreduce; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -import org.apache.hadoop.io.serializer.Serialization; -import org.apache.hadoop.io.serializer.Deserializer; -import org.apache.hadoop.io.serializer.Serializer; -import org.apache.hadoop.conf.Configured; - import org.apache.avro.Schema; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.BinaryDecoder; -import org.apache.avro.io.DecoderFactory; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.io.EncoderFactory; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapred.AvroValue; import org.apache.avro.mapred.AvroWrapper; -import org.apache.avro.reflect.ReflectDatumWriter; -import org.apache.avro.specific.SpecificDatumReader; - -/** The {@link Serialization} used by jobs configured with {@link AvroJob}. */ -public class AvroSerialization extends Configured - implements Serialization> { +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serialization; +import org.apache.hadoop.io.serializer.Serializer; +/** + * The {@link org.apache.hadoop.io.serializer.Serialization} used by jobs configured with + * {@link org.apache.avro.mapreduce.AvroJob}. + * + * @param The Java type of the Avro data to serialize. + */ +public class AvroSerialization extends Configured implements Serialization> { + /** {@inheritDoc} */ + @Override public boolean accept(Class c) { - return AvroWrapper.class.isAssignableFrom(c); - } - - /** Returns the specified map output deserializer. Defaults to the final - * output deserializer if no map output schema was specified. 
*/ - public Deserializer> getDeserializer(Class> c) { - // We need not rely on mapred.task.is.map here to determine whether map - // output or final output is desired, since the mapreduce framework never - // creates a deserializer for final output, only for map output. - boolean isKey = AvroKey.class.isAssignableFrom(c); - Schema schema = isKey - ? AvroJob.getMapOutputKeySchema(getConf()) - : AvroJob.getMapOutputValueSchema(getConf()); - return new AvroWrapperDeserializer(new SpecificDatumReader(schema), - isKey); + return AvroKey.class.isAssignableFrom(c) || AvroValue.class.isAssignableFrom(c); } - - private static final DecoderFactory FACTORY = DecoderFactory.get(); - private class AvroWrapperDeserializer - implements Deserializer> { - - private DatumReader reader; - private BinaryDecoder decoder; - private boolean isKey; - - public AvroWrapperDeserializer(DatumReader reader, boolean isKey) { - this.reader = reader; - this.isKey = isKey; - } - - public void open(InputStream in) { - this.decoder = FACTORY.directBinaryDecoder(in, decoder); - } - - public AvroWrapper deserialize(AvroWrapper wrapper) - throws IOException { - T datum = reader.read(wrapper == null ? null : wrapper.datum(), decoder); - if (wrapper == null) { - wrapper = isKey? new AvroKey(datum) : new AvroValue(datum); - } else { - wrapper.datum(datum); - } - return wrapper; - } - - public void close() throws IOException { - decoder.inputStream().close(); + /** + * Gets an object capable of deserializing the output from a Mapper. + * + * @param c The class to get a deserializer for. + * @return A deserializer for objects of class c. 
+ */ + @Override + public Deserializer> getDeserializer(Class> c) { + if (AvroKey.class.isAssignableFrom(c)) { + return new AvroKeyDeserializer(AvroJob.getMapOutputKeySchema(getConf())); + } else if (AvroValue.class.isAssignableFrom(c)) { + return new AvroValueDeserializer(AvroJob.getMapOutputValueSchema(getConf())); + } else { + throw new IllegalStateException("Only AvroKey and AvroValue are supported."); } - - } - - /** Returns the specified output serializer. */ - public Serializer> getSerializer(Class> c) { - // Here we must rely on mapred.task.is.map to tell whether the map output - // or final output is needed. - boolean isMap = getConf().getBoolean("mapred.task.is.map", false); - Schema schema = !isMap - ? AvroJob.getOutputSchema(getConf()) - : (AvroKey.class.isAssignableFrom(c) - ? AvroJob.getMapOutputKeySchema(getConf()) - : AvroJob.getMapOutputValueSchema(getConf())); - return new AvroWrapperSerializer(new ReflectDatumWriter(schema)); } - private class AvroWrapperSerializer implements Serializer> { - - private DatumWriter writer; - private OutputStream out; - private BinaryEncoder encoder; - - public AvroWrapperSerializer(DatumWriter writer) { - this.writer = writer; - } - - public void open(OutputStream out) { - this.out = out; - this.encoder = new EncoderFactory().configureBlockSize(512) - .binaryEncoder(out, null); - } - - public void serialize(AvroWrapper wrapper) throws IOException { - writer.write(wrapper.datum(), encoder); - // would be a lot faster if the Serializer interface had a flush() - // method and the Hadoop framework called it when needed rather - // than for every record. - encoder.flush(); - } - - public void close() throws IOException { - out.close(); - } - + /** + * Gets an object capable of serializing output from a Mapper. + * + *

This may be for Map output + */ + public Serializer> getSerializer(Class> c) { + Schema schema; + if (AvroKey.class.isAssignableFrom(c)) { + schema = AvroJob.getMapOutputKeySchema(getConf()); + } else if (AvroValue.class.isAssignableFrom(c)) { + schema = AvroJob.getMapOutputValueSchema(getConf()); + } else { + throw new IllegalStateException("Only AvroKey and AvroValue are supported."); + } + return new AvroSerializer(schema); } - } diff --git a/src/main/java/org/apache/avro/mapreduce/AvroSerializer.java b/src/main/java/org/apache/avro/mapreduce/AvroSerializer.java new file mode 100644 index 0000000..d9068d0 --- /dev/null +++ b/src/main/java/org/apache/avro/mapreduce/AvroSerializer.java @@ -0,0 +1,96 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.mapred.AvroWrapper; +import org.apache.avro.specific.SpecificDatumWriter; +import org.apache.hadoop.io.serializer.Serializer; + +/** + * Serializes AvroWrapper objects within Hadoop. + * + *

Keys and values containing Avro types are more efficiently serialized outside of the + * WritableSerialization model, so they are wrapped in {@link + * org.apache.avro.mapred.AvroWrapper} objects and serialization is handled by this + * class.

+ * + *

MapReduce jobs that use AvroWrapper objects as keys or values need to be configured + * with {@link org.apache.avro.mapreduce.AvroSerialization}. Use {@link + * org.apache.avro.mapreduce.AvroJob} to help with Job configuration.

+ * + * @param The Java type of the Avro data. + */ +public class AvroSerializer implements Serializer> { + /** + * The block size for the Avro encoder. + * + * This number was copied from the AvroSerialization of org.apache.avro.mapred in Avro 1.5.1. + * + * TODO(gwu): Do some benchmarking with different numbers here to see if it is important. + */ + private static final int AVRO_ENCODER_BLOCK_SIZE_BYTES = 512; + + /** An factory for creating Avro datum encoders. */ + private static EncoderFactory mEncoderFactory + = new EncoderFactory().configureBlockSize(AVRO_ENCODER_BLOCK_SIZE_BYTES); + + /** The writer schema for the data to serialize. */ + private final Schema mWriterSchema; + + /** The Avro datum writer for serializing. */ + private final DatumWriter mAvroDatumWriter; + + /** The Avro encoder for serializing. */ + private BinaryEncoder mAvroEncoder; + + /** The output stream for serializing. */ + private OutputStream mOutputStream; + + /** + * Constructor. + * + * @param writerSchema The writer schema for the Avro data being serialized. + */ + public AvroSerializer(Schema writerSchema) { + mWriterSchema = writerSchema; + mAvroDatumWriter = new SpecificDatumWriter(writerSchema); + } + + /** + * Gets the writer schema being used for serialization. + * + * @return The writer schema. + */ + public Schema getWriterSchema() { + return mWriterSchema; + } + + /** {@inheritDoc} */ + @Override + public void open(OutputStream outputStream) throws IOException { + mOutputStream = outputStream; + mAvroEncoder = mEncoderFactory.binaryEncoder(outputStream, mAvroEncoder); + } + + /** {@inheritDoc} */ + @Override + public void serialize(AvroWrapper avroWrapper) throws IOException { + mAvroDatumWriter.write(avroWrapper.datum(), mAvroEncoder); + // This would be a lot faster if the Serializer interface had a flush() method and the + // Hadoop framework called it when needed. For now, we'll have to flush on every record. 
+ mAvroEncoder.flush(); + } + + /** {@inheritDoc} */ + @Override + public void close() throws IOException { + mOutputStream.close(); + } +} diff --git a/src/main/java/org/apache/avro/mapreduce/AvroValueDeserializer.java b/src/main/java/org/apache/avro/mapreduce/AvroValueDeserializer.java new file mode 100644 index 0000000..91fd03c --- /dev/null +++ b/src/main/java/org/apache/avro/mapreduce/AvroValueDeserializer.java @@ -0,0 +1,34 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroValue; + +/** + * Deserializes AvroValue objects within Hadoop. + * + * @param The java type of the avro data to deserialize. + * + * @see org.apache.avro.mapreduce.AvroDeserializer + */ +public class AvroValueDeserializer extends AvroDeserializer, D> { + /** + * Constructor. + * + * @param readerSchema The Avro reader schema for the data to deserialize. + */ + public AvroValueDeserializer(Schema readerSchema) { + super(readerSchema); + } + + /** + * Creates a new empty AvroValue instance. + * + * @return a new empty AvroValue. + */ + @Override + protected AvroValue createAvroWrapper() { + return new AvroValue(null); + } +} diff --git a/src/test/java/org/apache/avro/mapreduce/TestAvroKeyDeserializer.java b/src/test/java/org/apache/avro/mapreduce/TestAvroKeyDeserializer.java new file mode 100644 index 0000000..7ef3403 --- /dev/null +++ b/src/test/java/org/apache/avro/mapreduce/TestAvroKeyDeserializer.java @@ -0,0 +1,52 @@ +// (c) Copyright 2011 Odiago, Inc. 
+ +package org.apache.avro.mapreduce; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.mapred.AvroKey; +import org.junit.Test; + +public class TestAvroKeyDeserializer { + @Test + public void testDeserialize() throws IOException { + // Create a deserializer. + Schema readerSchema = Schema.create(Schema.Type.STRING); + AvroKeyDeserializer deserializer + = new AvroKeyDeserializer(readerSchema); + + // Check the reader schema. + assertEquals(readerSchema, deserializer.getReaderSchema()); + + // Write some records to deserialize. + Schema writerSchema = Schema.create(Schema.Type.STRING); + DatumWriter datumWriter = new GenericDatumWriter(writerSchema); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null); + datumWriter.write("record1", encoder); + datumWriter.write("record2", encoder); + encoder.flush(); + + // Deserialize the records. + ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); + deserializer.open(inputStream); + AvroKey record = null; + + record = deserializer.deserialize(record); + assertEquals("record1", record.datum().toString()); + + record = deserializer.deserialize(record); + assertEquals("record2", record.datum().toString()); + + deserializer.close(); + } +} diff --git a/src/test/java/org/apache/avro/mapreduce/TestAvroSerialization.java b/src/test/java/org/apache/avro/mapreduce/TestAvroSerialization.java new file mode 100644 index 0000000..a72f999 --- /dev/null +++ b/src/test/java/org/apache/avro/mapreduce/TestAvroSerialization.java @@ -0,0 +1,101 @@ +// (c) Copyright 2011 Odiago, Inc. 
+ +package org.apache.avro.mapreduce; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapred.AvroWrapper; +import org.apache.hadoop.io.serializer.Deserializer; +import org.apache.hadoop.io.serializer.Serializer; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.ReflectionUtils; +import org.junit.Test; + +public class TestAvroSerialization { + @Test + public void testAccept() { + AvroSerialization serialization = new AvroSerialization(); + + assertTrue(serialization.accept(AvroKey.class)); + assertTrue(serialization.accept(AvroValue.class)); + assertFalse(serialization.accept(AvroWrapper.class)); + assertFalse(serialization.accept(String.class)); + } + + @Test + public void testGetSerializerForKey() throws IOException { + // Set the writer schema in the job configuration. + Schema writerSchema = Schema.create(Schema.Type.STRING); + Job job = new Job(); + AvroJob.setMapOutputKeySchema(job, writerSchema); + + // Get a serializer from the configuration. + AvroSerialization serialization + = ReflectionUtils.newInstance(AvroSerialization.class, job.getConfiguration()); + Serializer serializer = serialization.getSerializer(AvroKey.class); + assertTrue(serializer instanceof AvroSerializer); + AvroSerializer avroSerializer = (AvroSerializer) serializer; + + // Check that the writer schema is set correctly on the serializer. + assertEquals(writerSchema, avroSerializer.getWriterSchema()); + } + + @Test + public void testGetSerializerForValue() throws IOException { + // Set the writer schema in the job configuration. + Schema writerSchema = Schema.create(Schema.Type.STRING); + Job job = new Job(); + AvroJob.setMapOutputValueSchema(job, writerSchema); + + // Get a serializer from the configuration. 
+ AvroSerialization serialization + = ReflectionUtils.newInstance(AvroSerialization.class, job.getConfiguration()); + Serializer serializer = serialization.getSerializer(AvroValue.class); + assertTrue(serializer instanceof AvroSerializer); + AvroSerializer avroSerializer = (AvroSerializer) serializer; + + // Check that the writer schema is set correctly on the serializer. + assertEquals(writerSchema, avroSerializer.getWriterSchema()); + } + + @Test + public void testGetDeserializerForKey() throws IOException { + // Set the reader schema in the job configuration. + Schema readerSchema = Schema.create(Schema.Type.STRING); + Job job = new Job(); + AvroJob.setMapOutputKeySchema(job, readerSchema); + + // Get a deserializer from the configuration. + AvroSerialization serialization + = ReflectionUtils.newInstance(AvroSerialization.class, job.getConfiguration()); + Deserializer deserializer = serialization.getDeserializer(AvroKey.class); + assertTrue(deserializer instanceof AvroKeyDeserializer); + AvroKeyDeserializer avroDeserializer = (AvroKeyDeserializer) deserializer; + + // Check that the reader schema is set correctly on the deserializer. + assertEquals(readerSchema, avroDeserializer.getReaderSchema()); + } + + @Test + public void testGetDeserializerForValue() throws IOException { + // Set the reader schema in the job configuration. + Schema readerSchema = Schema.create(Schema.Type.STRING); + Job job = new Job(); + AvroJob.setMapOutputValueSchema(job, readerSchema); + + // Get a deserializer from the configuration. + AvroSerialization serialization + = ReflectionUtils.newInstance(AvroSerialization.class, job.getConfiguration()); + Deserializer deserializer = serialization.getDeserializer(AvroValue.class); + assertTrue(deserializer instanceof AvroValueDeserializer); + AvroValueDeserializer avroDeserializer = (AvroValueDeserializer) deserializer; + + // Check that the reader schema is set correctly on the deserializer. 
+ assertEquals(readerSchema, avroDeserializer.getReaderSchema()); + } +} diff --git a/src/test/java/org/apache/avro/mapreduce/TestAvroSerializer.java b/src/test/java/org/apache/avro/mapreduce/TestAvroSerializer.java new file mode 100644 index 0000000..f0f692b --- /dev/null +++ b/src/test/java/org/apache/avro/mapreduce/TestAvroSerializer.java @@ -0,0 +1,51 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.mapred.AvroKey; +import org.junit.Test; + +public class TestAvroSerializer { + @Test + public void testSerialize() throws IOException { + // Create a serializer. + Schema writerSchema = Schema.create(Schema.Type.STRING); + AvroSerializer serializer = new AvroSerializer(writerSchema); + + // Check the writer schema. + assertEquals(writerSchema, serializer.getWriterSchema()); + + // Serialize two records, 'record1' and 'record2'. + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + serializer.open(outputStream); + serializer.serialize(new AvroKey("record1")); + serializer.serialize(new AvroKey("record2")); + serializer.close(); + + // Make sure the records were serialized correctly. 
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); + Schema readerSchema = Schema.create(Schema.Type.STRING); + DatumReader datumReader = new GenericDatumReader(readerSchema); + Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null); + CharSequence record = null; + + record = datumReader.read(record, decoder); + assertEquals("record1", record.toString()); + + record = datumReader.read(record, decoder); + assertEquals("record2", record.toString()); + + inputStream.close(); + } +} diff --git a/src/test/java/org/apache/avro/mapreduce/TestAvroValueDeserializer.java b/src/test/java/org/apache/avro/mapreduce/TestAvroValueDeserializer.java new file mode 100644 index 0000000..5a2cdba --- /dev/null +++ b/src/test/java/org/apache/avro/mapreduce/TestAvroValueDeserializer.java @@ -0,0 +1,52 @@ +// (c) Copyright 2011 Odiago, Inc. + +package org.apache.avro.mapreduce; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.mapred.AvroValue; +import org.junit.Test; + +public class TestAvroValueDeserializer { + @Test + public void testDeserialize() throws IOException { + // Create a deserializer. + Schema readerSchema = Schema.create(Schema.Type.STRING); + AvroValueDeserializer deserializer + = new AvroValueDeserializer(readerSchema); + + // Check the reader schema. + assertEquals(readerSchema, deserializer.getReaderSchema()); + + // Write some records to deserialize. 
+ Schema writerSchema = Schema.create(Schema.Type.STRING); + DatumWriter datumWriter = new GenericDatumWriter(writerSchema); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null); + datumWriter.write("record1", encoder); + datumWriter.write("record2", encoder); + encoder.flush(); + + // Deserialize the records. + ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); + deserializer.open(inputStream); + AvroValue record = null; + + record = deserializer.deserialize(record); + assertEquals("record1", record.datum().toString()); + + record = deserializer.deserialize(record); + assertEquals("record2", record.datum().toString()); + + deserializer.close(); + } +}