Better docs and some cleanup for hadoop store builder.

1 parent 8bf82db · commit 2a99b18c0e98ff7824fcd17d0c9f512784a07136 · @jkreps committed May 24, 2009
5 build.xml
@@ -220,8 +220,9 @@
</tar>
</target>
- <target name="hadoop-benchmark-jar" depends="build, contrib-build" description="Build a jar file that includes all contrib code.">
- <jar destfile="${dist.dir}/${name}-${curr.release}-all.jar">
+ <target name="hadoop-benchmark-jar" depends="build, contrib-build"
+ description="Build a jar file that includes all contrib code plus the necessary jars for running the hadoop benchmark.">
+ <jar destfile="${dist.dir}/hadoop-benchmark.jar">
<fileset dir="${classes.dir}">
<include name="**/*.*" />
</fileset>
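With this change the benchmark jar is no longer version-suffixed: the hadoop-benchmark-jar target now produces ${dist.dir}/hadoop-benchmark.jar instead of ${name}-${curr.release}-all.jar, presumably so downstream scripts need not track the release number.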
15 ...-store-builder/src/java/voldemort/store/readonly/mr/AbstractHadoopStoreBuilderMapper.java
@@ -33,19 +33,18 @@
import voldemort.utils.ByteUtils;
/**
- * Mapper reads input data and translates it into data serialized with the
- * appropriate Serializer for the given store. Override makeKey() and
- * makeValue() to create the appropriate objects to pass into the Serializer.
+ * A base class that can be used for building voldemort read-only stores. To use
+ * it you need to override the makeKey and makeValue methods which specify how
+ * to construct the key and value from the values given in map().
*
- * This mapper expects the store name to be defined by the property
- * voldemort.store.name, and it expects to find distributed cache files
- * cluster.xml and stores.xml.
+ * The values given by makeKey and makeValue will then be serialized with the
+ * appropriate voldemort Serializer.
*
* @author bbansal, jay
*
*/
-public abstract class AbstractHadoopStoreBuilderMapper<K, V> extends HadoopStoreBuilderBase
- implements Mapper<K, V, BytesWritable, BytesWritable> {
+public abstract class AbstractHadoopStoreBuilderMapper<K, V> extends
+ AbstractStoreBuilderConfigurable implements Mapper<K, V, BytesWritable, BytesWritable> {
private MessageDigest md5er;
private ConsistentRoutingStrategy routingStrategy;
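To illustrate the documented contract, here is a minimal hypothetical subclass. It assumes makeKey and makeValue receive the raw map() key and value and return the objects handed to the store's Serializer; the class name, the tab-separated input format, and the String return values (which assume string serializers) are invented for this sketch:

```java
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Hypothetical mapper: builds a store from text lines of the form "key<TAB>value".
public class TextStoreBuilderMapper extends AbstractHadoopStoreBuilderMapper<LongWritable, Text> {

    @Override
    public Object makeKey(LongWritable offset, Text line) {
        // Everything before the first tab becomes the voldemort key.
        return line.toString().split("\t", 2)[0];
    }

    @Override
    public Object makeValue(LongWritable offset, Text line) {
        // Everything after the first tab becomes the voldemort value.
        return line.toString().split("\t", 2)[1];
    }
}
```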
9 ...e/readonly/mr/HadoopStoreBuilderBase.java → .../mr/AbstractStoreBuilderConfigurable.java
@@ -28,7 +28,14 @@
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;
-public class HadoopStoreBuilderBase {
+/**
+ * A base class holding the basic configuration values shared by the
+ * mapper, reducer, and partitioner.
+ *
+ * @author jay
+ *
+ */
+public class AbstractStoreBuilderConfigurable {
private int numChunks;
private Cluster cluster;
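For orientation, the shared-configuration pattern behind the rename might look like the sketch below. The property names ("cluster.xml", "num.chunks") and the accessors are assumptions, not the committed code, though ClusterMapper.readCluster is the real parser imported above:

```java
import java.io.StringReader;
import org.apache.hadoop.mapred.JobConf;
import voldemort.cluster.Cluster;
import voldemort.xml.ClusterMapper;

// Sketch: mapper, reducer, and partitioner all extend this class, so the
// cluster metadata and chunk count are parsed out of the JobConf in one place.
public class AbstractStoreBuilderConfigurable {

    private int numChunks;
    private Cluster cluster;

    public void configure(JobConf conf) {
        // "cluster.xml" and "num.chunks" are assumed property names.
        this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml")));
        this.numChunks = conf.getInt("num.chunks", -1);
    }

    public Cluster getCluster() {
        return cluster;
    }

    public int getNumChunks() {
        return numChunks;
    }
}
```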
19 contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilder.java
@@ -40,9 +40,7 @@
import voldemort.xml.StoreDefinitionsMapper;
/**
- * Creates a simple Read-Only Voldemort store for easy batch update.
- * <p>
- * Creates a read-only store from the specified input data
+ * Builds a read-only voldemort store as a hadoop job from the given input data.
*
* @author bbansal, jay
*/
@@ -73,11 +71,13 @@
* @param inputFormatClass The input format to use for reading values
* @param cluster The voldemort cluster for which the stores are being built
* @param storeDef The store definition of the store
- * @param replicationFactor
- * @param chunkSizeBytes
- * @param tempDir
- * @param outputDir
- * @param path
+ * @param replicationFactor The replication factor to use for storing the
+ * built store.
+ * @param chunkSizeBytes The size of the chunks used by the read-only store
+ * @param tempDir The temporary directory to use in hadoop for intermediate
+ * reducer output
+ * @param outputDir The directory in which to place the built stores
+ * @param inputPath The path from which to read input data
*/
@SuppressWarnings("unchecked")
public HadoopStoreBuilder(Configuration conf,
@@ -106,6 +106,9 @@ public HadoopStoreBuilder(Configuration conf,
+ MIN_CHUNK_SIZE + "..." + MAX_CHUNK_SIZE);
}
+ /**
+ * Run the job
+ */
public void build() {
JobConf conf = new JobConf(config);
conf.setInt("io.file.buffer.size", 64 * 1024);
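Tying the documented parameters together, a driver could look like the following sketch. It assumes the constructor takes a mapper class and an input format ahead of the parameters listed in the javadoc, in that order; the paths, sizes, and the TextStoreBuilderMapper class are illustrative, not part of this commit:

```java
import java.io.FileReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.TextInputFormat;
import voldemort.cluster.Cluster;
import voldemort.store.StoreDefinition;
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;

// Hypothetical driver for the store builder job.
public class StoreBuilderDriver {

    public static void main(String[] args) throws Exception {
        // Parse the cluster and store metadata with voldemort's xml mappers.
        Cluster cluster = new ClusterMapper().readCluster(new FileReader("cluster.xml"));
        StoreDefinition storeDef = new StoreDefinitionsMapper()
                .readStoreList(new FileReader("stores.xml")).get(0);

        HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
                                                            TextStoreBuilderMapper.class,
                                                            TextInputFormat.class,
                                                            cluster,
                                                            storeDef,
                                                            2,                  // replicationFactor
                                                            1024 * 1024 * 1024, // chunkSizeBytes (1 GB)
                                                            new Path("/tmp/store-build"),  // tempDir
                                                            new Path("/output/my-store"),  // outputDir
                                                            new Path("/input/data"));      // inputPath
        builder.build(); // runs the hadoop job
    }
}
```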
4 ...oop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderPartitioner.java
@@ -24,12 +24,12 @@
/**
* A Partitioner that splits data so that all data for the same nodeId, chunkId
- * combination ends up in the same reduce
+ * combination ends up in the same reduce (and hence in the same store chunk)
*
* @author bbansal, jay
*
*/
-public class HadoopStoreBuilderPartitioner extends HadoopStoreBuilderBase implements
+public class HadoopStoreBuilderPartitioner extends AbstractStoreBuilderConfigurable implements
Partitioner<BytesWritable, BytesWritable> {
public int getPartition(BytesWritable key, BytesWritable value, int numReduceTasks) {
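That grouping can be achieved by folding the (nodeId, chunkId) pair into a single reducer index; the following is a conceptual sketch of the arithmetic, not the committed implementation:

```java
// Conceptual sketch: every record for a given (nodeId, chunkId) pair lands
// on the same reducer, so each reduce call can emit one complete store chunk.
public final class ChunkPartitioning {

    public static int partitionFor(int nodeId, int chunkId, int numChunks, int numReduceTasks) {
        return Math.abs(nodeId * numChunks + chunkId) % numReduceTasks;
    }
}
```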
2 .../hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducer.java
@@ -41,7 +41,7 @@
* @author bbansal, jay
*
*/
-public class HadoopStoreBuilderReducer extends HadoopStoreBuilderBase implements
+public class HadoopStoreBuilderReducer extends AbstractStoreBuilderConfigurable implements
Reducer<BytesWritable, BytesWritable, Text, Text> {
private static final Logger logger = Logger.getLogger(HadoopStoreBuilderReducer.class);
