diff --git a/.classpath b/.classpath
index 1c153f728f..9daadee954 100644
--- a/.classpath
+++ b/.classpath
@@ -51,8 +51,11 @@
-
-
+
+
+
+
+
diff --git a/.gitignore b/.gitignore
index 18cd22df29..cd6301beb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ server.state
 .version
 .temp
 .idea
+data/
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
index cbe13973a0..8d8bb87867 100644
--- a/.settings/org.eclipse.jdt.core.prefs
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -1,4 +1,4 @@
-#Fri Dec 30 14:37:10 PST 2011
+#Thu Aug 30 10:43:57 PDT 2012
 eclipse.preferences.version=1
 org.eclipse.jdt.core.codeComplete.argumentPrefixes=
 org.eclipse.jdt.core.codeComplete.argumentSuffixes=
@@ -11,9 +11,9 @@ org.eclipse.jdt.core.codeComplete.staticFieldSuffixes=
 org.eclipse.jdt.core.codeComplete.staticFinalFieldPrefixes=
 org.eclipse.jdt.core.codeComplete.staticFinalFieldSuffixes=
 org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
 org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.compliance=1.6
 org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 org.eclipse.jdt.core.compiler.debug.localVariable=generate
 org.eclipse.jdt.core.compiler.debug.sourceFile=generate
@@ -77,7 +77,7 @@ org.eclipse.jdt.core.compiler.problem.unusedParameterWhenOverridingConcrete=disa
 org.eclipse.jdt.core.compiler.problem.unusedPrivateMember=warning
 org.eclipse.jdt.core.compiler.problem.unusedWarningToken=warning
 org.eclipse.jdt.core.compiler.problem.varargsArgumentNeedCast=warning
-org.eclipse.jdt.core.compiler.source=1.5
+org.eclipse.jdt.core.compiler.source=1.6
 org.eclipse.jdt.core.formatter.align_type_members_on_columns=false
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=82
 org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=82
diff --git a/META-INF/MANIFEST.MF b/META-INF/MANIFEST.MF
new file mode 100644
index 0000000000..ff3f61348a
--- /dev/null
+++ b/META-INF/MANIFEST.MF
@@ -0,0 +1,7 @@
+Manifest-Version: 1.0
+Ant-Version: Apache Ant 1.7.1
+Created-By: 20.2-b06 (Sun Microsystems Inc.)
+Implementation-Title: Voldemort
+Implementation-Version: 1.0.0
+Implementation-Vendor: LinkedIn
+
diff --git a/bin/generate_cluster_xml.py b/bin/generate_cluster_xml.py
index 09baf35894..1811fdd2fc 100644
--- a/bin/generate_cluster_xml.py
+++ b/bin/generate_cluster_xml.py
@@ -1,42 +1,90 @@
+#!/usr/bin/python
+
 import sys
 import random
+import argparse
+
+# Get a random seed
+rseed = int(random.randint(00000000001,99999999999))
+
+# Setup and argument parser
+parser = argparse.ArgumentParser(description='Build a voldemort cluster.xml.')
+# Add supported arguments
+parser.add_argument('-N', '--name', type=str, default='voldemort', dest='name',
+                    help='the name you want to give the cluster')
+parser.add_argument('-n', '--nodes', type=int, default=2, dest='nodes',
+                    help='the number of nodes in the cluster')
+parser.add_argument('-p', '--partitions', type=int, default=300,
+                    dest='partitions', help='number of partitions per node')
+parser.add_argument('-s', '--socket-port', type=int, default=6666,
+                    dest='sock_port', help='socket port number')
+parser.add_argument('-a', '--admin-port', type=int, default=6667,
+                    dest='admin_port', help='admin port number')
+parser.add_argument('-H', '--http-port', type=int, default=6665,
+                    dest='http_port', help='http port number')
+genType = parser.add_mutually_exclusive_group()
+genType.add_argument('-S', '--seed', type=int, default=rseed, dest='seed',
+                     help='seed for randomizing partition distribution')
+genType.add_argument('-l', '--loops', type=int, default=1000, dest='loops',
+                     help='loop n times, using a different random seed every \
+                           time (Note: not currently supported)')
+parser.add_argument('-z', '--zones', type=int, dest='zones',
+                    help='if using zones, the number of zones you will have\
+                          (Note: you must add your own fields \
+                          manually)')
+
+# Parse arguments
+args = parser.parse_args()
+
+# Check args
+if args.zones:
+  zones = args.zones
+  if (args.nodes % zones) != 0:
+    print "Number of nodes must be evenly divisible by number of zones"
+    sys.exit(1)
+
+# Store arguments
+nodes = args.nodes
+partitions = args.partitions
+name = args.name
+http_port = args.http_port
+sock_port = args.sock_port
+admin_port = args.admin_port
+seed = args.seed
+
+# Generate the full list of partition IDs
+part_ids = range(nodes * partitions)
+# Generate full list of zone IDs
+if args.zones:
+  zone_ids = range(zones)
+  zone_id = 0
+
+# Shuffle up the partitions
+random.seed(seed)
+random.shuffle(part_ids)
+
+# Printing cluster.xml
+print "<!-- Partition distribution generated using seed [%d] -->" % seed
+print "<cluster>"
+print "  <name>%s</name>" % name
+
+for i in xrange(nodes):
+  node_partitions = ", ".join(str(p) for p in sorted(part_ids[i*partitions:(i+1)*partitions]))
+
+  print "  <server>"
+  print "    <id>%d</id>" % i
+  print "    <host>host%d</host>" % i
+  print "    <http-port>%d</http-port>" % http_port
+  print "    <socket-port>%d</socket-port>" % sock_port
+  print "    <admin-port>%d</admin-port>" % admin_port
+  print "    <partitions>%s</partitions>" % node_partitions
+  # If zones are being used, assign a zone-id
+  if args.zones:
+    print "    <zone-id>%d</zone-id>" % zone_id
+    if zone_id == (zones - 1):
+      zone_id = 0
+    else:
+      zone_id += 1
+  print "  </server>"
-if len(sys.argv) != 3:
-    print >> sys.stderr, "USAGE: python generate_partitions.py "
-    sys.exit()
-
-FORMAT_WIDTH = 10
-
-nodes = 0
-for line in open(sys.argv[1],'r'):
-    nodes+=1
-
-partitions = int(sys.argv[2])
-
-ids = range(nodes * partitions)
-
-# use known seed so this is repeatable
-random.seed(92873498274)
-random.shuffle(ids)
-
-print '<cluster>'
-print '<name>prodcluster</name>'
-id = 0
-for host in open(sys.argv[1],'r'):
-    print '<server>'
-    print "  <id>%d</id>" % id
-    print "  <host>%s</host>" % host.strip()
-    print '  <http-port>8081</http-port>'
-    print '  <socket-port>6666</socket-port>'
-    print '  <partitions>',
-    node_ids = 
sorted(ids[id*partitions:(id+1)*partitions]) - for j in xrange(len(node_ids)): - print str(node_ids[j]) + ',', - if j % FORMAT_WIDTH == FORMAT_WIDTH - 1: - print ' ', - print ' ' - print '' - id += 1 -print '' - - +print "" diff --git a/bin/repeat-junit-test.sh b/bin/repeat-junit-test.sh new file mode 100755 index 0000000000..eb16319847 --- /dev/null +++ b/bin/repeat-junit-test.sh @@ -0,0 +1,93 @@ +#!/bin/bash -e + +# Copyright 2012 LinkedIn, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +usage() { + echo + echo "Usage:" + echo "bin/repeat-junit-test.sh test_file num_times" + echo + cat <&2 + usage + exit 1 +fi + +TESTNAME=$1 +# Hackish test that makes sure some java file exists for given +# testname. No guarantee that junit-test can run the specified test, +# but at least protects against typos. +FILENAME=`echo $TESTNAME | sed 's/.*\.//g'`.java +FINDFILE=`find . -name "$FILENAME" | wc -l` +if [[ $FINDFILE == 0 ]] +then + echo "ERROR: Did not find an appropriate file (with name $FILENAME), given test name $TESTNAME." >&2 + usage + exit 1 +fi + + +NUMTIMES=$2 +if [[ ! $NUMTIMES == +([0-9]) ]] +then + echo "ERROR: argument num_times is not an integer: $NUMTIMES." >&2 + usage + exit 1 +fi + +TMPDIR=`mktemp -d -p '/tmp/'` + +for ((i=1;i<=$NUMTIMES;i++)); do + echo + echo "STARTING ITERATION $i" + echo + + # Run junit-test and capture stdout to .out and stderr to .err + junitiout="$TMPDIR/TEST-$TESTNAME-$i.out" + junitierr="$TMPDIR/TEST-$TESTNAME-$i.err" + ant junit-test -Dtest.name=$TESTNAME > >(tee $junitiout) 2> >(tee $junitierr >&2) + + # Collect results + junitidir="$TMPDIR/junit-single-report-$TESTNAME-$i" + echo + echo "COLLECTING RESULTS OF ITERATION $i IN $junitidir" + cp -r dist/junit-single-reports $junitidir + mv $junitiout $junitidir + mv $junitierr $junitidir +done + + diff --git a/bin/repeat-junit.sh b/bin/repeat-junit.sh new file mode 100755 index 0000000000..352abc6b4b --- /dev/null +++ b/bin/repeat-junit.sh @@ -0,0 +1,75 @@ +#!/bin/bash -e + +# Copyright 2012 LinkedIn, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. + +usage() { + echo + echo "Usage:" + echo "bin/repeat-junit.sh num_times" + echo + cat <&2 + usage + exit 1 +fi + +NUMTIMES=$1 +if [[ ! $NUMTIMES == +([0-9]) ]] +then + echo "ERROR: argument num_times is not an integer: $NUMTIMES." 
>&2 + usage + exit 1 +fi + +TMPDIR=`mktemp -d -p '/tmp/'` + +for ((i=1;i<=$NUMTIMES;i++)); do + echo + echo "STARTING ITERATION $i" + echo + + # Run junit and capture stdout to .out and stderr to .err + junitiout="$TMPDIR/junit-$i.out" + junitierr="$TMPDIR/junit-$i.err" + ant junit > >(tee $junitiout) 2> >(tee $junitierr >&2) + + # Collect results + junitidir="$TMPDIR/junit-reports-$i" + echo + echo "COLLECTING RESULTS OF ITERATION $i IN $junitidir" + cp -r dist/junit-reports $junitidir + mv $junitiout $junitidir + mv $junitierr $junitidir +done + + diff --git a/build.properties b/build.properties index 3bc09bc26c..f5bf1aa2de 100644 --- a/build.properties +++ b/build.properties @@ -9,10 +9,15 @@ classes.dir=dist/classes resources.dir=dist/resources commontestsrc.dir=test/common unittestsrc.dir=test/unit +longtestsrc.dir=test/long inttestsrc.dir=test/integration testclasses.dir=dist/testclasses testreport.dir=dist/junit-reports testhtml.dir=dist/junit-reports/html +singletestreport.dir=dist/junit-single-reports +singletesthtml.dir=dist/junit-single-reports/html +longtestreport.dir=dist/junit-long-reports +longtesthtml.dir=dist/junit-long-reports/html ## Contrib contrib.root.dir=contrib @@ -34,4 +39,4 @@ tomcat.manager.password=tomcat tomcat.context=/voldemort ## Release -curr.release=0.90.1 +curr.release=1.0.0 diff --git a/build.xml b/build.xml index 1f9a535f22..c7911874ef 100644 --- a/build.xml +++ b/build.xml @@ -5,7 +5,7 @@ - + @@ -27,6 +27,7 @@ + @@ -37,6 +38,7 @@ + @@ -76,6 +78,12 @@ + + + + + + @@ -103,6 +111,7 @@ + @@ -140,6 +149,9 @@ + + + @@ -210,7 +222,7 @@ - + @@ -237,7 +249,7 @@ - + @@ -369,7 +381,7 @@ - + @@ -387,14 +399,45 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + + + + + + - + diff --git a/clients/python/voldemort/client.py b/clients/python/voldemort/client.py index 830d38bbfb..d8a83e4c2a 100644 --- a/clients/python/voldemort/client.py +++ b/clients/python/voldemort/client.py @@ -239,6 +239,9 @@ def _send_request(self, connection, req_bytes): ## read a response from the connection def _receive_response(self, connection): size_bytes = connection.recv(4) + if not size_bytes: + raise VoldemortException('Connection closed') + size = struct.unpack('>i', size_bytes)[0] bytes_read = 0 @@ -252,6 +255,7 @@ def _receive_response(self, connection): return ''.join(data) + ## Bootstrap cluster metadata from a list of urls of nodes in the cluster. ## The urls are tuples in the form (host, port). ## A dictionary of node_id => node is returned. 
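The client.py hunk above guards against the server closing the socket before the 4-byte size prefix arrives, so callers see a clear VoldemortException instead of a struct.unpack failure on an empty string. A minimal sketch of that length-prefixed read, assuming the same Python 2 socket conventions as the client; the standalone receive_response helper and its read loop are illustrative, not the library's exact method:

import struct

class VoldemortException(Exception):
    # Stand-in for the exception class defined in clients/python/voldemort.
    pass

def receive_response(connection):
    # Responses are framed as a 4-byte big-endian length followed by the payload.
    size_bytes = connection.recv(4)
    if not size_bytes:
        # recv() returning '' means the peer closed the connection; fail loudly
        # rather than letting struct.unpack() fail on an empty string.
        raise VoldemortException('Connection closed')
    size = struct.unpack('>i', size_bytes)[0]

    data = []
    bytes_read = 0
    while bytes_read < size:
        chunk = connection.recv(size - bytes_read)
        if not chunk:
            raise VoldemortException('Connection closed mid-response')
        data.append(chunk)
        bytes_read += len(chunk)
    return ''.join(data)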
diff --git a/clients/python/voldemort/protocol/voldemort_admin_pb2.py b/clients/python/voldemort/protocol/voldemort_admin_pb2.py index e2c3451c07..008ac0cda0 100644 --- a/clients/python/voldemort/protocol/voldemort_admin_pb2.py +++ b/clients/python/voldemort/protocol/voldemort_admin_pb2.py @@ -10,7 +10,7 @@ DESCRIPTOR = descriptor.FileDescriptor( name='voldemort-admin.proto', package='voldemort', - serialized_pb='\n\x15voldemort-admin.proto\x12\tvoldemort\x1a\x16voldemort-client.proto\"!\n\x12GetMetadataRequest\x12\x0b\n\x03key\x18\x01 \x02(\x0c\"]\n\x13GetMetadataResponse\x12%\n\x07version\x18\x01 \x01(\x0b\x32\x14.voldemort.Versioned\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"M\n\x15UpdateMetadataRequest\x12\x0b\n\x03key\x18\x01 \x02(\x0c\x12\'\n\tversioned\x18\x02 \x02(\x0b\x32\x14.voldemort.Versioned\"9\n\x16UpdateMetadataResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"7\n\tFileEntry\x12\x11\n\tfile_name\x18\x01 \x02(\t\x12\x17\n\x0f\x66ile_size_bytes\x18\x02 \x02(\x03\"F\n\x0ePartitionEntry\x12\x0b\n\x03key\x18\x01 \x02(\x0c\x12\'\n\tversioned\x18\x02 \x02(\x0b\x32\x14.voldemort.Versioned\"\x8e\x01\n\x1dUpdatePartitionEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x32\n\x0fpartition_entry\x18\x02 \x02(\x0b\x32\x19.voldemort.PartitionEntry\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\"A\n\x1eUpdatePartitionEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"-\n\x0fVoldemortFilter\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x02(\x0c\"\xaf\x01\n\x18UpdateSlopEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x0b\n\x03key\x18\x02 \x02(\x0c\x12\'\n\x07version\x18\x03 \x02(\x0b\x32\x16.voldemort.VectorClock\x12,\n\x0crequest_type\x18\x04 \x02(\x0e\x32\x16.voldemort.RequestType\x12\r\n\x05value\x18\x05 \x01(\x0c\x12\x11\n\ttransform\x18\x06 \x01(\x0c\"<\n\x19UpdateSlopEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"d\n\x1a\x46\x65tchPartitionFilesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 \x03(\x0b\x32\x19.voldemort.PartitionTuple\"\xd7\x01\n\x1c\x46\x65tchPartitionEntriesRequest\x12\x37\n\x14replica_to_partition\x18\x01 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12\r\n\x05store\x18\x02 \x02(\t\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x14\n\x0c\x66\x65tch_values\x18\x04 \x01(\x08\x12\x14\n\x0cskip_records\x18\x05 \x01(\x03\x12\x17\n\x0finitial_cluster\x18\x06 \x01(\t\"\x81\x01\n\x1d\x46\x65tchPartitionEntriesResponse\x12\x32\n\x0fpartition_entry\x18\x01 \x01(\x0b\x32\x19.voldemort.PartitionEntry\x12\x0b\n\x03key\x18\x02 \x01(\x0c\x12\x1f\n\x05\x65rror\x18\x03 \x01(\x0b\x32\x10.voldemort.Error\"\xac\x01\n\x1d\x44\x65letePartitionEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x17\n\x0finitial_cluster\x18\x04 \x01(\t\"P\n\x1e\x44\x65letePartitionEntriesResponse\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"\xcf\x01\n\x1dInitiateFetchAndUpdateRequest\x12\x0f\n\x07node_id\x18\x01 \x02(\x05\x12\r\n\x05store\x18\x02 \x02(\t\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x37\n\x14replica_to_partition\x18\x04 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12\x17\n\x0finitial_cluster\x18\x05 
\x01(\t\x12\x10\n\x08optimize\x18\x06 \x01(\x08\"1\n\x1b\x41syncOperationStatusRequest\x12\x12\n\nrequest_id\x18\x01 \x02(\x05\"/\n\x19\x41syncOperationStopRequest\x12\x12\n\nrequest_id\x18\x01 \x02(\x05\"=\n\x1a\x41syncOperationStopResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"2\n\x19\x41syncOperationListRequest\x12\x15\n\rshow_complete\x18\x02 \x02(\x08\"R\n\x1a\x41syncOperationListResponse\x12\x13\n\x0brequest_ids\x18\x01 \x03(\x05\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\":\n\x0ePartitionTuple\x12\x14\n\x0creplica_type\x18\x01 \x02(\x05\x12\x12\n\npartitions\x18\x02 \x03(\x05\"e\n\x16PerStorePartitionTuple\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 \x03(\x0b\x32\x19.voldemort.PartitionTuple\"\xf8\x01\n\x19RebalancePartitionInfoMap\x12\x12\n\nstealer_id\x18\x01 \x02(\x05\x12\x10\n\x08\x64onor_id\x18\x02 \x02(\x05\x12\x0f\n\x07\x61ttempt\x18\x03 \x02(\x05\x12\x43\n\x18replica_to_add_partition\x18\x04 \x03(\x0b\x32!.voldemort.PerStorePartitionTuple\x12\x46\n\x1breplica_to_delete_partition\x18\x05 \x03(\x0b\x32!.voldemort.PerStorePartitionTuple\x12\x17\n\x0finitial_cluster\x18\x06 \x02(\t\"f\n\x1cInitiateRebalanceNodeRequest\x12\x46\n\x18rebalance_partition_info\x18\x01 \x02(\x0b\x32$.voldemort.RebalancePartitionInfoMap\"m\n#InitiateRebalanceNodeOnDonorRequest\x12\x46\n\x18rebalance_partition_info\x18\x01 \x03(\x0b\x32$.voldemort.RebalancePartitionInfoMap\"\x8a\x01\n\x1c\x41syncOperationStatusResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0e\n\x06status\x18\x03 \x01(\t\x12\x10\n\x08\x63omplete\x18\x04 \x01(\x08\x12\x1f\n\x05\x65rror\x18\x05 \x01(\x0b\x32\x10.voldemort.Error\"\'\n\x16TruncateEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\":\n\x17TruncateEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"*\n\x0f\x41\x64\x64StoreRequest\x12\x17\n\x0fstoreDefinition\x18\x01 \x02(\t\"3\n\x10\x41\x64\x64StoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"\'\n\x12\x44\x65leteStoreRequest\x12\x11\n\tstoreName\x18\x01 \x02(\t\"6\n\x13\x44\x65leteStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"P\n\x11\x46\x65tchStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\x12\x14\n\x0cpush_version\x18\x03 \x01(\x03\"9\n\x10SwapStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\"P\n\x11SwapStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\x12\x1a\n\x12previous_store_dir\x18\x02 \x01(\t\"@\n\x14RollbackStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x14\n\x0cpush_version\x18\x02 \x02(\x03\"8\n\x15RollbackStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"&\n\x10RepairJobRequest\x12\x12\n\nstore_name\x18\x01 \x01(\t\"4\n\x11RepairJobResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"=\n\x14ROStoreVersionDirMap\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\"/\n\x19GetROMaxVersionDirRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"y\n\x1aGetROMaxVersionDirResponse\x12:\n\x11ro_store_versions\x18\x01 \x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"3\n\x1dGetROCurrentVersionDirRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"}\n\x1eGetROCurrentVersionDirResponse\x12:\n\x11ro_store_versions\x18\x01 
\x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"/\n\x19GetROStorageFormatRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"y\n\x1aGetROStorageFormatResponse\x12:\n\x11ro_store_versions\x18\x01 \x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"@\n\x17\x46\x61iledFetchStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\";\n\x18\x46\x61iledFetchStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"\xe6\x01\n\x1bRebalanceStateChangeRequest\x12K\n\x1drebalance_partition_info_list\x18\x01 \x03(\x0b\x32$.voldemort.RebalancePartitionInfoMap\x12\x16\n\x0e\x63luster_string\x18\x02 \x02(\t\x12\x0f\n\x07swap_ro\x18\x03 \x02(\x08\x12\x1f\n\x17\x63hange_cluster_metadata\x18\x04 \x02(\x08\x12\x1e\n\x16\x63hange_rebalance_state\x18\x05 \x02(\x08\x12\x10\n\x08rollback\x18\x06 \x02(\x08\"?\n\x1cRebalanceStateChangeResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"G\n DeleteStoreRebalanceStateRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x0f\n\x07node_id\x18\x02 \x02(\x05\"D\n!DeleteStoreRebalanceStateResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"h\n\x13NativeBackupRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x12\n\nbackup_dir\x18\x02 \x02(\t\x12\x14\n\x0cverify_files\x18\x03 \x02(\x08\x12\x13\n\x0bincremental\x18\x04 \x02(\x08\"\xb7\x0e\n\x15VoldemortAdminRequest\x12)\n\x04type\x18\x01 \x02(\x0e\x32\x1b.voldemort.AdminRequestType\x12\x33\n\x0cget_metadata\x18\x02 \x01(\x0b\x32\x1d.voldemort.GetMetadataRequest\x12\x39\n\x0fupdate_metadata\x18\x03 \x01(\x0b\x32 .voldemort.UpdateMetadataRequest\x12J\n\x18update_partition_entries\x18\x04 \x01(\x0b\x32(.voldemort.UpdatePartitionEntriesRequest\x12H\n\x17\x66\x65tch_partition_entries\x18\x05 \x01(\x0b\x32\'.voldemort.FetchPartitionEntriesRequest\x12J\n\x18\x64\x65lete_partition_entries\x18\x06 \x01(\x0b\x32(.voldemort.DeletePartitionEntriesRequest\x12K\n\x19initiate_fetch_and_update\x18\x07 \x01(\x0b\x32(.voldemort.InitiateFetchAndUpdateRequest\x12\x46\n\x16\x61sync_operation_status\x18\x08 \x01(\x0b\x32&.voldemort.AsyncOperationStatusRequest\x12H\n\x17initiate_rebalance_node\x18\t \x01(\x0b\x32\'.voldemort.InitiateRebalanceNodeRequest\x12\x42\n\x14\x61sync_operation_stop\x18\n \x01(\x0b\x32$.voldemort.AsyncOperationStopRequest\x12\x42\n\x14\x61sync_operation_list\x18\x0b \x01(\x0b\x32$.voldemort.AsyncOperationListRequest\x12;\n\x10truncate_entries\x18\x0c \x01(\x0b\x32!.voldemort.TruncateEntriesRequest\x12-\n\tadd_store\x18\r \x01(\x0b\x32\x1a.voldemort.AddStoreRequest\x12\x33\n\x0c\x64\x65lete_store\x18\x0e \x01(\x0b\x32\x1d.voldemort.DeleteStoreRequest\x12\x31\n\x0b\x66\x65tch_store\x18\x0f \x01(\x0b\x32\x1c.voldemort.FetchStoreRequest\x12/\n\nswap_store\x18\x10 \x01(\x0b\x32\x1b.voldemort.SwapStoreRequest\x12\x37\n\x0erollback_store\x18\x11 \x01(\x0b\x32\x1f.voldemort.RollbackStoreRequest\x12\x44\n\x16get_ro_max_version_dir\x18\x12 \x01(\x0b\x32$.voldemort.GetROMaxVersionDirRequest\x12L\n\x1aget_ro_current_version_dir\x18\x13 \x01(\x0b\x32(.voldemort.GetROCurrentVersionDirRequest\x12\x44\n\x15\x66\x65tch_partition_files\x18\x14 \x01(\x0b\x32%.voldemort.FetchPartitionFilesRequest\x12@\n\x13update_slop_entries\x18\x16 \x01(\x0b\x32#.voldemort.UpdateSlopEntriesRequest\x12>\n\x12\x66\x61iled_fetch_store\x18\x18 \x01(\x0b\x32\".voldemort.FailedFetchStoreRequest\x12\x43\n\x15get_ro_storage_format\x18\x19 
\x01(\x0b\x32$.voldemort.GetROStorageFormatRequest\x12\x46\n\x16rebalance_state_change\x18\x1a \x01(\x0b\x32&.voldemort.RebalanceStateChangeRequest\x12/\n\nrepair_job\x18\x1b \x01(\x0b\x32\x1b.voldemort.RepairJobRequest\x12X\n initiate_rebalance_node_on_donor\x18\x1c \x01(\x0b\x32..voldemort.InitiateRebalanceNodeOnDonorRequest\x12Q\n\x1c\x64\x65lete_store_rebalance_state\x18\x1d \x01(\x0b\x32+.voldemort.DeleteStoreRebalanceStateRequest\x12\x35\n\rnative_backup\x18\x1e \x01(\x0b\x32\x1e.voldemort.NativeBackupRequest*\xb4\x05\n\x10\x41\x64minRequestType\x12\x10\n\x0cGET_METADATA\x10\x00\x12\x13\n\x0fUPDATE_METADATA\x10\x01\x12\x1c\n\x18UPDATE_PARTITION_ENTRIES\x10\x02\x12\x1b\n\x17\x46\x45TCH_PARTITION_ENTRIES\x10\x03\x12\x1c\n\x18\x44\x45LETE_PARTITION_ENTRIES\x10\x04\x12\x1d\n\x19INITIATE_FETCH_AND_UPDATE\x10\x05\x12\x1a\n\x16\x41SYNC_OPERATION_STATUS\x10\x06\x12\x1b\n\x17INITIATE_REBALANCE_NODE\x10\x07\x12\x18\n\x14\x41SYNC_OPERATION_STOP\x10\x08\x12\x18\n\x14\x41SYNC_OPERATION_LIST\x10\t\x12\x14\n\x10TRUNCATE_ENTRIES\x10\n\x12\r\n\tADD_STORE\x10\x0b\x12\x10\n\x0c\x44\x45LETE_STORE\x10\x0c\x12\x0f\n\x0b\x46\x45TCH_STORE\x10\r\x12\x0e\n\nSWAP_STORE\x10\x0e\x12\x12\n\x0eROLLBACK_STORE\x10\x0f\x12\x1a\n\x16GET_RO_MAX_VERSION_DIR\x10\x10\x12\x1e\n\x1aGET_RO_CURRENT_VERSION_DIR\x10\x11\x12\x19\n\x15\x46\x45TCH_PARTITION_FILES\x10\x12\x12\x17\n\x13UPDATE_SLOP_ENTRIES\x10\x14\x12\x16\n\x12\x46\x41ILED_FETCH_STORE\x10\x16\x12\x19\n\x15GET_RO_STORAGE_FORMAT\x10\x17\x12\x1a\n\x16REBALANCE_STATE_CHANGE\x10\x18\x12\x0e\n\nREPAIR_JOB\x10\x19\x12$\n INITIATE_REBALANCE_NODE_ON_DONOR\x10\x1a\x12 \n\x1c\x44\x45LETE_STORE_REBALANCE_STATE\x10\x1b\x12\x11\n\rNATIVE_BACKUP\x10\x1c\x42-\n\x1cvoldemort.client.protocol.pbB\x0bVAdminProtoH\x01') + serialized_pb='\n\x15voldemort-admin.proto\x12\tvoldemort\x1a\x16voldemort-client.proto\"!\n\x12GetMetadataRequest\x12\x0b\n\x03key\x18\x01 \x02(\x0c\"]\n\x13GetMetadataResponse\x12%\n\x07version\x18\x01 \x01(\x0b\x32\x14.voldemort.Versioned\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"M\n\x15UpdateMetadataRequest\x12\x0b\n\x03key\x18\x01 \x02(\x0c\x12\'\n\tversioned\x18\x02 \x02(\x0b\x32\x14.voldemort.Versioned\"9\n\x16UpdateMetadataResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"7\n\tFileEntry\x12\x11\n\tfile_name\x18\x01 \x02(\t\x12\x17\n\x0f\x66ile_size_bytes\x18\x02 \x02(\x03\"F\n\x0ePartitionEntry\x12\x0b\n\x03key\x18\x01 \x02(\x0c\x12\'\n\tversioned\x18\x02 \x02(\x0b\x32\x14.voldemort.Versioned\"\x8e\x01\n\x1dUpdatePartitionEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x32\n\x0fpartition_entry\x18\x02 \x02(\x0b\x32\x19.voldemort.PartitionEntry\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\"A\n\x1eUpdatePartitionEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"-\n\x0fVoldemortFilter\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x0c\n\x04\x64\x61ta\x18\x02 \x02(\x0c\"\xaf\x01\n\x18UpdateSlopEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x0b\n\x03key\x18\x02 \x02(\x0c\x12\'\n\x07version\x18\x03 \x02(\x0b\x32\x16.voldemort.VectorClock\x12,\n\x0crequest_type\x18\x04 \x02(\x0e\x32\x16.voldemort.RequestType\x12\r\n\x05value\x18\x05 \x01(\x0c\x12\x11\n\ttransform\x18\x06 \x01(\x0c\"<\n\x19UpdateSlopEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"d\n\x1a\x46\x65tchPartitionFilesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 
\x03(\x0b\x32\x19.voldemort.PartitionTuple\"\xd7\x01\n\x1c\x46\x65tchPartitionEntriesRequest\x12\x37\n\x14replica_to_partition\x18\x01 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12\r\n\x05store\x18\x02 \x02(\t\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x14\n\x0c\x66\x65tch_values\x18\x04 \x01(\x08\x12\x14\n\x0cskip_records\x18\x05 \x01(\x03\x12\x17\n\x0finitial_cluster\x18\x06 \x01(\t\"\x81\x01\n\x1d\x46\x65tchPartitionEntriesResponse\x12\x32\n\x0fpartition_entry\x18\x01 \x01(\x0b\x32\x19.voldemort.PartitionEntry\x12\x0b\n\x03key\x18\x02 \x01(\x0c\x12\x1f\n\x05\x65rror\x18\x03 \x01(\x0b\x32\x10.voldemort.Error\"\xac\x01\n\x1d\x44\x65letePartitionEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x17\n\x0finitial_cluster\x18\x04 \x01(\t\"P\n\x1e\x44\x65letePartitionEntriesResponse\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"\xcf\x01\n\x1dInitiateFetchAndUpdateRequest\x12\x0f\n\x07node_id\x18\x01 \x02(\x05\x12\r\n\x05store\x18\x02 \x02(\t\x12*\n\x06\x66ilter\x18\x03 \x01(\x0b\x32\x1a.voldemort.VoldemortFilter\x12\x37\n\x14replica_to_partition\x18\x04 \x03(\x0b\x32\x19.voldemort.PartitionTuple\x12\x17\n\x0finitial_cluster\x18\x05 \x01(\t\x12\x10\n\x08optimize\x18\x06 \x01(\x08\"1\n\x1b\x41syncOperationStatusRequest\x12\x12\n\nrequest_id\x18\x01 \x02(\x05\"/\n\x19\x41syncOperationStopRequest\x12\x12\n\nrequest_id\x18\x01 \x02(\x05\"=\n\x1a\x41syncOperationStopResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"2\n\x19\x41syncOperationListRequest\x12\x15\n\rshow_complete\x18\x02 \x02(\x08\"R\n\x1a\x41syncOperationListResponse\x12\x13\n\x0brequest_ids\x18\x01 \x03(\x05\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\":\n\x0ePartitionTuple\x12\x14\n\x0creplica_type\x18\x01 \x02(\x05\x12\x12\n\npartitions\x18\x02 \x03(\x05\"e\n\x16PerStorePartitionTuple\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x37\n\x14replica_to_partition\x18\x02 \x03(\x0b\x32\x19.voldemort.PartitionTuple\"\xf8\x01\n\x19RebalancePartitionInfoMap\x12\x12\n\nstealer_id\x18\x01 \x02(\x05\x12\x10\n\x08\x64onor_id\x18\x02 \x02(\x05\x12\x0f\n\x07\x61ttempt\x18\x03 \x02(\x05\x12\x43\n\x18replica_to_add_partition\x18\x04 \x03(\x0b\x32!.voldemort.PerStorePartitionTuple\x12\x46\n\x1breplica_to_delete_partition\x18\x05 \x03(\x0b\x32!.voldemort.PerStorePartitionTuple\x12\x17\n\x0finitial_cluster\x18\x06 \x02(\t\"f\n\x1cInitiateRebalanceNodeRequest\x12\x46\n\x18rebalance_partition_info\x18\x01 \x02(\x0b\x32$.voldemort.RebalancePartitionInfoMap\"m\n#InitiateRebalanceNodeOnDonorRequest\x12\x46\n\x18rebalance_partition_info\x18\x01 \x03(\x0b\x32$.voldemort.RebalancePartitionInfoMap\"\x8a\x01\n\x1c\x41syncOperationStatusResponse\x12\x12\n\nrequest_id\x18\x01 \x01(\x05\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0e\n\x06status\x18\x03 \x01(\t\x12\x10\n\x08\x63omplete\x18\x04 \x01(\x08\x12\x1f\n\x05\x65rror\x18\x05 \x01(\x0b\x32\x10.voldemort.Error\"\'\n\x16TruncateEntriesRequest\x12\r\n\x05store\x18\x01 \x02(\t\":\n\x17TruncateEntriesResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"*\n\x0f\x41\x64\x64StoreRequest\x12\x17\n\x0fstoreDefinition\x18\x01 \x02(\t\"3\n\x10\x41\x64\x64StoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"\'\n\x12\x44\x65leteStoreRequest\x12\x11\n\tstoreName\x18\x01 
\x02(\t\"6\n\x13\x44\x65leteStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"P\n\x11\x46\x65tchStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\x12\x14\n\x0cpush_version\x18\x03 \x01(\x03\"9\n\x10SwapStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\"P\n\x11SwapStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\x12\x1a\n\x12previous_store_dir\x18\x02 \x01(\t\"@\n\x14RollbackStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x14\n\x0cpush_version\x18\x02 \x02(\x03\"8\n\x15RollbackStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"&\n\x10RepairJobRequest\x12\x12\n\nstore_name\x18\x01 \x01(\t\"4\n\x11RepairJobResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"=\n\x14ROStoreVersionDirMap\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\"/\n\x19GetROMaxVersionDirRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"y\n\x1aGetROMaxVersionDirResponse\x12:\n\x11ro_store_versions\x18\x01 \x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"3\n\x1dGetROCurrentVersionDirRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"}\n\x1eGetROCurrentVersionDirResponse\x12:\n\x11ro_store_versions\x18\x01 \x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"/\n\x19GetROStorageFormatRequest\x12\x12\n\nstore_name\x18\x01 \x03(\t\"y\n\x1aGetROStorageFormatResponse\x12:\n\x11ro_store_versions\x18\x01 \x03(\x0b\x32\x1f.voldemort.ROStoreVersionDirMap\x12\x1f\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x10.voldemort.Error\"@\n\x17\x46\x61iledFetchStoreRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x11\n\tstore_dir\x18\x02 \x02(\t\";\n\x18\x46\x61iledFetchStoreResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"\xe6\x01\n\x1bRebalanceStateChangeRequest\x12K\n\x1drebalance_partition_info_list\x18\x01 \x03(\x0b\x32$.voldemort.RebalancePartitionInfoMap\x12\x16\n\x0e\x63luster_string\x18\x02 \x02(\t\x12\x0f\n\x07swap_ro\x18\x03 \x02(\x08\x12\x1f\n\x17\x63hange_cluster_metadata\x18\x04 \x02(\x08\x12\x1e\n\x16\x63hange_rebalance_state\x18\x05 \x02(\x08\x12\x10\n\x08rollback\x18\x06 \x02(\x08\"?\n\x1cRebalanceStateChangeResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"G\n DeleteStoreRebalanceStateRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x0f\n\x07node_id\x18\x02 \x02(\x05\"D\n!DeleteStoreRebalanceStateResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"h\n\x13NativeBackupRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x12\n\nbackup_dir\x18\x02 \x02(\t\x12\x14\n\x0cverify_files\x18\x03 \x02(\x08\x12\x13\n\x0bincremental\x18\x04 \x02(\x08\">\n\x14ReserveMemoryRequest\x12\x12\n\nstore_name\x18\x01 \x02(\t\x12\x12\n\nsize_in_mb\x18\x02 \x02(\x03\"8\n\x15ReserveMemoryResponse\x12\x1f\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x10.voldemort.Error\"\xf0\x0e\n\x15VoldemortAdminRequest\x12)\n\x04type\x18\x01 \x02(\x0e\x32\x1b.voldemort.AdminRequestType\x12\x33\n\x0cget_metadata\x18\x02 \x01(\x0b\x32\x1d.voldemort.GetMetadataRequest\x12\x39\n\x0fupdate_metadata\x18\x03 \x01(\x0b\x32 .voldemort.UpdateMetadataRequest\x12J\n\x18update_partition_entries\x18\x04 \x01(\x0b\x32(.voldemort.UpdatePartitionEntriesRequest\x12H\n\x17\x66\x65tch_partition_entries\x18\x05 
\x01(\x0b\x32\'.voldemort.FetchPartitionEntriesRequest\x12J\n\x18\x64\x65lete_partition_entries\x18\x06 \x01(\x0b\x32(.voldemort.DeletePartitionEntriesRequest\x12K\n\x19initiate_fetch_and_update\x18\x07 \x01(\x0b\x32(.voldemort.InitiateFetchAndUpdateRequest\x12\x46\n\x16\x61sync_operation_status\x18\x08 \x01(\x0b\x32&.voldemort.AsyncOperationStatusRequest\x12H\n\x17initiate_rebalance_node\x18\t \x01(\x0b\x32\'.voldemort.InitiateRebalanceNodeRequest\x12\x42\n\x14\x61sync_operation_stop\x18\n \x01(\x0b\x32$.voldemort.AsyncOperationStopRequest\x12\x42\n\x14\x61sync_operation_list\x18\x0b \x01(\x0b\x32$.voldemort.AsyncOperationListRequest\x12;\n\x10truncate_entries\x18\x0c \x01(\x0b\x32!.voldemort.TruncateEntriesRequest\x12-\n\tadd_store\x18\r \x01(\x0b\x32\x1a.voldemort.AddStoreRequest\x12\x33\n\x0c\x64\x65lete_store\x18\x0e \x01(\x0b\x32\x1d.voldemort.DeleteStoreRequest\x12\x31\n\x0b\x66\x65tch_store\x18\x0f \x01(\x0b\x32\x1c.voldemort.FetchStoreRequest\x12/\n\nswap_store\x18\x10 \x01(\x0b\x32\x1b.voldemort.SwapStoreRequest\x12\x37\n\x0erollback_store\x18\x11 \x01(\x0b\x32\x1f.voldemort.RollbackStoreRequest\x12\x44\n\x16get_ro_max_version_dir\x18\x12 \x01(\x0b\x32$.voldemort.GetROMaxVersionDirRequest\x12L\n\x1aget_ro_current_version_dir\x18\x13 \x01(\x0b\x32(.voldemort.GetROCurrentVersionDirRequest\x12\x44\n\x15\x66\x65tch_partition_files\x18\x14 \x01(\x0b\x32%.voldemort.FetchPartitionFilesRequest\x12@\n\x13update_slop_entries\x18\x16 \x01(\x0b\x32#.voldemort.UpdateSlopEntriesRequest\x12>\n\x12\x66\x61iled_fetch_store\x18\x18 \x01(\x0b\x32\".voldemort.FailedFetchStoreRequest\x12\x43\n\x15get_ro_storage_format\x18\x19 \x01(\x0b\x32$.voldemort.GetROStorageFormatRequest\x12\x46\n\x16rebalance_state_change\x18\x1a \x01(\x0b\x32&.voldemort.RebalanceStateChangeRequest\x12/\n\nrepair_job\x18\x1b \x01(\x0b\x32\x1b.voldemort.RepairJobRequest\x12X\n initiate_rebalance_node_on_donor\x18\x1c \x01(\x0b\x32..voldemort.InitiateRebalanceNodeOnDonorRequest\x12Q\n\x1c\x64\x65lete_store_rebalance_state\x18\x1d \x01(\x0b\x32+.voldemort.DeleteStoreRebalanceStateRequest\x12\x35\n\rnative_backup\x18\x1e \x01(\x0b\x32\x1e.voldemort.NativeBackupRequest\x12\x37\n\x0ereserve_memory\x18\x1f \x01(\x0b\x32\x1f.voldemort.ReserveMemoryRequest*\xc8\x05\n\x10\x41\x64minRequestType\x12\x10\n\x0cGET_METADATA\x10\x00\x12\x13\n\x0fUPDATE_METADATA\x10\x01\x12\x1c\n\x18UPDATE_PARTITION_ENTRIES\x10\x02\x12\x1b\n\x17\x46\x45TCH_PARTITION_ENTRIES\x10\x03\x12\x1c\n\x18\x44\x45LETE_PARTITION_ENTRIES\x10\x04\x12\x1d\n\x19INITIATE_FETCH_AND_UPDATE\x10\x05\x12\x1a\n\x16\x41SYNC_OPERATION_STATUS\x10\x06\x12\x1b\n\x17INITIATE_REBALANCE_NODE\x10\x07\x12\x18\n\x14\x41SYNC_OPERATION_STOP\x10\x08\x12\x18\n\x14\x41SYNC_OPERATION_LIST\x10\t\x12\x14\n\x10TRUNCATE_ENTRIES\x10\n\x12\r\n\tADD_STORE\x10\x0b\x12\x10\n\x0c\x44\x45LETE_STORE\x10\x0c\x12\x0f\n\x0b\x46\x45TCH_STORE\x10\r\x12\x0e\n\nSWAP_STORE\x10\x0e\x12\x12\n\x0eROLLBACK_STORE\x10\x0f\x12\x1a\n\x16GET_RO_MAX_VERSION_DIR\x10\x10\x12\x1e\n\x1aGET_RO_CURRENT_VERSION_DIR\x10\x11\x12\x19\n\x15\x46\x45TCH_PARTITION_FILES\x10\x12\x12\x17\n\x13UPDATE_SLOP_ENTRIES\x10\x14\x12\x16\n\x12\x46\x41ILED_FETCH_STORE\x10\x16\x12\x19\n\x15GET_RO_STORAGE_FORMAT\x10\x17\x12\x1a\n\x16REBALANCE_STATE_CHANGE\x10\x18\x12\x0e\n\nREPAIR_JOB\x10\x19\x12$\n INITIATE_REBALANCE_NODE_ON_DONOR\x10\x1a\x12 \n\x1c\x44\x45LETE_STORE_REBALANCE_STATE\x10\x1b\x12\x11\n\rNATIVE_BACKUP\x10\x1c\x12\x12\n\x0eRESERVE_MEMORY\x10\x1d\x42-\n\x1cvoldemort.client.protocol.pbB\x0bVAdminProtoH\x01') _ADMINREQUESTTYPE = 
descriptor.EnumDescriptor( name='AdminRequestType', @@ -126,11 +126,15 @@ name='NATIVE_BACKUP', index=26, number=28, options=None, type=None), + descriptor.EnumValueDescriptor( + name='RESERVE_MEMORY', index=27, number=29, + options=None, + type=None), ], containing_type=None, options=None, - serialized_start=6792, - serialized_end=7484, + serialized_start=6971, + serialized_end=7683, ) @@ -161,6 +165,7 @@ INITIATE_REBALANCE_NODE_ON_DONOR = 26 DELETE_STORE_REBALANCE_STATE = 27 NATIVE_BACKUP = 28 +RESERVE_MEMORY = 29 @@ -2124,6 +2129,69 @@ ) +_RESERVEMEMORYREQUEST = descriptor.Descriptor( + name='ReserveMemoryRequest', + full_name='voldemort.ReserveMemoryRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='store_name', full_name='voldemort.ReserveMemoryRequest.store_name', index=0, + number=1, type=9, cpp_type=9, label=2, + has_default_value=False, default_value=unicode("", "utf-8"), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + descriptor.FieldDescriptor( + name='size_in_mb', full_name='voldemort.ReserveMemoryRequest.size_in_mb', index=1, + number=2, type=3, cpp_type=2, label=2, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=4941, + serialized_end=5003, +) + + +_RESERVEMEMORYRESPONSE = descriptor.Descriptor( + name='ReserveMemoryResponse', + full_name='voldemort.ReserveMemoryResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + descriptor.FieldDescriptor( + name='error', full_name='voldemort.ReserveMemoryResponse.error', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + options=None, + is_extendable=False, + extension_ranges=[], + serialized_start=5005, + serialized_end=5061, +) + + _VOLDEMORTADMINREQUEST = descriptor.Descriptor( name='VoldemortAdminRequest', full_name='voldemort.VoldemortAdminRequest', @@ -2327,6 +2395,13 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), + descriptor.FieldDescriptor( + name='reserve_memory', full_name='voldemort.VoldemortAdminRequest.reserve_memory', index=28, + number=31, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + options=None), ], extensions=[ ], @@ -2336,8 +2411,8 @@ options=None, is_extendable=False, extension_ranges=[], - serialized_start=4942, - serialized_end=6789, + serialized_start=5064, + serialized_end=6968, ) import voldemort_client_pb2 @@ -2387,6 +2462,7 @@ _REBALANCESTATECHANGEREQUEST.fields_by_name['rebalance_partition_info_list'].message_type = _REBALANCEPARTITIONINFOMAP _REBALANCESTATECHANGERESPONSE.fields_by_name['error'].message_type = voldemort_client_pb2._ERROR _DELETESTOREREBALANCESTATERESPONSE.fields_by_name['error'].message_type = voldemort_client_pb2._ERROR +_RESERVEMEMORYRESPONSE.fields_by_name['error'].message_type = voldemort_client_pb2._ERROR 
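The regenerated voldemort_admin_pb2 module above adds a ReserveMemoryRequest/ReserveMemoryResponse pair, a RESERVE_MEMORY enum value, and a reserve_memory field on VoldemortAdminRequest. A short sketch of how a Python caller could build the new admin message using the standard protobuf generated API; the store name, size, and the 4-byte framing shown here are illustrative assumptions, not values taken from this diff:

import struct
import voldemort_admin_pb2 as admin

# Build the new RESERVE_MEMORY admin request introduced by this change.
request = admin.VoldemortAdminRequest()
request.type = admin.RESERVE_MEMORY
request.reserve_memory.store_name = 'test'  # hypothetical store name
request.reserve_memory.size_in_mb = 512     # hypothetical reservation size

# Frame it the same way the Python client frames other requests:
# a 4-byte big-endian length prefix followed by the serialized payload.
payload = request.SerializeToString()
framed = struct.pack('>i', len(payload)) + payload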
_VOLDEMORTADMINREQUEST.fields_by_name['type'].enum_type = _ADMINREQUESTTYPE _VOLDEMORTADMINREQUEST.fields_by_name['get_metadata'].message_type = _GETMETADATAREQUEST _VOLDEMORTADMINREQUEST.fields_by_name['update_metadata'].message_type = _UPDATEMETADATAREQUEST @@ -2415,6 +2491,7 @@ _VOLDEMORTADMINREQUEST.fields_by_name['initiate_rebalance_node_on_donor'].message_type = _INITIATEREBALANCENODEONDONORREQUEST _VOLDEMORTADMINREQUEST.fields_by_name['delete_store_rebalance_state'].message_type = _DELETESTOREREBALANCESTATEREQUEST _VOLDEMORTADMINREQUEST.fields_by_name['native_backup'].message_type = _NATIVEBACKUPREQUEST +_VOLDEMORTADMINREQUEST.fields_by_name['reserve_memory'].message_type = _RESERVEMEMORYREQUEST class GetMetadataRequest(message.Message): __metaclass__ = reflection.GeneratedProtocolMessageType @@ -2746,6 +2823,18 @@ class NativeBackupRequest(message.Message): # @@protoc_insertion_point(class_scope:voldemort.NativeBackupRequest) +class ReserveMemoryRequest(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _RESERVEMEMORYREQUEST + + # @@protoc_insertion_point(class_scope:voldemort.ReserveMemoryRequest) + +class ReserveMemoryResponse(message.Message): + __metaclass__ = reflection.GeneratedProtocolMessageType + DESCRIPTOR = _RESERVEMEMORYRESPONSE + + # @@protoc_insertion_point(class_scope:voldemort.ReserveMemoryResponse) + class VoldemortAdminRequest(message.Message): __metaclass__ = reflection.GeneratedProtocolMessageType DESCRIPTOR = _VOLDEMORTADMINREQUEST diff --git a/config/single_node_cluster/config/stores.xml b/config/single_node_cluster/config/stores.xml index ef4654295a..d488d2b62d 100644 --- a/config/single_node_cluster/config/stores.xml +++ b/config/single_node_cluster/config/stores.xml @@ -1,32 +1,39 @@ - - - test - bdb - Test store - harry@hogwarts.edu, hermoine@hogwarts.edu - client - 1 - 1 - 1 - - string - - - string - - - - test-view - test - ron@hogwarts.edu - - voldemort.store.views.UpperCaseView - - - string - - - string - - - + + + test + bdb + Test store + harry@hogwarts.edu, hermoine@hogwarts.edu + consistent-routing + client + 1 + 1 + 1 + + string + + + string + + + + test-evolution + bdb + Test store + harry@hogwarts.edu, hermoine@hogwarts.edu + consistent-routing + client + 1 + 1 + 1 + + string + + + avro-generic-versioned + {"type": "record", "name": "myrec","fields": [{ "name": "original", "type": "string" }]} + {"type": "record", "name": "myrec","fields": [{ "name": "original", "type": "string" }, { "name": "new-field", "type": "string", "default":"" }]} + + + + \ No newline at end of file diff --git a/contrib/ec2-testing/test/voldemort/utils/Ec2FailureDetectorTest.java b/contrib/ec2-testing/test/voldemort/utils/Ec2FailureDetectorTest.java index 44bff93296..9010979361 100644 --- a/contrib/ec2-testing/test/voldemort/utils/Ec2FailureDetectorTest.java +++ b/contrib/ec2-testing/test/voldemort/utils/Ec2FailureDetectorTest.java @@ -150,7 +150,7 @@ public void testAllNodesOffline() throws Exception { test(store); assertEquals(hostNamePairs.size(), failureDetector.getAvailableNodeCount()); - for(Node n: failureDetector.getConfig().getNodes()) + for(Node n: failureDetector.getConfig().getCluster().getNodes()) assertTrue(failureDetector.isAvailable(n)); // 2. 
Stop all the nodes, then test enough that we can cause the nodes @@ -159,19 +159,19 @@ public void testAllNodesOffline() throws Exception { test(store); assertEquals(0, failureDetector.getAvailableNodeCount()); - for(Node n: failureDetector.getConfig().getNodes()) + for(Node n: failureDetector.getConfig().getCluster().getNodes()) assertFalse(failureDetector.isAvailable(n)); // 3. Now start the cluster up, test, and make sure everything's OK. startClusterAsync(hostNames, ec2FailureDetectorTestConfig, nodeIds); - for(Node n: failureDetector.getConfig().getNodes()) + for(Node n: failureDetector.getConfig().getCluster().getNodes()) failureDetector.waitForAvailability(n); test(store); assertEquals(hostNamePairs.size(), failureDetector.getAvailableNodeCount()); - for(Node n: failureDetector.getConfig().getNodes()) + for(Node n: failureDetector.getConfig().getCluster().getNodes()) assertTrue(failureDetector.isAvailable(n)); } @@ -252,7 +252,7 @@ private Node getNodeByHostName(String hostName, FailureDetector failureDetector) throws Exception { Integer offlineNodeId = nodeIds.get(hostName); - for(Node n: failureDetector.getConfig().getNodes()) { + for(Node n: failureDetector.getConfig().getCluster().getNodes()) { if(offlineNodeId.equals(n.getId())) return n; } diff --git a/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/BdbBuildPerformanceTest.java b/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/BdbBuildPerformanceTest.java index 9df60c11fe..b2c67df2ee 100644 --- a/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/BdbBuildPerformanceTest.java +++ b/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/BdbBuildPerformanceTest.java @@ -28,6 +28,7 @@ import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.SequenceFileRecordReader; +import voldemort.TestUtils; import voldemort.performance.PerformanceTest; import voldemort.server.VoldemortConfig; import voldemort.store.Store; @@ -51,7 +52,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException String storeName = args[1]; String jsonDataFile = args[2]; - final Store store = new BdbStorageConfiguration(new VoldemortConfig(new Props(new File(serverPropsFile)))).getStore(storeName); + final Store store = new BdbStorageConfiguration(new VoldemortConfig(new Props(new File(serverPropsFile)))).getStore(TestUtils.makeStoreDefinition(storeName)); final AtomicInteger obsoletes = new AtomicInteger(0); diff --git a/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/MysqlBuildPerformanceTest.java b/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/MysqlBuildPerformanceTest.java index ccf09e6c1a..d7fe084ea1 100644 --- a/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/MysqlBuildPerformanceTest.java +++ b/contrib/hadoop-store-builder/perf/voldemort/contrib/batchindexer/performance/MysqlBuildPerformanceTest.java @@ -28,6 +28,7 @@ import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.SequenceFileRecordReader; +import voldemort.TestUtils; import voldemort.performance.PerformanceTest; import voldemort.server.VoldemortConfig; import voldemort.store.Store; @@ -51,7 +52,7 @@ public static void main(String[] args) throws FileNotFoundException, IOException String storeName = args[1]; String jsonDataFile = args[2]; - final Store store = new MysqlStorageConfiguration(new VoldemortConfig(new Props(new 
File(serverPropsFile)))).getStore(storeName); + final Store store = new MysqlStorageConfiguration(new VoldemortConfig(new Props(new File(serverPropsFile)))).getStore(TestUtils.makeStoreDefinition(storeName)); final AtomicInteger obsoletes = new AtomicInteger(0); diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriter.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriter.java new file mode 100644 index 0000000000..87bebe74d8 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriter.java @@ -0,0 +1,339 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.disk; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.StringReader; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import voldemort.VoldemortException; +import voldemort.cluster.Cluster; +import voldemort.store.StoreDefinition; +import voldemort.store.readonly.ReadOnlyUtils; +import voldemort.store.readonly.checksum.CheckSum; +import voldemort.store.readonly.checksum.CheckSum.CheckSumType; +import voldemort.utils.ByteUtils; +import voldemort.xml.ClusterMapper; +import voldemort.xml.StoreDefinitionsMapper; + +// The default Voldemort keyvalue writer +// generates index and data files +public class HadoopStoreWriter implements KeyValueWriter { + + private static final Logger logger = Logger.getLogger(HadoopStoreWriter.class); + + private DataOutputStream indexFileStream = null; + private DataOutputStream valueFileStream = null; + private int position; + private String taskId = null; + + private int nodeId = -1; + private int partitionId = -1; + private int chunkId = -1; + private int replicaType = -1; + + private Path taskIndexFileName; + private Path taskValueFileName; + + private JobConf conf; + private CheckSumType checkSumType; + private CheckSum checkSumDigestIndex; + private CheckSum checkSumDigestValue; + + private String outputDir; + + private FileSystem fs; + + private int numChunks; + private Cluster cluster; + private StoreDefinition storeDef; + private boolean saveKeys; + private boolean reducerPerBucket; + + public Cluster getCluster() { + checkNotNull(cluster); + return cluster; + } + + public boolean getSaveKeys() { + return this.saveKeys; + } + + public boolean getReducerPerBucket() { + return this.reducerPerBucket; + } + + public StoreDefinition getStoreDef() { + checkNotNull(storeDef); + return storeDef; + } + + public String getStoreName() { + checkNotNull(storeDef); + return storeDef.getName(); + 
} + + private final void checkNotNull(Object o) { + if(o == null) + throw new VoldemortException("Not configured yet!"); + } + + public int getNumChunks() { + return this.numChunks; + } + + @Override + public void conf(JobConf job) { + + conf = job; + try { + + this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml"))); + List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml"))); + if(storeDefs.size() != 1) + throw new IllegalStateException("Expected to find only a single store, but found multiple!"); + this.storeDef = storeDefs.get(0); + + this.numChunks = conf.getInt("num.chunks", -1); + if(this.numChunks < 1) + throw new VoldemortException("num.chunks not specified in the job conf."); + this.saveKeys = conf.getBoolean("save.keys", false); + this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false); + this.conf = job; + this.position = 0; + this.outputDir = job.get("final.output.dir"); + this.taskId = job.get("mapred.task.id"); + this.checkSumType = CheckSum.fromString(job.get("checksum.type")); + this.checkSumDigestIndex = CheckSum.getInstance(checkSumType); + this.checkSumDigestValue = CheckSum.getInstance(checkSumType); + + this.taskIndexFileName = new Path(FileOutputFormat.getOutputPath(job), getStoreName() + + "." + + this.taskId + + ".index"); + this.taskValueFileName = new Path(FileOutputFormat.getOutputPath(job), getStoreName() + + "." + + this.taskId + + ".data"); + + if(this.fs == null) + this.fs = this.taskIndexFileName.getFileSystem(job); + + this.indexFileStream = fs.create(this.taskIndexFileName); + this.valueFileStream = fs.create(this.taskValueFileName); + + logger.info("Opening " + this.taskIndexFileName + " and " + this.taskValueFileName + + " for writing."); + + } catch(IOException e) { + throw new RuntimeException("Failed to open Input/OutputStream", e); + } + + } + + @Override + public void write(BytesWritable key, Iterator iterator, Reporter reporter) + throws IOException { + + // Write key and position + this.indexFileStream.write(key.get(), 0, key.getSize()); + this.indexFileStream.writeInt(this.position); + + // Run key through checksum digest + if(this.checkSumDigestIndex != null) { + this.checkSumDigestIndex.update(key.get(), 0, key.getSize()); + this.checkSumDigestIndex.update(this.position); + } + + short numTuples = 0; + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream valueStream = new DataOutputStream(stream); + + while(iterator.hasNext()) { + BytesWritable writable = iterator.next(); + byte[] valueBytes = writable.get(); + int offsetTillNow = 0; + + // Read node Id + if(this.nodeId == -1) + this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow); + offsetTillNow += ByteUtils.SIZE_OF_INT; + + // Read partition id + if(this.partitionId == -1) + this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow); + offsetTillNow += ByteUtils.SIZE_OF_INT; + + // Read chunk id + if(this.chunkId == -1) + this.chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks()); + + // Read replica type + if(getSaveKeys()) { + if(this.replicaType == -1) + this.replicaType = (int) ByteUtils.readBytes(valueBytes, + offsetTillNow, + ByteUtils.SIZE_OF_BYTE); + offsetTillNow += ByteUtils.SIZE_OF_BYTE; + } + + int valueLength = writable.getSize() - offsetTillNow; + if(getSaveKeys()) { + // Write ( key_length, value_length, key, + // value ) + valueStream.write(valueBytes, offsetTillNow, valueLength); + } else { + // Write (value_length + value) + 
valueStream.writeInt(valueLength); + valueStream.write(valueBytes, offsetTillNow, valueLength); + } + + numTuples++; + + // If we have multiple values for this md5 that is a collision, + // throw an exception--either the data itself has duplicates, there + // are trillions of keys, or someone is attempting something + // malicious ( We obviously expect collisions when we save keys ) + if(!getSaveKeys() && numTuples > 1) + throw new VoldemortException("Duplicate keys detected for md5 sum " + + ByteUtils.toHexString(ByteUtils.copy(key.get(), + 0, + key.getSize()))); + + } + + if(numTuples < 0) { + // Overflow + throw new VoldemortException("Found too many collisions: chunk " + chunkId + + " has exceeded " + Short.MAX_VALUE + " collisions."); + } else if(numTuples > 1) { + // Update number of collisions + max keys per collision + reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1); + + long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter(); + if(numTuples > numCollisions) { + reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions); + } + } + + // Flush the value + valueStream.flush(); + byte[] value = stream.toByteArray(); + + // Start writing to file now + // First, if save keys flag set the number of keys + if(getSaveKeys()) { + + this.valueFileStream.writeShort(numTuples); + this.position += ByteUtils.SIZE_OF_SHORT; + + if(this.checkSumDigestValue != null) { + this.checkSumDigestValue.update(numTuples); + } + } + + this.valueFileStream.write(value); + this.position += value.length; + + if(this.checkSumDigestValue != null) { + this.checkSumDigestValue.update(value); + } + + if(this.position < 0) + throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + + " has exceeded " + Integer.MAX_VALUE + " bytes."); + } + + @Override + public void close() throws IOException { + + this.indexFileStream.close(); + this.valueFileStream.close(); + + if(this.nodeId == -1 || this.chunkId == -1 || this.partitionId == -1) { + // Issue 258 - No data was read in the reduce phase, do not create + // any output + return; + } + + // If the replica type read was not valid, shout out + if(getSaveKeys() && this.replicaType == -1) { + throw new RuntimeException("Could not read the replica type correctly for node " + + nodeId + " ( partition - " + this.partitionId + " )"); + } + + String fileNamePrefix = null; + if(getSaveKeys()) { + fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" + + Integer.toString(this.replicaType) + "_" + + Integer.toString(this.chunkId)); + } else { + fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" + + Integer.toString(this.chunkId)); + } + + // Initialize the node directory + Path nodeDir = new Path(this.outputDir, "node-" + this.nodeId); + + // Create output directory, if it doesn't exist + FileSystem outputFs = nodeDir.getFileSystem(this.conf); + outputFs.mkdirs(nodeDir); + + // Write the checksum and output files + if(this.checkSumType != CheckSumType.NONE) { + + if(this.checkSumDigestIndex != null && this.checkSumDigestValue != null) { + Path checkSumIndexFile = new Path(nodeDir, fileNamePrefix + ".index.checksum"); + Path checkSumValueFile = new Path(nodeDir, fileNamePrefix + ".data.checksum"); + + FSDataOutputStream output = outputFs.create(checkSumIndexFile); + output.write(this.checkSumDigestIndex.getCheckSum()); + output.close(); + + output = outputFs.create(checkSumValueFile); + output.write(this.checkSumDigestValue.getCheckSum()); + output.close(); + } else { + 
throw new RuntimeException("Failed to open checksum digest for node " + nodeId + + " ( partition - " + this.partitionId + ", chunk - " + + chunkId + " )"); + } + } + + // Generate the final chunk files + Path indexFile = new Path(nodeDir, fileNamePrefix + ".index"); + Path valueFile = new Path(nodeDir, fileNamePrefix + ".data"); + + logger.info("Moving " + this.taskIndexFileName + " to " + indexFile); + outputFs.rename(taskIndexFileName, indexFile); + logger.info("Moving " + this.taskValueFileName + " to " + valueFile); + outputFs.rename(this.taskValueFileName, valueFile); + } + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriterPerBucket.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriterPerBucket.java new file mode 100644 index 0000000000..6fdf34f910 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/HadoopStoreWriterPerBucket.java @@ -0,0 +1,358 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.disk; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.StringReader; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import voldemort.VoldemortException; +import voldemort.cluster.Cluster; +import voldemort.store.StoreDefinition; +import voldemort.store.readonly.ReadOnlyUtils; +import voldemort.store.readonly.checksum.CheckSum; +import voldemort.store.readonly.checksum.CheckSum.CheckSumType; +import voldemort.utils.ByteUtils; +import voldemort.xml.ClusterMapper; +import voldemort.xml.StoreDefinitionsMapper; + +public class HadoopStoreWriterPerBucket implements KeyValueWriter { + + private static final Logger logger = Logger.getLogger(HadoopStoreWriterPerBucket.class); + + private DataOutputStream[] indexFileStream = null; + private DataOutputStream[] valueFileStream = null; + private int[] position; + private String taskId = null; + + private int nodeId = -1; + private int partitionId = -1; + private int replicaType = -1; + + private Path[] taskIndexFileName; + private Path[] taskValueFileName; + + private JobConf conf; + private CheckSumType checkSumType; + private CheckSum[] checkSumDigestIndex; + private CheckSum[] checkSumDigestValue; + + private String outputDir; + + private FileSystem fs; + + @Override + public void conf(JobConf job) { + + JobConf conf = job; + try { + + this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml"))); + List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml"))); + 
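close() above promotes the per-task files into their final per-node layout: everything lands under node-<nodeId>/ with a prefix built from the partition id, an optional replica type, and the chunk id, plus matching .index.checksum and .data.checksum files when a checksum type is configured. A small helper that mirrors that naming scheme, shown only to make the convention explicit (not part of the patch):

// Sketch only: mirrors the file naming used by HadoopStoreWriter.close() above.
public class ChunkFileNames {

    /** node-<nodeId>/<partitionId>_<replicaType>_<chunkId>.index when keys are saved,
     *  node-<nodeId>/<partitionId>_<chunkId>.index otherwise. */
    public static String indexFile(int nodeId, int partitionId, int replicaType,
                                   int chunkId, boolean saveKeys) {
        return "node-" + nodeId + "/" + prefix(partitionId, replicaType, chunkId, saveKeys) + ".index";
    }

    public static String dataFile(int nodeId, int partitionId, int replicaType,
                                  int chunkId, boolean saveKeys) {
        return "node-" + nodeId + "/" + prefix(partitionId, replicaType, chunkId, saveKeys) + ".data";
    }

    private static String prefix(int partitionId, int replicaType, int chunkId, boolean saveKeys) {
        return saveKeys ? partitionId + "_" + replicaType + "_" + chunkId
                        : partitionId + "_" + chunkId;
    }
}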
if(storeDefs.size() != 1) + throw new IllegalStateException("Expected to find only a single store, but found multiple!"); + this.storeDef = storeDefs.get(0); + + this.numChunks = conf.getInt("num.chunks", -1); + if(this.numChunks < 1) + throw new VoldemortException("num.chunks not specified in the job conf."); + + this.saveKeys = conf.getBoolean("save.keys", false); + this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false); + this.conf = job; + this.outputDir = job.get("final.output.dir"); + this.taskId = job.get("mapred.task.id"); + this.checkSumType = CheckSum.fromString(job.get("checksum.type")); + + this.checkSumDigestIndex = new CheckSum[getNumChunks()]; + this.checkSumDigestValue = new CheckSum[getNumChunks()]; + this.position = new int[getNumChunks()]; + this.taskIndexFileName = new Path[getNumChunks()]; + this.taskValueFileName = new Path[getNumChunks()]; + this.indexFileStream = new DataOutputStream[getNumChunks()]; + this.valueFileStream = new DataOutputStream[getNumChunks()]; + + for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { + + this.checkSumDigestIndex[chunkId] = CheckSum.getInstance(checkSumType); + this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType); + this.position[chunkId] = 0; + + this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job), + getStoreName() + "." + + Integer.toString(chunkId) + + "_" + this.taskId + ".index"); + this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job), + getStoreName() + "." + + Integer.toString(chunkId) + + "_" + this.taskId + ".data"); + + if(this.fs == null) + this.fs = this.taskIndexFileName[chunkId].getFileSystem(job); + + this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]); + this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]); + + logger.info("Opening " + this.taskIndexFileName[chunkId] + " and " + + this.taskValueFileName[chunkId] + " for writing."); + } + + } catch(IOException e) { + // throw new RuntimeException("Failed to open Input/OutputStream", + // e); + e.printStackTrace(); + } + + } + + @Override + public void write(BytesWritable key, Iterator iterator, Reporter reporter) + throws IOException { + + // Read chunk id + int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks()); + + // Write key and position + this.indexFileStream[chunkId].write(key.get(), 0, key.getSize()); + this.indexFileStream[chunkId].writeInt(this.position[chunkId]); + + // Run key through checksum digest + if(this.checkSumDigestIndex[chunkId] != null) { + this.checkSumDigestIndex[chunkId].update(key.get(), 0, key.getSize()); + this.checkSumDigestIndex[chunkId].update(this.position[chunkId]); + } + + short numTuples = 0; + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream valueStream = new DataOutputStream(stream); + + while(iterator.hasNext()) { + BytesWritable writable = iterator.next(); + byte[] valueBytes = writable.get(); + int offsetTillNow = 0; + + // Read node Id + if(this.nodeId == -1) + this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow); + offsetTillNow += ByteUtils.SIZE_OF_INT; + + // Read partition id + if(this.partitionId == -1) + this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow); + offsetTillNow += ByteUtils.SIZE_OF_INT; + + // Read replica type + if(getSaveKeys()) { + if(this.replicaType == -1) + this.replicaType = (int) ByteUtils.readBytes(valueBytes, + offsetTillNow, + ByteUtils.SIZE_OF_BYTE); + offsetTillNow += ByteUtils.SIZE_OF_BYTE; + } + + int 
valueLength = writable.getSize() - offsetTillNow; + if(getSaveKeys()) { + // Write ( key_length, value_length, key, + // value ) + valueStream.write(valueBytes, offsetTillNow, valueLength); + } else { + // Write (value_length + value) + valueStream.writeInt(valueLength); + valueStream.write(valueBytes, offsetTillNow, valueLength); + } + + numTuples++; + + // If we have multiple values for this md5 that is a collision, + // throw an exception--either the data itself has duplicates, there + // are trillions of keys, or someone is attempting something + // malicious ( We obviously expect collisions when we save keys ) + if(!getSaveKeys() && numTuples > 1) + throw new VoldemortException("Duplicate keys detected for md5 sum " + + ByteUtils.toHexString(ByteUtils.copy(key.get(), + 0, + key.getSize()))); + + } + + if(numTuples < 0) { + // Overflow + throw new VoldemortException("Found too many collisions: chunk " + chunkId + + " has exceeded " + Short.MAX_VALUE + " collisions."); + } else if(numTuples > 1) { + // Update number of collisions + max keys per collision + reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1); + + long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter(); + if(numTuples > numCollisions) { + reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions); + } + } + + // Flush the value + valueStream.flush(); + byte[] value = stream.toByteArray(); + + // Start writing to file now + // First, if save keys flag set the number of keys + if(getSaveKeys()) { + + this.valueFileStream[chunkId].writeShort(numTuples); + this.position[chunkId] += ByteUtils.SIZE_OF_SHORT; + + if(this.checkSumDigestValue[chunkId] != null) { + this.checkSumDigestValue[chunkId].update(numTuples); + } + } + + this.valueFileStream[chunkId].write(value); + this.position[chunkId] += value.length; + + if(this.checkSumDigestValue[chunkId] != null) { + this.checkSumDigestValue[chunkId].update(value); + } + + if(this.position[chunkId] < 0) + throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + + " has exceeded " + Integer.MAX_VALUE + " bytes."); + + } + + @Override + public void close() throws IOException { + + for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { + this.indexFileStream[chunkId].close(); + this.valueFileStream[chunkId].close(); + } + + if(this.nodeId == -1 || this.partitionId == -1) { + // Issue 258 - No data was read in the reduce phase, do not create + // any output + return; + } + + // If the replica type read was not valid, shout out + if(getSaveKeys() && this.replicaType == -1) { + throw new RuntimeException("Could not read the replica type correctly for node " + + nodeId + " ( partition - " + this.partitionId + " )"); + } + + String fileNamePrefix = null; + if(getSaveKeys()) { + fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" + + Integer.toString(this.replicaType) + "_"); + } else { + fileNamePrefix = new String(Integer.toString(this.partitionId) + "_"); + } + + // Initialize the node directory + Path nodeDir = new Path(this.outputDir, "node-" + this.nodeId); + + // Create output directory, if it doesn't exist + FileSystem outputFs = nodeDir.getFileSystem(this.conf); + outputFs.mkdirs(nodeDir); + + // Write the checksum and output files + for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { + + String chunkFileName = fileNamePrefix + Integer.toString(chunkId); + if(this.checkSumType != CheckSumType.NONE) { + + if(this.checkSumDigestIndex[chunkId] != null + && 
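HadoopStoreWriterPerBucket differs from HadoopStoreWriter mainly in keeping one index/data stream pair per chunk and routing each key to the stream for its chunk id, so a single reducer can emit several complete chunk files. The skeleton of that fan-out pattern, with the store-specific details stripped away (the class and file names here are invented for illustration):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

// Sketch of the one-stream-per-bucket pattern used above; not part of the patch.
public class BucketedWriter {

    private final DataOutputStream[] streams;
    private final int[] positions;

    public BucketedWriter(String dir, int numChunks) throws IOException {
        streams = new DataOutputStream[numChunks];
        positions = new int[numChunks];
        for(int chunkId = 0; chunkId < numChunks; chunkId++)
            streams[chunkId] = new DataOutputStream(new FileOutputStream(dir + "/chunk-" + chunkId + ".data"));
    }

    /** Appends a record to the stream owned by the given chunk and tracks its offset. */
    public void append(int chunkId, byte[] record) throws IOException {
        streams[chunkId].write(record);
        positions[chunkId] += record.length;
        if(positions[chunkId] < 0)
            throw new IOException("Chunk " + chunkId + " grew past Integer.MAX_VALUE bytes");
    }

    public void close() throws IOException {
        for(DataOutputStream stream: streams)
            stream.close();
    }
}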
this.checkSumDigestValue[chunkId] != null) { + Path checkSumIndexFile = new Path(nodeDir, chunkFileName + ".index.checksum"); + Path checkSumValueFile = new Path(nodeDir, chunkFileName + ".data.checksum"); + + FSDataOutputStream output = outputFs.create(checkSumIndexFile); + output.write(this.checkSumDigestIndex[chunkId].getCheckSum()); + output.close(); + + output = outputFs.create(checkSumValueFile); + output.write(this.checkSumDigestValue[chunkId].getCheckSum()); + output.close(); + } else { + throw new RuntimeException("Failed to open checksum digest for node " + nodeId + + " ( partition - " + this.partitionId + + ", chunk - " + chunkId + " )"); + } + } + + // Generate the final chunk files + Path indexFile = new Path(nodeDir, chunkFileName + ".index"); + Path valueFile = new Path(nodeDir, chunkFileName + ".data"); + + logger.info("Moving " + this.taskIndexFileName[chunkId] + " to " + indexFile); + fs.rename(taskIndexFileName[chunkId], indexFile); + logger.info("Moving " + this.taskValueFileName[chunkId] + " to " + valueFile); + fs.rename(this.taskValueFileName[chunkId], valueFile); + + } + + } + + private int numChunks; + private Cluster cluster; + private StoreDefinition storeDef; + private boolean saveKeys; + private boolean reducerPerBucket; + + public Cluster getCluster() { + checkNotNull(cluster); + return cluster; + } + + public boolean getSaveKeys() { + return this.saveKeys; + } + + public boolean getReducerPerBucket() { + return this.reducerPerBucket; + } + + public StoreDefinition getStoreDef() { + checkNotNull(storeDef); + return storeDef; + } + + public String getStoreName() { + checkNotNull(storeDef); + return storeDef.getName(); + } + + private final void checkNotNull(Object o) { + if(o == null) + throw new VoldemortException("Not configured yet!"); + } + + public int getNumChunks() { + return this.numChunks; + } + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/KeyValueWriter.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/KeyValueWriter.java new file mode 100644 index 0000000000..eda4341b35 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/disk/KeyValueWriter.java @@ -0,0 +1,40 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +package voldemort.store.readonly.disk; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; + +// Interface used by reducers to layout the datqa on disk +public interface KeyValueWriter { + + public static enum CollisionCounter { + + NUM_COLLISIONS, + MAX_COLLISIONS; + } + + public void conf(JobConf job); + + public void write(K key, Iterator iterator, Reporter reporter) throws IOException; + + public void close() throws IOException; + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/fetcher/HdfsFetcher.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/fetcher/HdfsFetcher.java index 25a819773e..a5cbb371d9 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/fetcher/HdfsFetcher.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/fetcher/HdfsFetcher.java @@ -16,6 +16,7 @@ package voldemort.store.readonly.fetcher; +import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -67,6 +68,7 @@ public class HdfsFetcher implements FileFetcher { private EventThrottler throttler = null; private long minBytesPerSecond = 0; private DynamicThrottleLimit globalThrottleLimit = null; + private static final int NUM_RETRIES = 3; public HdfsFetcher(VoldemortConfig config) { this(config.getMaxBytesPerSecond(), @@ -281,46 +283,66 @@ private void copyFileWithCheckSum(FileSystem fs, logger.info("Starting copy of " + source + " to " + dest); FSDataInputStream input = null; OutputStream output = null; - try { - input = fs.open(source); - output = new FileOutputStream(dest); - byte[] buffer = new byte[bufferSize]; - while(true) { - int read = input.read(buffer); - if(read < 0) { - break; - } else if(read < bufferSize) { - buffer = ByteUtils.copy(buffer, 0, read); - } - output.write(buffer); - if(fileCheckSumGenerator != null) - fileCheckSumGenerator.update(buffer); - if(throttler != null) - throttler.maybeThrottle(read); - stats.recordBytes(read); - if(stats.getBytesSinceLastReport() > reportingIntervalBytes) { - NumberFormat format = NumberFormat.getNumberInstance(); - format.setMaximumFractionDigits(2); - logger.info(stats.getTotalBytesCopied() / (1024 * 1024) + " MB copied at " - + format.format(stats.getBytesPerSecond() / (1024 * 1024)) - + " MB/sec - " + format.format(stats.getPercentCopied()) - + " % complete"); - if(this.status != null) { - this.status.setStatus(stats.getTotalBytesCopied() - / (1024 * 1024) - + " MB copied at " - + format.format(stats.getBytesPerSecond() - / (1024 * 1024)) + " MB/sec - " - + format.format(stats.getPercentCopied()) - + " % complete"); + for(int attempt = 0; attempt < NUM_RETRIES; attempt++) { + boolean success = true; + try { + + input = fs.open(source); + output = new BufferedOutputStream(new FileOutputStream(dest)); + byte[] buffer = new byte[bufferSize]; + while(true) { + int read = input.read(buffer); + if(read < 0) { + break; + } else { + output.write(buffer, 0, read); } - stats.reset(); + + if(fileCheckSumGenerator != null) + fileCheckSumGenerator.update(buffer, 0, read); + if(throttler != null) + throttler.maybeThrottle(read); + stats.recordBytes(read); + if(stats.getBytesSinceLastReport() > reportingIntervalBytes) { + NumberFormat format = NumberFormat.getNumberInstance(); + format.setMaximumFractionDigits(2); + logger.info(stats.getTotalBytesCopied() / (1024 * 1024) + " MB copied at " + + 
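The KeyValueWriter interface above is the plug-in point used by the reworked reducers further down: they read an optional writer.class property from the JobConf, instantiate it reflectively via Utils.callConstructor, and fall back to HadoopStoreWriter or HadoopStoreWriterPerBucket when the property is absent. A minimal custom implementation might look like the following sketch; the class name is hypothetical and the generic parameters are assumed to be the key and value writable types:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

import voldemort.store.readonly.disk.KeyValueWriter;

// Hypothetical writer that only counts records, shown to illustrate the plug-in contract.
public class CountingKeyValueWriter implements KeyValueWriter<BytesWritable, BytesWritable> {

    private long records = 0;

    @Override
    public void conf(JobConf job) {
        // Read any custom settings from the job configuration here.
    }

    @Override
    public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter)
            throws IOException {
        while(iterator.hasNext()) {
            iterator.next();
            records++;
        }
    }

    @Override
    public void close() throws IOException {
        System.out.println("Saw " + records + " records");
    }
}

A job would opt into such a writer with job.set("writer.class", CountingKeyValueWriter.class.getName()).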
format.format(stats.getBytesPerSecond() / (1024 * 1024)) + + " MB/sec - " + format.format(stats.getPercentCopied()) + + " % complete, destination:" + dest); + if(this.status != null) { + this.status.setStatus(stats.getTotalBytesCopied() + / (1024 * 1024) + + " MB copied at " + + format.format(stats.getBytesPerSecond() + / (1024 * 1024)) + " MB/sec - " + + format.format(stats.getPercentCopied()) + + " % complete, destination:" + dest); + } + stats.reset(); + } + } + logger.info("Completed copy of " + source + " to " + dest); + + } catch(IOException ioe) { + success = false; + logger.error("Error during copying file ", ioe); + ioe.printStackTrace(); + if(attempt < NUM_RETRIES - 1) { + logger.info("retrying copying"); + } else { + throw ioe; + } + + } finally { + IOUtils.closeQuietly(output); + IOUtils.closeQuietly(input); + if(success) { + break; } + } - logger.info("Completed copy of " + source + " to " + dest); - } finally { - IOUtils.closeQuietly(output); - IOUtils.closeQuietly(input); + } } diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AbstractHadoopStoreBuilderMapper.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AbstractHadoopStoreBuilderMapper.java index 95ef6582f3..ea18558da6 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AbstractHadoopStoreBuilderMapper.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AbstractHadoopStoreBuilderMapper.java @@ -26,7 +26,6 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; -import voldemort.cluster.Node; import voldemort.routing.ConsistentRoutingStrategy; import voldemort.serialization.DefaultSerializerFactory; import voldemort.serialization.Serializer; @@ -34,6 +33,7 @@ import voldemort.serialization.SerializerFactory; import voldemort.store.compress.CompressionStrategy; import voldemort.store.compress.CompressionStrategyFactory; +import voldemort.store.readonly.mr.utils.MapperKeyValueWriter; import voldemort.utils.ByteUtils; /** @@ -79,96 +79,28 @@ public void map(K key, byte[] keyBytes = keySerializer.toBytes(makeKey(key, value)); byte[] valBytes = valueSerializer.toBytes(makeValue(key, value)); - // Compress key and values if required - if(keySerializerDefinition.hasCompression()) { - keyBytes = keyCompressor.deflate(keyBytes); - } - - if(valueSerializerDefinition.hasCompression()) { - valBytes = valueCompressor.deflate(valBytes); - } - - // Get the output byte arrays ready to populate - byte[] outputValue; - BytesWritable outputKey; - - // Leave initial offset for (a) node id (b) partition id - // since they are written later - int offsetTillNow = 2 * ByteUtils.SIZE_OF_INT; - - if(getSaveKeys()) { - - // In order - 4 ( for node id ) + 4 ( partition id ) + 1 ( replica - // type - primary | secondary | tertiary... 
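The reworked copyFileWithCheckSum above wraps the whole HDFS copy in a bounded retry loop: each attempt reopens the streams, an IOException closes them quietly and triggers another attempt, and the exception is rethrown only once NUM_RETRIES attempts have failed. The same control flow, reduced to a skeleton for clarity (illustrative only):

import java.io.IOException;

// Skeleton of the retry-and-rethrow-on-last-attempt pattern used in copyFileWithCheckSum above.
public final class BoundedRetry {

    public interface Attempt {
        void run() throws IOException; // one full copy attempt, including open and close
    }

    public static void run(Attempt attempt, int maxAttempts) throws IOException {
        for(int i = 0; i < maxAttempts; i++) {
            try {
                attempt.run();
                return;              // success: stop retrying
            } catch(IOException e) {
                if(i == maxAttempts - 1)
                    throw e;         // out of attempts: surface the failure
                // otherwise log and fall through to the next attempt
            }
        }
    }
}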
] + 4 ( key size ) - // size ) + 4 ( value size ) + key + value - outputValue = new byte[valBytes.length + keyBytes.length + ByteUtils.SIZE_OF_BYTE + 4 - * ByteUtils.SIZE_OF_INT]; - - // Write key length - leave byte for replica type - offsetTillNow += ByteUtils.SIZE_OF_BYTE; - ByteUtils.writeInt(outputValue, keyBytes.length, offsetTillNow); - - // Write value length - offsetTillNow += ByteUtils.SIZE_OF_INT; - ByteUtils.writeInt(outputValue, valBytes.length, offsetTillNow); - - // Write key - offsetTillNow += ByteUtils.SIZE_OF_INT; - System.arraycopy(keyBytes, 0, outputValue, offsetTillNow, keyBytes.length); - - // Write value - offsetTillNow += keyBytes.length; - System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length); - - // Generate MR key - upper 8 bytes of 16 byte md5 - outputKey = new BytesWritable(ByteUtils.copy(md5er.digest(keyBytes), - 0, - 2 * ByteUtils.SIZE_OF_INT)); - - } else { - - // In order - 4 ( for node id ) + 4 ( partition id ) + value - outputValue = new byte[valBytes.length + 2 * ByteUtils.SIZE_OF_INT]; - - // Write value - System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length); - - // Generate MR key - 16 byte md5 - outputKey = new BytesWritable(md5er.digest(keyBytes)); - - } - - // Generate partition and node list this key is destined for - List partitionList = routingStrategy.getPartitionList(keyBytes); - Node[] partitionToNode = routingStrategy.getPartitionToNode(); - - for(int replicaType = 0; replicaType < partitionList.size(); replicaType++) { - - // Node id - ByteUtils.writeInt(outputValue, - partitionToNode[partitionList.get(replicaType)].getId(), - 0); - - if(getSaveKeys()) { - // Primary partition id - ByteUtils.writeInt(outputValue, partitionList.get(0), ByteUtils.SIZE_OF_INT); - - // Replica type - ByteUtils.writeBytes(outputValue, - replicaType, - 2 * ByteUtils.SIZE_OF_INT, - ByteUtils.SIZE_OF_BYTE); - } else { - // Partition id - ByteUtils.writeInt(outputValue, - partitionList.get(replicaType), - ByteUtils.SIZE_OF_INT); - } - BytesWritable outputVal = new BytesWritable(outputValue); + MapperKeyValueWriter mapWriter = new MapperKeyValueWriter(); + + List mapperList = mapWriter.map(routingStrategy, + keySerializer, + valueSerializer, + valueCompressor, + keyCompressor, + keySerializerDefinition, + valueSerializerDefinition, + keyBytes, + valBytes, + getSaveKeys(), + md5er); + + for(int i = 0; i < mapperList.size(); i++) { + voldemort.utils.Pair pair = (voldemort.utils.Pair) mapperList.get(i); + BytesWritable outputKey = pair.getFirst(); + BytesWritable outputVal = pair.getSecond(); output.collect(outputKey, outputVal); - } + md5er.reset(); } diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderMapper.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderMapper.java new file mode 100644 index 0000000000..2318346387 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderMapper.java @@ -0,0 +1,255 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
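The removed mapper code above documents the intermediate value layout: a 4-byte node id and a 4-byte partition id, followed either by a replica-type byte plus (key length, value length, key, value) when keys are saved, or by just the serialized value. The refactored path is assumed to preserve this layout inside MapperKeyValueWriter; a sketch of the encoding using a plain ByteBuffer (the helper names are illustrative, the real code uses ByteUtils):

import java.nio.ByteBuffer;

// Illustrative encoder for the mapper's output value layout; not the actual MapperKeyValueWriter.
public class MapperValueLayout {

    /** save.keys = true: [nodeId][partitionId][replicaType][keyLen][valueLen][key][value] */
    public static byte[] withKeys(int nodeId, int partitionId, byte replicaType,
                                  byte[] key, byte[] value) {
        ByteBuffer buf = ByteBuffer.allocate(4 + 4 + 1 + 4 + 4 + key.length + value.length);
        buf.putInt(nodeId).putInt(partitionId).put(replicaType);
        buf.putInt(key.length).putInt(value.length);
        buf.put(key).put(value);
        return buf.array();
    }

    /** save.keys = false: [nodeId][partitionId][value] */
    public static byte[] valueOnly(int nodeId, int partitionId, byte[] value) {
        ByteBuffer buf = ByteBuffer.allocate(4 + 4 + value.length);
        buf.putInt(nodeId).putInt(partitionId).put(value);
        return buf.array();
    }
}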
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr; + +import java.io.IOException; +import java.io.StringReader; +import java.nio.ByteBuffer; +import java.security.MessageDigest; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.mapred.AvroCollector; +import org.apache.avro.mapred.AvroMapper; +import org.apache.avro.mapred.Pair; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobConfigurable; +import org.apache.hadoop.mapred.Reporter; + +import voldemort.VoldemortException; +import voldemort.cluster.Cluster; +import voldemort.routing.ConsistentRoutingStrategy; +import voldemort.serialization.DefaultSerializerFactory; +import voldemort.serialization.Serializer; +import voldemort.serialization.SerializerDefinition; +import voldemort.serialization.SerializerFactory; +import voldemort.serialization.avro.AvroGenericSerializer; +import voldemort.serialization.avro.versioned.AvroVersionedGenericSerializer; +import voldemort.store.StoreDefinition; +import voldemort.store.compress.CompressionStrategy; +import voldemort.store.compress.CompressionStrategyFactory; +import voldemort.store.readonly.mr.utils.HadoopUtils; +import voldemort.store.readonly.mr.utils.MapperKeyValueWriter; +import voldemort.utils.ByteUtils; +import voldemort.xml.ClusterMapper; +import voldemort.xml.StoreDefinitionsMapper; +import azkaban.common.utils.Props; + +/** + * Avro container files are not sequence input format files they contain records + * instead of k/v pairs to consume these files we use the AvroMapper + */ +public class AvroStoreBuilderMapper extends + AvroMapper> implements JobConfigurable { + + protected MessageDigest md5er; + protected ConsistentRoutingStrategy routingStrategy; + protected Serializer keySerializer; + protected Serializer valueSerializer; + + private String keySchema; + private String valSchema; + + private String keyField; + private String valField; + + private CompressionStrategy valueCompressor; + private CompressionStrategy keyCompressor; + private SerializerDefinition keySerializerDefinition; + private SerializerDefinition valueSerializerDefinition; + + /** + * Create the voldemort key and value from the input Avro record by + * extracting the key and value and map it out for each of the responsible + * voldemort nodes + * + * + * The output value is the node_id & partition_id of the responsible node + * followed by serialized value + */ + @Override + public void map(GenericData.Record record, + AvroCollector> collector, + Reporter reporter) throws IOException { + + byte[] keyBytes = keySerializer.toBytes(record.get(keyField)); + byte[] valBytes = valueSerializer.toBytes(record.get(valField)); + + MapperKeyValueWriter mapWriter = new MapperKeyValueWriter(); + + List mapperList = mapWriter.map(routingStrategy, + keySerializer, + valueSerializer, + valueCompressor, + keyCompressor, + keySerializerDefinition, + valueSerializerDefinition, + keyBytes, + valBytes, + getSaveKeys(), + md5er); + + for(int i = 0; i < mapperList.size(); i++) { + voldemort.utils.Pair pair = (voldemort.utils.Pair) mapperList.get(i); + BytesWritable outputKey = pair.getFirst(); + BytesWritable outputVal = pair.getSecond(); + + ByteBuffer keyBuffer = null, valueBuffer = null; + + byte[] md5KeyBytes = outputKey.getBytes(); + keyBuffer = 
ByteBuffer.allocate(md5KeyBytes.length); + keyBuffer.put(md5KeyBytes); + keyBuffer.rewind(); + + byte[] outputValue = outputVal.getBytes(); + valueBuffer = ByteBuffer.allocate(outputValue.length); + valueBuffer.put(outputValue); + valueBuffer.rewind(); + + Pair p = new Pair(keyBuffer, + valueBuffer); + + collector.collect(p); + } + + md5er.reset(); + } + + @Override + public void configure(JobConf conf) { + + super.setConf(conf); + // from parent code + + md5er = ByteUtils.getDigest("md5"); + + this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml"))); + List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml"))); + + if(storeDefs.size() != 1) + throw new IllegalStateException("Expected to find only a single store, but found multiple!"); + this.storeDef = storeDefs.get(0); + + this.numChunks = conf.getInt("num.chunks", -1); + if(this.numChunks < 1) + throw new VoldemortException("num.chunks not specified in the job conf."); + + this.saveKeys = conf.getBoolean("save.keys", true); + this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false); + + keySerializerDefinition = getStoreDef().getKeySerializer(); + valueSerializerDefinition = getStoreDef().getValueSerializer(); + + try { + SerializerFactory factory = new DefaultSerializerFactory(); + + if(conf.get("serializer.factory") != null) { + factory = (SerializerFactory) Class.forName(conf.get("serializer.factory")) + .newInstance(); + } + + keySerializer = factory.getSerializer(keySerializerDefinition); + valueSerializer = factory.getSerializer(valueSerializerDefinition); + + keyField = conf.get("avro.key.field"); + + valField = conf.get("avro.value.field"); + + keySchema = conf.get("avro.key.schema"); + valSchema = conf.get("avro.val.schema"); + + if(keySerializerDefinition.getName().equals("avro-generic")) { + keySerializer = new AvroGenericSerializer(keySchema); + valueSerializer = new AvroGenericSerializer(valSchema); + } else { + + if(keySerializerDefinition.hasVersion()) { + Map versions = new HashMap(); + for(Map.Entry entry: keySerializerDefinition.getAllSchemaInfoVersions() + .entrySet()) + versions.put(entry.getKey(), entry.getValue()); + keySerializer = new AvroVersionedGenericSerializer(versions); + } else + keySerializer = new AvroVersionedGenericSerializer(keySerializerDefinition.getCurrentSchemaInfo()); + + if(valueSerializerDefinition.hasVersion()) { + Map versions = new HashMap(); + for(Map.Entry entry: valueSerializerDefinition.getAllSchemaInfoVersions() + .entrySet()) + versions.put(entry.getKey(), entry.getValue()); + valueSerializer = new AvroVersionedGenericSerializer(versions); + } else + valueSerializer = new AvroVersionedGenericSerializer(valueSerializerDefinition.getCurrentSchemaInfo()); + + } + + } catch(Exception e) { + throw new RuntimeException(e); + } + + keyCompressor = new CompressionStrategyFactory().get(keySerializerDefinition.getCompression()); + valueCompressor = new CompressionStrategyFactory().get(valueSerializerDefinition.getCompression()); + + routingStrategy = new ConsistentRoutingStrategy(getCluster().getNodes(), + getStoreDef().getReplicationFactor()); + + Props props = HadoopUtils.getPropsFromJob(conf); + + } + + private int numChunks; + private Cluster cluster; + private StoreDefinition storeDef; + private boolean saveKeys; + private boolean reducerPerBucket; + + public Cluster getCluster() { + checkNotNull(cluster); + return cluster; + } + + public boolean getSaveKeys() { + return this.saveKeys; + } + + public 
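configure() above expects the driver to say which Avro record fields carry the key and the value and, for avro-generic stores, their schemas. A hypothetical driver-side snippet setting those properties; the field names and schema strings are examples only:

import org.apache.hadoop.mapred.JobConf;

// Hypothetical configuration for AvroStoreBuilderMapper; field names and schemas are examples.
public class AvroMapperConfExample {

    public static void configure(JobConf conf) {
        conf.set("avro.key.field", "member_id");   // record field used as the store key
        conf.set("avro.value.field", "profile");   // record field used as the store value
        conf.set("avro.key.schema", "\"long\"");   // only consulted for avro-generic stores
        conf.set("avro.val.schema", "\"string\"");
        // Optionally override the serializer factory:
        // conf.set("serializer.factory", "voldemort.serialization.DefaultSerializerFactory");
    }
}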
boolean getReducerPerBucket() { + return this.reducerPerBucket; + } + + public StoreDefinition getStoreDef() { + checkNotNull(storeDef); + return storeDef; + } + + public String getStoreName() { + checkNotNull(storeDef); + return storeDef.getName(); + } + + private final void checkNotNull(Object o) { + if(o == null) + throw new VoldemortException("Not configured yet!"); + } + + public int getNumChunks() { + return this.numChunks; + } + +} \ No newline at end of file diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderPartitioner.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderPartitioner.java new file mode 100644 index 0000000000..a3c4b0c9dc --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderPartitioner.java @@ -0,0 +1,150 @@ +package voldemort.store.readonly.mr; + +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; +import java.io.StringReader; +import java.nio.ByteBuffer; +import java.util.List; + +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Partitioner; + +import voldemort.VoldemortException; +import voldemort.cluster.Cluster; +import voldemort.store.StoreDefinition; +import voldemort.store.readonly.ReadOnlyUtils; +import voldemort.utils.ByteUtils; +import voldemort.xml.ClusterMapper; +import voldemort.xml.StoreDefinitionsMapper; + +/** + * A Partitioner that splits data so that all data for the same nodeId, chunkId + * combination ends up in the same reduce (and hence in the same store chunk) + */ +@SuppressWarnings("deprecation") +public class AvroStoreBuilderPartitioner implements + Partitioner, AvroValue> { + + @Override + public int getPartition(AvroKey key, AvroValue value, int numReduceTasks) { + + byte[] keyBytes = null, valueBytes; + + keyBytes = new byte[key.datum().remaining()]; + key.datum().get(keyBytes); + + valueBytes = new byte[value.datum().remaining()]; + value.datum().get(valueBytes); + + ByteBuffer keyBuffer = null, valueBuffer = null; + + keyBuffer = ByteBuffer.allocate(keyBytes.length); + keyBuffer.put(keyBytes); + keyBuffer.rewind(); + + valueBuffer = ByteBuffer.allocate(valueBytes.length); + valueBuffer.put(valueBytes); + valueBuffer.rewind(); + + key.datum(keyBuffer); + value.datum(valueBuffer); + + int partitionId = ByteUtils.readInt(valueBytes, ByteUtils.SIZE_OF_INT); + int chunkId = ReadOnlyUtils.chunk(keyBytes, getNumChunks()); + if(getSaveKeys()) { + int replicaType = (int) ByteUtils.readBytes(valueBytes, + 2 * ByteUtils.SIZE_OF_INT, + ByteUtils.SIZE_OF_BYTE); + if(getReducerPerBucket()) { + return (partitionId * getStoreDef().getReplicationFactor() + replicaType) + % numReduceTasks; + } else { + return ((partitionId * getStoreDef().getReplicationFactor() * getNumChunks()) + + (replicaType * getNumChunks()) + chunkId) + % 
numReduceTasks; + } + } else { + if(getReducerPerBucket()) { + return partitionId % numReduceTasks; + } else { + return (partitionId * getNumChunks() + chunkId) % numReduceTasks; + } + + } + } + + private int numChunks; + private Cluster cluster; + private StoreDefinition storeDef; + private boolean saveKeys; + private boolean reducerPerBucket; + + @Override + public void configure(JobConf conf) { + this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml"))); + List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml"))); + if(storeDefs.size() != 1) + throw new IllegalStateException("Expected to find only a single store, but found multiple!"); + this.storeDef = storeDefs.get(0); + + this.numChunks = conf.getInt("num.chunks", -1); + if(this.numChunks < 1) + throw new VoldemortException("num.chunks not specified in the job conf."); + + this.saveKeys = conf.getBoolean("save.keys", false); + this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false); + } + + @SuppressWarnings("unused") + public void close() throws IOException {} + + public Cluster getCluster() { + checkNotNull(cluster); + return cluster; + } + + public boolean getSaveKeys() { + return this.saveKeys; + } + + public boolean getReducerPerBucket() { + return this.reducerPerBucket; + } + + public StoreDefinition getStoreDef() { + checkNotNull(storeDef); + return storeDef; + } + + public String getStoreName() { + checkNotNull(storeDef); + return storeDef.getName(); + } + + private final void checkNotNull(Object o) { + if(o == null) + throw new VoldemortException("Not configured yet!"); + } + + public int getNumChunks() { + return this.numChunks; + } + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducer.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducer.java new file mode 100644 index 0000000000..43173a78d8 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducer.java @@ -0,0 +1,115 @@ +package voldemort.store.readonly.mr; + +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
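The partitioner above pins every (partition, replica type, chunk) bucket to one reducer so that each reducer produces whole chunk files. Its routing arithmetic, restated as a standalone method that mirrors the branches shown above (replicaType only matters when keys are saved):

// Standalone restatement of AvroStoreBuilderPartitioner.getPartition's routing math.
public class StoreBuilderPartitioning {

    public static int reducerFor(int partitionId, int replicaType, int chunkId,
                                 int replicationFactor, int numChunks, int numReduceTasks,
                                 boolean saveKeys, boolean reducerPerBucket) {
        if(saveKeys) {
            if(reducerPerBucket)
                return (partitionId * replicationFactor + replicaType) % numReduceTasks;
            return ((partitionId * replicationFactor * numChunks) + (replicaType * numChunks) + chunkId)
                   % numReduceTasks;
        }
        if(reducerPerBucket)
            return partitionId % numReduceTasks;
        return (partitionId * numChunks + chunkId) % numReduceTasks;
    }
}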
+ */ + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; + +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobConfigurable; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; + +import voldemort.store.readonly.disk.HadoopStoreWriter; +import voldemort.store.readonly.disk.KeyValueWriter; +import azkaban.common.utils.Utils; + +/** + * Take key md5s and value bytes and build a Avro read-only store from these + * values + */ +public class AvroStoreBuilderReducer implements + Reducer, AvroValue, Text, Text>, JobConfigurable, Closeable { + + // The Class implementing the keyvaluewriter + // this provides a pluggable mechanism for generating your own on disk + // format for the data and index files + String keyValueWriterClass; + @SuppressWarnings("rawtypes") + KeyValueWriter writer; + + @SuppressWarnings("unchecked") + @Override + public void reduce(AvroKey keyAvro, + Iterator> iterator, + OutputCollector collector, + Reporter reporter) throws IOException { + + ByteBuffer keyBuffer = keyAvro.datum(); + keyBuffer.rewind(); + + byte[] keyBytes = null, valueBytes; + + keyBytes = new byte[keyBuffer.remaining()]; + keyBuffer.get(keyBytes); + + BytesWritable key = new BytesWritable(keyBytes); + + ArrayList valueList = new ArrayList(); + + while(iterator.hasNext()) { + ByteBuffer writable = iterator.next().datum(); + writable.rewind(); + // BytesWritable writable = iterator.next(); + valueBytes = null; + valueBytes = new byte[writable.remaining()]; + writable.get(valueBytes); + + BytesWritable value = new BytesWritable(valueBytes); + valueList.add(value); + + } + + writer.write(key, valueList.iterator(), reporter); + + } + + @Override + public void configure(JobConf job) { + + JobConf conf = job; + try { + + keyValueWriterClass = conf.get("writer.class"); + if(keyValueWriterClass != null) + writer = (KeyValueWriter) Utils.callConstructor(keyValueWriterClass); + else + writer = new HadoopStoreWriter(); + + writer.conf(job); + + } catch(Exception e) { + // throw new RuntimeException("Failed to open Input/OutputStream", + // e); + e.printStackTrace(); + } + } + + @Override + public void close() throws IOException { + + writer.close(); + } +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducerPerBucket.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducerPerBucket.java new file mode 100644 index 0000000000..1d41419e6d --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/AvroStoreBuilderReducerPerBucket.java @@ -0,0 +1,114 @@ +package voldemort.store.readonly.mr; + +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
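Both Avro reducers, like the mapper and partitioner, convert each ByteBuffer datum to a byte[] by rewinding it and copying remaining() bytes before wrapping the result in a BytesWritable. The recurring idiom as a tiny helper, shown only to spell out why the rewind matters:

import java.nio.ByteBuffer;

import org.apache.hadoop.io.BytesWritable;

// The rewind-and-copy idiom used inline by the Avro mapper, partitioner and reducers above.
public class ByteBuffers {

    public static byte[] toBytes(ByteBuffer buffer) {
        buffer.rewind();                           // make remaining() span the full contents
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        return bytes;
    }

    public static BytesWritable toWritable(ByteBuffer buffer) {
        return new BytesWritable(toBytes(buffer));
    }
}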
See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Iterator; + +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Closeable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobConfigurable; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; +import org.apache.log4j.Logger; + +import voldemort.store.readonly.disk.HadoopStoreWriterPerBucket; +import voldemort.store.readonly.disk.KeyValueWriter; +import azkaban.common.utils.Utils; + +/** + * Take key md5s and value bytes and build a Avro read-only store from these + * values + */ +public class AvroStoreBuilderReducerPerBucket implements + Reducer, AvroValue, Text, Text>, JobConfigurable, Closeable { + + private static final Logger logger = Logger.getLogger(AvroStoreBuilderReducerPerBucket.class); + + String keyValueWriterClass; + @SuppressWarnings("rawtypes") + KeyValueWriter writer; + + @Override + public void reduce(AvroKey keyAvro, + Iterator> iterator, + OutputCollector collector, + Reporter reporter) throws IOException { + + ByteBuffer keyBuffer = keyAvro.datum(); + keyBuffer.rewind(); + + byte[] keyBytes = null, valueBytes; + + keyBytes = new byte[keyBuffer.remaining()]; + keyBuffer.get(keyBytes); + + BytesWritable key = new BytesWritable(keyBytes); + + ArrayList valueList = new ArrayList(); + + while(iterator.hasNext()) { + ByteBuffer writable = iterator.next().datum(); + writable.rewind(); + // BytesWritable writable = iterator.next(); + valueBytes = null; + valueBytes = new byte[writable.remaining()]; + writable.get(valueBytes); + + BytesWritable value = new BytesWritable(valueBytes); + valueList.add(value); + + } + + writer.write(key, valueList.iterator(), reporter); + + } + + @Override + public void configure(JobConf job) { + + JobConf conf = job; + try { + + keyValueWriterClass = conf.get("writer.class"); + if(keyValueWriterClass != null) + writer = (KeyValueWriter) Utils.callConstructor(keyValueWriterClass); + else + writer = new HadoopStoreWriterPerBucket(); + + writer.conf(job); + + } catch(Exception e) { + // throw new RuntimeException("Failed to open Input/OutputStream", + // e); + e.printStackTrace(); + } + } + + @Override + public void close() throws IOException { + + writer.close(); + } +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilder.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilder.java index 22b1f711ba..c1e3c9c70f 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilder.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilder.java @@ -17,10 +17,15 @@ package voldemort.store.readonly.mr; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroJob; +import org.apache.avro.mapred.AvroOutputFormat; +import org.apache.avro.mapred.Pair; import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -36,6 +41,7 @@ import 
org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.log4j.Logger; @@ -48,6 +54,7 @@ import voldemort.store.readonly.ReadOnlyStorageMetadata; import voldemort.store.readonly.checksum.CheckSum; import voldemort.store.readonly.checksum.CheckSum.CheckSumType; +import voldemort.store.readonly.disk.KeyValueWriter; import voldemort.utils.Utils; import voldemort.xml.ClusterMapper; import voldemort.xml.StoreDefinitionsMapper; @@ -66,7 +73,7 @@ public class HadoopStoreBuilder { private static final Logger logger = Logger.getLogger(HadoopStoreBuilder.class); private final Configuration config; - private final Class> mapperClass; + private final Class mapperClass; @SuppressWarnings("unchecked") private final Class inputFormatClass; private final Cluster cluster; @@ -80,6 +87,8 @@ public class HadoopStoreBuilder { private boolean reducerPerBucket = false; private int numChunks = -1; + private boolean isAvro; + /** * Kept for backwards compatibility. We do not use replicationFactor any * more since it is derived from the store definition @@ -99,7 +108,7 @@ public class HadoopStoreBuilder { @SuppressWarnings("unchecked") @Deprecated public HadoopStoreBuilder(Configuration conf, - Class> mapperClass, + Class mapperClass, Class inputFormatClass, Cluster cluster, StoreDefinition storeDef, @@ -135,7 +144,7 @@ public HadoopStoreBuilder(Configuration conf, */ @SuppressWarnings("unchecked") public HadoopStoreBuilder(Configuration conf, - Class> mapperClass, + Class mapperClass, Class inputFormatClass, Cluster cluster, StoreDefinition storeDef, @@ -153,6 +162,7 @@ public HadoopStoreBuilder(Configuration conf, this.chunkSizeBytes = chunkSizeBytes; this.tempDir = tempDir; this.outputDir = Utils.notNull(outputDir); + isAvro = false; if(chunkSizeBytes > MAX_CHUNK_SIZE || chunkSizeBytes < MIN_CHUNK_SIZE) throw new VoldemortException("Invalid chunk size, chunk size must be in the range " + MIN_CHUNK_SIZE + "..." 
+ MAX_CHUNK_SIZE); @@ -175,7 +185,7 @@ public HadoopStoreBuilder(Configuration conf, */ @SuppressWarnings("unchecked") public HadoopStoreBuilder(Configuration conf, - Class> mapperClass, + Class mapperClass, Class inputFormatClass, Cluster cluster, StoreDefinition storeDef, @@ -218,7 +228,7 @@ public HadoopStoreBuilder(Configuration conf, */ @SuppressWarnings("unchecked") public HadoopStoreBuilder(Configuration conf, - Class> mapperClass, + Class mapperClass, Class inputFormatClass, Cluster cluster, StoreDefinition storeDef, @@ -265,7 +275,7 @@ public HadoopStoreBuilder(Configuration conf, */ @SuppressWarnings("unchecked") public HadoopStoreBuilder(Configuration conf, - Class> mapperClass, + Class mapperClass, Class inputFormatClass, Cluster cluster, StoreDefinition storeDef, @@ -290,6 +300,7 @@ public HadoopStoreBuilder(Configuration conf, this.saveKeys = saveKeys; this.reducerPerBucket = reducerPerBucket; this.numChunks = numChunks; + isAvro = false; if(numChunks <= 0) throw new VoldemortException("Number of chunks should be greater than zero"); } @@ -306,14 +317,16 @@ public void build() { new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef))); conf.setBoolean("save.keys", saveKeys); conf.setBoolean("reducer.per.bucket", reducerPerBucket); - conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class); - conf.setMapperClass(mapperClass); - conf.setMapOutputKeyClass(BytesWritable.class); - conf.setMapOutputValueClass(BytesWritable.class); - if(reducerPerBucket) { - conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class); - } else { - conf.setReducerClass(HadoopStoreBuilderReducer.class); + if(!isAvro) { + conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class); + conf.setMapperClass(mapperClass); + conf.setMapOutputKeyClass(BytesWritable.class); + conf.setMapOutputValueClass(BytesWritable.class); + if(reducerPerBucket) { + conf.setReducerClass(HadoopStoreBuilderReducerPerBucket.class); + } else { + conf.setReducerClass(HadoopStoreBuilderReducer.class); + } } conf.setInputFormat(inputFormatClass); conf.setOutputFormat(SequenceFileOutputFormat.class); @@ -380,6 +393,35 @@ public void build() { conf.setInt("num.chunks", numChunks); conf.setNumReduceTasks(numReducers); + if(isAvro) { + conf.setPartitionerClass(AvroStoreBuilderPartitioner.class); + // conf.setMapperClass(mapperClass); + conf.setMapOutputKeyClass(ByteBuffer.class); + conf.setMapOutputValueClass(ByteBuffer.class); + + conf.setInputFormat(inputFormatClass); + + conf.setOutputFormat((Class) AvroOutputFormat.class); + conf.setOutputKeyClass(ByteBuffer.class); + conf.setOutputValueClass(ByteBuffer.class); + + // AvroJob confs for the avro mapper + AvroJob.setInputSchema(conf, Schema.parse(config.get("avro.rec.schema"))); + + AvroJob.setOutputSchema(conf, + Pair.getPairSchema(Schema.create(Schema.Type.BYTES), + Schema.create(Schema.Type.BYTES))); + + AvroJob.setMapperClass(conf, mapperClass); + + if(reducerPerBucket) { + conf.setReducerClass(AvroStoreBuilderReducerPerBucket.class); + } else { + conf.setReducerClass(AvroStoreBuilderReducer.class); + } + + } + logger.info("Number of chunks: " + numChunks + ", number of reducers: " + numReducers + ", save keys: " + saveKeys + ", reducerPerBucket: " + reducerPerBucket); logger.info("Building store..."); @@ -391,14 +433,14 @@ public void build() { if(saveKeys) { if(reducerPerBucket) { logger.info("Number of collisions in the job - " - + counters.getCounter(HadoopStoreBuilderReducerPerBucket.CollisionCounter.NUM_COLLISIONS)); + + 
counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS)); logger.info("Maximum number of collisions for one entry - " - + counters.getCounter(HadoopStoreBuilderReducerPerBucket.CollisionCounter.MAX_COLLISIONS)); + + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS)); } else { logger.info("Number of collisions in the job - " - + counters.getCounter(HadoopStoreBuilderReducer.CollisionCounter.NUM_COLLISIONS)); + + counters.getCounter(KeyValueWriter.CollisionCounter.NUM_COLLISIONS)); logger.info("Maximum number of collisions for one entry - " - + counters.getCounter(HadoopStoreBuilderReducer.CollisionCounter.MAX_COLLISIONS)); + + counters.getCounter(KeyValueWriter.CollisionCounter.MAX_COLLISIONS)); } } @@ -490,6 +532,17 @@ public boolean accept(Path arg0) { } + /** + * Run the job + */ + public void buildAvro() { + + isAvro = true; + build(); + return; + + } + /** * A comparator that sorts index files last. This is required to maintain * the order while calculating checksum diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducer.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducer.java index 5e1cb8297f..0b3a496f0d 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducer.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducer.java @@ -16,64 +16,29 @@ package voldemort.store.readonly.mr; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; import java.io.IOException; import java.util.Iterator; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; -import org.apache.log4j.Logger; -import voldemort.VoldemortException; -import voldemort.store.readonly.ReadOnlyUtils; -import voldemort.store.readonly.checksum.CheckSum; -import voldemort.store.readonly.checksum.CheckSum.CheckSumType; -import voldemort.utils.ByteUtils; +import voldemort.store.readonly.disk.HadoopStoreWriter; +import voldemort.store.readonly.disk.KeyValueWriter; +import azkaban.common.utils.Utils; /** * Take key md5s and value bytes and build a read-only store from these values */ @SuppressWarnings("deprecation") -public class HadoopStoreBuilderReducer extends AbstractStoreBuilderConfigurable implements - Reducer { +public class HadoopStoreBuilderReducer implements Reducer { - private static final Logger logger = Logger.getLogger(HadoopStoreBuilderReducer.class); - - private DataOutputStream indexFileStream = null; - private DataOutputStream valueFileStream = null; - private int position; - private String taskId = null; - - private int nodeId = -1; - private int partitionId = -1; - private int chunkId = -1; - private int replicaType = -1; - - private Path taskIndexFileName; - private Path taskValueFileName; - - private JobConf conf; - private CheckSumType checkSumType; - private CheckSum checkSumDigestIndex; - private CheckSum checkSumDigestValue; - - private String outputDir; - - private FileSystem fs; - - protected static enum CollisionCounter { - NUM_COLLISIONS, - MAX_COLLISIONS; - } + String keyValueWriterClass; + 
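buildAvro() above simply flips the isAvro flag and delegates to build(), which then reads the record schema from the avro.rec.schema property of the Configuration handed to the builder. A hypothetical driver fragment, with the builder construction elided and assuming conf is the same object passed to its constructor:

import org.apache.hadoop.conf.Configuration;

import voldemort.store.readonly.mr.HadoopStoreBuilder;

// Hypothetical driver fragment; the HadoopStoreBuilder construction (cluster, store
// definition, chunk size, directories, ...) is omitted and assumed to use this same conf.
public class AvroBuildExample {

    public static void runAvroBuild(Configuration conf, HadoopStoreBuilder builder, String recordSchemaJson) {
        conf.set("avro.rec.schema", recordSchemaJson);  // build() reads this key on the Avro path
        builder.buildAvro();                            // sets isAvro and delegates to build()
    }
}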
@SuppressWarnings("rawtypes") + KeyValueWriter writer; /** * Reduce should get sorted MD5 of Voldemort key ( either 16 bytes if saving @@ -82,220 +47,36 @@ protected static enum CollisionCounter { * partition-id, replica-type, [key-size, value-size, key, value]* if saving * keys is enabled */ + @SuppressWarnings("unchecked") public void reduce(BytesWritable key, Iterator iterator, OutputCollector output, Reporter reporter) throws IOException { - // Write key and position - this.indexFileStream.write(key.get(), 0, key.getSize()); - this.indexFileStream.writeInt(this.position); - - // Run key through checksum digest - if(this.checkSumDigestIndex != null) { - this.checkSumDigestIndex.update(key.get(), 0, key.getSize()); - this.checkSumDigestIndex.update(this.position); - } - - short numTuples = 0; - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - DataOutputStream valueStream = new DataOutputStream(stream); - - while(iterator.hasNext()) { - BytesWritable writable = iterator.next(); - byte[] valueBytes = writable.get(); - int offsetTillNow = 0; - - // Read node Id - if(this.nodeId == -1) - this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow); - offsetTillNow += ByteUtils.SIZE_OF_INT; - - // Read partition id - if(this.partitionId == -1) - this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow); - offsetTillNow += ByteUtils.SIZE_OF_INT; - - // Read chunk id - if(this.chunkId == -1) - this.chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks()); - - // Read replica type - if(getSaveKeys()) { - if(this.replicaType == -1) - this.replicaType = (int) ByteUtils.readBytes(valueBytes, - offsetTillNow, - ByteUtils.SIZE_OF_BYTE); - offsetTillNow += ByteUtils.SIZE_OF_BYTE; - } - - int valueLength = writable.getSize() - offsetTillNow; - if(getSaveKeys()) { - // Write ( key_length, value_length, key, - // value ) - valueStream.write(valueBytes, offsetTillNow, valueLength); - } else { - // Write (value_length + value) - valueStream.writeInt(valueLength); - valueStream.write(valueBytes, offsetTillNow, valueLength); - } - - numTuples++; - - // If we have multiple values for this md5 that is a collision, - // throw an exception--either the data itself has duplicates, there - // are trillions of keys, or someone is attempting something - // malicious ( We obviously expect collisions when we save keys ) - if(!getSaveKeys() && numTuples > 1) - throw new VoldemortException("Duplicate keys detected for md5 sum " - + ByteUtils.toHexString(ByteUtils.copy(key.get(), - 0, - key.getSize()))); - - } - - if(numTuples < 0) { - // Overflow - throw new VoldemortException("Found too many collisions: chunk " + chunkId - + " has exceeded " + Short.MAX_VALUE + " collisions."); - } else if(numTuples > 1) { - // Update number of collisions + max keys per collision - reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1); - - long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter(); - if(numTuples > numCollisions) { - reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions); - } - } - - // Flush the value - valueStream.flush(); - byte[] value = stream.toByteArray(); - - // Start writing to file now - // First, if save keys flag set the number of keys - if(getSaveKeys()) { - - this.valueFileStream.writeShort(numTuples); - this.position += ByteUtils.SIZE_OF_SHORT; - - if(this.checkSumDigestValue != null) { - this.checkSumDigestValue.update(numTuples); - } - } - - this.valueFileStream.write(value); - this.position += value.length; - - 
if(this.checkSumDigestValue != null) { - this.checkSumDigestValue.update(value); - } - - if(this.position < 0) - throw new VoldemortException("Chunk overflow exception: chunk " + chunkId - + " has exceeded " + Integer.MAX_VALUE + " bytes."); + writer.write(key, iterator, reporter); } @Override public void configure(JobConf job) { - super.configure(job); - try { - this.conf = job; - this.position = 0; - this.outputDir = job.get("final.output.dir"); - this.taskId = job.get("mapred.task.id"); - this.checkSumType = CheckSum.fromString(job.get("checksum.type")); - this.checkSumDigestIndex = CheckSum.getInstance(checkSumType); - this.checkSumDigestValue = CheckSum.getInstance(checkSumType); - this.taskIndexFileName = new Path(FileOutputFormat.getOutputPath(job), getStoreName() - + "." - + this.taskId - + ".index"); - this.taskValueFileName = new Path(FileOutputFormat.getOutputPath(job), getStoreName() - + "." - + this.taskId - + ".data"); - - if(this.fs == null) - this.fs = this.taskIndexFileName.getFileSystem(job); + try { - this.indexFileStream = fs.create(this.taskIndexFileName); - this.valueFileStream = fs.create(this.taskValueFileName); + keyValueWriterClass = job.get("writer.class"); + if(keyValueWriterClass != null) + writer = (KeyValueWriter) Utils.callConstructor(keyValueWriterClass); + else + writer = new HadoopStoreWriter(); - logger.info("Opening " + this.taskIndexFileName + " and " + this.taskValueFileName - + " for writing."); + writer.conf(job); - } catch(IOException e) { + } catch(Exception e) { throw new RuntimeException("Failed to open Input/OutputStream", e); } } @Override public void close() throws IOException { - - this.indexFileStream.close(); - this.valueFileStream.close(); - - if(this.nodeId == -1 || this.chunkId == -1 || this.partitionId == -1) { - // Issue 258 - No data was read in the reduce phase, do not create - // any output - return; - } - - // If the replica type read was not valid, shout out - if(getSaveKeys() && this.replicaType == -1) { - throw new RuntimeException("Could not read the replica type correctly for node " - + nodeId + " ( partition - " + this.partitionId + " )"); - } - - String fileNamePrefix = null; - if(getSaveKeys()) { - fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" - + Integer.toString(this.replicaType) + "_" - + Integer.toString(this.chunkId)); - } else { - fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" - + Integer.toString(this.chunkId)); - } - - // Initialize the node directory - Path nodeDir = new Path(this.outputDir, "node-" + this.nodeId); - - // Create output directory, if it doesn't exist - FileSystem outputFs = nodeDir.getFileSystem(this.conf); - outputFs.mkdirs(nodeDir); - - // Write the checksum and output files - if(this.checkSumType != CheckSumType.NONE) { - - if(this.checkSumDigestIndex != null && this.checkSumDigestValue != null) { - Path checkSumIndexFile = new Path(nodeDir, fileNamePrefix + ".index.checksum"); - Path checkSumValueFile = new Path(nodeDir, fileNamePrefix + ".data.checksum"); - - FSDataOutputStream output = outputFs.create(checkSumIndexFile); - output.write(this.checkSumDigestIndex.getCheckSum()); - output.close(); - - output = outputFs.create(checkSumValueFile); - output.write(this.checkSumDigestValue.getCheckSum()); - output.close(); - } else { - throw new RuntimeException("Failed to open checksum digest for node " + nodeId - + " ( partition - " + this.partitionId + ", chunk - " - + chunkId + " )"); - } - } - - // Generate the final chunk files - Path indexFile = new 
Path(nodeDir, fileNamePrefix + ".index"); - Path valueFile = new Path(nodeDir, fileNamePrefix + ".data"); - - logger.info("Moving " + this.taskIndexFileName + " to " + indexFile); - outputFs.rename(taskIndexFileName, indexFile); - logger.info("Moving " + this.taskValueFileName + " to " + valueFile); - outputFs.rename(this.taskValueFileName, valueFile); - + writer.close(); } } diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducerPerBucket.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducerPerBucket.java index 4b76f5662b..d38af9d9a4 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducerPerBucket.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderReducerPerBucket.java @@ -16,28 +16,20 @@ package voldemort.store.readonly.mr; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; import java.io.IOException; import java.util.Iterator; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.log4j.Logger; -import voldemort.VoldemortException; -import voldemort.store.readonly.ReadOnlyUtils; -import voldemort.store.readonly.checksum.CheckSum; -import voldemort.store.readonly.checksum.CheckSum.CheckSumType; -import voldemort.utils.ByteUtils; +import voldemort.store.readonly.disk.HadoopStoreWriterPerBucket; +import voldemort.store.readonly.disk.KeyValueWriter; +import azkaban.common.utils.Utils; /** * Take key md5s and value bytes and build a read-only store from these values @@ -48,31 +40,9 @@ public class HadoopStoreBuilderReducerPerBucket extends AbstractStoreBuilderConf private static final Logger logger = Logger.getLogger(HadoopStoreBuilderReducerPerBucket.class); - private DataOutputStream[] indexFileStream = null; - private DataOutputStream[] valueFileStream = null; - private int[] position; - private String taskId = null; - - private int nodeId = -1; - private int partitionId = -1; - private int replicaType = -1; - - private Path[] taskIndexFileName; - private Path[] taskValueFileName; - - private JobConf conf; - private CheckSumType checkSumType; - private CheckSum[] checkSumDigestIndex; - private CheckSum[] checkSumDigestValue; - - private String outputDir; - - private FileSystem fs; - - protected static enum CollisionCounter { - NUM_COLLISIONS, - MAX_COLLISIONS; - } + String keyValueWriterClass; + @SuppressWarnings("rawtypes") + KeyValueWriter writer; /** * Reduce should get sorted MD5 of Voldemort key ( either 16 bytes if saving @@ -81,237 +51,36 @@ protected static enum CollisionCounter { * partition-id, replica-type, [key-size, value-size, key, value]* if saving * keys is enabled */ + @SuppressWarnings("unchecked") public void reduce(BytesWritable key, Iterator iterator, OutputCollector output, Reporter reporter) throws IOException { - // Read chunk id - int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks()); - - // Write key and position - this.indexFileStream[chunkId].write(key.get(), 0, key.getSize()); - this.indexFileStream[chunkId].writeInt(this.position[chunkId]); - - // Run key through 
checksum digest - if(this.checkSumDigestIndex[chunkId] != null) { - this.checkSumDigestIndex[chunkId].update(key.get(), 0, key.getSize()); - this.checkSumDigestIndex[chunkId].update(this.position[chunkId]); - } - - short numTuples = 0; - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - DataOutputStream valueStream = new DataOutputStream(stream); - - while(iterator.hasNext()) { - BytesWritable writable = iterator.next(); - byte[] valueBytes = writable.get(); - int offsetTillNow = 0; - - // Read node Id - if(this.nodeId == -1) - this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow); - offsetTillNow += ByteUtils.SIZE_OF_INT; - - // Read partition id - if(this.partitionId == -1) - this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow); - offsetTillNow += ByteUtils.SIZE_OF_INT; - - // Read replica type - if(getSaveKeys()) { - if(this.replicaType == -1) - this.replicaType = (int) ByteUtils.readBytes(valueBytes, - offsetTillNow, - ByteUtils.SIZE_OF_BYTE); - offsetTillNow += ByteUtils.SIZE_OF_BYTE; - } - - int valueLength = writable.getSize() - offsetTillNow; - if(getSaveKeys()) { - // Write ( key_length, value_length, key, - // value ) - valueStream.write(valueBytes, offsetTillNow, valueLength); - } else { - // Write (value_length + value) - valueStream.writeInt(valueLength); - valueStream.write(valueBytes, offsetTillNow, valueLength); - } - - numTuples++; - - // If we have multiple values for this md5 that is a collision, - // throw an exception--either the data itself has duplicates, there - // are trillions of keys, or someone is attempting something - // malicious ( We obviously expect collisions when we save keys ) - if(!getSaveKeys() && numTuples > 1) - throw new VoldemortException("Duplicate keys detected for md5 sum " - + ByteUtils.toHexString(ByteUtils.copy(key.get(), - 0, - key.getSize()))); - - } - - if(numTuples < 0) { - // Overflow - throw new VoldemortException("Found too many collisions: chunk " + chunkId - + " has exceeded " + Short.MAX_VALUE + " collisions."); - } else if(numTuples > 1) { - // Update number of collisions + max keys per collision - reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1); - - long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter(); - if(numTuples > numCollisions) { - reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions); - } - } - - // Flush the value - valueStream.flush(); - byte[] value = stream.toByteArray(); - - // Start writing to file now - // First, if save keys flag set the number of keys - if(getSaveKeys()) { - - this.valueFileStream[chunkId].writeShort(numTuples); - this.position[chunkId] += ByteUtils.SIZE_OF_SHORT; - - if(this.checkSumDigestValue[chunkId] != null) { - this.checkSumDigestValue[chunkId].update(numTuples); - } - } - - this.valueFileStream[chunkId].write(value); - this.position[chunkId] += value.length; - - if(this.checkSumDigestValue[chunkId] != null) { - this.checkSumDigestValue[chunkId].update(value); - } - - if(this.position[chunkId] < 0) - throw new VoldemortException("Chunk overflow exception: chunk " + chunkId - + " has exceeded " + Integer.MAX_VALUE + " bytes."); + writer.write(key, iterator, reporter); } @Override public void configure(JobConf job) { - super.configure(job); - try { - this.conf = job; - this.outputDir = job.get("final.output.dir"); - this.taskId = job.get("mapred.task.id"); - this.checkSumType = CheckSum.fromString(job.get("checksum.type")); - - this.checkSumDigestIndex = new CheckSum[getNumChunks()]; - 
this.checkSumDigestValue = new CheckSum[getNumChunks()]; - this.position = new int[getNumChunks()]; - this.taskIndexFileName = new Path[getNumChunks()]; - this.taskValueFileName = new Path[getNumChunks()]; - this.indexFileStream = new DataOutputStream[getNumChunks()]; - this.valueFileStream = new DataOutputStream[getNumChunks()]; - - for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { - - this.checkSumDigestIndex[chunkId] = CheckSum.getInstance(checkSumType); - this.checkSumDigestValue[chunkId] = CheckSum.getInstance(checkSumType); - this.position[chunkId] = 0; - - this.taskIndexFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job), - getStoreName() + "." - + Integer.toString(chunkId) - + "_" + this.taskId + ".index"); - this.taskValueFileName[chunkId] = new Path(FileOutputFormat.getOutputPath(job), - getStoreName() + "." - + Integer.toString(chunkId) - + "_" + this.taskId + ".data"); - if(this.fs == null) - this.fs = this.taskIndexFileName[chunkId].getFileSystem(job); + try { - this.indexFileStream[chunkId] = fs.create(this.taskIndexFileName[chunkId]); - this.valueFileStream[chunkId] = fs.create(this.taskValueFileName[chunkId]); + keyValueWriterClass = job.get("writer.class"); + if(keyValueWriterClass != null) + writer = (KeyValueWriter) Utils.callConstructor(keyValueWriterClass); + else + writer = new HadoopStoreWriterPerBucket(); - logger.info("Opening " + this.taskIndexFileName[chunkId] + " and " - + this.taskValueFileName[chunkId] + " for writing."); - } + writer.conf(job); - } catch(IOException e) { + } catch(Exception e) { throw new RuntimeException("Failed to open Input/OutputStream", e); } } @Override public void close() throws IOException { - - for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { - this.indexFileStream[chunkId].close(); - this.valueFileStream[chunkId].close(); - } - - if(this.nodeId == -1 || this.partitionId == -1) { - // Issue 258 - No data was read in the reduce phase, do not create - // any output - return; - } - - // If the replica type read was not valid, shout out - if(getSaveKeys() && this.replicaType == -1) { - throw new RuntimeException("Could not read the replica type correctly for node " - + nodeId + " ( partition - " + this.partitionId + " )"); - } - - String fileNamePrefix = null; - if(getSaveKeys()) { - fileNamePrefix = new String(Integer.toString(this.partitionId) + "_" - + Integer.toString(this.replicaType) + "_"); - } else { - fileNamePrefix = new String(Integer.toString(this.partitionId) + "_"); - } - - // Initialize the node directory - Path nodeDir = new Path(this.outputDir, "node-" + this.nodeId); - - // Create output directory, if it doesn't exist - FileSystem outputFs = nodeDir.getFileSystem(this.conf); - outputFs.mkdirs(nodeDir); - - // Write the checksum and output files - for(int chunkId = 0; chunkId < getNumChunks(); chunkId++) { - - String chunkFileName = fileNamePrefix + Integer.toString(chunkId); - if(this.checkSumType != CheckSumType.NONE) { - - if(this.checkSumDigestIndex[chunkId] != null - && this.checkSumDigestValue[chunkId] != null) { - Path checkSumIndexFile = new Path(nodeDir, chunkFileName + ".index.checksum"); - Path checkSumValueFile = new Path(nodeDir, chunkFileName + ".data.checksum"); - - FSDataOutputStream output = outputFs.create(checkSumIndexFile); - output.write(this.checkSumDigestIndex[chunkId].getCheckSum()); - output.close(); - - output = outputFs.create(checkSumValueFile); - output.write(this.checkSumDigestValue[chunkId].getCheckSum()); - output.close(); - } else { - throw new 
RuntimeException("Failed to open checksum digest for node " + nodeId - + " ( partition - " + this.partitionId - + ", chunk - " + chunkId + " )"); - } - } - - // Generate the final chunk files - Path indexFile = new Path(nodeDir, chunkFileName + ".index"); - Path valueFile = new Path(nodeDir, chunkFileName + ".data"); - - logger.info("Moving " + this.taskIndexFileName[chunkId] + " to " + indexFile); - fs.rename(taskIndexFileName[chunkId], indexFile); - logger.info("Moving " + this.taskValueFileName[chunkId] + " to " + valueFile); - fs.rename(this.taskValueFileName[chunkId], valueFile); - - } - + writer.close(); } } diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderUtils.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderUtils.java index eaf3d438ff..79fb9aac6c 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderUtils.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreBuilderUtils.java @@ -1,3 +1,19 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + package voldemort.store.readonly.mr; import java.io.ByteArrayOutputStream; diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreJobRunner.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreJobRunner.java index 69b0d70250..366f30f5f7 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreJobRunner.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HadoopStoreJobRunner.java @@ -1,3 +1,19 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + package voldemort.store.readonly.mr; import java.io.BufferedReader; diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HdfsDataFileChunk.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HdfsDataFileChunk.java index ae4c6466a6..34b790f074 100644 --- a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HdfsDataFileChunk.java +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/HdfsDataFileChunk.java @@ -1,3 +1,19 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + package voldemort.store.readonly.mr; import java.io.IOException; diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonMapper.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonMapper.java new file mode 100644 index 0000000000..ce623d9b6f --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonMapper.java @@ -0,0 +1,36 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr; + +import java.io.IOException; + +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; + +import voldemort.store.readonly.mr.serialization.JsonMapper; + +public class IdentityJsonMapper extends JsonMapper { + + @Override + public void mapObjects(Object key, + Object value, + OutputCollector output, + Reporter reporter) throws IOException { + output.collect(key, value); + } + +} \ No newline at end of file diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonReducer.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonReducer.java new file mode 100644 index 0000000000..b6dc13d0d1 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/IdentityJsonReducer.java @@ -0,0 +1,38 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
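The reducer changes above strip out the inline chunk/checksum bookkeeping and delegate to a pluggable `KeyValueWriter`: `configure()` instantiates whatever class the `writer.class` job property names, and falls back to `HadoopStoreWriter` (or `HadoopStoreWriterPerBucket` in the per-bucket reducer) when the property is absent. Below is a minimal sketch of that selection logic in isolation; it reuses only the calls visible in the diff, and the commented-out custom class name is hypothetical.

```java
import org.apache.hadoop.mapred.JobConf;

import voldemort.store.readonly.disk.HadoopStoreWriter;
import voldemort.store.readonly.disk.KeyValueWriter;

import azkaban.common.utils.Utils;

public class WriterSelectionSketch {

    // Mirrors the reducers' configure(): use the class named by "writer.class"
    // if present, otherwise fall back to the stock HadoopStoreWriter.
    @SuppressWarnings("rawtypes")
    static KeyValueWriter selectWriter(JobConf job) {
        String keyValueWriterClass = job.get("writer.class");
        if(keyValueWriterClass != null)
            return (KeyValueWriter) Utils.callConstructor(keyValueWriterClass);
        return new HadoopStoreWriter();
    }

    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Opt into a custom writer (hypothetical class name for illustration):
        // job.set("writer.class", "com.example.MyKeyValueWriter");
        System.out.println(selectWriter(job).getClass().getName());
    }
}
```

A class supplied this way presumably needs a public no-arg constructor, since `Utils.callConstructor` is handed only the class name.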
+ */ + +package voldemort.store.readonly.mr; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; + +import voldemort.store.readonly.mr.serialization.JsonReducer; + +public class IdentityJsonReducer extends JsonReducer { + + @Override + public void reduceObjects(Object key, + Iterator values, + OutputCollector collector, + Reporter reporter) throws IOException { + while(values.hasNext()) { + collector.collect(key, values.next()); + } + } +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/JobState.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/JobState.java new file mode 100644 index 0000000000..fd01c87135 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/JobState.java @@ -0,0 +1,92 @@ +package voldemort.store.readonly.mr; + +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +/** + * Captures the job state + */ +import java.io.IOException; +import java.io.StringReader; +import java.util.List; + +import org.apache.hadoop.mapred.JobConf; + +import voldemort.VoldemortException; +import voldemort.cluster.Cluster; +import voldemort.store.StoreDefinition; +import voldemort.xml.ClusterMapper; +import voldemort.xml.StoreDefinitionsMapper; + +public class JobState { + + private int numChunks; + private Cluster cluster; + private StoreDefinition storeDef; + private boolean saveKeys; + private boolean reducerPerBucket; + + public void configure(JobConf conf) { + this.cluster = new ClusterMapper().readCluster(new StringReader(conf.get("cluster.xml"))); + List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(conf.get("stores.xml"))); + if(storeDefs.size() != 1) + throw new IllegalStateException("Expected to find only a single store, but found multiple!"); + this.storeDef = storeDefs.get(0); + + this.numChunks = conf.getInt("num.chunks", -1); + if(this.numChunks < 1) + throw new VoldemortException("num.chunks not specified in the job conf."); + + this.saveKeys = conf.getBoolean("save.keys", false); + this.reducerPerBucket = conf.getBoolean("reducer.per.bucket", false); + } + + @SuppressWarnings("unused") + public void close() throws IOException {} + + public Cluster getCluster() { + checkNotNull(cluster); + return cluster; + } + + public boolean getSaveKeys() { + return this.saveKeys; + } + + public boolean getReducerPerBucket() { + return this.reducerPerBucket; + } + + public StoreDefinition getStoreDef() { + checkNotNull(storeDef); + return storeDef; + } + + public String getStoreName() { + checkNotNull(storeDef); + return storeDef.getName(); + } + + private final void checkNotNull(Object o) { + if(o == null) + throw new VoldemortException("Not configured yet!"); + } + + public int getNumChunks() { + return this.numChunks; + } + +} diff --git 
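`JobState.configure()` above rebuilds its runtime view entirely from the `JobConf`: it parses the serialized `cluster.xml` and `stores.xml` (which must define exactly one store) and reads `num.chunks`, `save.keys` and `reducer.per.bucket`. A hedged sketch of how a driver might populate those keys before submitting a job; the concrete values and the idea of passing the XML in as plain strings are illustrative assumptions, not the store builder's actual setup code.

```java
import org.apache.hadoop.mapred.JobConf;

public class JobStateConfSketch {

    // clusterXml / storesXml are the full XML documents as strings; how they
    // are produced (read from disk, generated, etc.) is up to the caller.
    public static JobConf buildConf(String clusterXml, String storesXml) {
        JobConf conf = new JobConf();

        // Property names match what JobState.configure() reads:
        conf.set("cluster.xml", clusterXml);
        conf.set("stores.xml", storesXml);       // must describe exactly one store
        conf.setInt("num.chunks", 2);            // JobState rejects values < 1
        conf.setBoolean("save.keys", true);
        conf.setBoolean("reducer.per.bucket", false);
        return conf;
    }
}
```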
a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/VoldemortStoreBuilderMapper.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/VoldemortStoreBuilderMapper.java new file mode 100644 index 0000000000..d074ca41e6 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/VoldemortStoreBuilderMapper.java @@ -0,0 +1,89 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr; + +import java.util.Map; + +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.mapred.JobConf; + +import voldemort.serialization.json.JsonTypeSerializer; +import voldemort.store.readonly.mr.azkaban.StoreBuilderTransformation; +import voldemort.store.readonly.mr.utils.HadoopUtils; +import azkaban.common.utils.Props; +import azkaban.common.utils.Utils; + +public class VoldemortStoreBuilderMapper extends AbstractHadoopStoreBuilderMapper { + + private String _keySelection; + private String _valSelection; + private JsonTypeSerializer _inputKeySerializer; + private JsonTypeSerializer _inputValueSerializer; + private StoreBuilderTransformation _keyTrans; + private StoreBuilderTransformation _valTrans; + + @Override + public Object makeKey(Object key, Object value) { + return makeResult((BytesWritable) key, _inputKeySerializer, _keySelection, _keyTrans); + } + + @Override + public Object makeValue(Object key, Object value) { + return makeResult((BytesWritable) value, _inputValueSerializer, _valSelection, _valTrans); + } + + private Object makeResult(BytesWritable writable, + JsonTypeSerializer serializer, + String selection, + StoreBuilderTransformation trans) { + Object obj = serializer.toObject(writable.get()); + if(selection != null) { + Map m = (Map) obj; + obj = m.get(selection); + } + + if(trans != null) + obj = trans.transform(obj); + + return obj; + } + + @Override + public void configure(JobConf conf) { + super.configure(conf); + Props props = HadoopUtils.getPropsFromJob(conf); + + _keySelection = props.getString("key.selection", null); + _valSelection = props.getString("value.selection", null); + _inputKeySerializer = getSchemaFromJob(conf, "mapper.input.key.schema"); + _inputValueSerializer = getSchemaFromJob(conf, "mapper.input.value.schema"); + String _keyTransClass = props.getString("key.transformation.class", null); + String _valueTransClass = props.getString("value.transformation.class", null); + + if(_keyTransClass != null) + _keyTrans = (StoreBuilderTransformation) Utils.callConstructor(_keyTransClass); + if(_valueTransClass != null) + _valTrans = (StoreBuilderTransformation) Utils.callConstructor(_valueTransClass); + } + + protected JsonTypeSerializer getSchemaFromJob(JobConf conf, String key) { + if(conf.get(key) == null) + throw new IllegalArgumentException("Missing required parameter '" + key + "' on job."); + return new JsonTypeSerializer(conf.get(key)); + } + +} diff --git 
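`VoldemortStoreBuilderMapper` above can route every key and value through a `StoreBuilderTransformation` named by the `key.transformation.class` / `value.transformation.class` properties, instantiated via `Utils.callConstructor`. A hypothetical transformation is sketched below; the interface it implements (a single `transform(Object)` method) appears later in this patch.

```java
package com.example;   // hypothetical package for this sketch

import voldemort.store.readonly.mr.azkaban.StoreBuilderTransformation;

/**
 * Example transformation: lower-cases string inputs and passes everything
 * else through untouched.
 */
public class LowerCaseTransformation implements StoreBuilderTransformation {

    @Override
    public Object transform(Object obj) {
        if(obj instanceof String)
            return ((String) obj).toLowerCase();
        return obj;
    }
}
```

Wiring it in would amount to setting `key.transformation.class=com.example.LowerCaseTransformation` (or the value-side property) in the job props.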
a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractHadoopJob.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractHadoopJob.java new file mode 100644 index 0000000000..bcd0170e9f --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractHadoopJob.java @@ -0,0 +1,284 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr.azkaban; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.Counters.Counter; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.RunningJob; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +import voldemort.store.readonly.mr.IdentityJsonReducer; +import voldemort.store.readonly.mr.utils.HadoopUtils; +import azkaban.common.jobs.AbstractJob; +import azkaban.common.utils.Props; + +/** + * An abstract Base class for Hadoop Jobs + * + * @author bbansal + * + */ +public abstract class AbstractHadoopJob extends AbstractJob { + + public static String COMMON_FILE_DATE_PATTERN = "yyyy-MM-dd-HH-mm"; + public static final String HADOOP_PREFIX = "hadoop-conf."; + public static final String LATEST_SUFFIX = "#LATEST"; + public static final String CURRENT_SUFFIX = "#CURRENT"; + private final Props _props; + private RunningJob _runningJob; + + public AbstractHadoopJob(String name, Props props) { + super(name); + this._props = props; + } + + public void run(JobConf conf) throws Exception { + _runningJob = new JobClient(conf).submitJob(conf); + info("See " + _runningJob.getTrackingURL() + " for details."); + _runningJob.waitForCompletion(); + + if(!_runningJob.isSuccessful()) { + throw new Exception("Hadoop job:" + getId() + " failed!"); + } + + // dump all counters + Counters counters = _runningJob.getCounters(); + for(String groupName: counters.getGroupNames()) { + Counters.Group group = counters.getGroup(groupName); + info("Group: " + group.getDisplayName()); + for(Counter counter: group) + info(counter.getDisplayName() + ":\t" + counter.getValue()); + } + } + + public JobConf createJobConf(Class mapperClass) throws IOException, + URISyntaxException { + JobConf conf = createJobConf(mapperClass, IdentityJsonReducer.class); + conf.setNumReduceTasks(0); + + return conf; + } + + public JobConf createJobConf(Class mapperClass, + Class reducerClass, 
+ Class combinerClass) throws IOException, + URISyntaxException { + JobConf conf = createJobConf(mapperClass, reducerClass); + conf.setCombinerClass(combinerClass); + + return conf; + } + + public JobConf createJobConf(Class mapperClass, + Class reducerClass) throws IOException, + URISyntaxException { + JobConf conf = new JobConf(); + // set custom class loader with custom find resource strategy. + + conf.setJobName(getId()); + conf.setMapperClass(mapperClass); + conf.setReducerClass(reducerClass); + + String hadoop_ugi = _props.getString("hadoop.job.ugi", null); + if(hadoop_ugi != null) { + conf.set("hadoop.job.ugi", hadoop_ugi); + } + + if(_props.getBoolean("is.local", false)) { + conf.set("mapred.job.tracker", "local"); + conf.set("fs.default.name", "file:///"); + conf.set("mapred.local.dir", "/tmp/map-red"); + + info("Running locally, no hadoop jar set."); + } else { + setClassLoaderAndJar(conf, getClass()); + info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar()); + info("*************************************************************************"); + info(" Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + + ") "); + info("*************************************************************************"); + } + + // set JVM options if present + if(_props.containsKey("mapred.child.java.opts")) { + conf.set("mapred.child.java.opts", _props.getString("mapred.child.java.opts")); + info("mapred.child.java.opts set to " + _props.getString("mapred.child.java.opts")); + } + + // set input and output paths if they are present + if(_props.containsKey("input.paths")) { + List inputPaths = _props.getStringList("input.paths"); + if(inputPaths.size() == 0) + throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'"); + for(String path: inputPaths) { + // Implied stuff, but good implied stuff + if(path.endsWith(LATEST_SUFFIX)) { + FileSystem fs = FileSystem.get(conf); + + PathFilter filter = new PathFilter() { + + @Override + public boolean accept(Path arg0) { + return !arg0.getName().startsWith("_") + && !arg0.getName().startsWith("."); + } + }; + + String latestPath = path.substring(0, path.length() - LATEST_SUFFIX.length()); + FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter); + + Arrays.sort(statuses); + + path = statuses[statuses.length - 1].getPath().toString(); + System.out.println("Using latest folder: " + path); + } + HadoopUtils.addAllSubPaths(conf, new Path(path)); + } + } + + if(_props.containsKey("output.path")) { + String location = _props.get("output.path"); + if(location.endsWith("#CURRENT")) { + DateTimeFormatter format = DateTimeFormat.forPattern(COMMON_FILE_DATE_PATTERN); + String destPath = format.print(new DateTime()); + location = location.substring(0, location.length() - "#CURRENT".length()) + + destPath; + System.out.println("Store location set to " + location); + } + + FileOutputFormat.setOutputPath(conf, new Path(location)); + // For testing purpose only remove output file if exists + if(_props.getBoolean("force.output.overwrite", false)) { + FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf); + fs.delete(FileOutputFormat.getOutputPath(conf), true); + } + } + + // Adds External jars to hadoop classpath + String externalJarList = _props.getString("hadoop.external.jarFiles", null); + if(externalJarList != null) { + String[] jarFiles = externalJarList.split(","); + for(String jarFile: jarFiles) { + info("Adding extenral jar File:" + jarFile); + 
DistributedCache.addFileToClassPath(new Path(jarFile), conf); + } + } + + // Adds distributed cache files + String cacheFileList = _props.getString("hadoop.cache.files", null); + if(cacheFileList != null) { + String[] cacheFiles = cacheFileList.split(","); + for(String cacheFile: cacheFiles) { + info("Adding Distributed Cache File:" + cacheFile); + DistributedCache.addCacheFile(new URI(cacheFile), conf); + } + } + + // Adds distributed cache files + String archiveFileList = _props.getString("hadoop.cache.archives", null); + if(archiveFileList != null) { + String[] archiveFiles = archiveFileList.split(","); + for(String archiveFile: archiveFiles) { + info("Adding Distributed Cache Archive File:" + archiveFile); + DistributedCache.addCacheArchive(new URI(archiveFile), conf); + } + } + + String hadoopCacheJarDir = _props.getString("hdfs.default.classpath.dir", null); + if(hadoopCacheJarDir != null) { + FileSystem fs = FileSystem.get(conf); + if(fs != null) { + FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir)); + + if(status != null) { + for(int i = 0; i < status.length; ++i) { + if(!status[i].isDir()) { + Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName()); + info("Adding Jar to Distributed Cache Archive File:" + path); + + DistributedCache.addFileToClassPath(path, conf); + } + } + } else { + info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty."); + } + } else { + info("hdfs.default.classpath.dir " + hadoopCacheJarDir + + " filesystem doesn't exist"); + } + } + + // May want to add this to HadoopUtils, but will await refactoring + for(String key: getProps().keySet()) { + String lowerCase = key.toLowerCase(); + if(lowerCase.startsWith(HADOOP_PREFIX)) { + String newKey = key.substring(HADOOP_PREFIX.length()); + conf.set(newKey, getProps().get(key)); + } + } + + HadoopUtils.setPropsInJob(conf, getProps()); + return conf; + } + + public Props getProps() { + return this._props; + } + + public void cancel() throws Exception { + if(_runningJob != null) + _runningJob.killJob(); + } + + public double getProgress() throws IOException { + if(_runningJob == null) + return 0.0; + else + return (double) (_runningJob.mapProgress() + _runningJob.reduceProgress()) / 2.0d; + } + + public Counters getCounters() throws IOException { + return _runningJob.getCounters(); + } + + public static void setClassLoaderAndJar(JobConf conf, Class jobClass) { + conf.setClassLoader(Thread.currentThread().getContextClassLoader()); + String jar = HadoopUtils.findContainingJar(jobClass, Thread.currentThread() + .getContextClassLoader()); + if(jar != null) { + conf.setJar(jar); + } + } +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractVoldemortBatchCopyJob.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractVoldemortBatchCopyJob.java new file mode 100644 index 0000000000..3bf5af72d4 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/AbstractVoldemortBatchCopyJob.java @@ -0,0 +1,132 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr.azkaban; + +import java.io.IOException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobConf; + +import voldemort.cluster.Cluster; +import voldemort.cluster.Node; +import voldemort.store.readonly.mr.utils.HadoopUtils; +import azkaban.common.jobs.AbstractJob; +import azkaban.common.utils.Props; + +/** + * A test job that throws an exception + * + * @author bbansal Required Properties + *
+ *   - voldemort.cluster.file
+ *   - voldemort.store.name
+ *   - input.path
+ *   - dest.path
+ *   - source.host
+ *   - dest.host
+ */ +public abstract class AbstractVoldemortBatchCopyJob extends AbstractJob { + + private final Props _props; + + public AbstractVoldemortBatchCopyJob(String name, Props props) throws IOException { + super(name); + _props = props; + } + + public void run() throws Exception { + JobConf conf = new JobConf(); + HadoopUtils.copyInAllProps(_props, conf); + + Cluster cluster = HadoopUtils.readCluster(_props.get("voldemort.cluster.file"), conf); + final String storeName = _props.get("voldemort.store.name"); + final Path inputDir = new Path(_props.get("input.path")); + + ExecutorService executors = Executors.newFixedThreadPool(cluster.getNumberOfNodes()); + final Semaphore semaphore = new Semaphore(0, false); + final AtomicInteger countSuccess = new AtomicInteger(0); + final boolean[] succeeded = new boolean[cluster.getNumberOfNodes()]; + final String destinationDir = _props.get("dest.path"); + final String sourceHost = _props.getString("src.host", "localhost"); + + for(final Node node: cluster.getNodes()) { + + executors.execute(new Runnable() { + + public void run() { + int id = node.getId(); + String indexFile = inputDir + "/" + storeName + ".index" + "_" + + Integer.toString(id); + String dataFile = inputDir + "/" + storeName + ".data" + "_" + + Integer.toString(id); + + String host = node.getHost(); + try { + // copyFileToLocal(sourceHost, + // indexFile, + // host, + // VoldemortSwapperUtils.getIndexDestinationFile(node.getId(), + // destinationDir)); + // copyFileToLocal(sourceHost, + // dataFile, + // host, + // VoldemortSwapperUtils.getDataDestinationFile(node.getId(), + // destinationDir)); + + succeeded[node.getId()] = true; + countSuccess.incrementAndGet(); + } catch(Exception e) { + error("copy to Remote node failed for node:" + node.getId(), e); + } + + semaphore.release(); + } + }); + } + + // wait for all operations to complete + semaphore.acquire(cluster.getNumberOfNodes()); + + try { + if(countSuccess.get() == cluster.getNumberOfNodes() + || _props.getBoolean("swap.partial.index", false)) { + int counter = 0; + // lets try to swap only the successful nodes + for(Node node: cluster.getNodes()) { + // data refresh succeeded + if(succeeded[node.getId()]) { + VoldemortSwapperUtils.doSwap(storeName, node, destinationDir); + counter++; + } + } + info(counter + " node out of " + cluster.getNumberOfNodes() + + " refreshed with fresh index/data for store '" + storeName + "'"); + } else { + error("Failed to copy Index Files for the entire cluster."); + } + } finally { + // stop all executors Now + executors.shutdown(); + } + } + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/Job.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/Job.java new file mode 100644 index 0000000000..57dc73d006 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/Job.java @@ -0,0 +1,75 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
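`AbstractVoldemortBatchCopyJob.run()` above fans one copy task per node out to a fixed-size thread pool, records per-node success in an `AtomicInteger` plus a `boolean[]`, and blocks on a `Semaphore` until every task has released a permit before deciding which nodes to swap. The same coordination pattern in isolation, as a minimal sketch with a stand-in task body:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;

public class FanOutSketch {

    public static void main(String[] args) throws InterruptedException {
        final int nodes = 4;                          // stand-in for cluster.getNumberOfNodes()
        ExecutorService executors = Executors.newFixedThreadPool(nodes);
        final Semaphore semaphore = new Semaphore(0, false);
        final AtomicInteger countSuccess = new AtomicInteger(0);
        final boolean[] succeeded = new boolean[nodes];  // per-node flags driving the swap decision

        for(int i = 0; i < nodes; i++) {
            final int id = i;
            executors.execute(new Runnable() {

                public void run() {
                    try {
                        // per-node copy work would go here
                        succeeded[id] = true;
                        countSuccess.incrementAndGet();
                    } finally {
                        // every task releases exactly one permit, pass or fail
                        semaphore.release();
                    }
                }
            });
        }

        // block until all node tasks have finished
        semaphore.acquire(nodes);
        System.out.println(countSuccess.get() + " of " + nodes + " node tasks succeeded");
        executors.shutdown();
    }
}
```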
+ */ + +package voldemort.store.readonly.mr.azkaban; + +import java.util.Properties; + +/** + * This interface defines a Raw Job interface. Each job defines + *
+ *   - Job Type : {HADOOP, UNIX, JAVA, SUCCESS_TEST, CONTROLLER}
+ *   - Job ID/Name : {String}
+ *   - Arguments: Key/Value Map for Strings
+ * + * A job is required to have a constructor Job(String jobId, Props props) + */ + +public interface Job { + + /** + * Returns a unique(should be checked in xml) string name/id for the Job. + * + * @return + */ + public String getId(); + + /** + * Run the job. In general this method can only be run once. Must either + * succeed or throw an exception. + */ + public void run() throws Exception; + + /** + * Best effort attempt to cancel the job. + * + * @throws Exception If cancel fails + */ + public void cancel() throws Exception; + + /** + * Returns a progress report between [0 - 1.0] to indicate the percentage + * complete + * + * @throws Exception If getting progress fails + */ + public double getProgress() throws Exception; + + /** + * Get the generated properties from this job. + * + * @return + */ + public Properties getJobGeneratedProperties(); + + /** + * Determine if the job was cancelled. + * + * @return + */ + public boolean isCanceled(); +} \ No newline at end of file diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/StoreBuilderTransformation.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/StoreBuilderTransformation.java new file mode 100644 index 0000000000..f496ab4cd6 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/StoreBuilderTransformation.java @@ -0,0 +1,29 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr.azkaban; + +/** + * An interface to use for processing rows in the voldemort store builder + * + * @author jkreps + * + */ +public interface StoreBuilderTransformation { + + public Object transform(Object obj); + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/UndefinedPropertyException.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/UndefinedPropertyException.java new file mode 100644 index 0000000000..a3b6e7cc23 --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/UndefinedPropertyException.java @@ -0,0 +1,27 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
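The `Job` interface above asks for little: an id, `run`/`cancel`, a progress estimate in `[0, 1]`, any generated properties, and (per its javadoc) a `Job(String jobId, Props props)` constructor. A skeletal implementation follows, purely as a sketch; the class name and its echo-style behaviour are invented for illustration.

```java
package com.example;   // hypothetical package for this sketch

import java.util.Properties;

import voldemort.store.readonly.mr.azkaban.Job;
import azkaban.common.utils.Props;

public class EchoJob implements Job {

    private final String jobId;
    private final Props props;
    private volatile boolean canceled = false;
    private volatile boolean done = false;

    // The constructor shape the interface javadoc requires.
    public EchoJob(String jobId, Props props) {
        this.jobId = jobId;
        this.props = props;
    }

    public String getId() {
        return jobId;
    }

    public void run() throws Exception {
        System.out.println("Running job " + jobId + " with props " + props);
        done = true;
    }

    public void cancel() throws Exception {
        canceled = true;
    }

    public double getProgress() throws Exception {
        return done ? 1.0 : 0.0;
    }

    public Properties getJobGeneratedProperties() {
        return new Properties();
    }

    public boolean isCanceled() {
        return canceled;
    }
}
```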
+ */ + +package voldemort.store.readonly.mr.azkaban; + +public class UndefinedPropertyException extends RuntimeException { + + private static final long serialVersionUID = 1; + + public UndefinedPropertyException(String message) { + super(message); + } + +} diff --git a/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/VoldemortBatchIndexJob.java b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/VoldemortBatchIndexJob.java new file mode 100644 index 0000000000..245887651f --- /dev/null +++ b/contrib/hadoop-store-builder/src/java/voldemort/store/readonly/mr/azkaban/VoldemortBatchIndexJob.java @@ -0,0 +1,421 @@ +/* + * Copyright 2008-2009 LinkedIn, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package voldemort.store.readonly.mr.azkaban; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.lib.HashPartitioner; +import org.apache.log4j.Logger; + +import voldemort.cluster.Cluster; +import voldemort.cluster.Node; +import voldemort.routing.ConsistentRoutingStrategy; +import voldemort.serialization.DefaultSerializerFactory; +import voldemort.serialization.Serializer; +import voldemort.store.StoreDefinition; +import voldemort.store.readonly.mr.serialization.JsonConfigurable; +import voldemort.store.readonly.mr.utils.HadoopUtils; +import voldemort.utils.ByteUtils; +import azkaban.common.utils.Props; + +/** + * Creates Index and value files using Voldemort hash keys for easy batch + * update. + *

+ * Creates two files + *