diff --git a/hraven-assembly/pom.xml b/hraven-assembly/pom.xml index 5aa0291..3466847 100644 --- a/hraven-assembly/pom.xml +++ b/hraven-assembly/pom.xml @@ -17,12 +17,12 @@ com.twitter.hraven hraven - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT ../ com.twitter.hraven hraven-assembly - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT hRaven-assembly hRaven - Assembly artifacts pom diff --git a/hraven-core/pom.xml b/hraven-core/pom.xml index e57e992..8ddeb73 100644 --- a/hraven-core/pom.xml +++ b/hraven-core/pom.xml @@ -21,12 +21,12 @@ com.twitter.hraven hraven - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT ../ com.twitter.hraven hraven-core - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT hRaven - core Core components of hRaven, including model classes and data access layer diff --git a/hraven-core/src/main/java/com/twitter/hraven/Constants.java b/hraven-core/src/main/java/com/twitter/hraven/Constants.java index 9e8653a..d028c91 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/Constants.java +++ b/hraven-core/src/main/java/com/twitter/hraven/Constants.java @@ -242,6 +242,17 @@ public class Constants { public static final byte[] SUBMIT_TIME_PREFIX_HADOOP2_BYTES = Bytes .toBytes(SUBMIT_TIME_PREFIX_HADOOP2); + /** + * The string representing the submit time in a spark job history file. + */ + public static final String SPARK_SUBMIT_TIME = "\"submit_time\":"; + + /** + * Raw bytes representation of {@link #SPARK_SUBMIT_TIME}; + */ + public static final byte[] SPARK_SUBMIT_TIME_BYTES = Bytes + .toBytes(SPARK_SUBMIT_TIME); + /** * length of string that contains a unix timestamp in milliseconds * this length will be correct till Sat, 20 Nov 2286 which is @@ -267,6 +278,16 @@ public class Constants { public static final int MAX_LONG_LENGTH = Long.toString(Long.MAX_VALUE) .length(); + /** + * length of run id in bytes in the job key + */ + public static final int RUN_ID_LENGTH_JOBKEY = 8; + + /** + * length of sequence number in bytes in the job key + */ + public static final int SEQUENCE_NUM_LENGTH_JOBKEY = 8; + public static final String USER_CONF_KEY = "user.name"; public static final String USER_CONF_KEY_HADOOP2 = "mapreduce.job.user.name"; @@ -322,6 +343,9 @@ public class Constants { public static final String MR_RUN_CONF_KEY = "mapred.app.submitted.timestamp"; + public static final String FRAMEWORK_CONF_KEY = "mapreduce.framework.name"; + public static final String FRAMEWORK_CONF_SPARK_VALUE = "spark"; + public static final String SPARK_VERSION_CONF_KEY = "spark.signature"; /** * hadoop2 memory mb for container size * for map, reduce and AM containers @@ -358,7 +382,9 @@ public class Constants { } /** - * Regex to parse a job file name. First back-ref is JT name, second one if + * Regex to parse a job file name. + * It could be a job history file from a mapreduce job or a spark job + * First back-ref is JT name, second one if * job-id *

* For example, @@ -372,8 +398,12 @@ public class Constants { * job_201306192120_0003_1371677828795_hadoop_conf.xml * in hadoop 2.0, job history file names are named as * job_1374258111572_0003-1374260622449-userName1-TeraGen-1374260635219-2-0-SUCCEEDED-default.jhist + * in spark, the file name looks like: + * spark_1413515656084_3051855 + * and the spark conf file name: + * spark_1413515656084_3051855_conf.xml */ - public static final String JOB_FILENAME_PATTERN_REGEX = ".*(job_[0-9]*_[0-9]*)(-|_)([0-9]*[aA-zZ]*)*(.*)$"; + public static final String JOB_FILENAME_PATTERN_REGEX = ".*?((job|spark)_[0-9]*_[0-9]*)(-|_)*([0-9]*[aA-zZ]*)*(.*)$"; // JobHistory file name parsing related public static final String JOB_CONF_FILE_END = "(.*)(conf.xml)$"; @@ -426,4 +456,12 @@ public class Constants { /** name of the properties file used for cluster to cluster identifier mapping */ public static final String HRAVEN_CLUSTER_PROPERTIES_FILENAME = "hRavenClusters.properties"; + + /** spark job keys have a prefix of "spark" + * hence spark job key length is calculated as + * 5 + * + + * regular job key length which is 16 (epoch and sequence number) + */ + public static final int SPARK_JOB_KEY_LENGTH = 21; } diff --git a/hraven-core/src/main/java/com/twitter/hraven/Flow.java b/hraven-core/src/main/java/com/twitter/hraven/Flow.java index bed215c..f4f519d 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/Flow.java +++ b/hraven-core/src/main/java/com/twitter/hraven/Flow.java @@ -149,8 +149,8 @@ public byte[] code() { /** app Version for this flow */ private String version ; - /** hadoop Version for this flow */ - private HadoopVersion hadoopVersion ; + /** hadoop history file type for this flow */ + private HistoryFileType historyFileType ; /** hadoop pool/queue for this flow */ private String queue ; @@ -281,13 +281,13 @@ public void addJob(JobDetails job) { // set the submit time of the flow to the submit time of the first job if (( this.submitTime == 0L ) || (job.getSubmitTime() < this.submitTime)) { this.submitTime = job.getSubmitTime(); - // set the hadoop version once for this job - this.hadoopVersion = job.getHadoopVersion(); + // set the hadoop history file type once for this job + this.historyFileType = job.getHistoryFileType(); // set the queue/pool once for this flow this.queue = job.getQueue(); - if (this.hadoopVersion == null) { + if (this.historyFileType == null) { // set it to default so that we don't run into NPE during json serialization - this.hadoopVersion = HadoopVersion.ONE; + this.historyFileType = HistoryFileType.ONE; } } @@ -430,12 +430,12 @@ public void setCost(double cost) { this.cost = cost; } - public HadoopVersion getHadoopVersion() { - return hadoopVersion; + public HistoryFileType getHistoryFileType() { + return historyFileType; } - public void setHadoopVersion(HadoopVersion hadoopVersion) { - this.hadoopVersion = hadoopVersion; + public void setHistoryFileType(HistoryFileType hadoopFileType) { + this.historyFileType = hadoopFileType; } public long getReduceShuffleBytes() { diff --git a/hraven-core/src/main/java/com/twitter/hraven/Framework.java b/hraven-core/src/main/java/com/twitter/hraven/Framework.java index 29f00bb..1a288a5 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/Framework.java +++ b/hraven-core/src/main/java/com/twitter/hraven/Framework.java @@ -28,6 +28,10 @@ public enum Framework { * */ SCALDING("s", "scalding"), + /** + * Identifies Spark applications + */ + SPARK("sp", "spark"), /** * */ diff --git 
a/hraven-core/src/main/java/com/twitter/hraven/HadoopVersion.java b/hraven-core/src/main/java/com/twitter/hraven/HistoryFileType.java similarity index 77% rename from hraven-core/src/main/java/com/twitter/hraven/HadoopVersion.java rename to hraven-core/src/main/java/com/twitter/hraven/HistoryFileType.java index 19b8cb6..9c85e3b 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/HadoopVersion.java +++ b/hraven-core/src/main/java/com/twitter/hraven/HistoryFileType.java @@ -17,8 +17,11 @@ package com.twitter.hraven; /** - * hadoop versions + * hadoop file type being processed in hRaven + * ONE: hadoop v1 file + * TWO: hadoop v2 file + * SPARK: spark history file */ -public enum HadoopVersion { - ONE, TWO +public enum HistoryFileType { + ONE, TWO, SPARK }; diff --git a/hraven-core/src/main/java/com/twitter/hraven/JobDescFactory.java b/hraven-core/src/main/java/com/twitter/hraven/JobDescFactory.java index 76ab46f..de33b85 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/JobDescFactory.java +++ b/hraven-core/src/main/java/com/twitter/hraven/JobDescFactory.java @@ -27,6 +27,7 @@ public class JobDescFactory { private static final MRJobDescFactory MR_JOB_DESC_FACTORY = new MRJobDescFactory(); private static final PigJobDescFactory PIG_JOB_DESC_FACTORY = new PigJobDescFactory(); + private static final SparkJobDescFactory SPARK_JOB_DESC_FACTORY = new SparkJobDescFactory(); private static final ScaldingJobDescFactory SCALDING_JOB_DESC_FACTORY = new ScaldingJobDescFactory(); @@ -43,6 +44,8 @@ public static JobDescFactoryBase getFrameworkSpecificJobDescFactory(Configuratio return PIG_JOB_DESC_FACTORY; case SCALDING: return SCALDING_JOB_DESC_FACTORY; + case SPARK: + return SPARK_JOB_DESC_FACTORY; default: return MR_JOB_DESC_FACTORY; } @@ -69,17 +72,22 @@ public static JobDesc createJobDesc(QualifiedJobId qualifiedJobId, */ public static Framework getFramework(Configuration jobConf) { // Check if this is a pig job - boolean isPig = jobConf.get(Constants.PIG_CONF_KEY) != null; - if (isPig) { + if (jobConf.get(Constants.PIG_CONF_KEY) != null) { return Framework.PIG; - } else { - String flowId = jobConf.get(Constants.CASCADING_FLOW_ID_CONF_KEY); - if ((flowId == null) || (flowId.length() == 0)) { - return Framework.NONE; - } else { - return Framework.SCALDING; - } } + + if ((jobConf.get(Constants.CASCADING_FLOW_ID_CONF_KEY) != null) + && (jobConf.get(Constants.CASCADING_FLOW_ID_CONF_KEY).length() != 0)) { + return Framework.SCALDING; + } + + if (Constants.FRAMEWORK_CONF_SPARK_VALUE + .equalsIgnoreCase(jobConf.get(Constants.FRAMEWORK_CONF_KEY))) { + return Framework.SPARK; + } + + return Framework.NONE; + } /** diff --git a/hraven-core/src/main/java/com/twitter/hraven/JobDetails.java b/hraven-core/src/main/java/com/twitter/hraven/JobDetails.java index ed51a60..5ebf67e 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/JobDetails.java +++ b/hraven-core/src/main/java/com/twitter/hraven/JobDetails.java @@ -59,7 +59,7 @@ public class JobDetails implements Comparable { private String priority; private String status; private String version; - private HadoopVersion hadoopVersion; + private HistoryFileType historyFileType; private String queue; private long submitTime; private long launchTime; @@ -182,13 +182,13 @@ public void setVersion(String version) { this.version = version; } - public HadoopVersion getHadoopVersion() { - return hadoopVersion; + public HistoryFileType getHistoryFileType() { + return historyFileType; } - public void setHadoopVersion(String hadoopVersion) { + public void 
setHistoryFileType(String historyFileType) { // the enum.valueOf could throw a NPE or IllegalArgumentException - this.hadoopVersion = HadoopVersion.valueOf(hadoopVersion); + this.historyFileType = HistoryFileType.valueOf(historyFileType); } public long getSubmitTime() { @@ -523,21 +523,22 @@ public static double getValueAsDouble(byte[] key, Map infoValues } /** - * return an enum value from the NavigableMap for hadoop version + * return an enum value from the NavigableMap + * for history file type * @param key * @param infoValues * @return value as a enum or default of hadoop ONE */ - private HadoopVersion getHadoopVersionFromResult(final JobHistoryKeys key, + private HistoryFileType getHistoryFileTypeFromResult(final JobHistoryKeys key, final NavigableMap infoValues) { byte[] value = infoValues.get(JobHistoryKeys.KEYS_TO_BYTES.get(key)); if (value != null) { String hv = Bytes.toString(value); // could throw an NPE or IllegalArgumentException - return HadoopVersion.valueOf(hv); + return HistoryFileType.valueOf(hv); } else { // default is hadoop 1 - return HadoopVersion.ONE; + return HistoryFileType.ONE; } } @@ -594,7 +595,7 @@ public void populate(Result result) { this.jobName = getValueAsString(JobHistoryKeys.JOBNAME, infoValues); this.priority = getValueAsString(JobHistoryKeys.JOB_PRIORITY, infoValues); this.status = getValueAsString(JobHistoryKeys.JOB_STATUS, infoValues); - this.hadoopVersion = getHadoopVersionFromResult(JobHistoryKeys.hadoopversion, infoValues); + this.historyFileType = getHistoryFileTypeFromResult(JobHistoryKeys.hadoopversion, infoValues); this.version = getValueAsString(Constants.VERSION_COLUMN_BYTES, infoValues); this.cost = getValueAsDouble(Constants.JOBCOST_BYTES, infoValues); @@ -622,8 +623,8 @@ public void populate(Result result) { infoValues); // populate stats from counters for this job based on - // hadoop version - if (this.hadoopVersion == HadoopVersion.TWO) { + // history file type + if (this.historyFileType == HistoryFileType.TWO) { // map file bytes read this.mapFileBytesRead = getCounterValueAsLong(this.mapCounters, Constants.FILESYSTEM_COUNTER_HADOOP2, Constants.FILES_BYTES_READ); diff --git a/hraven-core/src/main/java/com/twitter/hraven/JobId.java b/hraven-core/src/main/java/com/twitter/hraven/JobId.java index 887a0a5..863a921 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/JobId.java +++ b/hraven-core/src/main/java/com/twitter/hraven/JobId.java @@ -15,6 +15,7 @@ */ package com.twitter.hraven; +import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.builder.CompareToBuilder; import org.apache.commons.lang.builder.HashCodeBuilder; import org.codehaus.jackson.annotate.JsonCreator; @@ -27,6 +28,14 @@ */ public class JobId implements Comparable { protected static final String JOB_ID_SEP = "_"; + public static final String JOB_PREFIX = "job"; + /** + * The prefix of the job id which could be + * "" for a mapreduce job: + * - to maintain backward compatibility with existing data + * "spark" for a spark job + */ + protected String jobPrefix; /** * The jobtracker start time from the job ID, obtained from parsing the * center component of the job ID. 
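Reviewer note on the JobId changes that follow: a minimal, hypothetical usage sketch (not part of this patch) of the prefix handling being added. The class name JobIdPrefixDemo is invented for illustration; the expected values in the comments follow the TestJobId cases added later in this patch.

import com.twitter.hraven.JobId;

public class JobIdPrefixDemo {
  public static void main(String[] args) {
    // A classic mapreduce id: the "job" prefix is normalized to "" internally
    // (for backward compatibility with existing rows), but getJobIdString()
    // re-adds it when formatting.
    JobId mrId = new JobId("job_1374258111572_0003");
    System.out.println(mrId.getJobPrefix());      // "" (blank)
    System.out.println(mrId.getJobEpoch());       // 1374258111572
    System.out.println(mrId.getJobSequence());    // 3
    System.out.println(mrId.getJobIdString());    // job_1374258111572_0003

    // A spark id keeps its prefix, so the prefix participates in compareTo,
    // equals/hashCode and the row-key encoding in JobIdConverter.
    JobId sparkId = new JobId("spark_1413515656084_3051855");
    System.out.println(sparkId.getJobPrefix());   // "spark"
    System.out.println(sparkId.getJobIdString()); // spark_1413515656084_3051855
  }
}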
@@ -43,27 +52,59 @@ public JobId(@JsonProperty("jobIdString") String jobId) { if (jobId != null) { String[] elts = jobId.trim().split(JOB_ID_SEP); try { + this.jobPrefix = elts[0]; + if (JOB_PREFIX.equalsIgnoreCase(this.jobPrefix)) { + // set it to blank since existing data + // is being stored without the "job" prefix + this.jobPrefix = ""; + } this.jobEpoch = Long.parseLong(elts[1]); this.jobSequence = Long.parseLong(elts[2]); } catch (Exception e) { - throw new IllegalArgumentException("Invalid job ID '"+jobId+ - "', must be in the format 'job_[0-9]+_[0-9]+'"); + throw new IllegalArgumentException("Invalid job ID '" + jobId + + "', must be in the format '(job|spark)_[0-9]+_[0-9]+'"); } + } else { + this.jobPrefix = ""; + this.jobEpoch = 0L; + this.jobSequence = 0L; } } public JobId(long epoch, long seq) { this.jobEpoch = epoch; this.jobSequence = seq; + // set default prefix of job to be blank + this.jobPrefix = ""; + } + + public JobId(String jobPrefix, long epoch, long seq) { + if(JOB_PREFIX.equalsIgnoreCase(jobPrefix)) { + // set it to blank since existing data + // is being stored without the "job" prefix + this.jobPrefix = ""; + } else { + this.jobPrefix = jobPrefix; + } + this.jobEpoch = epoch; + this.jobSequence = seq; } public JobId(JobId idToCopy) { if (idToCopy != null) { + this.jobPrefix = idToCopy.getJobPrefix(); this.jobEpoch = idToCopy.getJobEpoch(); this.jobSequence = idToCopy.getJobSequence(); } } + /** + * Returns the prefix part of the job id + */ + public String getJobPrefix() { + return this.jobPrefix; + } + /** * Returns the epoch value from the job ID. The epoch value is generated by simply * parsing the date formatted jobtracker start time as a long value. @@ -86,7 +127,11 @@ public long getJobSequence() { } public String getJobIdString() { - return String.format("job_%d_%04d", this.jobEpoch, this.jobSequence); + String prefix = this.jobPrefix; + if(StringUtils.isBlank(prefix)) { + prefix = JOB_PREFIX; + } + return String.format("%s_%d_%04d", prefix, this.jobEpoch, this.jobSequence); } public String toString() { @@ -95,17 +140,18 @@ public String toString() { /** * Compares two JobId objects on the basis of their + * jobPrefix (either blank or "spark") * jobEpoch (jobtracker start time from the job ID) * and * jobSequence( jobtracker assigned sequence number for the job,) * * @param other - * @return 0 if this jobEpoch and jobSequence are equal to - * other jobEpoch and jobSequence, - * 1 if this jobEpoch and jobSequence are greater than - * other jobEpoch and jobSequence, - * -1 if this jobEpoch and jobSequence less than - * other jobEpoch and jobSequence + * @return 0 if this jobPrefix, jobEpoch and jobSequence are equal to + * other jobPrefix, jobEpoch and jobSequence, + * 1 if this jobPrefix, jobEpoch and jobSequence are greater than + * other jobPrefix, jobEpoch and jobSequence, + * -1 if this jobPrefix, jobEpoch and jobSequence less than + * other jobPrefix, jobEpoch and jobSequence * */ @Override @@ -114,8 +160,8 @@ public int compareTo(JobId o) { // nulls sort last return -1; } - return new CompareToBuilder() + .append(this.jobPrefix, o.getJobPrefix()) .append(this.jobEpoch, o.getJobEpoch()) .append(this.jobSequence, o.getJobSequence()) .toComparison(); @@ -132,6 +178,7 @@ public boolean equals(Object other) { @Override public int hashCode(){ return new HashCodeBuilder() + .append(this.jobPrefix) .append(this.jobEpoch) .append(this.jobSequence) .toHashCode(); diff --git a/hraven-core/src/main/java/com/twitter/hraven/QualifiedJobId.java 
b/hraven-core/src/main/java/com/twitter/hraven/QualifiedJobId.java index 36f762f..9079dc6 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/QualifiedJobId.java +++ b/hraven-core/src/main/java/com/twitter/hraven/QualifiedJobId.java @@ -57,4 +57,8 @@ public String getCluster() { return cluster; } + @Override + public String toString() { + return this.cluster + Constants.SEP + this.getJobIdString(); + } } diff --git a/hraven-core/src/main/java/com/twitter/hraven/SparkJobDescFactory.java b/hraven-core/src/main/java/com/twitter/hraven/SparkJobDescFactory.java new file mode 100644 index 0000000..26418a4 --- /dev/null +++ b/hraven-core/src/main/java/com/twitter/hraven/SparkJobDescFactory.java @@ -0,0 +1,55 @@ +/* +Copyright 2014 Twitter, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package com.twitter.hraven; + +import org.apache.hadoop.conf.Configuration; + +/** + * Used to {@link JobKey} instances that can deal with {@link Configuration} + * file (contents) for {@link Framework#SPARK} + */ +public class SparkJobDescFactory extends JobDescFactoryBase { + + /** + * creates a JobDesc obj based on the framework + */ + @Override + JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, + Configuration jobConf) { + + // for spark jobs, app id is the same as jobname + String appId = jobConf.get(Constants.JOB_NAME_HADOOP2_CONF_KEY, + Constants.UNKNOWN); + + if (Constants.UNKNOWN.equals(appId)) { + // Fall back to job id, should not happen + appId = qualifiedJobId.getJobIdString(); + } + + String version = jobConf.get(Constants.SPARK_VERSION_CONF_KEY, + Constants.UNKNOWN); + + return create(qualifiedJobId, jobConf, appId, version, + Framework.SPARK, submitTimeMillis); + } + + @Override + String getAppIdFromJobName(String jobName) { + // for spark jobs, app id is the same as jobname + return jobName; + } +} + diff --git a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobHistoryRawService.java b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobHistoryRawService.java index 4c6fef7..a992f50 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobHistoryRawService.java +++ b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobHistoryRawService.java @@ -209,15 +209,15 @@ public Scan getHistoryRawTableScan(String cluster, String minJobId, InclusiveStopFilter inclusiveStopFilter = new InclusiveStopFilter(lastRow); filters.addFilter(inclusiveStopFilter); LOG.info("Stopping raw table scan (stop filter) at " - + Bytes.toStringBinary(lastRow) + " " + idConv.fromBytes(lastRow)); + + Bytes.toStringBinary(lastRow) + " " + idConv.fromBytes(lastRow).toString()); // Add one to the jobSequence of the maximum JobId. 
JobId maximumJobId = new JobId(maxJobId); - JobId oneBiggerThanMaxJobId = new JobId(maximumJobId.getJobEpoch(), + JobId oneBiggerThanMaxJobId = new JobId(maximumJobId.getJobPrefix(), + maximumJobId.getJobEpoch(), maximumJobId.getJobSequence() + 1); stopRow = idConv.toBytes(new QualifiedJobId(cluster, oneBiggerThanMaxJobId)); - } else { char oneBiggerSep = (char) (Constants.SEP_CHAR + 1); stopRow = Bytes.toBytes(cluster + oneBiggerSep); diff --git a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobIdConverter.java b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobIdConverter.java index 1b3436d..fe65f50 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobIdConverter.java +++ b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobIdConverter.java @@ -15,8 +15,10 @@ */ package com.twitter.hraven.datasource; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hbase.util.Bytes; +import com.twitter.hraven.Constants; import com.twitter.hraven.JobId; /** @@ -24,19 +26,38 @@ public class JobIdConverter implements ByteConverter { @Override public byte[] toBytes(JobId jobId) { - return Bytes.add(Bytes.toBytes(jobId.getJobEpoch()), - Bytes.toBytes(jobId.getJobSequence())); + String prefix = jobId.getJobPrefix(); + if (StringUtils.isBlank(prefix)) { + // do not include "job" prefix in conversion + return Bytes.add(Bytes.toBytes(jobId.getJobEpoch()), + Bytes.toBytes(jobId.getJobSequence())); + } else { + return Bytes.add(Bytes.toBytes(jobId.getJobPrefix()), + Bytes.toBytes(jobId.getJobEpoch()), + Bytes.toBytes(jobId.getJobSequence())); + } } @Override public JobId fromBytes(byte[] bytes) { - if (bytes == null || bytes.length < 16) { + int packedBytesEpochSeqSize = Constants.RUN_ID_LENGTH_JOBKEY + + Constants.SEQUENCE_NUM_LENGTH_JOBKEY; + + if (bytes == null || bytes.length < packedBytesEpochSeqSize) { return null; } - // expect a packed bytes encoding of [8 bytes epoch][8 bytes seq] - long epoch = Bytes.toLong(bytes, 0); - long seq = Bytes.toLong(bytes, 8); - return new JobId(epoch, seq); + if (bytes.length <= packedBytesEpochSeqSize) { + // expect a packed bytes encoding of [8 bytes epoch][8 bytes seq] + long epoch = Bytes.toLong(bytes, 0); + long seq = Bytes.toLong(bytes, Constants.SEQUENCE_NUM_LENGTH_JOBKEY); + return new JobId(epoch, seq); + } else { + // expect a packed bytes encoding of [prefix][8 bytes epoch][8 bytes seq] + String prefix = Bytes.toString(bytes, 0, (bytes.length - packedBytesEpochSeqSize)); + long epoch = Bytes.toLong(bytes, prefix.length()); + long seq = Bytes.toLong(bytes, (bytes.length - Constants.SEQUENCE_NUM_LENGTH_JOBKEY)); + return new JobId(prefix, epoch, seq); + } } } diff --git a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobKeyConverter.java b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobKeyConverter.java index 241325c..41ff5ff 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/datasource/JobKeyConverter.java +++ b/hraven-core/src/main/java/com/twitter/hraven/datasource/JobKeyConverter.java @@ -15,6 +15,7 @@ */ package com.twitter.hraven.datasource; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hbase.util.Bytes; import com.twitter.hraven.Constants; @@ -105,19 +106,23 @@ static byte[][] splitJobKey(byte[] rawKey) { byte[] remainder = splits[3]; byte[][] extraComponents = new byte[3][]; - int offset = 0; - // run ID - extraComponents[0] = ByteUtil.safeCopy(remainder, offset, 8); - // followed by sep + job epoch + job seq - offset += 
8+Constants.SEP_BYTES.length; - extraComponents[1] = ByteUtil.safeCopy(remainder, offset, 16); - offset += 16+Constants.SEP_BYTES.length; - // followed by any remainder - extraComponents[2] = ByteUtil.safeCopy(remainder, offset, remainder.length - offset); + // now extract components (runId!jobId!additional) + // run id occurs at the start and is of 8 bytes in length + extraComponents[0] = extractRunId(remainder, Constants.RUN_ID_LENGTH_JOBKEY); + // job id occurs after run id and separator + int offset = Constants.RUN_ID_LENGTH_JOBKEY + Constants.SEP_BYTES.length; + extraComponents[1] = extractJobId(offset, remainder); + + // now extract any additional stuff after the job id + if (extraComponents[1] != null) { + offset += extraComponents[1].length; + } + offset += Constants.SEP_BYTES.length; + extraComponents[2] = extractRemainder(offset, remainder); int extraSize = 0; // figure out the full size of all splits - for (int i=0; i < extraComponents.length; i++) { + for (int i = 0; i < extraComponents.length; i++) { if (extraComponents[i] != null) { extraSize++; } else { @@ -139,4 +144,47 @@ static byte[][] splitJobKey(byte[] rawKey) { } return splits; } + + private static byte[] extractRemainder(int offset, byte[] remainder) { + return ByteUtil.safeCopy(remainder, offset, + remainder.length - offset); + } + + private static int getLengthJobIdPackedBytes(int offset, byte[] remainder) { + + int lengthRest = remainder.length - offset ; + byte[] jobIdOtherStuff = null; + if (lengthRest > offset) { + jobIdOtherStuff = ByteUtil.safeCopy(remainder, offset, + remainder.length-offset); + if (StringUtils.startsWith(Bytes.toString(jobIdOtherStuff), Constants.FRAMEWORK_CONF_SPARK_VALUE)) { + return Constants.SPARK_JOB_KEY_LENGTH; + } + } + return (Constants.SEQUENCE_NUM_LENGTH_JOBKEY + Constants.RUN_ID_LENGTH_JOBKEY); + } + + /** + * extracts the job id from the packed byte array; the array looks like encodedRunid!jobid!otherstuff + * @param remainder + * @return + */ + private static byte[] extractJobId(int offset, byte[] remainder) { + // since remainder contains runid ! jobid ! 
possibly other stuff + // the offset for reading job is: + // 8 bytes for run id + // + + // bytes for separator field + int lengthJobId = getLengthJobIdPackedBytes(offset, remainder); + + return ByteUtil.safeCopy(remainder, offset, lengthJobId); + } + + /** + * extracts a long number representation of encoded run id it reads 8 bytes + * @param remainder + */ + private static byte[] extractRunId(byte[] remainder, int lengthToRead) { + return ByteUtil.safeCopy(remainder, 0, lengthToRead); + } } diff --git a/hraven-core/src/main/java/com/twitter/hraven/rest/ObjectMapperProvider.java b/hraven-core/src/main/java/com/twitter/hraven/rest/ObjectMapperProvider.java index 5a4b65f..aa0763e 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/rest/ObjectMapperProvider.java +++ b/hraven-core/src/main/java/com/twitter/hraven/rest/ObjectMapperProvider.java @@ -259,13 +259,13 @@ public void serialize(Flow aFlow, JsonGenerator jsonGenerator, jsonGenerator.writeNumber(aFlow.getRunId()); jsonGenerator.writeFieldName("version"); jsonGenerator.writeString(aFlow.getVersion()); - jsonGenerator.writeFieldName("hadoopVersion"); + jsonGenerator.writeFieldName("historyFileType"); /** * unlikely that the next line with .toString * will throw NPE since Flow class always sets * default hadoop version in Flow#addJob */ - jsonGenerator.writeString(aFlow.getHadoopVersion().toString()); + jsonGenerator.writeString(aFlow.getHistoryFileType().toString()); jsonGenerator.writeFieldName(Constants.HRAVEN_QUEUE); jsonGenerator.writeString(aFlow.getQueue()); jsonGenerator.writeFieldName("counters"); diff --git a/hraven-core/src/test/java/com/twitter/hraven/GenerateFlowTestData.java b/hraven-core/src/test/java/com/twitter/hraven/GenerateFlowTestData.java index 2fae24f..c7f5f06 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/GenerateFlowTestData.java +++ b/hraven-core/src/test/java/com/twitter/hraven/GenerateFlowTestData.java @@ -74,7 +74,7 @@ public void loadFlow(String cluster, String user, String app, long runId, Bytes.toBytes("SUCCESS")); p.add(Constants.INFO_FAM_BYTES, Constants.VERSION_COLUMN_BYTES, Bytes.toBytes(version)); p.add(Constants.INFO_FAM_BYTES, JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.hadoopversion), - Bytes.toBytes(HadoopVersion.ONE.toString())); + Bytes.toBytes(HistoryFileType.ONE.toString())); p.add(Constants.INFO_FAM_BYTES, JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.TOTAL_MAPS), Bytes.toBytes(baseStats)); p.add(Constants.INFO_FAM_BYTES, JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.TOTAL_REDUCES), diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestFramework.java b/hraven-core/src/test/java/com/twitter/hraven/TestFramework.java index 503a43d..239db0d 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestFramework.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestFramework.java @@ -18,6 +18,7 @@ import static com.twitter.hraven.Framework.NONE; import static com.twitter.hraven.Framework.PIG; import static com.twitter.hraven.Framework.SCALDING; +import static com.twitter.hraven.Framework.SPARK; import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertNotNull; import static junit.framework.Assert.assertTrue; @@ -39,6 +40,7 @@ public void testGetCode() { assertEquals(PIG, Framework.get(PIG.getCode())); assertEquals(SCALDING, Framework.get(SCALDING.getCode())); assertEquals(NONE, Framework.get(NONE.getCode())); + assertEquals(SPARK,Framework.get(SPARK.getCode())); } /** @@ -49,10 +51,11 @@ public void getDescription() { 
assertNotNull(PIG.getDescription()); assertNotNull(SCALDING.getDescription()); assertNotNull(NONE.getDescription()); - + assertNotNull(SPARK.getDescription()); assertTrue("Description is not expected to be empty", PIG.getDescription().length() > 0); assertTrue("Description is not expected to be empty", SCALDING.getDescription().length() > 0); assertTrue("Description is not expected to be empty", NONE.getDescription().length() > 0); + assertTrue("Description is not expected to be empty", SPARK.getDescription().length() > 0); } diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestHadoopVersion.java b/hraven-core/src/test/java/com/twitter/hraven/TestHistoryFileType.java similarity index 62% rename from hraven-core/src/test/java/com/twitter/hraven/TestHadoopVersion.java rename to hraven-core/src/test/java/com/twitter/hraven/TestHistoryFileType.java index 4cf2ce4..9f62bbb 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestHadoopVersion.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestHistoryFileType.java @@ -23,24 +23,24 @@ import org.junit.Test; /** - * test class for hadoop versions + * test class for hadoop history file type */ -public class TestHadoopVersion { +public class TestHistoryFileType { - private enum ExpVersions { - ONE, TWO + private enum ExpHistoryFileType { + ONE, TWO, SPARK } @Test - public void checkVersions() { - assertEquals(ExpVersions.values().length, HadoopVersion.values().length); - for (HadoopVersion hv : HadoopVersion.values()) { - assertTrue(ExpVersions.valueOf(hv.toString()) != null); + public void checkHistoryFileType() { + assertEquals(ExpHistoryFileType.values().length, HistoryFileType.values().length); + for (HistoryFileType hv : HistoryFileType.values()) { + assertTrue(ExpHistoryFileType.valueOf(hv.toString()) != null); } } @Test(expected=IllegalArgumentException.class) - public void testNonExistentVersion() { - assertNull(HadoopVersion.valueOf("DOES NOT EXIST")); + public void testNonExistentHistoryFileType() { + assertNull(HistoryFileType.valueOf("DOES NOT EXIST")); } }; diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactory.java b/hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactory.java index 7313d21..3dd5a36 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactory.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactory.java @@ -52,4 +52,26 @@ public void testCluster() { result = JobDescFactory.getCluster(c); assertNull(result); } + + @Test + public void testGetFramework() { + Configuration c = new Configuration(); + Framework f = JobDescFactory.getFramework(c); + assertEquals(Framework.NONE, f); + + // test spark + c.set(Constants.FRAMEWORK_CONF_KEY, Constants.FRAMEWORK_CONF_SPARK_VALUE); + f = JobDescFactory.getFramework(c); + assertEquals(Framework.SPARK, f); + + // test scalding + c.set(Constants.CASCADING_FLOW_ID_CONF_KEY, "some value"); + f = JobDescFactory.getFramework(c); + assertEquals(Framework.SCALDING, f); + + // test pig + c.set(Constants.PIG_CONF_KEY," some random value"); + f = JobDescFactory.getFramework(c); + assertEquals(Framework.PIG, f); + } } diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestJobDetails.java b/hraven-core/src/test/java/com/twitter/hraven/TestJobDetails.java index 0718a7a..51d20dd 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestJobDetails.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestJobDetails.java @@ -181,7 +181,7 @@ private void addInTotalCounters(NavigableMap infoValues) { 
infoValues.put(c4, value); } - private NavigableMap getInfoValues(HadoopVersion hv) { + private NavigableMap getInfoValues(HistoryFileType hv) { NavigableMap infoValues = new TreeMap(Bytes.BYTES_COMPARATOR); infoValues.put(JobHistoryKeys.KEYS_TO_BYTES.get(JobHistoryKeys.JOBID), @@ -321,8 +321,8 @@ private void confirmHadoop1Counters(JobDetails jd) { .getValue()); } - private void confirmHadoopVersion(JobDetails jd, HadoopVersion hv) { - assertEquals(hv, jd.getHadoopVersion()); + private void confirmHistoryFileType(JobDetails jd, HistoryFileType hv) { + assertEquals(hv, jd.getHistoryFileType()); } /** @@ -331,7 +331,7 @@ private void confirmHadoopVersion(JobDetails jd, HadoopVersion hv) { @Test public void testPopulateForHadoop2() { JobDetails jd = new JobDetails(null); - NavigableMap infoValues = getInfoValues(HadoopVersion.TWO); + NavigableMap infoValues = getInfoValues(HistoryFileType.TWO); Result result = Mockito.mock(Result.class); when(result.getFamilyMap(Constants.INFO_FAM_BYTES)).thenReturn(infoValues); @@ -339,7 +339,7 @@ public void testPopulateForHadoop2() { jd.populate(result); // confirm hadoop2 first - confirmHadoopVersion(jd, HadoopVersion.TWO); + confirmHistoryFileType(jd, HistoryFileType.TWO); // confirm job details confirmSomeJobDeets(jd); // confirm hadoop2 counters @@ -449,7 +449,7 @@ public void testGetCounterValueAsLong() { @Test public void testPopulateForHadoop1() { JobDetails jd = new JobDetails(null); - NavigableMap infoValues = getInfoValues(HadoopVersion.ONE); + NavigableMap infoValues = getInfoValues(HistoryFileType.ONE); Result result1 = mock(Result.class); when(result1.getFamilyMap(Constants.INFO_FAM_BYTES)).thenReturn(infoValues); @@ -457,7 +457,7 @@ public void testPopulateForHadoop1() { jd.populate(result1); // confirm hadoop 1 - confirmHadoopVersion(jd, HadoopVersion.ONE); + confirmHistoryFileType(jd, HistoryFileType.ONE); // confirm hadoop1 counters confirmHadoop1Counters(jd); // confirm job details diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestJobId.java b/hraven-core/src/test/java/com/twitter/hraven/TestJobId.java index 670e838..fb990bc 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestJobId.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestJobId.java @@ -39,6 +39,9 @@ public void testSerialization() { String job2 = "job_"+epoch+"_1111"; String job3 = "job_"+epoch+"_2222"; String job4 = "job_"+epoch+"_11111"; + String job5 = "spark_"+epoch+"_11112"; + String job6 = "spark_"+epoch+"_11113"; + String job7 = "spark_1413515656084_305185"; JobId jobId1 = new JobId(job1); assertEquals(epoch, jobId1.getJobEpoch()); @@ -66,6 +69,23 @@ public void testSerialization() { // check Comparable implementation assertTrue(jobId3.compareTo(jobId4) < 0); + JobId jobId5 = new JobId(job5); + assertEquals(epoch, jobId5.getJobEpoch()); + assertEquals(11112L, jobId5.getJobSequence()); + assertEquals(job5, jobId5.getJobIdString()); + + JobId jobId6 = new JobId(job6); + assertEquals(epoch, jobId6.getJobEpoch()); + assertEquals(11113L, jobId6.getJobSequence()); + assertEquals(job6, jobId6.getJobIdString()); + // check Comparable implementation + assertTrue(jobId5.compareTo(jobId6) < 0); + + JobId jobId7 = new JobId(job7); + assertEquals(1413515656084L, jobId7.getJobEpoch()); + assertEquals(305185L, jobId7.getJobSequence()); + assertEquals(job7, jobId7.getJobIdString()); + JobIdConverter conv = new JobIdConverter(); JobId tmp = conv.fromBytes( conv.toBytes(jobId1) ); assertEquals(jobId1, tmp); @@ -80,6 +100,15 @@ public void 
testSerialization() { tmp = conv.fromBytes( conv.toBytes(jobId4) ); assertEquals(jobId4, tmp); assertEquals(jobId4.hashCode(), tmp.hashCode()); + tmp = conv.fromBytes( conv.toBytes(jobId5) ); + assertEquals(jobId5, tmp); + assertEquals(jobId5.hashCode(), tmp.hashCode()); + tmp = conv.fromBytes( conv.toBytes(jobId6) ); + assertEquals(jobId6, tmp); + assertEquals(jobId6.hashCode(), tmp.hashCode()); + tmp = conv.fromBytes( conv.toBytes(jobId7) ); + assertEquals(jobId7, tmp); + assertEquals(jobId7.hashCode(), tmp.hashCode()); } /** @@ -93,12 +122,16 @@ public void testJobIdOrdering() { String job3 = "job_20120101000000_2222"; String job4 = "job_20120101000000_11111"; String job5 = "job_20120201000000_0001"; + String job6 = "spark_20120101000000_11111"; + String job7 = "spark_20120201000000_0001"; JobId jobId1 = new JobId(job1); JobId jobId2 = new JobId(job2); JobId jobId3 = new JobId(job3); JobId jobId4 = new JobId(job4); JobId jobId5 = new JobId(job5); + JobId jobId6 = new JobId(job6); + JobId jobId7 = new JobId(job7); JobIdConverter conv = new JobIdConverter(); byte[] job1Bytes = conv.toBytes(jobId1); @@ -106,10 +139,13 @@ public void testJobIdOrdering() { byte[] job3Bytes = conv.toBytes(jobId3); byte[] job4Bytes = conv.toBytes(jobId4); byte[] job5Bytes = conv.toBytes(jobId5); + byte[] job6Bytes = conv.toBytes(jobId6); + byte[] job7Bytes = conv.toBytes(jobId7); assertTrue(Bytes.compareTo(job1Bytes, job2Bytes) < 0); assertTrue(Bytes.compareTo(job2Bytes, job3Bytes) < 0); assertTrue(Bytes.compareTo(job3Bytes, job4Bytes) < 0); assertTrue(Bytes.compareTo(job4Bytes, job5Bytes) < 0); + assertTrue(Bytes.compareTo(job6Bytes, job7Bytes) < 0); } } diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestJobKey.java b/hraven-core/src/test/java/com/twitter/hraven/TestJobKey.java index 5f18288..b28207c 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestJobKey.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestJobKey.java @@ -66,11 +66,9 @@ public void testKeySerialization() { assertEquals(key.getJobId(), key2.getJobId()); // key with no trailing job Id - keyBytes = ByteUtil.join(Constants.SEP_BYTES, - Bytes.toBytes("c1@local"), - Bytes.toBytes("user1"), - Bytes.toBytes("app1"), - Bytes.toBytes(Long.MAX_VALUE-15L)); + keyBytes = + ByteUtil.join(Constants.SEP_BYTES, Bytes.toBytes("c1@local"), Bytes.toBytes("user1"), + Bytes.toBytes("app1"), Bytes.toBytes(Long.MAX_VALUE - 15L)); key2 = conv.fromBytes(keyBytes); assertEquals("c1@local", key2.getCluster()); assertEquals("user1", key2.getUserName()); @@ -79,6 +77,7 @@ public void testKeySerialization() { assertEquals(0L, key2.getJobId().getJobEpoch()); assertEquals(0L, key2.getJobId().getJobSequence()); + // key with empty appId key = new JobKey("c1@local", "user1", "", 1234L, "job_201206201718_1941"); keyBytes = conv.toBytes(key); @@ -86,6 +85,83 @@ public void testKeySerialization() { assertKey(key, key2); } + /** + * Confirm that we can properly serialize and deserialize + * a JobKey for spark jobs + * Important since "spark" prefix will be stored in keys + */ + @Test + public void testKeySerializationSpark() { + JobKeyConverter conv = new JobKeyConverter(); + JobKey key = new JobKey("clusterS1@identifierS1", + "userS1", + "appSpark1", + 13, + "spark_1413515656084_3051855"); + byte[] keyBytes = conv.toBytes(key); + JobKey key2 = conv.fromBytes(keyBytes); + assertEquals(key.getCluster(), key2.getCluster()); + assertEquals(key.getUserName(), key2.getUserName()); + assertEquals(key.getAppId(), key2.getAppId()); + 
assertEquals(key.getRunId(), key2.getRunId()); + assertEquals(key.getJobId(), key2.getJobId()); + + // also verify that the runId gets inverted in the serialized byte + // representation + byte[][] keyParts = ByteUtil.split(keyBytes, Constants.SEP_BYTES); + assertEquals(5, keyParts.length); + long encodedRunId = Bytes.toLong(keyParts[3]); + assertEquals(key.getRunId(), Long.MAX_VALUE - encodedRunId); + + // key with empty appId + key = new JobKey("c1@local", "user1", "", 1234L, "spark_1413515656084_3051855"); + keyBytes = conv.toBytes(key); + key2 = conv.fromBytes(keyBytes); + assertKey(key, key2); + + JobKey jobKey = new JobKey("cluster1", + "user1", + "com.example.spark_program.simple_example.Main$", + 13, + // this job id has the seperator in its byte representation + // hence check for this id + "spark_1413515656084_305185"); + keyBytes = conv.toBytes(jobKey); + key2 = conv.fromBytes(keyBytes); + assertEquals(jobKey.getCluster(), key2.getCluster()); + assertEquals(jobKey.getUserName(), key2.getUserName()); + assertEquals(jobKey.getAppId(), key2.getAppId()); + assertEquals(jobKey.getRunId(), key2.getRunId()); + assertEquals(jobKey.getJobId(), key2.getJobId()); + + } + + @Test + public void checkOldFormatKey() { + // key with old format job id bytes stored, i.e. without the "job_" prefix + long epoch = 1415332804102L; + long seq = 10223L; + byte[] jobidarr = ByteUtil.join(Constants.EMPTY_BYTES, + Bytes.toBytes(epoch), + Bytes.toBytes(seq)); + byte[] keyBytes = ByteUtil.join(Constants.SEP_BYTES, + Bytes.toBytes("c1@local"), + Bytes.toBytes("user1"), + Bytes.toBytes("app1"), + Bytes.toBytes(Long.MAX_VALUE - 15L), + jobidarr); + JobKeyConverter conv = new JobKeyConverter(); + + JobKey key2 = conv.fromBytes(keyBytes); + assertEquals("c1@local", key2.getCluster()); + assertEquals("user1", key2.getUserName()); + assertEquals("app1", key2.getAppId()); + assertEquals(15L, key2.getRunId()); + assertEquals(epoch, key2.getJobId().getJobEpoch()); + assertEquals(seq, key2.getJobId().getJobSequence()); + + } + public void assertKey(JobKey expected, JobKey actual) { assertEquals(expected.getCluster(), actual.getCluster()); assertEquals(expected.getUserName(), actual.getUserName()); @@ -115,6 +191,23 @@ public void testPlainConstructor() { assertEquals("appSpace", key.getAppId()); assertEquals(17, key.getRunId()); assertEquals("job_20120101235959_1111", key.getJobId().getJobIdString()); + + // test for spark job keys + key = new JobKey("clusterS2@identifierS2 ", "userS2 ", "appSpaceS ", 17, + " spark_20120101235959_1111 "); + keyBytes = conv.toBytes(key); + key2 = conv.fromBytes(keyBytes); + assertEquals(key.getUserName(), key2.getUserName()); + assertEquals(key.getAppId(), key2.getAppId()); + assertEquals(key.getRunId(), key2.getRunId()); + assertEquals(key.getJobId(), key2.getJobId()); + + assertEquals("clusterS2@identifierS2", key.getCluster()); + assertEquals("userS2", key.getUserName()); + assertEquals("appSpaceS", key.getAppId()); + assertEquals(17, key.getRunId()); + assertEquals("spark_20120101235959_1111", key.getJobId().getJobIdString()); + } /** @@ -140,6 +233,34 @@ public void testJobDescConstructor() { assertEquals("job_20120101235959_1111", key.getJobId().getJobIdString()); } + /** + * Confirm that leading and trailing spaces get ripped off for + * spark job keys as well + */ + @Test + public void testJobDescConstructorSparkJobKeys() { + JobKeyConverter conv = new JobKeyConverter(); + JobDesc jobDesc = new JobDesc("cluster2@identifier3 ", + "user3Spark ", + "appSpaceSpark ", + 
"spaceVersion3 ", 19, + " spark_1413515656084_3051855 ", + Framework.SPARK); + JobKey key = new JobKey(jobDesc); + byte[] keyBytes = conv.toBytes(key); + JobKey key2 = conv.fromBytes(keyBytes); + assertEquals(key.getUserName(), key2.getUserName()); + assertEquals(key.getAppId(), key2.getAppId()); + assertEquals(key.getRunId(), key2.getRunId()); + assertEquals(key.getJobId(), key2.getJobId()); + + assertEquals("user3Spark", key.getUserName()); + assertEquals("cluster2@identifier3", key.getCluster()); + assertEquals("appSpaceSpark", key.getAppId()); + assertEquals(19, key.getRunId()); + assertEquals("spark_1413515656084_3051855", key.getJobId().getJobIdString()); + } + /** * Checks for correct parsing of job key when run ID may contain the byte * representation of the separator character. @@ -157,18 +278,90 @@ public void testEncodedRunId() { // assemble a job key with the bad run ID JobIdConverter idConv = new JobIdConverter(); + byte[] encodedKey = ByteUtil.join(Constants.SEP_BYTES, + Bytes.toBytes("c1S@local"), + Bytes.toBytes("userS31"), + Bytes.toBytes("spark_app1"), + encoded, + idConv.toBytes(new JobId("spark_1413515656084_51855"))); + + JobKey key = conv.fromBytes(encodedKey); + assertEquals("c1S@local", key.getCluster()); + assertEquals("userS31", key.getUserName()); + assertEquals("spark_app1", key.getAppId()); + assertEquals("spark_1413515656084_51855", key.getJobId().getJobIdString()); + + // assemble a spark job key with the bad run ID + encodedKey = ByteUtil.join(Constants.SEP_BYTES, + Bytes.toBytes("c1S@local"), + Bytes.toBytes("userS31"), + Bytes.toBytes("spark_app1"), + encoded, + idConv.toBytes(new JobId("spark_1413515656084_51855"))); + + key = conv.fromBytes(encodedKey); + assertEquals("c1S@local", key.getCluster()); + assertEquals("userS31", key.getUserName()); + assertEquals("spark_app1", key.getAppId()); + assertEquals("spark_1413515656084_51855", key.getJobId().getJobIdString()); + + } + + @Test + public void testEncodedRunIdDifferentFormats() { + JobKeyConverter conv = new JobKeyConverter(); + long now = System.currentTimeMillis(); + byte[] encoded = Bytes.toBytes(Long.MAX_VALUE - now); + // replace last byte with separator and reconvert to long + Bytes.putBytes(encoded, encoded.length-Constants.SEP_BYTES.length, + Constants.SEP_BYTES, 0, Constants.SEP_BYTES.length); + long badId = Long.MAX_VALUE - Bytes.toLong(encoded); + LOG.info("Bad run ID is "+badId); + + // create a packed byte array for + // jobid without the "job_" prefix + long epoch = 1415332804102L; + long seq = 10223L; + byte[] jobidarr = ByteUtil.join(Constants.EMPTY_BYTES, + Bytes.toBytes(epoch), + Bytes.toBytes(seq)); + + // assemble a job key with the bad run ID byte[] encodedKey = ByteUtil.join(Constants.SEP_BYTES, Bytes.toBytes("c1@local"), Bytes.toBytes("user1"), Bytes.toBytes("app1"), encoded, - idConv.toBytes(new JobId("job_20120101000000_0001"))); + jobidarr); JobKey key = conv.fromBytes(encodedKey); assertEquals("c1@local", key.getCluster()); assertEquals("user1", key.getUserName()); assertEquals("app1", key.getAppId()); - assertEquals("job_20120101000000_0001", key.getJobId().getJobIdString()); + assertEquals("job_1415332804102_10223", key.getJobId().getJobIdString()); + + // create a packed byte array for + // jobid with "spark" prefix + epoch = 1415332804102L; + seq = 10223L; + jobidarr = ByteUtil.join(Constants.EMPTY_BYTES, + Bytes.toBytes("spark"), + Bytes.toBytes(epoch), + Bytes.toBytes(seq)); + + // assemble a job key with the bad run ID + encodedKey = ByteUtil.join(Constants.SEP_BYTES, 
+ Bytes.toBytes("sc1@local"), + Bytes.toBytes("spark_user1"), + Bytes.toBytes("sparkApp1"), + encoded, + jobidarr); + + key = conv.fromBytes(encodedKey); + assertEquals("sc1@local", key.getCluster()); + assertEquals("spark_user1", key.getUserName()); + assertEquals("sparkApp1", key.getAppId()); + assertEquals("spark_1415332804102_10223", key.getJobId().getJobIdString()); } @Test diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestJsonSerde.java b/hraven-core/src/test/java/com/twitter/hraven/TestJsonSerde.java index 6ebb41d..5b6f029 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/TestJsonSerde.java +++ b/hraven-core/src/test/java/com/twitter/hraven/TestJsonSerde.java @@ -200,7 +200,7 @@ private void assertFlowEquals(Flow flow1, Flow flow2) { assertEquals(flow1.getTotalMaps(), flow2.getTotalMaps()); assertEquals(flow1.getTotalReduces(), flow2.getTotalReduces()); assertEquals(flow1.getVersion(), flow2.getVersion()); - assertEquals(flow1.getHadoopVersion(), flow2.getHadoopVersion()); + assertEquals(flow1.getHistoryFileType(), flow2.getHistoryFileType()); assertEquals(flow1.getUserName(), flow2.getUserName()); assertJobListEquals(flow1.getJobs(), flow2.getJobs()); } @@ -222,7 +222,7 @@ private void assertJobListEquals( List job1, List job2) assertEquals(job1.get(j).getMapSlotMillis(), job2.get(j).getMapSlotMillis()); assertEquals(job1.get(j).getReduceSlotMillis(), job2.get(j).getReduceSlotMillis()); assertEquals(job1.get(j).getMegabyteMillis(), job2.get(j).getMegabyteMillis()); - assertEquals(job1.get(j).getHadoopVersion(), job2.get(j).getHadoopVersion()); + assertEquals(job1.get(j).getHistoryFileType(), job2.get(j).getHistoryFileType()); assertEquals(job1.get(j).getUser(), job2.get(j).getUser()); } } diff --git a/hraven-core/src/test/java/com/twitter/hraven/TestSparkJobDescFactory.java b/hraven-core/src/test/java/com/twitter/hraven/TestSparkJobDescFactory.java new file mode 100644 index 0000000..b68bd47 --- /dev/null +++ b/hraven-core/src/test/java/com/twitter/hraven/TestSparkJobDescFactory.java @@ -0,0 +1,33 @@ +package com.twitter.hraven; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +public class TestSparkJobDescFactory { + + @Test + public void testCreation() { + String username = "testuser"; + String jobName1 = "sample spark job"; + + Configuration c = new Configuration(); + c.set("mapreduce.job.name", jobName1); + c.set("mapreduce.job.user.name", username); + + SparkJobDescFactory factory = new SparkJobDescFactory(); + QualifiedJobId jobId = new QualifiedJobId("test@local", "spark_201206010000_0001"); + long runId = 1415407334L; + JobDesc desc = factory.create(jobId, runId, c); + assertNotNull(desc); + assertEquals(jobId, desc.getQualifiedJobId()); + + assertEquals("spark_201206010000_0001", desc.getJobId()); + assertEquals(username, desc.getUserName()); + assertEquals(jobName1, desc.getAppId()); + assertEquals("", desc.getVersion()); + assertEquals(Framework.SPARK, desc.getFramework()); + assertEquals(runId, desc.getRunId()); + } +} diff --git a/hraven-core/src/test/java/com/twitter/hraven/datasource/TestJobHistoryService.java b/hraven-core/src/test/java/com/twitter/hraven/datasource/TestJobHistoryService.java index 1c8905e..56bb4c8 100644 --- a/hraven-core/src/test/java/com/twitter/hraven/datasource/TestJobHistoryService.java +++ b/hraven-core/src/test/java/com/twitter/hraven/datasource/TestJobHistoryService.java @@ -38,7 +38,7 @@ import 
com.twitter.hraven.Constants; import com.twitter.hraven.Flow; import com.twitter.hraven.GenerateFlowTestData; -import com.twitter.hraven.HadoopVersion; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.JobDetails; import com.twitter.hraven.JobKey; import com.twitter.hraven.datasource.JobHistoryByIdService; @@ -217,7 +217,7 @@ public void testGetJobByJobID() throws Exception { } @SuppressWarnings("deprecation") - private void checkSomeFlowStats(String version, HadoopVersion hv, int numJobs, long baseStats, List flowSeries) { + private void checkSomeFlowStats(String version, HistoryFileType hv, int numJobs, long baseStats, List flowSeries) { assertNotNull(flowSeries); for ( Flow f : flowSeries ){ assertEquals( numJobs, f.getJobCount()); @@ -232,7 +232,7 @@ private void checkSomeFlowStats(String version, HadoopVersion hv, int numJobs, l assertEquals( numJobs * baseStats , f.getReduceShuffleBytes()); assertEquals( numJobs * baseStats , f.getReduceSlotMillis()); assertEquals( version , f.getVersion()); - assertEquals( hv, f.getHadoopVersion()); + assertEquals( hv, f.getHistoryFileType()); assertEquals( numJobs * baseStats , f.getMegabyteMillis()); assertEquals( numJobs * 1000, f.getDuration()); assertEquals( f.getDuration() + GenerateFlowTestData.SUBMIT_LAUCH_DIFF, f.getWallClockTime()); @@ -258,10 +258,10 @@ public void testGetFlowTimeSeriesStats() throws Exception { try { // fetch back the entire flow stats List flowSeries = service.getFlowTimeSeriesStats("c1@local", "buser", "AppOne", "", 0L, 0L, 1000, null); - checkSomeFlowStats("a", HadoopVersion.ONE, numJobsAppOne, baseStats, flowSeries); + checkSomeFlowStats("a", HistoryFileType.ONE, numJobsAppOne, baseStats, flowSeries); flowSeries = service.getFlowTimeSeriesStats("c1@local", "buser", "AppTwo", "", 0L, 0L, 1000, null); - checkSomeFlowStats("b", HadoopVersion.ONE, numJobsAppTwo, baseStats, flowSeries); + checkSomeFlowStats("b", HistoryFileType.ONE, numJobsAppTwo, baseStats, flowSeries); } finally { service.close(); diff --git a/hraven-etl/pom.xml b/hraven-etl/pom.xml index 9c29444..20e4795 100644 --- a/hraven-etl/pom.xml +++ b/hraven-etl/pom.xml @@ -21,12 +21,12 @@ com.twitter.hraven hraven - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT ../ com.twitter.hraven hraven-etl - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT hRaven - etl jar ETL map reduce jobs and supporting components for data loading @@ -162,13 +162,13 @@ com.twitter.hraven hraven-core - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT com.twitter.hraven hraven-core - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT test-jar test diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFilePreprocessor.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFilePreprocessor.java index b5b3c0c..021ede1 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFilePreprocessor.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFilePreprocessor.java @@ -276,14 +276,16 @@ public int run(String[] args) throws Exception { * Reference: * {@link https://github.com/twitter/hraven/issues/59} */ - String maxFileSizeStr = commandLine.getOptionValue("s"); - LOG.info("maxFileSize=" + maxFileSizeStr); long maxFileSize = DEFAULT_RAW_FILE_SIZE_LIMIT; - try { - maxFileSize = Long.parseLong(maxFileSizeStr); - } catch (NumberFormatException nfe) { - throw new ProcessingException("Caught NumberFormatException during conversion " + if (commandLine.hasOption("s")) { + String maxFileSizeStr = commandLine.getOptionValue("s"); + LOG.info("maxFileSize=" + maxFileSizeStr); + try { + maxFileSize 
= Long.parseLong(maxFileSizeStr); + } catch (NumberFormatException nfe) { + throw new ProcessingException("Caught NumberFormatException during conversion " + " of maxFileSize to long", nfe); + } } ProcessRecordService processRecordService = new ProcessRecordService( diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java index 1c4ea97..a8add22 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java @@ -40,7 +40,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.filecache.DistributedCache; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Scan; @@ -254,7 +253,7 @@ public int run(String[] args) throws Exception { Path hdfsPath = new Path(costFilePath + Constants.COST_PROPERTIES_FILENAME); // add to distributed cache DistributedCache.addCacheFile(hdfsPath.toUri(), hbaseConf); - + // Grab the machine type argument String machineType = commandLine.getOptionValue("m"); // set it as part of conf so that the diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserBase.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserBase.java index 83e3f42..a0e24a7 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserBase.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserBase.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hbase.util.Bytes; import com.twitter.hraven.Constants; -import com.twitter.hraven.HadoopVersion; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.JobHistoryKeys; import com.twitter.hraven.datasource.ProcessingException; import com.twitter.hraven.util.ByteUtil; @@ -32,7 +32,7 @@ * Abstract class for job history file parsing * * Implements the interface for history file parsing - * Adds the implementation for a getHadoopVersionPut function + * Adds the implementation for a getHistoryFileTypePut function * Other methods to be implemented for parsing by sub classes * */ @@ -54,7 +54,7 @@ protected JobHistoryFileParserBase(Configuration conf) { * * @return Put */ - public Put getHadoopVersionPut(HadoopVersion historyFileVersion, byte[] jobKeyBytes) { + public Put getHistoryFileTypePut(HistoryFileType historyFileVersion, byte[] jobKeyBytes) { Put pVersion = new Put(jobKeyBytes); byte[] valueBytes = null; valueBytes = Bytes.toBytes(historyFileVersion.toString()); @@ -139,8 +139,7 @@ public static Long getXmxValue(String javaChildOptsStr) { retVal /= (1024 * 1024); } } catch (NumberFormatException nfe) { - LOG.error("Unable to get the Xmx value from " + javaChildOptsStr); - nfe.printStackTrace(); + LOG.error("Unable to get the Xmx value from " + javaChildOptsStr, nfe); throw new ProcessingException("Unable to get the Xmx value from " + javaChildOptsStr, nfe); } return retVal; @@ -160,16 +159,19 @@ public static Long getXmxTotal(final long xmx75) { * @return the job submit time in milliseconds since January 1, 1970 UTC; * or 0 if no value can be found. 
*/ - public static long getSubmitTimeMillisFromJobHistory(byte[] jobHistoryRaw) { + public static long getSubmitTimeMillisFromJobHistory(HistoryFileType historyFileType, + byte[] jobHistoryRaw) { + + if (historyFileType == null) { + throw new IllegalArgumentException("History file type can't be null "); + } long submitTimeMillis = 0; if (null == jobHistoryRaw) { return submitTimeMillis; } - HadoopVersion hv = JobHistoryFileParserFactory.getVersion(jobHistoryRaw); - - switch (hv) { + switch (historyFileType) { case TWO: // look for the job submitted event, since that has the job submit time int startIndex = ByteUtil.indexOf(jobHistoryRaw, Constants.JOB_SUBMIT_EVENT_BYTES, 0); @@ -185,7 +187,8 @@ public static long getSubmitTimeMillisFromJobHistory(byte[] jobHistoryRaw) { try { submitTimeMillis = Long.parseLong(submitTimeMillisString); } catch (NumberFormatException nfe) { - LOG.error(" caught NFE during conversion of submit time " + submitTimeMillisString + " " + nfe.getMessage()); + LOG.error(" caught NFE during conversion of submit time " + + submitTimeMillisString, nfe); submitTimeMillis = 0; } } @@ -217,13 +220,32 @@ public static long getSubmitTimeMillisFromJobHistory(byte[] jobHistoryRaw) { try { submitTimeMillis = Long.parseLong(submitTimeMillisString); } catch (NumberFormatException nfe) { - LOG.error(" caught NFE during conversion of submit time " + submitTimeMillisString - + " " + nfe.getMessage()); + LOG.error(" caught NFE during conversion of submit time " + + submitTimeMillisString, nfe); submitTimeMillis = 0; } } } break; + + case SPARK: + // look for the spark submit time key + startIndex = ByteUtil.indexOf(jobHistoryRaw, + Constants.SPARK_SUBMIT_TIME_BYTES, 0); + if (startIndex != -1) { + // read the string that contains the unix timestamp + String submitTimeMillisString = Bytes.toString(jobHistoryRaw, + startIndex + Constants.SPARK_SUBMIT_TIME_BYTES.length, + Constants.EPOCH_TIMESTAMP_STRING_LENGTH); + try { + submitTimeMillis = Long.parseLong(submitTimeMillisString); + } catch (NumberFormatException nfe) { + LOG.error(" caught NFE during conversion of submit time " + + submitTimeMillisString + " ", nfe); + submitTimeMillis = 0; + } + } + break; } return submitTimeMillis; diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserFactory.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserFactory.java index adbb744..ec03660 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserFactory.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserFactory.java @@ -17,7 +17,10 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; -import com.twitter.hraven.HadoopVersion; + +import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; +import com.twitter.hraven.QualifiedJobId; /** * Deal with {@link JobHistoryFileParser} implementations. 
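For context, the Spark branch added to getSubmitTimeMillisFromJobHistory above works by scanning the raw history bytes for the literal "submit_time": marker and parsing the characters that follow it as a millisecond epoch value. Below is a minimal, self-contained sketch of that extraction in plain JDK Java; it is an illustration only, not the patch's code — the 13-digit timestamp length and the sample JSON are assumptions standing in for hRaven's Constants and ByteUtil/Bytes helpers.

import java.nio.charset.StandardCharsets;

public class SparkSubmitTimeSketch {
  // Marker as it appears in a spark history json, mirroring SPARK_SUBMIT_TIME in the patch.
  private static final String SPARK_SUBMIT_TIME = "\"submit_time\":";
  // A millisecond epoch timestamp is 13 digits long (valid until the year 2286).
  private static final int EPOCH_TIMESTAMP_STRING_LENGTH = 13;

  static long getSparkSubmitTimeMillis(byte[] jobHistoryRaw) {
    if (jobHistoryRaw == null) {
      return 0L;
    }
    String raw = new String(jobHistoryRaw, StandardCharsets.UTF_8);
    int start = raw.indexOf(SPARK_SUBMIT_TIME);
    if (start == -1) {
      // marker not found, fall back to 0 like the patch does
      return 0L;
    }
    int from = start + SPARK_SUBMIT_TIME.length();
    if (from + EPOCH_TIMESTAMP_STRING_LENGTH > raw.length()) {
      return 0L;
    }
    try {
      return Long.parseLong(raw.substring(from, from + EPOCH_TIMESTAMP_STRING_LENGTH));
    } catch (NumberFormatException nfe) {
      return 0L;
    }
  }

  public static void main(String[] args) {
    byte[] sample = "{\"appid\":\"spark_1412702189634_248930\",\"submit_time\":1415146248235,\"start_time\":1415146253739}"
        .getBytes(StandardCharsets.UTF_8);
    // prints 1415146248235
    System.out.println(getSparkSubmitTimeMillis(sample));
  }
}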
@@ -49,24 +52,37 @@ public class JobHistoryFileParserFactory { * * @throws IllegalArgumentException if neither match */ - public static HadoopVersion getVersion(byte[] historyFileContents) { + public static HistoryFileType getHistoryFileType(QualifiedJobId qualifiedJobId, + byte[] historyFileContents) { + if (historyFileContents == null) { + // throw an exception if it's null + throw new IllegalArgumentException("Null job history file"); + } + + // an easy check to see if file type is spark + if ((qualifiedJobId != null) && Constants.FRAMEWORK_CONF_SPARK_VALUE + .equalsIgnoreCase(qualifiedJobId.getJobPrefix())) { + return HistoryFileType.SPARK; + } + if(historyFileContents.length > HADOOP2_VERSION_LENGTH) { // the first 10 bytes in a hadoop2.0 history file contain Avro-Json String version2Part = new String(historyFileContents, 0, HADOOP2_VERSION_LENGTH); if (StringUtils.equalsIgnoreCase(version2Part, HADOOP2_VERSION_STRING)) { - return HadoopVersion.TWO; + return HistoryFileType.TWO; } else { if(historyFileContents.length > HADOOP1_VERSION_LENGTH) { // the first 18 bytes in a hadoop1.0 history file contain Meta VERSION="1" . - String version1Part = new String(historyFileContents, 0, HADOOP1_VERSION_LENGTH); + String version1Part = new String(historyFileContents, 0,HADOOP1_VERSION_LENGTH); if (StringUtils.equalsIgnoreCase(version1Part, HADOOP1_VERSION_STRING)) { - return HadoopVersion.ONE; + return HistoryFileType.ONE; } } } } // throw an exception if we did not find any matching version - throw new IllegalArgumentException(" Unknown format of job history file: " + historyFileContents); + throw new IllegalArgumentException(" Unknown format of job history file: " + + historyFileContents); } /** @@ -80,22 +96,25 @@ public static HadoopVersion getVersion(byte[] historyFileContents) { * Or return null if either input is null */ public static JobHistoryFileParser createJobHistoryFileParser( - byte[] historyFileContents, Configuration jobConf) throws IllegalArgumentException { + byte[] historyFileContents, Configuration jobConf, + HistoryFileType historyFileType) + throws IllegalArgumentException { if (historyFileContents == null) { throw new IllegalArgumentException( "Job history contents should not be null"); } - HadoopVersion version = getVersion(historyFileContents); - - switch (version) { + switch (historyFileType) { case ONE: return new JobHistoryFileParserHadoop1(jobConf); case TWO: return new JobHistoryFileParserHadoop2(jobConf); + case SPARK: + return new JobHistoryFileParserSpark(jobConf); + default: throw new IllegalArgumentException( " Unknown format of job history file "); @@ -103,16 +122,16 @@ public static JobHistoryFileParser createJobHistoryFileParser( } /** - * @return HISTORY_FILE_VERSION1 + * @return HISTORY FILE TYPE VERSION1 */ - public static HadoopVersion getHistoryFileVersion1() { - return HadoopVersion.ONE; + public static HistoryFileType getHistoryFileVersion1() { + return HistoryFileType.ONE; } /** - * @return HISTORY_FILE_VERSION2 + * @return HISTORY FILE TYPE VERSION2 */ - public static HadoopVersion getHistoryFileVersion2() { - return HadoopVersion.TWO; + public static HistoryFileType getHistoryFileVersion2() { + return HistoryFileType.TWO; } } diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop1.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop1.java index 8955175..6436164 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop1.java +++ 
b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop1.java @@ -56,9 +56,9 @@ public void parse(byte[] historyFile, JobKey jobKey) throws ProcessingException jobHistoryListener = new JobHistoryListener(jobKey); JobHistoryCopy.parseHistoryFromIS(new ByteArrayInputStream(historyFile), jobHistoryListener); // set the hadoop version for this record - Put versionPut = getHadoopVersionPut(JobHistoryFileParserFactory.getHistoryFileVersion1(), + Put versionPut = getHistoryFileTypePut(JobHistoryFileParserFactory.getHistoryFileVersion1(), jobHistoryListener.getJobKeyBytes()); - jobHistoryListener.includeHadoopVersionPut(versionPut); + jobHistoryListener.includeHistoryFileTypePut(versionPut); } catch (IOException ioe) { LOG.error(" Exception during parsing hadoop 1.0 file ", ioe); throw new ProcessingException( diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop2.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop2.java index 822f6c3..9799e0e 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop2.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserHadoop2.java @@ -298,7 +298,7 @@ public void parse(byte[] historyFileContents, JobKey jobKey) this.jobPuts.add(jobStatusPut); // set the hadoop version for this record - Put versionPut = getHadoopVersionPut(JobHistoryFileParserFactory.getHistoryFileVersion2(), this.jobKeyBytes); + Put versionPut = getHistoryFileTypePut(JobHistoryFileParserFactory.getHistoryFileVersion2(), this.jobKeyBytes); this.jobPuts.add(versionPut); LOG.info("For " + this.jobKey + " #jobPuts " + jobPuts.size() + " #taskPuts: " diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserSpark.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserSpark.java new file mode 100644 index 0000000..dc74d7c --- /dev/null +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParserSpark.java @@ -0,0 +1,158 @@ +/* +Copyright 2014 Twitter, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ */ +package com.twitter.hraven.etl; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.JsonProcessingException; +import org.codehaus.jackson.map.ObjectMapper; + +import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; +import com.twitter.hraven.JobHistoryKeys; +import com.twitter.hraven.JobKey; +import com.twitter.hraven.datasource.JobKeyConverter; +import com.twitter.hraven.datasource.ProcessingException; +import com.twitter.hraven.util.ByteUtil; + +public class JobHistoryFileParserSpark extends JobHistoryFileParserBase { + + private List jobPuts = new LinkedList(); + private List taskPuts = new LinkedList(); + private JobKeyConverter jobKeyConv = new JobKeyConverter(); + private long megabytemillis; + private static final Log LOG = LogFactory.getLog(JobHistoryFileParserSpark.class); + + public JobHistoryFileParserSpark(Configuration jobConf) { + super(jobConf); + } + + @Override + public void parse(byte[] historyFile, JobKey jobKey) { + byte[] jobKeyBytes = jobKeyConv.toBytes(jobKey); + Put sparkJobPuts = new Put(jobKeyBytes); + + ObjectMapper objectMapper = new ObjectMapper(); + try { + JsonNode rootNode = objectMapper.readTree(new ByteArrayInputStream(historyFile)); + String key; + byte[] qualifier; + byte[] valueBytes; + + Iterator> fieldsIterator = rootNode.getFields(); + while (fieldsIterator.hasNext()) { + Map.Entry field = fieldsIterator.next(); + // job history keys are in upper case + key = field.getKey().toUpperCase(); + try { + if (JobHistoryKeys.valueOf(key) != null) { + qualifier = Bytes.toBytes(key.toString().toLowerCase()); + Class clazz = JobHistoryKeys.KEY_TYPES.get(JobHistoryKeys.valueOf(key)); + if (clazz == null) { + throw new IllegalArgumentException(" unknown key " + key + + " encountered while parsing " + key); + } + if (Integer.class.equals(clazz)) { + try { + valueBytes = Bytes.toBytes(field.getValue().getIntValue()); + } catch (NumberFormatException nfe) { + // use a default value + valueBytes = Constants.ZERO_INT_BYTES; + } + } else if (Long.class.equals(clazz)) { + try { + valueBytes = Bytes.toBytes(field.getValue().getLongValue()); + } catch (NumberFormatException nfe) { + // use a default value + valueBytes = Constants.ZERO_LONG_BYTES; + } + } else { + // keep the string representation by default + valueBytes = Bytes.toBytes(field.getValue().getTextValue()); + } + } else { + // simply store the key value as is + qualifier = Bytes.toBytes(key.toLowerCase()); + valueBytes = Bytes.toBytes(field.getValue().getTextValue()); + } + } catch (IllegalArgumentException iae) { + // job history key does not exist, so + // it could be 'queue' or 'megabytemillis' or 'batch.desc' field + // store them as per hadoop1/hadoop2 compatible storage + // if none of those, + // store the key value as is + if (StringUtils.equals(key.toLowerCase(), Constants.HRAVEN_QUEUE)) { + // queue is stored as part of job conf + qualifier = + ByteUtil.join(Constants.SEP_BYTES, Constants.JOB_CONF_COLUMN_PREFIX_BYTES, + Constants.HRAVEN_QUEUE_BYTES); + valueBytes = Bytes.toBytes(field.getValue().getTextValue()); + + } else 
if (StringUtils.equals(key.toLowerCase(), Constants.MEGABYTEMILLIS)) { + qualifier = Constants.MEGABYTEMILLIS_BYTES; + this.megabytemillis = field.getValue().getLongValue(); + valueBytes = Bytes.toBytes(this.megabytemillis); + } else { + // simply store the key as is + qualifier = Bytes.toBytes(key.toLowerCase()); + valueBytes = Bytes.toBytes(field.getValue().getTextValue()); + } + } + sparkJobPuts.add(Constants.INFO_FAM_BYTES, qualifier, valueBytes); + } + } catch (JsonProcessingException e) { + throw new ProcessingException("Caught exception during spark history parsing ", e); + } catch (IOException e) { + throw new ProcessingException("Caught exception during spark history parsing ", e); + } + this.jobPuts.add(sparkJobPuts); + // set the history file type for this record + Put historyFileTypePut = getHistoryFileTypePut(HistoryFileType.SPARK, jobKeyBytes); + this.jobPuts.add(historyFileTypePut); + + LOG.info("For " + this.jobKeyConv.fromBytes(jobKeyBytes) + " #jobPuts " + jobPuts.size() + + " #taskPuts: " + taskPuts.size()); + + } + + @Override + public Long getMegaByteMillis() { + return this.megabytemillis; + } + + @Override + public List getJobPuts() { + return this.jobPuts; + } + + @Override + public List getTaskPuts() { + return this.taskPuts; + } + +} diff --git a/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobFileTableMapper.java b/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobFileTableMapper.java index eff49d9..deec46e 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobFileTableMapper.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobFileTableMapper.java @@ -37,6 +37,7 @@ import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Mapper; import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.JobDesc; import com.twitter.hraven.JobDescFactory; import com.twitter.hraven.JobKey; @@ -137,8 +138,11 @@ protected void map( context.progress(); byte[] jobhistoryraw = rawService.getJobHistoryRawFromResult(value); - long submitTimeMillis = JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory( - jobhistoryraw); + // find out if it's a hadoop1 or hadoop2 file or a spark history file + HistoryFileType historyFileType = JobHistoryFileParserFactory + .getHistoryFileType(qualifiedJobId, jobhistoryraw); + long submitTimeMillis = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(historyFileType, jobhistoryraw); context.progress(); // explicitly setting the byte array to null to free up memory jobhistoryraw = null; @@ -162,7 +166,8 @@ protected void map( // TODO: remove sysout String msg = "JobDesc (" + keyCount + "): " + jobDesc - + " submitTimeMillis: " + submitTimeMillis; + + " submitTimeMillis: " + submitTimeMillis + + " historyFileType=" + historyFileType; LOG.info(msg); List puts = JobHistoryService.getHbasePuts(jobDesc, jobConf); @@ -203,7 +208,8 @@ protected void map( historyFileContents = keyValue.getValue(); } JobHistoryFileParser historyFileParser = JobHistoryFileParserFactory - .createJobHistoryFileParser(historyFileContents, jobConf); + .createJobHistoryFileParser(historyFileContents, jobConf, + historyFileType); historyFileParser.parse(historyFileContents, jobKey); context.progress(); diff --git a/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobHistoryListener.java b/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobHistoryListener.java index 320e05e..a0c5783 100644 --- 
a/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobHistoryListener.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/mapreduce/JobHistoryListener.java @@ -124,7 +124,7 @@ private void handleJob(Map values) { * @param pVersion * @throws IllegalArgumentException if put is null */ - public void includeHadoopVersionPut(Put pVersion) { + public void includeHistoryFileTypePut(Put pVersion) { // set the hadoop version for this record if (pVersion != null) { this.jobPuts.add(pVersion); diff --git a/hraven-etl/src/test/java/com/twitter/hraven/TestJobFile.java b/hraven-etl/src/test/java/com/twitter/hraven/TestJobFile.java index ba17bcf..2030c97 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/TestJobFile.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/TestJobFile.java @@ -45,6 +45,8 @@ public class TestJobFile { /** 2.0 job history file name */ final static String VALID_JOB_HISTORY_FILENAME5 = "job_1374258111572_0003-1374260622449-userName1-TeraGen-1374260635219-2-0-SUCCEEDED-default.jhist"; + final static String VALID_SPARK_HISTORY_FILENAME = + "spark_1413515656084_3051855.json"; final static String INVALID_JOB_FILENAME = "jabbedabbedoo.txt"; @@ -107,6 +109,15 @@ public void testJobConfFile() { */ assertEquals("job_1374258111572_0003", jobFile.getJobid()); + jobFile = new JobFile(VALID_SPARK_HISTORY_FILENAME); + assertFalse("This should not be a valid job conf file", jobFile.isJobConfFile()); + assertTrue("this should be a spark job history file", jobFile.isJobHistoryFile()); + /* + * confirm that the job id was parsed correctly. Note that the history filename is a spark job + * history file name + */ + assertEquals("spark_1413515656084_3051855", jobFile.getJobid()); + } } diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobFilePreprocessor.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobFilePreprocessor.java new file mode 100644 index 0000000..03cd567 --- /dev/null +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobFilePreprocessor.java @@ -0,0 +1,111 @@ +package com.twitter.hraven.etl; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.util.ToolRunner; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.twitter.hraven.datasource.HRavenTestUtil; + +public class TestJobFilePreprocessor { + private static HBaseTestingUtility UTIL; + + @BeforeClass + public static void setupBeforeClass() throws Exception { + UTIL = new HBaseTestingUtility(); + UTIL.startMiniCluster(); + HRavenTestUtil.createSchema(UTIL); + } + + + @Test + public void testSparkProcessRecordCreation() throws IOException { + + final String procDir = "spark_processing_dir"; + final String inputDir = "spark_input_dir"; + final String cluster = "cluster1"; + + FileSystem hdfs = FileSystem.get(UTIL.getConfiguration()); + Path inputPath = new Path(inputDir); + boolean os = hdfs.mkdirs(inputPath); + assertTrue(os); + assertTrue(hdfs.exists(inputPath)); + + Path procPath = new Path(procDir); + os = hdfs.mkdirs(procPath); + assertTrue(os); + assertTrue(hdfs.exists(procPath)); + hdfs.getFileStatus(procPath); + + final String SPARK_JOB_HISTORY_FILE_NAME = + "src/test/resources/spark_1413515656084_3051855"; + File jobHistoryfile = new 
File(SPARK_JOB_HISTORY_FILE_NAME); + Path srcPath = new Path(jobHistoryfile.toURI()); + hdfs.copyFromLocalFile(srcPath, inputPath); + Path expPath = new Path(inputPath.toUri() + "/" + srcPath.getName()); + assertTrue(hdfs.exists(expPath)); + + final String JOB_CONF_FILE_NAME = + "src/test/resources/spark_1413515656084_3051855_conf.xml"; + File jobConfFile = new File(JOB_CONF_FILE_NAME); + srcPath = new Path(jobConfFile.toURI()); + hdfs.copyFromLocalFile(srcPath, inputPath); + expPath = new Path(inputPath.toUri() + "/" + srcPath.getName()); + assertTrue(hdfs.exists(expPath)); + + JobFilePreprocessor jfpp = new JobFilePreprocessor(UTIL.getConfiguration()); + String args[] = new String[3]; + args[0] = "-o" + procDir; + args[1] = "-i" + inputDir; + args[2] = "-c" + cluster; + try { + // list the files in processing dir and assert that it's empty + FileStatus[] fs = hdfs.listStatus(procPath); + assertEquals(0, fs.length); + jfpp.run(args); + // now check if the seq file exists + // can't check for exact filename since it includes the current timestamp + fs = hdfs.listStatus(procPath); + assertEquals(1, fs.length); + // ensure that hbase table contains the process record + ProcessRecordService processRecordService = new ProcessRecordService( + UTIL.getConfiguration()); + ProcessRecord pr = processRecordService.getLastSuccessfulProcessRecord(cluster); + assertNotNull(pr); + assertEquals(pr.getMaxJobId(), pr.getMinJobId()); + assertEquals("spark_1413515656084_3051855", pr.getMaxJobId()); + ProcessRecordKey pk = pr.getKey(); + assertNotNull(pk); + assertEquals(cluster, pk.getCluster()); + assertEquals(2, pr.getProcessedJobFiles()); + assertEquals(ProcessState.PREPROCESSED, pr.getProcessState()); + assertEquals(fs[0].getPath().getParent().getName() + + "/" + fs[0].getPath().getName(), pr.getProcessFile()); + + // run raw loader as well + args = new String[2]; + args[0] = "-p" + procDir; + args[1] = "-c" + cluster; + JobFileRawLoader jr = new JobFileRawLoader(UTIL.getConfiguration()); + assertNotNull(jr); + ToolRunner.run(jr, args); + pr = processRecordService.getLastSuccessfulProcessRecord(cluster); + assertEquals(ProcessState.LOADED, pr.getProcessState()); + + } + catch (Exception e) { + e.printStackTrace(); + } + } + +} diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserBase.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserBase.java index 3201409..79b5d41 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserBase.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserBase.java @@ -23,6 +23,7 @@ import com.google.common.io.Files; import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.datasource.ProcessingException; public class TestJobHistoryFileParserBase { @@ -128,7 +129,8 @@ public void testGetSubmitTimeMillisFromJobHistory2() throws IOException { // hadoop2 file File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME); byte[] contents = Files.toByteArray(jobHistoryfile); - long actualts = JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory(contents); + long actualts = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(HistoryFileType.TWO, contents); long expts = 1329348443227L; assertEquals(expts, actualts); @@ -137,7 +139,8 @@ public void testGetSubmitTimeMillisFromJobHistory2() throws IOException { 
"src/test/resources/job_1329348432999_0003-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist"; jobHistoryfile = new File(JOB_HISTORY_FILE_NAME); contents = Files.toByteArray(jobHistoryfile); - actualts = JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory(contents); + actualts = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(HistoryFileType.TWO, contents); expts = 1328218696000L; assertEquals(expts, actualts); @@ -146,7 +149,8 @@ public void testGetSubmitTimeMillisFromJobHistory2() throws IOException { "src/test/resources/job_201311192236_3583_1386370578196_user1_Sleep+job"; jobHistoryfile = new File(JOB_HISTORY_FILE_NAME); contents = Files.toByteArray(jobHistoryfile); - actualts = JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory(contents); + actualts = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, contents); expts = 1386370578196L; assertEquals(expts, actualts); } @@ -191,40 +195,80 @@ public void testGetSubmitTimeMillisFromJobHistory() { byte[] jobHistoryBytes = Bytes.toBytes(JOB_HISTORY); long submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(1339063492288L, submitTimeMillis); jobHistoryBytes = Bytes.toBytes(JOB_HISTORY2); submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(1339063492288L, submitTimeMillis); jobHistoryBytes = Bytes.toBytes(BAD_JOB_HISTORY); submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(0L, submitTimeMillis); jobHistoryBytes = Bytes.toBytes(BAD_JOB_HISTORY2); submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(0L, submitTimeMillis); jobHistoryBytes = Bytes.toBytes(BAD_JOB_HISTORY3); submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(0L, submitTimeMillis); jobHistoryBytes = Bytes.toBytes(BAD_JOB_HISTORY4); submitTimeMillis = JobHistoryFileParserBase - .getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + .getSubmitTimeMillisFromJobHistory(HistoryFileType.ONE, jobHistoryBytes); assertEquals(0L, submitTimeMillis); } + /** + * Confirm that we can properly find the submit timestamp for Spark. 
+ */ + @Test + public void testGetSubmitTimeMillisFromJobHistorySpark() { + /** + * Normal example + */ + final String JOB_HISTORY = + "{\"appname\":" + "\"com.example.spark_history.simple_example.Main$\"," + + "\"appid\":\"spark_1412702189634_248930\"," + + "\"username\":\"userName1\", " + + "\"submit_time\":1415146248235," + + "\"start_time\":1415146253739," + + "\"finish_time\":1415146273227," + + "\"queue\":\"defaultQueueName\"," + + "\"megabytemillis\":1528224768," + + "\"job_status\":\"SUCCEEDED\"," + + "\"batch.desc\":\"\"}"; + + final String BAD_JOB_HISTORY = "{\"appname\":" + + "\"com.example.spark_history.simple_example.Main$\"," + + "\"appid\":\"spark_1412702189634_248930\"," + + "\"username\":\"userName1\", "; + + byte[] jobHistoryBytes = Bytes.toBytes(JOB_HISTORY); + long submitTimeMillis = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(HistoryFileType.SPARK, + jobHistoryBytes); + assertEquals(1415146248235L, submitTimeMillis); + + jobHistoryBytes = Bytes.toBytes(BAD_JOB_HISTORY); + submitTimeMillis = JobHistoryFileParserBase + .getSubmitTimeMillisFromJobHistory(HistoryFileType.SPARK, + jobHistoryBytes); + assertEquals(0L, submitTimeMillis); + + } + @Test(expected=IllegalArgumentException.class) public void testIncorrectSubmitTime() { // Now some cases where we should not be able to find any timestamp. byte[] jobHistoryBytes = Bytes.toBytes(""); - JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory(jobHistoryBytes); + JobHistoryFileParserBase.getSubmitTimeMillisFromJobHistory(null, + jobHistoryBytes); } @Test diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserFactory.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserFactory.java index 6afc95c..c812ecc 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserFactory.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserFactory.java @@ -21,7 +21,7 @@ import static org.junit.Assert.assertTrue; import org.junit.Test; -import com.twitter.hraven.HadoopVersion; +import com.twitter.hraven.HistoryFileType; /** * Test {@link JobHistoryFileParserFactory} @@ -35,7 +35,7 @@ public void testCreateJobHistoryFileParserCorrectCreation() { String jHist = "Meta VERSION=\"1\" .\n" + "Job JOBID=\"job_201301010000_12345\""; JobHistoryFileParser historyFileParser = JobHistoryFileParserFactory - .createJobHistoryFileParser(jHist.getBytes(), null); + .createJobHistoryFileParser(jHist.getBytes(), null, HistoryFileType.ONE); assertNotNull(historyFileParser); @@ -52,15 +52,17 @@ public void testCreateJobHistoryFileParserCorrectCreation() { @Test public void testGetVersion() { String jHist1 = "Meta VERSION=\"1\" .\n" + "Job JOBID=\"job_201301010000_12345\""; - HadoopVersion version1 = JobHistoryFileParserFactory.getVersion(jHist1.getBytes()); - // confirm that we get back hadoop 1.0 version + HistoryFileType version1 = JobHistoryFileParserFactory.getHistoryFileType(null, + jHist1.getBytes()); + // confirm that we get back hadoop 1.0 version as history file type assertEquals(JobHistoryFileParserFactory.getHistoryFileVersion1(), version1); String jHist2 = "Avro-Json\n" + "{\"type\":\"record\",\"name\":\"Event\", " + "\"namespace\":\"org.apache.hadoop.mapreduce.jobhistory\",\"fields\":[]\""; - HadoopVersion version2 = JobHistoryFileParserFactory.getVersion(jHist2.getBytes()); - // confirm that we get back hadoop 2.0 version + HistoryFileType version2 = JobHistoryFileParserFactory.getHistoryFileType(null, + 
jHist2.getBytes()); + // confirm that we get back hadoop 2.0 version as history file type assertEquals(JobHistoryFileParserFactory.getHistoryFileVersion2(), version2); } @@ -68,20 +70,20 @@ public void testGetVersion() { * confirm that exception is thrown on incorrect input */ @Test(expected = IllegalArgumentException.class) - public void testGetVersionIncorrect2() { + public void testGetHistoryFileTypeIncorrect2() { String jHist2 = "Avro-HELLO-Json\n" + "{\"type\":\"record\",\"name\":\"Event\", " + "\"namespace\":\"org.apache.hadoop.mapreduce.jobhistory\",\"fields\":[]\""; - JobHistoryFileParserFactory.getVersion(jHist2.getBytes()); + JobHistoryFileParserFactory.getHistoryFileType(null, jHist2.getBytes()); } /** * confirm that exception is thrown on incorrect input */ @Test(expected = IllegalArgumentException.class) - public void testGetVersionIncorrect1() { + public void testGetHistoryFileTypeIncorrect1() { String jHist1 = "Meta HELLO VERSION=\"1\" .\n" + "Job JOBID=\"job_201301010000_12345\""; - JobHistoryFileParserFactory.getVersion(jHist1.getBytes()); + JobHistoryFileParserFactory.getHistoryFileType(null, jHist1.getBytes()); } /** @@ -90,7 +92,7 @@ public void testGetVersionIncorrect1() { @Test(expected = IllegalArgumentException.class) public void testCreateJobHistoryFileParserNullCreation() { JobHistoryFileParser historyFileParser = JobHistoryFileParserFactory - .createJobHistoryFileParser(null, null); + .createJobHistoryFileParser(null, null, null); assertNull(historyFileParser); } } diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop1.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop1.java index 411b0c4..06cb6b6 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop1.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop1.java @@ -30,6 +30,7 @@ import com.google.common.io.Files; import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.JobKey; public class TestJobHistoryFileParserHadoop1 { @@ -55,7 +56,8 @@ public void testMegaByteMillis() throws IOException { jobConf.addResource(new Path(JOB_CONF_FILENAME)); JobHistoryFileParser historyFileParser = - JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf); + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf, + HistoryFileType.ONE); assertNotNull(historyFileParser); // confirm that we get back an object that can parse hadoop 1.0 files diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop2.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop2.java index 486a186..891df27 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop2.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserHadoop2.java @@ -21,7 +21,7 @@ import org.junit.Test; import com.google.common.io.Files; import com.twitter.hraven.Constants; -import com.twitter.hraven.HadoopVersion; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.JobHistoryKeys; import com.twitter.hraven.JobKey; import com.twitter.hraven.datasource.JobKeyConverter; @@ -55,7 +55,8 @@ public void testCreateJobHistoryFileParserCorrectCreation() throws IOException { jobConf.addResource(new Path(JOB_CONF_FILE_NAME)); JobHistoryFileParser historyFileParser = - 
JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf); + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf, + HistoryFileType.TWO); assertNotNull(historyFileParser); // confirm that we get back an object that can parse hadoop 2.0 files @@ -87,7 +88,7 @@ public void testCreateJobHistoryFileParserCorrectCreation() throws IOException { for (KeyValue kv : lkv) { // ensure we have a hadoop2 version as the value assertEquals(Bytes.toString(kv.getValue()), - HadoopVersion.TWO.toString()); + HistoryFileType.TWO.toString()); // ensure we don't see the same put twice assertFalse(foundVersion2); @@ -180,7 +181,8 @@ public void testCreateJobHistoryFileParserNullConf() throws IOException { File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME); byte[] contents = Files.toByteArray(jobHistoryfile); JobHistoryFileParser historyFileParser = - JobHistoryFileParserFactory.createJobHistoryFileParser(contents, null); + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, null, + HistoryFileType.TWO); assertNotNull(historyFileParser); // confirm that we get back an object that can parse hadoop 2.0 files @@ -204,7 +206,8 @@ public void testMissingSlotMillis() throws IOException { jobConf.addResource(new Path(JOB_CONF_FILE_NAME)); JobHistoryFileParser historyFileParser = - JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf); + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf, + HistoryFileType.TWO); assertNotNull(historyFileParser); // confirm that we get back an object that can parse hadoop 2.0 files @@ -269,7 +272,8 @@ public void testVersion2_4() throws IOException { jobConf.addResource(new Path(JOB_CONF_FILE_NAME)); JobHistoryFileParser historyFileParser = - JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf); + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf, + HistoryFileType.TWO); assertNotNull(historyFileParser); // confirm that we get back an object that can parse hadoop 2.x files diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserSpark.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserSpark.java new file mode 100644 index 0000000..0e336e2 --- /dev/null +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryFileParserSpark.java @@ -0,0 +1,140 @@ +/* + * Copyright 2014 Twitter, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You may obtain a copy of the License + * at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in + * writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific + * language governing permissions and limitations under the License. 
+ */ +package com.twitter.hraven.etl; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; +import com.google.common.io.Files; +import com.twitter.hraven.Constants; +import com.twitter.hraven.HistoryFileType; +import com.twitter.hraven.JobHistoryKeys; +import com.twitter.hraven.JobKey; +import com.twitter.hraven.datasource.JobKeyConverter; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * Test {@link JobHistoryFileParserSpark} + */ +public class TestJobHistoryFileParserSpark { + + @Test + public void testCreateJobHistoryFileParserCorrectCreation() throws IOException { + + final String JOB_HISTORY_FILE_NAME = + "src/test/resources/spark_1413515656084_3051855"; + + File jobHistoryfile = new File(JOB_HISTORY_FILE_NAME); + byte[] contents = Files.toByteArray(jobHistoryfile); + // now load the conf file and check + final String JOB_CONF_FILE_NAME = + "src/test/resources/spark_1413515656084_3051855_conf.xml"; + Configuration jobConf = new Configuration(); + jobConf.addResource(new Path(JOB_CONF_FILE_NAME)); + + JobHistoryFileParser historyFileParser = + JobHistoryFileParserFactory.createJobHistoryFileParser(contents, jobConf, + HistoryFileType.SPARK); + assertNotNull(historyFileParser); + + // confirm that we get back an object that can parse spark files + assertTrue(historyFileParser instanceof JobHistoryFileParserSpark); + + JobKey jobKey = new JobKey("cluster1", + "user1", + "com.example.spark_program.simple_example.Main$", + 1413515656084L, + "spark_1413515656084_305185"); + historyFileParser.parse(contents, jobKey); + + List jobPuts = historyFileParser.getJobPuts(); + assertNotNull(jobPuts); + assertEquals(2, jobPuts.size()); + + JobKeyConverter jobKeyConv = new JobKeyConverter(); + System.out.println(" rowkey " +jobPuts.get(0).getRow().toString()); + assertEquals(jobKey.toString(), jobKeyConv.fromBytes(jobPuts.get(0).getRow()).toString()); + + // check history file type + boolean foundVersion2 = false; + for (Put p : jobPuts) { + List kv2 = p.get(Constants.INFO_FAM_BYTES, + Bytes.toBytes(JobHistoryKeys.hadoopversion.toString())); + if (kv2.size() == 0) { + // we are interested in hadoop version put only + // hence continue + continue; + } + assertEquals(1, kv2.size()); + Map> d = p.getFamilyMap(); + for (List lkv : d.values()) { + for (KeyValue kv : lkv) { + // ensure we have a spark history file type as the value + assertEquals(Bytes.toString(kv.getValue()), + HistoryFileType.SPARK.toString()); + + // ensure we don't see the same put twice + assertFalse(foundVersion2); + // now set this to true + foundVersion2 = true; + } + } + } + // ensure that we got the spark history file type put + assertTrue(foundVersion2); + + // check job status + boolean foundJobStatus = false; + for (Put p : jobPuts) { + List kv2 = + p.get(Constants.INFO_FAM_BYTES, + Bytes.toBytes(JobHistoryKeys.JOB_STATUS.toString().toLowerCase())); + if (kv2.size() == 0) { + // we are interested in JobStatus put only + // hence continue + continue; + } + assertEquals(1, kv2.size()); + + for (KeyValue kv : kv2) { + // ensure we have a job status value as the value + assertEquals(Bytes.toString(kv.getValue()), + 
JobHistoryFileParserHadoop2.JOB_STATUS_SUCCEEDED); + + // ensure we don't see the same put twice + assertFalse(foundJobStatus); + // now set this to true + foundJobStatus = true; + } + } + // ensure that we got the JobStatus put + assertTrue(foundJobStatus); + + List taskPuts = historyFileParser.getTaskPuts(); + assertEquals(taskPuts.size(), 0); + + // check post processing for megabytemillis + // first with empty job conf + Long mbMillis = historyFileParser.getMegaByteMillis(); + assertNotNull(mbMillis); + Long expValue = 1528224768L; + assertEquals(expValue, mbMillis); + } +} diff --git a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryListener.java b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryListener.java index 90af968..6e74295 100644 --- a/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryListener.java +++ b/hraven-etl/src/test/java/com/twitter/hraven/etl/TestJobHistoryListener.java @@ -31,7 +31,7 @@ import com.twitter.hraven.Constants; import com.twitter.hraven.JobHistoryKeys; import com.twitter.hraven.JobKey; -import com.twitter.hraven.HadoopVersion; +import com.twitter.hraven.HistoryFileType; import com.twitter.hraven.mapreduce.JobHistoryListener; @@ -54,10 +54,10 @@ public void checkHadoopVersionSet() { JobHistoryFileParserHadoop1 jh = new JobHistoryFileParserHadoop1(null); - Put versionPut = jh.getHadoopVersionPut( - HadoopVersion.ONE, + Put versionPut = jh.getHistoryFileTypePut( + HistoryFileType.ONE, jobHistoryListener.getJobKeyBytes()); - jobHistoryListener.includeHadoopVersionPut(versionPut); + jobHistoryListener.includeHistoryFileTypePut(versionPut); assertEquals(jobHistoryListener.getJobPuts().size(), 1); // check hadoop version @@ -76,7 +76,7 @@ public void checkHadoopVersionSet() { for (KeyValue kv : lkv) { // ensure we have a hadoop2 version as the value assertEquals(Bytes.toString(kv.getValue()), - HadoopVersion.ONE.toString() ); + HistoryFileType.ONE.toString() ); // ensure we don't see the same put twice assertFalse(foundVersion1); // now set this to true diff --git a/hraven-etl/src/test/resources/spark_1413515656084_3051855 b/hraven-etl/src/test/resources/spark_1413515656084_3051855 new file mode 100644 index 0000000..43c47e6 --- /dev/null +++ b/hraven-etl/src/test/resources/spark_1413515656084_3051855 @@ -0,0 +1,4 @@ +{ + "jobname":"com.example.spark_program.simple_example.Main$","jobid":"spark_1412702189634_248930","user":"user1","submit_time":1415146248235,"start_time":1415146253739,"finish_time":1415146273227,"queue":"default","megabytemillis":1528224768,"job_status":"SUCCEEDED","batch.desc":""} +} + diff --git a/hraven-etl/src/test/resources/spark_1413515656084_3051855_conf.xml b/hraven-etl/src/test/resources/spark_1413515656084_3051855_conf.xml new file mode 100644 index 0000000..2231f9e --- /dev/null +++ b/hraven-etl/src/test/resources/spark_1413515656084_3051855_conf.xml @@ -0,0 +1,14 @@ + +mapreduce.job.nameSleep job +batch.descAny descriptive string that identifies this batch of jobs- hraven will group individual MR jobs by this name +mapreduce.framework.namespark +mapreduce.job.user.nameSomeUserName +mapreduce.job.queuenamesomeQueueName +yarn.resourcemanager.address0.0.0.0:8030 +yarn.app.mapreduce.am.command-opts-Xmx500m +yarn.nodemanager.process-kill-wait.ms2000 +yarn.nodemanager.log-aggregation.compression-typegz +mapreduce.map.memory.mb512 +mapreduce.reduce.memory.mb512 +yarn.nodemanager.resource.memory-mb8192 + diff --git a/pom.xml b/pom.xml index e15cd5e..464a486 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 
+16,7 @@ 4.0.0 com.twitter.hraven hraven - 0.9.17-SNAPSHOT + 0.9.17.t01-SNAPSHOT hRaven Project https://github.com/twitter/hraven
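Taken together, the mapper change above first asks the factory for the history file type and only then extracts the submit time and builds the matching parser. The detection order the factory change introduces is: a "spark" job id prefix wins outright, otherwise the leading bytes of the file decide between a hadoop 2.x .jhist file (Avro-Json header) and a hadoop 1.x history file (Meta VERSION="1" header). The following is a minimal standalone sketch of that dispatch under simplified assumptions; the nested enum, the literal header strings, and the 9/18-byte prefix lengths are illustrative stand-ins, not the library's actual constants or types.

import java.nio.charset.StandardCharsets;

public class HistoryFileTypeSketch {
  enum HistoryFileType { ONE, TWO, SPARK }

  // Simplified stand-in for JobHistoryFileParserFactory.getHistoryFileType:
  // jobPrefix plays the role of QualifiedJobId.getJobPrefix() ("job" or "spark").
  static HistoryFileType getHistoryFileType(String jobPrefix, byte[] historyFileContents) {
    if (historyFileContents == null) {
      throw new IllegalArgumentException("Null job history file");
    }
    if ("spark".equalsIgnoreCase(jobPrefix)) {
      // spark history files are identified by the job id prefix alone
      return HistoryFileType.SPARK;
    }
    String head = new String(historyFileContents, 0,
        Math.min(historyFileContents.length, 18), StandardCharsets.UTF_8);
    if (head.regionMatches(true, 0, "Avro-Json", 0, 9)) {
      return HistoryFileType.TWO;   // hadoop 2.x .jhist file
    }
    if (head.regionMatches(true, 0, "Meta VERSION=\"1\" .", 0, 18)) {
      return HistoryFileType.ONE;   // hadoop 1.x history file
    }
    throw new IllegalArgumentException("Unknown format of job history file");
  }

  public static void main(String[] args) {
    byte[] h1 = "Meta VERSION=\"1\" .\nJob JOBID=\"job_201301010000_12345\"".getBytes(StandardCharsets.UTF_8);
    byte[] h2 = "Avro-Json\n{\"type\":\"record\",\"name\":\"Event\"}".getBytes(StandardCharsets.UTF_8);
    byte[] sp = "{\"appid\":\"spark_1412702189634_248930\"}".getBytes(StandardCharsets.UTF_8);
    System.out.println(getHistoryFileType("job", h1));    // ONE
    System.out.println(getHistoryFileType("job", h2));    // TWO
    System.out.println(getHistoryFileType("spark", sp));  // SPARK
  }
}

With a type resolved this way, the caller can pick the matching parser (hadoop1, hadoop2, or spark) and reuse the same type when reading the submit time, which is what the mapper hunk in this patch does.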