Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

added readme, beefed up javadoc

  • Loading branch information...
commit 32ea83dac93a014b2042e159f8073f6b7a9229a8 1 parent 73320bf
mmcbride authored
View
74 README.md
@@ -0,0 +1,74 @@
+StreamyJ
+========
+
+StreamyJ is a Scala helper for the Jackson streaming JSON parser.
+Running some (likely entirely unreliable) benchmarks shows Jackson
+to be significantly faster than GSON, which seems to be the most
+popular non-streaming JSON parser available.
+
+StreamyJ makes it easy to write idiomatic Scala parsers using Jackson by:
+* Converting JsonToken constants to case classes, allowing pattern matching
+* Letting you pass partial functions to the parser that take action
+  on specific parsed items.
+
+It's easiest to show with an example
+
+ val s = new Streamy("""{"bar":1}""")
+ s.obj {
+ case FieldName("bar") => println(s.readField())
+ }
+
+In this case, when the parser encounters a JsonToken.FIELD_NAME token and the
+current field name is "bar", the handler runs: s.readField() reads the field's value and println prints it. Simple enough.
+
+A slightly more complex example
+
+ var baz = 0L
+ val s = new Streamy("""{"bar":{"baz":1}}""")
+ s.obj {
+ case FieldName("bar") => {
+ s.obj {
+ case FieldName("baz") => baz = s.readLongField()
+ }
+ }
+ }
+
+Here we have nested handlers. When the parser encounters the "bar" field, we tell it
+to read another object. Inside that nested object, when the parser reaches the "baz"
+field, its value is read as a long and assigned to our baz var.
+
+Parsing arrays is also handled.
+
+ val s = new Streamy("""{"bar":[1,2,3]}""")
+ s.obj {
+ case FieldName("bar") => {
+ s.arr {
+ case ValueLong(l) => println(l)
+ }
+ }
+ }
+
+Parse functions can also be defined separately and then passed to the parser
+
+ val printfield:Streamy.ParseFunc = {
+ case FieldName(s) => println("hit field " + s)
+ }
+ val s = new Streamy("""{"bar":1, "baz":2}""")
+ s.obj(printfield)
+
+Lastly, if the helpers fail you always have access to the underlying Json reader,
+as well as the current Streamy token
+
+ val s = new Streamy("""{"bar": 1, "baz": {"baz2": 3}}""")
+ s.obj {
+ case FieldName("bar") => println("bar is " + s.readLongField())
+ case FieldName("baz") => {
+ println(s.next())
+ s.token match {
+ case StartObject => println("I'm at an object start")
+ case _ => println("I'm not at an object start")
+ }
+ s.readObject(s.eat)
+ }
+ }
+
View
144 src/main/scala/com/twitter/streamyj/Streamy.scala
@@ -2,37 +2,81 @@ package com.twitter.streamyj
import org.codehaus.jackson._
import org.codehaus.jackson.JsonToken._
-import scala.collection.mutable.HashMap
-
+/**
+ * The base case class for a JsonToken
+ */
case class StreamyToken()
-case object EndArray extends StreamyToken
+/** maps to JsonToken.START_ARRAY */
case object StartArray extends StreamyToken
+/** maps to JsonToken.END_ARRAY */
+case object EndArray extends StreamyToken
+/** maps to JsonToken.START_OBJECT */
case object StartObject extends StreamyToken
+/** maps to JsonToken.END_OBJECT */
case object EndObject extends StreamyToken
+/** maps to JsonToken.FIELD_NAME */
case class FieldName(name: String) extends StreamyToken
+/** maps to JsonToken.NOT_AVAILABLE */
case object NotAvailable extends StreamyToken
+/** maps to JsonToken.VALUE_FALSE */
case object ValueFalse extends StreamyToken
+/** maps to JsonToken.VALUE_TRUE */
case object ValueTrue extends StreamyToken
+/** maps to JsonToken.VALUE_NULL */
case object ValueNull extends StreamyToken
+/** A base class for long, double, and string fields */
case class ValueScalar(value: Any) extends StreamyToken
+/** maps to JsonToken.VALUE_NUMBER_INT */
case class ValueLong(override val value: Long) extends ValueScalar(value)
+/** maps to JsonToken.VALUE_NUMBER_FLOAT */
case class ValueDouble(override val value: Double) extends ValueScalar(value)
+/** maps to JsonToken.VALUE_STRING */
case class ValueString(override val value: String) extends ValueScalar(value)
+/**
+ * Holds the ParseFunc type alias, so it can be referenced (easily) by
+ * user implementations, along with the shared Jackson parser factory.
+ */
object Streamy {
+ /**
+ * This is the partial function parse methods need
+ */
type ParseFunc = PartialFunction[StreamyToken, Unit]
+ /**
+ * Jackson's Json parser factory
+ */
+ val factory = new JsonFactory()
+ def createParser(s: String) = factory.createJsonParser(s)
}
+/**
+ * A helper for the Jackson JSON parser.
+ *
+ * Streamy allows you to write streaming parsers in an elegant,
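+ * Scala-idiomatic manner. A quick example:
+ *
+ * {{{
+ * val s = new Streamy("""{"bar":1}""")
+ * s.obj {
+ *   case FieldName("bar") => println(s.readField())
+ * }
+ * }}}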
+ */
class Streamy(s: String) {
+ /**
+ * Pull in Streamy.ParseFunc just for brevity
+ */
type ParseFunc = Streamy.ParseFunc
- val factory = new JsonFactory()
- val reader = factory.createJsonParser(s)
-
+ /**
+ * the underlying json parser
+ */
+ val reader = Streamy.createParser(s)
+ /**
+ * Store the current token (it's useful while parsing)
+ */
var token:StreamyToken = null
+ /**
+ * A mapping of JsonToken constants to StreamyToken instances.
+ * This allows (more elegant) pattern matching parsers
+ */
def tokenToCaseClass(token: JsonToken) = token match {
case START_ARRAY => StartArray
case END_ARRAY => EndArray
@@ -49,27 +93,56 @@ class Streamy(s: String) {
case _ => NotAvailable
}
+ /**
+ * A parse function that "eats" objects or arrays
+ */
val eat:ParseFunc = {
- case token if token != NotAvailable => //noop
+ case FieldName(s) => null; // noop
+ case ValueFalse => null; // noop
+ case ValueTrue => null; // noop
+ case ValueScalar(s) => null; //noop
}
+
+ /**
+ * alias for startObject(fn)
+ */
def \(fn: ParseFunc) = {
- obj(fn)
+ startObject(fn)
}
+ /**
+ * alias for startObject(fn)
+ */
def obj(fn: ParseFunc) = {
startObject(fn)
}
+ /**
+ * An alias for startArray.
+ * Applies the supplied function to the current
+ * JSON array. Note that the current token should
+ * be the start of the array (either an opening bracket or null)
+ */
def arr(fn: ParseFunc) = {
startArray(fn)
}
+ /**
+ * Advances the parser and sets the current token
+ */
def next():StreamyToken = {
token = tokenToCaseClass(reader.nextToken())
token
}
+ /**
+ * applies the supplied function to the current
+ * JSON object. Note that the current token should
+ * be the start of the object (either a curly brace or null).
+ * After the first token is read this passes control to
+ * readObject.
+ */
def startObject(fn: ParseFunc):Unit = {
next() match {
case token if fn.isDefinedAt(token) => {
@@ -82,6 +155,19 @@ class Streamy(s: String) {
readObject(fn)
}
+ /**
+ * Continues reading an object until it is closed.
+ * If the passed in function is defined at the current token
+ * it is called. Otherwise the following actions are taken
+ * <ul>
+ * <li>NotAvailable: return. The JSON stream has ended. Shouldn't happen</li>
+ * <li>EndObject: return. The JSON object has ended.</li>
+ * <li>StartObject: call startObject with the eat handler. Just consumes
+ * tokens from the embedded object</li>
+ * <li>StartArray: call startArray with the eat handler. Just consumes
+ * tokens from the embedded array</li>
+ * <li>Anything else: noop</li>
+ * </ul>
+ */
def readObject(fn: ParseFunc):Unit = {
next() match {
case token if fn.isDefinedAt(token) => {
@@ -89,12 +175,18 @@ class Streamy(s: String) {
}
case NotAvailable => return
case EndObject => return
- case StartObject => readObject(eat)
+ case StartArray => startArray(eat)
+ case StartObject => startObject(eat)
case _ => //noop
}
readObject(fn)
}
+ /**
+ * applies the supplied function to the current
+ * JSON array. Note that the current token should
+ * be the start of the array (either an opening bracket or null)
+ */
def startArray(fn: ParseFunc):Unit = {
next() match {
case token if fn.isDefinedAt(token) => {
@@ -107,6 +199,19 @@ class Streamy(s: String) {
readArray(fn)
}
+ /**
+ * Continues reading an array until it is closed.
+ * If the passed in function is defined at the current token
+ * it is called. Otherwise the following actions are taken
+ * <ul>
+ * <li>NotAvailable: return. The JSON stream has ended. Shouldn't happen</li>
+ * <li>EndArray: return. The JSON array has ended.</li>
+ * <li>StartObject: call startObject with the eat handler. Just consumes
+ * tokens from the embedded object</li>
+ * <li>StartArray: call startArray with the eat handler. Just consumes
+ * tokens from the embedded array</li>
+ * <li>Anything else: noop</li>
+ * </ul>
+ */
def readArray(fn: ParseFunc):Unit = {
next () match {
case token if fn.isDefinedAt(token) => {
@@ -114,12 +219,16 @@ class Streamy(s: String) {
}
case NotAvailable => return
case EndArray => return
- case StartArray => readArray(eat)
+ case StartArray => startArray(eat)
+ case StartObject => startObject(eat)
case _ => //noop
}
readArray(fn)
}
+ /**
+ * reads a field of type Any
+ */
def readField() = {
next() match {
case ValueScalar(rv) => rv
@@ -127,6 +236,10 @@ class Streamy(s: String) {
}
}
+ /**
+ * reads the next scalar value as a string, converting it with toString.
+ * Throws an IllegalArgumentException if the next value isn't a scalar
+ */
def readStringField() = {
next() match {
case ValueScalar(rv) => rv.toString
@@ -134,6 +247,10 @@ class Streamy(s: String) {
}
}
+ /**
+ * reads a long field. Throws an
+ * IllegalArgumentException if the current value isn't a long
+ */
def readLongField() = {
next() match {
case ValueLong(rv) => rv
@@ -141,9 +258,14 @@ class Streamy(s: String) {
}
}
- def readDoubleField() = {
+ /**
+ * reads a double field. Throws an
+ * IllegalArgumentException if the current value isn't a double or a long
+ */
+ def readDoubleField(): Double = {
next() match {
case ValueDouble(rv) => rv
+ case ValueLong(rv) => rv
case _ => throw new IllegalArgumentException("tried to read a non-numeric field as a double")
}
}
View
12 src/test/scala/com/twitter/streamyj/StreamySpec.scala
@@ -116,6 +116,18 @@ object StreamySpec extends Specification {
foo must be_==(2)
}
+ "handle embedded objects and arrays" in {
+ val s = new Streamy("""{"id":1, "embed":{"foo":"bar", "baz":{"baz":1}, "arr":[[1],2,3,4]},"id2":2}""")
+ var id = 0L
+ var id2 = 0L
+ s \ {
+ case FieldName("id") => id = s.readLongField()
+ case FieldName("id2") => id2 = s.readLongField()
+ }
+ id must be_==(1)
+ id2 must be_==(2)
+ }
+
"work on a tweet" in {
val s = new Streamy(unAnnotatedJSON)
Please sign in to comment.
Something went wrong with that request. Please try again.