In [1]:
import org.apache.spark.sql.types.{StructType, StringType}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.DataFrame

In [2]:
println(s"Current spark version is ${spark.version}")

Current spark version is 2.4.4


In [3]:
val getProbability = udf((prediction: org.apache.spark.ml.linalg.Vector) => prediction(1))

getProbability = UserDefinedFunction(<function1>,DoubleType,Some(List(org.apache.spark.ml.linalg.VectorUDT@3bfc3ba7)))


UserDefinedFunction(<function1>,DoubleType,Some(List(org.apache.spark.ml.linalg.VectorUDT@3bfc3ba7)))

In [4]:
val inputStreamPath = "/home/jovyan/data/events-stream"

val dataSchema = new StructType()
    .add("tweet", StringType)

val inputDF = spark
    .readStream
    .schema(dataSchema)
    .option("maxFilesPerTrigger", 1)
    .json(inputStreamPath)

inputStreamPath = /home/jovyan/data/events-stream
dataSchema = StructType(StructField(tweet,StringType,true))
inputDF = [tweet: string]


[tweet: string]

In [5]:
val modelPath = "/home/jovyan/models/spark-ml-model"
val model = PipelineModel.load(modelPath)

modelPath = /home/jovyan/models/spark-ml-model
model = pipeline_e9786cc35e88


pipeline_e9786cc35e88

In [6]:
val predictionsDF = model
    .transform(inputDF)
    .withColumn("clean_probability", getProbability($"probability"))
    .select($"tweet", $"probability", $"clean_probability", $"prediction")

predictionsDF = [tweet: string, probability: vector ... 2 more fields]


[tweet: string, probability: vector ... 2 more fields]

Output tweet, prediction and probability

In [7]:
val query = predictionsDF
    .writeStream
    .foreachBatch { (batchDF: DataFrame, batchId: Long) => batchDF.show() }
    .start()

query.awaitTermination(10000)

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|Ugh I don't think...|[0.55208796497136...|0.44791203502863364|       0.0|
|Isnt looking forw...|[0.51631425296792...|0.48368574703207384|       0.0|
|@dooganized he is...|[0.55729786163919...| 0.4427021383608079|       0.0|
|      i am so upset |[0.52112898087296...|0.47887101912703245|       0.0|
|I am going to ans...|[0.53288554543786...|0.46711445456213796|       0.0|
|@dianafariza owhh...|[0.49471670296339...| 0.5052832970366073|       1.0|
|second interview....|[0.47455118342889...| 0.5254488165711034|       1.0|
+--------------------+--------------------+-------------------+----------+

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|growing weary of ...|[0.57370576072924...|0.42629423927075555|       0.0|
|My eyes are runni...|[0.52105444600449...| 0.4789455539955098|       0.0|
|I hope he doesn't...|[0.51060867328331...|0.48939132671668006|       0.0|
|So sad to see her...|[0.56015804086909...|   0.43984195913091|       0.0|
|still needs quite...|[0.48948435298131...| 0.5105156470186842|       1.0|
|@virtualmarketer ...|[0.52463002062467...|0.47536997937532155|       0.0|
|@leleloveleigh aw...|[0.46684731798466...|  0.533152682015332|       1.0|
|i think im in an ...|[0.57900665676035...| 0.4209933432396413|       0.0|
|I can't believe I...|[0.60800534944016...|0.39199465055983845|       0.0|
|is tired and hope...|[0.56768314866505...|0.43231685133494857|       0.0|
|@destinymathurin ...|[0.

query = org.apache.spark.sql.execution.streaming.StreamingQueryWrapper@4b5fe7ed


false

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|trying to figure ...|[0.48117024377757...|  0.518829756222423|       1.0|
|Which Shampoo is ...|[0.54995134191505...| 0.4500486580849459|       0.0|
|@aliwise TMZ HARV...|[0.46560941547126...| 0.5343905845287309|       1.0|
|back from the hos...|[0.47197769666006...| 0.5280223033399339|       1.0|
|regreatingi wasnt...|[0.46067837931021...| 0.5393216206897891|       1.0|
|@egdidwob my hair...|[0.55454819024974...|0.44545180975025445|       0.0|
|last day of schoo...|[0.52454922061477...| 0.4754507793852218|       0.0|
|I guess having ba...|[0.55273578941215...|0.44726421058784455|       0.0|
|@MsDesMarie I kno...|[0.46961192587552...| 0.5303880741244795|       1.0|
|@kevinoshea Aim i...|[0.46853102211316...|  0.531468977886831|       1.0|
|@MisKoChai am I d...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|Perhaps because i...|[0.49746968563953...| 0.5025303143604676|       1.0|
|bloody hell, i mi...|[0.60560735826848...| 0.3943926417315132|       0.0|
|@sammielynn8489 y...|[0.54584228463372...| 0.4541577153662755|       0.0|
|Is hoping tj call...|[0.54311275432784...|0.45688724567215083|       0.0|
|I hate to say thi...|[0.59874608205250...|0.40125391794749554|       0.0|
|@brian1067 aww ma...|[0.36081654157497...|  0.639183458425025|       1.0|
|I don't know why ...|[0.60207973326079...| 0.3979202667392065|       0.0|
|. still not getin...|[0.61452881208962...|0.38547118791037216|       0.0|
|Been to the coast...|[0.50247605306075...| 0.4975239469392418|       0.0|
|@adamneal23 Of co...|[0.46553722452646...| 0.5344627754735303|       1.0|
|@steve_berra happ...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|i needa stop proc...|[0.50461786222708...|0.49538213777291307|       0.0|
|Me and my dad are...|[0.44354383221099...| 0.5564561677890064|       1.0|
|@ddlovato why don...|[0.49895009303029...|  0.501049906969708|       1.0|
|Summer sonic owns...|[0.49043499784578...| 0.5095650021542112|       1.0|
|           Coughing |[0.43565293729203...| 0.5643470627079621|       1.0|
|Aaah I can't slee...|[0.59124852268181...|0.40875147731818084|       0.0|
|i hate how it rai...|[0.65246692730732...| 0.3475330726926785|       0.0|
|Listening to a ba...|[0.49253418355055...| 0.5074658164494479|       1.0|
| @AnDpap Happy bDay |[0.42399013022054...|  0.576009869779458|       1.0|
|going to Glidden ...|[0.47155583135614...| 0.5284441686438535|       1.0|
|@LatinSoulChild u...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|@taylorphinney i ...|[0.54680941441370...| 0.4531905855862941|       0.0|
|About to board th...|[0.53035493780653...|0.46964506219346014|       0.0|
|Awhhh sounds fun!...|[0.49911296356017...|  0.500887036439828|       1.0|
|thinkin bout a lo...|[0.50624216312750...|0.49375783687249575|       0.0|
|The N front st dr...|[0.50612800802764...|0.49387199197235143|       0.0|
|@krisis I doubt I...|[0.53950496777257...| 0.4604950322274221|       0.0|
|If u follow meeee...|[0.46029823137120...| 0.5397017686287966|       1.0|
|@LilyMazahery Tha...|[0.45968351994938...|  0.540316480050619|       1.0|
|@tweetermass Here...|[0.46644626413532...| 0.5335537358646785|       1.0|
|@Jonasbrothers Ke...|[0.45450015161964...| 0.5454998483803553|       1.0|
|@lilweird4ever: w...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|goodnight! school...|[0.49043146262340...| 0.5095685373765948|       1.0|
|getting ready for...|[0.60009323942489...| 0.3999067605751041|       0.0|
|@yelyahwilliams i...|[0.48785111855842...| 0.5121488814415751|       1.0|
| cant sleep becau...|[0.59418658131533...| 0.4058134186846623|       0.0|
|@mdixson Sorry to...|[0.56018092831412...| 0.4398190716858768|       0.0|
|@erthefae No rain...|[0.52061054722711...|0.47938945277288836|       0.0|
|getting ready for...|[0.46413426011099...| 0.5358657398890049|       1.0|
|just as i finish ...|[0.55761436842445...| 0.4423856315755409|       0.0|
|mona??.........co...|[0.42482862942991...| 0.5751713705700885|       1.0|
|On twitter..... o...|[0.46067837931021...| 0.5393216206897891|       1.0|
|http://twitpic.co...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|at andi's house.....|[0.47023420965668...| 0.5297657903433114|       1.0|
|... don't like go...|[0.50950297113518...| 0.4904970288648147|       0.0|
|Little hungover.....|[0.48493986288345...|  0.515060137116549|       1.0|
|Great stuff - an ...|[0.44556735458757...| 0.5544326454124235|       1.0|
|got a sty in my eye |[0.51181325663627...| 0.4881867433637247|       0.0|
|@JessicaBB Cleari...|[0.50031777086589...|0.49968222913410026|       0.0|
|@lauradaley_ got ...|[0.45970666644664...| 0.5402933335533596|       1.0|
|i'm about to enjo...|[0.47695358000773...| 0.5230464199922625|       1.0|
|reliving the aash...|[0.43654675637071...| 0.5634532436292893|       1.0|
|Arriving in Ghent...|[0.47563685424405...| 0.5243631457559461|       1.0|
|Use Ctrl F to ope...|[0.

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|@DonnieWahlberg  ...|[0.62588503265704...| 0.3741149673429584|       0.0|
|dang that burrito...|[0.51586207987485...| 0.4841379201251464|       0.0|
|@hiltonius I wish...|[0.57992146748205...| 0.4200785325179403|       0.0|
|Trip delayed by o...|[0.46776575486540...|  0.532234245134591|       1.0|
|@sherieriot ooo i...|[0.49729446512077...| 0.5027055348792263|       1.0|
|Watching &quot;My...|[0.46413426011099...| 0.5358657398890049|       1.0|
|Oh... late? Ok. L...|[0.46413426011099...| 0.5358657398890049|       1.0|
|@arbonneteam Than...|[0.37521736304118...| 0.6247826369588168|       1.0|
|has been enjoying...|[0.40275938542602...| 0.5972406145739786|       1.0|
|Lmao i wanna jump...|[0.51442423387105...| 0.4855757661289447|       0.0|
|It's just really ...|[0.

+--------------------+--------------------+------------------+----------+
|               tweet|         probability| clean_probability|prediction|
+--------------------+--------------------+------------------+----------+
|Oh no! Norwich fo...|[0.46413426011099...|0.5358657398890049|       1.0|
|Got ready, drove ...|[0.60480116840196...|0.3951988315980329|       0.0|
|Another damn shot...|[0.46939967447132...|0.5306003255286735|       1.0|
|Doing Assignments...|[0.42985310665584...|0.5701468933441576|       1.0|
|@schaeferj89 1)fo...|[0.60678203897473...|0.3932179610252608|       0.0|
|i've been wearing...|[0.56403954169329...|0.4359604583067014|       0.0|
|@alcanterbury No ...|[0.52396602746058...|0.4760339725394197|       0.0|
|    i wanna go home |[0.52350228701951...|0.4764977129804813|       0.0|
|@ocifant A millio...|[0.46486005710386...|0.5351399428961389|       1.0|
|2 songs down! 2 t...|[0.50074815129878...| 0.499251848701212|       0.0|
|Watching How to D...|[0.4943257110948

+--------------------+--------------------+-------------------+----------+
|               tweet|         probability|  clean_probability|prediction|
+--------------------+--------------------+-------------------+----------+
|Aaaargh! I must b...|[0.52272931254008...|0.47727068745991696|       0.0|
|Also...how am I s...|[0.58263286139535...|0.41736713860464225|       0.0|
|car just got back...|[0.53407432826453...| 0.4659256717354649|       0.0|
|@atuarre I don't ...|[0.52194529633608...|0.47805470366391206|       0.0|
|is looking out th...|[0.48278237839705...| 0.5172176216029403|       1.0|
|Boot Camp '09 is ...|[0.53546174340302...|0.46453825659697934|       0.0|
|Got the monitor, ...|[0.45603357227019...|  0.543966427729802|       1.0|
|why is tweetdeck ...|[0.58807251641654...|0.41192748358345777|       0.0|
|@RisaRM I can't w...|[0.52118188791453...|0.47881811208546315|       0.0|
|@kennymuto You ar...|[0.43998935631043...| 0.5600106436895675|       1.0|
|@Efusjonsales man...|[0.

Output count of positive and negative tweets for each last 10 seconds

In [10]:
val query2 = predictionsDF
    .withColumn("timestamp", current_timestamp)
    .withWatermark("timestamp", "10 seconds")
    .groupBy(window($"timestamp", "10 seconds", "1 second"), $"prediction")
    .count
    .writeStream
    .outputMode("append")
    .format("console")
    .start()

query2.awaitTermination(500000)

-------------------------------------------
Batch: 0
-------------------------------------------
+------+----------+-----+
|window|prediction|count|
+------+----------+-----+
+------+----------+-----+

-------------------------------------------
Batch: 1
-------------------------------------------
+------+----------+-----+
|window|prediction|count|
+------+----------+-----+
+------+----------+-----+

-------------------------------------------
Batch: 2
-------------------------------------------
+--------------------+----------+-----+
|              window|prediction|count|
+--------------------+----------+-----+
|[2019-10-05 18:09...|       1.0|    2|
|[2019-10-05 18:09...|       0.0|    5|
|[2019-10-05 18:09...|       1.0|    2|
|[2019-10-05 18:09...|       0.0|    5|
|[2019-10-05 18:09...|       1.0|    2|
|[2019-10-05 18:09...|       0.0|    5|
|[2019-10-05 18:09...|       1.0|    2|
|[2019-10-05 18:09...|       0.0|    5|
|[2019-10-05 18:09...|       1.0|    2|
|[2019-10-05 18:09.

query2 = org.apache.spark.sql.execution.streaming.StreamingQueryWrapper@3ace9415


false

-------------------------------------------
Batch: 4
-------------------------------------------
+--------------------+----------+-----+
|              window|prediction|count|
+--------------------+----------+-----+
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...|       1.0|    9|
|[2019-10-05 18:13...|       0.0|    5|
|[2019-10-05 18:13...| 