In [1]:
// Show which Spark release this notebook session is running against.
{
  val sparkVersion = spark.version
  println(s"Current spark version is ${sparkVersion}")
}

Current spark version is 2.4.4


In [2]:
import org.apache.spark.sql.types.{StructType, StructField, IntegerType, LongType, StringType}

// Column layout of the header-less CSV file. Every column must be declared so the
// file parses positionally, even though only `tweet` is kept by the read below.
val dataSchema = StructType(Seq(
  StructField("target", IntegerType),
  StructField("id", LongType),
  StructField("raw_timestamp", StringType),
  StructField("query_status", StringType),
  StructField("author", StringType),
  StructField("tweet", StringType)
))

val dataPath = "/home/jovyan/data/training.1600000.processed.noemoticon.csv"

// Parse the file with the explicit schema and keep just the tweet text column.
val raw_sentiment = spark.read
  .schema(dataSchema)
  .option("header", false)
  .csv(dataPath)
  .select("tweet")

// count() is an action: it triggers a Spark job over the file.
println(s"Total tweets in file: ${raw_sentiment.count()}")


Total tweets in file: 1600000


dataSchema = StructType(StructField(target,IntegerType,true), StructField(id,LongType,true), StructField(raw_timestamp,StringType,true), StructField(query_status,StringType,true), StructField(author,StringType,true), StructField(tweet,StringType,true))
dataPath = /home/jovyan/data/training.1600000.processed.noemoticon.csv
raw_sentiment = [tweet: string]


[tweet: string]

In [5]:
import java.util.{Calendar, Timer, TimerTask}

// Simulates a live event stream: a background timer repeatedly appends a tiny random
// sample of `raw_sentiment`, as JSON, to the events-stream directory.
//
// The timer thread is named and marked as a daemon so that it cannot keep the JVM
// alive on its own if the timer is never cancelled.
val timer = new Timer("events-stream-writer", true)

val task = new TimerTask {
  /** Writes one sampled batch and logs the outcome.
    *
    * Non-fatal failures are caught and logged: an exception escaping `run()` would
    * terminate the Timer thread and silently stop all subsequent executions.
    */
  override def run(): Unit = {
    import scala.util.control.NonFatal
    try {
      // Sampling with replacement at this fraction yields only a handful of rows per run.
      val data = raw_sentiment.sample(fraction=0.00001,withReplacement=true)
      // coalesce(1) so each run appends a single JSON part file to the directory.
      data.coalesce(1).write.format("json").mode("append").save("/home/jovyan/data/events-stream")
      println(s"${Calendar.getInstance().toInstant} - saved some data to the events stream!")
    } catch {
      case NonFatal(e) =>
        Console.err.println(
          s"${Calendar.getInstance().toInstant} - failed to save data to the events stream: ${e.getMessage}")
    }
  }
}

println("Streaming started!")

// Fixed-delay schedule: first run after 1 s, then a nominal 1 s period. Each write takes
// longer than the period (see the logged timestamps), so runs execute back-to-back
// rather than overlapping.
timer.schedule(task, 1000L, 1000L)

Streaming started!


timer = java.util.Timer@28ee6296
task = $anon$1@6a74e86a


$anon$1@6a74e86a

2019-10-05T16:42:36.740Z - saved some data to the events stream!
2019-10-05T16:42:40.638Z - saved some data to the events stream!
2019-10-05T16:42:44.474Z - saved some data to the events stream!
2019-10-05T16:42:48.318Z - saved some data to the events stream!
2019-10-05T16:42:52.169Z - saved some data to the events stream!
2019-10-05T16:42:55.992Z - saved some data to the events stream!
2019-10-05T16:42:59.818Z - saved some data to the events stream!
2019-10-05T16:43:03.695Z - saved some data to the events stream!
2019-10-05T16:43:07.538Z - saved some data to the events stream!
2019-10-05T16:43:11.348Z - saved some data to the events stream!
2019-10-05T16:43:15.144Z - saved some data to the events stream!
2019-10-05T16:43:18.950Z - saved some data to the events stream!
2019-10-05T16:43:22.721Z - saved some data to the events stream!
2019-10-05T16:43:26.560Z - saved some data to the events stream!
2019-10-05T16:43:30.383Z - saved some data to the events stream!
2019-10-05T16:43:34.197Z 

2019-10-05T16:50:49.850Z - saved some data to the events stream!
2019-10-05T16:50:53.758Z - saved some data to the events stream!
2019-10-05T16:50:57.623Z - saved some data to the events stream!
2019-10-05T16:51:01.517Z - saved some data to the events stream!
2019-10-05T16:51:05.443Z - saved some data to the events stream!
2019-10-05T16:51:09.811Z - saved some data to the events stream!
2019-10-05T16:51:14.381Z - saved some data to the events stream!
2019-10-05T16:51:18.203Z - saved some data to the events stream!
2019-10-05T16:51:22.051Z - saved some data to the events stream!
2019-10-05T16:51:25.892Z - saved some data to the events stream!
2019-10-05T16:51:29.680Z - saved some data to the events stream!
2019-10-05T16:51:33.479Z - saved some data to the events stream!
2019-10-05T16:51:37.298Z - saved some data to the events stream!
2019-10-05T16:51:41.164Z - saved some data to the events stream!
2019-10-05T16:51:44.989Z - saved some data to the events stream!
2019-10-05T16:51:48.895Z 

2019-10-05T16:59:04.067Z - saved some data to the events stream!
2019-10-05T16:59:07.936Z - saved some data to the events stream!
2019-10-05T16:59:11.850Z - saved some data to the events stream!
2019-10-05T16:59:15.769Z - saved some data to the events stream!
2019-10-05T16:59:19.918Z - saved some data to the events stream!
2019-10-05T16:59:23.759Z - saved some data to the events stream!
2019-10-05T16:59:27.589Z - saved some data to the events stream!
2019-10-05T16:59:31.426Z - saved some data to the events stream!
2019-10-05T16:59:35.237Z - saved some data to the events stream!
2019-10-05T16:59:39.103Z - saved some data to the events stream!
2019-10-05T16:59:42.963Z - saved some data to the events stream!
2019-10-05T16:59:46.783Z - saved some data to the events stream!
2019-10-05T16:59:50.626Z - saved some data to the events stream!
2019-10-05T16:59:54.467Z - saved some data to the events stream!
2019-10-05T16:59:58.298Z - saved some data to the events stream!
2019-10-05T17:00:02.151Z 

2019-10-05T17:07:17.181Z - saved some data to the events stream!
2019-10-05T17:07:21.011Z - saved some data to the events stream!
2019-10-05T17:07:25.051Z - saved some data to the events stream!
2019-10-05T17:07:28.893Z - saved some data to the events stream!
2019-10-05T17:07:32.821Z - saved some data to the events stream!
2019-10-05T17:07:36.663Z - saved some data to the events stream!
2019-10-05T17:07:40.562Z - saved some data to the events stream!
2019-10-05T17:07:44.437Z - saved some data to the events stream!
2019-10-05T17:07:48.360Z - saved some data to the events stream!
2019-10-05T17:07:52.208Z - saved some data to the events stream!
2019-10-05T17:07:56.245Z - saved some data to the events stream!
2019-10-05T17:08:00.426Z - saved some data to the events stream!
2019-10-05T17:08:06.212Z - saved some data to the events stream!
2019-10-05T17:08:12.057Z - saved some data to the events stream!
2019-10-05T17:08:17.868Z - saved some data to the events stream!
2019-10-05T17:08:23.707Z 

2019-10-05T17:16:15.968Z - saved some data to the events stream!
2019-10-05T17:16:19.794Z - saved some data to the events stream!
2019-10-05T17:16:23.667Z - saved some data to the events stream!
2019-10-05T17:16:27.654Z - saved some data to the events stream!
2019-10-05T17:16:31.570Z - saved some data to the events stream!
2019-10-05T17:16:36.801Z - saved some data to the events stream!
2019-10-05T17:16:42.247Z - saved some data to the events stream!
2019-10-05T17:16:47.885Z - saved some data to the events stream!
2019-10-05T17:16:53.614Z - saved some data to the events stream!
2019-10-05T17:16:59.027Z - saved some data to the events stream!
2019-10-05T17:17:04.949Z - saved some data to the events stream!
2019-10-05T17:17:10.270Z - saved some data to the events stream!
2019-10-05T17:17:15.173Z - saved some data to the events stream!
2019-10-05T17:17:20.726Z - saved some data to the events stream!
2019-10-05T17:17:26.168Z - saved some data to the events stream!
2019-10-05T17:17:31.549Z 

2019-10-05T17:27:02.262Z - saved some data to the events stream!
2019-10-05T17:27:06.141Z - saved some data to the events stream!
2019-10-05T17:27:10.063Z - saved some data to the events stream!
2019-10-05T17:27:14.039Z - saved some data to the events stream!
2019-10-05T17:27:18.001Z - saved some data to the events stream!
2019-10-05T17:27:21.859Z - saved some data to the events stream!
2019-10-05T17:27:25.747Z - saved some data to the events stream!
2019-10-05T17:27:29.589Z - saved some data to the events stream!
2019-10-05T17:27:33.424Z - saved some data to the events stream!
2019-10-05T17:27:37.273Z - saved some data to the events stream!
2019-10-05T17:27:41.117Z - saved some data to the events stream!
2019-10-05T17:27:45.014Z - saved some data to the events stream!
2019-10-05T17:27:48.960Z - saved some data to the events stream!
2019-10-05T17:27:52.876Z - saved some data to the events stream!
2019-10-05T17:27:56.749Z - saved some data to the events stream!
2019-10-05T17:28:00.726Z 

2019-10-05T17:37:25.309Z - saved some data to the events stream!
2019-10-05T17:37:30.369Z - saved some data to the events stream!
2019-10-05T17:37:35.935Z - saved some data to the events stream!
2019-10-05T17:37:41.545Z - saved some data to the events stream!
2019-10-05T17:37:47.128Z - saved some data to the events stream!
2019-10-05T17:37:52.993Z - saved some data to the events stream!
2019-10-05T17:37:58.675Z - saved some data to the events stream!
2019-10-05T17:38:04.136Z - saved some data to the events stream!
2019-10-05T17:38:09.590Z - saved some data to the events stream!
2019-10-05T17:38:14.879Z - saved some data to the events stream!
2019-10-05T17:38:20.470Z - saved some data to the events stream!
2019-10-05T17:38:26.091Z - saved some data to the events stream!
2019-10-05T17:38:30.354Z - saved some data to the events stream!
2019-10-05T17:38:34.209Z - saved some data to the events stream!
2019-10-05T17:38:38.054Z - saved some data to the events stream!
2019-10-05T17:38:42.197Z 

2019-10-05T17:48:37.911Z - saved some data to the events stream!
2019-10-05T17:48:43.332Z - saved some data to the events stream!
2019-10-05T17:48:49.301Z - saved some data to the events stream!
2019-10-05T17:48:54.899Z - saved some data to the events stream!
2019-10-05T17:49:00.738Z - saved some data to the events stream!
2019-10-05T17:49:06.281Z - saved some data to the events stream!
2019-10-05T17:49:12.415Z - saved some data to the events stream!
2019-10-05T17:49:17.983Z - saved some data to the events stream!
2019-10-05T17:49:23.488Z - saved some data to the events stream!
2019-10-05T17:49:28.831Z - saved some data to the events stream!
2019-10-05T17:49:34.228Z - saved some data to the events stream!
2019-10-05T17:49:39.412Z - saved some data to the events stream!
2019-10-05T17:49:44.971Z - saved some data to the events stream!
2019-10-05T17:49:50.460Z - saved some data to the events stream!
2019-10-05T17:49:55.874Z - saved some data to the events stream!
2019-10-05T17:50:01.362Z 

2019-10-05T18:00:00.943Z - saved some data to the events stream!
2019-10-05T18:00:06.506Z - saved some data to the events stream!
2019-10-05T18:00:11.935Z - saved some data to the events stream!
2019-10-05T18:00:17.724Z - saved some data to the events stream!
2019-10-05T18:00:23.420Z - saved some data to the events stream!
2019-10-05T18:00:28.850Z - saved some data to the events stream!
2019-10-05T18:00:34.362Z - saved some data to the events stream!
2019-10-05T18:00:39.353Z - saved some data to the events stream!
2019-10-05T18:00:45.181Z - saved some data to the events stream!
2019-10-05T18:00:50.937Z - saved some data to the events stream!
2019-10-05T18:00:56.302Z - saved some data to the events stream!
2019-10-05T18:01:01.881Z - saved some data to the events stream!
2019-10-05T18:01:07.731Z - saved some data to the events stream!
2019-10-05T18:01:13.242Z - saved some data to the events stream!
2019-10-05T18:01:18.581Z - saved some data to the events stream!
2019-10-05T18:01:24.117Z 

2019-10-05T18:11:50.411Z - saved some data to the events stream!
2019-10-05T18:11:55.904Z - saved some data to the events stream!
2019-10-05T18:12:01.388Z - saved some data to the events stream!
2019-10-05T18:12:07.195Z - saved some data to the events stream!
2019-10-05T18:12:15.933Z - saved some data to the events stream!
2019-10-05T18:12:21.473Z - saved some data to the events stream!
2019-10-05T18:12:26.999Z - saved some data to the events stream!
2019-10-05T18:12:32.581Z - saved some data to the events stream!
2019-10-05T18:12:38.459Z - saved some data to the events stream!
2019-10-05T18:12:43.863Z - saved some data to the events stream!
2019-10-05T18:12:49.832Z - saved some data to the events stream!
2019-10-05T18:12:55.399Z - saved some data to the events stream!
2019-10-05T18:13:00.988Z - saved some data to the events stream!
2019-10-05T18:13:06.321Z - saved some data to the events stream!
2019-10-05T18:13:11.886Z - saved some data to the events stream!
2019-10-05T18:13:17.396Z 

In [6]:
// Stop the simulated stream. Cancelling only the task leaves the Timer's background
// thread alive for as long as `timer` stays referenced, so the timer is shut down too.
// The cell still evaluates to the result of task.cancel().
{
  val taskCancelled = task.cancel()
  timer.cancel()
  taskCancelled
}

true