## Stream Processing Exercise 4 - Consuming from Kafka

Goals:

* Perform different computations on a input stream: read, aggregation, windowed aggregation
* Additional references
    * [Spark Streaming](https://spark.apache.org/streaming/)
    * [Structured Spark Streaming documentation](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html)
    * [Spark and Kafka integration guide](https://spark.apache.org/docs/latest/structured-streaming-kafka-integration.html)


Let’s inspect content of Pageviews topic, showing it every 5 seconds:

In [1]:
import sys
import os 
import json
import hashlib
from pyspark import SparkContext
from pyspark import SparkConf
from pyspark.streaming import StreamingContext

from pyspark.streaming.kafka import KafkaUtils
import json
#from org.elasticsearch.spark import *

conf = SparkConf().setAll([("es.index.auto.create", "true")])

sc = SparkContext(appName="QuotesConsumer",conf=conf)

#sc = SparkContext(appName="PageViewsConsumer")

ssc = StreamingContext(sc, 5)


topicQuotes = ['quotes']

kafkaParams = {'bootstrap.servers': 'broker:29092', 
               'group.id' : 'quotesConsumer'}

streamQuotes = KafkaUtils.createDirectStream(ssc, topicQuotes, kafkaParams)

streamQuotes.map(lambda record : (record[0], record[1])).pprint()


es_write_quotes = {
        "es.nodes" : "elasticsearch",
        "es.port" : "9200",
        "es.resource" : 'quotes/_doc',
        "es.input.json": "yes",
    }

streamQuotes.foreachRDD(lambda rdd: rdd.saveAsNewAPIHadoopFile(
        path='-',
        outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf=es_write_quotes))

topicTwitter = ['tweets']

streamTwitter = KafkaUtils.createDirectStream(ssc, topicTwitter, kafkaParams)

streamTwitter.map(lambda record : (record[0], record[1])).pprint()


es_write_twitter = {
        "es.nodes" : "elasticsearch",
        "es.port" : "9200",
        "es.resource" : 'tweets/_doc',
        "es.input.json": "yes",
    }

streamTwitter.foreachRDD(lambda rdd: rdd.saveAsNewAPIHadoopFile(
        path='-',
        outputFormatClass="org.elasticsearch.hadoop.mr.EsOutputFormat",
        keyClass="org.apache.hadoop.io.NullWritable",
        valueClass="org.elasticsearch.hadoop.mr.LinkedMapWritable",
        conf=es_write_twitter))

ssc.start()
ssc.awaitTermination()

-------------------------------------------
Time: 2021-01-14 17:17:25
-------------------------------------------

-------------------------------------------
Time: 2021-01-14 17:17:25
-------------------------------------------
(None, '{"created_at":"Thu Jan 14 17:17:18 +0000 2021","id":1349767560363868163,"id_str":"1349767560363868163","text":"@Phaethon314 The Ivy Leaguers are overrated. I worked with many at Google. Arrogant Brotopia.","display_text_range":[13,93],"source":"\\u003ca href=\\"https:\\/\\/mobile.twitter.com\\" rel=\\"nofollow\\"\\u003eTwitter Web App\\u003c\\/a\\u003e","truncated":false,"in_reply_to_status_id":1349767185405792257,"in_reply_to_status_id_str":"1349767185405792257","in_reply_to_user_id":1289187183216263168,"in_reply_to_user_id_str":"1289187183216263168","in_reply_to_screen_name":"Phaethon314","user":{"id":1349331096471703555,"id_str":"1349331096471703555","name":"Sabrinagals","screen_name":"sabrinagals","location":null,"url":null,"description":"Big\\u2764

Py4JJavaError: An error occurred while calling o29.awaitTermination.
: org.apache.spark.SparkException: An exception was raised by Python:
Traceback (most recent call last):
  File "/usr/local/spark/python/pyspark/streaming/util.py", line 68, in call
    r = self.func(t, *rdds)
  File "/usr/local/spark/python/pyspark/streaming/kafka.py", line 403, in <lambda>
    func = lambda r, rdd: old_func(rdd)
  File "<ipython-input-1-6a699efa4abb>", line 65, in <lambda>
    conf=es_write_twitter))
  File "/usr/local/spark/python/pyspark/rdd.py", line 1438, in saveAsNewAPIHadoopFile
    keyConverter, valueConverter, jconf)
  File "/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.saveAsNewAPIHadoopFile.
: org.apache.spark.SparkException: Job aborted.
	at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:100)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1081)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:1000)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:991)
	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:991)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:991)
	at org.apache.spark.api.python.PythonRDD$.saveAsNewAPIHadoopFile(PythonRDD.scala:584)
	at org.apache.spark.api.python.PythonRDD.saveAsNewAPIHadoopFile(PythonRDD.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 23.0 failed 1 times, most recent failure: Lost task 0.0 in stage 23.0 (TID 23, localhost, executor driver): org.apache.spark.SparkException: Task failed while writing rows
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:157)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.elasticsearch.hadoop.EsHadoopException: Could not write all entries for bulk operation [1/12]. Error sample (first [5] error messages):
	org.elasticsearch.hadoop.rest.EsHadoopRemoteException: illegal_argument_exception: Limit of total fields [1000] in index [tweets] has been exceeded
	{"index":{}}
{"created_at":"Thu Jan 14 17:17:33 +0000 2021","id":1349767623567953924,"id_str":"1349767623567953924","text":"RT @MyHelpfulHints_: Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or you\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":778031123192508417,"id_str":"778031123192508417","name":"Clair","screen_name":"TwystedRoots","location":"Cornwall","url":"https:\/\/www.twystedroots.co.uk\/shop","description":"Maker of bead and wire tree sculptures. Artist inspired by nature. #QueenOf Wire Trees and #SBS winner May 2018. On the #TwitterSisters team.  #ActuallyAutistic","translator_type":"none","protected":false,"verified":false,"followers_count":10016,"friends_count":5634,"listed_count":68,"favourites_count":92467,"statuses_count":67370,"created_at":"Tue Sep 20 00:40:49 +0000 2016","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1F8E8E","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1335984943462281217\/cqJ3pBLh_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1335984943462281217\/cqJ3pBLh_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/778031123192508417\/1565626601","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Thu Jan 14 10:27:59 +0000 2021","id":1349664554465304576,"id_str":"1349664554465304576","text":"Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or\u2026 https:\/\/t.co\/OaqKZb604a","display_text_range":[0,140],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":923162163526676480,"id_str":"923162163526676480","name":"\ud835\uddac\ud835\uddb8 \ud835\udda7\ud835\udda4\ud835\uddab\ud835\uddaf\ud835\udda5\ud835\uddb4\ud835\uddab \ud835\udda7\ud835\udda8\ud835\uddad\ud835\uddb3\ud835\uddb2","screen_name":"MyHelpfulHints_","location":"On your device","url":"https:\/\/myhelpfulhints.co.uk\/hello\/","description":"Multi-Award Winning\ud83e\udd47Product Reviews Website | Small Business Directory #MHHSBD | Twitter Giveaways | #KingOf \ud83d\udc51 | Powered by Autism, Honesty and Coffee \u2615","translator_type":"none","protected":false,"verified":false,"followers_count":5224,"friends_count":2984,"listed_count":10,"favourites_count":32892,"statuses_count":19797,"created_at":"Wed Oct 25 12:19:46 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"DD8500","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1344395261280473088\/m-d1VVV4_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1344395261280473088\/m-d1VVV4_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/923162163526676480\/1607006107","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or you're a member of my small business directory, would you be so kind as to do my job and write a review \ud83d\ude06\n\n#MHHSBD \n\nhttps:\/\/t.co\/JZuWJGGS7F https:\/\/t.co\/Rbepya8XM3","display_text_range":[0,255],"entities":{"hashtags":[{"text":"MHHSBD","indices":[222,229]}],"urls":[{"url":"https:\/\/t.co\/JZuWJGGS7F","expanded_url":"https:\/\/g.page\/my-helpful-hints?gm","display_url":"g.page\/my-helpful-hin\u2026","indices":[232,255]}],"user_mentions":[],"symbols":[],"media":[{"id":1349664545443368961,"id_str":"1349664545443368961","indices":[256,279],"description":"Thats What IDo American Soul GIF","media_url":"http:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","media_url_https":"https:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","url":"https:\/\/t.co\/Rbepya8XM3","display_url":"pic.twitter.com\/Rbepya8XM3","expanded_url":"https:\/\/twitter.com\/MyHelpfulHints_\/status\/1349664554465304576\/photo\/1","type":"animated_gif","video_info":{"aspect_ratio":[249,140],"variants":[{"bitrate":0,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/tweet_video\/Err4wNUXUAEM6We.mp4"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":498,"h":280,"resize":"fit"},"small":{"w":498,"h":280,"resize":"fit"},"large":{"w":498,"h":280,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1349664545443368961,"id_str":"1349664545443368961","indices":[256,279],"description":"Thats What IDo American Soul GIF","media_url":"http:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","media_url_https":"https:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","url":"https:\/\/t.co\/Rbepya8XM3","display_url":"pic.twitter.com\/Rbepya8XM3","expanded_url":"https:\/\/twitter.com\/MyHelpfulHints_\/status\/1349664554465304576\/photo\/1","type":"animated_gif","video_info":{"aspect_ratio":[249,140],"variants":[{"bitrate":0,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/tweet_video\/Err4wNUXUAEM6We.mp4"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":498,"h":280,"resize":"fit"},"small":{"w":498,"h":280,"resize":"fit"},"large":{"w":498,"h":280,"resize":"fit"}}}]}},"quote_count":0,"reply_count":3,"retweet_count":5,"favorite_count":7,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/OaqKZb604a","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1349664554465304576","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[116,139]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"MyHelpfulHints_","name":"\ud835\uddac\ud835\uddb8 \ud835\udda7\ud835\udda4\ud835\uddab\ud835\uddaf\ud835\udda5\ud835\uddb4\ud835\uddab \ud835\udda7\ud835\udda8\ud835\uddad\ud835\uddb3\ud835\uddb2","id":923162163526676480,"id_str":"923162163526676480","indices":[3,19]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1610644653585"}


Bailing out...
	at org.elasticsearch.hadoop.rest.bulk.BulkProcessor.flush(BulkProcessor.java:519)
	at org.elasticsearch.hadoop.rest.bulk.BulkProcessor.close(BulkProcessor.java:541)
	at org.elasticsearch.hadoop.rest.RestRepository.close(RestRepository.java:219)
	at org.elasticsearch.hadoop.mr.EsOutputFormat$EsRecordWriter.doClose(EsOutputFormat.java:216)
	at org.elasticsearch.hadoop.mr.EsOutputFormat$EsRecordWriter.close(EsOutputFormat.java:198)
	at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:363)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:139)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:129)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:141)
	... 10 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
	at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:78)
	... 27 more
Caused by: org.apache.spark.SparkException: Task failed while writing rows
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:157)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:83)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$3.apply(SparkHadoopWriter.scala:78)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
Caused by: org.elasticsearch.hadoop.EsHadoopException: Could not write all entries for bulk operation [1/12]. Error sample (first [5] error messages):
	org.elasticsearch.hadoop.rest.EsHadoopRemoteException: illegal_argument_exception: Limit of total fields [1000] in index [tweets] has been exceeded
	{"index":{}}
{"created_at":"Thu Jan 14 17:17:33 +0000 2021","id":1349767623567953924,"id_str":"1349767623567953924","text":"RT @MyHelpfulHints_: Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or you\u2026","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":778031123192508417,"id_str":"778031123192508417","name":"Clair","screen_name":"TwystedRoots","location":"Cornwall","url":"https:\/\/www.twystedroots.co.uk\/shop","description":"Maker of bead and wire tree sculptures. Artist inspired by nature. #QueenOf Wire Trees and #SBS winner May 2018. On the #TwitterSisters team.  #ActuallyAutistic","translator_type":"none","protected":false,"verified":false,"followers_count":10016,"friends_count":5634,"listed_count":68,"favourites_count":92467,"statuses_count":67370,"created_at":"Tue Sep 20 00:40:49 +0000 2016","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"1F8E8E","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1335984943462281217\/cqJ3pBLh_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1335984943462281217\/cqJ3pBLh_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/778031123192508417\/1565626601","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Thu Jan 14 10:27:59 +0000 2021","id":1349664554465304576,"id_str":"1349664554465304576","text":"Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or\u2026 https:\/\/t.co\/OaqKZb604a","display_text_range":[0,140],"source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":923162163526676480,"id_str":"923162163526676480","name":"\ud835\uddac\ud835\uddb8 \ud835\udda7\ud835\udda4\ud835\uddab\ud835\uddaf\ud835\udda5\ud835\uddb4\ud835\uddab \ud835\udda7\ud835\udda8\ud835\uddad\ud835\uddb3\ud835\uddb2","screen_name":"MyHelpfulHints_","location":"On your device","url":"https:\/\/myhelpfulhints.co.uk\/hello\/","description":"Multi-Award Winning\ud83e\udd47Product Reviews Website | Small Business Directory #MHHSBD | Twitter Giveaways | #KingOf \ud83d\udc51 | Powered by Autism, Honesty and Coffee \u2615","translator_type":"none","protected":false,"verified":false,"followers_count":5224,"friends_count":2984,"listed_count":10,"favourites_count":32892,"statuses_count":19797,"created_at":"Wed Oct 25 12:19:46 +0000 2017","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":null,"contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"DD8500","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1344395261280473088\/m-d1VVV4_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1344395261280473088\/m-d1VVV4_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/923162163526676480\/1607006107","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":{"full_text":"Here's a weird one, I need Google reviews?\n\nSo, if we've ever worked together, either with a review, a giveaway or you're a member of my small business directory, would you be so kind as to do my job and write a review \ud83d\ude06\n\n#MHHSBD \n\nhttps:\/\/t.co\/JZuWJGGS7F https:\/\/t.co\/Rbepya8XM3","display_text_range":[0,255],"entities":{"hashtags":[{"text":"MHHSBD","indices":[222,229]}],"urls":[{"url":"https:\/\/t.co\/JZuWJGGS7F","expanded_url":"https:\/\/g.page\/my-helpful-hints?gm","display_url":"g.page\/my-helpful-hin\u2026","indices":[232,255]}],"user_mentions":[],"symbols":[],"media":[{"id":1349664545443368961,"id_str":"1349664545443368961","indices":[256,279],"description":"Thats What IDo American Soul GIF","media_url":"http:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","media_url_https":"https:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","url":"https:\/\/t.co\/Rbepya8XM3","display_url":"pic.twitter.com\/Rbepya8XM3","expanded_url":"https:\/\/twitter.com\/MyHelpfulHints_\/status\/1349664554465304576\/photo\/1","type":"animated_gif","video_info":{"aspect_ratio":[249,140],"variants":[{"bitrate":0,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/tweet_video\/Err4wNUXUAEM6We.mp4"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":498,"h":280,"resize":"fit"},"small":{"w":498,"h":280,"resize":"fit"},"large":{"w":498,"h":280,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":1349664545443368961,"id_str":"1349664545443368961","indices":[256,279],"description":"Thats What IDo American Soul GIF","media_url":"http:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","media_url_https":"https:\/\/pbs.twimg.com\/tweet_video_thumb\/Err4wNUXUAEM6We.jpg","url":"https:\/\/t.co\/Rbepya8XM3","display_url":"pic.twitter.com\/Rbepya8XM3","expanded_url":"https:\/\/twitter.com\/MyHelpfulHints_\/status\/1349664554465304576\/photo\/1","type":"animated_gif","video_info":{"aspect_ratio":[249,140],"variants":[{"bitrate":0,"content_type":"video\/mp4","url":"https:\/\/video.twimg.com\/tweet_video\/Err4wNUXUAEM6We.mp4"}]},"sizes":{"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":498,"h":280,"resize":"fit"},"small":{"w":498,"h":280,"resize":"fit"},"large":{"w":498,"h":280,"resize":"fit"}}}]}},"quote_count":0,"reply_count":3,"retweet_count":5,"favorite_count":7,"entities":{"hashtags":[],"urls":[{"url":"https:\/\/t.co\/OaqKZb604a","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/1349664554465304576","display_url":"twitter.com\/i\/web\/status\/1\u2026","indices":[116,139]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"urls":[],"user_mentions":[{"screen_name":"MyHelpfulHints_","name":"\ud835\uddac\ud835\uddb8 \ud835\udda7\ud835\udda4\ud835\uddab\ud835\uddaf\ud835\udda5\ud835\uddb4\ud835\uddab \ud835\udda7\ud835\udda8\ud835\uddad\ud835\uddb3\ud835\uddb2","id":923162163526676480,"id_str":"923162163526676480","indices":[3,19]}],"symbols":[]},"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1610644653585"}


Bailing out...
	at org.elasticsearch.hadoop.rest.bulk.BulkProcessor.flush(BulkProcessor.java:519)
	at org.elasticsearch.hadoop.rest.bulk.BulkProcessor.close(BulkProcessor.java:541)
	at org.elasticsearch.hadoop.rest.RestRepository.close(RestRepository.java:219)
	at org.elasticsearch.hadoop.mr.EsOutputFormat$EsRecordWriter.doClose(EsOutputFormat.java:216)
	at org.elasticsearch.hadoop.mr.EsOutputFormat$EsRecordWriter.close(EsOutputFormat.java:198)
	at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.closeWriter(SparkHadoopWriter.scala:363)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:139)
	at org.apache.spark.internal.io.SparkHadoopWriter$$anonfun$4.apply(SparkHadoopWriter.scala:129)
	at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1394)
	at org.apache.spark.internal.io.SparkHadoopWriter$.org$apache$spark$internal$io$SparkHadoopWriter$$executeTask(SparkHadoopWriter.scala:141)
	... 10 more


	at org.apache.spark.streaming.api.python.TransformFunction.callPythonTransformFunction(PythonDStream.scala:95)
	at org.apache.spark.streaming.api.python.TransformFunction.apply(PythonDStream.scala:78)
	at org.apache.spark.streaming.api.python.PythonDStream$$anonfun$callForeachRDD$1.apply(PythonDStream.scala:179)
	at org.apache.spark.streaming.api.python.PythonDStream$$anonfun$callForeachRDD$1.apply(PythonDStream.scala:179)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ForEachDStream.scala:51)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ForEachDStream.scala:51)
	at org.apache.spark.streaming.dstream.DStream.createRDDWithLocalProperties(DStream.scala:416)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply$mcV$sp(ForEachDStream.scala:50)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
	at org.apache.spark.streaming.dstream.ForEachDStream$$anonfun$1.apply(ForEachDStream.scala:50)
	at scala.util.Try$.apply(Try.scala:192)
	at org.apache.spark.streaming.scheduler.Job.run(Job.scala:39)
	at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply$mcV$sp(JobScheduler.scala:257)
	at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
	at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler$$anonfun$run$1.apply(JobScheduler.scala:257)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
	at org.apache.spark.streaming.scheduler.JobScheduler$JobHandler.run(JobScheduler.scala:256)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
