In [1]:
from pyspark import SparkContext                                                                                        
from pyspark.sql import SparkSession                                                                                    
from pyspark.streaming import StreamingContext                                                                          
from pyspark.streaming.kafka import KafkaUtils    
import pandas as pd
import json

In [2]:
# Build (or reuse) the notebook-wide SparkSession: it connects to the
# standalone master, requests the Kafka 0.8 streaming connector package,
# and enables Hive support so DataFrames can be saved as Hive tables.
ss = (
    SparkSession.Builder()
    .appName("TEST")
    .master("spark://spark-master:7077")
    # Maven coordinates of the Kafka connector resolved via spark.jars.packages.
    .config('spark.jars.packages', 'org.apache.spark:spark-streaming-kafka-0-8-assembly_2.11:2.4.1')
    .config("spark.driver.allowMultipleContexts", "true")
    # Hive metastore endpoint and the warehouse directory on HDFS.
    .config("hive.metastore.uris", "thrift://hive-metastore:9083")
    .config("spark.sql.warehouse.dir", "hdfs://namenode:9000/hive")
    .enableHiveSupport()
    .getOrCreate()
)

# ss = SparkSession.Builder() \
#      .appName("TEST") \
#      .master("spark://spark-master:7077") \
#      .enableHiveSupport() \
#      .getOrCreate()
# Other Kafka connector Maven coordinates (presumably alternatives for spark.jars.packages):
#   org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.1
#   org.apache.spark:spark-streaming-kafka-0-8:2.4.1

In [3]:
# def handle_rdd(rdd):                                                                                                    
#     if not rdd.isEmpty():                                                                                               
#         global ss                                                                                                       
#         df = ss.createDataFrame(rdd, schema=[
#             "ArrivalTime",
#             "BusinessLeisure",
#             "CabinCategory",
#             "CreationDate",
#             "CurrencyCode",
#             "DepartureTime",
#             "Destination",
#             "OfficeIdCountry",
#             "Origin",
#             "TotalAmount",
#             "nPAX"])                                                
#         df.show()                                                                                                       
#         df.write.saveAsTable(name='default.tweets', format='hive', mode='append')        
def handle_rdd(rdd):
    """Display one micro-batch and append it to the Hive table ``default.tweets``.

    Empty batches are skipped. A non-empty RDD is converted to a DataFrame
    with the columns ``text``, ``words`` and ``length`` using the
    notebook-level SparkSession ``ss``, printed with ``show()``, and then
    appended to the table.

    Args:
        rdd: RDD for the current batch; its rows must line up with the
            three column names above.

    Returns:
        None.
    """
    if rdd.isEmpty():
        return

    columns = ['text', 'words', 'length']
    batch_df = ss.createDataFrame(rdd, schema=columns)
    batch_df.show()
    batch_df.write.saveAsTable(name='default.tweets', format='hive', mode='append')

In [4]:
# Streaming context layered on the session's SparkContext; the batch
# duration (in seconds) sets how often a new micro-batch RDD is produced.
sc = ss.sparkContext
ssc = StreamingContext(sparkContext=sc, batchDuration=4)

In [None]:
# Limit Spark's own logging to warnings so the per-batch tables stay readable.
ss.sparkContext.setLogLevel('WARN')

# Direct Kafka stream over the 'tweets' topic; each record is a (key, value) pair.
ks = KafkaUtils.createDirectStream(ssc, ['tweets'], {'metadata.broker.list': 'kafka:9093'})

# Keep only the message value. A record whose value is null decodes to None,
# which would make tweet.split() raise and fail the whole batch, so drop it.
lines = ks.map(lambda record: record[1]).filter(lambda tweet: tweet is not None)

# (text, word count, character count) -- same order as handle_rdd's columns.
transform = lines.map(lambda tweet: (tweet, len(tweet.split()), len(tweet)))

# Show and persist every non-empty micro-batch.
transform.foreachRDD(handle_rdd)

ssc.start()
try:
    # Blocks until the stream terminates or the cell is interrupted.
    ssc.awaitTermination()
finally:
    # Without this, interrupting the cell leaves the streaming job running in
    # the background. Only the StreamingContext is stopped so `ss`/`sc` stay
    # usable, and the graceful flag lets already-received data finish
    # processing. A stopped context cannot be restarted: re-create `ssc`
    # before running this cell again.
    ssc.stop(stopSparkContext=False, stopGraceFully=True)

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Trump has launche...|   65|   368|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|At 6:31 a.m. loca...|  253|  1487|
|The UK has ordere...|  141|   891|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Former White Hous...|  145|   858|
|What happens next...|   35|   225|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"Covid-19 is a te...|  165|   936|
|What happens next...|  108|   700|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Nurse Joanna Sloa...|  241|  1369|
|William Shakesper...|  

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The UK has ordere...|  150|   822|
|"I would just say...|  237|  1401|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|At 6:31 a.m. loca...|   35|   208|
|"Covid-19 is a te...|  215|  1224|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The vaccine was f...|  184|  1123|
|The UK has ordere...|   87|   539|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Cardiff, Wales (C...|  129|   738|
|Former White Hous...|  134|   801|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Biden won the pre...|  

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The UK has ordere...|  233|  1322|
|Trump has launche...|   75|   425|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Asked whether he ...|  285|  1526|
|"I would just say...|  297|  1706|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|William Shakesper...|  223|  1298|
|"And so my messag...|   84|   429|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"I would just say...|  102|   577|
|What happens next...|  141|   802|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The regulator, th...|  

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|A 90-year-old Bri...|  160|   942|
|Keenan, who turns...|  200|  1084|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The regulator, th...|  124|   799|
|"The president wa...|  176|  1024|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The regulator, th...|  134|   768|
|William Shakesper...|  253|  1470|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"The president wa...|  274|  1573|
|Gill Rogers, whos...|  283|  1740|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The first Briton ...|  

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Asked whether he ...|  171|   988|
|Keenan, who turns...|   97|   552|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"It's the best ea...|  110|   594|
|The country has a...|  170|  1000|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|What happens next...|  128|   677|
|The UK has ordere...|   95|   529|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The UK has ordere...|   49|   279|
|Keenan, who turns...|   38|   201|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The UK has ordere...|  

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The first Briton ...|  246|  1358|
|William Shakesper...|   65|   418|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"Covid-19 is a te...|  234|  1278|
|Trump has launche...|  187|  1125|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|"Covid-19 is a te...|   88|   476|
|The vaccine was f...|  123|   783|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|Gill Rogers, whos...|  231|  1377|
|The country has a...|   78|   472|
+--------------------+-----+------+

+--------------------+-----+------+
|                text|words|length|
+--------------------+-----+------+
|The UK has ordere...|  