In [1]:

from pyspark import SparkConf,SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row,SQLContext
import sys
import requests
import time
import subprocess
import re
from google.cloud import bigquery
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import nltk
from nltk.corpus import stopwords

# ---------------------------------------------------------------------------
# Google Cloud Storage locations
# ---------------------------------------------------------------------------
bucket = "6893_bigdata_isaac"
output_directory_hashtags = 'gs://%s/hadoop/tmp/bigquery/pyspark_output/hashtagsCount' % bucket
output_directory_wordcount = 'gs://%s/hadoop/tmp/bigquery/pyspark_output/wordcount' % bucket
output_directory_canditweet = 'gs://%s/hadoop/tmp/bigquery/pyspark_output/canditweet' % bucket
output_directory_topwords = 'gs://%s/hadoop/tmp/bigquery/pyspark_output/top5words' % bucket
bucket_directory = 'gs://%s/largescale_data_processing/6889project/testdata6889.csv' % bucket

# ---------------------------------------------------------------------------
# BigQuery dataset, and for every output table its name and column names
# ---------------------------------------------------------------------------
output_dataset = 'bigdata_sparkStreaming'

output_table_hashtags = 'hashtags'
columns_name_hashtags = ['hashtags', 'count']

output_table_wordcount = 'wordcount'
columns_name_wordcount = ['word', 'count', 'time']

# NOTE: canditweet() emits a sentiment score in the second position of each
# record; it is stored under the 'tweet' column name listed here.
output_table_canditweet = 'canditweet'
columns_name_canditweet = ['candidate', 'tweet', 'time']

output_table_topwords = 'top5words'
columns_name_topwords = ['candidate', 'word', 'count', 'time']

# ---------------------------------------------------------------------------
# Streaming parameters
# ---------------------------------------------------------------------------
# Address of the socket the tweet stream is read from.
IP = 'localhost'
PORT = 9001

# Number of seconds the driver sleeps between starting and stopping the stream.
STREAMTIME = 60
# One sixth of STREAMTIME (true division).
STEP = int(STREAMTIME) / 6

# Search terms used to decide which candidate a tweet is about.  The first
# term found in a tweet wins, so the order of this list matters.
# Full names, for reference: donald trump, nikki haley, vivek ramaswamy,
# asa hutchinson, joe biden, marianne williamson.
WORD = [
    'trump',
    'nikki haley',
    'ramaswamy',
    'asa hutchinson',
    'biden',
    'marianne williamson',
]

# The NLTK stop-word corpus has to be available locally; run
# nltk.download('stopwords') once if it is not.
stop_words = set(stopwords.words('english'))

# Words excluded from the top-word ranking in addition to the NLTK stop
# words: the candidates' own names plus a few generic political terms.
common_word = (
    ['donald', 'trump', 'nikki', 'haley', 'vivek', 'ramaswamy',
     'asa', 'hutchinson', 'joe', 'biden', 'marianne', 'williamson']
    + ['president', 'election', 'people']
)

# Helper functions
def saveToStorage(rdd, output_directory, columns_name, mode):
    """
    Write the records of ``rdd`` to ``output_directory`` as JSON.

    input:
        rdd: RDD of tuples, one element per entry of ``columns_name``
        output_directory: destination path handed to the DataFrame writer
        columns_name: column names used when converting the RDD to a DataFrame
        mode: save mode forwarded to the writer (the driver uses "append")
    output: None

    An empty RDD is skipped entirely, so nothing is written for it.
    """
    if rdd.isEmpty():
        return

    frame = rdd.toDF(columns_name)
    frame.write.save(output_directory, format="json", mode=mode)


def saveToBigQuery(sc, output_dataset, output_table, directory):
    """
    Load the JSON part files under ``directory`` into a BigQuery table, then
    delete ``directory``.

    input:
        sc: SparkContext, used to reach the Hadoop FileSystem API
        output_dataset: BigQuery dataset name
        output_table: BigQuery table name inside ``output_dataset``
        directory: directory holding the ``part-*`` JSON files to load
    output: None
    raises: subprocess.CalledProcessError if ``bq load`` exits non-zero; in
        that case ``directory`` is left in place.

    The load uses ``--replace``, so the existing table contents are
    overwritten, and ``--autodetect``, so the schema is inferred from the data.
    """
    # Build argv as a list instead of formatting one string and splitting it
    # on whitespace: a dataset, table or path containing a space would
    # otherwise be broken into several arguments.
    command = [
        'bq', 'load',
        '--source_format', 'NEWLINE_DELIMITED_JSON',
        '--replace',
        '--autodetect',
        '{dataset}.{table}'.format(dataset=output_dataset, table=output_table),
        directory + '/part-*',
    ]
    subprocess.check_call(command)

    # Remove the staging directory (recursively) only after a successful load.
    output_path = sc._jvm.org.apache.hadoop.fs.Path(directory)
    output_path.getFileSystem(sc._jsc.hadoopConfiguration()).delete(
        output_path, True)
    
def saveToBucket(rdd, bucket_path):
    """
    Save ``rdd`` to ``bucket_path`` as CSV with a header row.

    input:
        rdd: RDD of 3-field records; the columns are named
             'candidate', 'tweet' and 'time'
        bucket_path: destination path handed to the DataFrame writer
    output: None
    """
    writer = rdd.toDF(['candidate', 'tweet', 'time']).write
    writer.format("csv").option("header", "true").save(bucket_path)
    

def topWordCount(tweet, window_duration=10, slide_duration=10):
    """
    input:
        tweet: DStream of (candidate, related_tweet) pairs
        window_duration: length in seconds of the counting window
        slide_duration: interval in seconds at which a result is produced
    output: DStream of (candidate, word, count, time) tuples

    For every window the function counts, per candidate, the words of the
    related tweets (minus the NLTK stop words and the self-defined common
    words) and keeps the single most frequent word.  ``time`` is the window's
    timestamp formatted as "%Y-%m-%d %H:%M:%S".

    Both durations must be multiples of the StreamingContext batch interval,
    and checkpointing must be enabled because an inverse reduce function is
    used.

    The intermediate pprint() calls are debug output; each one registers its
    own output operation on the stream.
    """
    # stack every tweet of a candidate in the batch into one string
    grouped_tweets = tweet.reduceByKey(lambda x, y: x + ' ' + y)
    #grouped_tweets.pprint()

    # tokenise and filter out the nltk stop words and self defined common words
    candi_word = grouped_tweets.map(
        lambda x: (x[0], [w for w in x[1].split()
                          if w not in stop_words and w not in common_word]))
    candi_word.pprint()

    # flatMap to ((candidate, word), 1) and count over the window.
    # filterFunc drops keys whose count fell back to zero: without it the
    # inverse-reduce keeps every word ever seen in the window state, which
    # grows without bound and lets a count of 0 be reported as a "top" word.
    candi_allwords_flat = candi_word \
        .flatMap(lambda x: [((x[0], word), 1) for word in x[1]]) \
        .reduceByKeyAndWindow(lambda x, y: x + y, lambda x, y: x - y,
                              window_duration, slide_duration,
                              filterFunc=lambda kv: kv[1] > 0)
    candi_allwords_flat.pprint()

    # group by candidate and keep the (word, count) pair with the highest
    # count; on ties the pair seen first wins, as with a stable descending sort
    top_words = candi_allwords_flat \
        .map(lambda x: (x[0][0], (x[0][1], x[1]))) \
        .groupByKey() \
        .mapValues(lambda pairs: max(pairs, key=lambda pair: pair[1]))
    top_words.pprint()

    # flatten to (candidate, word, count)
    most_count_words = top_words.map(lambda x: (x[0], x[1][0], x[1][1]))
    most_count_words.pprint()

    # append the time at which this word count was recorded
    def add_timestamp(batch_time, rdd):
        # format once per window instead of once per record
        stamp = batch_time.strftime("%Y-%m-%d %H:%M:%S")
        return rdd.map(lambda x: (x[0], x[1], x[2], stamp))

    word_total = most_count_words.transform(add_timestamp)
    word_total.pprint()

    return word_total
    

def canditweet(tweet):
    """
    input: DStream of (candidate, related_tweet) pairs
    output: DStream of (candidate, compound_score, time) tuples

    Scores every tweet with VADER and keeps the 'compound' value of the
    polarity scores.  ``time`` is the batch timestamp formatted as
    "%Y-%m-%d %H:%M:%S".
    """
    def score_batch(batch_time, rdd):
        # format the batch timestamp once instead of once per record
        stamp = batch_time.strftime("%Y-%m-%d %H:%M:%S")

        def score_partition(records):
            # One analyzer per partition, created on first use, instead of
            # constructing a new SentimentIntensityAnalyzer for every tweet.
            analyzer = None
            for record in records:
                if analyzer is None:
                    analyzer = SentimentIntensityAnalyzer()
                compound = analyzer.polarity_scores(record[1])['compound']
                yield (record[0], compound, stamp)

        return rdd.mapPartitions(score_partition)

    candiTweets_total = tweet.transform(score_batch)

    return candiTweets_total


# Driver: read tweets from a socket for STREAMTIME seconds, score them per
# candidate, stage the results as JSON, then load the staged files into
# BigQuery.
if __name__ == '__main__':
    # Spark settings: local master with two threads
    conf = SparkConf()
    conf.setMaster('local[2]')
    conf.setAppName("TwitterStreamApp")

    # create spark context with the above configuration
    sc = SparkContext(conf=conf)
    sc.setLogLevel("ERROR")
    # NOTE(review): sql_context is not referenced again in this file;
    # presumably it is created so that rdd.toDF() is available in
    # saveToStorage -- confirm before removing.
    sql_context = SQLContext(sc)
    # streaming context with a batch interval of 5 seconds
    ssc = StreamingContext(sc, 5)
    # checkpointing is needed by the windowed count in topWordCount, which
    # uses an inverse reduce function.
    # NOTE(review): "~" is passed to Spark as-is; verify where this directory
    # actually ends up on the configured file system.
    ssc.checkpoint("~/checkpoint_TwitterApp")
    dataStream = ssc.socketTextStream(IP, PORT)
    # NOTE(review): `words` is never used below.
    words = dataStream.flatMap(lambda line: line.split(" "))
    # split each incoming line on "@"; every piece is treated as one tweet
    tws = dataStream.flatMap(lambda line: line.split("@"))
    
    # keep only the tweets that mention at least one search term from WORD
    filtered_lines = tws.filter(lambda line: any(word in line.lower() for word in WORD))
    filtered_lines.pprint()
    # build (candidate, lower-cased tweet) pairs; a tweet mentioning several
    # candidates is attributed to the first matching entry of WORD only
    canditw = filtered_lines.map(lambda line: \
                                 (next((word for word in WORD if word.lower() in line.lower()), None), line.lower()))
    # (candidate, compound sentiment score, time) per tweet
    candiTweets = canditweet(canditw)
    candiTweets.pprint()
    
    # (candidate, word, count, time): most frequent word per candidate
    topWords = topWordCount(canditw)
    topWords.pprint()
    
    # stage every non-empty batch as JSON; the staged files are loaded into
    # BigQuery after the stream has stopped.  The sentiment score is stored
    # under the 'tweet' column name from columns_name_canditweet.
    candiTweets.foreachRDD(lambda rdd: saveToStorage(rdd, output_directory_canditweet, columns_name_canditweet, mode="append"))
    topWords.foreachRDD(lambda rdd: saveToStorage(rdd, output_directory_topwords, columns_name_topwords, mode="append"))
    
    # start the streaming process then wait for streamtime and then stop.
    ssc.start()
    time.sleep(STREAMTIME)
    # keep the SparkContext alive: saveToBigQuery below still uses `sc`
    ssc.stop(stopSparkContext=False, stopGraceFully=True)
    
    # load the staged JSON into BigQuery and delete the staging directories
    saveToBigQuery(sc, output_dataset, output_table_canditweet, output_directory_canditweet)
    saveToBigQuery(sc, output_dataset, output_table_topwords, output_directory_topwords)
    # Disabled.  NOTE(review): saveToBucket calls rdd.toDF(), but candiTweets
    # is a DStream, so this call would need foreachRDD to work -- confirm.
    #saveToBucket(candiTweets, bucket_directory)



Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/05/02 23:52:47 INFO org.apache.spark.SparkEnv: Registering MapOutputTracker
23/05/02 23:52:48 INFO org.apache.spark.SparkEnv: Registering BlockManagerMaster
23/05/02 23:52:48 INFO org.apache.spark.SparkEnv: Registering BlockManagerMasterHeartbeat
23/05/02 23:52:48 INFO org.apache.spark.SparkEnv: Registering OutputCommitCoordinator
[Stage 0:>                                                          (0 + 1) / 1]

-------------------------------------------
Time: 2023-05-02 23:52:55
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:52:55
-------------------------------------------



                                                                                

-------------------------------------------
Time: 2023-05-02 23:52:55
-------------------------------------------



[Stage 0:>                                                          (0 + 1) / 1]

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------



                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:00
-------------------------------------------



23/05/02 23:53:03 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:03 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071583600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:04 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:04 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071584600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:05 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:05 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071584800 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:05 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:05 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071585000 replicated to only 0

-------------------------------------------
Time: 2023-05-02 23:53:05
-------------------------------------------
lawofruby: NEW: The judge presiding over Trump's criminal case has ordered Joe Tacopina to turn over his communications with Stormy Dan…
AmoneyResists: Trump tried to copy 
JoeBiden’s recent trip to his homeland of Ireland by traveling to Scotland &amp; ‘The Scotsman’ just tr…
heyitsmeCarolyn Trump is responsible for her death.RT 
yourhelpfulplum: 🏁 Trump 2024 More 🇺🇸
🏆 MAGA ULTRA FAMILY 🏆This is a new and virulent strain of Trump and the MAGAs.  If elected they will take revenge on those who oppose them and in the same breath they scream about the "weaponization" of government. https://t.co/k9e1L88JDv❤️ https://t.co/UThs7Q9N4G
ACTBrigitte If you are implying that Trump is God, you are not only delusional, you are evil and confused.  If you are implying that God sent Trump, see above.  Trump is responsible for the hate and bitterness that is on the rise in this country.
ImM

23/05/02 23:53:05 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:05 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071585600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:06 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:06 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071585800 replicated to only 0 peer(s) instead of 1 peers


-------------------------------------------
Time: 2023-05-02 23:53:05
-------------------------------------------
('trump', ['lawofruby:', 'new:', 'judge', 'presiding', "trump's", 'criminal', 'case', 'ordered', 'tacopina', 'turn', 'communications', 'stormy', 'dan…', 'amoneyresists:', 'tried', 'copy', 'heyitsmecarolyn', 'responsible', 'death.rt', 'yourhelpfulplum:', '🏁', '2024', '🇺🇸', '🏆', 'maga', 'ultra', 'family', '🏆this', 'new', 'virulent', 'strain', 'magas.', 'elected', 'take', 'revenge', 'oppose', 'breath', 'scream', '"weaponization"', 'government.', 'https://t.co/k9e1l88jdv❤️', 'https://t.co/uths7q9n4g', 'actbrigitte', 'implying', 'god,', 'delusional,', 'evil', 'confused.', 'implying', 'god', 'sent', 'trump,', 'see', 'above.', 'responsible', 'hate', 'bitterness', 'rise', 'country.', 'judidog', "i'm", 'fox', 'news', 'supporter', '/', 'viewer.', 'asked', 'legitimate', 'question.…biden', 'sending', '1,500', 'troops', 'mexico', 'border', 'migrant', 'surge,', 'that’ll', 'surely', 'stop

23/05/02 23:53:06 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:06 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071586400 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:06 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:06 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071586600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:07 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:07 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071586800 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:07 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:07 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071587600 replicated to only 0

-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
TheBeatWithAri: DOJ Vet David Kelley likens Donald Trump’s criminal case to a “mob trial.” https://t.co/lzKPcpIK3VRT 
ungerbn103: If you are a Veteran who votes for Trump, not only is Lauren Boebert smarter than you, but you have disrespected the unifor…RT 
"Former president Donald Trump got so irritated with an NBC reporter’s questions about a…
SueMassa256: The rich, corporations and ignorant people support trump! Largest political donation ever': Trump ally received $1.6 billi…RT 
ImMeme0: Biden’s Crime Family are POS and these are indisputable facts which Democrats don’t want to admit:
OliLondonTV: Biden is offering teachers in Pakistan $500,000 of American tax payer money  for “Professional Development for Transgender…RT 
lawofruby: NEW: The judge presiding over Trump's criminal case has ordered Joe Tacopina to turn over his communications with Stormy Dan…RT 
therecount

23/05/02 23:53:18 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:18 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071598600 replicated to only 0 peer(s) instead of 1 peers


-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
('trump', ['thebeatwithari:', 'doj', 'vet', 'david', 'kelley', 'likens', 'trump’s', 'criminal', 'case', '“mob', 'trial.”', 'https://t.co/lzkpcpik3vrt', 'ungerbn103:', 'veteran', 'votes', 'trump,', 'lauren', 'boebert', 'smarter', 'you,', 'disrespected', 'unifor…rt', '"former', 'got', 'irritated', 'nbc', 'reporter’s', 'questions', 'a…', 'suemassa256:', 'rich,', 'corporations', 'ignorant', 'support', 'trump!', 'largest', 'political', 'donation', "ever':", 'ally', 'received', '$1.6', 'billi…rt', 'lawofruby:', 'new:', 'judge', 'presiding', "trump's", 'criminal', 'case', 'ordered', 'tacopina', 'turn', 'communications', 'stormy', 'dan…rt', 'repmtg', 'support', 'trump?', 'mollyploofkins:', "trump's", 'getting', 'reception', 'scotland', 'got', 'ireland.rt', 'stood', 'behind', 'federalist', 'society', 'filing', 'supreme', 'court.', '💥', 'trump💥', '🇺🇸', '2020🇺🇸', '🇺🇸', '2024🇺🇸', 'the_

23/05/02 23:53:19 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:19 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071599400 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:19 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:19 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071599600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:20 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:20 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071600400 replicated to only 0 peer(s) instead of 1 peers


-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
(('trump', 'lawofruby:'), 2)
(('trump', 'judge'), 3)
(('trump', "trump's"), 5)
(('trump', 'criminal'), 4)
(('trump', 'case'), 3)
(('trump', 'ordered'), 2)
(('trump', 'communications'), 2)
(('trump', 'dan…'), 1)
(('trump', 'copy'), 1)
(('trump', '🏁'), 1)
...

-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
('trump', ("trump's", 5))
('biden', ('biden’s', 7))

-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
('trump', "trump's", 5)
('biden', 'biden’s', 7)

-------------------------------------------
Time: 2023-05-02 23:53:10
-------------------------------------------
('trump', "trump's", 5, '2023-05-02 23:53:10')
('biden', 'biden’s', 7, '2023-05-02 23:53:10')

-------------------------------------------
Time: 2023-05-02 23:53:10
------------------

23/05/02 23:53:21 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:21 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071601600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:22 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:22 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071601800 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:22 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:22 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071602000 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:22 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:22 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071602600 replicated to only 0

23/05/02 23:53:35 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:35 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071615400 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:36 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:36 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071615800 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:36 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:36 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071616000 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:36 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:36 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071616200 replicated to only 0

23/05/02 23:53:49 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:49 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071629600 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:50 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:50 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071629800 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:50 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:50 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071630000 replicated to only 0 peer(s) instead of 1 peers
23/05/02 23:53:50 WARN org.apache.spark.storage.RandomBlockReplicationPolicy: Expecting 1 replicas with only 0 peer/s.
23/05/02 23:53:50 WARN org.apache.spark.storage.BlockManager: Block input-0-1683071630600 replicated to only 0

23/05/02 23:54:00 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchAllocationEvent(1683071640000 ms,AllocatedBlocks(Map(0 -> ArrayBuffer(ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071629800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071630000,Some(1))), ReceivedBlockInfo(0,Some(11),None,BlockManagerBasedStoreResult(input-0-1683071630600,Some(11))), ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071630800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071631000,Some(1))), ReceivedBlockInfo(0,Some(4),None,BlockManagerBasedStoreResult(input-0-1683071631400,Some(4))), ReceivedBlockInfo(0,Some(15),None,BlockManagerBasedStoreResult(input-0-1683071631600,Some(15))), ReceivedBlockInfo(0,Some(3),None,BlockManagerBasedStoreResult(input-0-1683071632000,Some(3))), ReceivedBlockInfo(0,Some(4),None,BlockMa

-------------------------------------------
Time: 2023-05-02 23:53:15
-------------------------------------------
TAftermath2020: Black dude goes to a Trump rally expecting a fight. Comes out realizing the media and Democrats lied to him.RT 
ungerbn103: Trump called CNN the "Enemy of the State" yet they are giving him another free, prime time, opportunity to spew hie lies, R…
GovRonDeSantis BuT tRuMp
lopezobrador_: Hoy sostuvimos tres reuniones importantes: recibimos a Elizabeth Sherwood-Randall, enviada del presidente Biden para tra…RT 
DonaldJTrumpJr: WATCH: President Trump lays out his policy plan to retake our colleges from the Marxists. Calls for Federal Civil Right…
VernonForGA It’s sheer stupidity 2 think it was stolen. If u consider the efforts 2 validate the election, state &amp; county recounts, multiple audits, #Trump’s 62 failed court cases alleging fraud &amp; only a handful of confirmed cases of fraud by republicans. 2 believe is 2 b willfully ignorant.
joncoopertweets Tr

23/05/02 23:54:05 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchAllocationEvent(1683071645000 ms,AllocatedBlocks(Map(0 -> ArrayBuffer(ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071629800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071630000,Some(1))), ReceivedBlockInfo(0,Some(11),None,BlockManagerBasedStoreResult(input-0-1683071630600,Some(11))), ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071630800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071631000,Some(1))), ReceivedBlockInfo(0,Some(4),None,BlockManagerBasedStoreResult(input-0-1683071631400,Some(4))), ReceivedBlockInfo(0,Some(15),None,BlockManagerBasedStoreResult(input-0-1683071631600,Some(15))), ReceivedBlockInfo(0,Some(3),None,BlockManagerBasedStoreResult(input-0-1683071632000,Some(3))), ReceivedBlockInfo(0,Some(4),None,BlockMa

23/05/02 23:54:25 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchAllocationEvent(1683071665000 ms,AllocatedBlocks(Map(0 -> ArrayBuffer(ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071629800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071630000,Some(1))), ReceivedBlockInfo(0,Some(11),None,BlockManagerBasedStoreResult(input-0-1683071630600,Some(11))), ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071630800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071631000,Some(1))), ReceivedBlockInfo(0,Some(4),None,BlockManagerBasedStoreResult(input-0-1683071631400,Some(4))), ReceivedBlockInfo(0,Some(15),None,BlockManagerBasedStoreResult(input-0-1683071631600,Some(15))), ReceivedBlockInfo(0,Some(3),None,BlockManagerBasedStoreResult(input-0-1683071632000,Some(3))), ReceivedBlockInfo(0,Some(4),None,BlockMa

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
mspopok: Jack Smith obtaining by force Pence’s testimony against Trump belongs on Page 1 of US History, not buried on page 16 
SecBlinken told our Senate investigators he did not email Hunter Biden. Not t…RT 
nathaliejacoby1: If I called for a BOYCOTT of CNN over their decision to invite Donald Trump to a Town Hall, would you join me? https:/…
ZelenskyyUa SO YOU KNOW YOU WILL BE THE 51st state of AMERICA. LEASES AN PERMITS OUT TO AMERICAN OIL COMPANIES. YOUR JUST WAITING FOR PUTIN TO Level. THATS WHY NO OFFENCE. CASH COW WILL END SOON. U AN BIDEN REFUSED TO WIN https://t.co/TLDBTsKXN5RT 
richimedhurst: Biden says "journalism is not a crime"– except when it comes to Julian Assange
investmattallen Grandma insider trading , she should have to answer to this ,yet there trying to lock up trump fir bullshit ..shes gotta get in the hot seat
TomCottonAR: Hunter Biden has the most e

23/05/02 23:54:25 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchCleanupEvent(Vector()) to the WriteAheadLog.
java.lang.IllegalStateException: close() was called on BatchedWriteAheadLog before write request with time 1683071665936 could be fulfilled.
	at org.apache.spark.streaming.util.BatchedWriteAheadLog.write(BatchedWriteAheadLog.scala:88)
	at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.writeToLog(ReceivedBlockTracker.scala:244)
	at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.cleanupOldBatches(ReceivedBlockTracker.scala:177)
	at org.apache.spark.streaming.scheduler.ReceiverTracker.cleanupOldBlocksAndBatches(ReceiverTracker.scala:228)
	at org.apache.spark.streaming.scheduler.JobGenerator.clearCheckpointData(JobGenerator.scala:290)
	at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:190)
	at org.apache.spark.strea

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
('trump', ['mspopok:', 'jack', 'smith', 'obtaining', 'force', 'pence’s', 'testimony', 'belongs', 'page', '1', 'us', 'history,', 'buried', 'page', '16', 'nathaliejacoby1:', 'called', 'boycott', 'cnn', 'decision', 'invite', 'town', 'hall,', 'would', 'join', 'me?', 'https:/…', 'investmattallen', 'grandma', 'insider', 'trading', ',', 'answer', ',yet', 'trying', 'lock', 'fir', 'bullshit', '..shes', 'gotta', 'get', 'hot', 'seat', 'halfwaypost:', 'breaking:', 'lindsey', 'graham', 'accidentally', 'said', 'hot', 'mic,', '"if', 'nominate', 'third', 'time', 'get', 'destroyed,', 'an…rt', 'subrata30016572:', 'miss', 'much..drop', 'a❤if', 'https://t.co/qlyhj1jjc7rt', 'vabvox:', 'e', 'jean', 'carroll', 'one', 'person', 'america', 'backed', 'face', "trump's", 'violence', 'criminality.', 'sh…', 'davidchapman141', 'already', 'factual.', 'leading', 'independents', 'women.', 'one', 'two', 'pol

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
(('trump', 'taftermath2020:'), 1)
(('trump', 'black'), 1)
(('trump', 'dude'), 1)
(('trump', 'rally'), 2)
(('trump', 'comes'), 2)
(('trump', 'realizing'), 2)
(('trump', 'media'), 1)
(('trump', 'him.rt'), 1)
(('trump', 'called'), 3)
(('trump', 'state"'), 1)
...

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
('trump', ('one', 10))
('biden', ('hunter', 16))

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
('trump', 'one', 10)
('biden', 'hunter', 16)

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------------------------
('trump', 'one', 10, '2023-05-02 23:53:20')
('biden', 'hunter', 16, '2023-05-02 23:53:20')

-------------------------------------------
Time: 2023-05-02 23:53:20
-------------------------

23/05/02 23:54:30 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchAllocationEvent(1683071670000 ms,AllocatedBlocks(Map(0 -> ArrayBuffer(ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071629800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071630000,Some(1))), ReceivedBlockInfo(0,Some(11),None,BlockManagerBasedStoreResult(input-0-1683071630600,Some(11))), ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071630800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071631000,Some(1))), ReceivedBlockInfo(0,Some(4),None,BlockManagerBasedStoreResult(input-0-1683071631400,Some(4))), ReceivedBlockInfo(0,Some(15),None,BlockManagerBasedStoreResult(input-0-1683071631600,Some(15))), ReceivedBlockInfo(0,Some(3),None,BlockManagerBasedStoreResult(input-0-1683071632000,Some(3))), ReceivedBlockInfo(0,Some(4),None,BlockMa

-------------------------------------------
Time: 2023-05-02 23:53:25
-------------------------------------------
joncoopertweets: 🚨BREAKING: Donald Trump’s rape trial moved into a pivotal phase on Tuesday, as jurors heard from a former businesswoma…RT 
realDonaldTrump 
JoeBiden I’d rather get kicked in the nuts than vote for the shabbos goy DeSantis
FukBidens Yes
Ty___Webb Biden made a huge mistake.
FoxNews So you're looking at fake accusers, after Russiagate, the J6 Fed op, quid pro Joe, the Biden Junta, war, collapse of dollar...
AND IT'S TRUMP'S FAULT?
SalazarYulia Naturalmente ante covid, ante grande "Reset", ante Trump cacciato e soprattutto ante proposta allargamento NATO in Ucraina ...RT 
ProudElephantUS: NEW: Jesse Watters EVISCERATES Hunter Biden in a hilarious new monologue joking about how he went to court to prove he…RT 
MeidasTouch: It's not getting nearly enough attention that Trump recently attacked an NBC reporter aboard his campaign plane and threw…RT 
...



23/05/02 23:54:41 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchCleanupEvent(Vector()) to the WriteAheadLog.
java.lang.IllegalStateException: close() was called on BatchedWriteAheadLog before write request with time 1683071681361 could be fulfilled.
	at org.apache.spark.streaming.util.BatchedWriteAheadLog.write(BatchedWriteAheadLog.scala:88)
	at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.writeToLog(ReceivedBlockTracker.scala:244)
	at org.apache.spark.streaming.scheduler.ReceivedBlockTracker.cleanupOldBatches(ReceivedBlockTracker.scala:177)
	at org.apache.spark.streaming.scheduler.ReceiverTracker.cleanupOldBlocksAndBatches(ReceiverTracker.scala:228)
	at org.apache.spark.streaming.scheduler.JobGenerator.clearCheckpointData(JobGenerator.scala:290)
	at org.apache.spark.streaming.scheduler.JobGenerator.org$apache$spark$streaming$scheduler$JobGenerator$$processEvent(JobGenerator.scala:190)
	at org.apache.spark.strea

-------------------------------------------
Time: 2023-05-02 23:53:25
-------------------------------------------
('trump', -0.6908, '2023-05-02 23:53:25')
('trump', 0.0, '2023-05-02 23:53:25')
('biden', -0.3182, '2023-05-02 23:53:25')
('biden', 0.4019, '2023-05-02 23:53:25')
('biden', -0.0258, '2023-05-02 23:53:25')
('biden', -0.8807, '2023-05-02 23:53:25')
('trump', -0.4019, '2023-05-02 23:53:25')
('trump', 0.0, '2023-05-02 23:53:25')
('biden', 0.5574, '2023-05-02 23:53:25')
('trump', -0.4404, '2023-05-02 23:53:25')
...

-------------------------------------------
Time: 2023-05-02 23:53:25
-------------------------------------------
('trump', ['joncoopertweets:', '🚨breaking:', 'trump’s', 'rape', 'trial', 'moved', 'pivotal', 'phase', 'tuesday,', 'jurors', 'heard', 'former', 'businesswoma…rt', 'realdonaldtrump', "trump's", 'fault?', 'salazaryulia', 'naturalmente', 'ante', 'covid,', 'ante', 'grande', '"reset",', 'ante', 'cacciato', 'e', 'soprattutto', 'ante', 'proposta', 'allargamento',

23/05/02 23:54:43 WARN org.apache.spark.streaming.scheduler.JobGenerator: Timed out while stopping the job generator (timeout = 50000)
23/05/02 23:54:45 WARN org.apache.spark.streaming.scheduler.ReceivedBlockTracker: Exception thrown while writing record: BatchAllocationEvent(1683071685000 ms,AllocatedBlocks(Map(0 -> ArrayBuffer(ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071629800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071630000,Some(1))), ReceivedBlockInfo(0,Some(11),None,BlockManagerBasedStoreResult(input-0-1683071630600,Some(11))), ReceivedBlockInfo(0,Some(2),None,BlockManagerBasedStoreResult(input-0-1683071630800,Some(2))), ReceivedBlockInfo(0,Some(1),None,BlockManagerBasedStoreResult(input-0-1683071631000,Some(1))), ReceivedBlockInfo(0,Some(4),None,BlockManagerBasedStoreResult(input-0-1683071631400,Some(4))), ReceivedBlockInfo(0,Some(15),None,BlockManagerBasedStoreResult(input-0-1683071631600,Some(15))), 

-------------------------------------------
Time: 2023-05-02 23:53:30
-------------------------------------------
TomJChicago: There are many reasons why Trump is not a real candidate. One of them is that he would be unable to hold office (14th Amen…RT 
shaTIRED: she really do look like she comes from a family of trump supportersRT 
middleageriot: While Donald Trump is still overseas, Democrats should enact a travel ban preventing known Russian agents from entering…
LisaKitter: I met a friend for lunch. She's a Trump supporter but doesn't actively follow politics &amp; is rarely on social media.
DonaldJTrumpJr 
WashTimes: House investigator Rep. James Comer: Hunter Biden’s legal team is intimidating witnesses
DineshDSouza: Today: How the shoe is on the other foot for the Left as the Biden regime brings charges against black socialists for opp…
JesseOppie: Literally meaningless statement. It's a nothing Burger but you're so obsessive on Donald Trump that I think you might be in…
Many of

-------------------------------------------
Time: 2023-05-02 23:53:30
-------------------------------------------
(('trump', 'moved'), 2)
(('trump', 'pivotal'), 2)
(('trump', 'phase'), 2)
(('trump', 'jurors'), 2)
(('trump', 'former'), 7)
(('trump', 'businesswoma…rt'), 1)
(('trump', 'realdonaldtrump'), 3)
(('trump', "trump's"), 7)
(('trump', 'naturalmente'), 1)
(('trump', 'grande'), 1)
...

-------------------------------------------
Time: 2023-05-02 23:53:30
-------------------------------------------
('trump', ('&amp;', 10))
('biden', ('biden’s', 9))

-------------------------------------------
Time: 2023-05-02 23:53:30
-------------------------------------------
('trump', '&amp;', 10)
('biden', 'biden’s', 9)

-------------------------------------------
Time: 2023-05-02 23:53:30
-------------------------------------------
('trump', '&amp;', 10, '2023-05-02 23:53:30')
('biden', 'biden’s', 9, '2023-05-02 23:53:30')

-------------------------------------------
Time: 2023-05-02 23:53:30
-

                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:35
-------------------------------------------
Hyperresilience: ✍🏽La lumière éclaire l'ombre, tous les jours un peu plus. L'ADMINISTRATION BIDEN SECOUÉE PAR DES INFORMATIONS FAISANT…RT 
Many of you want to say Trump supporters are in the minority or that 
realDonaldTrump has lost his support.
owillis: try to imagine joe biden grabbing a reporter's phone out of his hand, tossing it, having the event recorded and then the media…RT 
mmpadellan: FUN FACT: Marjorie Taylor Greene works in Washington DC. Hunter Biden does not.
Marjorie Taylor Greene asked trump for a P…
Stonekettle: Funny, how these cultists see Trump. They dress him up in the trappings of professions he   could never abide, attribute t…RT 
PrezLives2022: The Federalist Society has a tax status that condemns political activism😂😂😂yet somehow Leonard Leo who helped Trump sele…RT 
playbookdc: DONALD TRUMP, the former president and the most likely GOP presidential

                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:40
-------------------------------------------
Fuck you Joe Biden 🤨🖕RT 
One time I was having dinner at the Trump Hotel &amp; I asked my waitress…
ElephantWatcher: BIDEN IS CORRECT.....read your own bill you morons. Pay your enormous trump debt and shut up with your lies from Breit…
POTUS FUKC biden
grimsr3ap3r79 An Indian American called Vivek Ramaswamy isn’t going to be a Republican nominee anytime in this century. Their voters don’t even know how to pronounce his nameRT 
genz_4biden: In just a week since Biden announced, Gen-Z for Biden has gained 26,000 followers. The energy is here to organize young pe…
ZherkaOfficial Well ...if you are Better... Defeat them ..you cannot even reach them . Trump , Elon and tate are unbeatable together
HuffPost: Former MSNBC anchor Keith Olbermann accused the cable network of "committing journalistic suicide" by giving the former presi…I keep laughing at the Trump people because they

-------------------------------------------
Time: 2023-05-02 23:53:40
-------------------------------------------
(('biden', 'hyperresilience:'), 1)
(('biden', '✍🏽la'), 1)
(('biden', 'lumière'), 1)
(('biden', 'éclaire'), 1)
(('biden', 'jours'), 1)
(('biden', 'un'), 1)
(('biden', 'plus.'), 1)
(('biden', 'par'), 1)
(('biden', 'faisant…rt'), 1)
(('biden', 'owillis:'), 1)
...

-------------------------------------------
Time: 2023-05-02 23:53:40
-------------------------------------------
('biden', ('hunter', 22))
('trump', ('asked', 6))
('ramaswamy', ('indian', 1))

-------------------------------------------
Time: 2023-05-02 23:53:40
-------------------------------------------
('biden', 'hunter', 22)
('trump', 'asked', 6)
('ramaswamy', 'indian', 1)

-------------------------------------------
Time: 2023-05-02 23:53:40
-------------------------------------------
('biden', 'hunter', 22, '2023-05-02 23:53:40')
('trump', 'asked', 6, '2023-05-02 23:53:40')
('ramaswamy', 'indian', 1, '2023-05-

                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:45
-------------------------------------------
It always struck me as an option that—inasmuch as it ought to be interp'd as fully w/in the law (i.e. "black letter")— the Court would need to back unless they were wanting to take part of the blame for the econ disaster.House Oversight Chair DISINTEGRATES on LIVE TV When Host Asks for Evidence of Biden Crimes! https://t.co/DkcTvwoVon via 
YouTube  #JamesComer #DollemoreDaily #JoeBiden #RepublicansRT 
SDonziger: My latest column calls out President Biden for caving to the fossil fuel industry as it continues to destroy the planet. In…
SecBlinken told our Senate investigators he did not email Hunter Biden. Not t…RT 
JamesNavaCom: El congresista por Oklahoma Josh Brecheen respalda a Donald Trump para presidente de Estados Unidos. https://t.co/9ADyAXX…
IlhanMN: Fun fact: Republicans voted to raise the debt ceiling 3 times when Donald Trump was President, with no preconditions.

                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:50
-------------------------------------------
LouDobbs: Have you noticed that the Biden Regime is getting away with almost everything they do? Whether it's ordering the southern bor…RT 
DavidSacks: House Republicans have passed a debt ceiling increase but it requires Biden to agree to a 1% annual growth cap, claw back u…RT 
GrillJeff So instead of non-stop Trump coverage there will be non-stop Trump elevation.  Thanks CNN.
MayoIsSpicyy And when did Obama or Biden Serve?  Asking for a friend
1JaySC Ireland 🇮🇪 hates Trump too but loves Biden ! https://t.co/v0Sbskyn8DTrump tops DeSantis by 36 points in new poll https://t.co/F81tT68rvbRT 
TrumpWarRoom: #AGENDA47: President Trump’s plan to reclaim our colleges and universities from radical Marxists and ensure America’s stu…
NickAdamsinUSA LOL Donald "Fascist" Trump would have trouble pressing the bar without any weight on it at all. Trump is behind the deep state, illegal a

-------------------------------------------
Time: 2023-05-02 23:53:50
-------------------------------------------
(('biden', 'struck'), 1)
(('biden', 'option'), 1)
(('biden', 'fully'), 1)
(('biden', '"black'), 1)
(('biden', 'letter")—'), 1)
(('biden', 'court'), 5)
(('biden', 'need'), 2)
(('biden', 'back'), 3)
(('biden', 'unless'), 2)
(('biden', 'part'), 2)
...

-------------------------------------------
Time: 2023-05-02 23:53:50
-------------------------------------------
('biden', ('hunter', 12))
('trump', ('would', 8))

-------------------------------------------
Time: 2023-05-02 23:53:50
-------------------------------------------
('biden', 'hunter', 12)
('trump', 'would', 8)

-------------------------------------------
Time: 2023-05-02 23:53:50
-------------------------------------------
('biden', 'hunter', 12, '2023-05-02 23:53:50')
('trump', 'would', 8, '2023-05-02 23:53:50')

-------------------------------------------
Time: 2023-05-02 23:53:50
---------------------------------

                                                                                

-------------------------------------------
Time: 2023-05-02 23:53:55
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:55
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:53:55
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
-------------------------------------------

-------------------------------------------
Time: 2023-05-02 23:54:00
----------

Waiting on bqjob_r7d2a0d3e9e2ddc52_00000187dee57887_1 ... (2s) Current status: DONE   
Waiting on bqjob_r9a221fbb297e04_00000187dee58e81_1 ... (3s) Current status: DONE   
