In [1]:
import os
# Ask PySpark to pull the Kafka source/sink and Avro connector packages when it
# launches; this cell runs before the SparkSession is created in the next cell.
# NOTE(review): the artifacts are pinned to 3.3.1 while the Ivy log further down
# loads its settings from a spark-3.3.2 install — consider aligning the versions.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.1,org.apache.spark:spark-avro_2.12:3.3.1 pyspark-shell'

In [2]:
from pyspark.sql import SparkSession
import pyspark.sql.types as T
import pyspark.sql.functions as F
from pyspark.sql.functions import sum, col, desc



# Get (or create) the session used by every cell below.
spark = (
    SparkSession.builder
    .appName("Spark-Notebook")
    .getOrCreate()
)
# Limit driver logging to WARN and above.
spark.sparkContext.setLogLevel('WARN')

:: loading settings :: url = jar:file:/home/jainendra/spark/spark-3.3.2-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/jainendra/.ivy2/cache
The jars for the packages stored in: /home/jainendra/.ivy2/jars
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
org.apache.spark#spark-avro_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-5f8be0c4-f3b4-4aef-ba48-e0856aeb1834;1.0
	confs: [default]
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.3.1 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.3.1 in central
	found org.apache.kafka#kafka-clients;2.8.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.8.4 in central
	found org.slf4j#slf4j-api;1.7.32 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.2 in central
	found org.spark-project.spark#unused;1.0.0 in central
	found org.apache.hadoop#hadoop-client-api;3.3.2 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
	fou

23/03/14 11:30:36 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [3]:
# Streaming reader subscribed to both ride topics, starting at the earliest offsets.
# NOTE(review): the broker address and SASL credentials are inline literals
# (redacted here); consider loading them from the environment or a secrets store.
df_kafka_raw = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "*************")
    .option("subscribe", "rides_green,rides_fhv")
    .option("startingOffsets", "earliest")
    .option("kafka.security.protocol", "SASL_SSL")
    .option("kafka.sasl.mechanism", "PLAIN")
    .option(
        "kafka.sasl.jaas.config",
        """org.apache.kafka.common.security.plain.PlainLoginModule required username="************" password="************";""",
    )
    .load()
)


In [4]:
# Print the column layout of the raw Kafka source DataFrame.
df_kafka_raw.printSchema()

root
 |-- key: binary (nullable = true)
 |-- value: binary (nullable = true)
 |-- topic: string (nullable = true)
 |-- partition: integer (nullable = true)
 |-- offset: long (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- timestampType: integer (nullable = true)



In [5]:
# Bare expression: the cell output is the DataFrame's repr (column names and types).
df_kafka_raw

DataFrame[key: binary, value: binary, topic: string, partition: int, offset: bigint, timestamp: timestamp, timestampType: int]

In [6]:
# Kafka delivers key and value as binary; expose both as strings.
df_kafka_encoded = df_kafka_raw.selectExpr(
    "CAST(key AS STRING)",
    "CAST(value AS STRING)",
)

In [7]:
# Confirm that key and value are now string columns.
df_kafka_encoded.printSchema()

root
 |-- key: string (nullable = true)
 |-- value: string (nullable = true)



In [8]:
def parse_ride_from_kafka_message(df_raw, schema):
    """Split each Kafka message value into the columns described by ``schema``.

    The ``value`` column is cast to a string and split on ``', '``; the piece at
    position ``i`` is cast to the data type of the ``i``-th schema field and
    returned under that field's name.

    NOTE(review): the console output later in this notebook shows cells such as
    ``{"pickup_datetime": ["2019-01-04 18:01:15"]``, which suggests the producer
    sends JSON rather than a bare delimited record. If so, the columns produced
    here carry JSON fragments, not clean values — confirm the producer format.

    Args:
        df_raw: Streaming DataFrame with Kafka ``key`` and ``value`` columns.
        schema: Iterable of fields exposing ``name`` and ``dataType``
            (e.g. a ``StructType``); field order selects the split position.

    Returns:
        Streaming DataFrame holding exactly the schema's columns, in order.

    Raises:
        AssertionError: If ``df_raw`` is not a streaming DataFrame.
    """
    assert df_raw.isStreaming is True, "DataFrame doesn't receive streaming data"

    df = df_raw.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")

    # All message attributes as one array column. Named so it does not shadow
    # the `col` function this notebook imports from pyspark.sql.functions.
    value_parts = F.split(df['value'], ', ')

    # Project every field in a single select: chaining withColumn in a loop adds
    # one projection per field and inflates the query plan.
    return df.select([
        value_parts.getItem(idx).cast(field.dataType).alias(field.name)
        for idx, field in enumerate(schema)
    ])

In [9]:
#         self.dispatching_base_num = arr[1]
#         self.pickup_datetime = datetime.strptime(arr[2], "%Y-%m-%d %H:%M:%S"),
#         self.dropOff_datetime = datetime.strptime(arr[3], "%Y-%m-%d %H:%M:%S"),
#         self.PULocationID = int(eval(arr[4]))
#         self.DOLocationID = int(eval(arr[5]))
#         self.SR_Flag = arr[6]
#         self.Affiliated_base_number = arr[7]

# Every ride attribute is carried as a string. Field order matters: the parser
# assigns the i-th piece of the ', '-split message value to the i-th field.
# dispatching_base_num, SR_Flag and Affiliated_base_number are not included.
ride_schema = T.StructType([
    T.StructField(field_name, T.StringType())
    for field_name in (
        'pickup_datetime',
        'dropOff_datetime',
        'PULocationID',
        'DOLocationID',
        'texi_type',
    )
])

In [10]:
# Turn the raw Kafka stream into one string column per field of ride_schema.
df_rides = parse_ride_from_kafka_message(df_raw=df_kafka_raw, schema=ride_schema)

In [None]:
# Print the column layout of the parsed rides DataFrame.
df_rides.printSchema()

In [None]:
def sink_console(df, output_mode: str = 'complete', processing_time: str = '5 seconds'):
    """Start a streaming query that prints ``df`` to the console.

    Args:
        df: Streaming DataFrame to write.
        output_mode: Output mode handed to the stream writer.
        processing_time: Processing-time trigger interval, e.g. ``'5 seconds'``.

    Returns:
        The handle returned by ``start()`` (a pyspark.sql.streaming.StreamingQuery).
    """
    console_writer = df.writeStream
    console_writer = console_writer.outputMode(output_mode)
    console_writer = console_writer.trigger(processingTime=processing_time)
    console_writer = console_writer.format("console")
    # Print full cell contents instead of truncating long values.
    console_writer = console_writer.option("truncate", False)
    return console_writer.start()

In [None]:
# Start a console query over the parsed rides in 'append' output mode and keep its handle.
write_query = sink_console(df_rides, output_mode='append')

In [None]:
def sink_memory(df, query_name, query_template):
    """Start an in-memory streaming sink and build a SQL query over it.

    Args:
        df: Streaming DataFrame to write.
        query_name: Name given to the streaming query; also substituted into
            ``query_template`` as ``table_name``.
        query_template: SQL text containing a ``{table_name}`` placeholder.

    Returns:
        Tuple ``(write_query, query_results)``: the started streaming query and
        the result of ``spark.sql`` on the formatted template.
    """
    memory_writer = df.writeStream.queryName(query_name).format('memory')
    write_query = memory_writer.start()
    # Relies on the notebook-level `spark` session.
    query_results = spark.sql(query_template.format(table_name=query_name))
    return write_query, query_results

In [None]:
# Start an in-memory sink named after query_name and build a SQL query that
# counts the distinct PULocationID values in that table.
query_name = 'frequent_pipup_location'
query_template = 'select count(distinct(PULocationID)) from {table_name}'
write_query, df_vendor_location_id = sink_memory(df=df_rides, query_name=query_name, query_template=query_template)

In [None]:
# Inspect the query handle: print its type, then show its status as the cell output.
print(type(write_query)) # pyspark.sql.streaming.StreamingQuery
write_query.status

In [None]:
# Display the result of the SQL query that sink_memory built over the in-memory table.
df_vendor_location_id.show()

In [None]:
# Available columns: 'pickup_datetime','dropOff_datetime','PULocationID','DOLocationID','texi_type'
# NOTE: prepare_df_to_kafka_sink and sink_kafka are defined in a later cell of
# this notebook; run that cell before this one.
# Key each message by pickup location and send only the taxi type as the value.
df_trip_messages = prepare_df_to_kafka_sink(df=df_rides,
                                            value_columns=["texi_type"], key_column='PULocationID')
kafka_sink_query = sink_kafka(df=df_trip_messages, topic="rides_all")

In [11]:
def sink_console(df, output_mode: str = 'complete', processing_time: str = '5 seconds'):
    """Start a streaming query that prints ``df`` to the console.

    NOTE(review): an identical ``sink_console`` is defined in an earlier cell of
    this notebook; one of the two definitions is redundant.

    Args:
        df: Streaming DataFrame to write.
        output_mode: Output mode handed to the stream writer.
        processing_time: Processing-time trigger interval, e.g. ``'5 seconds'``.

    Returns:
        The handle returned by ``start()`` (a pyspark.sql.streaming.StreamingQuery).
    """
    stream_writer = df.writeStream
    stream_writer = stream_writer.outputMode(output_mode)
    stream_writer = stream_writer.trigger(processingTime=processing_time)
    stream_writer = stream_writer.format("console")
    # Print full cell contents instead of truncating long values.
    stream_writer = stream_writer.option("truncate", False)
    return stream_writer.start()

def op_groupby(df, column_names):
    """Count rows per group and order the groups by descending count.

    Args:
        df: DataFrame to aggregate.
        column_names: Column name(s) passed to ``groupBy``.

    Returns:
        DataFrame with the grouping columns plus ``count``, sorted so the
        largest counts come first.
    """
    group_counts = df.groupBy(column_names).count()
    return group_counts.sort(desc("count"))


def op_windowed_groupby(df, window_duration, slide_duration):
    """Count rows per pickup location within sliding pickup-time windows.

    NOTE(review): ride_schema in this notebook declares pickup_datetime as a
    string — confirm that F.window accepts it, or cast to timestamp first.

    Args:
        df: DataFrame exposing ``pickup_datetime`` and ``PULocationID`` columns.
        window_duration: Window length string, e.g. ``"10 minutes"``.
        slide_duration: Slide interval string, e.g. ``'5 minutes'``.

    Returns:
        DataFrame with one row per (window, PULocationID) pair and its ``count``.
    """
    pickup_window = F.window(
        timeColumn=df.pickup_datetime,
        windowDuration=window_duration,
        slideDuration=slide_duration,
    )
    grouped = df.groupBy(pickup_window, df.PULocationID)
    return grouped.count()


def prepare_df_to_kafka_sink(df, value_columns, key_column=None):
    """Shape ``df`` into the ``key`` / ``value`` layout written to Kafka.

    Args:
        df: DataFrame to publish.
        value_columns: Column names whose values are joined with ``', '`` to
            form the message ``value``.
        key_column: Optional column whose string-cast value becomes the message
            ``key``. When omitted, no key is derived.

    Returns:
        DataFrame with a ``value`` column, preceded by a ``key`` column when
        ``key_column`` is given or ``df`` already carries a column named ``key``.
    """
    df = df.withColumn("value", F.concat_ws(', ', *value_columns))
    if key_column:
        # Build the key with withColumn rather than a rename so an existing
        # "key" column is replaced instead of ending up duplicated.
        df = df.withColumn("key", F.col(key_column).cast('string'))
    # Without key_column there may be no "key" column at all; selecting it
    # unconditionally would fail, so keep only the columns that exist.
    return df.select([name for name in ('key', 'value') if name in df.columns])


def sink_kafka(df, topic, checkpoint_location="checkpoint"):
    """Start a streaming query that writes ``df`` to a Kafka topic.

    NOTE(review): the broker address and SASL credentials are inline literals
    (redacted here); consider loading them from the environment or a secrets store.

    Args:
        df: Streaming DataFrame to publish.
        topic: Destination Kafka topic.
        checkpoint_location: Directory used for this query's checkpoint. The
            default keeps the previous fixed path; pass a distinct directory
            for every additional query so they do not share checkpoint state.

    Returns:
        The handle returned by ``start()``.
    """
    write_query = (
        df.writeStream
        .format("kafka")
        .option("kafka.bootstrap.servers", "************")
        .option("topic", topic)
        .option("checkpointLocation", checkpoint_location)
        .option("kafka.security.protocol", "SASL_SSL")
        .option("kafka.sasl.mechanism", "PLAIN")
        .option(
            "kafka.sasl.jaas.config",
            """org.apache.kafka.common.security.plain.PlainLoginModule required username="************" password="************";""",
        )
        .start()
    )
    return write_query

In [12]:
# Start a console query over the parsed rides in 'append' output mode.
# NOTE(review): the returned StreamingQuery handle is discarded here.
sink_console(df_rides, output_mode='append')

# Running trip count per pickup location, largest count first.
df_trip_count_by_pickup_loc = op_groupby(df_rides, ['PULocationID'])
# Windowed variant, currently disabled:
#df_trip_count_by_pickup_date_loc_id = op_windowed_groupby(df_rides, window_duration="10 minutes",slide_duration='5 minutes')

# Write the aggregated counts to the console for debugging / testing
# (sink_console's default output mode, 'complete', is used). As the last
# expression of the cell, the returned query handle is shown as the cell output.
sink_console(df_trip_count_by_pickup_loc)
# The write to the Kafka topic is done in the following cell.


23/03/14 11:30:55 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-5368c314-c5e0-4c19-af65-0c8dfc467bf1. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
23/03/14 11:30:55 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
23/03/14 11:30:55 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-14867b39-4625-4d40-abeb-8179324dcbe9. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
23/03/14 11:30:55 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not support

<pyspark.sql.streaming.StreamingQuery at 0x7f72f867ef10>

                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------


[Stage 2:>                                                        (2 + 4) / 200]

+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-04 18:01:15"]|"dropOff_datetime": ["2019-01-04 18:08:43"]|"PULocationID": 175|"DOLocationID": 16 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-09 09:09:08"]|"dropOff_datetime": ["2019-01-09 09:17:34"]|"PULocationID": 213|"DOLocationID": 60 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-02 22:24:22"]|"dropOff_datetime": ["2019-01-02 22:44:29"]|"PULocationID": 114|"DOLocationID": 142|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-22 10:30:38"]|"dropOff_datetime": ["2019-01-22 10:36:00"]|"PULocation

[Stage 2:=>                                                       (4 + 4) / 200]

23/03/14 11:31:07 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 12101 milliseconds


                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-02 18:54:14"]|"dropOff_datetime": ["2019-01-02 19:02:25"]|"PULocationID": 43 |"DOLocationID": 262|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 18:55:10"]|"dropOff_datetime": ["2019-01-02 19:03:17"]|"PULocationID": 66 |"DOLocationID": 25 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 18:25:56"]|"dropOff_datetime": ["2019-01-02 18:33:17"]|"PULocationID": 244|"DOLocationID": 243|"texi_type": "

[Stage 5:====>           (54 + 4) / 200][Stage 7:>                  (0 + 0) / 4]

In [13]:
# Publish each parsed ride to the "rides_all" topic: the key is PULocationID and
# the value is the listed columns joined by ', '.
df_trip_count_messages = prepare_df_to_kafka_sink(
    df=df_rides,
    value_columns=[
        'texi_type',
        'pickup_datetime',
        'dropOff_datetime',
        'PULocationID',
        'DOLocationID',
    ],
    key_column='PULocationID',
)
kafka_sink_query = sink_kafka(df=df_trip_count_messages, topic="rides_all")


[Stage 5:====>           (61 + 4) / 200][Stage 7:>                  (0 + 0) / 4][Stage 5:=====>          (67 + 4) / 200][Stage 7:>                  (0 + 0) / 4]

23/03/14 11:31:20 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.


                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-02 20:03:01"]|"dropOff_datetime": ["2019-01-02 20:14:00"]|"PULocationID": 74 |"DOLocationID": 263|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 19:12:45"]|"dropOff_datetime": ["2019-01-02 19:17:33"]|"PULocationID": 129|"DOLocationID": 173|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 19:08:41"]|"dropOff_datetime": ["2019-01-02 19:46:00"]|"PULocationID": 217|"DOLocationID": 26 |"texi_type": "

                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3460 |
|"PULocationID": 41 |2675 |
|"PULocationID": 7  |2397 |
|"PULocationID": 75 |2381 |
|"PULocationID": 82 |1965 |
|"PULocationID": 42 |1961 |
|"PULocationID": 129|1833 |
|"PULocationID": 264|1728 |
|"PULocationID": 181|1487 |
|"PULocationID": 255|1481 |
|"PULocationID": 97 |1421 |
|"PULocationID": 95 |1287 |
|"PULocationID": 166|1191 |
|"PULocationID": 244|1180 |
|"PULocationID": 260|1152 |
|"PULocationID": 33 |1091 |
|"PULocationID": 25 |1022 |
|"PULocationID": 80 |1000 |
|"PULocationID": 61 |979  |
|"PULocationID": 65 |936  |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:31:29 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 33676 milliseconds
-------------------------------------------
Batch: 3


                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-04 07:11:51"]|"dropOff_datetime": ["2019-01-04 07:20:36"]|"PULocationID": 165|"DOLocationID": 21 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-25 04:24:08"]|"dropOff_datetime": ["2019-01-25 04:43:12"]|"PULocationID": 119|"DOLocationID": 48 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-31 21:06:14"]|"dropOff_datetime": ["2019-01-31 21:33:57"]|"PULocationID": 132|"DOLocationID": 36 |"texi_type": "FHV"}|
|{"

                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3503 |
|"PULocationID": 41 |2731 |
|"PULocationID": 75 |2431 |
|"PULocationID": 7  |2424 |
|"PULocationID": 82 |1998 |
|"PULocationID": 42 |1983 |
|"PULocationID": 129|1849 |
|"PULocationID": 264|1754 |
|"PULocationID": 181|1504 |
|"PULocationID": 255|1502 |
|"PULocationID": 97 |1452 |
|"PULocationID": 95 |1316 |
|"PULocationID": 166|1215 |
|"PULocationID": 244|1194 |
|"PULocationID": 260|1170 |
|"PULocationID": 33 |1118 |
|"PULocationID": 25 |1043 |
|"PULocationID": 80 |1007 |
|"PULocationID": 61 |997  |
|"PULocationID": 65 |961  |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:31:45 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 15532 milliseconds
-------------------------------------------
Batch: 6


                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-10 20:56:13"]|"dropOff_datetime": ["2019-01-10 21:11:27"]|"PULocationID": 80 |"DOLocationID": 144|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-25 15:59:38"]|"dropOff_datetime": ["2019-01-25 16:08:21"]|"PULocationID": 66 |"DOLocationID": 209|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-13 09:27:43"]|"dropOff_datetime": ["2019-01-13 09:38:52"]|"PULocationID": 136|"DOLocationID": 235|"texi_type": "FHV"}|
|{"

                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
-------------------------------------------
Batch: 9
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3529 |
|"PULocationID": 41 |2750 |
|"PULocationID": 75 |2470 |
|"PULocationID": 7  |2435 |
|"PULocationID": 82 |2014 |
|"PULocationID": 42 |1992 |
|"PULocationID": 129|1861 |
|"PULocationID": 264|1764 |
|"PULocationID": 181|1513 |
|"PULocationID": 255|1508 |
|"PULocationID": 97 |1472 |
|"PULocationID": 95 |1326 |
|"PULocationID": 166|1231 |
|"PULocationID": 244|1197 |
|"PULocationID": 260|1176 |
|"PULocationID": 33 |1130 |
|"PULocationID": 25 |1061 |
|"PULocationID": 80 |1010 |
|"PULocationID": 61 |1001 |
|"PULocationID": 65 |974  |
+-------------------+-----+
only showing top 20 rows

+-------------------------------------------+-------------------------------------------+-------------------

                                                                                

-------------------------------------------
Batch: 10
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-02 20:02:55"]|"dropOff_datetime": ["2019-01-02 20:10:23"]|"PULocationID": 74 |"DOLocationID": 238|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 19:48:30"]|"dropOff_datetime": ["2019-01-02 19:56:08"]|"PULocationID": 95 |"DOLocationID": 28 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 19:27:51"]|"dropOff_datetime": ["2019-01-02 20:14:33"]|"PULocationID": 152|"DOLocationID": 35 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3547 |
|"PULocationID": 41 |2764 |
|"PULocationID": 75 |2491 |
|"PULocationID": 7  |2451 |
|"PULocationID": 82 |2024 |
|"PULocationID": 42 |1999 |
|"PULocationID": 129|1867 |
|"PULocationID": 264|1774 |
|"PULocationID": 181|1522 |
|"PULocationID": 255|1517 |
|"PULocationID": 97 |1493 |
|"PULocationID": 95 |1350 |
|"PULocationID": 166|1238 |
|"PULocationID": 244|1206 |
|"PULocationID": 260|1181 |
|"PULocationID": 33 |1141 |
|"PULocationID": 25 |1067 |
|"PULocationID": 80 |1015 |
|"PULocationID": 61 |1009 |
|"PULocationID": 65 |984  |
+-------------------+-----+
only showing top 20 rows

-------------------------------------------
Batch: 11
-------------------------------------------
23/03/14 11:32:11 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 500

                                                                                

-------------------------------------------
Batch: 13
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-10 08:19:12"]|"dropOff_datetime": ["2019-01-10 08:44:10"]|"PULocationID": 75 |"DOLocationID": 223|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-21 06:16:54"]|"dropOff_datetime": ["2019-01-21 06:27:12"]|"PULocationID": 248|"DOLocationID": 235|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-18 06:17:44"]|"dropOff_datetime": ["2019-01-18 06:26:58"]|"PULocationID": 164|"DOLocationID": 237|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3558 |
|"PULocationID": 41 |2780 |
|"PULocationID": 75 |2512 |
|"PULocationID": 7  |2464 |
|"PULocationID": 82 |2034 |
|"PULocationID": 42 |2012 |
|"PULocationID": 129|1875 |
|"PULocationID": 264|1779 |
|"PULocationID": 181|1534 |
|"PULocationID": 255|1528 |
|"PULocationID": 97 |1508 |
|"PULocationID": 95 |1354 |
|"PULocationID": 166|1246 |
|"PULocationID": 244|1213 |
|"PULocationID": 260|1185 |
|"PULocationID": 33 |1146 |
|"PULocationID": 25 |1074 |
|"PULocationID": 61 |1018 |
|"PULocationID": 80 |1017 |
|"PULocationID": 65 |990  |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:32:23 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 12361 milliseconds
-------------------------------------------
Batch: 14

                                                                                

-------------------------------------------
Batch: 15
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-08 20:05:14"]|"dropOff_datetime": ["2019-01-08 20:47:59"]|"PULocationID": 261|"DOLocationID": 193|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-11 08:57:24"]|"dropOff_datetime": ["2019-01-11 09:03:48"]|"PULocationID": 26 |"DOLocationID": 26 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-30 20:55:13"]|"dropOff_datetime": ["2019-01-30 21:04:57"]|"PULocationID": 48 |"DOLocationID": 50 |"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3576 |
|"PULocationID": 41 |2793 |
|"PULocationID": 75 |2527 |
|"PULocationID": 7  |2480 |
|"PULocationID": 82 |2048 |
|"PULocationID": 42 |2019 |
|"PULocationID": 129|1881 |
|"PULocationID": 264|1792 |
|"PULocationID": 181|1543 |
|"PULocationID": 255|1536 |
|"PULocationID": 97 |1517 |
|"PULocationID": 95 |1366 |
|"PULocationID": 166|1256 |
|"PULocationID": 244|1220 |
|"PULocationID": 260|1186 |
|"PULocationID": 33 |1154 |
|"PULocationID": 25 |1086 |
|"PULocationID": 61 |1022 |
|"PULocationID": 80 |1018 |
|"PULocationID": 65 |1001 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:32:35 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11626 milliseconds
-------------------------------------------
Batch: 16

                                                                                

-------------------------------------------
Batch: 17
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-02 20:05:02"]|"dropOff_datetime": ["2019-01-02 20:16:29"]|"PULocationID": 66 |"DOLocationID": 190|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 20:28:26"]|"dropOff_datetime": ["2019-01-02 20:40:06"]|"PULocationID": 28 |"DOLocationID": 216|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 20:04:17"]|"dropOff_datetime": ["2019-01-02 20:39:40"]|"PULocationID": 41 |"DOLocationID": 242|"texi_type": 

                                                                                

-------------------------------------------
Batch: 18
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-12 15:11:21"]|"dropOff_datetime": ["2019-01-12 15:28:54"]|"PULocationID": 123|"DOLocationID": 14 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-15 00:21:13"]|"dropOff_datetime": ["2019-01-15 00:40:54"]|"PULocationID": 92 |"DOLocationID": 81 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-28 10:03:23"]|"dropOff_datetime": ["2019-01-28 10:23:17"]|"PULocationID": 170|"DOLocationID": 48 |"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 20
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-15 19:51:51"]|"dropOff_datetime": ["2019-01-15 20:12:45"]|"PULocationID": 36 |"DOLocationID": 82 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-09 22:11:37"]|"dropOff_datetime": ["2019-01-09 22:33:44"]|"PULocationID": 148|"DOLocationID": 225|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-29 19:27:28"]|"dropOff_datetime": ["2019-01-29 19:31:16"]|"PULocationID": 74 |"DOLocationID": 74 |"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 7
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3607 |
|"PULocationID": 41 |2820 |
|"PULocationID": 75 |2559 |
|"PULocationID": 7  |2505 |
|"PULocationID": 82 |2067 |
|"PULocationID": 42 |2038 |
|"PULocationID": 129|1894 |
|"PULocationID": 264|1816 |
|"PULocationID": 181|1566 |
|"PULocationID": 255|1554 |
|"PULocationID": 97 |1537 |
|"PULocationID": 95 |1393 |
|"PULocationID": 166|1269 |
|"PULocationID": 244|1233 |
|"PULocationID": 260|1201 |
|"PULocationID": 33 |1162 |
|"PULocationID": 25 |1108 |
|"PULocationID": 61 |1029 |
|"PULocationID": 80 |1023 |
|"PULocationID": 65 |1020 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:32:57 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11737 milliseconds
-------------------------------------------
Batch: 21

                                                                                

-------------------------------------------
Batch: 22
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-29 13:44:27"]|"dropOff_datetime": ["2019-01-29 13:56:36"]|"PULocationID": 229|"DOLocationID": 263|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-01 02:29:14"]|"dropOff_datetime": ["2019-01-01 02:37:14"]|"PULocationID": 186|"DOLocationID": 107|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-11 00:11:22"]|"dropOff_datetime": ["2019-01-11 00:13:15"]|"PULocationID": 254|"DOLocationID": 254|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3625 |
|"PULocationID": 41 |2847 |
|"PULocationID": 75 |2569 |
|"PULocationID": 7  |2521 |
|"PULocationID": 82 |2078 |
|"PULocationID": 42 |2042 |
|"PULocationID": 129|1897 |
|"PULocationID": 264|1819 |
|"PULocationID": 181|1586 |
|"PULocationID": 255|1563 |
|"PULocationID": 97 |1552 |
|"PULocationID": 95 |1396 |
|"PULocationID": 166|1279 |
|"PULocationID": 244|1245 |
|"PULocationID": 260|1210 |
|"PULocationID": 33 |1173 |
|"PULocationID": 25 |1115 |
|"PULocationID": 61 |1036 |
|"PULocationID": 65 |1028 |
|"PULocationID": 80 |1024 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:33:09 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11768 milliseconds
-------------------------------------------
Batch: 23

                                                                                

-------------------------------------------
Batch: 25
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-20 09:10:57"]|"dropOff_datetime": ["2019-01-20 09:28:38"]|"PULocationID": 236|"DOLocationID": 244|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-20 21:30:01"]|"dropOff_datetime": ["2019-01-20 21:48:48"]|"PULocationID": 61 |"DOLocationID": 37 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-05 14:08:14"]|"dropOff_datetime": ["2019-01-05 14:42:34"]|"PULocationID": 181|"DOLocationID": 135|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 27
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-02 22:21:17"]|"dropOff_datetime": ["2019-01-02 22:24:53"]|"PULocationID": 74 |"DOLocationID": 41 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 22:28:03"]|"dropOff_datetime": ["2019-01-02 22:36:52"]|"PULocationID": 97 |"DOLocationID": 17 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-02 22:28:35"]|"dropOff_datetime": ["2019-01-02 22:38:03"]|"PULocationID": 191|"DOLocationID": 205|"texi_type": 

                                                                                

-------------------------------------------
Batch: 10
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3661 |
|"PULocationID": 41 |2872 |
|"PULocationID": 75 |2593 |
|"PULocationID": 7  |2543 |
|"PULocationID": 82 |2110 |
|"PULocationID": 42 |2050 |
|"PULocationID": 129|1910 |
|"PULocationID": 264|1831 |
|"PULocationID": 181|1628 |
|"PULocationID": 97 |1583 |
|"PULocationID": 255|1578 |
|"PULocationID": 95 |1414 |
|"PULocationID": 166|1295 |
|"PULocationID": 244|1255 |
|"PULocationID": 260|1223 |
|"PULocationID": 33 |1178 |
|"PULocationID": 25 |1129 |
|"PULocationID": 61 |1049 |
|"PULocationID": 65 |1043 |
|"PULocationID": 80 |1030 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:33:32 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11602 milliseconds
-------------------------------------------
Batch: 2

                                                                                

-------------------------------------------
Batch: 29
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-16 13:15:12"]|"dropOff_datetime": ["2019-01-16 14:09:31"]|"PULocationID": 162|"DOLocationID": 132|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-15 18:19:45"]|"dropOff_datetime": ["2019-01-15 18:28:48"]|"PULocationID": 116|"DOLocationID": 166|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-30 09:13:24"]|"dropOff_datetime": ["2019-01-30 09:32:26"]|"PULocationID": 212|"DOLocationID": 241|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 11
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3672 |
|"PULocationID": 41 |2891 |
|"PULocationID": 75 |2609 |
|"PULocationID": 7  |2556 |
|"PULocationID": 82 |2118 |
|"PULocationID": 42 |2069 |
|"PULocationID": 129|1913 |
|"PULocationID": 264|1841 |
|"PULocationID": 181|1645 |
|"PULocationID": 97 |1587 |
|"PULocationID": 255|1584 |
|"PULocationID": 95 |1417 |
|"PULocationID": 166|1302 |
|"PULocationID": 244|1271 |
|"PULocationID": 260|1227 |
|"PULocationID": 33 |1186 |
|"PULocationID": 25 |1145 |
|"PULocationID": 61 |1052 |
|"PULocationID": 65 |1050 |
|"PULocationID": 80 |1031 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:33:43 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11144 milliseconds
-------------------------------------------
Batch: 3

                                                                                

-------------------------------------------
Batch: 31
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-26 09:31:27"]|"dropOff_datetime": ["2019-01-26 09:44:13"]|"PULocationID": 32 |"DOLocationID": 51 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-25 20:39:08"]|"dropOff_datetime": ["2019-01-25 21:24:25"]|"PULocationID": 239|"DOLocationID": 32 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-09 06:07:20"]|"dropOff_datetime": ["2019-01-09 06:31:41"]|"PULocationID": 48 |"DOLocationID": 198|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 12
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3687 |
|"PULocationID": 41 |2903 |
|"PULocationID": 75 |2624 |
|"PULocationID": 7  |2562 |
|"PULocationID": 82 |2135 |
|"PULocationID": 42 |2078 |
|"PULocationID": 129|1921 |
|"PULocationID": 264|1846 |
|"PULocationID": 181|1656 |
|"PULocationID": 97 |1602 |
|"PULocationID": 255|1601 |
|"PULocationID": 95 |1430 |
|"PULocationID": 166|1304 |
|"PULocationID": 244|1275 |
|"PULocationID": 260|1233 |
|"PULocationID": 33 |1188 |
|"PULocationID": 25 |1155 |
|"PULocationID": 65 |1065 |
|"PULocationID": 61 |1055 |
|"PULocationID": 80 |1034 |
+-------------------+-----+
only showing top 20 rows

-------------------------------------------
Batch: 32
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-----------------

                                                                                

-------------------------------------------
Batch: 34
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-11 04:25:31"]|"dropOff_datetime": ["2019-01-11 04:36:56"]|"PULocationID": 220|"DOLocationID": 247|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-20 14:30:20"]|"dropOff_datetime": ["2019-01-20 14:38:54"]|"PULocationID": 22 |"DOLocationID": 14 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-23 19:32:37"]|"dropOff_datetime": ["2019-01-23 20:01:21"]|"PULocationID": 91 |"DOLocationID": 76 |"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 36
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-28 20:26:30"]|"dropOff_datetime": ["2019-01-28 20:28:17"]|"PULocationID": 91 |"DOLocationID": 91 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-29 15:09:00"]|"dropOff_datetime": ["2019-01-29 15:40:42"]|"PULocationID": 150|"DOLocationID": 91 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-12 12:34:45"]|"dropOff_datetime": ["2019-01-12 12:38:42"]|"PULocationID": 171|"DOLocationID": 171|"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 14
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3713 |
|"PULocationID": 41 |2924 |
|"PULocationID": 75 |2644 |
|"PULocationID": 7  |2596 |
|"PULocationID": 82 |2164 |
|"PULocationID": 42 |2096 |
|"PULocationID": 129|1940 |
|"PULocationID": 264|1866 |
|"PULocationID": 181|1682 |
|"PULocationID": 255|1625 |
|"PULocationID": 97 |1614 |
|"PULocationID": 95 |1453 |
|"PULocationID": 166|1312 |
|"PULocationID": 244|1294 |
|"PULocationID": 260|1247 |
|"PULocationID": 33 |1199 |
|"PULocationID": 25 |1171 |
|"PULocationID": 65 |1070 |
|"PULocationID": 61 |1067 |
|"PULocationID": 80 |1043 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:34:18 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 12856 milliseconds
-------------------------------------------
Batch: 3

                                                                                

-------------------------------------------
Batch: 38
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 00:09:31"]|"dropOff_datetime": ["2019-01-03 00:12:05"]|"PULocationID": 223|"DOLocationID": 223|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 00:18:27"]|"dropOff_datetime": ["2019-01-03 00:21:24"]|"PULocationID": 243|"DOLocationID": 127|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 00:37:57"]|"dropOff_datetime": ["2019-01-03 00:43:04"]|"PULocationID": 7  |"DOLocationID": 179|"texi_type": 

                                                                                

-------------------------------------------
Batch: 15
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3722 |
|"PULocationID": 41 |2942 |
|"PULocationID": 75 |2650 |
|"PULocationID": 7  |2612 |
|"PULocationID": 82 |2178 |
|"PULocationID": 42 |2108 |
|"PULocationID": 129|1955 |
|"PULocationID": 264|1878 |
|"PULocationID": 181|1693 |
|"PULocationID": 255|1644 |
|"PULocationID": 97 |1622 |
|"PULocationID": 95 |1459 |
|"PULocationID": 166|1319 |
|"PULocationID": 244|1306 |
|"PULocationID": 260|1255 |
|"PULocationID": 33 |1204 |
|"PULocationID": 25 |1178 |
|"PULocationID": 65 |1080 |
|"PULocationID": 61 |1075 |
|"PULocationID": 80 |1048 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:34:30 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 11765 milliseconds
-------------------------------------------
Batch: 3

                                                                                

-------------------------------------------
Batch: 40
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-09 03:07:05"]|"dropOff_datetime": ["2019-01-09 03:26:31"]|"PULocationID": 264|"DOLocationID": 264|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-17 19:38:18"]|"dropOff_datetime": ["2019-01-17 19:45:11"]|"PULocationID": 113|"DOLocationID": 231|"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-19 17:49:22"]|"dropOff_datetime": ["2019-01-19 18:22:49"]|"PULocationID": 49 |"DOLocationID": 68 |"texi_type": "FHV"}|
|{



-------------------------------------------
Batch: 41
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type          |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+-------------------+
|{"pickup_datetime": ["2019-01-26 13:29:07"]|"dropOff_datetime": ["2019-01-26 13:31:09"]|"PULocationID": 72 |"DOLocationID": 72 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-04 23:33:38"]|"dropOff_datetime": ["2019-01-04 23:54:55"]|"PULocationID": 80 |"DOLocationID": 79 |"texi_type": "FHV"}|
|{"pickup_datetime": ["2019-01-29 21:05:38"]|"dropOff_datetime": ["2019-01-29 21:21:51"]|"PULocationID": 161|"DOLocationID": 79 |"texi_type": "FHV"}|
|{

                                                                                

-------------------------------------------
Batch: 42
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 01:07:02"]|"dropOff_datetime": ["2019-01-03 01:12:39"]|"PULocationID": 41 |"DOLocationID": 152|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 01:55:59"]|"dropOff_datetime": ["2019-01-03 02:02:18"]|"PULocationID": 41 |"DOLocationID": 152|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 01:02:32"]|"dropOff_datetime": ["2019-01-03 01:02:32"]|"PULocationID": 264|"DOLocationID": 193|"texi_type": 

                                                                                

-------------------------------------------
Batch: 43
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 02:30:55"]|"dropOff_datetime": ["2019-01-03 02:39:15"]|"PULocationID": 166|"DOLocationID": 74 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 02:24:03"]|"dropOff_datetime": ["2019-01-03 02:26:29"]|"PULocationID": 193|"DOLocationID": 7  |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 03:00:52"]|"dropOff_datetime": ["2019-01-03 03:23:27"]|"PULocationID": 82 |"DOLocationID": 258|"texi_type": 

                                                                                

-------------------------------------------
Batch: 44
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 03:56:44"]|"dropOff_datetime": ["2019-01-03 03:56:54"]|"PULocationID": 41 |"DOLocationID": 41 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 03:01:53"]|"dropOff_datetime": ["2019-01-03 03:10:10"]|"PULocationID": 129|"DOLocationID": 56 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 03:31:48"]|"dropOff_datetime": ["2019-01-03 03:43:09"]|"PULocationID": 129|"DOLocationID": 56 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 45
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 04:26:49"]|"dropOff_datetime": ["2019-01-03 04:32:08"]|"PULocationID": 7  |"DOLocationID": 138|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 04:20:50"]|"dropOff_datetime": ["2019-01-03 04:35:52"]|"PULocationID": 83 |"DOLocationID": 198|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 04:07:07"]|"dropOff_datetime": ["2019-01-03 04:27:02"]|"PULocationID": 177|"DOLocationID": 138|"texi_type": 

                                                                                

-------------------------------------------
Batch: 46
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 05:59:45"]|"dropOff_datetime": ["2019-01-03 06:15:08"]|"PULocationID": 145|"DOLocationID": 138|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 05:56:20"]|"dropOff_datetime": ["2019-01-03 06:00:39"]|"PULocationID": 166|"DOLocationID": 166|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 05:41:26"]|"dropOff_datetime": ["2019-01-03 06:03:17"]|"PULocationID": 225|"DOLocationID": 71 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 47
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 06:34:30"]|"dropOff_datetime": ["2019-01-03 06:40:52"]|"PULocationID": 74 |"DOLocationID": 75 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:28:53"]|"dropOff_datetime": ["2019-01-03 06:44:03"]|"PULocationID": 71 |"DOLocationID": 37 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:50:14"]|"dropOff_datetime": ["2019-01-03 07:30:44"]|"PULocationID": 22 |"DOLocationID": 75 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 48
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 06:49:23"]|"dropOff_datetime": ["2019-01-03 06:49:34"]|"PULocationID": 74 |"DOLocationID": 74 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:24:49"]|"dropOff_datetime": ["2019-01-03 06:56:25"]|"PULocationID": 74 |"DOLocationID": 197|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:37:33"]|"dropOff_datetime": ["2019-01-03 07:10:05"]|"PULocationID": 91 |"DOLocationID": 88 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 49
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 06:44:52"]|"dropOff_datetime": ["2019-01-03 07:04:43"]|"PULocationID": 130|"DOLocationID": 138|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:18:24"]|"dropOff_datetime": ["2019-01-03 06:43:13"]|"PULocationID": 130|"DOLocationID": 138|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:15:23"]|"dropOff_datetime": ["2019-01-03 06:24:02"]|"PULocationID": 25 |"DOLocationID": 87 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 50
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 06:47:55"]|"dropOff_datetime": ["2019-01-03 07:04:33"]|"PULocationID": 112|"DOLocationID": 138|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:40:22"]|"dropOff_datetime": ["2019-01-03 07:42:54"]|"PULocationID": 95 |"DOLocationID": 65 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:59:44"]|"dropOff_datetime": ["2019-01-03 07:11:55"]|"PULocationID": 69 |"DOLocationID": 159|"texi_type": 

                                                                                

-------------------------------------------
Batch: 51
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2009-01-01 01:17:20"]|"dropOff_datetime": ["2009-01-01 02:27:07"]|"PULocationID": 92 |"DOLocationID": 146|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:03:12"]|"dropOff_datetime": ["2019-01-03 07:28:53"]|"PULocationID": 32 |"DOLocationID": 126|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:38:34"]|"dropOff_datetime": ["2019-01-03 07:52:29"]|"PULocationID": 66 |"DOLocationID": 158|"texi_type": 

                                                                                

-------------------------------------------
Batch: 52
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:36:15"]|"dropOff_datetime": ["2019-01-03 07:56:53"]|"PULocationID": 196|"DOLocationID": 92 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:59:35"]|"dropOff_datetime": ["2019-01-03 08:40:02"]|"PULocationID": 94 |"DOLocationID": 237|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:16:25"]|"dropOff_datetime": ["2019-01-03 07:33:07"]|"PULocationID": 243|"DOLocationID": 151|"texi_type": 

                                                                                

-------------------------------------------
Batch: 53
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:50:56"]|"dropOff_datetime": ["2019-01-03 08:02:30"]|"PULocationID": 75 |"DOLocationID": 42 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:17:45"]|"dropOff_datetime": ["2019-01-03 08:31:45"]|"PULocationID": 86 |"DOLocationID": 232|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 06:54:25"]|"dropOff_datetime": ["2019-01-03 07:02:24"]|"PULocationID": 74 |"DOLocationID": 75 |"texi_type": 

[Stage 248:(197 + 3) / 200][Stage 250:>  (0 + 1) / 2][Stage 251:>  (0 + 0) / 2]]

-------------------------------------------
Batch: 54
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:01:23"]|"dropOff_datetime": ["2019-01-03 07:47:20"]|"PULocationID": 165|"DOLocationID": 43 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:09:02"]|"dropOff_datetime": ["2019-01-03 07:51:26"]|"PULocationID": 78 |"DOLocationID": 135|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:34:42"]|"dropOff_datetime": ["2019-01-03 07:43:33"]|"PULocationID": 75 |"DOLocationID": 74 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 22
-------------------------------------------
+-------------------+-----+
|PULocationID       |count|
+-------------------+-----+
|"PULocationID": 74 |3813 |
|"PULocationID": 41 |2995 |
|"PULocationID": 75 |2730 |
|"PULocationID": 7  |2674 |
|"PULocationID": 82 |2261 |
|"PULocationID": 42 |2152 |
|"PULocationID": 129|2033 |
|"PULocationID": 264|1900 |
|"PULocationID": 181|1708 |
|"PULocationID": 255|1681 |
|"PULocationID": 97 |1642 |
|"PULocationID": 95 |1490 |
|"PULocationID": 166|1350 |
|"PULocationID": 244|1333 |
|"PULocationID": 260|1299 |
|"PULocationID": 33 |1219 |
|"PULocationID": 25 |1190 |
|"PULocationID": 65 |1097 |
|"PULocationID": 61 |1093 |
|"PULocationID": 80 |1068 |
+-------------------+-----+
only showing top 20 rows

23/03/14 11:35:42 WARN ProcessingTimeExecutor: Current batch is falling behind. The trigger interval is 5000 milliseconds, but spent 10429 milliseconds


                                                                                

-------------------------------------------
Batch: 55
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:51:23"]|"dropOff_datetime": ["2019-01-03 08:14:02"]|"PULocationID": 71 |"DOLocationID": 71 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:14:10"]|"dropOff_datetime": ["2019-01-03 07:21:21"]|"PULocationID": 129|"DOLocationID": 260|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:57:49"]|"dropOff_datetime": ["2019-01-03 09:00:33"]|"PULocationID": 17 |"DOLocationID": 170|"texi_type": 

                                                                                

-------------------------------------------
Batch: 56
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:39:35"]|"dropOff_datetime": ["2019-01-03 07:51:07"]|"PULocationID": 74 |"DOLocationID": 166|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:49:23"]|"dropOff_datetime": ["2019-01-03 08:13:23"]|"PULocationID": 223|"DOLocationID": 130|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:54:56"]|"dropOff_datetime": ["2019-01-03 08:27:08"]|"PULocationID": 160|"DOLocationID": 130|"texi_type": 

                                                                                

-------------------------------------------
Batch: 57
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:49:09"]|"dropOff_datetime": ["2019-01-03 07:55:19"]|"PULocationID": 74 |"DOLocationID": 75 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:56:27"]|"dropOff_datetime": ["2019-01-03 08:01:16"]|"PULocationID": 75 |"DOLocationID": 41 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:37:42"]|"dropOff_datetime": ["2019-01-03 08:00:24"]|"PULocationID": 188|"DOLocationID": 61 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 58
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 07:56:48"]|"dropOff_datetime": ["2019-01-03 07:56:50"]|"PULocationID": 243|"DOLocationID": 243|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:30:48"]|"dropOff_datetime": ["2019-01-03 07:50:37"]|"PULocationID": 244|"DOLocationID": 78 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:55:20"]|"dropOff_datetime": ["2019-01-03 08:15:55"]|"PULocationID": 248|"DOLocationID": 235|"texi_type": 

                                                                                

-------------------------------------------
Batch: 59
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:09:26"]|"dropOff_datetime": ["2019-01-03 08:15:26"]|"PULocationID": 179|"DOLocationID": 193|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:10:38"]|"dropOff_datetime": ["2019-01-03 08:45:51"]|"PULocationID": 72 |"DOLocationID": 33 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:03:38"]|"dropOff_datetime": ["2019-01-03 08:18:46"]|"PULocationID": 17 |"DOLocationID": 25 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 60
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:04:23"]|"dropOff_datetime": ["2019-01-03 09:30:28"]|"PULocationID": 123|"DOLocationID": 43 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:59:18"]|"dropOff_datetime": ["2019-01-03 08:08:32"]|"PULocationID": 166|"DOLocationID": 41 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:47:39"]|"dropOff_datetime": ["2019-01-03 08:51:13"]|"PULocationID": 75 |"DOLocationID": 74 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 61
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:51:41"]|"dropOff_datetime": ["2019-01-03 09:00:52"]|"PULocationID": 134|"DOLocationID": 197|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:10:01"]|"dropOff_datetime": ["2019-01-03 07:15:59"]|"PULocationID": 244|"DOLocationID": 244|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:52:26"]|"dropOff_datetime": ["2019-01-03 08:54:11"]|"PULocationID": 63 |"DOLocationID": 76 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 62
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:26:23"]|"dropOff_datetime": ["2019-01-03 08:36:51"]|"PULocationID": 166|"DOLocationID": 74 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:19:55"]|"dropOff_datetime": ["2019-01-03 08:24:50"]|"PULocationID": 134|"DOLocationID": 95 |"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:23:16"]|"dropOff_datetime": ["2019-01-03 08:28:11"]|"PULocationID": 42 |"DOLocationID": 42 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 64
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:32:59"]|"dropOff_datetime": ["2019-01-03 08:47:26"]|"PULocationID": 122|"DOLocationID": 121|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:36:12"]|"dropOff_datetime": ["2019-01-03 07:48:18"]|"PULocationID": 74 |"DOLocationID": 238|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 07:58:48"]|"dropOff_datetime": ["2019-01-03 08:05:33"]|"PULocationID": 41 |"DOLocationID": 42 |"texi_type": 

                                                                                

-------------------------------------------
Batch: 65
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:25:50"]|"dropOff_datetime": ["2019-01-03 08:33:18"]|"PULocationID": 136|"DOLocationID": 127|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:52:59"]|"dropOff_datetime": ["2019-01-03 08:58:31"]|"PULocationID": 116|"DOLocationID": 166|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:17:18"]|"dropOff_datetime": ["2019-01-03 08:42:31"]|"PULocationID": 196|"DOLocationID": 145|"texi_type": 

                                                                                

-------------------------------------------
Batch: 66
-------------------------------------------
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|pickup_datetime                            |dropOff_datetime                           |PULocationID       |DOLocationID       |texi_type            |
+-------------------------------------------+-------------------------------------------+-------------------+-------------------+---------------------+
|{"pickup_datetime": ["2019-01-03 08:55:43"]|"dropOff_datetime": ["2019-01-03 08:57:17"]|"PULocationID": 28 |"DOLocationID": 134|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:11:53"]|"dropOff_datetime": ["2019-01-03 08:58:59"]|"PULocationID": 220|"DOLocationID": 231|"texi_type": "GREEN"}|
|{"pickup_datetime": ["2019-01-03 08:20:02"]|"dropOff_datetime": ["2019-01-03 08:42:27"]|"PULocationID": 195|"DOLocationID": 97 |"texi_type": 

