### Task 2.2 Data Visualisation

In this task, you will implement a program to visualize the joined streaming data. For the incoming camera event(s), 
* plot the number of violations against arrival time. Label some interesting points, such as the maximum and minimum values. 
* In addition, plot the speed against arrival time. Include some interesting points, such as the average and maximum values.

For visualization of the data stored in the database, plot a map using the camera locations. On the map, annotate
* the number of violations between the checkpoints
* the hotspots (e.g. where the number of violations exceeds a certain threshold within a given time period in a day)

Explain and justify the plots and the inclusion of the interesting points. Set your own threshold for the hotspot.

If you are running this task in a separate Jupyter notebook file, save the file as **xxx_assignment02_visualisation.ipynb**, where **xxx** represents the student IDs of the group members.

In [1]:
import matplotlib.pyplot as plt

# this line is needed for the inline display of graphs in Jupyter Notebook
%matplotlib notebook
from time import sleep



: 

In [2]:
# Connection settings for reading the data from MongoDB.
# Host labelled "home" in the original notes (disabled alternative).
# hostip = "192.168.0.21"
# Host labelled "monash" in the original notes (currently active).
hostip = "10.192.13.255"

# Database name; interpolated together with hostip into the MongoDB
# read/write connection URIs of the Spark session.
DB_NAME     = "awas_db"


from pymongo import MongoClient, ASCENDING, HASHED
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import col, split, element_at, when, from_json, expr, unix_timestamp
from pyspark.sql.types import StructType, StringType, IntegerType, DoubleType, TimestampType, StructField
from pyspark.sql.streaming.state import GroupState, GroupStateTimeout
import os
# Connector packages requested through PYSPARK_SUBMIT_ARGS: a comma-separated
# list of Maven coordinates after "--packages", then the "pyspark-shell" token.
# The variable is assigned before the session below is built.
os.environ["PYSPARK_SUBMIT_ARGS"] = (
    "--packages "
    "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0,"
    "org.apache.spark:spark-streaming-kafka-0-10_2.12:3.5.0,"
    "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0 "
    "pyspark-shell"
)

# Local-mode session; the MongoDB read and write URIs both point at DB_NAME
# on hostip, port 27017.
spark = (
    SparkSession.builder
    .appName("AWAS-Speed-Enforcement")
    .master("local[*]")
    .config("spark.mongodb.read.connection.uri", f"mongodb://{hostip}:27017/{DB_NAME}")
    .config("spark.mongodb.write.connection.uri", f"mongodb://{hostip}:27017/{DB_NAME}")
    .getOrCreate()
)

# Restrict Spark's own logging to the WARN level.
spark.sparkContext.setLogLevel("WARN")

# Schema describing one violation record: an id and type, the start/end
# camera ids and timestamps, and the recorded speed with its speed limit.
# In this file it is referenced only by the commented-out JSON reader.
outputSchema = StructType([
    StructField("violation_id", StringType()),
    StructField("type", StringType()),
    StructField("camera_id_start", IntegerType()),
    StructField("camera_id_end", IntegerType()),
    StructField("timestamp_start", TimestampType()),
    StructField("timestamp_end", TimestampType()),
    StructField("recorded_speed", DoubleType()),
    StructField("speed_limit", DoubleType()),
])

# Alternative source (disabled): stream JSON files parsed with outputSchema.
# df = spark.readStream.schema(outputSchema).format("json").load("path_to_incoming_joined_stream")

# Active source: streaming read of the "camera" collection through the
# MongoDB connector, using the read connection URI configured on the session.
# NOTE(review): consume_messages() calls .toPandas() on frames derived from
# this df, and no schema is supplied here. Spark normally rejects batch
# actions on a streaming DataFrame -- confirm whether a batch read
# (spark.read) was intended.
df = (spark.readStream 
        .format("mongodb")
        .option("collection", "camera")
        .load())




In [None]:
def init_plot():
    """Create the live figure and its two time-series panels.

    Both panels sit in the top row of a 2x2 subplot grid: the left one is
    labelled for the number of violations, the right one for the average
    speed, each against arrival time.

    Returns:
        The tuple ``(fig, ax1, ax2)`` on success. If anything raises, the
        message is printed and the function falls through, returning
        ``None``.
    """
    # (subplot position, y-axis label, panel title) for each panel, in order.
    panel_specs = (
        (221, 'Number of Violations', 'Number of Violations vs Arrival Time'),
        (222, 'Average Speed', 'Average Speed vs Arrival Time'),
    )
    try:
        fig = plt.figure(figsize=(9.5, 6))
        fig.subplots_adjust(hspace=0.6)

        panels = []
        for position, y_label, title in panel_specs:
            panel = fig.add_subplot(position)
            panel.set_xlabel('Arrival Time')
            panel.set_ylabel(y_label)
            panel.set_title(title)
            panels.append(panel)

        fig.suptitle('Real-time uniform stream data visualization')
        fig.show()
        fig.canvas.draw()

        ax1, ax2 = panels
        return fig, ax1, ax2
    except Exception as ex:
        print(str(ex))

def annotate_max(x, y, ax=None):
    """Mark the largest y value on *ax* with a labelled red arrow.

    Args:
        x: Sequence of x positions (list, tuple, pandas Series, ...).
        y: Sequence of y values, aligned with *x*.
        ax: Axes to annotate; the current axes are used when omitted.

    Nothing is drawn when *y* is empty. If the maximum occurs more than
    once, the first occurrence is annotated.
    """
    # Work on plain lists: a pandas Series exposes ``.index`` as an attribute,
    # not the ``list.index`` method, so ``y.index(ymax)`` would fail on it.
    xs = list(x)
    ys = list(y)
    if not ys:
        return

    ymax = max(ys)
    xmax = xs[ys.index(ymax)]
    text = 'Max: Time={}, Value={}'.format(xmax, ymax)
    if ax is None:
        ax = plt.gca()
    # The label is placed 5 data units above the annotated point.
    ax.annotate(text, xy=(xmax, ymax), xytext=(xmax, ymax + 5),
                arrowprops=dict(facecolor='red', shrink=0.05))
    
def annotate_min(x, y, ax=None):
    """Mark the smallest y value on *ax* with a labelled orange arrow.

    Args:
        x: Sequence of x positions (list, tuple, pandas Series, ...).
        y: Sequence of y values, aligned with *x*.
        ax: Axes to annotate; the current axes are used when omitted.

    Nothing is drawn when *y* is empty. If the minimum occurs more than
    once, the first occurrence is annotated.
    """
    # Work on plain lists: a pandas Series exposes ``.index`` as an attribute,
    # not the ``list.index`` method, so ``y.index(ymin)`` would fail on it.
    xs = list(x)
    ys = list(y)
    if not ys:
        return

    ymin = min(ys)
    xmin = xs[ys.index(ymin)]
    text = 'Min: Time={}, Value={}'.format(xmin, ymin)
    if ax is None:
        ax = plt.gca()
    # The label is placed 5 data units above the annotated point.
    ax.annotate(text, xy=(xmin, ymin), xytext=(xmin, ymin + 5),
                arrowprops=dict(facecolor='orange', shrink=0.05))

def annotate_avg(x, y, ax=None):
    """Label the mean of *y* on *ax*, anchored at the last x position.

    Args:
        x: Sequence of x positions (list, tuple, pandas Series, ...).
        y: Sequence of numeric y values.
        ax: Axes to annotate; the current axes are used when omitted.

    Nothing is drawn when either sequence is empty.
    """
    # Work on plain lists: ``x[-1]`` on a pandas Series is a label lookup
    # (KeyError with the default integer index), not "last element".
    xs = list(x)
    ys = list(y)
    if not xs or not ys:
        return

    avg = sum(ys) / len(ys)
    x_last = xs[-1]
    text = 'Avg: Value={}'.format(avg)
    if ax is None:
        ax = plt.gca()
    # The label is placed 5 data units above the average level.
    ax.annotate(text, xy=(x_last, avg), xytext=(x_last, avg + 5),
                arrowprops=dict(facecolor='blue', shrink=0.05))

def consume_messages(consumer, fig, ax1, ax2):
    """Redraw both panels from the current contents of the global ``df``.

    The left panel shows the number of rows per arrival time (annotated with
    its maximum and minimum); the right panel shows the average
    ``speed_reading`` per arrival time (annotated with maximum, minimum and
    overall average).

    Args:
        consumer: Not used by this function; kept so existing calls still work.
        fig: Figure owning *ax1* and *ax2*; its canvas is redrawn at the end.
        ax1: Axes for the violation-count series.
        ax2: Axes for the average-speed series.

    Returns:
        None. The plot is left untouched when either aggregate is empty.
        Any exception is printed rather than raised. The figure is not
        closed here, so the caller can keep refreshing it.

    NOTE(review): ``df`` is created with ``spark.readStream`` in this file,
    while ``.toPandas()`` is a batch action -- confirm that ``df`` can be
    collected this way.
    """
    try:
        # Number of rows per arrival time. Each frame is converted to pandas
        # exactly once (a pandas DataFrame has no ``toPandas``).
        violation_data = (
            df.select(col("timestamp").alias("arrival_time"))
            .groupBy("arrival_time")
            .count()
            .orderBy("arrival_time")
            .withColumnRenamed("count", "num_violations")
            .toPandas()
        )

        # Average speed per arrival time.
        speed_data = (
            df.select(
                col("timestamp").alias("arrival_time"),
                col("speed_reading"),
            )
            .groupBy("arrival_time")
            .avg("speed_reading")
            .orderBy("arrival_time")
            .withColumnRenamed("avg(speed_reading)", "avg_speed")
            .toPandas()
        )

        # Nothing to draw yet: keep whatever is currently on screen.
        if violation_data.empty or speed_data.empty:
            return

        # Plain lists keep the annotate_* helpers independent of pandas
        # indexing semantics.
        x1 = violation_data['arrival_time'].tolist()
        y1 = violation_data['num_violations'].tolist()
        x2 = speed_data['arrival_time'].tolist()
        y2 = speed_data['avg_speed'].tolist()

        ax1.clear()  # clear the previous plot
        ax2.clear()  # clear the previous plot

        # clear() also wipes labels and titles, so restore them on each redraw.
        ax1.set_xlabel('Arrival Time')
        ax1.set_ylabel('Number of Violations')
        ax1.set_title('Number of Violations vs Arrival Time')
        ax2.set_xlabel('Arrival Time')
        ax2.set_ylabel('Average Speed')
        ax2.set_title('Average Speed vs Arrival Time')

        ax1.plot(x1, y1)
        annotate_max(x1, y1, ax1)
        annotate_min(x1, y1, ax1)

        ax2.plot(x2, y2)
        annotate_max(x2, y2, ax2)
        annotate_min(x2, y2, ax2)
        annotate_avg(x2, y2, ax2)

        fig.canvas.draw()  # redraw the canvas to update the plot
    except Exception as ex:
        print(str(ex))

