In [None]:
# Task 2.1
# import statements
from time import sleep
from kafka import KafkaConsumer
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
from json import loads
import pandas as pd

# this line is needed for the inline display of graphs in Jupyter Notebook
%matplotlib notebook

# Kafka topic name passed to KafkaConsumer in connect_kafka_consumer()
topic = 'Process'
# connecting the kafka consumer to the server and decoding the data
def connect_kafka_consumer():
    """Create a KafkaConsumer subscribed to the module-level ``topic``.

    Message values are decoded as ASCII text and parsed as JSON.

    Returns:
        The connected KafkaConsumer, or None when construction raised an
        Exception (the error is printed rather than propagated).
    """
    try:
        return KafkaConsumer(
            topic,
            # iteration over the consumer stops after 30 s without a message
            consumer_timeout_ms=30000,
            # offset-reset policy: 'latest' moves to the most recent message
            # (use 'earliest' to move to the oldest available one instead)
            auto_offset_reset='latest',
            bootstrap_servers=['localhost:9092'],
            value_deserializer=lambda x: loads(x.decode('ascii')),
            api_version=(0, 10))
    except Exception as ex:
        # Best effort: report the failure and let the caller handle None.
        # The return is deliberately NOT placed in a `finally` clause, since
        # that would also swallow KeyboardInterrupt / SystemExit.
        print('Exception while connecting Kafka')
        print(str(ex))
        return None

# initialising empty plot
def init_plots():
    """Create and display an empty 9.5 x 6 figure with one labelled axes.

    Returns:
        A ``(fig, ax)`` tuple on success. If anything raises, the message is
        printed and the function falls through, returning None implicitly,
        so callers that unpack the result must check for None first.
    """
    try:
        figure = plt.figure(figsize=(9.5, 6))
        # a 1x1 grid, first (and only) cell
        axes = figure.add_subplot(1, 1, 1)
        figure.suptitle('Records coming within the last Two minutes')
        axes.set_xlabel('Time')
        axes.set_ylabel('Count of Records')
        # show the empty figure, then render it once on the canvas
        figure.show()
        figure.canvas.draw()
        return figure, axes
    except Exception as err:
        print(str(err))

# reads the stream and plots incoming count of data
def consume_messages(consumer, fig, ax):
    """Read the stream and redraw the per-machine record counts.

    Each message's ``value`` is iterated as a collection of records; every
    record is indexed with the keys ``'ts'`` and ``'machine'``. Records whose
    ``ts`` lies 0..120 (inclusive) after the moment this function was called
    are accumulated, and once at least three distinct time offsets have been
    seen the axes are cleared and redrawn after every message.

    NOTE(review): the window is anchored at the call time, not at "now", so
    it does not slide forward - confirm this matches the intended
    "last two minutes" semantics.
    NOTE(review): assumes ``ts`` is epoch seconds comparable with
    ``datetime.now().timestamp()`` - confirm against the producer.

    Args:
        consumer: iterable of messages exposing ``.value``; None is accepted
            and reported (e.g. after a failed connection).
        fig: figure whose canvas is redrawn after each plot update.
        ax: axes that are cleared and plotted onto.

    Returns:
        None. Any Exception raised while consuming or plotting is printed,
        not propagated.
    """
    if consumer is None:
        # Explicit message instead of "'NoneType' object is not iterable".
        print('No Kafka consumer available; nothing to consume')
        return

    # reference time, fixed for the whole run
    ts_now = int(dt.datetime.now().timestamp())
    try:
        # parallel lists, one entry per accepted record
        offsets = []
        machines = []
        stamps = []
        # reading the stream
        for message in consumer:
            for data in message.value:
                # The former `eval(str(data))` was removed: its result was
                # never used, it executed text derived from external data,
                # and it aborted the loop on values such as NaN whose repr
                # is not a valid Python expression.
                offset = data['ts'] - ts_now
                # keep only records inside the two-minute window
                if 0 <= offset <= 120:
                    offsets.append(offset)
                    machines.append(data['machine'])
                    stamps.append(data['ts'])
            # plotting when we have 3 or more distinct points in time
            if len(set(offsets)) >= 3:
                df = pd.DataFrame(
                    {'time': offsets, 'machine': machines, 'stamp': stamps},
                    columns=['time', 'machine', 'stamp'])
                ax.clear()
                # count records per (time, machine); one line per machine
                df.groupby(['time', 'machine']).count()['stamp'].unstack().plot(ax=ax)
                fig.canvas.draw()
        # the stream ended (consumer stopped iterating): close the figures
        plt.close('all')
    except Exception as ex:
        print(str(ex))

# main
if __name__ == '__main__':
    # Entry point: connect, set up the figure, then consume and plot.
    consumer = connect_kafka_consumer()
    if consumer is None:
        # connect_kafka_consumer returns None when the connection failed
        print('Kafka consumer unavailable; nothing to plot')
    else:
        try:
            # init_plots returns None when figure creation failed, so check
            # before unpacking to avoid an unrelated TypeError
            plots = init_plots()
            if plots is not None:
                fig, ax = plots
                consume_messages(consumer, fig, ax)
        finally:
            # release the broker connection once we are done with it
            consumer.close()
    