# Producer 3

Write a python program that loads all the data from
hotspot_TERRA_streaming.csv and randomly (with replacement) feeds the data to
the stream every 2 seconds. TERRA is another satellite from NASA that reports
latitude, longitude, confidence and surface temperature of a location. You will
need to append additional information such as producer information to identify
the producer and created date & time.

In [1]:
import random

from time import sleep
from json import dumps
from kafka3 import KafkaProducer
from pymongo import MongoClient

import datetime as dt
import pandas as pd

In [2]:
# read csv
hotspot_streaming_df = pd.read_csv("dataset/hotspot_TERRA_streaming.csv")
hotspot_streaming_df.head()

Unnamed: 0,latitude,longitude,confidence,surface_temperature_celcius
0,-37.966,145.051,78,68
1,-35.541,143.311,82,63
2,-35.554,143.307,67,53
3,-35.543,143.316,86,67
4,-37.708,145.1,80,54


In [3]:
# initializing mongo connection to database
client = MongoClient()
db = client.fit3182_assignment_db
collection = db.climate_historic

# finding latest date
result = collection.find().sort("date", -1).limit(1)
latest_date = list(result)[0]["date"]
latest_date

datetime.datetime(2023, 3, 24, 0, 0)

In [4]:
def process_data(df):
    dataset = []
    for index, row in df.iterrows():
        # convert each row to dictionary
        data = row.to_dict()
        dataset.append(data)
    return dataset


def publish_message(producer_instance, topic_name, data):
    try:
        # send message to kafka asynchronously
        producer_instance.send(topic_name, value=data)
        # wait for all outstanding message are persisted to disk
        producer_instance.flush()
        print("Message published successfully. Data: " + str(data))
    except Exception as ex:
        print("Exception in publishing message.")
        print(str(ex))


def connect_kafka_producer(host):
    _producer = None
    try:
        # serializer to serialize data to json instead of string
        _producer = KafkaProducer(
            bootstrap_servers=[f"{host}:9092"],
            value_serializer=lambda x: dumps(x).encode("ascii"),
            api_version=(0, 10),
        )
    except Exception as ex:
        print("Exception while connecting Kafka.")
        print(str(ex))
    finally:
        return _producer

In [5]:
TOPIC = "hotspot"
HOST = "localhost"

producer = connect_kafka_producer(HOST)
dataset = process_data(hotspot_streaming_df)
current_date = latest_date

print("Publishing records..")
while True:
    # increment date after every iteration
    # since 10 seconds == 24 hours, hence 2 seconds = (24 / 10) * 4
    current_date += dt.timedelta(hours=4.8)

    # get random row and add metadata
    selection = random.choice(dataset)
    selection["datetime"] = current_date.isoformat()
    selection["producer_id"] = "terra_producer"

    publish_message(producer, TOPIC, selection)

    sleep(2)  # sleep for 2 seconds before publishing next message

Publishing records..
Message published successfully. Data: {'latitude': -36.2933, 'longitude': 141.36, 'confidence': 94.0, 'surface_temperature_celcius': 75.0, 'datetime': '2023-03-24T04:48:00', 'producer_id': 'terra_producer'}
Message published successfully. Data: {'latitude': -38.1756, 'longitude': 143.9829, 'confidence': 65.0, 'surface_temperature_celcius': 53.0, 'datetime': '2023-03-24T09:36:00', 'producer_id': 'terra_producer'}
Message published successfully. Data: {'latitude': -37.0623, 'longitude': 142.8211, 'confidence': 80.0, 'surface_temperature_celcius': 53.0, 'datetime': '2023-03-24T14:24:00', 'producer_id': 'terra_producer'}
Message published successfully. Data: {'latitude': -36.834, 'longitude': 142.524, 'confidence': 78.0, 'surface_temperature_celcius': 44.0, 'datetime': '2023-03-24T19:12:00', 'producer_id': 'terra_producer'}
Message published successfully. Data: {'latitude': -37.7052, 'longitude': 144.6926, 'confidence': 72.0, 'surface_temperature_celcius': 46.0, 'datet

KeyboardInterrupt: 