In [5]:
pip install confluent-kafka

Note: you may need to restart the kernel to use updated packages.


In [6]:
from confluent_kafka import Producer
import json
import time

In [7]:
# Step 1: Configure the Kafka producer
# The only setting supplied is the broker address the client connects to.
producer_config = {
    "bootstrap.servers": "localhost:9092",
}
p = Producer(producer_config)  # shared producer handle used by the sending cells

In [8]:
# Step 2: Load the Heart Disease dataset
import pandas as pd

# Relative path, so it is resolved against the kernel's working directory.
file_path = "heart_disease_uci.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
print("Dataset loaded successfully!")

Dataset loaded successfully!


In [9]:
# Step 3: Define the Kafka topic name
# Every message produced by this notebook is published to this topic.
topic = "heart_data"

In [10]:
# Step 4: Configure batch size and delay
# batch_size: how many records go into a single batch.
batch_size = 50
# delay: pause, in seconds, after each batch is transmitted.
delay = 2

In [11]:
# Step 5: Send the dataset to Kafka in batches
# Each Kafka message carries one JSON array holding up to `batch_size` row dicts.
delivery_errors = []  # filled by the delivery callback when a message fails


def _on_delivery(err, msg):
    """Delivery report callback: remember failures instead of losing them silently."""
    if err is not None:
        delivery_errors.append(err)


print(f"Sending data to Kafka topic: {topic}")
for start in range(0, len(df), batch_size):
    batch = df[start:start + batch_size]

    # Missing cells arrive from pandas as NaN, which json.dumps would emit as the
    # bare token NaN (not valid JSON). Map them to None so they serialize as null.
    batch_data = [
        {column: (None if pd.isna(value) else value) for column, value in record.items()}
        for record in batch.to_dict(orient="records")
    ]
    # allow_nan=False makes any remaining non-finite float raise here rather than
    # putting an unparseable payload on the topic.
    data = json.dumps(batch_data, allow_nan=False)

    # Produce message to Kafka topic; the callback reports the delivery outcome.
    p.produce(topic, value=data, on_delivery=_on_delivery)
    p.flush()  # Waits for the queued message and serves its delivery callback

    if delivery_errors:
        # Stop at the first failed batch instead of reporting a false success.
        raise RuntimeError(f"Kafka delivery failed for topic {topic}: {delivery_errors[0]}")

    print(f"Sent batch with {len(batch_data)} records")
    time.sleep(delay)  # Wait for the specified delay before sending the next batch

print("Heart disease dataset successfully sent to Kafka.")

Sending data to Kafka topic: heart_data
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 50 records
Sent batch with 20 records
Heart disease dataset successfully sent to Kafka.


In [12]:
# Step 6: Send termination signal
# A final {"end_of_stream": true} message is published on the same topic as the
# data batches (presumably so a consumer can tell the stream is finished).
done_errors = []  # filled by the delivery callback if the signal is not delivered


def _on_done_delivery(err, msg):
    """Delivery report callback: record a failed delivery of the termination message."""
    if err is not None:
        done_errors.append(err)


done_signal = json.dumps({"end_of_stream": True})
p.produce(topic, value=done_signal, on_delivery=_on_done_delivery)
p.flush()  # Waits for the queued message and serves its delivery callback

if done_errors:
    # Do not claim success when the broker never accepted the signal.
    raise RuntimeError(f"Termination signal was not delivered to {topic}: {done_errors[0]}")
print("All data sent. Termination signal sent to Kafka.")

All data sent. Termination signal sent to Kafka.
