In [4]:
import json
import random
import uuid
import time
from confluent_kafka import Producer
import fastavro
from fastavro.schema import load_schema
import io

# Avro schema (hardcoded)
avro_schema = {
    "type": "record",
    "name": "CreditTransaction",
    "namespace": "com.fraud.detection",
    "fields": [
        {
            "name": "client_id",
            "type": "string"
        },
        {
            "name": "transaction_id",
            "type": "string"
        },
        {
            "name": "type",
            "type": ["string", "null"],
            "default": None
        },
        {
            "name": "amount",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "oldbalanceOrg",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "newbalanceOrig",
            "type": ["double", "null"],
            "default": None
        }
    ]
}

# Kafka Producer Configuration
KAFKA_BROKER = "host.docker.internal:9092"  # Replace with your Kafka broker
TOPIC = "raw_credit_data"  # Kafka topic to send data to

# Initialize Kafka producer
conf = {
    'bootstrap.servers': KAFKA_BROKER,
    'acks': 'all',  # Wait for all brokers to acknowledge
}

producer = Producer(conf)

# Generate random test data
def generate_test_data():
    client_id = f"client_{random.randint(1000, 9999)}"
    transaction_id = str(uuid.uuid4())
    transaction_type = random.choice(["TRANSFER","DEBIT","CASH_OUT","CASH_IN"])
    amount = random.uniform(1.0, 10.0)
    old_balance = random.uniform(0.0, 5000.0)
    new_balance = old_balance - amount if transaction_type == "purchase" else old_balance + amount

    return {
        "client_id": client_id,
        "transaction_id": transaction_id,
        "type": transaction_type,
        "amount": amount,
        "oldbalanceOrg": old_balance,
        "newbalanceOrig": new_balance
    }

# Function to serialize data into Avro format
def serialize_avro(data, schema):
    with io.BytesIO() as buf:
        writer = fastavro.writer(buf, schema, [data])
        return buf.getvalue()

# Produce data to Kafka
def produce_to_kafka():
    for _ in range(30):  # Adjust the number of records to send
        test_data = generate_test_data()
        avro_data = serialize_avro(test_data, avro_schema)
        
        # Produce message to Kafka
        producer.produce(topic=TOPIC, key=test_data["transaction_id"], value=avro_data)
        print(f"Sent data: {test_data}")
        producer.flush()

    

# Start the producer
if __name__ == "__main__":
    produce_to_kafka()


Sent data: {'client_id': 'client_9447', 'transaction_id': '4bc409e0-cac6-459a-b908-e4609ebd421c', 'type': 'DEBIT', 'amount': 8.577732595887085, 'oldbalanceOrg': 2410.708499314125, 'newbalanceOrig': 2419.2862319100122}
Sent data: {'client_id': 'client_6494', 'transaction_id': '4fa29726-f8f5-499c-8910-4dd3f0f323bf', 'type': 'CASH_OUT', 'amount': 3.0313582725147676, 'oldbalanceOrg': 88.10038121557562, 'newbalanceOrig': 91.13173948809039}
Sent data: {'client_id': 'client_1647', 'transaction_id': '785a2a74-28ef-4c28-b799-52bb358f8919', 'type': 'TRANSFER', 'amount': 7.185952256766741, 'oldbalanceOrg': 2166.0909610882086, 'newbalanceOrig': 2173.276913344975}
Sent data: {'client_id': 'client_6683', 'transaction_id': '223f68cf-6d3b-4a13-b980-a6e9b9ba0896', 'type': 'TRANSFER', 'amount': 9.62940198071514, 'oldbalanceOrg': 225.0988883685423, 'newbalanceOrig': 234.72829034925743}
Sent data: {'client_id': 'client_6091', 'transaction_id': 'cb91d3f4-2734-4c1d-83ff-408f81a20f8f', 'type': 'DEBIT', 'amou