In [1]:
import json
import random
import uuid
import time
from confluent_kafka import Producer
import fastavro
from fastavro.schema import load_schema
import io

# Avro schema (hardcoded)
avro_schema = {
    "type": "record",
    "name": "CreditTransaction",
    "namespace": "com.fraud.detection",
    "fields": [
        {
            "name": "client_id",
            "type": "string"
        },
        {
            "name": "transaction_id",
            "type": "string"
        },
        {
            "name": "type",
            "type": ["string", "null"],
            "default": None
        },
        {
            "name": "amount",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "oldbalanceOrg",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "newbalanceOrig",
            "type": ["double", "null"],
            "default": None
        }
    ]
}

# Kafka Producer Configuration
KAFKA_BROKER = "host.docker.internal:9092"  # Replace with your Kafka broker
TOPIC = "raw_credit_data"  # Kafka topic to send data to

# Initialize Kafka producer
conf = {
    'bootstrap.servers': KAFKA_BROKER,
    'acks': 'all',  # Wait for all brokers to acknowledge
}

producer = Producer(conf)

# Generate random test data
def generate_test_data():
    client_id = f"client_{random.randint(1000, 9999)}"
    transaction_id = str(uuid.uuid4())
    transaction_type = random.choice([None, "CASH_OUT", "DEBIT", "TRANSFER"])
    amount = random.uniform(1.0, 10000.0)
    old_balance = random.uniform(0.0, 5000.0)
    new_balance = old_balance - amount if transaction_type == "purchase" else old_balance + amount

    return {
        "client_id": client_id,
        "transaction_id": transaction_id,
        "type": transaction_type,
        "amount": amount,
        "oldbalanceOrg": old_balance,
        "newbalanceOrig": new_balance
    }

# Function to serialize data into Avro format
def serialize_avro(data, schema):
    with io.BytesIO() as buf:
        writer = fastavro.writer(buf, schema, [data])
        return buf.getvalue()

# Produce data to Kafka
def produce_to_kafka():
    for _ in range(20):  # Adjust the number of records to send
        test_data = generate_test_data()
        avro_data = serialize_avro(test_data, avro_schema)
        
        # Produce message to Kafka
        producer.produce(topic=TOPIC, key=test_data["transaction_id"], value=avro_data)
        print(f"Sent data: {test_data}")
        producer.flush()
        time.sleep(1)  # Adjust sleep to control message frequency

# Start the producer
if __name__ == "__main__":
    produce_to_kafka()


Sent data: {'client_id': 'client_9518', 'transaction_id': '7edaa483-9910-40bf-9ae0-48a6c5d18d12', 'type': 'TRANSFER', 'amount': 584.6450496799116, 'oldbalanceOrg': 2767.8238695549067, 'newbalanceOrig': 3352.4689192348183}
Sent data: {'client_id': 'client_7935', 'transaction_id': '4e598044-020d-4e24-8702-611bcf3b1006', 'type': 'DEBIT', 'amount': 9279.744225267628, 'oldbalanceOrg': 4872.560341432638, 'newbalanceOrig': 14152.304566700266}
Sent data: {'client_id': 'client_5955', 'transaction_id': '06fcf2c4-4792-4ca6-845d-0148204c513f', 'type': 'TRANSFER', 'amount': 6615.047553799689, 'oldbalanceOrg': 4487.104090296708, 'newbalanceOrig': 11102.151644096397}
Sent data: {'client_id': 'client_4446', 'transaction_id': '10f23404-54a1-488e-900f-ba61264979e1', 'type': 'CASH_OUT', 'amount': 9964.208888547473, 'oldbalanceOrg': 4583.166340819272, 'newbalanceOrig': 14547.375229366746}
Sent data: {'client_id': 'client_3071', 'transaction_id': 'f458d0ce-f581-4238-a0cc-1533fd261866', 'type': 'DEBIT', 'am