In [45]:
import json
import random
import uuid
import time
from confluent_kafka import Producer
import fastavro
from fastavro.schema import load_schema
import io

# Avro schema (hardcoded)
avro_schema = {
    "type": "record",
    "name": "CreditTransaction",
    "namespace": "com.fraud.detection",
    "fields": [
        {
            "name": "client_id",
            "type": "string"
        },
        {
            "name": "transaction_id",
            "type": "string"
        },
        {
            "name": "type",
            "type": ["string", "null"],
            "default": None
        },
        {
            "name": "amount",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "oldbalanceOrg",
            "type": ["double", "null"],
            "default": None
        },
        {
            "name": "newbalanceOrig",
            "type": ["double", "null"],
            "default": None
        }
    ]
}

# Kafka Producer Configuration
KAFKA_BROKER = "host.docker.internal:9092"  # Replace with your Kafka broker
TOPIC = "raw_credit_data"  # Kafka topic to send data to

# Initialize Kafka producer
conf = {
    'bootstrap.servers': KAFKA_BROKER,
    'acks': 'all',  # Wait for all brokers to acknowledge
}

producer = Producer(conf)

# Generate random test data
def generate_test_data():
    client_id = f"client_{random.randint(1000, 9999)}"
    transaction_id = str(uuid.uuid4())
    transaction_type = random.choice([None, "CASH_OUT", "DEBIT", "TRANSFER"])
    amount = random.uniform(1.0, 10000.0)
    old_balance = random.uniform(0.0, 5000.0)
    new_balance = old_balance - amount if transaction_type == "purchase" else old_balance + amount

    return {
        "client_id": client_id,
        "transaction_id": transaction_id,
        "type": transaction_type,
        "amount": amount,
        "oldbalanceOrg": old_balance,
        "newbalanceOrig": new_balance
    }

# Function to serialize data into Avro format
def serialize_avro(data, schema):
    with io.BytesIO() as buf:
        writer = fastavro.writer(buf, schema, [data])
        return buf.getvalue()

# Produce data to Kafka
def produce_to_kafka():
    for _ in range(10):  # Adjust the number of records to send
        test_data = generate_test_data()
        avro_data = serialize_avro(test_data, avro_schema)
        
        # Produce message to Kafka
        producer.produce(topic=TOPIC, key=test_data["transaction_id"], value=avro_data)
        print(f"Sent data: {test_data}")
        producer.flush()
        time.sleep(1)  # Adjust sleep to control message frequency

# Start the producer
if __name__ == "__main__":
    produce_to_kafka()


Sent data: {'client_id': 'client_8168', 'transaction_id': 'af97941c-cf2e-41d8-9f4a-d04228340d8d', 'type': 'TRANSFER', 'amount': 2816.6733640413754, 'oldbalanceOrg': 2552.4343886880856, 'newbalanceOrig': 5369.107752729461}
Sent data: {'client_id': 'client_5060', 'transaction_id': '2edb729c-dac5-4199-a113-a22821753d86', 'type': None, 'amount': 5879.086328873855, 'oldbalanceOrg': 706.382623964022, 'newbalanceOrig': 6585.468952837877}
Sent data: {'client_id': 'client_7330', 'transaction_id': 'f892f886-822a-4f03-ab26-e334285ebd09', 'type': None, 'amount': 2686.7404813180415, 'oldbalanceOrg': 264.64900028355277, 'newbalanceOrig': 2951.389481601594}
Sent data: {'client_id': 'client_7583', 'transaction_id': '920378b7-204b-4a66-9f17-7fe678d70917', 'type': 'TRANSFER', 'amount': 2515.223700867199, 'oldbalanceOrg': 1050.242149078144, 'newbalanceOrig': 3565.465849945343}
Sent data: {'client_id': 'client_3915', 'transaction_id': '77743b5a-f5f5-4c2f-835e-1d56816d15fd', 'type': 'DEBIT', 'amount': 4135

In [24]:
import io

binary_data = b'Hello Avro'  # Binary data (mimicking Avro bytes)
bytes_io = io.BytesIO(binary_data)  # Create an in-memory file

# Read the data back
print(bytes_io.read()) 

b'Hello Avro'
