In [3]:
import json
import random
import uuid
import time
from confluent_kafka import Producer
import fastavro
import io

# Avro schema
def _nullable_field(field_name, avro_type):
    """Return an optional field spec: a union of *avro_type* and null, defaulting to None."""
    # NOTE(review): the Avro specification has historically required a union
    # field's default to match the FIRST branch of the union. With "null" listed
    # second, stricter parsers may reject this schema -- confirm before sharing
    # it with non-fastavro consumers.
    return {"name": field_name, "type": [avro_type, "null"], "default": None}


# Record layout of one insurance transaction. The two identifiers are
# mandatory strings; every other attribute is optional.
insurance_avro_schema = {
    "type": "record",
    "name": "InsuranceTransaction",
    "namespace": "com.insurance.data",
    "fields": [
        {"name": "client_id", "type": "string"},
        {"name": "transaction_id", "type": "string"},
        _nullable_field("high_education_ind", "int"),
        _nullable_field("past_num_of_claims", "int"),
        _nullable_field("gender", "string"),
        _nullable_field("address_change_ind", "int"),
        _nullable_field("witness_present_ind", "int"),
        _nullable_field("marital_status", "int"),
        _nullable_field("channel", "string"),
        _nullable_field("accident_site", "string"),
        _nullable_field("living_status", "string"),
        _nullable_field("vehicle_category", "string"),
    ],
}

# Kafka configuration
# Address of the Kafka broker; replace with the URL of your own broker.
KAFKA_BROKER = "host.docker.internal:9092"
# Name of the Kafka topic the messages are published to.
TOPIC = "raw_insurance_data"

conf = {
    "bootstrap.servers": KAFKA_BROKER,
    # Wait for the acknowledgement of all brokers before a send counts as done.
    "acks": "all",
}

# Single module-level producer shared by every send in this script.
producer = Producer(conf)

# Generation of fake data
def generate_test_data():
    """Build one random insurance-transaction record as a plain dict.

    ``client_id`` is ``"client_"`` followed by a random integer in
    [1000, 9999] and ``transaction_id`` is a fresh UUID4 string. Every other
    attribute is drawn at random from a small fixed set of values that always
    includes ``None``, so any optional attribute may be missing.

    Returns:
        dict: the record, with keys in the same order as the schema fields.
    """
    pick = random.choice
    flag_values = [0, 1, None]

    record = {}
    record["client_id"] = "client_" + str(random.randint(1000, 9999))
    record["transaction_id"] = str(uuid.uuid4())
    # The draws below are kept in field order so that a seeded ``random``
    # produces the same sequence of values.
    record["high_education_ind"] = pick(flag_values)
    record["past_num_of_claims"] = pick([0, 1, 2, 3, None])
    record["gender"] = pick(["M", "F", None])
    record["address_change_ind"] = pick(flag_values)
    record["witness_present_ind"] = pick(flag_values)
    record["marital_status"] = pick(flag_values)
    record["channel"] = pick(["Phone", None])
    record["accident_site"] = pick(["Parking Lot", "Rural", None])
    record["living_status"] = pick(["Owned", "Rent", None])
    record["vehicle_category"] = pick(["Medium", "SUV", "Truck", None])
    return record

# Serialization of the data to Avro
def serialize_avro(data, schema):
    """Serialize a single record to Avro bytes using an in-memory buffer.

    Args:
        data: the record to encode; it is written as a one-element batch.
        schema: the Avro schema passed to ``fastavro.writer``.

    Returns:
        bytes: everything ``fastavro.writer`` wrote to the buffer.
    """
    # NOTE(review): fastavro.writer appears to emit the Avro container format
    # (header plus schema) rather than a bare record -- confirm consumers
    # decode it with a container-aware reader.
    buffer = io.BytesIO()
    try:
        fastavro.writer(buffer, schema, [data])
        payload = buffer.getvalue()
    finally:
        buffer.close()
    return payload

# Sending the data to Kafka
def produce_to_kafka(num_messages=20, delay_seconds=1):
    """Generate random records and publish them to the Kafka topic one by one.

    Each record is serialized to Avro, produced with its ``transaction_id`` as
    the message key, and flushed before the next one is generated. A delivery
    callback reports any message the client could not deliver; previously such
    failures were silently dropped.

    Args:
        num_messages: number of messages to send (defaults to 20).
        delay_seconds: pause after each message, in seconds (defaults to 1).
    """
    def _report_delivery_failure(err, msg):
        # Called by the client once per produced message; err is None on success.
        if err is not None:
            print(f"Delivery failed for key {msg.key()}: {err}")

    for _ in range(num_messages):
        test_data = generate_test_data()
        avro_data = serialize_avro(test_data, insurance_avro_schema)

        producer.produce(
            topic=TOPIC,
            key=test_data["transaction_id"],
            value=avro_data,
            on_delivery=_report_delivery_failure,
        )
        print(f"Sent data: {test_data}")
        # Flush per message so the delivery callback runs before the next send.
        producer.flush()
        time.sleep(delay_seconds)  # Adjust the sending frequency

# Start the producer when this file is run as a script
if __name__ == "__main__":
    produce_to_kafka()


Sent data: {'client_id': 'client_1392', 'transaction_id': 'd384a4b4-db0f-4b2e-bcdb-176555d8376d', 'high_education_ind': 0, 'past_num_of_claims': None, 'gender': None, 'address_change_ind': 0, 'witness_present_ind': 1, 'marital_status': 1, 'channel': 'Phone', 'accident_site': 'Rural', 'living_status': 'Owned', 'vehicle_category': 'SUV'}
Sent data: {'client_id': 'client_5402', 'transaction_id': '6a478676-543a-4c07-bbb7-e8126bc02580', 'high_education_ind': None, 'past_num_of_claims': 1, 'gender': 'M', 'address_change_ind': 1, 'witness_present_ind': None, 'marital_status': 0, 'channel': None, 'accident_site': None, 'living_status': 'Owned', 'vehicle_category': 'Truck'}
Sent data: {'client_id': 'client_6493', 'transaction_id': 'f07f9b99-6fc3-4e0c-bf63-0b574363e9d4', 'high_education_ind': 1, 'past_num_of_claims': 1, 'gender': 'F', 'address_change_ind': None, 'witness_present_ind': 1, 'marital_status': 0, 'channel': None, 'accident_site': 'Parking Lot', 'living_status': 'Owned', 'vehicle_cate