In [1]:
from demolib import schema, cfg, spark
from demolib.lib import *


from confluent_kafka import Producer
from time import sleep
from random import random
import json
import sys
import os
from pyspark.sql.functions import expr


In [2]:
raw_transaction_df = spark.read \
    .option('header', False) \
    .csv('{}/{}'.format(cfg.load.dir, cfg.load.event_transaction), schema=schema.transaction.schema)

transaction_df = raw_transaction_df.withColumn('trans_time', expr('current_timestamp()')) \
    .select("cc_num", "first", "last", "trans_num", "trans_time", "category", "merchant", "amt", "merch_lat", "merch_long")

In [3]:
transaction_list = transaction_df.collect()

In [4]:
def delivery_callback(err, msg):
    if err:
        sys.stderr.write('%% Message failed delivery: %s\n' % err)
    else:
        sys.stderr.write('%% Message delivered to %s [%d] @ %o\n' % (msg.topic(), msg.partition(), msg.offset()))

In [None]:

# See https://github.com/Azure/azure-event-hubs-for-kafka/tree/master/quickstart/python

# Create Producer instance
p = Producer(**cfg.kafka.config)

for i in range(0, len(transaction_list)):

    trans = transaction_list[i].asDict()
    trans['trans_time'] = str(trans['trans_time'])

    trans_json = json.dumps(trans)

    try:
        p.produce(cfg.kafka.topic, trans_json, callback=delivery_callback)
        p.poll(5)
    except BufferError as e:
        sys.stderr.write('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(p))
    

    if flag_poll('producer_stop', True):
        sys.stderr.write('%% Producer %s stop requested!\n' % '')
        break

    sleep(random()*2.0 + 1.0)

# Wait until all messages have been delivered
sys.stderr.write('%% Waiting for %d deliveries\n' % len(p))
p.flush()

% Message delivered to ccfraud [0] @ 13756
% Message delivered to ccfraud [0] @ 13757
% Message delivered to ccfraud [0] @ 13760
% Message delivered to ccfraud [0] @ 13761
% Message delivered to ccfraud [0] @ 13762
% Message delivered to ccfraud [0] @ 13763
% Message delivered to ccfraud [0] @ 13764
% Message delivered to ccfraud [0] @ 13765
% Message delivered to ccfraud [0] @ 13766
% Message delivered to ccfraud [0] @ 13767
% Message delivered to ccfraud [0] @ 13770
% Message delivered to ccfraud [0] @ 13771
% Message delivered to ccfraud [0] @ 13772
% Message delivered to ccfraud [0] @ 13773
% Message delivered to ccfraud [0] @ 13774
% Message delivered to ccfraud [0] @ 13775
% Message delivered to ccfraud [0] @ 13776
% Message delivered to ccfraud [0] @ 13777
% Message delivered to ccfraud [0] @ 14000
% Message delivered to ccfraud [0] @ 14001
% Message delivered to ccfraud [0] @ 14002
% Message delivered to ccfraud [0] @ 14003
% Message delivered to ccfraud [0] @ 14004
% Message d