# End to End Benchmark: Producer (Dask) - Kafka Cluster - Consumer (Spark)

In [1]:
%%capture
# System Libraries
import sys, os
sys.path.insert(0, "..")
import pandas as pd
import uuid
## logging
import logging
import time
# Configure root logging via basicConfig (requested level: DEBUG), then
# immediately set the root logger's level to ERROR -- the later call wins,
# so the root logger only passes ERROR and above.
logging.basicConfig(level=logging.DEBUG)
logging.getLogger().setLevel(logging.ERROR)
# Explicitly pin the "py4j" and "radical.utils" loggers to ERROR as well.
logging.getLogger("py4j").setLevel(logging.ERROR)
logging.getLogger("radical.utils").setLevel(logging.ERROR)
 
# Pilot-Streaming
import pilot.streaming
import masa.spark
import mass.kafka

1. Set up resources for the test
2. Start Producer Mini App in Dask producing synthetic data
3. Start Consumer Mini App in Spark for processing data

In [None]:
# End-to-end benchmark sweep: for every parameter combination, start a Kafka,
# a Dask and a Spark pilot, run the Dask producer mini-app against the Spark
# consumer mini-app on a fresh topic, then tear everything down again.
#
# Teardown runs in a `finally` block so that an exception or a manual kernel
# interrupt does not leave pilots (and the consumer) running.

RESOURCE_URL = "slurm+ssh://login1.wrangler.tacc.utexas.edu"
WORKING_DIRECTORY = os.path.join('/work/01131/tg804093/wrangler/', "work")
CORES_PER_NODE = 48            # multiplier applied to each node count
PARTITIONS_PER_BROKER = 12     # topic partitions requested per broker node
CONSUMER_DRAIN_SECONDS = 240   # pause between producer completion and consumer cancel


def _pilot_description(pilot_type, number_nodes):
    """Build the pilot description dict for one framework.

    pilot_type   -- value of the "type" field ("kafka", "dask" or "spark").
    number_nodes -- node count; converted to cores via CORES_PER_NODE.
    """
    return {
        "resource": RESOURCE_URL,
        "working_directory": WORKING_DIRECTORY,
        "number_cores": CORES_PER_NODE * number_nodes,
        "project": "TG-MCB090174",
        "queue": "normal",
        "walltime": 159,
        "type": pilot_type
    }


def _start_pilot(pilot_type, number_nodes, started_pilots):
    """Create a pilot, record it in `started_pilots`, wait for it and return it.

    The handle is recorded *before* wait() so that it is still cancelled by the
    caller's cleanup if wait() raises or is interrupted.
    """
    new_pilot = pilot.streaming.PilotComputeService.create_pilot(
        _pilot_description(pilot_type, number_nodes))
    started_pilots.append(new_pilot)
    new_pilot.wait()
    return new_pilot


def _cancel_all(handles):
    """Call cancel() on every handle, in order, even if some of them fail.

    Failures are logged; the first one is re-raised after all handles have
    been attempted, so a failing cancel still surfaces to the caller.
    """
    first_error = None
    for handle in handles:
        try:
            handle.cancel()
        except Exception as error:
            logging.exception("cancel() failed for %r", handle)
            if first_error is None:
                first_error = error
    if first_error is not None:
        raise first_error


for num_repeats in range(3):
    for num_producer_nodes in [1]:
        for num_broker_nodes in [1,2]:
            for number_spark_nodes in [1,2,4,8,16]:
                for application in ["light-mlem", "light-gridrec", "kmeansstaticpred-1000",   "kmeansstatic-1000"]:
                    run_id = str(uuid.uuid1())
                    topic_name = "test_" + run_id
                    number_parallel_tasks = 8

                    # "kmeans...-<n>" encodes the consumer's cluster count in the
                    # suffix; only the part before the dash is passed to the
                    # mini-apps. Other names (e.g. "light-mlem") are passed whole.
                    application_type = application
                    number_clusters = 100
                    if application.startswith("kmeans") and "-" in application:
                        name_parts = application.split("-")
                        application_type = name_parts[0]
                        number_clusters = int(name_parts[1])

                    # "light*" applications use a produce interval of 20, all others 0.
                    produce_interval = 20 if application_type.startswith("light") else 0

                    started_pilots = []
                    consumer = None
                    try:
                        kafka_pilot = _start_pilot("kafka", num_broker_nodes, started_pilots)
                        dask_pilot = _start_pilot("dask", num_producer_nodes, started_pilots)
                        spark_pilot = _start_pilot("spark", number_spark_nodes, started_pilots)

                        # Single parenthesized argument: prints the same text
                        # under Python 2 and Python 3.
                        print("Application: %s, Number Clusters: %d"%(application_type, number_clusters))

                        # Scenario:
                        prod = mass.kafka.MiniApp(
                            dask_scheduler=dask_pilot.get_details()['master_url'],
                            kafka_zk_hosts=kafka_pilot.get_details()["master_url"],
                            number_parallel_tasks=number_parallel_tasks,
                            number_clusters=192, # kmeans
                            number_points_per_cluster=52084, # kmeans
                            number_points_per_message=5000, # kmeans
                            number_dim=3, # kmeans
                            number_messages=10000, # light
                            number_produces=80,
                            number_partitions=num_broker_nodes*PARTITIONS_PER_BROKER,
                            topic_name=topic_name,
                            application_type=application_type,
                            produce_interval=produce_interval
                        )
                        prod.run_in_background()

                        consumer = masa.spark.MiniApp(
                            spark_master=spark_pilot.get_details()["master_url"],
                            kafka_zk_hosts=kafka_pilot.get_details()["master_url"],
                            topic_name=topic_name,
                            number_clusters=number_clusters,
                            test_scenario="%s-%d-%d-%d-%d"%(application_type, num_producer_nodes, num_broker_nodes, number_spark_nodes, number_clusters),
                            application=application_type
                        )
                        consumer.run_in_background()

                        # Wait for completion, then give the consumer extra time
                        # before it is cancelled.
                        prod.wait()
                        time.sleep(CONSUMER_DRAIN_SECONDS)
                        print("******** Producer Wait RETURNED. Cancel Streaming App")
                    finally:
                        # Same teardown order as before: consumer, then the
                        # Kafka, Dask and Spark pilots.
                        running = [consumer] if consumer is not None else []
                        _cancel_all(running + started_pilots)

**** Job: 61217 State : Pending
look for configs in: /work/01131/tg804093/wrangler/work/kafka-2678d908-fe65-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-2678d908-fe65-11e7-82d0-549f3509766c/config (Sat Jan 20 22:40:04 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-135:9092', 'zookeeper.connect': 'c251-135:2181'}
**** Job: 61218 State : Pending
**** Job: 61219 State : Pending
Create Spark Context for URL: spark://129.114.58.137:7077
Create Spark Context for URL: spark://129.114.58.137:7077
Application: kmeansstaticpred, Number Clusters: 100
look for configs in: /work/01131/tg804093/wrangler/work/kafka-2678d908-fe65-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-2678d908-fe65-11e7-82d0-549f3509766c/config (Sat Jan 20 22:40:04 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-135:9092', 

Exception in thread Thread-14:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/c

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61223 State : Pending
look for configs in: /work/01131/tg804093/wrangler/work/kafka-e7599972-fe6b-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-e7599972-fe6b-11e7-82d0-549f3509766c/config (Sat Jan 20 23:28:25 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-139:9092', 'zookeeper.connect': 'c251-139:2181'}
**** Job: 61224 State : Pending
**** Job: 61225 State : Pending
Create Spark Context for URL: spark://129.114.58.136:7077
Create Spark Context for URL: spark://129.114.58.136:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-e7599972-fe6b-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-e7599972-fe6b-11e7-82d0-549f3509766c/config (Sat Jan 20 23:28:25 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id'

Exception in thread Thread-56:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/c

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61242 State : Running
look for configs in: /work/01131/tg804093/wrangler/work/kafka-68536e66-fe7d-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-68536e66-fe7d-11e7-82d0-549f3509766c/config (Sun Jan 21 01:33:43 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-139:9092', 'zookeeper.connect': 'c251-139:2181'}
**** Job: 61243 State : Pending
**** Job: 61244 State : Pending
Create Spark Context for URL: spark://129.114.58.137:7077
Create Spark Context for URL: spark://129.114.58.137:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-68536e66-fe7d-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-68536e66-fe7d-11e7-82d0-549f3509766c/config (Sun Jan 21 01:33:43 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id'

Exception in thread Thread-98:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/c

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61260 State : Running
look for configs in: /work/01131/tg804093/wrangler/work/kafka-d21739de-fe8e-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-d21739de-fe8e-11e7-82d0-549f3509766c/config (Sun Jan 21 03:38:22 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-138:9092', 'zookeeper.connect': 'c251-138:2181'}
**** Job: 61261 State : Pending
**** Job: 61262 State : Pending
Create Spark Context for URL: spark://129.114.58.108:7077
Create Spark Context for URL: spark://129.114.58.108:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-d21739de-fe8e-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-d21739de-fe8e-11e7-82d0-549f3509766c/config (Sun Jan 21 03:38:22 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id'

Exception in thread Thread-140:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61278 State : Running
look for configs in: /work/01131/tg804093/wrangler/work/kafka-ca6db14e-fe9e-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-ca6db14e-fe9e-11e7-82d0-549f3509766c/config (Sun Jan 21 05:32:41 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-135:9092', 'zookeeper.connect': 'c251-135:2181'}
**** Job: 61279 State : Pending
**** Job: 61280 State : Pending
Create Spark Context for URL: spark://129.114.58.108:7077
Create Spark Context for URL: spark://129.114.58.108:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-ca6db14e-fe9e-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-ca6db14e-fe9e-11e7-82d0-549f3509766c/config (Sun Jan 21 05:32:41 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id'

Exception in thread Thread-182:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61298 State : Pending
look for configs in: /work/01131/tg804093/wrangler/work/kafka-75c35cea-feb0-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-75c35cea-feb0-11e7-82d0-549f3509766c/config (Sun Jan 21 07:39:10 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-135:9092', 'zookeeper.connect': 'c251-135:2181'}
**** Job: 61299 State : Pending
**** Job: 61300 State : Pending
Create Spark Context for URL: spark://129.114.58.108:7077
Create Spark Context for URL: spark://129.114.58.108:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-75c35cea-feb0-11e7-82d0-549f3509766c/config
['broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-75c35cea-feb0-11e7-82d0-549f3509766c/config (Sun Jan 21 07:39:10 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id'

Exception in thread Thread-224:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61318 State : Pending
look for configs in: /work/01131/tg804093/wrangler/work/kafka-0a613bf4-fec7-11e7-82d0-549f3509766c/config
['broker-1', 'broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-0a613bf4-fec7-11e7-82d0-549f3509766c/config (Sun Jan 21 10:20:48 2018)
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-0a613bf4-fec7-11e7-82d0-549f3509766c/config (Sun Jan 21 10:20:48 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-135:9092', 'zookeeper.connect': 'c251-135:2181'}
**** Job: 61319 State : Pending
**** Job: 61320 State : Pending
Create Spark Context for URL: spark://129.114.58.139:7077
Create Spark Context for URL: spark://129.114.58.139:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-0a613bf4-fec7-11e7-82d0-549f3509766c/config
['broker-1', 'broker-0']
Kafka Config: /work/01131/tg804

Exception in thread Thread-266:
Traceback (most recent call last):
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/mass/kafka.py", line 440, in run
    res = delayed(tasks).compute()
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 135, in compute
    (result,) = compute(self, traverse=False, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/dask/base.py", line 333, in compute
    results = get(dsk, keys, **kwargs)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/client.py", line 1999, in get
    results = self.gather(packed, asynchronous=asynchronous)
  File "/home/01131/tg804093/anaconda2/lib/python2.7/site-packages/distributed/

******** Producer Wait RETURNED. Cancel Streaming App
**** Job: 61346 State : Pending
look for configs in: /work/01131/tg804093/wrangler/work/kafka-d96b8754-fee7-11e7-82d0-549f3509766c/config
['broker-1', 'broker-0']
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-d96b8754-fee7-11e7-82d0-549f3509766c/config (Sun Jan 21 14:15:39 2018)
Kafka Config: /work/01131/tg804093/wrangler/work/kafka-d96b8754-fee7-11e7-82d0-549f3509766c/config (Sun Jan 21 14:15:39 2018)
{'zookeeper.connection.timeout.ms': '6000', 'broker.id': '0', 'listeners': 'PLAINTEXT://c251-102:9092', 'zookeeper.connect': 'c251-102:2181'}
**** Job: 61348 State : Pending
**** Job: 61349 State : Pending
Create Spark Context for URL: spark://129.114.58.104:7077
Create Spark Context for URL: spark://129.114.58.104:7077
Application: kmeansstatic, Number Clusters: 10
look for configs in: /work/01131/tg804093/wrangler/work/kafka-d96b8754-fee7-11e7-82d0-549f3509766c/config
['broker-1', 'broker-0']
Kafka Config: /work/01131/tg804

In [None]:
%%capture
# Manual cleanup: cancel the Kafka, Dask and Spark pilots (in that order)
# that are still bound from the benchmark cell.
for leftover_pilot in (kafka_pilot, dask_pilot, spark_pilot):
    leftover_pilot.cancel()

In [None]:
# Stand-alone producer run against a fixed Kafka/ZooKeeper endpoint.
# No dask_scheduler is passed here (that argument was left commented out).
# NOTE(review): the benchmark sweep cell strips the "-<n>" suffix before
# passing application_type; here the suffixed name is passed through as-is --
# confirm that MiniApp accepts it.
producer_settings = {
    # "dask_scheduler": dask_pilot.get_details()['master_url'],
    "kafka_zk_hosts": "c251-133:2181",
    "number_parallel_tasks": 8,
    "number_clusters": 192,                # kmeans
    "number_points_per_cluster": 52084,    # kmeans
    "number_points_per_message": 5000,     # kmeans
    "number_dim": 3,                       # kmeans
    "number_messages": 60000,              # light
    "number_produces": 80,
    "number_partitions": 1*12,
    "topic_name": "test",
    "application_type": "kmeansstaticpred-100",
    "produce_interval": 0,
}
prod = mass.kafka.MiniApp(**producer_settings)
# run() rather than run_in_background() as in the sweep cell -- presumably
# executes in the foreground; verify against mass.kafka.
prod.run()