# This notebook contains code which writes fictional 1 million file transfer details, across 5 data-centers, on to a cloud hosted ElasticSearch

### Assumptions about the data:

* 5 data centers from 'a'-'e'.
* Data center a is better i.e. more transfers originate there
* File transfer sizes range from 1kb to 10Gb
* 5 clusters have been made based on file sizes(0-100MB, 100MB-1Gb, 1gb-2gb, 2gb-10gb) which determine the time delay during transfer
* All failed transfers have an automatic delay of 10 seconds


In [1]:
#  Dependencies
import requests
from elasticsearch import Elasticsearch,helpers
import certifi
from datetime import datetime,timedelta
import radar
import numpy as np
import uuid
import random

In [2]:
host_name ='https://9e2a4b7052bdf8fd2685a0f5a35ae274.ap-southeast-1.aws.found.io'

In [3]:
# initialise elastic search with authorisation
es = Elasticsearch(
        [host_name],
        port=9243,
        http_auth=("gsoc","vyom@gsoc"),
        use_ssl=True,
        verify_certs=True,
        ca_certs=certifi.where(),
    )

In [4]:
r= requests.get(host_name, auth=('gsoc','vyom@gsoc'))
r.status_code

200

In [5]:
def generate_timestamp():
    
    start= '2017-01-01T00:00:00'
    end = '2017-03-01T00:00:00'
    timestamp= radar.random_datetime(start, end)
    return timestamp

In [6]:
random.seed(1)
def transfer_time(file_size):
    if file_size>0 and file_size<=100000000:     # less than 100 MB
        return random.gauss(1500, 500)
    elif file_size<= 1000000000:                 # b/w 100MB and 1GB
        return random.gauss(4000, 500)
    elif file_size<= 2000000000:                 # b/w 1GB and 2GB
        return random.gauss(6500, 500)
    elif file_size <= 10000000000:               # b/w 2GB and 10GB
        return random.gauss(9000, 500)
    else :                                       # any anomalous value
        return 100

z= generate_timestamp()
delay = int(transfer_time(10000000))
w= z + timedelta(seconds=delay) 
print('original time:{} final_time:{} time_delay:{}'.format(z,w,delay))

original time:2017-01-14 01:05:28 final_time:2017-01-14 01:16:51 time_delay:683


In [9]:
bulk_initial=[]
bulk_final=[]

random.seed(1)
np.random.seed(1)

for i in range(0,10):
    src_centers=['data center a','data center b','data center c','data center d','data center e']
    transfer_src = np.random.choice(src_centers, p=[0.3, 0.175, 0.175, 0.175, 0.175])

    dst_centers = [x for x in src_centers if x != transfer_src]
    transfer_dst = np.random.choice(dst_centers)

    final_transfer_status = ['transfer-success','transfer-failure']

    transfer_starttime = generate_timestamp()
    file_size=random.choice(range(1024,10000000000))

    ftp={
        'event_type': 'transfer-queued',
        'uuid': uuid.uuid4(),
        'src_site' : transfer_src,
        'dst_site' : transfer_dst,
        'timestamp': transfer_starttime,
        'bytes' : file_size
    }
    z={
        '_index': 'ftp_initial',
        '_type': 'transfer',
        '_id': (i+1),
        '_source': ftp
    }

    bulk_initial.append(z)
        
    transfer_status = ['transfer-success', 'transfer-failure']
    final_status = np.random.choice(transfer_status, p=[0.95,0.05])
    ftp['event_type'] = final_status
    
    if (final_status=='transfer-failure'):
        time_delay = 10
    else :
        time_delay = int(transfer_time(file_size))   # ranges roughly from 0-10000 s 
    
    ftp['timestamp'] = transfer_starttime + timedelta(seconds=time_delay)
    z={
        '_index': 'ft_final',
        '_type': 'transfer',
        '_id': (i+1),
        '_source': ftp
    }
    
    bulk_final.append(z)

In [58]:
len(bulk_final)

10000

In [59]:
len(bulk_initial)

10000

In [60]:
helpers.bulk(es,bulk_initial)

(10000, [])

In [61]:
helpers.bulk(es,bulk_final)

(10000, [])

In [63]:
es.search(index='ft_final')

{'_shards': {'failed': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '3001',
    '_index': 'ft_final',
    '_score': 1.0,
    '_source': {'bytes': 4295223887,
     'dst_site': 'data center d',
     'event_type': 'transfer-success',
     'src_site': 'data center b',
     'timestamp': '2017-02-24T15:18:06',
     'uuid': 'c97ad83d-c778-4840-963a-31fba27da11c'},
    '_type': 'transfer'},
   {'_id': '3002',
    '_index': 'ft_final',
    '_score': 1.0,
    '_source': {'bytes': 1372350991,
     'dst_site': 'data center b',
     'event_type': 'transfer-success',
     'src_site': 'data center c',
     'timestamp': '2017-02-28T13:48:02',
     'uuid': 'bea7c681-4657-4561-ba97-fdbbffe22604'},
    '_type': 'transfer'},
   {'_id': '3003',
    '_index': 'ft_final',
    '_score': 1.0,
    '_source': {'bytes': 9221405689,
     'dst_site': 'data center a',
     'event_type': 'transfer-success',
     'src_site': 'data center c',
     'timestamp': '2017-01-22T16:24:18',
     'uuid': '176bb51

In [64]:
es.search(index='ftp_initial')

{'_shards': {'failed': 0, 'successful': 1, 'total': 1},
 'hits': {'hits': [{'_id': '3001',
    '_index': 'ftp_initial',
    '_score': 1.0,
    '_source': {'bytes': 4295223887,
     'dst_site': 'data center d',
     'event_type': 'transfer-success',
     'src_site': 'data center b',
     'timestamp': '2017-02-24T15:18:06',
     'uuid': 'c97ad83d-c778-4840-963a-31fba27da11c'},
    '_type': 'transfer'},
   {'_id': '3002',
    '_index': 'ftp_initial',
    '_score': 1.0,
    '_source': {'bytes': 1372350991,
     'dst_site': 'data center b',
     'event_type': 'transfer-success',
     'src_site': 'data center c',
     'timestamp': '2017-02-28T13:48:02',
     'uuid': 'bea7c681-4657-4561-ba97-fdbbffe22604'},
    '_type': 'transfer'},
   {'_id': '3003',
    '_index': 'ftp_initial',
    '_score': 1.0,
    '_source': {'bytes': 9221405689,
     'dst_site': 'data center a',
     'event_type': 'transfer-success',
     'src_site': 'data center c',
     'timestamp': '2017-01-22T16:24:18',
     'uuid':

In [8]:
es.indices.delete(index=['ftp_initial','ft_final'])

DELETE https://9e2a4b7052bdf8fd2685a0f5a35ae274.ap-southeast-1.aws.found.io:443/ftp_initial,ft_final [status:404 request:1.685s]


TypeError: string indices must be integers