# Generate individual call events based on call volume TSV

Generate a list of individual call times based on call volume. Assumes that calls arrive uniformly over each time interval.

Also add how long each call takes. Call length is sampled from a Gaussian distribution.

In [1]:
from datetime import date
from datetime import datetime
from datetime import time
from datetime import timedelta

import re

import pandas as pd
import numpy as np

## Read table of number of calls received every half hour

In [2]:
# Tab-separated table with one row per time interval and its call count.
call_volume_path = "data/call_volume.tsv"
calls = pd.read_csv(call_volume_path, sep="\t")

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
calls.head()

Unnamed: 0,start_time,stop_time,num_calls
0,05:00 AM,05:30 AM,14
1,05:30 AM,06:00 AM,21
2,06:00 AM,06:30 AM,43
3,06:30 AM,07:00 AM,57
4,07:00 AM,07:30 AM,70


In [4]:
# Total number of calls across all intervals in the table.
calls["num_calls"].sum()

2350

---

## Generate individual call events

Assumes calls arrive uniformly over each half hour. The length of each call is sampled from a normal distribution.

In [5]:
def create_time(time_string):
    """Convert a 12-hour clock string into a ``datetime.time`` object.

    Parameters
    ----------
    time_string : str
        Time formatted as ``"HH:MM AM"`` or ``"HH:MM PM"``: two-digit hour,
        two-digit minute, a single space, and an upper-case period.

    Returns
    -------
    datetime.time
        The equivalent time on a 24-hour clock.

    Raises
    ------
    AssertionError
        If ``time_string`` does not have the expected format.
    ValueError
        If the hour or minute is out of range for ``datetime.time``.
    """
    # Capture the pieces directly from the validated match so that the parsed
    # values always agree with what the pattern accepted.
    res = re.match(r'^(\d\d):(\d\d) ([AP]M)$', time_string)
    assert res is not None, "Time string has incorrect format"

    hour = int(res.group(1))
    minute = int(res.group(2))
    period = res.group(3)

    if period == "AM" and hour == 12:
        # 12:xx AM is just after midnight, i.e. hour 0 on a 24-hour clock.
        hour = 0
    elif period == "PM" and hour < 12:
        # 12:xx PM is already noon; every other PM hour is shifted by 12.
        hour += 12

    return time(hour=hour, minute=minute)

In [6]:
def generate_individual_calls(start, stop, num):
    """Generate a list of uniformly spaced time objects.

    The interval from ``start`` to ``stop`` is split into ``num`` equal gaps
    and one call is placed at the beginning of each gap, so the first call is
    at ``start`` and the last one falls before ``stop``. Microseconds are
    truncated, i.e. each time is floored to the whole second.

    Parameters
    ----------
    start : datetime.time
        Beginning of the interval (inclusive).
    stop : datetime.time
        End of the interval (exclusive). A ``stop`` earlier than ``start`` is
        taken to be on the following day, so an interval that crosses
        midnight (e.g. 11:30 PM to 12:00 AM) is handled.
    num : int
        Number of calls to place in the interval.

    Returns
    -------
    list of datetime.time
        ``num`` call times in order of occurrence; empty if ``num <= 0``.

    Raises
    ------
    AssertionError
        If ``start`` or ``stop`` is not a ``datetime.time`` object.
    """
    assert isinstance(start, time), "Start time needs to be a time object"
    assert isinstance(stop, time), "Stop time needs to be a time object"

    # An interval without calls produces no events; returning early also
    # avoids dividing the duration by zero below.
    if num <= 0:
        return []

    # Attach both times to an arbitrary date so they can be subtracted.
    interval_start = datetime.combine(date.min, start)
    interval_stop = datetime.combine(date.min, stop)
    if interval_stop < interval_start:
        # The interval crosses midnight: the stop time is on the next day.
        interval_stop += timedelta(days=1)

    gap = (interval_stop - interval_start) / num

    # Drop the microseconds (truncate, not round) to get whole seconds.
    return [
        (interval_start + gap * i).time().replace(microsecond=0)
        for i in range(num)
    ]

In [7]:
# Collect the individual call times of every interval into one flat list.
all_calls = []

for row in calls.itertuples():
    # Convert the interval's boundary strings into time objects.
    start_time, stop_time = (
        create_time(boundary)
        for boundary in (row.start_time, row.stop_time)
    )

    # Spread this interval's calls evenly and append them to the full list.
    call_times = generate_individual_calls(start_time, stop_time, row.num_calls)
    all_calls.extend(call_times)

### Generate call durations

In [8]:
# Call lengths (in seconds) are drawn from a normal distribution.
# average call time: 4 min 30 s
avg_call_time = 270
sigma_secs = 30

# Fixed seed so that re-running the notebook regenerates the same call lengths.
random_seed = 0
rng = np.random.RandomState(random_seed)

call_length_secs = (rng
    .normal(
        loc=avg_call_time, scale=sigma_secs,
        size=len(all_calls)
    )
    .round()
    # A normal distribution is unbounded; keep every call at least 1 s long
    # so that no zero or negative duration can be generated.
    .clip(min=1)
    .astype(int)
)

# Convert the numpy integers to plain Python ints before building timedeltas.
call_lengths = [
    timedelta(seconds=int(v))
    for v in call_length_secs
]

## Save call events to file

In [9]:
fname = "data/call_times_uniform_normal.txt"

# One line per call: arrival time in ISO format, a tab, then the call length.
event_lines = (
    "{}\t{}\n".format(call_intime.isoformat(), duration)
    for call_intime, duration in zip(all_calls, call_lengths)
)

with open(fname, "w") as fout:
    fout.writelines(event_lines)