In [6]:
import datetime
import os
import random
from math import sin, cos, sqrt, atan2, radians

import numpy as np
import pandas as pd

def splitTimeRange(rowDriver):
    """Build one 10-hour window per day of the driver's contract period.

    The period covers the ``contractyears * 365`` days that end at the
    current local time (a year is counted as 365 days; leap days are
    ignored). The first window starts exactly that many days before "now"
    and every following window starts one calendar day later.

    Args:
        rowDriver: Mapping-like driver record (for example a pandas row)
            providing a numeric ``'contractyears'`` entry.

    Returns:
        A list with one ``[startMs, endMs]`` pair per day, where both
        values are epoch timestamps in milliseconds and the end lies
        10 wall-clock hours after the start. Empty when the contract
        length is zero.
    """
    # Coerce to a built-in int once: pandas may hand back a numpy integer
    # (rejected by datetime.timedelta) or a float (rejected by range()).
    totalDays = int(rowDriver['contractyears'] * 365)

    firstStart = datetime.datetime.now() - datetime.timedelta(days=totalDays)
    windowLength = datetime.timedelta(hours=10)

    datesArray = []
    for dayOffset in range(totalDays):
        windowStart = firstStart + datetime.timedelta(days=dayOffset)
        windowEnd = windowStart + windowLength
        datesArray.append([windowStart.timestamp() * 1000,
                           windowEnd.timestamp() * 1000])
    return datesArray
    
def getLocationDistance(lon1, lat1,lon2, lat2):
    """Return the great-circle distance between two points in kilometres.

    Uses the haversine formula on a sphere with a radius of 6373 km.
    Note the argument order: longitude comes before latitude for each
    point.

    Args:
        lon1: Longitude of the first point, in degrees.
        lat1: Latitude of the first point, in degrees.
        lon2: Longitude of the second point, in degrees.
        lat2: Latitude of the second point, in degrees.

    Returns:
        The distance in kilometres as a float (0.0 for identical points).
    """
    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c
    
def generateLocationPair(rowDriver,dfLocations):
    """Pick two different preferred areas of a driver and measure the trip.

    Args:
        rowDriver: Mapping-like driver record whose ``'preferareas'`` entry
            is a comma-separated string of 1-based row numbers into
            ``dfLocations``.
        dfLocations: pandas DataFrame of locations. The value in column
            position 1 is returned as the location; column positions 2
            and 3 are passed to ``getLocationDistance`` as its longitude
            and latitude arguments respectively.

    Returns:
        ``[startLoc, endLoc, distance]`` where the two locations come from
        two different entries of the preference list and ``distance`` is
        the value returned by ``getLocationDistance``.

    Raises:
        ValueError: If fewer than two preferred areas are listed, since no
            distinct start/end pair can be drawn (the previous retry loop
            never terminated in that case).
    """
    areas = rowDriver['preferareas'].split(",")
    if len(areas) < 2:
        raise ValueError(
            "at least two preferred areas are required to build a "
            "start/end pair, got: %r" % (rowDriver['preferareas'],))

    # Draw two distinct *positions* in the preference list in one step
    # instead of re-rolling until the second draw differs from the first.
    startPos, endPos = random.sample(range(len(areas)), 2)
    # Area numbers are 1-based; DataFrame row positions are 0-based.
    startIndex = int(areas[startPos]) - 1
    endIndex = int(areas[endPos]) - 1

    # Address cells purely by position with iloc[row, col]; indexing the
    # row Series with a bare integer key is deprecated in recent pandas.
    startLoc = dfLocations.iloc[startIndex, 1]
    endLoc = dfLocations.iloc[endIndex, 1]

    distance = getLocationDistance(
        dfLocations.iloc[startIndex, 2], dfLocations.iloc[startIndex, 3],
        dfLocations.iloc[endIndex, 2], dfLocations.iloc[endIndex, 3])
    return [startLoc, endLoc, distance]
    
def generateTimeSpan(rowDriver):
    """Return one randomly selected entry of the driver's preferred times.

    Args:
        rowDriver: Mapping-like driver record whose ``'prefertime'`` entry
            is a comma-separated string of time-span values.

    Returns:
        One of the comma-separated pieces, chosen uniformly at random.
    """
    candidates = rowDriver['prefertime'].split(",")
    # randrange(n) draws from 0..n-1, i.e. the same as randint(0, n - 1).
    chosen = random.randrange(len(candidates))
    return candidates[chosen]
    

def prepareDriverOrderRow(rowDriver,dfLocations, dateRange, delay, violation):
    """Assemble one synthetic order record for a driver.

    Args:
        rowDriver: Mapping-like driver record; ``'driver'`` is read here and
            the record is forwarded to ``generateLocationPair`` and
            ``generateTimeSpan``.
        dfLocations: Locations table forwarded to ``generateLocationPair``.
        dateRange: Two-item sequence ``[start, end]`` copied into the row.
        delay: Truthy to record a delay of 90, otherwise 0.
        violation: Truthy to record 1 violation, otherwise 0.

    Returns:
        A 12-item list: driver, start location, end location, start time,
        end time, a random integer in 1..3, distance, time span, the
        constant 300, delay, violation count and the constant 3000.
    """
    startLoc, endLoc, tripDistance = generateLocationPair(rowDriver, dfLocations)
    chosenSpan = generateTimeSpan(rowDriver)

    delayTime = 90 if delay else 0
    violationCnt = 1 if violation else 0

    return [
        rowDriver['driver'],
        startLoc,
        endLoc,
        dateRange[0],
        dateRange[1],
        random.randint(1, 3),
        tripDistance,
        chosenSpan,
        300,
        delayTime,
        violationCnt,
        3000,
    ]


# Generate one CSV of synthetic daily orders per driver listed in drivers.csv.
dpath = './'

dataFrameDrivers = pd.read_csv(dpath + "drivers.csv")

dataFrameLocations = pd.read_csv(dpath + "locations.csv")
# NOTE(review): despite its name this is the number of LOCATION rows, and it
# is not used below. Left unchanged in case other cells read it -- confirm
# whether len(dataFrameDrivers) was intended.
driversCount = len(dataFrameLocations)

labels = ['driver', 'startLocation', 'endLocation', 'startTime','endTime','meansOfTransport',
          'distance','timespan','duration','delay','trafficViolation','cost']

outputDir = './output/'
# DataFrame.to_csv does not create missing directories.
os.makedirs(outputDir, exist_ok=True)

for indexDriver, rowDriver in dataFrameDrivers.iterrows():
    print("..............",rowDriver['driver'])
    datesArray = splitTimeRange(rowDriver)

    # 'delay' and 'violations' are used as fractions of all orders that
    # should be flagged.
    delayConf = rowDriver['delay']
    violationsConf = rowDriver['violations']

    iNum = len(datesArray)
    delayCount = int(delayConf * iNum)
    violationCount = int(violationsConf * iNum)

    driverOrderData = []
    for orderNum, dateRange in enumerate(datesArray):
        # Flag exactly the first delayCount / violationCount orders. The
        # previous counter was incremented before a `>=` comparison and so
        # flagged one order too few (none at all when the count was 1).
        delay = orderNum < delayCount
        violation = orderNum < violationCount

        newDriverOrder = prepareDriverOrderRow(rowDriver, dataFrameLocations,
                                               dateRange, delay, violation)
        driverOrderData.append(newDriverOrder)

    # Export the driver's orders to a CSV file named after the driver.
    dfDriverNew = pd.DataFrame.from_records(driverOrderData, columns=labels)
    dfDriverNew.to_csv(outputDir + rowDriver['driver'] + '.csv')








.............. Bang
.............. Zion
.............. Kristin
.............. Sue
.............. Lingqi
.............. Patric
.............. Ling
.............. Kenneth
.............. Robin
.............. Christy
.............. Summer
.............. Sage
