# Importing Libraries

In [None]:
import subprocess
import json
import shlex
import datetime
from sys import getsizeof
import time
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import math

# Calling Workflow and Storing Logs

In [None]:
from sys import getsizeof
class GetWorkflowLogs:
    """Trigger a workflow repeatedly and collect the logs of its functions.

    For every input message the workflow's initial function is invoked
    ``count`` times over HTTP (through ``curl``).  Logs of every function in
    ``functions`` are read periodically with ``gcloud functions logs read``
    and, after the last message, dumped to a JSON file together with the
    execution IDs returned by the initial function.

    NOTE: constructing an instance immediately runs the whole experiment
    (``__init__`` calls ``execute``).

    Parameters:
        workflow: name of the workflow; only used to build output file names.
        messages: sequence of input messages (strings) sent to the workflow.
        count: number of invocations per message.
        sleepTime: pause in seconds between two consecutive invocations.
        functions: names of the functions whose logs are collected.
        initFunc: name of the function that is invoked over HTTP.
    """

    # Base URL of the deployed functions; the initial function name is appended.
    FUNCTIONS_BASE_URL = "https://northamerica-northeast1-ubc-serverless-ghazal.cloudfunctions.net/"
    # Region passed to ``gcloud functions logs read``.
    REGION = "northamerica-northeast1"
    # Maximum number of log entries requested per read.
    LOG_READ_LIMIT = 1000
    # Seconds to wait after a batch of invocations before reading the logs.
    LOG_SETTLE_SECONDS = 20

    def __init__(self, workflow, messages, count, sleepTime, functions, initFunc):
        self.initFunc = initFunc
        self.count = count
        self.msgExeDic = {}
        self.workflow = workflow
        self.messages = messages
        self.functions = functions
        self.messagesize = {}
        self.sleepTime = sleepTime
        self.timeDiff = []
        self.execodes = []
        self.allLogs = {}
        self.writeLogs = {}
        for func in self.functions:
            self.allLogs[func] = []
            self.writeLogs[func] = []
        self.subscriberExe = {}
        self.publisherFinishedTime = {}
        # Number of messages to process between two log reads.  The factor 3
        # presumably stands for the log lines written per invocation, so that
        # one read of LOG_READ_LIMIT entries covers a whole period -- TODO confirm.
        # The period is clamped to at least 1: a period of 0 (count > 333)
        # would never match the counter and silently disable periodic reads,
        # and count == 0 used to raise ZeroDivisionError.
        if self.count > 0:
            self.getLogPeriod = max(1, math.floor(self.LOG_READ_LIMIT / (self.count * 3)))
        else:
            self.getLogPeriod = 1
        self.finalWord = False
        self.execute()

    def execute(self):
        """Run the experiment for every message, then store the results."""
        self.getLogCounter = 0
        lastIndex = len(self.messages) - 1
        for index, msg in enumerate(self.messages):
            # Compare positions, not values: an earlier message equal to the
            # last one must not be treated as the final message.
            if index == lastIndex:
                self.finalWord = True
            self.getLogCounter += 1
            self.getLatency(msg)
        self.saveResults()
        print("Files are saved")

    def saveResults(self):
        """Write the execution IDs and the execution-ID -> message map to ./data."""
        dataDir = os.getcwd() + "/data/"
        # Create the output directory on first use instead of failing.
        os.makedirs(dataDir, exist_ok=True)
        prefix = dataDir + str(self.workflow) + ", " + str(self.count)
        with open(prefix + ", publisheExeIDs.json", "w") as exeIdFile:
            json.dump(self.execodes, exeIdFile)
        with open(prefix + ", messageExe.json", "w") as msgExeFile:
            json.dump(self.msgExeDic, msgExeFile)

    def getLatency(self, msg):
        """Invoke the workflow for ``msg`` and read the logs when a period ends.

        Logs are read when ``getLogPeriod`` messages have been processed since
        the previous read, and always after the final message.
        """
        self.callWorkflow(msg)
        if self.getLogCounter == self.getLogPeriod or self.finalWord:
            self.getLogCounter = 0
            # Wait before reading so that recent log entries can show up.
            time.sleep(self.LOG_SETTLE_SECONDS)
            self.getLogs()
            if self.finalWord:
                print("Message " + msg + " with " + str(getsizeof(msg)) + " bytes is called for " + str(self.count) + " times!")

    def callWorkflow(self, msg):
        """POST ``msg`` to the initial function ``count`` times.

        The decoded response body of every call is stored as the execution ID
        in ``execodes`` and mapped to ``msg`` in ``msgExeDic``.

        Raises:
            subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
        """
        # Build the argument list directly and let json.dumps escape the
        # message, so quotes or backslashes in it cannot break the command.
        payload = json.dumps({"message": msg, "routing": "0000000000"})
        command = [
            "curl", "-X", "POST", self.FUNCTIONS_BASE_URL + self.initFunc,
            "--data", payload,
            "-H", "Content-Type:application/json",
        ]
        for c in range(self.count):
            res = subprocess.check_output(command)
            exeId = res.decode("utf-8")
            print("-----------------" + str(c) + "-----------------")
            print("Execution ID: " + exeId)
            self.msgExeDic[exeId] = msg
            self.execodes.append(exeId)
            time.sleep(self.sleepTime)
        print("Workflow with Input: " + msg + " is triggered for " + str(self.count) + " times!")

    def getLogs(self):
        """Read the latest logs of every function; dump all of them at the end.

        NOTE(review): every call requests the most recent LOG_READ_LIMIT
        entries, so successive reads may overlap and ``allLogs`` may contain
        duplicate entries.

        Raises:
            subprocess.CalledProcessError: if ``gcloud`` exits with a non-zero status.
        """
        for func in self.functions:
            command = [
                "gcloud", "functions", "logs", "read", func,
                "--region", self.REGION,
                "--format", "json",
                "--limit", str(self.LOG_READ_LIMIT),
            ]
            rawLogs = subprocess.check_output(command)
            self.allLogs[func].extend(json.loads(rawLogs))
        if self.finalWord:
            for func in self.functions:
                self.writeLogs[func] = self.allLogs[func]
            # NOTE(review): "*path" looks like a placeholder that must be
            # replaced with the real output location -- confirm.
            # Opened with "w": appending a second JSON document to an existing
            # file would make it unreadable for json.load.
            with open(os.getcwd() + "*path" + str(self.workflow) + ", " + str(self.count) + ', data.json', 'w') as outfile:
                json.dump(self.writeLogs, outfile)

### An Example for getting logs

In [None]:
# workflow = workflowName
# messages = [array of inputs]
# workflowFunctions = [array of workflow functions]
# initFunc = initial function of the workflow
# sleepTime = between invocation of functions(sec)
# count = number of invocations per each input
# workflowObj = GetWorkflowLogs(workflow, messages, count, sleepTime, workflowFunctions, initFunc)

# Analyzing Logs and Retrieving Data for Each Function in the Workflow

In [None]:
class AnalyzeLogs:
    """Extract start/finish timestamps of one function from stored logs.

    Constructing an instance loads the stored files, resolves the request IDs
    and writes the per-request data of ``function`` to a JSON file.

    Parameters:
        logsFile: path to a JSON file mapping function names to lists of log
            entries; every entry is read through its ``execution_id``,
            ``log`` and ``time_utc`` keys.
        publisherExeFile: path to a JSON list of execution IDs of the initial
            function.
        msgExeFile: path to a JSON object mapping those execution IDs to the
            input message.
        function: name of the function to analyze.
        initFunc: name of the initial function of the workflow.
    """

    # Prefix of the log lines that carry a request ID (presumably emitted by
    # logging.warning on the root logger inside the functions -- TODO confirm).
    REQUEST_MARKER = "WARNING:root:"

    def __init__(self, logsFile, publisherExeFile, msgExeFile, function, initFunc):
        print(function)
        self.initFunction = initFunc
        self.function = function
        self.timeDiff = []
        self.messageSizeArray = []
        self.execodes = []
        self.reqIDs = []
        self.reqMsgDic = {}
        self.funcExecodes = []
        self.latencyPerExeCode = {}
        with open(logsFile) as json_file:
            writeLogs = json.load(json_file)
            self.logs = writeLogs[self.function]
            self.initLogs = writeLogs[self.initFunction]
        with open(publisherExeFile) as json_file:
            self.execodes = json.load(json_file)
        with open(msgExeFile) as json_file:
            self.msgExe = json.load(json_file)
        self.fetchReqIDs()
        self.getData()
        print(self.function + " DONE")

    def fetchReqIDs(self):
        """Map every known execution ID of the initial function to a request ID.

        The request ID is the text of the first marker log line of that
        execution with the marker removed.  Executions without such a line
        are skipped.
        """
        marker = self.REQUEST_MARKER
        for exeId in self.execodes:
            for entry in self.initLogs:
                logText = entry['log']
                # Log entries may carry no text at all; skip them instead of
                # failing on the substring test.
                if logText is None:
                    continue
                if entry['execution_id'] == exeId and marker in logText:
                    reqID = logText.replace(marker, "")
                    self.reqIDs.append(reqID)
                    self.reqMsgDic[reqID] = self.msgExe[exeId]
                    break

    @staticmethod
    def _normalizedTime(entry):
        """Return the entry's ``time_utc`` without a trailing ``Z``.

        A ``.000`` fraction is appended when the timestamp has none; the
        entry is updated in place.
        """
        stamp = entry['time_utc']
        if stamp.endswith('Z'):
            stamp = stamp[:-1]
            # Only add a fraction when there is none yet, otherwise the
            # timestamp would end up with two fractional parts.
            if "." not in stamp:
                stamp += ".000"
            entry['time_utc'] = stamp
        return stamp

    def getData(self):
        """Collect message, start and finish time per request and store them.

        For each request ID the first log line of ``function`` containing the
        marker followed by the ID determines the execution; its
        "Function execution started" and "finished with status" lines provide
        the timestamps.  The result is written as JSON.
        """
        print(self.function)
        data = {}
        reqExeMap = {}
        counter = 0
        for reqID in self.reqIDs:
            needle = self.REQUEST_MARKER + reqID
            found = False
            for entry in self.logs:
                logText = entry['log']
                if logText is not None and needle in logText:
                    counter += 1
                    self.funcExecodes.append(entry['execution_id'])
                    reqExeMap[entry['execution_id']] = reqID
                    found = True
                    break
            if not found:
                print(reqID + " NOT FOUND")
        print("finalCounter: " + str(counter))
        for exeId in self.funcExecodes:
            reqID = reqExeMap[exeId]
            record = {"message": self.reqMsgDic[reqID]}
            data[reqID] = record
            for entry in self.logs:
                logText = entry['log']
                # Same guard as above: entries without text cannot be a
                # start or finish line.
                if entry['execution_id'] != exeId or logText is None:
                    continue
                if logText == "Function execution started":
                    record["start"] = self._normalizedTime(entry)
                elif "finished with status" in logText:
                    record["finish"] = self._normalizedTime(entry)
        print(len(data))
        # NOTE(review): "*path" looks like a placeholder that must be replaced
        # with the real output location -- confirm.
        # Opened with "w": appending a second JSON document to an existing
        # file would make it unreadable for json.load.
        with open(os.getcwd() + "*path" + str(self.function) + ', data.json', 'w') as outfile:
            json.dump(data, outfile)
            

### An Example for retrieving data

In [None]:
# LogsPath = "path to stored logs"
# ExeIDPath = "path to exe ids"
# msgExePath = "path to stored exe IDs, msg dict"
# initFunc = "name of first function in the workflow"
# for func in workflowFunctions:
#     AnalyzeLogsObj = AnalyzeLogs(LogsPath, ExeIDPath, msgExePath, func, initFunc)


## Merging data from all functions in the workflow

In [None]:
# paths = [array of prev stored paths]

# data = {}
# for path in paths:
#     with open(path) as json_file:
#         data[(path.split("/")[-1]).split(",")[0]]=(json.load(json_file))

# Generating DataFrame from the Retrieved Data

In [None]:
# Build the column data for the DataFrame.
# Expects `data`, `initFunc`, `workflowFunctions` and `successors` to be
# defined by the previous cells (see the commented examples above).
reqIDs = []
inputs = []
func1S = []
generatedData = {}
generatedData2 = {}
# successors = [array of array of successors]

timeFormat = "%Y-%m-%d %H:%M:%S.%f"
oneMillisecond = datetime.timedelta(milliseconds=1)

# One row per request seen by the initial function.
for req, record in data[initFunc].items():
    reqIDs.append(req)
    inputs.append(record['message'])
generatedData["reqID"] = reqIDs
generatedData["inputs"] = inputs

# Per function: start/finish timestamps and execution duration in ms.
for func in workflowFunctions:
    generatedData2[func + "-Start"] = []
    generatedData2[func + "-Finish"] = []
    generatedData[func] = []
    newData = data[func]
    for req in reqIDs:
        start = datetime.datetime.strptime(newData[req]["start"], timeFormat)
        generatedData2[func + "-Start"].append(start)
        finish = datetime.datetime.strptime(newData[req]["finish"], timeFormat)
        generatedData2[func + "-Finish"].append(finish)
        # Divide the whole timedelta: `.microseconds` is only the sub-second
        # component and silently drops full seconds.
        generatedData[func].append((finish - start) / oneMillisecond)

# Per edge func -> successor: gap between the finish of `func` and the start
# of its successor in ms (negative when the successor started earlier).
for position, func in enumerate(workflowFunctions):
    for successor in successors[position]:
        generatedData[func + "-" + successor] = [
            (successorStart - funcFinish) / oneMillisecond
            for successorStart, funcFinish in zip(
                generatedData2[successor + "-Start"],
                generatedData2[func + "-Finish"],
            )
        ]

print(generatedData)

# The DataFrame that the storing cell below pickles as `df`.
df = pd.DataFrame(generatedData)
        
    
    

### Storing the dataframe in pickle format

In [None]:
# Pickle the DataFrame to a path derived from the current working directory.
# NOTE(review): "*path" looks like a placeholder for the real file name -- confirm.
pickleTarget = os.getcwd() + "*path"
df.to_pickle(pickleTarget)