# Importing Libraries

In [None]:
import subprocess
import json
import shlex
import datetime
from sys import getsizeof
import time
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import math
import networkx as nx
from criticalpath import Node
import plotly.express as px
from IPython.display import Image

# Getting Critical Path and Slack Times

In [None]:
class slackAnalysis:
    """Critical-path and slack-time analysis for a workflow of functions.

    On construction the class:

    1. reduces every duration column of ``df`` to a single number (its mean
       or a quantile) and builds the dependency edges of the workflow,
    2. asks the ``criticalpath`` package for the critical path and the total
       workflow duration,
    3. runs a forward pass (earliest start/finish) and a backward pass
       (latest start/finish) over the dependency graph,
    4. collects everything in a DataFrame that is pickled to
       ``os.getcwd() + slackpklPath``.

    Attributes populated along the way: ``tasks`` (name -> duration),
    ``dependencies`` (list of ``(predecessor, successor)`` tuples), ``es``,
    ``ef``, ``ls``, ``lf`` (name -> time), ``duration``, ``crPath`` and
    ``slackAnalysisData`` (the column dict behind the DataFrame).

    The dependency graph must be acyclic and every column that ends up in
    the result must be reachable from ``initial``; otherwise the forward
    pass leaves it without an earliest start and building the DataFrame
    raises ``KeyError``.
    """

    def __init__(self, mode, sp, df, workflowFunctions, successors, initial, slackpklPath, memory):
        """Run the whole analysis.

        Args:
            mode: ``"cost"`` or ``"latency"``.
            sp: ``"mean"`` or a quantile in ``[0, 1]`` passed to
                ``Series.quantile``.
            df: DataFrame with one duration column per task.
            workflowFunctions: task names (cost mode only).
            successors: ``successors[i]`` lists the successors of
                ``workflowFunctions[i]`` (cost mode only).
            initial: name of the entry task of the workflow.
            slackpklPath: path suffix, appended to the current working
                directory, where the result DataFrame is pickled.
            memory: per-function memory values, used for the
                ``"Memory(GB)"`` column in cost mode.

        Raises:
            ValueError: if ``mode`` is neither ``"cost"`` nor ``"latency"``.
        """
        # Fail early: an unknown mode would otherwise leave ``tasks`` empty
        # and surface later as an unrelated KeyError.
        if mode not in ("cost", "latency"):
            raise ValueError(f"mode must be 'cost' or 'latency', got {mode!r}")

        self.slackAnalysisData = {}
        # Billing granularity: in cost mode durations are rounded up to a
        # multiple of this value (same unit as the durations).
        self.pricing_resolution = 100
        self.es = {}
        self.ef = {}
        self.ls = {}
        self.lf = {}
        self.df = df
        self.slackpklPath = slackpklPath
        self.dependencies = []
        self.tasks = {}
        self.mode = mode
        self.memory = memory

        if mode == "cost":
            self.costCalc(df, sp, workflowFunctions, successors)
        else:
            self.latencyCalc(df, sp)

        self.duration, self.crPath = self.findCriticalPath(self.tasks, self.dependencies)
        self.completeESEF(initial)
        self.completeLSLF(self.duration, self.crPath)
        self.getSlackDataframe()

    @staticmethod
    def _summarizeDurations(df, sp):
        """Return ``{column: mean or sp-quantile}`` for every column of ``df``."""
        if sp == "mean":
            return {col: df[col].mean() for col in df.columns}
        return {col: df[col].quantile(sp) for col in df.columns}

    def getSlackDataframe(self):
        """Assemble the per-function result table, pickle it and return it.

        Columns: ``es``, ``ls``, ``ef``, ``lf``, ``duration``, ``slackTime``
        (``lf - ef`` clamped at 0) and ``function``; in cost mode also
        ``Memory(GB)`` and ``GB-sec``.

        Returns:
            The resulting ``pandas.DataFrame`` (also written to
            ``os.getcwd() + self.slackpklPath``).
        """
        data = self.slackAnalysisData
        functions = self.df.columns

        data["es"] = [self.es[func] for func in functions]
        data["ls"] = [self.ls[func] for func in functions]
        data["ef"] = [self.ef[func] for func in functions]
        data["lf"] = [self.lf[func] for func in functions]
        data["duration"] = [self.tasks[func] for func in functions]
        data["slackTime"] = [max(0, self.lf[func] - self.ef[func]) for func in functions]
        data["function"] = functions

        if self.mode == "cost":
            data["Memory(GB)"] = self.memory
            # Round each duration up to the next multiple of the pricing
            # resolution, then divide by 1000 to obtain the "GB-sec" figure.
            billedDuration = (
                np.ceil(np.array(data["duration"]) / self.pricing_resolution)
                * self.pricing_resolution
            )
            data["GB-sec"] = (np.array(data["Memory(GB)"]) * billedDuration) / 1000

        slackDF = pd.DataFrame(data)
        slackDF.to_pickle(os.getcwd() + self.slackpklPath)
        return slackDF

    def costCalc(self, df, sp, workflowFunctions, successors):
        """Build ``tasks`` and ``dependencies`` for cost mode.

        Only the columns named in ``workflowFunctions`` are kept (and stored
        as ``self.df``). ``successors[i]`` holds the successors of
        ``workflowFunctions[i]``; each pair becomes one dependency edge.
        """
        df = df[workflowFunctions]
        self.df = df
        self.tasks.update(self._summarizeDurations(df, sp))

        # Pair each function with its successor list by position rather than
        # by ``list.index`` lookup (quadratic, and wrong for repeated names).
        for position, func in enumerate(workflowFunctions):
            for successor in successors[position]:
                self.dependencies.append((func, successor))

    def completeESEF(self, initial):
        """Forward pass: fill ``es``/``ef`` for every task reachable from ``initial``.

        ``initial`` starts at 0; every other task starts at the largest
        earliest-finish among its predecessors. A task is revisited only
        when its earliest start actually grew, so the work no longer scales
        with the number of distinct paths through the graph.
        """
        successorsOf = {}
        for predecessor, successor in self.dependencies:
            successorsOf.setdefault(predecessor, []).append(successor)

        self.es[initial] = 0
        self.ef[initial] = self.tasks[initial]

        # FIFO work list, consumed with a cursor while it grows.
        pending = [initial]
        cursor = 0
        while cursor < len(pending):
            current = pending[cursor]
            cursor += 1
            finish = self.ef[current]
            for successor in successorsOf.get(current, ()):
                if successor in self.es and self.es[successor] >= finish:
                    continue  # already constrained at least this late
                self.es[successor] = finish
                self.ef[successor] = finish + self.tasks[successor]
                pending.append(successor)

    def completeLSLF(self, duration, criticalPath):
        """Backward pass: fill ``ls``/``lf`` starting from the terminal tasks.

        Terminal tasks (those with no successor) must finish by ``duration``.
        Every other task must finish by the smallest latest-start among its
        successors; latest starts are clamped at 0. Tasks without any
        dependency at all count as terminals, so a single-function workflow
        is handled.

        Args:
            duration: total workflow duration.
            criticalPath: unused; kept for interface compatibility.
        """
        predecessorsOf = {}
        sources = set()
        for predecessor, successor in self.dependencies:
            predecessorsOf.setdefault(successor, []).append(predecessor)
            sources.add(predecessor)

        # Known tasks plus dependency targets, de-duplicated in order.
        candidates = list(self.tasks) + [successor for _, successor in self.dependencies]
        pending = list(dict.fromkeys(name for name in candidates if name not in sources))

        for terminal in pending:
            self.lf[terminal] = duration
            self.ls[terminal] = duration - self.tasks[terminal]

        cursor = 0
        while cursor < len(pending):
            current = pending[cursor]
            cursor += 1
            latestStart = self.ls[current]
            for predecessor in predecessorsOf.get(current, ()):
                if predecessor in self.lf and self.lf[predecessor] <= latestStart:
                    continue  # already constrained at least this early
                self.lf[predecessor] = latestStart
                self.ls[predecessor] = max(0, latestStart - self.tasks[predecessor])
                pending.append(predecessor)

    def latencyCalc(self, df, sp):
        """Build ``tasks`` and ``dependencies`` for latency mode.

        Every column of ``df`` is a task. A column named ``"X-Y"`` is placed
        between ``X`` and ``Y``: each plain column ``X`` precedes every
        ``"X-..."`` column, and ``"X-Y"`` precedes ``Y``.
        """
        self.tasks.update(self._summarizeDurations(df, sp))

        columns = list(df.columns)
        edgeColumns = [col for col in columns if "-" in col]
        for func in columns:
            if "-" in func:
                self.dependencies.append((func, func.split("-")[1]))
                continue
            for edge in edgeColumns:
                if edge.split("-")[0] == func:
                    self.dependencies.append((func, edge))

    def findCriticalPath(self, tasks, dependencies):
        """Compute the critical path with the ``criticalpath`` package.

        Args:
            tasks: mapping of task name -> duration.
            dependencies: iterable of ``(predecessor, successor)`` pairs.

        Returns:
            ``(workflow_duration, crit_path)`` where ``crit_path`` is the
            list of node names (as strings) on the critical path. Both
            values are also printed.
        """
        workflow = Node("Workflow")
        for name, taskDuration in tasks.items():
            workflow.add(Node(name, duration=taskDuration))
        for predecessor, successor in dependencies:
            workflow.link(predecessor, successor)
        workflow.update_all()

        crit_path = [str(node) for node in workflow.get_critical_path()]
        workflow_duration = workflow.duration

        print(f"The current critical path is: {crit_path}")
        print("."*50)
        print(f"The current workflow duration is: {workflow_duration} milliseconds")
        return workflow_duration, crit_path



### Example: building the slack-analysis results DataFrame

In [None]:
# Names this cell expects to be defined beforehand:
#
#   mode              - measurement mode, either "latency" or "cost"
#   workflowFunctions - [array of workflow functions]
#   successors        - [array of array of successors]
#   pklPath           - "path to dataframe object" (appended to the working directory)
#   inputName         - "input name" used to select rows of the dataframe
#   initialFunc       - "name of initial function in the workflow"
#   slackpklPath      - "path for storing slack data"
#   memory            - [array of allocated memory]
#   statisticalParam  - a number between 0 and 1 to use that quantile of the
#                       durations, or "mean" to use their mean

# Load the recorded measurements.
df = pd.read_pickle(os.getcwd() + pklPath)

# Keep only the rows for the requested input and drop the bookkeeping
# columns, leaving one duration column per function.
df2 = df.loc[df["inputs"] == inputName].drop(columns=["reqID", "inputs"])

# Constructing the object runs the analysis and stores the slack data.
slackAnalysisObj = slackAnalysis(
    mode,
    statisticalParam,
    df2,
    workflowFunctions,
    successors,
    initialFunc,
    slackpklPath,
    memory,
)