In [None]:
import os
import sys
import csv
import json
import numpy as np
import seaborn as sns
import re
import matplotlib.pyplot as plt

In [None]:
# Pre-compiled patterns for one scheduler debug-log line, e.g.
#   "DEBUG:root:Time[0.00000 ms]:    Job[ 0], ARRIVE, vgg16"
# The unit suffix inside Time[...] may be separated by a space or attached
# directly to the number ("Time[0.00000s]"), and the event may follow Job[...]
# after a comma or after plain whitespace.
_TIME_RE = re.compile(
    r'Time\[\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)\s*[A-Za-z]*\s*\]'
)
_JOB_EVENT_RE = re.compile(r'Job\[\s*(\d+)\s*\]\s*,?\s*([^\s,]+)')


def extract_info(s: str) -> dict:
    """Parse one scheduler log line into its job id, timestamp and event name.

    Args:
        s: A log line containing ``Time[<number> <unit>]`` and
            ``Job[<id>]`` followed by the event token.

    Returns:
        A dict with keys ``"job_id"`` (int), ``"time"`` (float, the number
        exactly as written in the log; the unit suffix is ignored and no unit
        conversion is performed) and ``"event"`` (str, the first token after
        ``Job[...]``).

    Raises:
        ValueError: If the line has no ``Time[...]`` field or no
            ``Job[...]`` field followed by an event token.
    """
    time_match = _TIME_RE.search(s)
    job_match = _JOB_EVENT_RE.search(s)
    if time_match is None or job_match is None:
        raise ValueError("Unrecognised log line: %r" % s)

    info = dict()
    info["job_id"] = int(job_match.group(1))
    info["time"] = float(time_match.group(1))
    info["event"] = job_match.group(2)
    return info

In [None]:
# Smoke-check the parser on a single sample log line; as the last expression
# in the cell, the returned dict is shown as the cell output.
extract_info("DEBUG:root:Time[0.00000 ms]:    Job[ 0], ARRIVE, vgg16")

In [None]:
def get_job_num(filename):
    """Read a scheduler log and return ``(job_count, parsed_events)``.

    Only lines that start with ``"DEBUG"`` are parsed (via ``extract_info``);
    everything else in the file is ignored. The job count is the number of
    parsed events divided by three, rounded down.

    NOTE(review): the division by three presumably reflects one
    ARRIVE/START/END triple per job -- confirm against the log producer.
    """
    with open(filename, "r") as log_file:
        raw_lines = log_file.readlines()

    events = []
    for raw in raw_lines:
        if raw.startswith("DEBUG"):
            events.append(extract_info(raw))

    return len(events) // 3, events

In [None]:
def get_job_event_time(job_id:int,event:str,lines:list,scale_factor):
    """Return the scaled timestamp of the first matching event, or ``None``.

    Args:
        job_id: Job identifier to look for.
        event: Event name to look for (compared with ``==``).
        lines: Parsed event dicts with ``"job_id"``, ``"event"`` and ``"time"``.
        scale_factor: Multiplier applied to the logged time.

    Returns:
        ``float(time) * scale_factor / 1000`` for the first record whose job id
        and event both match, or ``None`` when no record matches.
    """
    hit = next(
        (rec for rec in lines if rec["job_id"] == job_id and rec["event"] == event),
        None,
    )
    if hit is None:
        return None
    return float(hit["time"]) * scale_factor / 1000

In [None]:
def main(sched_name:str, scale_factor):
    """Summarise one scheduler run from its ``<sched_name>.csv`` log.

    For every job id in ``range(job_num)`` the ARRIVE, START and END times are
    looked up with ``get_job_event_time`` (which returns
    ``time * scale_factor / 1000``), and the job completion time (JCT) is
    computed as ``end - arrive``. A short report is printed.

    Args:
        sched_name: Log file name without the ``.csv`` extension.
        scale_factor: Multiplier forwarded to ``get_job_event_time``.

    Returns:
        A tuple ``(jobs, ave_jct, jct_95, makespan)`` where ``jobs`` maps each
        job id to its arrive/place/end/pend/exec times, ``ave_jct`` is the mean
        JCT, ``jct_95`` is the 95th-percentile JCT, and ``makespan`` is the
        span between the earliest and latest logged timestamps.

    Raises:
        ValueError: If the log yields no jobs, or a job is missing one of its
            ARRIVE/START/END events.
    """
    filename = sched_name + ".csv"
    job_num, lines = get_job_num(filename)
    if job_num == 0:
        # Previously this surfaced later as a bare ZeroDivisionError.
        raise ValueError("No complete jobs found in %s" % filename)

    jobs = dict()
    jct_list = list()
    for job_id in range(job_num):
        event_times = {
            name: get_job_event_time(job_id, name, lines, scale_factor)
            for name in ("ARRIVE", "START", "END")
        }
        missing = [name for name, value in event_times.items() if value is None]
        if missing:
            # Fail with a readable message instead of "None - float" TypeError.
            raise ValueError(
                "Job %d in %s is missing event(s): %s"
                % (job_id, filename, ", ".join(missing))
            )

        arrive_time = event_times["ARRIVE"]
        place_time = event_times["START"]
        end_time = event_times["END"]
        jobs[job_id] = {
            "arrive_time":arrive_time,
            "place_time":place_time,
            "end_time":end_time,
            "pend_time":place_time - arrive_time,
            "exec_time":end_time - place_time
        }
        jct_list.append(end_time - arrive_time)

    ave_jct = sum(jct_list) / job_num
    jct_95 = np.percentile(jct_list, 95)

    # Use min/max rather than first/last line so out-of-order log lines do not
    # shrink the makespan.
    # NOTE(review): unlike the JCT values, makespan is left in raw log units
    # (not multiplied by scale_factor / 1000), as in the original -- confirm
    # whether it should be scaled too.
    timestamps = [rec["time"] for rec in lines]
    makespan = max(timestamps) - min(timestamps)

    tmp = [float("%.2f" % i) for i in jct_list]
    print("JCT", tmp)
    print("*" * 40)
    print("Schedule name: %s" % sched_name)
    print("Ave JCT: %.2f" % ave_jct)
    print("95%%-th JCT: %.2f" % jct_95)
    print("Makespan: %.2f" % makespan)
    return jobs, ave_jct, jct_95, makespan

In [None]:
# Schedulers to analyse; main() reads "<name>.csv" for each entry.
sched_list = ["trace"]

# Summary metrics keyed by scheduler name.
jct_dict = {}
jct_dict_95 = {}
makespan_list = {}
scale_factor = 40

for sched in sched_list:
    jobs_info, ave_jct, jct_95, makespan = main(sched, scale_factor)
    jct_dict[sched], jct_dict_95[sched], makespan_list[sched] = ave_jct, jct_95, makespan

    # One list per timing field, in the iteration order of jobs_info.
    arrive_list, pend_list, exec_list, place_list = (
        [job[field] for job in jobs_info.values()]
        for field in ("arrive_time", "pend_time", "exec_time", "place_time")
    )

    # Gantt-style chart: the pending bar starts at the arrival time and the
    # running bar starts at the placement time, one column per job.
    job_slots = range(len(jobs_info))
    plt.cla()
    plt.style.use("fivethirtyeight")
    plt.bar(job_slots, pend_list, bottom=arrive_list, color="#B8DBB3", label="pending")
    plt.bar(job_slots, exec_list, bottom=place_list, color="#E29135", label="running")
    plt.xticks(job_slots)
    plt.legend()
    plt.savefig("%s.pdf" % sched, bbox_inches="tight")

# Cross-scheduler comparison plots. All three blocks below are switched off
# with `if 0:` and never execute as written.
# NOTE(review): `total` is not read anywhere in the visible notebook.
total = 0
# Disabled: bar chart of the average JCT per scheduler, each value divided by
# the smallest one.
# NOTE(review): the tick labels are a hard-coded list of 10 names while
# `sched_list` currently holds a single entry -- make sure they line up with
# `jct_dict` before enabling. Saving to "jct/..." presumes that directory
# already exists.
if 0:
    plt.cla()
    plt.title("JCT comparision")
    plt.style.use("fivethirtyeight")
    plt.bar(range(len(jct_dict)),np.array(list(jct_dict.values())) / min(jct_dict.values()))
    plt.xlabel("Sched + Placer")
    plt.xticks(range(len(jct_dict)),[
        "jaca_future","jaca",
        "G&T-C","GPU-C","Time-C","Arr-C",
        "G&T-LB","GPU-LB","Time-LB","Arr-LB",
    ],rotation = 0, fontsize = 8)
    
    plt.savefig("jct/jct.pdf", bbox_inches = "tight")
# Disabled: same chart for the 95th-percentile JCT (same hard-coded labels,
# smaller tick font).
if 0:
    plt.cla()
    plt.title("JCT-95th comparision")
    plt.style.use("fivethirtyeight")
    plt.bar(range(len(jct_dict_95)),np.array(list(jct_dict_95.values())) / min(jct_dict_95.values()))
    plt.xlabel("Sched + Placer")
    plt.xticks(range(len(jct_dict_95)),[
        "jaca_future","jaca",
        "G&T-C","GPU-C","Time-C","Arr-C",
        "G&T-LB","GPU-LB","Time-LB","Arr-LB",
    ],rotation = 0, fontsize = 5)
    plt.savefig("jct/jct-95th.pdf", bbox_inches = "tight")
# Disabled: same chart for the makespan; despite its name, `makespan_list` is a
# dict keyed by scheduler name.
if 0:
    plt.cla()
    plt.title("Makespan comparision")
    plt.style.use("fivethirtyeight")
    plt.bar(range(len(makespan_list)),np.array(list(makespan_list.values())) / min(makespan_list.values()))
    plt.xlabel("Sched + Placer")
    plt.xticks(range(len(makespan_list)),[
        "jaca_future","jaca",
        "G&T-C","GPU-C","Time-C","Arr-C",
        "G&T-LB","GPU-LB","Time-LB","Arr-LB",
    ],rotation = 0, fontsize = 5)
    plt.savefig("jct/makespan.pdf", bbox_inches = "tight")

In [12]:
import re

# Scratch cell: sample log text used to try out regexes for the individual
# fields. Here the time unit is attached to the number ("...s]") and the fields
# after Job[...] are separated by spaces rather than commas.
a = r'''
    DEBUG:root:Time[0.00000s]: Job[ 0] ARRIVE vgg16_1
    DEBUG:root:Time[0.00000s]: Job[ 0] START G0-G1
    DEBUG:root:Time[300.00000s]: Job[ 1] ARRIVE resnet50_1
    DEBUG:root:Time[300.00000s]: Job[ 1] START G2-G3
    DEBUG:root:Time[400.00000s]: Job[ 2] ARRIVE vgg16_2
    DEBUG:root:Time[594.28277s]: Job[ 1] END 294.282771084337
    DEBUG:root:Time[599.46988s]: Job[ 2] START G2-G3
    DEBUG:root:Time[600.00000s]: Job[ 3] ARRIVE resnet50_2
    DEBUG:root:Time[1580.62651s]: Job[ 2] END 1180.6265060240958
    DEBUG:root:Time[1567.90361s]: Job[ 3] START G2-G3
    DEBUG:root:Time[2808.51229s]: Job[ 3] END 2208.512289156633
    DEBUG:root:Time[7063.42169s]: Job[ 0] END 7063.421686746973
'''

# Regex capturing the timestamp that follows "Time"
pattern = re.compile(r'Time\[(\d+\.\d+)s\]')
# Job id inside "Job[...]"; the captured group keeps any leading whitespace
p = re.compile(r"Job\[(\s*\d+)\]")
# Event word that follows a closing bracket and a single whitespace character
ep = re.compile(r"\]\s(\w+)\s")
# GPU range such as "G0-G1"; there is no capture group, so findall returns the
# whole matched text
gp = re.compile(r"G\d+-G\d+")
# Find all matches (only `gp` is exercised; `pattern`, `p` and `ep` are unused
# in this cell)
matches = gp.findall(a)

# Print the result
# NOTE(review): the saved cell output lists only the second GPU of each pair,
# which this pattern cannot produce -- it looks stale; re-run the cell.
print(matches)

['G1', 'G3', 'G3', 'G3']
