In [1]:
from importlib.metadata import files
from pathlib import Path
import os
import numpy as np

In [2]:
def dump(lines, fname, path):
    """Append the given text lines to the file ``path + fname``.

    Args:
        lines: iterable of strings; they are concatenated without any
            separator, so each entry should carry its own newline.
        fname: target file name. The sentinel ``'skipdump'`` turns the call
            into a no-op.
        path: directory prefix. It is joined to ``fname`` by plain string
            concatenation, so it must already end with a path separator.

    Returns:
        None. The file is opened in append mode, so repeated calls with the
        same target keep growing it.
    """
    if fname != 'skipdump':
        target = path + fname
        with open(target, 'a') as sink:
            payload = ''.join(lines)
            sink.write(payload)

In [19]:
def getname(fname, phase):
    """Build the output name for one phase of a raw capture file.

    Args:
        fname: raw file name; the text after its last ``'_'`` is reused as
            the suffix of the result.
        phase: phase number; 1 maps to ``index`` and 2 to ``browse``.

    Returns:
        ``"index_<suffix>"`` or ``"browse_<suffix>"``, or None for any other
        phase value.
    """
    suffix = fname.split('_')[-1]
    if phase == 1:
        return f"index_{suffix}"
    if phase == 2:
        return f"browse_{suffix}"
    return None

# Locations of the raw syscall traces and of the per-phase slices cut from them.
benign_dir = "/proj/spark-cluster-PG0/ransomware-setups/benignware"
in_dir = f"{benign_dir}/media-server/output/syscall_output/"
out_dir = f"{benign_dir}/media-server/output/syscall_output_parsed/"

os.makedirs(out_dir, exist_ok=True)

# Each trace is cut at lines containing 'sys_exit_pidfd_getfd':
#   * the 1st marker starts phase 1, the 2nd closes it and starts phase 2,
#     and the 3rd closes phase 2 and ends processing of that file;
#   * a line containing 'sys_enter_pidfd_open' pauses collection until the
#     next marker;
#   * a file that ends while phase 2 is still open has that phase flushed
#     after the loop.
# NOTE(review): presumably the workload driver issues these pidfd calls on
# purpose as phase delimiters -- confirm against the capture scripts.
for file in os.listdir(in_dir):
    print(file)
    with open(in_dir + file, 'r') as trace:
        lines = trace.readlines()
        phase, phrases, readout = 0, [], False
        for line in lines:
            if 'sys_exit_pidfd_getfd' not in line:
                # Ordinary line: either a pause request or payload.
                if 'sys_enter_pidfd_open' in line:
                    readout = False
                elif readout:
                    phrases.append(line)
                continue

            # Phase marker: flush the phase that just ended (if any) ...
            if phase > 0:
                out_fname = getname(file, phase)
                dump(phrases, out_fname, out_dir)
                print(f"Wrote {len(phrases)} to {out_fname}")
            # ... and start collecting the next one.
            readout = True
            phrases = []
            phase += 1
            if phase == 3:
                break

        # No third marker was seen: phase 2 is still pending.
        if phase == 2:
            out_fname = getname(file, phase)
            dump(phrases, out_fname, out_dir)
            print(f"Wrote {len(phrases)} to {out_fname}")

media_syscall_1
Wrote 1160999 to index_1
Wrote 3379301 to browse_1
media_syscall_5
Wrote 1152074 to index_5
Wrote 3289390 to browse_5
media_syscall_2
Wrote 1187079 to index_2
Wrote 3765416 to browse_2
media_syscall_3
Wrote 1187555 to index_3
Wrote 3429316 to browse_3
media_syscall_4
Wrote 1111324 to index_4
Wrote 3735223 to browse_4


In [20]:
def getname(fname, phase):
    """Name the per-phase output derived from a raw capture file.

    NOTE(review): this definition repeats the ``getname`` from the
    syscall-splitting cell; whichever cell ran last is the one in effect.

    Args:
        fname: raw file name; whatever follows its last ``'_'`` becomes the
            suffix of the returned name.
        phase: 1 selects the ``index`` prefix, 2 selects ``browse``.

    Returns:
        ``"<prefix>_<suffix>"`` for phases 1 and 2, otherwise None.
    """
    tail = fname.split('_')[-1]
    prefix = "index" if phase == 1 else ("browse" if phase == 2 else None)
    if prefix is None:
        return None
    return f"{prefix}_{tail}"

# Locations of the raw network captures and of the per-phase slices cut from them.
benign_dir = "/proj/spark-cluster-PG0/ransomware-setups/benignware"
in_dir = f"{benign_dir}/media-server/output/network_output/"
out_dir = f"{benign_dir}/media-server/output/network_output_parsed/"

os.makedirs(out_dir, exist_ok=True)

# Each capture is cut at lines that contain both ',54321,' and
# 'Port unreachable':
#   * the 1st such line starts phase 1, the 2nd closes it and starts phase 2,
#     and the 3rd closes phase 2 and ends processing of that file;
#   * a ',54321,' line WITHOUT 'Port unreachable' only pauses collection;
#   * a file that ends while phase 2 is still open has that phase flushed
#     after the loop.
# NOTE(review): presumably 54321 is a port probed on purpose to delimit
# phases -- confirm against the capture scripts.
for file in os.listdir(in_dir):
    print(file)
    with open(in_dir + file, 'r') as capture:
        lines = capture.readlines()
        phase, phrases, readout = 0, [], False
        for line in lines:
            if ',54321,' not in line:
                # NOTE(review): this marker looks carried over from the
                # syscall-trace cell; confirm it can occur in network data.
                if 'sys_enter_pidfd_open' in line:
                    readout = False
                elif readout:
                    phrases.append(line)
                continue

            if 'Port unreachable' not in line:
                # Marker traffic that is not the delimiter itself: stop
                # collecting and drop the line.
                readout = False
                continue

            # Phase delimiter: flush the phase that just ended (if any) ...
            if phase > 0:
                out_fname = getname(file, phase)
                dump(phrases, out_fname, out_dir)
                print(f"Wrote {len(phrases)} to {out_fname}")
            # ... and start collecting the next one.
            readout = True
            phrases = []
            phase += 1
            if phase == 3:
                break

        # No third delimiter was seen: phase 2 is still pending.
        if phase == 2:
            out_fname = getname(file, phase)
            dump(phrases, out_fname, out_dir)
            print(f"Wrote {len(phrases)} to {out_fname}")

media_netcall_3
Wrote 9609 to index_3
Wrote 1027853 to browse_3
media_netcall_4
Wrote 10916 to index_4
Wrote 1148831 to browse_4
media_netcall_1
Wrote 10086 to index_1
Wrote 1074122 to browse_1
media_netcall_5
Wrote 10006 to index_5
Wrote 1185808 to browse_5
media_netcall_2
Wrote 9570 to index_2
Wrote 1068029 to browse_2


In [17]:
def get_pname(idx):
    """Translate a phase number into its label.

    Args:
        idx: phase number.

    Returns:
        ``"index"`` for 1, ``"browse"`` for 2, and None for anything else.
    """
    for number, label in ((1, "index"), (2, "browse")):
        if idx == number:
            return label
    return None

def get_count(phasename, path):
    """Return one more than the number of entries in ``path`` whose name
    contains ``phasename``.

    Args:
        phasename: substring looked for in each directory entry name.
        path: directory to list.

    Returns:
        ``1 + <number of matching entries>``; 1 when nothing matches.
    """
    matching = [entry for entry in os.listdir(path) if phasename in entry]
    return len(matching) + 1

def get_fname(phase, path, original):
    """Choose the output file name for one phase table.

    Args:
        phase: ``"all"``, ``"index"`` or ``"browse"``.
        path: directory handed to ``get_count`` to pick the next running
            number for ``index``/``browse`` tables.
        original: name returned unchanged when ``phase`` is ``"all"``; it is
            not used for any other phase.

    Returns:
        ``original`` for ``"all"``; ``"<phase>_hardware_<n>"`` with
        ``n = get_count(phase, path)`` for ``"index"`` and ``"browse"``;
        None for any other phase.
    """
    if phase == "all":
        return original
    # Both phase kinds share one naming scheme. The previous version also
    # split `original` into parts that were never used, which only made the
    # call fail when `original` was not a string.
    if phase in ('index', 'browse'):
        return f"{phase}_hardware_{get_count(phase, path)}"
    return None

def dump_perf_data(data, ptimes, outfile, outdir):
    """Append perf-counter samples to per-phase, comma-separated tables.

    Args:
        data: ``{metric: {time: value}}``. Rows are emitted for times rounded
            to one decimal in 0.1 steps, so a sample only shows up if its
            time key equals such a rounded value.
        ptimes: ``{group: [t0, t1, ...]}`` where ``group`` contains the names
            of the metrics it applies to and the list holds phase boundaries.
            Samples with ``t(i-1) <= t <= t(i)`` go to the phase labelled
            ``get_pname(i)``. When falsy, all of ``data`` is written as a
            single table named ``outfile``.
        outfile: table name used when ``ptimes`` is falsy; printed in every
            case. Phase tables are named by ``get_fname`` instead.
        outdir: directory the tables are appended to.

    Raises:
        ValueError: if the first metric has no matching group in ``ptimes``.

    Each table gets a ``time,<metric>,...,`` header line followed by one row
    per 0.1 step between the earliest and latest sample of that table; a
    metric without a sample at a step is written as 0.
    """
    processed_data = {}
    if not ptimes:
        processed_data = {"all": data}
    else:
        phases = None
        for key, samples in data.items():
            # Boundaries are stored per metric group; the last group whose
            # name contains this metric wins.
            matched = [bounds for group, bounds in ptimes.items() if key in group]
            if matched:
                phases = matched[-1]
            elif phases is None:
                raise ValueError(f"no phase boundaries found for metric {key!r}")
            else:
                # Kept from the previous behaviour (boundaries carried over
                # from the preceding metric), but no longer silent.
                print(f"Warning: no phase boundaries for {key}; "
                      "reusing those of the previous metric")

            pstart = 0
            for i, pend in enumerate(phases):
                if i == 0:
                    # The first entry only marks where phase 1 begins.
                    pstart = pend
                    continue
                pname = get_pname(i)
                pdata = {t: v for t, v in samples.items() if pstart <= t <= pend}
                processed_data.setdefault(pname, {})[key] = pdata
                pstart = pend

    print(outfile)
    for ptype, series in processed_data.items():
        target_name = get_fname(ptype, outdir, outfile)
        keylist = list(series)
        header = "time," + "".join(f"{key}," for key in keylist)

        # Use the real extent of the samples. The former fixed start value of
        # 100 padded every table that begins later than t=100 with zero rows,
        # and an empty series raised IndexError.
        stamps = [t for key in keylist for t in series[key]]
        if stamps:
            ticks = np.arange(min(stamps), max(stamps) + 0.09, 0.1)
        else:
            ticks = []

        # Write to the directory passed in (not a notebook-level global) and
        # keep the file open for the whole table instead of once per row.
        with open(f"{outdir}/{target_name}", "a") as f:
            f.write(header + '\n')
            for t in ticks:
                t = round(t, 1)
                row = f"{t}," + "".join(f"{series[key].get(t, 0)}," for key in keylist)
                f.write(row + '\n')


In [18]:
## Perf counters processor
def save(time, value, metric, data):
    """Record one sample in the nested ``{metric: {time: value}}`` mapping.

    Args:
        time: sample time, used as the inner key.
        value: counter value stored for that time.
        metric: metric name, used as the outer key; its inner mapping is
            created on first use.
        data: mapping updated in place.

    Returns:
        The same ``data`` object, after the update. A repeated
        ``(metric, time)`` pair overwrites the earlier value.
    """
    per_metric = data.setdefault(metric, {})
    per_metric[time] = value
    return data

# --- Workload presets --------------------------------------------------------
# Exactly one preset should be active. Each preset sets:
#   prog_name / prog_dir : workload name and its directory under benign_dir
#   run_idx              : index (into the '_'-split file name) of the run
#                          label; -1 means the file names carry no run label
#   run_count            : number of iterations per run
#   has_phase            : whether per-file phase boundary files exist

# prog_name="browser"
# prog_dir=f"{prog_name}"
# run_idx=-2
# run_count=5
# has_phase = False

# prog_name="perf"
# prog_dir=f"{prog_name}/cpu2017"
# run_idx=-3
# run_count=3
# has_phase = False

# prog_name="filebench"
# prog_dir=f"{prog_name}"
# run_idx=-2
# run_count=3
# has_phase = False

prog_name="media"
prog_dir=f"{prog_name}-server"
run_idx=-1
run_count=5
has_phase = True

benign_dir = "/proj/spark-cluster-PG0/ransomware-setups/benignware"
in_dir = f"{benign_dir}/{prog_dir}/output/hardware_output/"
out_dir = f"{benign_dir}/{prog_dir}/output/hardware_output_parsed/"
phase_dir = f"{benign_dir}/{prog_dir}/output/hw_phase_files/"

os.makedirs(out_dir, exist_ok=True)

# Discover the run labels and metric groups from the input file names.
# The name is split on '_': parts[2:run_idx] form the metric group (re-joined
# with '_' because metric names themselves contain underscores).
runs = []
perf_mets = []
file_prefix = f"{prog_name}_hardware"
for file in os.listdir(in_dir):
    separator = "_"
    file_parts = file.split('_')
    if run_idx == -1:
        file_run = "NORUN"
    else:
        file_run = file_parts[run_idx]
    if file_run not in runs:
        runs.append(file_run)
    met_group = separator.join(file_parts[2:run_idx])
    if met_group not in perf_mets:
        perf_mets.append(met_group)
print(runs)
print(perf_mets)

if run_idx == -3:
    add_fname = "_r"
else:
    add_fname = ""

# Not used below; kept in case another cell refers to it.
all_data = {}
for run in runs:
    for i in range(1,run_count+1):
        # All metric groups of one (run, iteration) are merged into `data`;
        # `phase_times` keeps the phase boundaries of each group.
        data = {}
        phase_times = {}
        for met in perf_mets:
            if run == "NORUN":
                file = f"{file_prefix}_{met}_{i}"
            else:
                file = f"{file_prefix}_{met}_{run}{add_fname}_{i}"
            if has_phase:
                phase_file = file+'_phase'
                with open(f"{phase_dir}/{phase_file}",'r') as fp:
                    lines = fp.readlines()
                if len(lines) > 3:
                    # More boundaries than expected: report the file and
                    # leave this group without phase times.
                    print(f"Something wrong here:{phase_file}")
                else:
                    p_time = []
                    for l in lines:
                        # Each line is '<a>:<b>' and is stored as a*60 + b.
                        timeparts = l.split(':')
                        fl_num = int(timeparts[0])*60+int(timeparts[1])
                        p_time.append(fl_num)
                    phase_times[met] = p_time
            with open(f"{in_dir}/{file}",'r') as f:
                lines = f.readlines()
            # The first two lines are skipped, as are lines containing '#'.
            for l in lines[2:]:
                l=l.replace("<not counted>","0")
                if "#" in l:
                    continue
                parts = l.split()
                if not parts:
                    # Blank line: nothing to parse.
                    continue
                # Columns: time, value (with thousands separators), metric.
                time = round(float(parts[0]),1)
                value = int(parts[1].replace(',',''))
                metric = parts[2]
                data = save(time, value, metric, data)
        # The labelled-run branch used to assign `file` here, which left
        # `outfile` undefined (or stale from an earlier run of the cell).
        if run == "NORUN":
            outfile = f"{file_prefix}_{i}"
        else:
            outfile = f"{file_prefix}_{run}_{i}"
        dump_perf_data(data, phase_times, outfile, out_dir)

['NORUN']
['mem-loads,mem-stores,cache-references,LLC-load-misses', 'instructions,br_inst_retired.all_branches,avx_insts.all,block:block_rq_issue', 'uops_executed_port.port_2,uops_executed_port.port_3,uops_executed_port.port_4,uops_executed_port.port_7', 'uops_executed_port.port_0,uops_executed_port.port_1,uops_executed_port.port_5,uops_executed_port.port_6']
media_hardware_1
media_hardware_2
media_hardware_3
media_hardware_4
media_hardware_5
