# Initialize the result data frames

In [1]:
import this
from unicodedata import name
from pandas import Series, DataFrame
import pandas as pd

# Per-instance time limit. It is multiplied by 1000 to fill the "time(ms)"
# entry of a timed-out instance, so it is presumably expressed in seconds.
timelimit = 60

# Row labels of one instance's result, in the order the parsed data lists them.
index_template = [
    "sat",
    "unsat",
    "unknown",
    "error",
    "timeout",
    "time(ms)",
]


class ResultDic:
    """Named collection of result columns for a single backend."""

    # Label of the backend these columns belong to.
    name: str
    # Maps a column name to the list of values stored for it.
    resdic: dict

    def __init__(self, name: str) -> None:
        """Create an empty collection labelled *name*."""
        self.resdic = {}
        self.name = name

    def update(self, colname: str, onedata: list):
        """Store *onedata* under *colname*, replacing any previous column."""
        self.resdic.update({colname: onedata})

    def isempty(self):
        """Return True while no column has been stored."""
        return len(self.resdic) == 0


class ResultDf:
    """Per-backend result columns that can be turned into DataFrames."""

    # One ResultDic per known backend, keyed by backend name.
    resdics: dict[str, ResultDic]

    def __init__(self) -> None:
        """Create an empty ResultDic for each of the three known backends."""
        self.resdics = {
            backend: ResultDic(backend)
            for backend in ("unary", "catra", "baseline")
        }

    def update(self, backend: str, colname: str, onedata: list):
        """Store one column of data for a backend.

        Args:
            backend (str): name of the backend; must be one of the keys
                created in ``__init__``
            colname (str): name of the column
            onedata (list): values of that column

        Raises:
            KeyError: if ``backend`` is not a known backend
        """
        target = self.resdics[backend]
        target.update(colname, onedata)

    def get_df(self) -> list[DataFrame]:
        """Build one DataFrame per non-empty backend.

        Each frame is indexed by the stored column names and has the labels
        of ``index_template`` as its columns (the data is cast to int64 and
        transposed). The backend name is attached as the frame's ``name``
        attribute. Empty backends are skipped with a printed warning.

        Returns:
            list[DataFrame]: the frames, in backend insertion order
        """
        frames = []
        for entry in self.resdics.values():
            if entry.isempty():
                print(f"Warning: {entry.name} is empty")
                continue
            frame = DataFrame(entry.resdic, index=index_template)
            frame = frame.astype("int64").T
            frame.name = entry.name
            frames.append(frame)
        return frames



The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


# Get the files containing the results


In [2]:
import os
import glob

# Name of the result directory (under ./res) whose logs are analysed.
commit = "22-11-02_22:56:47totaltime"

# abspath("") resolves to the current working directory.
dirname = os.path.abspath("")

# Every "<something>_log.txt" directly inside the chosen result directory.
# NOTE(review): the pattern contains no "**", so recursive=True should not
# change what is matched here; confirm before relying on nested directories.
log_pattern = f"{dirname}/res/{commit}/*_log.txt"
analyzed_files = glob.glob(log_pattern, recursive=True)
print(analyzed_files)


['/home/hudh/github/ostrich/experiment/res/22-11-02_22:56:47totaltime/unary_log.txt']


# Parse the files

In [3]:
import re

def parse_single_result(result: str) -> list[int|float]:
    SAT_RE = re.compile("^sat", re.M)
    UNSAT_RE = re.compile("^unsat", re.M)
    UNKNOWN_RE = re.compile("^unknown|^--Unknown", re.M)
    ERR_RE = re.compile("^\(error |^--Exception", re.M | re.I)
    TIMEOUT_RE = re.compile("^Timeout", re.M)
    TIME_RE = re.compile("^Time: (.*)ms", re.M)
    sat = unsat = timeout = unknown = err = timeused = 0
    if SAT_RE.search(result):
        sat = 1
        timeused = float(TIME_RE.search(result).group(1))
    elif UNSAT_RE.search(result):
        unsat = 1
        timeused = float(TIME_RE.search(result).group(1))
    elif UNKNOWN_RE.search(result):
        unknown = 1
        timeused = float(timelimit * 500)
    elif TIMEOUT_RE.search(result):
        timeout = 1
        timeused = float(timelimit * 1000)
    elif ERR_RE.search(result):
    # else:
        err = 1
        timeused = float(timelimit* 500)
    onedata = [sat, unsat, unknown, err, timeout, timeused]
    return onedata

# "<backend>_log.txt" somewhere in the path; the backend is lowercase letters.
_BACKEND_RE = re.compile(r"([a-z]+)_log\.txt")
# "Running [<instance>]" at the start of a line names the instance of a chunk.
_INSTANCE_RE = re.compile(r"^Running \[(.*)\]", re.M)


def parse_files(filenames: list[str]) -> ResultDf:
    """Parse backend log files into a ResultDf.

    The backend is taken from the ``<backend>_log.txt`` part of each file
    name. The file content is split on ``----splitter----``; every chunk
    except the last one is treated as the log of one instance.

    Args:
        filenames (list[str]): paths of the log files

    Returns:
        ResultDf: parsed results, one column per instance

    Raises:
        ValueError: if a file name does not contain ``<backend>_log.txt`` or
            a chunk has no ``Running [<instance>]`` line
        KeyError: if the backend is not one that ResultDf knows
    """
    res = ResultDf()
    for filename in filenames:
        backend_match = _BACKEND_RE.search(filename)
        if backend_match is None:
            raise ValueError(
                f"cannot derive a backend name from {filename!r}"
            )
        backend = backend_match.group(1)
        with open(filename, "r") as f:
            results = f.read().split("----splitter----")
        # whatever follows the final splitter is not parsed as an instance
        for result in results[:-1]:
            instance_match = _INSTANCE_RE.search(result)
            if instance_match is None:
                raise ValueError(
                    f"{filename!r}: chunk without a 'Running [...]' line"
                )
            onedata = parse_single_result(result)
            res.update(backend, instance_match.group(1), onedata)
    return res


# Parse every log file collected in analyzed_files into per-backend results.
res = parse_files(analyzed_files)


def get_final_df(res: ResultDf):
    """Summarise every non-empty backend into one column of a table.

    For each frame returned by ``res.get_df()`` the summary holds the column
    sums of all flags (everything except "time(ms)") followed by the mean
    time of the sat rows, of the unsat rows and of all rows. The values are
    cast to int64, so fractional milliseconds are dropped.

    NOTE(review): a backend without any sat (or unsat) row yields a NaN mean,
    which the int64 cast presumably rejects; confirm how that should be
    reported.

    Args:
        res (ResultDf): parsed results

    Returns:
        DataFrame: one column per backend, named after the backend
    """
    summaries: list[DataFrame] = []
    for frame in res.get_df():
        times = frame["time(ms)"]
        averages = DataFrame(
            {
                "avg_time(sat) ms": [times[frame["sat"] == 1].mean()],
                "avg_time(unsat) ms": [times[frame["unsat"] == 1].mean()],
                "avg_time(total) ms": [times.mean()],
            }
        ).T
        counts = frame.sum().drop(index="time(ms)")
        summary = pd.concat([counts, averages]).astype("int64")
        summary.name = frame.name
        summaries.append(summary)

    # ignore_index numbers the columns 0..n-1; map each position to its backend
    combined = pd.concat(summaries, axis=1, ignore_index=True)
    position_to_name = {
        position: summary.name for position, summary in enumerate(summaries)
    }
    return combined.rename(columns=position_to_name)


# Build the per-backend summary table; as a bare expression its value is not
# stored (presumably it is the cell's last statement so the notebook shows it).
get_final_df(res)




Unnamed: 0,unary
sat,27185
unsat,20567
unknown,1150
error,7440
timeout,354
avg_time(sat) ms,1230
avg_time(unsat) ms,1102
avg_time(total) ms,5900
