# Init the data frame

In [19]:
from unicodedata import name
from pandas import Series, DataFrame
import pandas as pd

# NOTE(review): not referenced anywhere in the visible code; presumably the
# solver time limit in seconds -- confirm against the benchmark runner.
timelimit = 60

# Row labels applied to every per-instance column in ResultDf.get_df. The
# order has to match the list built by parse_single_result:
# [sat, unsat, unknown, error, timeout, time].
index_template = ["sat", "unsat", "unknown", "error", "timeout", "time(ms)"]

class ResultDic:
    """Named collection of result columns.

    Attributes:
        name (str): label given at construction time
        resdic (dict): maps a column name to its column data
    """

    name: str
    resdic: dict

    def __init__(self, name: str) -> None:
        self.resdic = {}
        self.name = name

    def update(self, colname: str, onedata: list):
        """store one column, replacing any column with the same name

        Args:
            colname (str): name of column
            onedata (list): one column data
        """
        self.resdic[colname] = onedata

    def isempty(self):
        """tell whether no column has been stored yet

        Returns:
            bool: True when resdic holds no column
        """
        return not self.resdic


class ResultDf:
    """Result columns of several backends, keyed by result file name.

    Attributes:
        resdics (dict[str, ResultDic]): one ResultDic per name passed to
            the constructor
    """

    resdics: dict[str, ResultDic]

    def __init__(self, resultfiles: list[str]) -> None:
        self.resdics = {}
        for file in resultfiles:
            self.resdics[file] = ResultDic(file)

    def update(self, backend: str, colname: str, onedata: list):
        """store one column of data for a backend

        Args:
            backend (str): name of backend, must be one of the constructor names
            colname (str): name of column
            onedata (list): one column data
        """
        target = self.resdics[backend]
        target.update(colname, onedata)

    def get_df(self) -> list[DataFrame]:
        """convert every non-empty backend to a dataframe

        Each frame has one row per stored column name and one column per
        label of index_template, with all values cast to int64. The backend
        name is attached to the frame as its ``name`` attribute. Backends
        without data are skipped after printing a warning.

        Returns:
            res (list[Dataframe]): list of dataframes, one per non-empty backend
        """
        frames = []
        for entry in self.resdics.values():
            if not entry.isempty():
                table = DataFrame(entry.resdic, index=index_template)
                table = table.astype("int64").T
                table.name = entry.name
                frames.append(table)
            else:
                print(f"Warning: {entry.name} is empty")
        return frames


# Get the files containing the results


In [20]:
import os
import glob

# Name of the sub-directory under res/ whose log files are analyzed.
commit = '23-02-24_09:18:11cvc5_small'

# abspath("") resolves to the current working directory.
dirname = os.path.abspath("")

# Collect every *_log.txt located directly inside res/<commit>/.
# NOTE(review): recursive=True has no effect here because the pattern
# contains no "**" component.
analyzed_files = glob.glob(f"{dirname}/res/{commit}/*_log.txt", recursive=True)
print(analyzed_files)


['/home/hudh/github/ostrich/experiment/res/23-02-24_09:18:11cvc5_small/cvc5_log.txt']


# Parse the files

In [21]:
import re


def parse_single_result(result: str) -> list[int | float]:
    SAT_RE = re.compile("^sat", re.M)
    UNSAT_RE = re.compile("^unsat", re.M)
    UNKNOWN_RE = re.compile("^unknown|^--Unknown", re.M)
    ERR_RE = re.compile("^\(error |^--Exception", re.M | re.I)
    TIMEOUT_RE = re.compile("^Timeout", re.M)
    TIME_RE = re.compile("^Time: (.*)ms", re.M)
    sat = unsat = timeout = unknown = err = timeused = 0
    if SAT_RE.search(result):
        sat = 1
        timeused = float(TIME_RE.search(result).group(1))
    elif UNSAT_RE.search(result):
        unsat = 1
        timeused = float(TIME_RE.search(result).group(1))
    elif UNKNOWN_RE.search(result):
        unknown = 1
        timeused = float(TIME_RE.search(result).group(1))
    elif TIMEOUT_RE.search(result):
        timeout = 1
        timeused = float(TIME_RE.search(result).group(1))
    # elif ERR_RE.search(result):
    else:
        err = 1
        timeused = float(TIME_RE.search(result).group(1))
    onedata = [sat, unsat, unknown, err, timeout, timeused]
    return onedata


# Header line that names the benchmark instance of one result chunk.
_INSTANCE_RE = re.compile(r"^Running \[(.*)\]", re.M)


def parse_files(filenames: list[str]) -> ResultDf:
    """parse result log files to get data

    Every file is split on the ``----splitter----`` marker and each chunk is
    classified with parse_single_result. Results are stored per backend,
    where the backend key is the base name of the log file. The name of each
    instance that timed out is printed.

    Args:
        filenames (list[str]): paths of the log files

    Returns:
        ResultDf: result dataframe

    Raises:
        ValueError: if a chunk has no ``Running [<instance>]`` line
    """
    res = ResultDf([os.path.basename(filename) for filename in filenames])
    timeout_slot = index_template.index("timeout")
    for filename in filenames:
        backend = os.path.basename(filename)
        with open(filename, "r") as f:
            content = f.read()
        # The text after the last splitter is dropped; presumably every
        # result is terminated by a splitter, so it holds no result.
        for result in content.split("----splitter----")[:-1]:
            match = _INSTANCE_RE.search(result)
            if match is None:
                raise ValueError(
                    f"no 'Running [<instance>]' line in a result of {filename}"
                )
            instance = match.group(1)
            onedata = parse_single_result(result)
            if onedata[timeout_slot] > 0:
                print(instance)
            res.update(backend, instance, onedata)
    return res


# Parse every log file found above; parse_files prints the name of each
# timed-out instance as a side effect.
res = parse_files(analyzed_files)


def get_final_df(res: ResultDf):
    """summarise every backend into one column of a combined table

    For each dataframe returned by ``res.get_df()`` the rows are the summed
    sat / unsat / unknown / error / timeout counts, followed by the mean time
    of the sat rows, of the unsat rows and of all rows. A mean that is NaN
    is replaced by 0 and every value is cast to int64.

    Args:
        res (ResultDf): result dataframe

    Returns:
        DataFrame: final dataframe, one column per backend, named after it
    """
    summaries: list[DataFrame] = []
    for frame in res.get_df():
        averages = {
            "avg_time(sat) ms": [frame.loc[frame["sat"] == 1, "time(ms)"].mean()],
            "avg_time(unsat) ms": [frame.loc[frame["unsat"] == 1, "time(ms)"].mean()],
            "avg_time(total) ms": [frame["time(ms)"].mean()],
        }
        avg_part = DataFrame(averages).T.fillna(0)
        # the summed time is meaningless next to the averages, so drop it
        count_part = frame.sum().drop(index="time(ms)")
        summary = pd.concat([count_part, avg_part]).astype("int64")
        summary.name = frame.name
        summaries.append(summary)

    names = [summary.name for summary in summaries]
    # columns come out numbered 0..n-1; map each position back to its name
    combined = pd.concat(summaries, axis=1, ignore_index=True)
    return combined.rename(columns=dict(enumerate(names)))


# Build the summary table; presumably this is the last expression of a
# notebook cell, so its value is what gets displayed.
get_final_df(res)


experiment/benchmarks/sanitized/count_benchmark_others/small/instance426.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance8943.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance8217.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance3421.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance6992.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance3529.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance2007.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance1503.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance8020.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance7382.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance5632.smt2
experiment/benchmarks/sanitized/count_benchmark_others/small/instance4127.smt2
experiment/benchmarks/sanitized/count_benchmark_other

Unnamed: 0,cvc5_log.txt
sat,3764
unsat,3375
unknown,0
error,43
timeout,124
avg_time(sat) ms,193
avg_time(unsat) ms,231
avg_time(total) ms,1227
