In [121]:
import os
import re
import csv

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import math


In [113]:
# Folder that is walked for the benchmark result CSV files.
directory = "20230727"

# Both patterns are applied with .search(): `p` picks the first run of three
# digits out of a file name (stored as the density), `instance_size_regex`
# picks the first run of four digits out of an instance name (stored as the
# instance size).
instance_size_regex = re.compile(r"\d\d\d\d")
p = re.compile(r"\d\d\d")

In [143]:
# Containers filled by the parsing cells further down:
#   density_structure[density][file_type]        -> metadata + raw CSV frame
#   type_structure[grounder_type][file_type]     -> per-column value lists
density_structure = dict()
type_structure = dict()

# Kinds of result files; each name is matched as a substring of the file name.
GROUNDING_TIME = "grounding_time"
GROUNDING_SIZE = "grounding_size"
TOTAL_TIME = "total_time"
file_types = [GROUNDING_TIME, GROUNDING_SIZE, TOTAL_TIME]

# Grounder configurations; each name is matched as a prefix of a CSV column.
GRINGO = "gringo"
IDLV = "idlv"
NEWGROUND_GRINGO = "newground-gringo"
NEWGROUND_IDLV = "newground-idlv"
grounder_types = [GRINGO, IDLV, NEWGROUND_GRINGO, NEWGROUND_IDLV]



In [144]:
# Give every grounder its own, independent empty bucket per result-file type.
for grounder_type in grounder_types:
    type_structure[grounder_type] = {file_type: {} for file_type in file_types}
        

In [145]:
# Parse all files into density_structure and type_structure.
#
# For every file below `directory` whose name does not start with "00" and
# contains a three-digit density, and for every file type named in the file
# name, the CSV is loaded and
#   * stored (with its metadata) in density_structure[density][file_type];
#   * every column that starts with a grounder name (timeout flag columns
#     excluded) is appended to type_structure[grounder_type][file_type],
#     together with matching "instance_size" / "instance_density" lists.
#
# NOTE: this cell appends to the lists in type_structure, so re-running it
# without re-running the initialisation cell duplicates the data.

# Cut-offs applied while parsing; values beyond them are replaced by -1.
DURATION_LIMIT = 1800       # "duration" columns keep values strictly below this
SIZE_LIMIT = 32 * 10**6     # "size" columns keep values up to and including this
TIMEOUT_MARKERS = ("timeout-occurred", "timeout-occured")  # both spellings are skipped

for root, _dirs, files in os.walk(directory):
    for file in files:
        if file.startswith("00"):
            continue

        result = p.search(file)
        if result is None:
            continue

        density = result.group(0)
        density_structure.setdefault(density, {})

        for file_type in file_types:
            if file_type not in file:
                continue

            # os.walk yields files relative to `root`, which differs from
            # `directory` for anything inside a subdirectory.
            file_data = pd.read_csv(os.path.join(root, file), delimiter=',')
            density_structure[density][file_type] = {
                "filename": file,
                "density": density,
                "type": file_type,
                "csv": file_data,
            }

            for column_name in list(file_data.columns):
                if any(marker in column_name for marker in TIMEOUT_MARKERS):
                    continue

                for grounder_type in grounder_types:
                    if not column_name.startswith(grounder_type):
                        continue

                    # Parse the column first so that nothing is appended when
                    # the column turns out to be unusable.
                    raw_column_data = list(file_data[column_name])
                    if "duration" in column_name:
                        parsed_column_data = [
                            time if float(time) < DURATION_LIMIT else -1
                            for time in raw_column_data
                        ]
                    elif "size" in column_name:
                        parsed_column_data = [
                            size if float(size) <= SIZE_LIMIT else -1
                            for size in raw_column_data
                        ]
                    else:
                        # Previously only printed a warning and then appended
                        # stale (or undefined) data from an earlier column.
                        raise ValueError(
                            f"Column '{column_name}' in '{file}' is neither a "
                            "duration nor a size column"
                        )

                    list_instances = list(file_data["instance"])
                    instance_size_list = []
                    for instance in list_instances:
                        size_match = instance_size_regex.search(str(instance))
                        if size_match is None:
                            raise ValueError(
                                f"No instance size found in instance name "
                                f"'{instance}' ({file})"
                            )
                        instance_size_list.append(int(size_match.group(0)))

                    # Key insertion order (size, density, measurement) is kept
                    # as before; the measurement column is added last.
                    target = type_structure[grounder_type][file_type]
                    target.setdefault("instance_size", []).extend(instance_size_list)
                    target.setdefault("instance_density", []).extend(
                        [int(density)] * len(list_instances)
                    )
                    target.setdefault(column_name, []).extend(parsed_column_data)


                        


In [146]:
# Turn the parsed lists of every (grounder, file type) pair into DataFrames:
#   "data_frame" -> instance_size, instance_density and the measured column
#   "parsed_df"  -> same, without the rows whose measurement is negative
#                   (values replaced by -1 during parsing)
#
# Keys written by this cell itself; they must be skipped when looking for the
# measured column, otherwise re-running the cell picks up "parsed_df".
DERIVED_KEYS = ("data_frame", "parsed_df")

for grounder_type in grounder_types:
    for file_type in file_types:
        entry = type_structure[grounder_type][file_type]

        measurement_keys = [
            key for key in entry
            if "instance" not in key
            and not any(derived in key for derived in DERIVED_KEYS)
        ]
        if not measurement_keys:
            raise KeyError(
                f"No measurement column parsed for {grounder_type} / {file_type}"
            )
        # As before, the last matching key wins.
        special_name = measurement_keys[-1]

        instance_size = entry["instance_size"]
        instance_density = entry["instance_density"]
        special = entry[special_name]

        data_frame = pd.DataFrame({
            "instance_size": instance_size,
            "instance_density": instance_density,
            special_name: special,
        })
        entry["data_frame"] = data_frame

        # Remove exactly the rows with a negative measurement, keeping the
        # original index labels.
        cpy_df = data_frame.loc[~(data_frame[special_name] < 0)].copy()
        entry["parsed_df"] = cpy_df

In [155]:
# Every plot pairs a grounder's grounding size with one of its time
# measurements: first all grounders with the grounding time, then all
# grounders with the total time.
combinations = [
    ((grounder, GROUNDING_SIZE), (grounder, time_kind))
    for time_kind in (GROUNDING_TIME, TOTAL_TIME)
    for grounder in (GRINGO, IDLV, NEWGROUND_GRINGO, NEWGROUND_IDLV)
]



In [188]:
# One PDF per combination: every (instance size, instance density) point is
# drawn as a split marker whose right half is coloured by the first frame's
# third column and whose left half is coloured by the second frame's third
# column.

# Instance sizes that are left out of every plot.
excluded_instance_sizes = [310, 320, 340, 350, 370, 380, 400, 410]

for combination in combinations:
    (size_grounder, size_kind), (time_grounder, time_kind) = combination

    label = f"NPRC {size_grounder} {size_kind} {time_kind}"
    plot_file_name = f"NPRC {size_grounder}_{size_kind}_{time_kind}.pdf"

    data_frame = type_structure[size_grounder][size_kind]["parsed_df"].copy()
    time_frame = type_structure[time_grounder][time_kind]["parsed_df"].copy()

    # Keep only the points that are present in both frames. A set gives
    # constant-time membership tests (the former list lookup was quadratic).
    data_keys = list(zip(data_frame["instance_size"], data_frame["instance_density"]))
    time_keys = list(zip(time_frame["instance_size"], time_frame["instance_density"]))
    intersected_instances = set(data_keys) & set(time_keys)

    # Explicit boolean Series so that an empty frame is still treated as a
    # row mask and not as an (empty) column selection.
    keep_data = pd.Series(
        [key in intersected_instances for key in data_keys],
        index=data_frame.index, dtype=bool,
    )
    keep_time = pd.Series(
        [key in intersected_instances for key in time_keys],
        index=time_frame.index, dtype=bool,
    )
    data_frame = data_frame[keep_data]
    time_frame = time_frame[keep_time]

    data_frame = data_frame[~data_frame["instance_size"].isin(excluded_instance_sizes)]
    time_frame = time_frame[~time_frame["instance_size"].isin(excluded_instance_sizes)]

    plotsize = (6, 3)

    plt.figure(figsize=plotsize)
    plt.scatter(
        data_frame["instance_size"], data_frame["instance_density"],
        linewidth=1, edgecolor="black", s=180,
        c=data_frame.iloc[:, 2],
        marker=MarkerStyle("o", fillstyle="right"), cmap="viridis",
    )
    cbar = plt.colorbar()
    cbar.set_label("grounding size [KB] (right half)", rotation=90)

    plt.scatter(
        time_frame["instance_size"], time_frame["instance_density"],
        linewidth=1, edgecolor="black", s=180,
        c=time_frame.iloc[:, 2],
        marker=MarkerStyle("o", fillstyle="left"), cmap="autumn",
    )
    cbar = plt.colorbar()
    # Name the measurement that is actually plotted; this label used to read
    # "grounding time" for the total-time combinations as well.
    time_label = time_kind.replace("_", " ")
    cbar.set_label(f"{time_label} [s] (left half)", rotation=90)

    plt.title(label)

    plt.xlabel("instance size")
    plt.ylabel("instance density")
    plt.xlim([0, 405])

    plt.savefig(plot_file_name, bbox_inches='tight', transparent=True)
    # Close the figure so that figures do not pile up across iterations.
    plt.close('all')