In [1]:
import clingo

def parse_fact_to_symbol(fact_str):
    """Turn one textual fact into a ``clingo.Symbol``.

    Any trailing ``.`` characters are removed from ``fact_str`` before the
    remaining text is handed to ``clingo.parse_term``.
    """
    term_text = fact_str.rstrip('.')
    symbol = clingo.parse_term(term_text)
    return symbol

def convert_list_to_symbols(fact_list):
    """Parse every string fact in ``fact_list`` and return the symbols as a list.

    The result keeps the order of ``fact_list``; each entry is produced by
    ``parse_fact_to_symbol``.
    """
    symbols = []
    for fact in fact_list:
        symbols.append(parse_fact_to_symbol(fact))
    return symbols

In [2]:
import json
import matplotlib.pyplot as plt
import os
import sys
# Put the parent of the current working directory on sys.path before the
# imports below run (presumably so `draco` resolves to the checkout that
# sits one level up -- confirm against the repository layout).
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
sys.path.append(parent_dir)

import draco 
from draco import Draco
from draco.asp_utils import get_constants
import pandas as pd



In [3]:
# read training sets for different Draco(GPT) models
def read_json_files_from_directory(directory_path):
    """Load every ``*.json`` file found directly inside ``directory_path``.

    Parameters
    ----------
    directory_path : str or os.PathLike
        Directory to scan. Sub-directories are not descended into.

    Returns
    -------
    dict
        Maps each file name (not the full path) to its decoded JSON content,
        with keys in sorted file-name order. Files that cannot be decoded are
        reported with ``print`` and left out of the result.
    """
    json_data = {}

    # os.listdir makes no ordering guarantee; sorting keeps the resulting
    # dict identical from run to run and from machine to machine.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(directory_path, filename)

        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                json_data[filename] = json.load(file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: report the unreadable file and keep going.
                print(f"Error reading {filename}")

    return json_data

# Load every JSON file in ../rank/data once; `files` maps file name -> parsed
# content and is what get_channels() indexes into below.
directory_path = '../rank/data'
files = read_json_files_from_directory(directory_path)

In [4]:
def _channel_label(spec):
    """Build the ``"<q1> <q2> <n> <task>"`` label for one chart spec dict.

    ``spec`` is a dict as returned by ``draco.answer_set_to_dict``. The label
    lists the channel of the encoding whose field is ``q1``, then ``q2``, then
    ``n``, followed by the spec's ``task``.
    """
    view = spec["view"][0]
    # Work on a copy so the spec that was passed in is left untouched.
    encodings = list(view["mark"][0]["encoding"])
    # When the mark carries only two encodings, the view's first facet entry
    # is treated as the third one.
    if len(encodings) == 2:
        encodings.append(view["facet"][0])

    label = ""
    # The q1 channel is written without a leading separator; the others are
    # space-separated.
    for field, separator in (("q1", ""), ("q2", " "), ("n", " ")):
        for item in encodings:
            if item["field"] == field:
                label += separator + item["channel"]
    return label + " " + spec["task"]


def get_channels(files, path):
    """Describe every positive/negative pair of ``files[path]`` as label strings.

    Parameters
    ----------
    files : dict
        Maps a file name to a list of pairs; each pair has a ``'positive'``
        and a ``'negative'`` list of string facts.
    path : str
        Key of ``files`` to process.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(positives, negatives)``, one label per pair in each list, in pair
        order. Each label is built from that spec's own encodings and task.
    """
    positives = []
    negatives = []

    for pair in files[path]:
        positive_data = draco.answer_set_to_dict(convert_list_to_symbols(pair['positive']))
        negative_data = draco.answer_set_to_dict(convert_list_to_symbols(pair['negative']))

        positives.append(_channel_label(positive_data))
        # The negative label takes its task from the negative spec itself.
        negatives.append(_channel_label(negative_data))

    return positives, negatives


In [5]:
# Channel/task labels for every pair stored under "kim2018_draco2.json".
positives, negatives = get_channels(files, "kim2018_draco2.json")

In [6]:
from collections import Counter

def get_stringified_channels(positives, negatives):
    """Count how often each label occurs in ``positives`` and in ``negatives``.

    Returns a ``(positive_counts, negative_counts)`` pair of ``Counter`` objects.
    """
    return Counter(positives), Counter(negatives)

# Frequency of each label among the positive and the negative examples.
pos_c, neg_c = get_stringified_channels(positives, negatives)

In [7]:
# Show the label counts for the positive examples.
pos_c

Counter({'y x color value': 99,
         'size x y summary': 84,
         'y color x summary': 76,
         'y color x value': 75,
         'size y x summary': 71,
         'y size x value': 70,
         'x color y value': 69,
         'x y color value': 69,
         'y x row value': 68,
         'x size y value': 66,
         'x y row value': 60,
         'x color y summary': 58,
         'y size x summary': 52,
         'x size y summary': 43,
         'x y row summary': 38,
         'y x row summary': 35,
         'color x y summary': 28,
         'size y x value': 20,
         'size x y value': 20,
         'x y color summary': 16,
         'y x color summary': 16,
         'color y x summary': 11,
         'color x y value': 5,
         'color y x value': 3})

In [27]:
# def process_counts(counter):
#     processed = Counter()
#     for key, count in counter.items():
#         new_key = key.replace(' summary', '').replace(' value', '')
#         processed[new_key] += count
#     return processed

In [None]:
# def transform_labels(labels):
#     ret = []
#     for l in labels:
#         ls = l.split(" ")
#         ret.append("q1:"+ls[0]+ " q2:"+ls[1] + " n:"+ls[2])
#     return ret

## Scatterplots

In [8]:
import logging
import re


def assign_program(weights):
    """Build ``preference_weight`` facts for the given weight constant names.

    Parameters
    ----------
    weights : iterable of str
        Constant names. Iterating a dict yields its keys, so a name -> value
        mapping can be passed directly.

    Returns
    -------
    str
        One ``preference_weight(<prefix>,<name>).`` line per name that ends
        with ``_weight``, where ``<prefix>`` is the name without that suffix.
        Names without the suffix are skipped and a warning is logged.
    """
    assignments = []

    for name in weights:
        # fullmatch: the name has to END with "_weight", as the warning below
        # states; a name that merely contains it is not assigned.
        match = re.fullmatch(r"(.*)_weight", name)
        if match:
            assignments.append(f"preference_weight({match.group(1)},{name}).\n")
        else:
            logging.warning(
                f'Constant "{name}" doesn\'t end with "_weight", so it\'s not assigned.'
            )

    return "".join(assignments)

def read_weights(weight_path):
    """Read a weights file and return the constants it declares.

    The whole file is read as text and passed to ``get_constants``; whatever
    that call returns is handed back unchanged.
    """
    with open(weight_path) as weights_file:
        program_text = weights_file.read()
    return get_constants(program_text)




In [9]:
# Shared Draco instance; compute_costs() and get_facts() call its
# count_preferences() method.
default_draco = Draco()

In [10]:
# Empty placeholders. original_draco_costs is reassigned further down from a
# compute_costs() call; rank_costs and recommend_costs are not referenced
# again in the visible part of this notebook.
original_draco_costs = []
rank_costs = []
recommend_costs = []

In [11]:
def rank_and_concat(l):
    """Sort the strings in ``l`` and join them into one comma-separated string."""
    ordered = list(l)
    ordered.sort()
    return ",".join(ordered)

In [12]:
def compute_costs(directory_path, json_name, penalties):
    """Compute a weighted preference cost for every example in a pair file.

    The JSON files in ``directory_path`` are loaded and the entry named
    ``json_name`` is walked pair by pair. For each pair two costs are
    appended to the result: first the one for ``pair["positive"]``, then the
    one for ``pair["negative"]``.

    A cost is the sum, over the items of ``default_draco.count_preferences``,
    of ``count * penalties[name + '_weight'] / 1000``.
    """

    def weighted_cost(facts):
        # Accumulate term by term so the floating-point result does not
        # depend on how a summation helper rounds.
        total = 0
        preference_counts = default_draco.count_preferences(facts)
        for preference, occurrences in preference_counts.items():
            total += occurrences * penalties[preference + '_weight'] / 1000
        return total

    loaded = read_json_files_from_directory(directory_path)

    costs = []
    for pair in loaded[json_name]:
        costs.append(weighted_cost(pair["positive"]))
        costs.append(weighted_cost(pair["negative"]))
    return costs

In [13]:
def _encoding_facts(spec):
    """Summarise one chart spec dict as ``{'q1', 'q2', 'n', 'task'}`` entries.

    ``spec`` is a dict as returned by ``draco.answer_set_to_dict``. For each
    of the fields ``q1``, ``q2`` and ``n`` the channel of the matching
    encoding is stored under that field name (a field with no matching
    encoding gets no key); ``'task'`` is copied from the spec.
    """
    view = spec["view"][0]
    # Work on a copy so the spec that was passed in is left untouched.
    encodings = list(view["mark"][0]["encoding"])
    # When the mark carries only two encodings, the view's first facet entry
    # is treated as the third one.
    if len(encodings) == 2:
        encodings.append(view["facet"][0])

    summary = {}
    for field in ("q1", "q2", "n"):
        for item in encodings:
            if item["field"] == field:
                summary[field] = item["channel"]
    summary["task"] = spec["task"]
    return summary


def get_facts(directory_path, json_name, penalties):
    """Collect the channel assignment and task of every example in a pair file.

    Parameters
    ----------
    directory_path : str
        Directory whose JSON files are loaded.
    json_name : str
        File name (key) of the pair list to process.
    penalties : Any
        Not used; kept so existing calls keep working.

    Returns
    -------
    list[dict]
        Two dicts per pair -- first for the positive example, then for the
        negative one -- in the same order in which ``compute_costs`` emits
        its costs.
    """
    files = read_json_files_from_directory(directory_path)

    facts = []
    for pair in files[json_name]:
        # Each spec is converted exactly once, independent of how many
        # preferences it triggers, so every example always yields an entry.
        for polarity in ("positive", "negative"):
            spec = draco.answer_set_to_dict(convert_list_to_symbols(pair[polarity]))
            facts.append(_encoding_facts(spec))

    return facts

In [14]:
# Costs of the kim2018 pairs under the weights in
# DracoGPT_recommend_gpt-4-0125-preview_0_weights.txt.
penalties = read_weights("./weights/DracoGPT_recommend_gpt-4-0125-preview_0_weights.txt")
gpt4_turbo_recommend_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)
# The CSV export cell further down zips these facts with the cost lists, so
# they have to be computed for the notebook to run top to bottom.
gpt4_turbo_recommend_facts = get_facts('../rank/data', "kim2018_draco2.json", penalties)

In [15]:
# Costs of the kim2018 pairs under the weights in
# DracoGPT_recommend_gpt-4-0613_0_weights.txt.
penalties = read_weights("./weights/DracoGPT_recommend_gpt-4-0613_0_weights.txt")
gpt4_recommend_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [16]:
# Costs of the kim2018 pairs under the weights in
# DracoGPT_recommend_gpt-3.5-turbo-0125_0_weights.txt.
penalties = read_weights("./weights/DracoGPT_recommend_gpt-3.5-turbo-0125_0_weights.txt")
chatgpt_recommend_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [17]:
# Costs of the kim2018 pairs under the weights in
# DracoGPT_recommend_altair_gpt-4-0125-preview_0_weights.txt.
penalties = read_weights("./weights/DracoGPT_recommend_altair_gpt-4-0125-preview_0_weights.txt")
altair_gpt4_turbo_recommend_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [18]:
# Costs of the kim2018 pairs under the weights in
# DracoGPT_recommend_altair_gpt-4-0613_0_weights.txt.
penalties = read_weights("./weights/DracoGPT_recommend_altair_gpt-4-0613_0_weights.txt")
altair_gpt4_recommend_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [19]:
# Costs of the kim2018 pairs under the weights in
# ../rank/weights/DracoGPT_gpt-4-0125-preview_0_weights.txt.
penalties = read_weights("../rank/weights/DracoGPT_gpt-4-0125-preview_0_weights.txt")
gpt4_turbo_rank_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [20]:
# Costs of the kim2018 pairs under the weights in
# ../rank/weights/DracoGPT_gpt-4-0613_0_weights.txt.
penalties = read_weights("../rank/weights/DracoGPT_gpt-4-0613_0_weights.txt")
gpt4_rank_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [21]:
# Costs of the kim2018 pairs under the weights in
# ../rank/weights/DracoGPT_gpt-3.5-turbo-0125_0_weights.txt.
penalties = read_weights("../rank/weights/DracoGPT_gpt-3.5-turbo-0125_0_weights.txt")
chatgpt_rank_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [22]:
# Costs of the kim2018 pairs under ../rank/weights/original_weights.txt.
# This replaces the empty original_draco_costs placeholder defined earlier.
penalties = read_weights("../rank/weights/original_weights.txt")
original_draco_costs = compute_costs('../rank/data', "kim2018_draco2.json", penalties)

In [23]:
import csv
filename = "costs_and_facts.csv"
col_names = ['kim', 'rank', 'rec_vl', 'rec_altair', 'q1', 'q2', 'n', 'task']

# Rows written so far; used to emit each distinct row only once.
distinct_rows = set()

# Export the four cost series together with the per-example facts.
with open(filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(col_names)  # header line

    # Walk the parallel lists in lock-step; zip stops at the shortest one.
    for a, b, c, d, e in zip(
        original_draco_costs,
        gpt4_turbo_rank_costs,
        gpt4_turbo_recommend_costs,
        altair_gpt4_turbo_recommend_costs,
        gpt4_turbo_recommend_facts,
    ):
        row = (a, b, c, d, e['q1'], e['q2'], e['n'], e['task'])
        if row not in distinct_rows:
            distinct_rows.add(row)
            writer.writerow(row)

In [24]:
def dedup(lists):
    """Drop duplicate rows from a collection of parallel lists.

    The lists are read in lock-step (row ``i`` is the tuple of every list's
    ``i``-th element, truncated to the shortest list). Each distinct row is
    kept once, in first-seen order, and the rows are split back into columns.

    Parameters
    ----------
    lists : iterable of iterables
        Any number of parallel sequences with hashable elements.

    Returns
    -------
    list[list]
        One list per input list, all of equal length. When there are no rows
        at all, one empty list per input list is returned.
    """
    lists = list(lists)

    # dict keys are unique and keep insertion order, so the result is
    # reproducible (a set would give an arbitrary row order).
    unique_rows = dict.fromkeys(zip(*lists))
    if not unique_rows:
        return [[] for _ in lists]

    return [list(column) for column in zip(*unique_rows)]

In [30]:
# De-duplicated cost triples. Index 0/1/2 of each result follows the order of
# the lists passed in: data_lists_1 -> rank, original, altair recommend;
# data_lists_2 -> recommend, original, altair recommend.
data_lists_1 = dedup([gpt4_turbo_rank_costs, original_draco_costs, altair_gpt4_turbo_recommend_costs])
data_lists_2 = dedup([gpt4_turbo_recommend_costs, original_draco_costs, altair_gpt4_turbo_recommend_costs])

In [31]:
# Pair the rank costs with the original-weights costs (first two columns of
# data_lists_1) and write them to rank_vs_kim.json.
rank_vs_kim = {
    "rank": data_lists_1[0],
    "kim": data_lists_1[1]
}
with open("rank_vs_kim.json", "w") as file:
    json.dump(rank_vs_kim, file, indent=4)

# Same for the recommend costs (first two columns of data_lists_2), written
# to rec_vl_vs_kim.json.
rec_vl_vs_kim = {
    "rec_vl": data_lists_2[0],
    "kim": data_lists_2[1]
}
with open("rec_vl_vs_kim.json", "w") as file:
    json.dump(rec_vl_vs_kim, file, indent=4)

In [32]:
# `data_lists` is not assigned anywhere in the notebook as saved, so build it
# here from the series that the CSV export labels 'rank' and 'rec_vl'.
# NOTE(review): the third list only influences which rows count as
# duplicates (dedup() is called with three lists everywhere else in this
# notebook); confirm this matches the analysis that produced the original file.
data_lists = dedup([gpt4_turbo_rank_costs, gpt4_turbo_recommend_costs, altair_gpt4_turbo_recommend_costs])

rank_vs_rec_vl = {
    "rank": data_lists[0],
    "rec_vl": data_lists[1]
}
with open("rank_vs_rec_vl.json", "w") as file:
    json.dump(rank_vs_rec_vl, file, indent=4)


In [34]:
# `data_lists` is not assigned anywhere in the notebook as saved, so build it
# here from the series that the CSV export labels 'rec_vl' and 'rec_altair'.
# NOTE(review): the third list only influences which rows count as
# duplicates (dedup() is called with three lists everywhere else in this
# notebook); confirm this matches the analysis that produced the original file.
data_lists = dedup([gpt4_turbo_recommend_costs, altair_gpt4_turbo_recommend_costs, original_draco_costs])

rec_vl_vs_rec_altair = {
    "rec_vl": data_lists[0],
    "rec_altair": data_lists[1]
}
with open("rec_vl_vs_rec_altair.json", "w") as file:
    json.dump(rec_vl_vs_rec_altair, file, indent=4)