In [1]:
import warnings
import os
import sys

warnings.filterwarnings("ignore")

In [2]:
import json
import numpy as np

In [3]:
# Put the parent of the current working directory on the import path before
# `draco` is imported below.
# NOTE(review): presumably so a local checkout of draco next to this notebook's
# folder is picked up — confirm. Depends on where the kernel was started.
cwd = os.getcwd()

parent_dir = os.path.dirname(cwd)

sys.path.append(parent_dir)
import draco 
import pandas as pd

In [4]:
import clingo

def parse_fact_to_symbol(fact_str):
    """Turn one textual fact into a term via ``clingo.parse_term``.

    Every trailing ``.`` is removed from ``fact_str`` before parsing, so both
    ``"a(b)."`` and ``"a(b)"`` are accepted.
    """
    term_text = fact_str.rstrip('.')
    symbol = clingo.parse_term(term_text)
    return symbol

def convert_list_to_symbols(fact_list):
    """Parse each fact string in ``fact_list`` with ``parse_fact_to_symbol``.

    Returns a new list holding the parsed results in input order.
    """
    symbols = []
    for fact_text in fact_list:
        symbols.append(parse_fact_to_symbol(fact_text))
    return symbols



In [5]:
def read_json_files_from_directory(directory_path):
    """Load every ``*.json`` file located directly inside ``directory_path``.

    The scan is not recursive. Entries that end in ``.json`` but are not
    regular files (e.g. a sub-directory) are skipped.

    Args:
        directory_path: Path of the directory to scan.

    Returns:
        dict mapping each file name (not the full path) to its parsed JSON
        content, with keys in sorted file-name order. A file that cannot be
        parsed is reported on stdout and left out of the result.
    """
    json_data = {}

    # Sorted so the key order of the result does not depend on the file system.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(directory_path, filename)
        # A directory named "*.json" would make open() raise; ignore it.
        if not os.path.isfile(file_path):
            continue

        # JSON text is UTF-8; do not rely on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            try:
                json_data[filename] = json.load(file)
            except json.JSONDecodeError:
                # Best effort: report the broken file and keep going.
                print(f"Error reading {filename}")

    return json_data

# Directory holding the JSON input files, relative to the current working directory
directory_path = './data'

# Maps each JSON file name in that directory to its parsed content;
# later cells look entries up by file name (e.g. "kim2018_draco2.json").
files = read_json_files_from_directory(directory_path)

In [6]:
# Two example fact lists describing the same chart. They differ in a single
# fact: the `zero` attribute of the x scale (entity 8) is `false` in str1 and
# `true` in str2.
str1= ["attribute(task,root,value).", "attribute(number_rows,root,90).", "entity(field,root,0).", "attribute((field,name),0,n).", "attribute((field,type),0,string).", "attribute((field,entropy),0,1000).", "attribute((field,unique),0,3).", "entity(field,root,1).", "attribute((field,name),1,q1).", "attribute((field,type),1,number).", "attribute((field,entropy),1,3968).", "attribute((field,unique),1,90).", "attribute((field,interesting),1,true).", "entity(field,root,2).", "attribute((field,name),2,q2).", "attribute((field,type),2,number).", "attribute((field,entropy),2,1631).", "attribute((field,unique),2,90).", "entity(view,root,3).", "attribute((view,coordinates),3,cartesian).", "entity(mark,3,4).", "attribute((mark,type),4,point).", "entity(encoding,4,5).", "attribute((encoding,channel),5,x).", "attribute((encoding,field),5,q1).", "entity(encoding,4,6).", "attribute((encoding,channel),6,y).", "attribute((encoding,field),6,q2).", "entity(encoding,4,7).", "attribute((encoding,channel),7,color).", "attribute((encoding,field),7,n).", "entity(scale,3,8).", "attribute((scale,channel),8,x).", "attribute((scale,type),8,linear).", "attribute((scale,zero),8,false).", "entity(scale,3,9).", "attribute((scale,channel),9,y).", "attribute((scale,type),9,linear).", "attribute((scale,zero),9,true).", "entity(scale,3,10).", "attribute((scale,channel),10,color).", "attribute((scale,type),10,categorical)."] 
str2 =["attribute(task,root,value).", "attribute(number_rows,root,90).", "entity(field,root,0).", "attribute((field,name),0,n).", "attribute((field,type),0,string).", "attribute((field,entropy),0,1000).", "attribute((field,unique),0,3).", "entity(field,root,1).", "attribute((field,name),1,q1).", "attribute((field,type),1,number).", "attribute((field,entropy),1,3968).", "attribute((field,unique),1,90).", "attribute((field,interesting),1,true).", "entity(field,root,2).", "attribute((field,name),2,q2).", "attribute((field,type),2,number).", "attribute((field,entropy),2,1631).", "attribute((field,unique),2,90).", "entity(view,root,3).", "attribute((view,coordinates),3,cartesian).", "entity(mark,3,4).", "attribute((mark,type),4,point).", "entity(encoding,4,5).", "attribute((encoding,channel),5,x).", "attribute((encoding,field),5,q1).", "entity(encoding,4,6).", "attribute((encoding,channel),6,y).", "attribute((encoding,field),6,q2).", "entity(encoding,4,7).", "attribute((encoding,channel),7,color).", "attribute((encoding,field),7,n).", "entity(scale,3,8).", "attribute((scale,channel),8,x).", "attribute((scale,type),8,linear).", "attribute((scale,zero),8,true).", "entity(scale,3,9).", "attribute((scale,channel),9,y).", "attribute((scale,type),9,linear).", "attribute((scale,zero),9,true).", "entity(scale,3,10).", "attribute((scale,channel),10,color).", "attribute((scale,type),10,categorical)."]

In [7]:
# Sanity check: display the nested dict draco builds from the first fact list.
draco.answer_set_to_dict(convert_list_to_symbols(str1))

{'task': 'value',
 'number_rows': 90,
 'field': [{'name': 'n', 'type': 'string', 'entropy': 1000, 'unique': 3},
  {'name': 'q1',
   'type': 'number',
   'entropy': 3968,
   'unique': 90,
   'interesting': 'true'},
  {'name': 'q2', 'type': 'number', 'entropy': 1631, 'unique': 90}],
 'view': [{'coordinates': 'cartesian',
   'mark': [{'type': 'point',
     'encoding': [{'channel': 'x', 'field': 'q1'},
      {'channel': 'y', 'field': 'q2'},
      {'channel': 'color', 'field': 'n'}]}],
   'scale': [{'channel': 'x', 'type': 'linear', 'zero': 'false'},
    {'channel': 'y', 'type': 'linear', 'zero': 'true'},
    {'channel': 'color', 'type': 'categorical'}]}]}

In [8]:
# Same check for the second fact list; only the x scale's 'zero' value differs.
draco.answer_set_to_dict(convert_list_to_symbols(str2))

{'task': 'value',
 'number_rows': 90,
 'field': [{'name': 'n', 'type': 'string', 'entropy': 1000, 'unique': 3},
  {'name': 'q1',
   'type': 'number',
   'entropy': 3968,
   'unique': 90,
   'interesting': 'true'},
  {'name': 'q2', 'type': 'number', 'entropy': 1631, 'unique': 90}],
 'view': [{'coordinates': 'cartesian',
   'mark': [{'type': 'point',
     'encoding': [{'channel': 'x', 'field': 'q1'},
      {'channel': 'y', 'field': 'q2'},
      {'channel': 'color', 'field': 'n'}]}],
   'scale': [{'channel': 'x', 'type': 'linear', 'zero': 'true'},
    {'channel': 'y', 'type': 'linear', 'zero': 'true'},
    {'channel': 'color', 'type': 'categorical'}]}]}

In [9]:
def _parse_chart_text(text):
    """Parse chart text given either as JSON or as the ``str()`` of a Python dict."""
    # Local import: only this helper needs it.
    import ast

    # 1) Genuine JSON is taken as is, so apostrophes inside values survive.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # 2) ``str(some_dict)`` is a Python literal; parse it without executing code.
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # 3) Last resort, the historical behaviour: swap quote characters and retry
    #    (covers single-quoted text that contains JSON literals such as true).
    try:
        return json.loads(text.replace("'", '"'))
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON format") from exc


def correct_value_interesting(chart):
    """Drop the 'interesting' flag from field 'n' of a 'value'-task chart.

    Args:
        chart: The chart as a dict, as JSON text, or as the ``str()`` of a
            dict. A dict argument is not modified.

    Returns:
        The chart serialised with ``json.dumps``. When the chart's 'task' is
        'value', the first entry of 'field' whose 'name' is 'n' has its
        'interesting' key removed (if present). Charts with any other task
        are returned unchanged apart from the serialisation.

    Raises:
        ValueError: if the text cannot be parsed ("Invalid JSON format"), or
            if the task is 'value' and no field named 'n' exists.
    """
    if isinstance(chart, dict):
        # Round-trip through JSON to get an independent copy of the caller's dict.
        chart_dict = json.loads(json.dumps(chart))
    else:
        chart_dict = _parse_chart_text(chart)

    # Check for 'task': 'value'
    if chart_dict.get('task') == 'value':
        # Look for the field with {'name': 'n'}; only the first match is touched.
        for item in chart_dict.get('field', []):
            if item.get('name') == 'n':
                item.pop('interesting', None)
                break
        else:
            # Loop finished without hitting `break`: no such field.
            raise ValueError("No item with {'name': 'n'} found.")

    return json.dumps(chart_dict)

In [10]:
def replace_entropy(d, threshold=3000):
    """Replace the numeric entropy of every field with a "high"/"low" label.

    The dict is modified in place and also returned.

    Args:
        d: Chart dict with a "field" list whose items carry "name" and
            "entropy" keys.
        threshold: "q1"/"q2" are labelled "high" when their entropy is
            strictly greater than this value, otherwise "low". Defaults to
            3000, the value that used to be hard-coded.

    Returns:
        The same dict object ``d``.

    Raises:
        ValueError: if a field name is not "n", "q1" or "q2".
    """
    labels = ("high", "low")
    for var_dict in d["field"]:
        name = var_dict["name"]
        if name == "n":
            # Field "n" is always labelled "high", whatever its numeric value.
            var_dict["entropy"] = "high"
        elif name in ("q1", "q2"):
            # Already labelled (e.g. the function is applied twice to the same
            # dict): keep the label instead of failing on a str/int comparison.
            if var_dict["entropy"] not in labels:
                var_dict["entropy"] = "high" if var_dict["entropy"] > threshold else "low"
        else:
            raise ValueError("unknown variable encountered")
    return d

In [11]:
# Export every positive/negative pair from "kim2018_draco2.json" as text files,
# once per presentation order (positive chart first / negative chart first).
stu = 'kim2018'

# Create the directory the files below are actually written to. The previous
# version created './data/original_example_pairs_to_rank/<stu>' instead, so the
# open() calls raised FileNotFoundError unless the target already existed.
dir_path = f'./data/example_pairs_to_rank/{stu}'
os.makedirs(dir_path, exist_ok=True)

count = 1
for pair in files["kim2018_draco2.json"]:
    # Facts -> nested dict, with numeric entropy values turned into high/low labels.
    positive_data = replace_entropy(draco.answer_set_to_dict(convert_list_to_symbols(pair['positive'])))
    negative_data = replace_entropy(draco.answer_set_to_dict(convert_list_to_symbols(pair['negative'])))

    # Serialise each chart once; both output files reuse the same text.
    positive_text = correct_value_interesting(str(positive_data))
    negative_text = correct_value_interesting(str(negative_data))

    with open(f'{dir_path}/{count}_pos_first.txt', 'w') as file:
        file.write('Chart 1: ' + positive_text + '\n\n')
        file.write('Chart 2: ' + negative_text)

    with open(f'{dir_path}/{count}_neg_first.txt', 'w') as file:
        file.write('Chart 1: ' + negative_text + '\n\n')
        file.write('Chart 2: ' + positive_text)

    count += 1