In [1]:
import os
import sys
cwd = os.getcwd()
parent_dir = os.path.dirname(cwd)
sys.path.append(parent_dir)

from draco import Draco
import json
import shutil
import pandas as pd
from typing import Dict, List, Any
import pickle


import numpy as np
from sklearn import svm

import plotly.figure_factory as ff
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from draco.data_utils import pairs_to_vec

In [2]:
# Shared Draco instance built with default arguments. train_data_prep compares
# the training feature columns against its `soft_constraint_names`.
default_draco = Draco()

In [3]:
from sklearn.model_selection import cross_val_predict
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm
import numpy as np
import pandas as pd

np.random.seed(42)
def train_model(data: pd.DataFrame, test_size: float = 0.2, C_values: list = [0.1, 0.5, 1, 5, 10], quiet=False, model_name = ""):
    """Fit a linear SVM on pairwise feature differences and pickle a calibrated copy.

    Steps:
      1. Build ``X = data.negative - data.positive`` with label +1 for every row,
         then negate a random half of the rows (features and label together) so
         both classes are represented.
      2. Hold out ``test_size`` of the rows, and pick ``C`` by 5-fold
         cross-validation on the remaining rows.
      3. Report the held-out accuracy of a sigmoid-calibrated model trained on
         the training split.
      4. Refit a calibrated model on *all* rows and pickle it to
         ``./pickled_models/{model_name}.pkl``.
      5. Refit a plain ``LinearSVC`` on all rows and return it.

    Randomness comes from the global NumPy RNG (``np.random.choice`` and
    ``train_test_split`` without ``random_state``), so results depend on the
    seed set by the caller and on how many times the RNG was used before.

    Args:
        data: Object exposing ``.negative`` and ``.positive`` frames of equal
            shape (as produced by ``pairs_to_vec`` in this notebook).
        test_size: Fraction of rows held out for the final accuracy report.
        C_values: Candidate regularisation strengths. The default list is only
            iterated, never mutated.
        quiet: When true, suppresses the per-``C`` cross-validation lines. The
            banner, the "Best C" summary and the "saving" message are always
            printed.
        model_name: Stem of the pickle file name.

    Returns:
        The ``LinearSVC`` (no intercept) fitted on all rows with the selected C.

    Raises:
        ValueError: If ``C_values`` is empty or cross-validation yields no
            comparable score (for example, every mean is NaN).
    """
    print("*"*90)

    candidates = list(C_values)
    if not candidates:
        raise ValueError("C_values must contain at least one candidate value")

    # Prepare the data
    X = data.negative - data.positive
    size = len(X)
    y = np.ones(size)

    # Flip some of the features and labels to balance label distribution
    idx = np.random.choice([False, True], size=size, p=[0.5, 0.5])
    X[idx] = -X[idx]
    y[idx] = -y[idx]

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

    # Start below any attainable accuracy so the first valid score is always
    # accepted; starting at 0 left best_C as None when no mean exceeded 0.
    best_score = -np.inf
    best_C = None

    # Perform 5-fold cross-validation to find the best C
    for C in candidates:
        clf = svm.LinearSVC(C=C, fit_intercept=False, max_iter=10000, dual=False)
        scores = cross_val_score(clf, X_train, y_train, cv=5)  # 5-fold CV
        avg_score = np.mean(scores)

        if not quiet:
            print(f"C: {C}, Avg CV Score: {avg_score}")

        # Strict '>' keeps the earliest candidate on ties.
        if avg_score > best_score:
            best_score = avg_score
            best_C = C

    if best_C is None:
        # Only reachable when every mean score is NaN (NaN never compares greater).
        raise ValueError("Cross-validation produced no comparable score; cannot select C")

    # Held-out accuracy of a calibrated model trained on the training split only.
    svc = svm.LinearSVC(C=best_C, fit_intercept=False, max_iter=10000, dual=False)
    clf = CalibratedClassifierCV(svc, method='sigmoid', cv=5)  # Use 5-fold CV
    clf.fit(X_train, y_train)

    final_test_score = clf.score(X_test, y_test)
    print(f"Best C: {best_C}, Final Test Score: {final_test_score}")

    # Final models use every row (train and test).
    clf_final = svm.LinearSVC(C=best_C, fit_intercept=False, max_iter=10000, dual=False)

    clf_platt = CalibratedClassifierCV(clf_final, method='sigmoid', cv=5)
    clf_platt.fit(X, y)

    print("saving",model_name)
    # Ensure the output folder exists so a fresh checkout does not fail here.
    os.makedirs('./pickled_models', exist_ok=True)
    with open(f'./pickled_models/{model_name}.pkl', 'wb') as file:
        pickle.dump(clf_platt, file)

    # The calibrator is fitted on clones, so clf_final itself is fitted here to
    # expose coef_ to the caller.
    clf_final.fit(X, y)
    return clf_final

In [4]:
def swap_json_fields(json_file_path, ranks, model, temp):
    """Re-label chart pairs according to a model's rankings and sort the prompt files.

    For the ``i``-th pair (1-based) and its rank value:

    * ``"chart 1"``: copy ``{i}_pos_first.txt`` into the ``agree`` folder and
      keep the pair unchanged.
    * ``"chart 2"``: copy ``{i}_neg_first.txt`` into the ``disagree`` folder and
      swap the pair's ``positive`` and ``negative`` fields.
    * ``"conflicting"``: copy ``{i}_pos_first.txt`` into the ``conflict`` folder
      and drop the pair from the result.
    * anything else: print a warning and treat the pair as ``"conflicting"``.
      Previously this case reused the destination folder of the preceding pair,
      or raised ``NameError`` when it was the first pair.

    Source files are read from ``./data/example_pairs_to_rank/kim2018`` and
    copies are written below
    ``./gpt_responses/kim2018_for_reader/{model}_{temp}/``.

    Args:
        json_file_path: Path to a JSON file holding a list of pair objects with
            ``positive`` and ``negative`` keys.
        ranks: One rank string per pair, in the same order as the JSON list.
        model: Model name used in the destination folder name.
        temp: Temperature used in the destination folder name.

    Returns:
        The list of pairs ranked ``"chart 1"`` or ``"chart 2"``, in original
        order, with fields swapped for ``"chart 2"``.

    Raises:
        ValueError: If ``ranks`` and the JSON list differ in length.
    """
    # Read the JSON file
    with open(json_file_path, 'r') as file:
        json_data = json.load(file)

    if len(json_data) != len(ranks):
        raise ValueError("Length of ranks does not match the number of JSON objects in the file")

    print("swapping and writing:", len(json_data), len(ranks))

    # Loop-invariant directories
    base_source_dir = './data/example_pairs_to_rank/kim2018'
    base_destination_dir = f'./gpt_responses/kim2018_for_reader/{model}_{temp}'

    updated_json_data = []
    for i, (json_obj, chart) in enumerate(zip(json_data, ranks), start=1):
        # Decide folder, prompt variant and whether the pair is kept.
        if chart == "chart 1":
            subfolder, prompt_variant, keep = "agree", "pos_first", True
        elif chart == "chart 2":
            subfolder, prompt_variant, keep = "disagree", "neg_first", True
        else:
            if chart != "conflicting":
                print(f"Invalid value in ranks: {chart}")
            subfolder, prompt_variant, keep = "conflict", "pos_first", False

        # The destination is always derived from the current pair's rank, so an
        # unexpected value can never inherit a folder from an earlier iteration.
        destination_dir = os.path.join(base_destination_dir, subfolder)
        os.makedirs(destination_dir, exist_ok=True)

        source_file = f'{base_source_dir}/{i}_{prompt_variant}.txt'
        destination_file = f'{destination_dir}/{i}.txt'
        shutil.copyfile(source_file, destination_file)

        if not keep:
            continue

        if chart == "chart 2":
            json_obj['positive'], json_obj['negative'] = json_obj['negative'], json_obj['positive']
        updated_json_data.append(json_obj)

    # Same value the manual counter had after the loop (number of pairs + 1).
    print("final i", len(json_data) + 1)
    return updated_json_data


In [5]:
def write_GPT_training_data(model, temp, res_kim, json_file_path='./data/kim2018_draco2.json'):
    """Write the model-relabelled pairs next to the source JSON file.

    Passes ``json_file_path`` and the rankings ``res_kim`` to
    ``swap_json_fields`` and dumps the returned list, indented by 4 spaces, to
    ``modified_kim_{model}_{temp}.json`` in the same directory as
    ``json_file_path``.
    """
    relabelled_pairs = swap_json_fields(json_file_path, res_kim, model, temp)

    output_path = os.path.join(
        os.path.dirname(json_file_path),
        f'modified_kim_{model}_{temp}.json',
    )
    with open(output_path, 'w') as out_handle:
        json.dump(relabelled_pairs, out_handle, indent=4)


In [6]:
# write difference vector to agree/disagree/conflict and prepare training data for weights
def train_data_prep(model, temp):
    """Annotate the reader files with feature differences and fit DracoGPT weights.

    Relies on notebook-level names that must be assigned before the call:
    ``index_map`` (position in the modified JSON -> index in the full ranking
    list), ``combined_list`` (the full ranking list) and ``default_draco``.

    Stage 1 (pairs ranked "chart 1" / "chart 2"):
      * loads ``./data/modified_kim_{model}_{temp}.json`` and keys each pair as
        ``kim_<original index>``;
      * vectorises the pairs with ``pairs_to_vec`` and, for every row of
        ``positive - negative``, appends the non-zero entries to the matching
        ``agree``/``disagree`` reader file (file numbers are 1-based, hence the
        ``+1``);
      * saves the full difference table as ``pos-neg.csv``;
      * trains via ``train_model`` and writes one ``#const`` line per feature to
        ``./weights/DracoGPT_{model}_{temp}_weights.txt``.

    Stage 2 (pairs ranked "conflicting"): vectorises those pairs from the
    original data set and appends their non-zero differences to the ``conflict``
    reader files. Nothing is trained on them.

    Raises:
        ValueError: If a stage-1 pair maps to a ranking other than "chart 1" or
            "chart 2".
        AssertionError: If the feature columns differ from
            ``default_draco.soft_constraint_names``.
    """
    # ---- Stage 1: pairs with a decisive ranking ----
    with open(f'./data/modified_kim_{model}_{temp}.json') as file:
        json_data = json.load(file)
        print(len(json_data))

    kim = {}
    for position, pair in enumerate(json_data):
        pair["source"] = "kim"
        pair["pair_id"] = f'{pair["source"]}_{index_map[position]}'
        kim[pair["pair_id"]] = pair

    baseline_train_data = pairs_to_vec(kim)
    diff = baseline_train_data.positive - baseline_train_data.negative

    for index, row in diff.iterrows():
        non_zero_columns = row[row != 0]
        # The pair id ends in the 0-based index into the full ranking list.
        number_index = index.rsplit('_', 1)[-1]
        verdict = combined_list[int(number_index)]
        if verdict != 'chart 1' and verdict != 'chart 2':
            raise ValueError("error")
        outcome = 'agree' if verdict == 'chart 1' else 'disagree'
        with open(f'./gpt_responses/kim2018_for_reader/{model}_{temp}/{outcome}/{int(number_index)+1}.txt', 'a') as file:
            file.write('\n\n' + non_zero_columns.to_string())

    diff.to_csv(f"./gpt_responses/kim2018_for_reader/{model}_{temp}/pos-neg.csv")

    observed_features = set(baseline_train_data.negative.columns)
    expected_features = set(default_draco.soft_constraint_names)
    assert observed_features == expected_features, "Feature names do not match."

    clf = train_model(baseline_train_data, model_name = model)

    features = baseline_train_data.negative.columns
    print(features)

    baseline_weights = {}
    with open(f'./weights/DracoGPT_{model}_{temp}_weights.txt', 'w') as fout:
        for feature, weight in zip(features, clf.coef_[0]):
            # int() truncates the scaled coefficient toward zero.
            scaled_weight = int(weight * 1000)
            baseline_weights[f"{feature}_weight"] = scaled_weight
            fout.write(f"#const {feature}_weight = {scaled_weight}.\n")

    # ---- Stage 2: conflicting pairs ----
    # Their difference vectors are written to the reader files but never used
    # for training.
    with open('./data/kim2018_draco2.json') as file:
        json_data = json.load(file)
        print(len(json_data),len(combined_list))

    combined = {}
    for position, (pair, verdict) in enumerate(zip(json_data, combined_list), start=1):
        if verdict == "conflicting":
            pair["source"] = "kim"
            # Ids are 1-based here, so they match the reader file numbers directly.
            pair["pair_id"] = f'{pair["source"]}_{position}'
            combined[pair["pair_id"]] = pair

    baseline_train_data = pairs_to_vec(combined)
    diff = baseline_train_data.positive - baseline_train_data.negative
    for index, row in diff.iterrows():
        non_zero_columns = row[row != 0]
        number_index = index.rsplit('_', 1)[-1]
        with open(f'./gpt_responses/kim2018_for_reader/{model}_{temp}/conflict/{number_index}.txt', 'a') as file:
            file.write('\n\n' + non_zero_columns.to_string())





In [7]:
def load_data(model, temp):
    """Load the pickled rankings for one model/temperature run.

    Reads ``./gpt_responses/kim2018_{model}_{temp}_ranking.pkl`` and returns the
    unpickled object twice, as ``(combined_list, res_kim)``. Both elements are
    the same object, not copies.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    ranking files produced by this project.
    """
    with open(f'./gpt_responses/kim2018_{model}_{temp}_ranking.pkl', 'rb') as handle:
        rankings = pickle.load(handle)

    # No other source is merged in, so the "combined" list is the same object.
    return rankings, rankings

In [8]:
'''
Now we select the subset of data used to train dracogpt
'''

def filter_data(json_file_path, ranks):
    """Return the pairs whose rank is "chart 1" or "chart 2", unchanged and in order.

    Args:
        json_file_path: Path to a JSON file containing a list of pair objects.
        ranks: One rank value per pair, aligned with the JSON list.

    Returns:
        The kept pair objects. Their count is also printed.

    Raises:
        ValueError: If ``ranks`` and the JSON list differ in length.
    """
    with open(json_file_path, 'r') as handle:
        pairs = json.load(handle)

    if len(pairs) != len(ranks):
        raise ValueError("Length of ranks does not match the number of JSON objects in the file")

    decisive = ('chart 1', "chart 2")
    kept_pairs = [pair for pair, verdict in zip(pairs, ranks) if verdict in decisive]

    print(len(kept_pairs))
    return kept_pairs


def write_draco_subset_training_data(model, temp, ranks=None, json_file_path='./data/kim2018_draco2.json'):
    """Write the pairs with a decisive ranking to ``subset_{model}_{temp}_kim.json``.

    The pairs are filtered with ``filter_data`` (labels are NOT swapped) and the
    result is saved, indented by 4 spaces, in the same directory as
    ``json_file_path``.

    Args:
        model: Model name used in the output file name.
        temp: Temperature used in the output file name.
        ranks: Rank value per pair. When omitted, the notebook-level ``res_kim``
            is used, which is what this function always read before the
            parameter existed.
        json_file_path: Source JSON file. Defaults to the path that was
            previously hard-coded.
    """
    if ranks is None:
        # Backward-compatible fallback to the hidden notebook state.
        ranks = res_kim

    updated_json = filter_data(json_file_path, ranks)
    directory = os.path.dirname(json_file_path)
    new_file_path = os.path.join(directory, f'subset_{model}_{temp}_kim.json')
    with open(new_file_path, 'w') as file:
        json.dump(updated_json, file, indent=4)
        

def write_subset_weights(model, temp):
    """Train on the unswapped subset of pairs and write the learned weights.

    Loads ``./data/subset_{model}_{temp}_kim.json``, keys each pair as
    ``kim_<position>`` (0-based position within the subset), vectorises the
    pairs with ``pairs_to_vec`` and trains via ``train_model`` with
    ``model_name`` set to ``"human_" + model``. One ``#const`` line per feature
    is written to ``./weights/{model}_{temp}_subset_weights.txt``.
    """
    with open(f'./data/subset_{model}_{temp}_kim.json') as file:
        json_data = json.load(file)

    kim = {}
    for position, pair in enumerate(json_data):
        pair["source"] = "kim"
        pair["pair_id"] = f'{pair["source"]}_{position}'
        kim[pair["pair_id"]] = pair

    baseline_train_data = pairs_to_vec(kim)
    print(len(baseline_train_data))
    clf = train_model(baseline_train_data, model_name = "human_"+model)

    features = baseline_train_data.negative.columns

    baseline_weights = {}
    with open(f'./weights/{model}_{temp}_subset_weights.txt', 'w') as fout:
        for feature, weight in zip(features, clf.coef_[0]):
            # int() truncates the scaled coefficient toward zero.
            scaled_weight = int(weight * 1000)
            baseline_weights[f"{feature}_weight"] = scaled_weight
            fout.write(f"#const {feature}_weight = {scaled_weight}.\n")
    

In [9]:
# Pipeline run for model 'gpt-4-0125-preview' at temperature 0.
# NOTE: `combined_list`, `res_kim` and `index_map` assigned in this cell are
# module-level names that train_data_prep and write_draco_subset_training_data
# read directly, so this cell must run before those calls and the names must
# not be renamed.
res_kim = None
# load_data returns the same unpickled ranking list under both names.
combined_list, res_kim = load_data('gpt-4-0125-preview', 0)
# print(combined_list)

# Count occurrences
count_chart_1 = combined_list.count('chart 1')
count_chart_2 = combined_list.count('chart 2')
count_conflicting = combined_list.count('conflicting')

# Calculate total number of items for percentage calculation
total_items = len(combined_list)

# Calculate percentages
# (an empty ranking list would raise ZeroDivisionError here)
percentage_chart_1 = (count_chart_1 / total_items) * 100
percentage_chart_2 = (count_chart_2 / total_items) * 100
percentage_conflicting = (count_conflicting / total_items) * 100

# Print results
print(f"Percentage of 'chart 1': {percentage_chart_1}%")
print(f"Percentage of 'chart 2': {percentage_chart_2}%")
print(f"Percentage of 'conflicting': {percentage_conflicting}%")

# Writes ./data/modified_kim_<model>_<temp>.json, which train_data_prep reads below.
write_GPT_training_data('gpt-4-0125-preview', 0, res_kim)



# Not referenced again in the visible cells.
filtered_combined_list = [element for element in combined_list if element in ['chart 1', 'chart 2']]
# index_map: position among the 'chart 1'/'chart 2' entries -> index in the
# full ranking list. train_data_prep uses it to build pair ids.
index_map = {}
filtered_index = 0

for original_index, value in enumerate(combined_list):
    if value in ["chart 1", "chart 2"]:
        index_map[filtered_index] = original_index
        filtered_index += 1


print(f"Training DracoGPT on gpt-4-0125-preview and temp 0")
train_data_prep('gpt-4-0125-preview', 0)
# Writes ./data/subset_<model>_<temp>_kim.json, which write_subset_weights reads.
write_draco_subset_training_data('gpt-4-0125-preview', 0)
write_subset_weights('gpt-4-0125-preview', 0)

Percentage of 'chart 1': 63.541666666666664%
Percentage of 'chart 2': 13.368055555555555%
Percentage of 'conflicting': 23.09027777777778%
swapping and writing: 1152 1152


INFO:draco.data_utils:Running 89 partitions of 886 items in parallel on {processes} processes.


final i 1153
Training DracoGPT on gpt-4-0125-preview and temp 0
886


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

******************************************************************************************
C: 0.1, Avg CV Score: 0.9872440315652782
C: 0.5, Avg CV Score: 0.9872440315652782
C: 1, Avg CV Score: 0.9872440315652782
C: 5, Avg CV Score: 0.9886624712815901


INFO:draco.data_utils:Running 27 partitions of 266 items in parallel on {processes} processes.


C: 10, Avg CV Score: 0.9886624712815901
Best C: 5, Final Test Score: 0.9943820224719101
saving gpt-4-0125-preview
Index(['aggregate', 'aggregate_count', 'aggregate_group_by_raw',
       'aggregate_max', 'aggregate_mean', 'aggregate_median', 'aggregate_min',
       'aggregate_no_discrete', 'aggregate_stdev', 'aggregate_sum',
       ...
       'value_line', 'value_point', 'value_rect', 'value_text', 'value_tick',
       'x_col', 'x_row', 'x_y_raw', 'y_col', 'y_row'],
      dtype='object', length=147)
1152 1152


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
INFO:draco.data_utils:Hash of dataframe: 260272198256654557
INFO:draco.data_utils:

886


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

886
******************************************************************************************
C: 0.1, Avg CV Score: 0.947717510738188
C: 0.5, Avg CV Score: 0.9477274997502747
C: 1, Avg CV Score: 0.9477274997502747
C: 5, Avg CV Score: 0.9449006093297372
C: 10, Avg CV Score: 0.9449006093297372
Best C: 0.5, Final Test Score: 0.9606741573033708
saving human_gpt-4-0125-preview


In [10]:
# Pipeline run for model 'gpt-4-0613' at temperature 0.
# NOTE: `combined_list`, `res_kim` and `index_map` assigned in this cell are
# module-level names that train_data_prep and write_draco_subset_training_data
# read directly, so this cell must run before those calls and the names must
# not be renamed.
res_kim = None
# load_data returns the same unpickled ranking list under both names.
combined_list, res_kim = load_data('gpt-4-0613', 0)
# print(combined_list)

# Count occurrences
count_chart_1 = combined_list.count('chart 1')
count_chart_2 = combined_list.count('chart 2')
count_conflicting = combined_list.count('conflicting')

# Calculate total number of items for percentage calculation
total_items = len(combined_list)

# Calculate percentages
# (an empty ranking list would raise ZeroDivisionError here)
percentage_chart_1 = (count_chart_1 / total_items) * 100
percentage_chart_2 = (count_chart_2 / total_items) * 100
percentage_conflicting = (count_conflicting / total_items) * 100

# Print results
print(f"Percentage of 'chart 1': {percentage_chart_1}%")
print(f"Percentage of 'chart 2': {percentage_chart_2}%")
print(f"Percentage of 'conflicting': {percentage_conflicting}%")

# Writes ./data/modified_kim_<model>_<temp>.json, which train_data_prep reads below.
write_GPT_training_data('gpt-4-0613', 0, res_kim)



# Not referenced again in the visible cells.
filtered_combined_list = [element for element in combined_list if element in ['chart 1', 'chart 2']]
# index_map: position among the 'chart 1'/'chart 2' entries -> index in the
# full ranking list. train_data_prep uses it to build pair ids.
index_map = {}
filtered_index = 0

for original_index, value in enumerate(combined_list):
    if value in ["chart 1", "chart 2"]:
        index_map[filtered_index] = original_index
        filtered_index += 1


print(f"Training DracoGPT on gpt-4-0613 and temp 0")
train_data_prep('gpt-4-0613', 0)
# Writes ./data/subset_<model>_<temp>_kim.json, which write_subset_weights reads.
write_draco_subset_training_data('gpt-4-0613', 0)
write_subset_weights('gpt-4-0613', 0)

Percentage of 'chart 1': 56.510416666666664%
Percentage of 'chart 2': 14.930555555555555%
Percentage of 'conflicting': 28.55902777777778%
swapping and writing: 1152 1152


INFO:draco.data_utils:Running 83 partitions of 823 items in parallel on {processes} processes.


final i 1153
Training DracoGPT on gpt-4-0613 and temp 0
823


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

******************************************************************************************
C: 0.1, Avg CV Score: 0.9908859588248902
C: 0.5, Avg CV Score: 0.9939393939393939
C: 1, Avg CV Score: 0.9939393939393939
C: 5, Avg CV Score: 0.9863405042794356


INFO:draco.data_utils:Running 33 partitions of 329 items in parallel on {processes} processes.


C: 10, Avg CV Score: 0.9848137867221837
Best C: 0.5, Final Test Score: 0.9939393939393939
saving gpt-4-0613
Index(['aggregate', 'aggregate_count', 'aggregate_group_by_raw',
       'aggregate_max', 'aggregate_mean', 'aggregate_median', 'aggregate_min',
       'aggregate_no_discrete', 'aggregate_stdev', 'aggregate_sum',
       ...
       'value_line', 'value_point', 'value_rect', 'value_text', 'value_tick',
       'x_col', 'x_row', 'x_y_raw', 'y_col', 'y_row'],
      dtype='object', length=147)
1152 1152


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

823


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

823
******************************************************************************************
C: 0.1, Avg CV Score: 0.954383529956049
C: 0.5, Avg CV Score: 0.9543950959981494
C: 1, Avg CV Score: 0.9528683784408974
C: 5, Avg CV Score: 0.9528683784408974
C: 10, Avg CV Score: 0.9528683784408974
Best C: 0.5, Final Test Score: 0.9454545454545454
saving human_gpt-4-0613


In [11]:
# Pipeline run for model 'gpt-3.5-turbo-0125' at temperature 0.
# NOTE: `combined_list`, `res_kim` and `index_map` assigned in this cell are
# module-level names that train_data_prep and write_draco_subset_training_data
# read directly, so this cell must run before those calls and the names must
# not be renamed.
res_kim = None
# load_data returns the same unpickled ranking list under both names.
combined_list, res_kim = load_data('gpt-3.5-turbo-0125', 0)
# print(combined_list)

# Count occurrences
count_chart_1 = combined_list.count('chart 1')
count_chart_2 = combined_list.count('chart 2')
count_conflicting = combined_list.count('conflicting')

# Calculate total number of items for percentage calculation
total_items = len(combined_list)

# Calculate percentages
# (an empty ranking list would raise ZeroDivisionError here)
percentage_chart_1 = (count_chart_1 / total_items) * 100
percentage_chart_2 = (count_chart_2 / total_items) * 100
percentage_conflicting = (count_conflicting / total_items) * 100

# Print results
print(f"Percentage of 'chart 1': {percentage_chart_1}%")
print(f"Percentage of 'chart 2': {percentage_chart_2}%")
print(f"Percentage of 'conflicting': {percentage_conflicting}%")

# Writes ./data/modified_kim_<model>_<temp>.json, which train_data_prep reads below.
write_GPT_training_data('gpt-3.5-turbo-0125', 0, res_kim)



# Not referenced again in the visible cells.
filtered_combined_list = [element for element in combined_list if element in ['chart 1', 'chart 2']]
# index_map: position among the 'chart 1'/'chart 2' entries -> index in the
# full ranking list. train_data_prep uses it to build pair ids.
index_map = {}
filtered_index = 0

for original_index, value in enumerate(combined_list):
    if value in ["chart 1", "chart 2"]:
        index_map[filtered_index] = original_index
        filtered_index += 1


# The progress message abbreviates the model id; the calls below use the full
# 'gpt-3.5-turbo-0125' name.
print(f"Training DracoGPT on gpt-3.5 and temp 0")
train_data_prep('gpt-3.5-turbo-0125', 0)
# Writes ./data/subset_<model>_<temp>_kim.json, which write_subset_weights reads.
write_draco_subset_training_data('gpt-3.5-turbo-0125', 0)
write_subset_weights('gpt-3.5-turbo-0125', 0)

Percentage of 'chart 1': 9.98263888888889%
Percentage of 'chart 2': 17.53472222222222%
Percentage of 'conflicting': 72.48263888888889%
swapping and writing: 1152 1152


INFO:draco.data_utils:Running 32 partitions of 317 items in parallel on {processes} processes.


final i 1153
Training DracoGPT on gpt-3.5 and temp 0
317


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

******************************************************************************************
C: 0.1, Avg CV Score: 0.976392156862745
C: 0.5, Avg CV Score: 0.9723921568627449
C: 1, Avg CV Score: 0.9723921568627449
C: 5, Avg CV Score: 0.9723921568627449
C: 10, Avg CV Score: 0.9723921568627449
Best C: 0.1, Final Test Score: 0.96875


INFO:draco.data_utils:Running 84 partitions of 835 items in parallel on {processes} processes.


saving gpt-3.5-turbo-0125
Index(['aggregate', 'aggregate_count', 'aggregate_group_by_raw',
       'aggregate_max', 'aggregate_mean', 'aggregate_median', 'aggregate_min',
       'aggregate_no_discrete', 'aggregate_stdev', 'aggregate_sum',
       ...
       'value_line', 'value_point', 'value_rect', 'value_text', 'value_tick',
       'x_col', 'x_row', 'x_y_raw', 'y_col', 'y_row'],
      dtype='object', length=147)
1152 1152


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

317


  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.co

317
******************************************************************************************
C: 0.1, Avg CV Score: 0.8932549019607843
C: 0.5, Avg CV Score: 0.8774117647058823
C: 1, Avg CV Score: 0.8774117647058823
C: 5, Avg CV Score: 0.8734117647058823
C: 10, Avg CV Score: 0.8734117647058823
Best C: 0.1, Final Test Score: 0.953125
saving human_gpt-3.5-turbo-0125


In [12]:
# Baseline: train on every pair of the original data set with its original
# positive/negative labels (no model rankings involved).
# NOTE: the names assigned here (kim, combined, baseline_train_data, clf,
# features, baseline_weights) are module-level and overwrite earlier values.
kim = {}


with open('./data/kim2018_draco2.json') as file:

    i = 0
    json_data = json.load(file)

    # Key each pair as kim_<0-based position in the file>.
    for pair in json_data:
        pair["source"] = "kim"
        pair["pair_id"] = f'{pair["source"]}_{i}'
        kim[pair["pair_id"]] = pair
        i += 1

combined = kim 
    
baseline_train_data = pairs_to_vec(combined)
print(baseline_train_data.shape)

# train_model also pickles a calibrated classifier as ./pickled_models/original_kim.pkl.
clf = train_model(baseline_train_data, model_name = "original_kim")

features = baseline_train_data.negative.columns
baseline_weights = {}

# One "#const <feature>_weight = <int>." line per feature; int() truncates the
# coefficient scaled by 1000 toward zero.
with open('./weights/original_weights.txt', 'w') as fout:
    for feature, weight in zip(features, clf.coef_[0]):
        baseline_weights[f"{feature}_weight"] = int(weight * 1000)
        fout.write(f"#const {feature}_weight = {int(weight * 1000)}.\n")


INFO:draco.data_utils:Running 116 partitions of 1152 items in parallel on {processes} processes.
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(dfs).fillna(0)
  return pd.concat(

(1152, 296)
******************************************************************************************
C: 0.1, Avg CV Score: 0.9380904817861341
C: 0.5, Avg CV Score: 0.9359106933019976
C: 1, Avg CV Score: 0.9369976498237369
C: 5, Avg CV Score: 0.9359106933019976
C: 10, Avg CV Score: 0.9359106933019976
Best C: 0.1, Final Test Score: 0.9393939393939394
saving original_kim
