In [1]:
import os
import yaml
from itertools import product

In [2]:
# Path to a base YAML config.
# NOTE(review): BASE_CONFIG_PATH is assigned again in a later cell, so this
# value is overridden when the notebook is run top to bottom.
BASE_CONFIG_PATH = 'configs/mini_movies_config.yaml'

In [23]:
# Base YAML config that the generation cell loads (this assignment overrides
# the earlier BASE_CONFIG_PATH).
# NOTE(review): save_base_config writes to experiments/<experiment_set>/config.yaml;
# with experiment_set = 'jan_16_value_query_ucb20' that is this same file, so the
# base config is overwritten on each run -- confirm this is intended.
BASE_CONFIG_PATH = 'experiments/jan_16_value_query_ucb20/config.yaml'

In [24]:
def deep_update(original, update):
    """Recursively merge ``update`` into ``original`` in place and return it.

    Args:
        original: Dict to be modified.
        update: Dict whose values override or extend ``original``. Nested
            dicts are merged key by key; any other value replaces the
            existing one.

    Returns:
        The same ``original`` object, after merging.

    When ``update`` supplies a dict for a key whose current value in
    ``original`` is missing or is not a dict (for example ``None`` from an
    empty YAML section, or a scalar), the old value is discarded and a fresh
    dict is built from the update instead of raising ``TypeError``.
    """
    for key, value in update.items():
        if isinstance(value, dict):
            existing = original.get(key)
            if not isinstance(existing, dict):
                # Nothing mergeable at this key: start from an empty dict so
                # the nested update is copied rather than aliased.
                existing = {}
            original[key] = deep_update(existing, value)
        else:
            original[key] = value
    return original

def save_config(config, directory, experiment_name):
    """Dump ``config`` as YAML to ``<directory>/<experiment_name>/config.yaml``.

    The experiment directory (and any missing parents) is created first; an
    already existing directory is not an error.
    """
    target_dir = os.path.join(directory, experiment_name)
    os.makedirs(target_dir, exist_ok=True)
    config_file = os.path.join(target_dir, "config.yaml")
    with open(config_file, 'w') as handle:
        yaml.dump(config, handle)


In [25]:
def generate_experiment_name(params):
    """Return an experiment name built from a parameter dict.

    Each entry contributes a ``"<name>_<value>"`` piece, and the pieces are
    joined with underscores in the dict's iteration order. An empty dict
    yields an empty string.
    """
    name_parts = []
    for param, value in params.items():
        name_parts.append(f"{param}_{value}")
    return "_".join(name_parts)

In [26]:
def save_base_config(base_config, experiment_set):
    """Point ``base_config`` at an experiment set and write it to disk.

    Sets ``base_config['paths']['eval_results_master_path']`` and
    ``base_config['paths']['experiment_dir']`` (mutating ``base_config`` in
    place), then dumps the config as YAML to
    ``experiments/<experiment_set>/config.yaml``.

    Args:
        base_config: Config dict; must already contain a ``'paths'`` mapping.
        experiment_set: Name of the experiment set directory under
            ``experiments/``.
    """
    experiment_dir = f"experiments/{experiment_set}"

    # Update the master_path and dir keys
    base_config['paths']['eval_results_master_path'] = f"{experiment_dir}/eval_results.csv"
    base_config['paths']['experiment_dir'] = experiment_dir

    # The directory may not exist yet if no per-experiment config has been
    # saved; create it so the open() below cannot fail with FileNotFoundError.
    os.makedirs(experiment_dir, exist_ok=True)

    # Write the updated base config to the experiment set directory
    with open(f"{experiment_dir}/config.yaml", 'w') as f:
        yaml.dump(base_config, f)


In [31]:
# Mapping from grid parameter name -> parameter value -> partial config.
# Each partial config is a nested dict that is deep-merged into a copy of the
# base config. Top-level keys here must match the keys used in param_grid
# exactly, otherwise the generator reports "No mapping found".
value_mapping = {
    'data': {
        'shapes': {'data': {'data_path': 'data/shapes.json'}},
        'movies_old': {'data': {'data_path': 'data/movielens_16.json'}},
        'clothing': {'data': {'data_path': 'data/clothing.json'}},
        'movies_old_subset': {'data': {'data_path': 'data/movielens_16_trimmed.json'}},
        'movies_16': {'data': {'data_path': 'data/ml25M_16_movie_sample.json'}},
        'movies_100': {'data': {'data_path': 'data/ml25M_100_movie_sample.json'}},
        'movies_50': {'data': {'data_path': 'data/ml25M_50_movie_sample.json'}},
    },
    'usr': {
        'shapes': {'data': {'user_path': 'data/users/shapes_qrel.tsv'}},
        'movies': {'data': {'user_path': 'data/users/movielens_16_qrel.tsv'}},
        'clothing': {'data': {'user_path': 'data/users/clothing_qrel.tsv'}},
        'movies_16_5': {'data': {'user_path': 'data/users/movielens_16_5_users.tsv'}},
        'movies_100_3': {'data': {'user_path': 'data/users/movielens_100_3_users.tsv'}},
        'movies_100_5': {'data': {'user_path': 'data/users/movielens_100_5_users.tsv'}},
        'movies_100_50': {'data': {'user_path': 'data/users/movielens_100_50_users.tsv'}},
        'movies_50_50': {'data': {'user_path': 'data/users/movielens_50_50_users.tsv'}},
    },
    # For the tables below the grid value is written into the config verbatim,
    # so they are generated from the list of supported values.
    'turns': {
        n: {'dialogue_sim': {'num_turns': n}} for n in (3, 5, 10, 15, 20)
    },
    'llm_temp': {
        t: {'llm': {'temperature': t}} for t in (0, 1)
    },
    'pe': {
        name: {'pe': {'pe_module_name': name}} for name in ('DT', 'MonoLLM')
    },
    'resp_upd': {
        'individ': {'pe': {'response_update': 'individual'}},
        'concat': {'pe': {'response_update': 'concat'}},
    },
    'n_rec': {
        n: {'pe': {'num_recs': n}} for n in (1, 5, 10, 20)
    },
    # Key was previously misspelled 'selction', which never matched the
    # 'selection' parameter name used in param_grid.
    'selection': {
        name: {'query': {'item_selection': name}}
        for name in ('greedy', 'random', 'entropy_reduction', 'ucb', 'thompson')
    },
    'preproc': {
        'aspect_value': {'item_scoring': {'history_preprocessor_name': 'AspectValuePreprocessor'}},
        'aspect_key_value': {'item_scoring': {'history_preprocessor_name': 'AspectKVPreprocessor'}},
        'concat': {'item_scoring': {'history_preprocessor_name': 'HistoryConcatenator'}},
    },
    'scorer': {
        'mnli': {'item_scoring': {'item_scorer_name': 'MNLIScorer'}},
        'ce': {'item_scoring': {'item_scorer_name': 'CEScorer'}},
    },
    'mnli_temp': {
        t: {'item_scoring': {'mnli_temp': t}} for t in (1, 2, 4, 5, 10, 20, 50, 100)
    },
    'llm': {
        '1106': {'llm': {'model': "gpt-3.5-turbo-1106",
                         'llm_name': 'GPTChatCompletion'}},
        # NOTE(review): OpenAI's dated snapshot is named "gpt-3.5-turbo-0613";
        # confirm whether the missing leading 0 here is intentional.
        '613': {'llm': {'model': "gpt-3.5-turbo-613",
                        'llm_name': 'GPTChatCompletion'}},
        'turbo-instruct': {'llm': {'model': "gpt-3.5-turbo-instruct",
                                   'llm_name': 'GPTCompletion'}},
    },
    'asp_prpt': {
        'davidFS2_kv': {'query': {'aspect_gen_template': "pointwise_aspect_kv_generation.jinja2",
                                  'aspect_extraction': 'key_val'}},
        'antonFS3_kv': {'query': {'aspect_gen_template': "pointwise_aspect_kv_generation_movies_3FS.jinja2",
                                  'aspect_extraction': 'key_val'}},
        'FS3_v': {'query': {'aspect_gen_template': "pointwise_aspect_v_generation_movies_3FS_3words.jinja2",
                            'aspect_extraction': 'val'}},
    },
    'noise': {
        p: {'dialogue_sim': {'response_noise': p}} for p in (0, 0.1, 0.25, 0.5)
    },
    'qry_prpt': {
        'kv': {'query': {'query_gen_template': "pointwise_query_generation.jinja2"}},
        'v': {'query': {'query_gen_template': "pointwise_query_generation_v.jinja2"}},
    },
}

In [32]:
# Define the parameter grid.
# Each key names a parameter and maps to the list of values to sweep; the
# generation cell writes one config per element of the Cartesian product of
# these lists. Keys and values must exist in value_mapping, otherwise the
# generation cell prints "No mapping found" and leaves the config unchanged
# for that parameter. Commented-out entries are kept as a menu of options.
param_grid = {
        #'data': ['movies_16'],
        #'usr': ['movies_16_5'],
        #'turns': [10],
        'llm_temp': [0,1],
        'qry_prpt': ['kv','v']
        #'pe': ['DT'],
        #'resp_upd': ['individ'],
        #'n_rec': [10],
        # 'noise': [0, 0.1, 0.25, 0.5],
        #'selection': ['ucb'], #['greedy','ucb','random','entropy_reduction'],
        #'preproc': ['aspect_value'],
        #'scorer': ['mnli'],
        #'mnli_temp': [1, 20],
        # 'asp_prpt': ['davidFS2_kv'] #['davidFS2_kv','antonFS3_kv','FS3_v']
        #'llm': ['1106'] # keys defined in value_mapping: '1106', '613', 'turbo-instruct'
    }

# NOTE: Current eval_manager can't handle multiple user ground truth files


In [33]:
# Name of the experiment set; all generated configs are written under
# experiments/<experiment_set>/.
experiment_set = 'jan_16_value_query_ucb20'

In [34]:

# Load the base config file
with open(BASE_CONFIG_PATH, 'r') as f:
    base_config = yaml.safe_load(f)

# Generate and save one config file per combination of grid values
# (Cartesian product of the lists in param_grid).
for param_values in product(*param_grid.values()):
    param_values_dict = dict(zip(param_grid.keys(), param_values))
    experiment_name = generate_experiment_name(param_values_dict)
    updated_config = yaml.safe_load(yaml.dump(base_config))  # deep copy via YAML round-trip

    # Apply updates to the config based on the current parameter values
    for param, value in param_values_dict.items():
        if param in value_mapping and value in value_mapping[param]:
            deep_update(updated_config, value_mapping[param][value])
        else:
            # Unknown parameter/value: the config is left unchanged for it.
            print(f"No mapping found for parameter '{param}' with value '{value}'")

    # Add logging file (the base config must already have a 'logging' section)
    updated_config['logging']['log_file'] = os.path.join(f'experiments/{experiment_set}', experiment_name, "output.log")

    save_config(updated_config, f'experiments/{experiment_set}', experiment_name)

# Build the set-level base config: parameters that have exactly one value in
# the grid are constant across the whole set, so fold them into the base.
new_base_config = yaml.safe_load(yaml.dump(base_config))  # deep copy via YAML round-trip
for param_key, param_value in param_grid.items():
    if len(param_value) != 1:
        continue
    single_value = param_value[0]
    # Check the parameter being folded in. The previous code tested the stale
    # `param` / `value` left over from the loop above, so the guard did not
    # protect this lookup.
    if param_key in value_mapping and single_value in value_mapping[param_key]:
        deep_update(new_base_config, value_mapping[param_key][single_value])

# Saves base config for evaluation purposes.
# NOTE(review): with the values currently set in this notebook,
# BASE_CONFIG_PATH is experiments/<experiment_set>/config.yaml, the same file
# this call writes, so the loaded base config is overwritten -- confirm this
# is intended.
save_base_config(new_base_config, experiment_set)

In [35]:
# Number of configs the grid expands to (one per combination of grid values)
sum(1 for _ in product(*param_grid.values()))

4