In [None]:
from typing import Dict, List, Any, Union, Optional
import os
from collections import defaultdict
import numpy as np

from matplotlib import pyplot as plt
import matplotlib
from copy import deepcopy
import math


def measure_cov(data: List[Union[int, float]], cov_type: str) -> np.float64:
    """
    Estimate the coefficient of variation (COV) of a list of numbers.

    Two estimators are available, both built from the sample standard
    deviation (computed with ddof=1) and the sample mean:

        "simple":
            COV = sample standard deviation / sample mean

        "unbiased_normal":
            COV = (1 + 1 / (4 * n)) * sample standard deviation / sample mean
            where n is the number of data points.

    Input:
        data (List[Union[int, float]]):
            List of data points

        cov_type (str):
            Name of the estimator, either "simple" or "unbiased_normal"

    Output:
        cov (np.float64):
            Estimate of the coefficient of variation

    Raises:
        ValueError: if cov_type is not one of the supported names
    """
    # Both statistics are computed up front, before the estimator is selected.
    sample_mean = np.mean(data)
    sample_std = np.std(data, ddof=1)

    if cov_type == "simple":
        return sample_std / sample_mean

    if cov_type == "unbiased_normal":
        correction = 1.0 + 1.0 / (4 * len(data))
        return correction * (sample_std / sample_mean)

    raise ValueError(f"Given cov type {cov_type} not supported.")


def get_statistics_for_already_sampled_data(
    alread_sampled_data: Optional[List[List[Dict[str, Any]]]],
    cov_type: str,
    game_scenario_categories: Dict[str, str],
) -> Dict[str, Any]:
    """
    Given previously collected records, this function calculates the COVs
    and the sampled game scenarios per category.

    Input:
        alread_sampled_data (Optional[List[List[Dict[str, Any]]]]):
            Iterable of records, where each record is an iterable of trials.
            Every trial is indexed with the keys "env_game_scenario",
            "judge_label", and "num_turns" or "max_turns".
            All trials within one record are assumed to come from the same
            game scenario.
            None is treated as "nothing has been sampled yet".

        cov_type (str):
            Type of the COV estimator.
            Currently supported types: "simple" and "unbiased_normal"

        game_scenario_categories (Dict[str, str]):
            A dictionary mapping game scenario (eg., the topic to guess in 20 questions)
                to the game category (eg., easy, medium, hard)

    Output:
        A dictionary of the following format:
        {
            "samples_by_category": samples_by_category,
            "cov_by_category": cov_by_category,
        }

        samples_by_category (defaultdict):
            A dictionary mapping categories (e.g., easy, medium, hard) to
            list of game scenarios sampled for that particular category
            (one entry per record, so repeats are kept)

        cov_by_category (defaultdict):
            A dictionary mapping categories (e.g., easy, medium, hard) to
            list of covs, one per record of that category that has more
            than one trial
    """
    cov_by_category = defaultdict(list)
    samples_by_category = defaultdict(list)
    statistics = {
        "samples_by_category": samples_by_category,
        "cov_by_category": cov_by_category,
    }

    # The signature allows None; return empty statistics instead of failing
    # with a TypeError when trying to iterate over it.
    if alread_sampled_data is None:
        return statistics

    for record in alread_sampled_data:
        num_turn_data = []
        scenario = None

        # we assume all data within a single record comes from the same scenario
        for trial in record:
            if scenario is None:
                scenario = trial["env_game_scenario"]

            # Trials with a truthy judge label contribute the number of turns
            # taken; every other trial is counted at the maximum turn budget.
            if trial["judge_label"]:
                num_turn_data.append(trial["num_turns"])
            else:
                num_turn_data.append(trial["max_turns"])

        assert scenario is not None, "record contains no trial with a game scenario"

        scenario_category = game_scenario_categories[scenario]
        samples_by_category[scenario_category].append(scenario)

        # A COV needs at least two data points (sample std uses ddof=1).
        if len(num_turn_data) > 1:
            cov = measure_cov(data=num_turn_data, cov_type=cov_type)
            cov_by_category[scenario_category].append(cov)

    return statistics

In [2]:
from typing import Dict, List, Any, Optional, Tuple
from collections import defaultdict
import numpy as np
import os

from llm_exploration.game import GameEnvironment
from llm_exploration.utils.data_utils import read_json, write_json
from llm_exploration.utils.torch_utils import set_seed_everywhere


def load_data_pool(pool_dir):
    """
    Build a lookup from game scenario to record out of a directory of json files.

    Every file in pool_dir whose name ends with ".json" is read with
    read_json, and each entry of its "records" list is stored under the
    "env_game_scenario" value of that record's first trial. Records without
    any trial are ignored, and a later record for the same scenario
    overwrites an earlier one.

    Input:
        pool_dir (str):
            Directory that holds the json files

    Output:
        all_data (dict):
            A dictionary mapping game scenario to its record
    """
    scenario_to_record = {}
    json_file_names = [name for name in os.listdir(pool_dir) if name.endswith(".json")]

    for file_name in json_file_names:
        contents = read_json(os.path.join(pool_dir, file_name))

        for record in contents["records"]:
            # Only the first trial is needed to identify the record's scenario.
            for first_trial in record:
                scenario_to_record[first_trial["env_game_scenario"]] = record
                break

    return scenario_to_record


class Curriculum:
    """
    Base class for our curriculum generation.

    Game scenarios are grouped into categories. A batch is drawn either
    proportionally to the category sizes ("uniform") or from the category
    with the highest UCB score ("curriculum"), where the reward of a
    category is the average COV observed for it so far.
    """

    def __init__(
        self,
        # Quoted so that the annotation is not evaluated when the class is defined.
        game_env: "GameEnvironment",
        game_scenario_categories: Dict[str, str],
        game_config: Dict[str, Any],
        cov_type: str,
        sample_with_replacement: bool,
        data_pool_to_sample_from: str,
        alpha: float,
    ):
        """
        Input:
            game_env (GameEnvironment):
                Particular game environment that we are considering
                NOTE: when we move to multiple environments, this may need a code rewrite

            game_scenario_categories (Dict[str, str]):
                A dictionary mapping game scenario (eg., the topic to guess in 20 questions)
                to the game category (eg., easy, medium, hard)

            game_config (Dict[str, Any]):
                The config for retrieving the data. Usually would contain
                "data_type" (e.g., train, eval),
                and "data_subtype" (e.g., "easy", "medium", "hard")

            cov_type (str):
                The type for the COV estimator. For example, "simple" or "unbiased_normal"

            sample_with_replacement (bool):
                Whether to sample the game scenario from the category with replacement or
                not. Only consulted for "curriculum" sampling.

            data_pool_to_sample_from (str):
                We collect data from all possible questions first, and sample from this
                dataset to simulate online sampling

            alpha (float):
                Alpha parameter for UCB

        Raises:
            ValueError: if a scenario of the environment has no category
        """
        self.game_env = game_env
        self.game_scenarios: List[Dict[str, str]] = self.game_env.get_game_scenarios(
            config=game_config,
        )
        self.game_scenario_categories = game_scenario_categories
        self.cov_type = cov_type

        category_to_scenario_map = defaultdict(set)
        game_scenario_to_index_map = {}

        for index, game_scenario in enumerate(self.game_scenarios):
            scenario = game_scenario["env"]
            if scenario not in self.game_scenario_categories:
                raise ValueError(f"Given scenario {scenario} not found in category map.")

            scenario_category = self.game_scenario_categories[scenario]
            category_to_scenario_map[scenario_category].add(scenario)
            game_scenario_to_index_map[scenario] = index

        self.category_to_scenario_map = category_to_scenario_map
        self.game_scenario_to_index_map = game_scenario_to_index_map
        self.sample_with_replacement = sample_with_replacement

        self.data_pool = load_data_pool(
            pool_dir=data_pool_to_sample_from,
        )

        self.alpha = alpha

        # Bandit statistics: number of COV observations and their sum, per category
        self.frequency_of_sampled_category = defaultdict(int)
        self.total_reward_of_sampled_category = defaultdict(int)

        # Scenarios already handed out, per category; used to sample without replacement
        self.sampled_scenarios_by_category = defaultdict(set)

    def print_statistics_of_newly_sampled_data(
        self,
        chosen_categories_stats: defaultdict,
    ) -> None:
        """
        Helper function to print statistics of the newly sampled batch.
        Prints the number of goals sampled per category

        Input:
            chosen_categories_stats (defaultdict):
                A dictionary from category to number of samples in the new batch for
                that category

        Output:
            None
        """
        print("\nPer Category Chosen Data: ")
        for category in chosen_categories_stats:
            print("Category: ", category, "Num samples: ", chosen_categories_stats[category])
        print()

    def _update_statistics(self, curr_samples: Any) -> None:
        """Fold the COVs and scenarios found in curr_samples into the running statistics."""
        current_statistics = get_statistics_for_already_sampled_data(
            alread_sampled_data=curr_samples,
            game_scenario_categories=self.game_scenario_categories,
            cov_type=self.cov_type,
        )

        for category, covs in current_statistics["cov_by_category"].items():
            for cov in covs:
                self.frequency_of_sampled_category[category] += 1
                self.total_reward_of_sampled_category[category] += cov

        for category, scenarios in current_statistics["samples_by_category"].items():
            self.sampled_scenarios_by_category[category].update(scenarios)

    def _rank_categories_by_ucb(self) -> List[str]:
        """Return all categories ordered from the highest to the lowest UCB score."""
        total_counts = sum(self.frequency_of_sampled_category.values())

        scored_categories = []
        for category in self.category_to_scenario_map:
            count = self.frequency_of_sampled_category[category]

            if count == 0:
                # categories without any observation are always tried first
                score = float("inf")
            else:
                average_reward = self.total_reward_of_sampled_category[category] / count
                exploration_bonus = self.alpha * math.sqrt(
                    (2 * math.log(total_counts)) / float(count)
                )
                score = average_reward + exploration_bonus

            scored_categories.append((category, score))

        # Stable ascending sort followed by a reversal: among tied scores the
        # category that comes last in category_to_scenario_map is ranked first.
        scored_categories.sort(key=lambda item: item[1])
        return [category for category, _ in reversed(scored_categories)]

    def _pick_unsampled_scenario(
        self,
        ranked_categories: List[str],
        scenarios_by_category: Dict[str, List[str]],
    ) -> Optional[Tuple[str, str]]:
        """Draw a not-yet-sampled scenario from the best category that has one left."""
        for category in ranked_categories:
            already_sampled = self.sampled_scenarios_by_category[category]
            left_over_game_scenarios = [
                scenario
                for scenario in scenarios_by_category[category]
                if scenario not in already_sampled
            ]

            # this category is exhausted, fall through to the next best one
            if not left_over_game_scenarios:
                continue

            chosen_game_scenario = np.random.choice(
                a=left_over_game_scenarios,
                size=None,
            )
            already_sampled.add(chosen_game_scenario)
            return category, chosen_game_scenario

        return None

    def generate_next_batch(
        self,
        curr_samples: Optional[Any],
        batch_size: int,
        sampling_type: str,
    ):
        """
        Generates a new batch of topics/game scenarios to generate trajectories from.

        Input:
            curr_samples (Optional[Any]):
                Records collected since the previous call, in the format accepted by
                get_statistics_for_already_sampled_data. Their COVs update the
                per-category statistics before sampling. None means no feedback.

            batch_size (int):
                Number of game scenarios to sample

            sampling_type (str):
                "uniform": pick a category with probability proportional to its
                    number of scenarios, then a scenario from it uniformly at random
                "curriculum": pick scenarios from the category with the highest UCB
                    score; without replacement, fall back to the next best category
                    once a category has no unsampled scenario left

        Output:
            sampled_data (list):
                The data pool records of the sampled game scenarios. May be shorter
                than batch_size when sampling without replacement exhausts the data.

            chosen_categories_stats (defaultdict):
                A dictionary from category to number of samples in this batch
        """
        assert sampling_type in ["curriculum", "uniform"]

        # Candidates are sorted because the iteration order of a set of strings
        # varies between processes, which would make seeded runs irreproducible.
        scenarios_by_category = {
            category: sorted(scenarios)
            for category, scenarios in self.category_to_scenario_map.items()
        }

        if curr_samples is not None:
            self._update_statistics(curr_samples)

        sampled_data = []
        chosen_categories_stats = defaultdict(int)

        if sampling_type == "uniform":
            all_categories = list(scenarios_by_category)
            total_frequency = sum(
                len(scenarios) for scenarios in scenarios_by_category.values()
            )
            probs = [
                len(scenarios_by_category[category]) / total_frequency
                for category in all_categories
            ]

            for _ in range(batch_size):
                sampled_category = np.random.choice(
                    a=all_categories,
                    size=None,
                    p=probs,
                )
                chosen_game_scenario = np.random.choice(
                    a=scenarios_by_category[sampled_category],
                    size=None,
                )

                chosen_categories_stats[sampled_category] += 1
                sampled_data.append(self.data_pool[chosen_game_scenario])

            return sampled_data, chosen_categories_stats

        # The statistics do not change while a batch is being drawn,
        # so the ranking is computed once per batch.
        ranked_categories = self._rank_categories_by_ucb()

        for _ in range(batch_size):
            if self.sample_with_replacement:
                max_category = ranked_categories[0]
                chosen_game_scenario = np.random.choice(
                    a=scenarios_by_category[max_category],
                    size=None,
                )

            else:
                pick = self._pick_unsampled_scenario(
                    ranked_categories=ranked_categories,
                    scenarios_by_category=scenarios_by_category,
                )

                if pick is None:
                    print("Dataset is exhausted, can't sample more without replacement.")
                    break

                max_category, chosen_game_scenario = pick

            chosen_categories_stats[max_category] += 1
            sampled_data.append(self.data_pool[chosen_game_scenario])

        return sampled_data, chosen_categories_stats

Could not load the GameSimulator, so cannot use it!


In [3]:
def simulate_online_sampling(
    game_env: GameEnvironment,
    game_scenario_categories: Dict[str, str],
    game_config: Dict[str, Any],
    cov_type: str,
    sample_with_replacement: bool,
    data_pool_to_sample_from: str,
    alpha: float,
    timesteps: int,
    batch_size: int,
    sampling_type: str,
):
    """
    Simulate online data collection by repeatedly drawing batches from a Curriculum.

    A single Curriculum is built from the given arguments. At every timestep
    a batch is generated, and that batch is passed back to the curriculum as
    curr_samples on the following timestep (the first timestep passes None).

    Input:
        game_env, game_scenario_categories, game_config, cov_type,
        sample_with_replacement, data_pool_to_sample_from, alpha:
            Forwarded unchanged to the Curriculum constructor

        timesteps (int):
            Number of batches to draw

        batch_size (int):
            Number of game scenarios requested per batch

        sampling_type (str):
            Sampling strategy forwarded to Curriculum.generate_next_batch

    Output:
        category_frequencies_over_timestep (defaultdict):
            A dictionary mapping category to a list with, for every timestep,
            the number of samples drawn from that category

        sampled_data_per_time_step (list):
            The batch of sampled data for every timestep
    """
    scheduler = Curriculum(
        game_env=game_env,
        game_scenario_categories=game_scenario_categories,
        game_config=game_config,
        cov_type=cov_type,
        sample_with_replacement=sample_with_replacement,
        data_pool_to_sample_from=data_pool_to_sample_from,
        alpha=alpha,
    )

    per_category_counts = defaultdict(list)
    batches = []
    previous_batch = None

    for _ in range(timesteps):
        batch, batch_stats = scheduler.generate_next_batch(
            curr_samples=previous_batch,
            batch_size=batch_size,
            sampling_type=sampling_type,
        )

        # record a count for every known category, also those not drawn this step
        for category in scheduler.category_to_scenario_map:
            per_category_counts[category].append(batch_stats[category])

        batches.append(batch)
        previous_batch = batch

    return per_category_counts, batches

In [4]:
from llm_exploration.game import get_game_environment


def run_one_iteration_of_curriculum(
    alpha: float,
    batch_size: int,
    timesteps: int,
    save_file_path: str,
    random_seed: int,
    sampling_type: str,
):
    """
    Run one simulated round of sampling on twenty questions and save the result.

    Seeds the random number generators, simulates `timesteps` batches of
    online sampling with replacement using the "unbiased_normal" COV
    estimator, prints how many samples each category received, and writes
    all sampled records to save_file_path as {"records": [...]}.

    Input:
        alpha (float):
            Alpha parameter for UCB

        batch_size (int):
            Number of game scenarios sampled per timestep

        timesteps (int):
            Number of batches to sample

        save_file_path (str):
            Path of the json file that receives the sampled records

        random_seed (int):
            Seed passed to set_seed_everywhere

        sampling_type (str):
            Sampling strategy passed on to simulate_online_sampling

    Output:
        None
    """
    set_seed_everywhere(seed=random_seed)

    game_environment = get_game_environment(
        environment_name="twenty_questions",
    )

    game_scenario_categories = read_json(
        fname="/Users/fahimtajwar/academics/llm_exploration/llm_exploration/curriculum/curriculum_configs/twenty_questions_difficulty_train.json",
    )

    game_config = {
        "environment_name": "twenty_questions",
        "data_type": "train",
    }

    cov_type = "unbiased_normal"

    data_pool_to_sample_from = "/Users/fahimtajwar/academics/llm_exploration/exploration_datasets_round_2_curriculum/twenty_questions_datasets/llm_evaluation_on_twenty_questions_split_train_agent_Llama-3.1-8B-Instruct_env_gpt-4o-mini_judge_gpt-4o-mini/curriculum_no_curriculum_round_1"

    category_frequencies_over_timestep, sampled_data_per_time_step = simulate_online_sampling(
        game_env=game_environment,
        game_scenario_categories=game_scenario_categories,
        game_config=game_config,
        cov_type=cov_type,
        sample_with_replacement=True,
        data_pool_to_sample_from=data_pool_to_sample_from,
        alpha=alpha,
        timesteps=timesteps,
        batch_size=batch_size,
        sampling_type=sampling_type,
    )

    # NOTE: a leftover debugging loop over the first three timesteps was
    # removed here; its result was never used and it failed for timesteps < 3.

    for category, counts in category_frequencies_over_timestep.items():
        print(
            "Category: ",
            category,
            "total: ",
            sum(counts),
        )

    print(category_frequencies_over_timestep)

    # flatten the per-timestep batches into one list of records
    all_data = [record for batch in sampled_data_per_time_step for record in batch]

    write_json(
        data={"records": all_data},
        fname=save_file_path,
    )

In [10]:
# Driver cell: 250 timesteps of one sample each, drawn with "uniform" sampling
# under seed 71; the sampled records are written to the json file given below.
run_one_iteration_of_curriculum(
    alpha=1.0,
    batch_size=1,
    timesteps=250,
    save_file_path="/Users/fahimtajwar/academics/llm_exploration/exploration_datasets_train_curriculum/uniform_round_3_seed_71/data.json",
    random_seed=71,
    sampling_type="uniform",
)

Category:  medium total:  119
Category:  easy total:  79
Category:  hard total:  52
defaultdict(<class 'list'>, {'medium': [1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1], 'easy': [0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,