In [1]:
from typing import Optional, Sequence, Tuple
import requests
import json

SYSTEM = "You are a helpful assistant."


def chat_complete(
    *,
    system: str = SYSTEM,
    history: Sequence[Tuple[str, str]] = (),
    question: str = "",
    api_key_path: str = "api_key.txt",
    proxy: Optional[str] = None,
):
    messages = [{"role": "system", "content": system}]
    for user, assistant in history:
        messages.extend(
            (
                {"role": "user", "content": user},
                {"role": "assistant", "content": assistant},
            )
        )
    messages.append({"role": "user", "content": question})

    url = "https://api.openai.com/v1/chat/completions"
    payload = {"model": "gpt-3.5-turbo", "messages": messages, "stream": True}

    with open(api_key_path) as f:
        api_key = f.read().strip()

    headers = {"Authorization": f"Bearer {api_key}"}

    response = requests.post(
        url,
        json=payload,
        headers=headers,
        proxies={"https": proxy} if proxy else None,
        stream=True,
    )

    for line in response.iter_lines(decode_unicode=False):
        line = line.decode("utf-8")
        if line.startswith("data:"):
            if line == "data: [DONE]":
                return
            if data := line[5:]:
                yield json.loads(data)["choices"][0]["delta"].get("content", "")


In [2]:
topics = [
    "Politics",
    "Immigration",
    "Climate Change",
    "Environment",
    "Technology",
    "Social Media",
    "Privacy",
    "Art",
    "Culture",
    "Sports",
    "Health",
    "Education",
    "Media",
    "Science",
    "Religion",
    "Gender Equality",
    "Race Relations",
    "War and Military",
    "International Relations",
    "Economy and Finance",
    "Food and Diet",
    "Travel and Adventure",
    "Animal Rights and Protection",
    "Mental Health",
    "Physical Health",
    "Crime and Justice",
    "Marriage and Family",
    "Fashion and Beauty",
    "Entertainment",
    "Personal Wealth",
    "Charity",
    "Human Rights",
    "Social Justice",
    "Science Fiction and Fantasy",
    "Music",
    "Pop Culture",
    "Infrastructure",
    "Urbanization",
    "Natural Disasters",
    "Emergency Relief",
    "Professional Sports",
    "Energy Efficiency",
    "Sustainability",
    "Career Development",
    "Talent Acquisition",
    "Computer Science",
    "Programming",
    "Mathematics",
    "Statistics",
    "Fine Art",
    "Collectibles",
    "Film and Television",
    "Military History and Technology",
    "Architecture and Design",
    "Audio and Sound Technology",
    "Shopping and Consumer Behavior",
    "Technology Entrepreneurship",
    "Startups",
    "Personal Development",
    "Government and Governance",
    "Parenting and Family",
    "Law and Regulation",
    "Artificial Intelligence",
    "Machine Learning",
    "Interpersonal Relationships and Communication",
]
print(len(topics))


65


In [3]:
import json


def gen_attributes(topic: str, template: str):
    question = (
        """
    一组数据的三个维度分别是类别、时间范围、数值范围，请给出一些这样的数据属性。类别属性里的choice应该包含5到10个。
    例如：[[TEMPLATE]]
    请避免与我给出的句子重复, 你应该替换为 [[TOPIC]] 的知识。
    三个数据维度在意义上应该有关联，只输出一个 json，不要输出额外的内容。
    """.strip()
        .replace("[[TOPIC]]", topic)
        .replace("[[TEMPLATE]]", template)
    )

    try:
        s = ''.join(
            chat_complete(
                system="You are a helpful assistant that process json well.",
                question=question,
                proxy="http://127.0.0.1:7890",
            )
        )

        start, end = s.find("{"), len(s) - s[::-1].find("}")

        attributes = json.loads(s[start:end])
        attributes = next(iter(attributes.values()))        

        return attributes
    except Exception:
        return None


In [5]:
from typing import Union
from pydantic import BaseModel


class Categorical(BaseModel):
    type: str
    name: str
    choice: Sequence[str]


class Temporal(BaseModel):
    type: str
    span: Union[Sequence[str], Sequence[int]]


class Quantitative(BaseModel):
    type: str
    name: str
    range: Union[Sequence[str], Sequence[int]]


In [7]:
import contextlib
import random
import tqdm
import pandas as pd

N = 500

df = pd.DataFrame(columns=["topic", "c", "t", "q"])

template_ctq = """{"attribute": [{"type": "categorical","name": "country","choice": ["China", "Japan", "USA", "Germany", "France", "Iran"]}, {"type": "temporal","span": [2010, 2023]},{"type": "quantitative","name": "GDP","range": [10, 2000]}]}"""

success = 0
error = 0

with tqdm.tqdm(total=N) as pbar:
    for _ in range(N):
        topic = random.choice(topics)

        if attributes := gen_attributes(topic, template_ctq):
            c, t, q = attributes
            with contextlib.suppress(Exception):
                c = Categorical(**c).dict()
                t = Temporal(**t).dict()
                q = Quantitative(**q).dict()
                df = pd.concat(
                    [df, pd.DataFrame({"topic": [topic], "c": [c], "t": [t], "q": [q]})],
                    ignore_index=True,
                )
                success += 1
                pbar.set_description(f"success: {success}, error: {error}")
                pbar.update(1)
                df.to_csv("ctq_attribute.csv", index=False)
                continue
        error += 1
        pbar.set_description(f"success: {success}, error: {error}")
        pbar.update(1)



success: 168, error: 4:  34%|███▍      | 172/500 [35:16<1:07:16, 12.31s/it]


KeyboardInterrupt: 

In [8]:
import pandas as pd

pd.read_csv("ctq_attribute.csv").head(10)

Unnamed: 0,topic,c,t,q
0,Technology,"{'type': 'categorical', 'name': 'industry', 'c...","{'type': 'temporal', 'span': ['2015', '2025']}","{'type': 'quantitative', 'name': 'Revenue', 'r..."
1,Startups,"{'type': 'categorical', 'name': 'industry', 'c...","{'type': 'temporal', 'span': ['2015', '2021']}","{'type': 'quantitative', 'name': 'Funding', 'r..."
2,Mathematics,"{'type': 'categorical', 'name': 'mathematical ...","{'type': 'temporal', 'span': ['1990', '2020']}","{'type': 'quantitative', 'name': 'number of pu..."
3,Marriage and Family,"{'type': 'categorical', 'name': 'relationship_...","{'type': 'temporal', 'span': ['1980', '2023']}","{'type': 'quantitative', 'name': 'number_of_ch..."
4,Physical Health,"{'type': 'categorical', 'name': 'activity', 'c...","{'type': 'temporal', 'span': ['January 1, 2020...","{'type': 'quantitative', 'name': 'Calories Bur..."
5,Sustainability,"{'type': 'categorical', 'name': 'industry', 'c...","{'type': 'temporal', 'span': ['2000', '2025']}","{'type': 'quantitative', 'name': 'Carbon Emiss..."
6,Travel and Adventure,"{'type': 'categorical', 'name': 'destination',...","{'type': 'temporal', 'span': ['2022 Spring', '...","{'type': 'quantitative', 'name': 'budget', 'ra..."
7,Parenting and Family,"{'type': 'categorical', 'name': 'Parenting Sty...","{'type': 'temporal', 'span': ['1990', '2021']}","{'type': 'quantitative', 'name': 'Number of Ch..."
8,Education,"{'type': 'categorical', 'name': 'degree', 'cho...","{'type': 'temporal', 'span': ['1990', '2022']}","{'type': 'quantitative', 'name': 'Salary', 'ra..."
9,War and Military,"{'type': 'categorical', 'name': 'country', 'ch...","{'type': 'temporal', 'span': ['1939', '1945']}","{'type': 'quantitative', 'name': 'casualties',..."


In [28]:
templates = []


def template(t: str):
    def wrapper(f):
        templates.append((t, f))
        return f

    return wrapper


# @template("What is the {quantity} of {category_1}?")
# def cq_identify(category, category_1, quantity, **_):
#     return {
#         "identify": quantity,
#         "filter": [{"attr": category, "op": "=", "value": category_1}],
#     }


# @template("Which {category} has the highest {quantity}?")
# def cq_max(category, quantity, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "max", "attribute": quantity},
#             }
#         ],
#     }


# @template("Which {category} has the lowest {quantity}?")
# def cq_min(category, quantity, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "min", "attribute": quantity},
#             }
#         ],
#     }


# @template("What is the difference of {quantity} between {category_1} and {category_2}?")
# def cq_diff(category, category_1, category_2, quantity, **_):
#     return {
#         "compare": quantity,
#         "list": [
#             {
#                 "identify": quantity,
#                 "filter": [{"attr": category, "op": "=", "value": category_1}],
#             },
#             {
#                 "identify": quantity,
#                 "filter": [{"attr": category, "op": "=", "value": category_2}],
#             },
#         ],
#     }


# @template("Which {category} has the {quantity} that is above average?")
# def cq_above_avg(category, quantity, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {
#                 "attr": quantity,
#                 "op": ">",
#                 "value": {"operation": "avg", "attribute": quantity},
#             }
#         ],
#     }


# @template("Which {category} has the {quantity} that is below average?")
# def cq_below_avg(category, quantity, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {
#                 "attr": quantity,
#                 "op": "<",
#                 "value": {"operation": "avg", "attribute": quantity},
#             }
#         ],
#     }


# @template("what is the sum of {quantity} of {category_1} and {category_2}?")
# def cq_sum(category, category_1, category_2, quantity, **_):
#     return {
#         "sum": quantity,
#         "list": [
#             {
#                 "identify": quantity,
#                 "filter": [{"attr": category, "op": "=", "value": category_1}],
#             },
#             {
#                 "identify": quantity,
#                 "filter": [{"attr": category, "op": "=", "value": category_2}],
#             },
#         ],
#     }


# @template("What is the {quantity} of {category_1} in {time_1}?")
# def ctq_identify(category, category_1, quantity, time, time_1, **_):
#     return {
#         "identify": quantity,
#         "filter": [
#             {"attr": category, "op": "=", "value": category_1},
#             {"attr": time, "op": "=", "value": time_1},
#         ],
#     }


# @template("Which {category} has the highest {quantity} in {time_1}?")
# def ctq_max(category, quantity, time_1, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": "time", "op": "=", "value": time_1},
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "max", "attribute": quantity},
#             },
#         ],
#     }


# @template("Which {category} has the lowest {quantity} in {time_1}?")
# def ctq_min(category, quantity, time_1, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": "time", "op": "=", "value": time_1},
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "min", "attribute": quantity},
#             },
#         ],
#     }


# @template(
#     "What is the difference of {quantity} between {category_1} and {category_2} in {time_1}?"
# )
# def ctq_diff(category, category_1, category_2, quantity, time_1, **_):
#     return {
#         "compare": quantity,
#         "list": [
#             {
#                 "identify": category,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_1},
#                     {"attr": "time", "op": "=", "value": time_1},
#                 ],
#             },
#             {
#                 "identify": category,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_2},
#                     {"attr": "time", "op": "=", "value": time_1},
#                 ],
#             },
#         ],
#     }


# @template("What is the trend of the {quantity} of {category_1}?")
# def ctq_trend(category, category_1, quantity, **_):
#     return {
#         "trend": quantity,
#         "filter": [{"attr": category, "op": "=", "value": category_1}],
#     }


# @template("What is the sum of {quantity} of {category_1} and {category_2} in {time_1}?")
# def ctq_sum(category, category_1, category_2, quantity, time_1, **_):
#     return {
#         "sum": quantity,
#         "list": [
#             {
#                 "identify": quantity,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_1},
#                     {"attr": "time", "op": "=", "value": time_1},
#                 ],
#             },
#             {
#                 "identify": quantity,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_2},
#                     {"attr": "time", "op": "=", "value": time_1},
#                 ],
#             },
#         ],
#     }


# @template("Which {category} has the highest {quantity} from {time_1} to {time_2}?")
# def ctq_max_range(category, quantity, time_1, time_2, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "max", "attribute": quantity},
#             },
#         ],
#     }


# @template("Which {category} has the lowest {quantity} from {time_1} to {time_2}?")
# def ctq_min_range(category, quantity, time_1, time_2, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#             {
#                 "attr": quantity,
#                 "op": "=",
#                 "value": {"operation": "min", "attribute": quantity},
#             },
#         ],
#     }


# @template(
#     "What is the sum of {quantity} of {category_1} and {category_2} from {time_1} to {time_2}?"
# )
# def ctq_sum_range(category, category_1, category_2, quantity, time_1, time_2, **_):
#     return {
#         "sum": quantity,
#         "list": [
#             {
#                 "identify": quantity,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_1},
#                     {"attr": "time", "op": ">=", "value": time_1},
#                     {"attr": "time", "op": "<=", "value": time_2},
#                 ],
#             },
#             {
#                 "identify": quantity,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_2},
#                     {"attr": "time", "op": ">=", "value": time_1},
#                     {"attr": "time", "op": "<=", "value": time_2},
#                 ],
#             },
#         ],
#     }


# @template(
#     "What is the difference of {quantity} between {category_1} and {category_2} from {time_1} to {time_2}?"
# )
# def ctq_diff_range(category, category_1, category_2, quantity, time_1, time_2, **_):
#     return {
#         "compare": quantity,
#         "list": [
#             {
#                 "identify": category,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_1},
#                     {"attr": "time", "op": ">=", "value": time_1},
#                     {"attr": "time", "op": "<=", "value": time_2},
#                 ],
#             },
#             {
#                 "identify": category,
#                 "filter": [
#                     {"attr": category, "op": "=", "value": category_2},
#                     {"attr": "time", "op": ">=", "value": time_1},
#                     {"attr": "time", "op": "<=", "value": time_2},
#                 ],
#             },
#         ],
#     }


# @template(
#     "What is the trend of the {quantity} of {category_1} from {time_1} to {time_2}?"
# )
# def ctq_trend_range(category, category_1, quantity, time_1, time_2, **_):
#     return {
#         "trend": quantity,
#         "filter": [
#             {"attr": category, "op": "=", "value": category_1},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }


# added


# @template(
#     "What is the trend of the {quantity} of {category_1} and {category_2} from {time_1} to {time_2}?"
# )
# def ctq_multi_trend_range(category, category_1, category_2, quantity, time_1, time_2, **_):
#     return {
#         "trend": quantity,
#         "filter": [
#             {"attr": category, "op": "in", "value": [category_1, category_2]},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }


# @template(
#     "Among {category_1} and {category_2}, what is the {category} with the highest {quantity} from {time_1} to {time_2}?"
# )
# def ctq_max_value_choice_range(category, category_1, category_2, quantity, time_1, time_2, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": category, "op": "in", "value": [category_1, category_2]},
#             {"attr": quantity, "op": "=", "value": {"operation": "max", "attribute": quantity}},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }

# @template(
#     "Among {category_1}, {category_2}, and {category_3}, what is the {category} with the highest {quantity} from {time_1} to {time_2}?"
# )
# def ctq_max_value_3_choice_range(category, category_1, category_2, category_3, quantity, time_1, time_2, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": category, "op": "in", "value": [category_1, category_2, category_3]},
#             {"attr": quantity, "op": "=", "value": {"operation": "max", "attribute": quantity}},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }

# @template(
#     "Among {category_1} and {category_2}, what is the {category} with higher {quantity} from {time_1} to {time_2}?"
# )
# def ctq_top_value_2_choice_range(category, category_1, category_2, rank_1, quantity, time_1, time_2, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": category, "op": "in", "value": [category_1, category_2]},
#             {"attr": {'operation': 'rank', 'attribute': quantity}, "op": "in", "value": [1, rank_1]},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }

# @template(
#     "What is the {category} with the top-{rank_1} {quantity} from {time_1} to {time_2}?"
# )
# def ctq_top_3_choice_range(category, rank_1, quantity, time_1, time_2, **_):

#     if rank_1 == 1:
#         op = '='
#         value = 1

#     return {
#         "identify": category,
#         "filter": [
#             {"attr": {'operation': 'rank', 'attribute': quantity}, "op": "in", "value": [1, rank_1]},
#             {"attr": "time", "op": ">=", "value": time_1},
#             {"attr": "time", "op": "<=", "value": time_2},
#         ],
#     }


# @template(
#     "What is the {category} with the top-{rank_1} {quantity} in {time_1}?"
# )
# def ctq_top_3_choice_single_time(category, rank_1, quantity, time_1, **_):
#     return {
#         "identify": category,
#         "filter": [
#             {"attr": {'operation': 'rank', 'attribute': quantity}, "op": "in", "value": [1, rank_1]},
#             {"attr": "time", "op": "=", "value": time_1}
#         ],
#     }

@template(
    "What is the average {quantity}?"
)
def avg_quan(quantity, **_):
    return {
        "identify": {
            "operation": "avg",
            "attribute": quantity
        }
    }

@template(
    "What is the average {quantity} from {time_1} to {time_2}?"
)
def avg_quan_time_range(quantity, time_1, time_2, **_):
    return {
        "identify": {
            "operation": "avg",
            "attribute": quantity
        },
        "filter": [
            {"attr": "time", "op": ">=", "value": time_1},
            {"attr": "time", "op": "<=", "value": time_2}
        ]
    }

@template(
    "What is the average {quantity} in {time_1}?"
)
def avg_quan_time_single_range(quantity, time_1, **_):
    return {
        "identify": {
            "aggregate": "avg",
            "attribute": quantity
        },
        "filter": [
            {"attr": "time", "op": "=", "value": time_1}
        ]
    }


In [29]:
import random
from typing import Callable, Tuple


def construct_question(attributes, template: Tuple[str, Callable]):
    c, t, q = attributes
    sentence_t, query_t = template

    params = {
        "category": c["name"],
        "quantity": q["name"],
        "time_1": t["span"][0],
        "time_2": t["span"][1],
    }
    params["category_1"], params["category_2"], params['category_3'] = random.sample(c["choice"], k=3)
    params['rank_1'] = random.randint(1, 3)

    sentence = sentence_t.format(**params).replace("_", " ")
    query = query_t(**params)

    question = f"""
    请用另一种说法表达以下问题：
    {sentence}
    """.strip()

    sentence_refined = ''.join(
        chat_complete(
            system="You are a helpful assistant that has a good command of language.",
            question=question,
            proxy="http://127.0.0.1:7890",
        )
    )

    return sentence, sentence_refined, query


In [30]:
import json
import random
import tqdm
import pandas as pd

df = pd.read_csv("ctq_attribute.csv")
res = pd.DataFrame(columns=["input", "output"])

file_output = 'ctq_with_avg.csv'

N = 5000
N = 50

with tqdm.tqdm(total=N) as pbar:
    success = 0
    error = 0
    pbar.set_description(f"Success: {success}, Error: {error}")
    for _ in range(N):
        _, c, t, q = df.sample().iloc[0]
        try:
            c = eval(c)
            t = eval(t)
            q = eval(q)
            template = random.choice(templates)

            print(c,t,q, template)

            sentence, sentence_refined, query = construct_question((c, t, q), template)

            input_ = {
                "attributes": [c, t, q],
                "sentence": sentence_refined,
            }
            output = query

            print(output)

            res = pd.concat(
                [
                    res,
                    pd.DataFrame(
                        {"input": [json.dumps(input_)], "output": [json.dumps(output)]}
                    ),
                ],
                ignore_index=True,
            )
            res.to_csv(file_output, index=False)
            success += 1
            pbar.update(1)
        except Exception:
            error += 1

        pbar.set_description(f"Success: {success}, Error: {error}")


Success: 0, Error: 0:   0%|          | 0/50 [00:00<?, ?it/s]

{'type': 'categorical', 'name': 'crime_type', 'choice': ['Assault', 'Burglary', 'Robbery', 'Larceny-Theft', 'Rape', 'Homicide', 'Drug Offenses', 'Fraud', 'Vandalism', 'Motor Vehicle Theft']} {'type': 'temporal', 'span': ['1990', '2021']} {'type': 'quantitative', 'name': 'number_of_crimes_reported', 'range': ['0', '100000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 1, Error: 0:   2%|▏         | 1/50 [00:03<02:49,  3.46s/it]

{'identify': {'operation': 'avg', 'attribute': 'number_of_crimes_reported'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1990'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'brand', 'choice': ['Chanel', 'Louis Vuitton', 'Gucci', 'Prada', 'Dior', 'Yves Saint Laurent', 'Hermes', 'Burberry', 'Versace']} {'type': 'temporal', 'span': ['2010', '2021']} {'type': 'quantitative', 'name': 'price', 'range': ['100', '5000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 2, Error: 0:   4%|▍         | 2/50 [00:05<02:05,  2.61s/it]

{'identify': {'operation': 'avg', 'attribute': 'price'}}
{'type': 'categorical', 'name': 'country', 'choice': ['USA', 'Russia', 'China', 'UK', 'Germany', 'France', 'Japan', 'North Korea']} {'type': 'temporal', 'span': ['1914', '1945']} {'type': 'quantitative', 'name': 'military expenditure', 'range': ['0.1', '2000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 3, Error: 0:   6%|▌         | 3/50 [00:09<02:25,  3.09s/it]

{'identify': {'operation': 'avg', 'attribute': 'military expenditure'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1914'}, {'attr': 'time', 'op': '<=', 'value': '1945'}]}
{'type': 'categorical', 'name': 'animal type', 'choice': ['dogs', 'cats', 'birds', 'fish', 'rabbits']} {'type': 'temporal', 'span': ['2010', '2022']} {'type': 'quantitative', 'name': 'animal count', 'range': ['50', '5000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 4, Error: 0:   8%|▊         | 4/50 [00:11<02:13,  2.90s/it]

{'identify': {'operation': 'avg', 'attribute': 'animal count'}}
{'type': 'categorical', 'name': 'fashion brand', 'choice': ['Gucci', 'Chanel', 'Louis Vuitton', 'Prada', 'Dior', 'Hermès', 'Saint Laurent', 'Versace', 'Burberry', 'Balenciaga']} {'type': 'temporal', 'span': ['2010', '2022']} {'type': 'quantitative', 'name': 'price range', 'range': ['100', '5000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 5, Error: 0:  10%|█         | 5/50 [00:14<02:14,  2.99s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'price range'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2010'}]}
{'type': 'categorical', 'name': 'league', 'choice': ['NBA', 'NFL', 'MLB', 'NHL', 'MLS']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'salary', 'range': ['10000', '500000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 6, Error: 0:  12%|█▏        | 6/50 [00:17<02:06,  2.87s/it]

{'identify': {'operation': 'avg', 'attribute': 'salary'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2000'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'race', 'choice': ['White', 'Black', 'Asian', 'Hispanic', 'Native American', 'Pacific Islander']} {'type': 'temporal', 'span': ['1990', '2020']} {'type': 'quantitative', 'name': 'income', 'range': ['0', '100000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 7, Error: 0:  14%|█▍        | 7/50 [00:20<02:07,  2.96s/it]

{'identify': {'operation': 'avg', 'attribute': 'income'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1990'}, {'attr': 'time', 'op': '<=', 'value': '2020'}]}
{'type': 'categorical', 'name': 'country', 'choice': ['USA', 'Russia', 'China', 'UK', 'Germany', 'France', 'Japan', 'North Korea']} {'type': 'temporal', 'span': ['1914', '1945']} {'type': 'quantitative', 'name': 'military expenditure', 'range': ['0.1', '2000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 8, Error: 0:  16%|█▌        | 8/50 [00:30<03:30,  5.02s/it]

{'identify': {'operation': 'avg', 'attribute': 'military expenditure'}}
{'type': 'categorical', 'name': 'media type', 'choice': ['TV', 'radio', 'newspaper', 'magazine', 'online news', 'social media', 'podcast']} {'type': 'temporal', 'span': ['2015', '2021']} {'type': 'quantitative', 'name': 'sales', 'range': ['500', '10000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 9, Error: 0:  18%|█▊        | 9/50 [00:33<03:02,  4.45s/it]

{'identify': {'operation': 'avg', 'attribute': 'sales'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2015'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'crime_type', 'choice': ['Assault', 'Burglary', 'Robbery', 'Drug offenses', 'Motor theft']} {'type': 'temporal', 'span': ['2010', '2021']} {'type': 'quantitative', 'name': 'number_of_crimes', 'range': ['10', '100000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 10, Error: 0:  20%|██        | 10/50 [00:36<02:41,  4.03s/it]

{'identify': {'operation': 'avg', 'attribute': 'number_of_crimes'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2010'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'war', 'choice': ['World War I', 'World War II', 'Korean War', 'Vietnam War', 'Gulf War', 'Iraq War']} {'type': 'temporal', 'span': ['1914', '2019']} {'type': 'quantitative', 'name': 'casualties', 'range': ['100', '8000000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 11, Error: 0:  22%|██▏       | 11/50 [00:40<02:32,  3.92s/it]

{'identify': {'operation': 'avg', 'attribute': 'casualties'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1914'}, {'attr': 'time', 'op': '<=', 'value': '2019'}]}
{'type': 'categorical', 'name': 'type of relationship', 'choice': ['friendship', 'romantic partnership', 'family', 'professional relationships']} {'type': 'temporal', 'span': ['less than 1 year', '1-5 years', '5-10 years', 'more than 10 years']} {'type': 'quantitative', 'name': 'frequency of communication', 'range': ['rarely', 'occasionally', 'regularly', 'daily']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 12, Error: 0:  24%|██▍       | 12/50 [00:42<02:13,  3.53s/it]

{'identify': {'operation': 'avg', 'attribute': 'frequency of communication'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': 'less than 1 year'}, {'attr': 'time', 'op': '<=', 'value': '1-5 years'}]}
{'type': 'categorical', 'name': 'genre', 'choice': ['Comedy', 'Drama', 'Action', 'Horror', 'Romance', 'Mystery', 'Documentary', 'Science Fiction']} {'type': 'temporal', 'span': ['2000', '2022']} {'type': 'quantitative', 'name': 'box office revenue', 'range': ['0', '1000000000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 13, Error: 0:  26%|██▌       | 13/50 [00:45<02:06,  3.41s/it]

{'identify': {'operation': 'avg', 'attribute': 'box office revenue'}}
{'type': 'categorical', 'name': 'vehicle', 'choice': ['car', 'truck', 'motorcycle', 'bus', 'train', 'ship']} {'type': 'temporal', 'span': ['2020Q1', '2022Q4']} {'type': 'quantitative', 'name': 'CO2 emission', 'range': ['1', '1000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 14, Error: 0:  28%|██▊       | 14/50 [00:48<01:59,  3.33s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'CO2 emission'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2020Q1'}]}
{'type': 'categorical', 'name': 'fruit', 'choice': ['Apple', 'Orange', 'Banana', 'Grapes', 'Watermelon', 'Pineapple', 'Strawberry', 'Mango', 'Papaya']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'price', 'range': ['1', '50']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 15, Error: 0:  30%|███       | 15/50 [00:51<01:43,  2.96s/it]

{'identify': {'operation': 'avg', 'attribute': 'price'}}
{'type': 'categorical', 'name': 'exercise_type', 'choice': ['Aerobic', 'Strength', 'Flexibility', 'Balance']} {'type': 'temporal', 'span': ['2010', '2023']} {'type': 'quantitative', 'name': 'Physical Health Index', 'range': ['0', '100']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 16, Error: 0:  32%|███▏      | 16/50 [00:53<01:37,  2.86s/it]

{'identify': {'operation': 'avg', 'attribute': 'Physical Health Index'}}
{'type': 'categorical', 'name': 'major', 'choice': ['Computer Science', 'Mathematics', 'History', 'Psychology', 'Biology', 'Physics', 'English', 'Chemistry']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'GPA', 'range': ['2.0', '4.0']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 17, Error: 0:  34%|███▍      | 17/50 [00:56<01:31,  2.79s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'GPA'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2000'}]}
{'type': 'categorical', 'name': 'industry', 'choice': ['Technology', 'Finance', 'Healthcare', 'Retail', 'Energy', 'Manufacturing']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'Revenue', 'range': ['100', '5000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 18, Error: 0:  36%|███▌      | 18/50 [00:58<01:27,  2.74s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'Revenue'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2000'}]}
{'type': 'categorical', 'name': 'media_type', 'choice': ['TV', 'print', 'radio', 'online']} {'type': 'temporal', 'span': ['2000', '2020']} {'type': 'quantitative', 'name': 'audience_reach', 'range': ['1000', '1000000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 19, Error: 1:  38%|███▊      | 19/50 [01:02<01:28,  2.86s/it]

{'identify': {'operation': 'avg', 'attribute': 'audience_reach'}}
{'type': 'categorical', 'name': 'gender', 'choice': ['Female', 'Male']} {'type': 'temporal', 'span': ['1940', '2025']} {'type': 'quantitative', 'name': 'income', 'range': ['0', '1000000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)
{'type': 'categorical', 'name': 'region', 'choice': ['North America', 'Europe', 'Asia', 'South America', 'Africa', 'Oceania']} {'type': 'temporal', 'span': ['1970', '2021']} {'type': 'quantitative', 'name': 'CO2 Emissions', 'range': ['1', '100']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 20, Error: 1:  40%|████      | 20/50 [01:05<01:28,  2.94s/it]

{'identify': {'operation': 'avg', 'attribute': 'CO2 Emissions'}}
{'type': 'categorical', 'name': 'government type', 'choice': ['democracy', 'monarchy', 'dictatorship', 'oligarchy', 'anarchy', 'theocracy']} {'type': 'temporal', 'span': ['1950', '2023']} {'type': 'quantitative', 'name': 'corruption perception', 'range': ['0', '100']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 21, Error: 1:  42%|████▏     | 21/50 [01:30<04:38,  9.62s/it]

{'identify': {'operation': 'avg', 'attribute': 'corruption perception'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1950'}, {'attr': 'time', 'op': '<=', 'value': '2023'}]}
{'type': 'categorical', 'name': 'industry', 'choice': ['Technology', 'Finance', 'Healthcare', 'Energy', 'Retail', 'Manufacturing', 'Real Estate', 'Transportation']} {'type': 'temporal', 'span': ['2010', '2021']} {'type': 'quantitative', 'name': 'Net Worth', 'range': ['1000', '1000000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 22, Error: 1:  44%|████▍     | 22/50 [01:33<03:30,  7.52s/it]

{'identify': {'operation': 'avg', 'attribute': 'Net Worth'}}
{'type': 'categorical', 'name': 'gender', 'choice': ['Male', 'Female', 'Transgender', 'Non-binary', 'Genderqueer', 'Bigender']} {'type': 'temporal', 'span': ['1950', '2021']} {'type': 'quantitative', 'name': 'Salary', 'range': ['15000', '150000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 23, Error: 1:  46%|████▌     | 23/50 [01:39<03:17,  7.33s/it]

{'identify': {'operation': 'avg', 'attribute': 'Salary'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1950'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'education level', 'choice': ['High School', "Bachelor's Degree", "Master's Degree", 'PhD']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'Salary', 'range': ['15000', '150000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 24, Error: 1:  48%|████▊     | 24/50 [01:42<02:37,  6.05s/it]

{'identify': {'operation': 'avg', 'attribute': 'Salary'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2000'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'Continent', 'choice': ['Asia', 'Africa', 'Europe', 'North America', 'South America', 'Oceania']} {'type': 'temporal', 'span': ['1990', '2021']} {'type': 'quantitative', 'name': 'Democracy Index', 'range': ['0.1', '10']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 25, Error: 1:  50%|█████     | 25/50 [01:45<02:05,  5.02s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'Democracy Index'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '1990'}]}
{'type': 'categorical', 'name': 'destination', 'choice': ['Thailand', 'Maldives', 'New Zealand', 'Japan', 'Italy', 'Brazil', 'Canada', 'Egypt', 'India']} {'type': 'temporal', 'span': ['2010', '2023']} {'type': 'quantitative', 'name': 'budget', 'range': ['100', '10000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 26, Error: 1:  52%|█████▏    | 26/50 [01:48<01:43,  4.30s/it]

{'identify': {'operation': 'avg', 'attribute': 'budget'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2010'}, {'attr': 'time', 'op': '<=', 'value': '2023'}]}
{'type': 'categorical', 'name': 'conflict', 'choice': ['World War I', 'World War II', 'Vietnam War', 'Korean War', 'Iraq War', 'Syrian Civil War']} {'type': 'temporal', 'span': ['1914', '2022']} {'type': 'quantitative', 'name': 'Casualties', 'range': ['1000', '1000000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 27, Error: 1:  54%|█████▍    | 27/50 [01:51<01:30,  3.95s/it]

{'identify': {'operation': 'avg', 'attribute': 'Casualties'}}
{'type': 'categorical', 'name': 'species', 'choice': ['dogs', 'cats', 'birds', 'fish', 'rabbits', 'hamsters', 'guinea pigs', 'reptiles', 'wildlife', 'farm animals']} {'type': 'temporal', 'span': ['2000', '2022']} {'type': 'quantitative', 'name': 'number of protected animals', 'range': ['1000', '1000000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 28, Error: 1:  56%|█████▌    | 28/50 [01:54<01:21,  3.71s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'number of protected animals'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2000'}]}
{'type': 'categorical', 'name': 'type of building', 'choice': ['commercial', 'residential', 'industrial', 'governmental']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'energy consumption per square meter', 'range': ['50', '500']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 29, Error: 1:  58%|█████▊    | 29/50 [01:57<01:11,  3.38s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'energy consumption per square meter'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2000'}]}
{'type': 'categorical', 'name': 'genre', 'choice': ['Comedy', 'Drama', 'Action', 'Horror', 'Romance', 'Mystery', 'Documentary', 'Science Fiction']} {'type': 'temporal', 'span': ['2000', '2022']} {'type': 'quantitative', 'name': 'box office revenue', 'range': ['0', '1000000000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 30, Error: 1:  60%|██████    | 30/50 [02:00<01:06,  3.31s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'box office revenue'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2000'}]}
{'type': 'categorical', 'name': 'relationship_status', 'choice': ['Married', 'Single', 'Divorced', 'Widowed', 'Separated']} {'type': 'temporal', 'span': ['1970', '2021']} {'type': 'quantitative', 'name': 'number_of_children', 'range': ['0', '5']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 31, Error: 1:  62%|██████▏   | 31/50 [02:03<01:02,  3.26s/it]

{'identify': {'operation': 'avg', 'attribute': 'number_of_children'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1970'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'country', 'choice': ['Sweden', 'Iceland', 'Norway', 'Finland', 'Denmark', 'Netherlands', 'Switzerland', 'Belgium', 'Luxembourg', 'Germany']} {'type': 'temporal', 'span': ['2005', '2020']} {'type': 'quantitative', 'name': 'Gender Equality Index', 'range': ['20', '90']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 32, Error: 1:  64%|██████▍   | 32/50 [02:06<00:58,  3.23s/it]

{'identify': {'operation': 'avg', 'attribute': 'Gender Equality Index'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2005'}, {'attr': 'time', 'op': '<=', 'value': '2020'}]}
{'type': 'categorical', 'name': 'genre', 'choice': ['Pop', 'Rock', 'Hip Hop', 'Electronic', 'Jazz', 'Classical', 'Reggae']} {'type': 'temporal', 'span': ['1950', '2023']} {'type': 'quantitative', 'name': 'sales', 'range': ['1', '1000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 33, Error: 1:  66%|██████▌   | 33/50 [02:08<00:49,  2.90s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'sales'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '1950'}]}
{'type': 'categorical', 'name': 'mathematical concept', 'choice': ['Calculus', 'Linear Algebra', 'Statistics', 'Number Theory', 'Topology', 'Graph Theory', 'Combinatorics', 'Differential Equations']} {'type': 'temporal', 'span': ['1940s', '2020s']} {'type': 'quantitative', 'name': 'number of publications', 'range': ['0', '5000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 34, Error: 1:  68%|██████▊   | 34/50 [02:11<00:47,  2.97s/it]

{'identify': {'operation': 'avg', 'attribute': 'number of publications'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1940s'}, {'attr': 'time', 'op': '<=', 'value': '2020s'}]}
{'type': 'categorical', 'name': 'gender', 'choice': ['Male', 'Female', 'Transgender', 'Non-binary', 'Genderqueer', 'Bigender']} {'type': 'temporal', 'span': ['1950', '2021']} {'type': 'quantitative', 'name': 'Salary', 'range': ['15000', '150000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 35, Error: 1:  70%|███████   | 35/50 [02:13<00:40,  2.70s/it]

{'identify': {'operation': 'avg', 'attribute': 'Salary'}}
{'type': 'categorical', 'name': 'origin_country', 'choice': ['China', 'India', 'Mexico', 'Philippines', 'Vietnam', 'El Salvador', 'Honduras', 'Guatemala', 'Nigeria']} {'type': 'temporal', 'span': ['2000', '2020']} {'type': 'quantitative', 'name': 'number_of_immigrants', 'range': ['0', '1000000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 36, Error: 1:  72%|███████▏  | 36/50 [02:16<00:37,  2.70s/it]

{'identify': {'operation': 'avg', 'attribute': 'number_of_immigrants'}}
{'type': 'categorical', 'name': 'activity_type', 'choice': ['Running', 'Swimming', 'Cycling', 'Yoga', 'Weightlifting', 'Dancing']} {'type': 'temporal', 'span': ['2015', '2021']} {'type': 'quantitative', 'name': 'Calories_burned', 'range': ['50', '1000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 37, Error: 1:  74%|███████▍  | 37/50 [02:22<00:48,  3.76s/it]

{'identify': {'operation': 'avg', 'attribute': 'Calories_burned'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2015'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'region', 'choice': ['North America', 'Europe', 'Asia', 'South America', 'Africa', 'Oceania']} {'type': 'temporal', 'span': ['1970', '2021']} {'type': 'quantitative', 'name': 'CO2 Emissions', 'range': ['1', '100']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 38, Error: 1:  76%|███████▌  | 38/50 [02:25<00:42,  3.57s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'CO2 Emissions'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '1970'}]}
{'type': 'categorical', 'name': 'race', 'choice': ['White', 'Black', 'Hispanic', 'Asian', 'Native American']} {'type': 'temporal', 'span': ['1960', '2020']} {'type': 'quantitative', 'name': 'income', 'range': ['0', '1000000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 39, Error: 1:  78%|███████▊  | 39/50 [02:28<00:34,  3.14s/it]

{'identify': {'operation': 'avg', 'attribute': 'income'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1960'}, {'attr': 'time', 'op': '<=', 'value': '2020'}]}
{'type': 'categorical', 'name': 'Continent', 'choice': ['Asia', 'Africa', 'Europe', 'North America', 'South America', 'Oceania']} {'type': 'temporal', 'span': ['1990', '2021']} {'type': 'quantitative', 'name': 'Democracy Index', 'range': ['0.1', '10']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 40, Error: 1:  80%|████████  | 40/50 [02:31<00:31,  3.13s/it]

{'identify': {'operation': 'avg', 'attribute': 'Democracy Index'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '1990'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'topic', 'choice': ['fashion', 'sports', 'politics', 'entertainment', 'food', 'travel']} {'type': 'temporal', 'span': ['2015', '2022']} {'type': 'quantitative', 'name': 'mentions', 'range': ['100', '100000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 41, Error: 1:  82%|████████▏ | 41/50 [02:33<00:26,  2.98s/it]

{'identify': {'operation': 'avg', 'attribute': 'mentions'}}
{'type': 'categorical', 'name': 'country', 'choice': ['Afghanistan', 'Bangladesh', 'India', 'Nepal', 'Pakistan', 'Sri Lanka']} {'type': 'temporal', 'span': ['2000', '2020']} {'type': 'quantitative', 'name': 'Gender Equality Index', 'range': ['20', '90']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 42, Error: 1:  84%|████████▍ | 42/50 [02:35<00:21,  2.71s/it]

{'identify': {'operation': 'avg', 'attribute': 'Gender Equality Index'}}
{'type': 'categorical', 'name': 'crime_type', 'choice': ['assault', 'burglary', 'drug offenses', 'homicide', 'robbery', 'sexual offenses']} {'type': 'temporal', 'span': ['2010', '2023']} {'type': 'quantitative', 'name': 'number_of_incidences', 'range': ['10', '2000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 43, Error: 1:  86%|████████▌ | 43/50 [02:38<00:18,  2.69s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'number_of_incidences'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2010'}]}
{'type': 'categorical', 'name': 'media_type', 'choice': ['TV Show', 'Movie', 'Documentary', 'Anime', 'Reality TV', 'Talk Show']} {'type': 'temporal', 'span': ['2015', '2022']} {'type': 'quantitative', 'name': 'IMDb_rating', 'range': ['0', '10']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 44, Error: 1:  88%|████████▊ | 44/50 [02:40<00:15,  2.51s/it]

{'identify': {'operation': 'avg', 'attribute': 'IMDb_rating'}}
{'type': 'categorical', 'name': 'programming_language', 'choice': ['Python', 'Java', 'C++', 'JavaScript', 'Ruby', 'Swift', 'Go', 'R', 'PHP']} {'type': 'temporal', 'span': ['2000', '2021']} {'type': 'quantitative', 'name': 'code_lines', 'range': ['1', '1000000']} ('What is the average {quantity} from {time_1} to {time_2}?', <function avg_quan_time_range at 0x147251cf0>)


Success: 45, Error: 1:  90%|█████████ | 45/50 [02:43<00:13,  2.70s/it]

{'identify': {'operation': 'avg', 'attribute': 'code_lines'}, 'filter': [{'attr': 'time', 'op': '>=', 'value': '2000'}, {'attr': 'time', 'op': '<=', 'value': '2021'}]}
{'type': 'categorical', 'name': 'region', 'choice': ['North America', 'South America', 'Europe', 'Africa', 'Asia', 'Australia']} {'type': 'temporal', 'span': ['1950', '2025']} {'type': 'quantitative', 'name': 'temperature', 'range': ['-20', '50']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 46, Error: 1:  92%|█████████▏| 46/50 [02:45<00:10,  2.53s/it]

{'identify': {'operation': 'avg', 'attribute': 'temperature'}}
{'type': 'categorical', 'name': 'building type', 'choice': ['office', 'residential', 'educational', 'hospitality', 'retail', 'industrial']} {'type': 'temporal', 'span': ['2000', '2022']} {'type': 'quantitative', 'name': 'square footage', 'range': ['100', '10000']} ('What is the average {quantity}?', <function avg_quan at 0x1472524d0>)


Success: 47, Error: 1:  94%|█████████▍| 47/50 [02:48<00:07,  2.55s/it]

{'identify': {'operation': 'avg', 'attribute': 'square footage'}}
{'type': 'categorical', 'name': 'product_category', 'choice': ['Electronics', 'Apparel', 'Books', 'Beauty', 'Sports', 'Home & Furniture', 'Toys', 'Food & Beverage']} {'type': 'temporal', 'span': ['2020-01-01', '2021-12-31']} {'type': 'quantitative', 'name': 'sales', 'range': ['100', '10000']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 48, Error: 1:  96%|█████████▌| 48/50 [02:51<00:05,  2.73s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'sales'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2020-01-01'}]}
{'type': 'categorical', 'name': 'media_type', 'choice': ['TV Show', 'Movie', 'Documentary', 'Anime', 'Reality TV', 'Talk Show']} {'type': 'temporal', 'span': ['2015', '2022']} {'type': 'quantitative', 'name': 'IMDb_rating', 'range': ['0', '10']} ('What is the average {quantity} in {time_1}?', <function avg_quan_time_single_range at 0x1472f9360>)


Success: 49, Error: 1:  98%|█████████▊| 49/50 [02:54<00:03,  3.56s/it]

{'identify': {'aggregate': 'avg', 'attribute': 'IMDb_rating'}, 'filter': [{'attr': 'time', 'op': '=', 'value': '2015'}]}





In [19]:
def strip(s):    
    o = json.loads(s)    
    o["sentence"] = o["sentence"].strip().split("\n")[-1]
    return json.dumps(o)

df = pd.read_csv(file_output)
df.input = df.input.apply(strip)
df.to_csv(file_output, index=False)