### Distilling step-by-step fine-tuning approach: trying enhanced rationales with specific reasoning for date conversion

In [1]:
!sudo pip install -q transformers==4.35.0

In [2]:
# !sudo pip install vllm

In [2]:
import transformers
transformers.__version__

'4.35.0'

In [2]:
#!sudo pip install -q accelerate peft==0.4.0 bitsandbytes trl==0.4.7

In [3]:
import os
import torch
from datasets import load_dataset
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import pandas as pd
import torch

In [4]:
df = pd.read_csv('/data/mistral/query-to-mql/exp-11/training_data_1.csv')

In [5]:
df.columns

Index(['template_question', 'context', 'iMQL', 'New_Rationale'], dtype='object')

In [6]:
df.shape

(1881, 4)

In [49]:
# `df` is intentionally NOT deleted here: the prompt-building cells further
# down still read it, so removing it at this point makes a top-to-bottom run
# fail with a NameError. Only the CUDA allocator cache is released.
torch.cuda.empty_cache()

In [5]:
# The model that you want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [10]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [11]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = True

In [12]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/data/mistral/query-to-mql/exp-11/dec-18"

# Number of training epochs
# NOTE: the TrainingArguments call in this notebook also passes a max_steps
# value, which (per the comment on `max_steps` below) overrides this setting.
num_train_epochs = 15

# Enable fp16/bf16 training (set bf16 to True with an A100)
# fp16 = False
fp16 = True # fp16 mixed precision; note the base model IS loaded 4-bit quantised (use_4bit / bnb_config)
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
# NOTE: not passed to TrainingArguments in this notebook (evaluation is commented out there).
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
# NOTE: not passed to TrainingArguments in this notebook.
gradient_checkpointing = True


# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
# NOTE(review): with a plain "constant" schedule the warmup_ratio below is
# presumably not applied - confirm against the scheduler documentation.
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
# NOTE: the TrainingArguments call passes the literal max_steps=20000, so this
# variable is not the value actually used for training.
max_steps = 200

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 500

# Log every X updates steps
# NOTE: the TrainingArguments call passes the literal logging_steps=1, so this
# variable is not the value actually used for training.
logging_steps = 40

In [13]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [14]:
promt_template = """Given the context : {context}, the query: {user_query}, is converted into below shown structured output.
[MQL]
{mql}
[/MQL]
the steps and rationale used to achieve above structured output is as below.
{rationale}
"""

In [15]:
df.columns

Index(['template_question', 'context', 'iMQL', 'New_Rationale'], dtype='object')

In [16]:
def create_fine_tuning_dataset(row):
    """Render one training example as a single prompt string.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) providing the
            'iMQL', 'template_question', 'context' and 'New_Rationale' fields.

    Returns:
        The module-level ``promt_template`` filled in with the row's context,
        query, target MQL and rationale.
    """
    return promt_template.format(
        mql=row['iMQL'],
        user_query=row['template_question'],
        context=row['context'],
        rationale=row['New_Rationale'],
    )

In [17]:
df['fine_tuning_dataset']=df.apply(create_fine_tuning_dataset, axis=1)

In [18]:
df['fine_tuning_dataset'][0]

'Given the context : {\'MEASURE\': [{\'ENTITY\': \'Profit\', \'other names\': [\'profits\']}], \'DIMENSION\': [], \'FILTER\': [], \'DERIVED MEASURE\': [], \'DATE VARIABLE\': [{\'ENTITY\': \'Order Date\', \'other names\': [\'trend\', \'yoy\', \'mom\', \'date\', \'order date\', \'when\', \'time\']}]}, the query: What is the profits in 1st quarter of last year, is converted into below shown structured output.\n[MQL]\n{\'MEASURE\': {\'profits\': [{\'ENTITY\': \'Profit\'}]}, \'DATE VARIABLE\': {\'1st quarter of last year\': [{\'ENTITY\': \'Order Date\', \'CONVERTED TIME ELEMENT\': \'quarter 1 last year\'}]}}\n[/MQL]\nthe steps and rationale used to achieve above structured output is as below.\nStep 1: Identify the components in the query\n- In the query "What is the profits in 1st quarter of last year", we can identify the following components:\n  - Measure: profits\n  - Date variable: 1st quarter of last year\n\nStep 2: Match the components to the context\n- Measure: The context provides a

In [19]:
# Drop the raw source columns so only the rendered prompt column remains.
# Re-assigning with errors='ignore' (instead of inplace=True) keeps the cell
# idempotent: running it a second time no longer raises a KeyError for
# columns that were already removed.
df = df.drop(columns=['template_question', 'context', 'iMQL', 'New_Rationale'], errors='ignore')
df.shape

(1881, 1)

In [20]:
train_dataset = Dataset.from_pandas(df)

In [21]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 1881
})

In [22]:
train_dataset['fine_tuning_dataset'][0]

'Given the context : {\'MEASURE\': [{\'ENTITY\': \'Profit\', \'other names\': [\'profits\']}], \'DIMENSION\': [], \'FILTER\': [], \'DERIVED MEASURE\': [], \'DATE VARIABLE\': [{\'ENTITY\': \'Order Date\', \'other names\': [\'trend\', \'yoy\', \'mom\', \'date\', \'order date\', \'when\', \'time\']}]}, the query: What is the profits in 1st quarter of last year, is converted into below shown structured output.\n[MQL]\n{\'MEASURE\': {\'profits\': [{\'ENTITY\': \'Profit\'}]}, \'DATE VARIABLE\': {\'1st quarter of last year\': [{\'ENTITY\': \'Order Date\', \'CONVERTED TIME ELEMENT\': \'quarter 1 last year\'}]}}\n[/MQL]\nthe steps and rationale used to achieve above structured output is as below.\nStep 1: Identify the components in the query\n- In the query "What is the profits in 1st quarter of last year", we can identify the following components:\n  - Measure: profits\n  - Date variable: 1st quarter of last year\n\nStep 2: Match the components to the context\n- Measure: The context provides a

In [23]:
# Build the bitsandbytes quantisation settings used when loading the base model.
# The compute dtype is configured by name, so resolve it to the torch dtype first.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    **{
        "load_in_4bit": use_4bit,
        "bnb_4bit_use_double_quant": use_nested_quant,
        "bnb_4bit_quant_type": bnb_4bit_quant_type,
        "bnb_4bit_compute_dtype": compute_dtype,
    }
)

In [24]:
bnb_4bit_quant_type

'nf4'

In [25]:
compute_dtype

torch.float16

In [26]:
# When 4-bit loading with float16 compute is configured, report whether the
# GPU's major compute capability (>= 8) indicates bfloat16 support.
if use_4bit and compute_dtype == torch.float16:
    major, _ = torch.cuda.get_device_capability()
    if not major < 8:
        print(
            "=" * 80,
            "Your GPU supports bfloat16: accelerate training with bf16=True",
            "=" * 80,
            sep="\n",
        )

In [27]:
!sudo pip install -q pynvml

In [50]:
from pynvml.smi import nvidia_smi
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 10006.9375,
    'unit': 'MiB'}}]}

In [29]:
#!df -H

In [30]:
torch.cuda.is_available()

True

In [31]:
# del model
# torch.cuda.empty_cache()

In [32]:
# Load base model
# The quantised weights are placed according to the `device_map` declared in
# the SFT parameter cell ({"": 0}, i.e. everything on GPU 0) instead of a
# hard-coded "auto", so the configuration variable is actually honoured.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
)
# Disable the generation key/value cache on the config for the training run.
model.config.use_cache = False
# NOTE(review): presumably a tensor-parallelism setting inherited from
# LLaMA-style fine-tuning recipes - confirm it is relevant for this model.
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Load the (non-fast) tokenizer that belongs to `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=False,
    trust_remote_code=True,
    # add_eos_token=True,
)
# Pad on the right, re-using the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [58]:
# Longest training prompt, measured in tokenizer tokens.
max(len(tokenizer.encode(text)) for text in df['fine_tuning_dataset'])

2557

In [51]:
# `tokenizer` is intentionally kept alive: the SFTTrainer construction and the
# generation cells further down still use it, so deleting it here would make
# a top-to-bottom run fail with a NameError.

In [34]:
# LoRA adapter settings: rank, alpha and dropout come from the QLoRA parameter
# cell; adapters are attached to the q/v/k attention projections, no bias terms.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    target_modules=["q_proj", "v_proj", "k_proj"],
)

In [47]:
# Discard a previously created `training_arguments` object, if any, before it
# is rebuilt. Unlike a bare `del`, this does not raise NameError on a fresh
# kernel where the name has not been defined yet.
globals().pop("training_arguments", None)

In [35]:
# Set training parameters
# Most values come from the "TrainingArguments parameters" cell; two of them
# are hard-coded literals here and therefore differ from the variables of the
# same name defined in that cell (see the notes below).
training_arguments = TrainingArguments(
    output_dir=output_dir,
    # Passed for completeness; max_steps below takes precedence over it.
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
#     eval_steps=50, # requires when eval_dataset is defined
#     per_device_eval_batch_size=1, # Batch size for evaluation
#     evaluation_strategy="steps", # requires when eval_dataset is defined
    logging_strategy="steps",
    # NOTE: literal value; the `logging_steps` config variable (40) is not used.
    logging_steps=1,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    # NOTE: literal value; the `max_steps` config variable (200) is not used.
    max_steps=20000,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
#     load_best_model_at_end=True,
#     save_total_limit=1,
#     metric_for_best_model="eval_loss",
#     greater_is_better=False
)

In [36]:
torch.cuda.empty_cache()

In [37]:
## Estimate model FLOPs and report the memory footprint

# FLOPs reported by the model for one dummy 1 x 4096 `input_ids` tensor,
# scaled by the configured number of gradient-accumulation steps.
model_flops = training_arguments.gradient_accumulation_steps * model.floating_point_ops(
    {"input_ids": torch.zeros((1, 4096))}
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.551360512 GB
Flops 174751.582519296 GFLOPs


In [38]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 1881
})

In [39]:
#print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [40]:
# Build the supervised fine-tuning trainer: the quantised base model plus the
# LoRA config, trained on the rendered prompt column of `train_dataset`.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=train_dataset,
#     eval_dataset=val_dataset,
    dataset_text_field="fine_tuning_dataset",
    max_seq_length=4096,
    packing=packing,
)



Map:   0%|          | 0/1881 [00:00<?, ? examples/s]

In [41]:
output_dir

'/data/mistral/query-to-mql/exp-11/dec-18'

In [42]:
# Train model
# Resume from the step-16500 checkpoint under `output_dir` when it exists;
# otherwise train from scratch. A hard-coded resume path makes the cell fail
# on any machine or fresh output directory that lacks that checkpoint.
resume_checkpoint = os.path.join(output_dir, "checkpoint-16500")
trainer.train(
    resume_from_checkpoint=resume_checkpoint if os.path.isdir(resume_checkpoint) else None
)

Step,Training Loss
16501,0.0353
16502,0.024
16503,0.0262
16504,0.0564
16505,0.0623
16506,0.0502
16507,0.0355
16508,0.0354
16509,0.0485
16510,0.0506


TrainOutput(global_step=20000, training_loss=0.006139943813672289, metrics={'train_runtime': 13936.2183, 'train_samples_per_second': 1.435, 'train_steps_per_second': 1.435, 'total_flos': 7.020588562579538e+17, 'train_loss': 0.006139943813672289, 'epoch': 10.63})

In [None]:
# Fine-tuned model name
#new_model_name = "mistral-ft-peft-v1-lr-64-with-more-data"

In [None]:
# Save trained model
# `new_model` was referenced here without ever being assigned, which raises a
# NameError. Define the adapter output directory explicitly, next to the
# training checkpoints, before saving.
new_model = os.path.join(output_dir, "final-adapter")
trainer.model.save_pretrained(new_model)

In [59]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
              (v_proj)

In [8]:
from peft import PeftModel, PeftConfig

In [48]:
# del model
# del trainer
torch.cuda.empty_cache()

In [45]:
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 10006.9375,
    'unit': 'MiB'}}]}

In [12]:
%load_ext tensorboard

In [13]:
%tensorboard --logdir /data//mistral/query-to-mql/exp-10/nov-20/runs

In [7]:
new_model_name = "/data/mistral/query-to-mql/exp-11/dec-18/checkpoint-20000"

In [8]:
!sudo pip install -q peft

In [9]:
from peft import AutoPeftModelForCausalLM

# del model
torch.cuda.empty_cache()

# Load the adapter checkpoint together with its base model in bfloat16, then
# fold the LoRA weights into the base weights to get a plain causal LM.
model = AutoPeftModelForCausalLM.from_pretrained(
    new_model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
).merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
# Persist the merged (adapter folded in) model to disk.
# NOTE(review): the destination lives under exp-10/nov-20 although the adapter
# was loaded from the exp-11/dec-18 checkpoint - confirm this path is intended
# and does not overwrite an earlier experiment's merged model.
model.save_pretrained('/data/mistral/query-to-mql/exp-10/nov-20/merged-model')

In [10]:
tokenizer.save_pretrained('/data/mistral/query-to-mql/exp-10/nov-20/merged-model')

('/data/mistral/query-to-mql/exp-10/nov-20/merged-model/tokenizer_config.json',
 '/data/mistral/query-to-mql/exp-10/nov-20/merged-model/special_tokens_map.json',
 '/data/mistral/query-to-mql/exp-10/nov-20/merged-model/tokenizer.model',
 '/data/mistral/query-to-mql/exp-10/nov-20/merged-model/added_tokens.json')

In [13]:
import torch

In [10]:
# from transformers import pipeline
# pipe = pipeline(task="text-generation", model='/data/mistral/query-to-mql/exp-10/nov-20/merged-model', device_map='auto',
#                torch_dtype=torch.int8)

In [10]:
query_template_v1 = """Given the context : {context}, the query: {user_query}, is converted into below shown structured output.
[MQL]
"""

In [11]:
# loading testing data

df = pd.read_csv('sample_test_questions_re_pharsed.csv')

In [13]:
df.head(1)

Unnamed: 0,user_question,template_question,query_type,context,iMQL,New_Rationale,predicted_imql
0,Number of ship modes boosting sales over 3 mon...,How many ship mode contributing to growth of s...,howmany_contribute_to_growth,"{'MEASURE': [{'ENTITY': 'Sales', 'other names'...","{'MEASURE': {'sale': [{'ENTITY': 'Sales'}]}, '...",Step 1: Identify the components in the query\n...,"{'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}, ..."


In [17]:
df =  df.head(300)

In [18]:
from tqdm import tqdm

In [19]:
# Generate an MQL prediction for every test row.
# `pred_mql` maps the user question to the predicted MQL string; because it is
# keyed by question text, a repeated question overwrites its earlier entry.
pred_mql = {}
# `total` lets tqdm render a real progress bar/ETA; a bare iterrows() generator
# has no length, so only a running counter was shown before.
for i, row in tqdm(df.iterrows(), total=len(df)):
    user_query = row['user_question']
    context = row['context']

    inp = query_template_v1.format(context=context,
                                   user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt").to('cuda')
    outputs = model.generate(input_ids=_inputs, max_new_tokens = 200, pad_token_id=tokenizer.eos_token_id)
    # The decoded text contains the prompt followed by the generation, so take
    # what follows the "[MQL]" marker and cut it at the closing tag.
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[MQL]\n')[1]
    pred_mql[user_query]=output_new.split('\n[/MQL]')[0]

300it [1:05:17, 13.06s/it]


In [20]:
new_df = pd.DataFrame(pred_mql.items(), columns=['query', 'predicted_mql'])
new_df.to_csv("predicted_mql_exp_11_rephrased_testing_query.csv", index=False)

In [3]:
import pandas as pd
new_df = pd.read_csv('predicted_mql_exp_11_rephrased_testing_query.csv')

In [4]:
new_df.columns

Index(['query', 'predicted_mql'], dtype='object')

In [5]:
df_gpt = pd.read_csv('300_rephrased_test_data.csv')

In [6]:
df_gpt.columns

Index(['user_question', 'template_question', 'query_type', 'context', 'iMQL',
       'New_Rationale', 'predicted_imql', 'context_phrased',
       'actual_iMQL_phrased'],
      dtype='object')

In [36]:
df_gpt.shape

(294, 9)

In [7]:
df_gpt.rename(columns={'user_question':'query'}, inplace=True)

In [8]:
df_combined = new_df.merge(df_gpt[['actual_iMQL_phrased','query']], on='query',how='left')

In [9]:
(df_combined['predicted_mql']==df_combined['actual_iMQL_phrased']).value_counts()

False    279
True      21
Name: count, dtype: int64

In [43]:
# Mark rows whose predicted MQL string equals the reference exactly, then
# print every mismatching pair for manual inspection.
df_combined['is_same'] = df_combined['predicted_mql'].eq(df_combined['actual_iMQL_phrased'])

df_combined_false = df_combined.loc[~df_combined['is_same']]


for i, row in df_combined_false.iterrows():
    print("query is --", row['query'])
    print('----------Actual GPT MQL----------', row['actual_iMQL_phrased'], sep='\n')
    print('----------Predicted MQL----------', row['predicted_mql'], sep='\n')
    print('\n')

query is -- Number of ship modes boosting sales over 3 months?
----------Actual GPT MQL----------
{'MEASURE': {'sales': [{'ENTITY': 'Sales', 'ADJECTIVE': ['boosting'], 'TONE': 'positive'}]}, 'DIMENSION': {'ship modes': [{'ENTITY': 'Ship Mode'}]}, 'DATE VARIABLE': {'3 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 3 months'}]}}
----------Predicted MQL----------
{'MEASURE': {'sale': [{'ENTITY': 'Sales'}]}, 'DIMENSION': {'ship mode': [{'ENTITY': 'Ship Mode'}]}, 'DERIVED MEASURE': {'contributing to growth': [{'ENTITY': 'contribution_to_growth', 'APPLIED MEASURE': [{'sale': 'Sales'}], 'ADJECTIVE': ['over 3 months']}]}, 'DATE VARIABLE': {'3 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 3 months'}]}}


query is -- When did discount % first exceed 20k with sales under 100k in the past 10 months?
----------Actual GPT MQL----------
{'MEASURE': {'discount %': [{'ENTITY': 'Discount percentage', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '20k', 'COMPARISON 

In [15]:
df_combined.to_csv('combined_df.csv')

In [14]:
df_combined['predicted_mql'][0]

"{'MEASURE': {'sale': [{'ENTITY': 'Sales'}]}, 'DIMENSION': {'ship mode': [{'ENTITY': 'Ship Mode'}]}, 'DERIVED MEASURE': {'contributing to growth': [{'ENTITY': 'contribution_to_growth', 'APPLIED MEASURE': [{'sale': 'Sales'}], 'ADJECTIVE': ['over 3 months']}]}, 'DATE VARIABLE': {'3 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 3 months'}]}}"

In [1]:
from imqlKPI import KPIs, display_over_all_KPIs

In [None]:
df_combined["INDEX"] = df_combined.index

In [11]:
# Score every prediction against its reference with the project-local `KPIs`
# helper (one call per row, passing the row's INDEX value), storing the result
# in a new column.
# NOTE(review): the recorded run of this cell printed "Invalid json Syntax at
# index 23" and then raised ValueError ("malformed node or string");
# presumably at least one MQL string cannot be parsed - verify before relying
# on the KPIs below.
df_combined["KPI_on_rephrased_question"]=df_combined.apply(lambda row: KPIs(predicted=row['predicted_mql'], actual=row['actual_iMQL_phrased'],index=row["INDEX"]), axis=1)

# Summarise the per-row KPI column for the MEASURE field.
kpi_measure=display_over_all_KPIs(KPI_df=df_combined,
                                  KPI_col_name="KPI_on_rephrased_question",
                                  field="MEASURE")

Invalid json Syntax at index 23


ValueError: malformed node or string: <_ast.Name object at 0x7effbc17f790>

In [18]:
# Generate an MQL prediction for every row, this time from the template
# question. `pred_mql` maps the question to the predicted MQL string; because
# it is keyed by question text, a repeated question overwrites its earlier entry.
pred_mql = {}
# `total` lets tqdm render a real progress bar/ETA; a bare iterrows() generator
# has no length, so only a running counter was shown before.
for i, row in tqdm(df.iterrows(), total=len(df)):
    user_query = row['template_question']
    context = row['context']

    inp = query_template_v1.format(context=context,
                                   user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt").to('cuda')
    outputs = model.generate(input_ids=_inputs, max_new_tokens = 200, pad_token_id=tokenizer.eos_token_id)
    # The decoded text contains the prompt followed by the generation, so take
    # what follows the "[MQL]" marker and cut it at the closing tag.
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[MQL]\n')[1]
    pred_mql[user_query]=output_new.split('\n[/MQL]')[0]

1000it [3:26:29, 12.39s/it]


In [19]:
new_df = pd.DataFrame(pred_mql.items(), columns=['query', 'predicted_mql'])
new_df.to_csv("predicted_mql_exp_11.csv", index=False)

In [29]:
df.columns

Index(['template_question', 'context', 'iMQL', 'New_Rationale'], dtype='object')

In [33]:
new_df.rename(columns={'query':'template_question'}, inplace=True)

In [35]:
df2 = new_df.merge(df[['iMQL','template_question']], on='template_question',how='left')

In [34]:
new_df.columns

Index(['template_question', 'predicted_mql'], dtype='object')

In [42]:
# training data
(df2['predicted_mql']==df2['iMQL']).value_counts()

True     754
False    246
Name: count, dtype: int64

In [43]:
df2.to_csv("predicted_mql_exp_11.csv", index=False)

In [44]:
df2['is_same']=df2['predicted_mql']==df2['iMQL']

In [46]:
df2_false = df2[df2['is_same']==False]

In [None]:
# Print every template question whose predicted MQL differs from the reference.
for i, row in df2_false.iterrows():
    print("query is --", row['template_question'])
    print('----------Actual MQL----------', row['iMQL'], sep='\n')
    print('----------Predicted MQL----------', row['predicted_mql'], sep='\n')
    print('\n')

query is -- What is the profits and sales by product name across 2 months
----------Actual MQL----------
{'MEASURE': {'profits': [{'ENTITY': 'Profit'}], 'sales': [{'ENTITY': 'Sales'}]}, 'DIMENSION': {'product name': [{'ENTITY': 'Product Name'}]}, 'DATE VARIABLE': {'2 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 2 months'}]}}
----------Predicted MQL----------
{'MEASURE': {'profits': [{'ENTITY': 'Profit'}], 'sales': [{'ENTITY': 'Sales'}]}, 'DIMENSION': {'product name': [{'ENTITY': 'Product Name'}]}, 'DATE VARIABLE': {'across 2 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 2 months'}]}}


query is -- What is the profits of product name except slovenia across 2 months
----------Actual MQL----------
{'MEASURE': {'profits': [{'ENTITY': 'Profit'}]}, 'DIMENSION': {'product name': [{'ENTITY': 'Product Name'}]}, 'DATE VARIABLE': {'2 months': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'last 2 months'}]}}
----------Predicted MQL----------
{'MEASU

In [9]:
user_query = 'sales in nov 2020 to may 2021 vs Q3 2021'
# `query_template_v1` only has {context} and {user_query} placeholders. The
# former extra `date_input=date_input` argument was ignored by str.format but
# still required a global `date_input`, which exists only as commented-out
# code, so the cell failed with a NameError on a fresh kernel.
inp = query_template_v1.format(context=context,
                               user_query=user_query)

In [10]:
inp

'Given the context : {\n    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},\n                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},\n                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},\n                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],\n    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},\n                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},\n                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},\n                  {"ENTITY": "Country", "other names": ["country", "countries"]}],\n    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},\n               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"

In [None]:
%%time
user_query = 'sales in nov 2020 to may 2021 vs Q3 2021'
inp = query_template_v1.format(context=context,
                                   user_query=user_query,
                                  date_input=date_input)
pipe(inp, max_new_tokens=1000)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [20]:
model.to('cuda')

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRM

In [51]:
# date_input = {
#     "start_date": "01/06/2020",
#     "end_date": "15/07/2026"
# }

In [16]:
def predict_template_query_v1(user_query):
    """Generate the MQL prediction for a single user query.

    Uses the notebook-level ``query_template_v1``, ``context``, ``tokenizer``
    and ``model`` objects: the prompt is built from the template, encoded,
    moved to CUDA and completed with up to 200 new tokens.

    Args:
        user_query: Natural-language question to convert.

    Returns:
        A ``(mql, raw)`` tuple where ``raw`` is the full decoded sequence and
        ``mql`` is the text following the first ``"[MQL]\\n"`` marker, cut at
        the first ``"\\n[/MQL]"`` closing tag.

    Raises:
        IndexError: If the decoded text does not contain ``"[MQL]\\n"``.
    """
    # The template has no {date_input} placeholder, so the previously passed
    # `date_input` global (undefined in this notebook) is no longer referenced.
    inp = query_template_v1.format(context=context,
                                   user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt").to('cuda')
    outputs = model.generate(input_ids=_inputs, max_new_tokens = 200, pad_token_id=tokenizer.eos_token_id)
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[MQL]\n')[1]
    return output_new.split('\n[/MQL]')[0], output

In [12]:
import time

In [30]:
questions = pd.read_csv("test-questions.csv")

In [31]:
# Walk over the test questions row by row. Iterating a DataFrame directly
# yields its column labels, which cannot be unpacked into (index, row);
# `.iterrows()` provides the intended pairs.
for i, row in questions.iterrows():
    q = row['questions']
    

Unnamed: 0,questions
0,discount rate of phone and binders
1,discount rate of overall sub-category in corpo...
2,maximum sales of phone for consumer segment
3,forecast of sales
4,quantity across segments except consumer
5,which segment has highest purchase
6,top 2 segments basis discount
7,which category contributing to 5% of discount
8,monthly sales of segments in 2021
9,show me the 2 top segments basis sales


In [13]:
import ast

# Time a single prediction and show the parsed result.
start = time.time()
user_query = 'what is purchase across segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print("time taken : ", time.time()-start)
# SECURITY: `output` is free text produced by the model, so it is parsed with
# ast.literal_eval (Python literals only) instead of eval, which would execute
# arbitrary expressions. Non-literal output now raises ValueError/SyntaxError.
print(ast.literal_eval(output))
# print('-'*100)
# print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is purchase across segments
----------------------------------------------------------------------------------------------------
time taken :  16.96851134300232
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {}, 'DERIVED MEASURE': {}, 'DATE VARIABLE': {'purchase': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'purchase'}]}}


In [14]:
import deepspeed

In [15]:
# Wrap the loaded model in a DeepSpeed inference engine (default arguments),
# then rebind the global `model` to the engine's underlying module so that
# predict_template_query_v1, which reads the global `model`, uses it.
# NOTE(review): re-running this cell passes the already-wrapped module to
# init_inference again — presumably meant to run once per kernel; confirm.
ds_engine = deepspeed.init_inference(model)
model = ds_engine.module

[2023-12-11 04:47:56,249] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed info: version=0.12.4, git-hash=unknown, git-branch=unknown
[2023-12-11 04:47:56,250] [INFO] [logging.py:96:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1


In [18]:
start = time.time()
user_query = 'what is purchase across segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print("time taken : ", time.time()-start)
# The MQL is model-generated text: parse it as a literal instead of
# executing it with eval().
print(ast.literal_eval(output))

user query:  what is purchase across segments
----------------------------------------------------------------------------------------------------
time taken :  12.465290069580078
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {}, 'DERIVED MEASURE': {}, 'DATE VARIABLE': {'purchase': [{'ENTITY': 'Order Date', 'CONVERTED TIME ELEMENT': 'purchase'}]}}


In [33]:
# Show the MQL string currently bound to `output`, i.e. the first element
# returned by an earlier predict_template_query_v1 call.
output

"{'DATE VARIABLE': {'Q3 2021': [{'CONVERTED TIME ELEMENT': 'Q3 2021', 'DATE RANGE': '2021/07/01 - 2021/09/30', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}"

In [21]:
%%time
user_query = 'sales in last week'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in last week
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last week': [{'CONVERTED TIME ELEMENT': 'last week', 'DATE RANGE': '2023/09/08 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" which is a measure.
- The query specifies a time period "last week".

Step 2: Match the components to the context
- The measure "sales" can be matched to the entity "Sales" in the context.
- The time period "last week" needs to be converted to a date range using the date reference.

Step 3: Convert the time period to a date range
- The date reference provides a start date of '01/01/2020' and an end date of '15/09/2023'.
- The last week refers to the 7 days before the end date.
- Subtract 7 days from t

In [20]:
%%time
user_query = 'sales in 2022, second quarter'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 2022, second quarter
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'second quarter': [{'CONVERTED TIME ELEMENT': ' quarter 2, 2022', 'DATE RANGE': '2022/04/01 - 2022/06/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "2022" which is a date component.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date component "2022" can be matched to the "Order Date" entity under the "DATE VARIABLE" category.

Step 3: Convert the date component
- The query mentions "second quarter" which is a date component.
- The date reference provided has a start_date of '01/01/2

In [19]:
%%time
user_query = 'sales in 2022, fourth quarter'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 2022, fourth quarter
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'fourth quarter': [{'CONVERTED TIME ELEMENT': ' quarter 4, 2022', 'DATE RANGE': '2022/10/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "2022" which is a date component.
- The query mentions "fourth quarter" which is a date component related to the year 2022.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "2022" can be matched to the "Order Date" entity in the context under DATE VARIABLE.
- "fourth quarter" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3

In [18]:
%%time
user_query = 'sales in quarter 4 of year 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in quarter 4 of year 2021
----------------------------------------------------------------------------------------------------


  next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)


{'DATE VARIABLE': {'quarter 4 of year 2021': [{'CONVERTED TIME ELEMENT': ' quarter 4 of year 2021', 'DATE RANGE': '2021/10/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "quarter 4 of year 2021" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "quarter 4 of year 2021" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3: Convert the date component
- The query mentions "quarter 4 of year 2021". In a year, there are 4 quarters, each consisting of 3 months.
- Quarter 4 refers to the months October, November, and December.
- Since the year is 2021, quarter 4 corresponds to the date range "2021/10/01 - 20

In [67]:
%%time
user_query = 'quantity in 21st Jun 2023'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity in 21st Jun 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'21st Jun 2023': [{'CONVERTED TIME ELEMENT': '21st june, 2023', 'DATE RANGE': '2023/06/21 - 2023/06/21', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "quantity" on "21st Jun 2023".

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the MEASURE section of the context.
- "21st Jun 2023" is a date component that needs to be converted to a date range.

Step 3: Convert the date component
- The query asks for quantity on "21st Jun 2023". Since it's asking for quantity on a specific date, the date range will be the same date.
- In this case, the date range is "2023/06/21 - 2023/06/21".

Step

In [66]:
%%time
user_query = 'quantity in 12.08.2024'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity in 12.08.2024
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'12.08.2024': [{'CONVERTED TIME ELEMENT': '12 august, 2024', 'DATE RANGE': '2024/08/12 - 2024/08/12', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "quantity" on a specific date "12.08.2024".

Step 2: Match the components to the context
- The "quantity" component can be matched to the "Quantity" entity in the MEASURE section of the context.
- The "12.08.2024" component is a date and can be matched to the "Order Date" entity in the DATE VARIABLE section of the context.

Step 3: Convert the date component
- The date "12.08.2024" is given in the format "DD.MM.YYYY". We need to convert it to a more standard format "YYYY/MM/DD".
- The date "1

In [69]:
%%time
user_query = 'quantity in rolling year'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity in rolling year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'rolling year': [{'CONVERTED TIME ELEMENT': 'year', 'DATE RANGE': '2025/07/16 - 2026/07/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "quantity" which is a measure.
- The query mentions "rolling year" which is a date variable.

Step 2: Match the components to the context
- The measure "quantity" can be matched to the entity "Quantity" in the context.
- The date variable "rolling year" needs to be converted to a date range using the date reference.

Step 3: Convert the date variable "rolling year" to a date range
- The date reference provides a start date of '01/06/2020' and an end date of '15/07/2026'.
- A rolling year refers to t

In [70]:
%%time
user_query = 'quantity in previous month'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity in previous month
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'previous month': [{'CONVERTED TIME ELEMENT': ' previous month', 'DATE RANGE': '2026/06/01 - 2026/06/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "quantity" which is a measure.
- The query specifies a time period "previous month".

Step 2: Match the components to the context
- The measure "quantity" can be matched to the entity "Quantity" in the context.
- The time period "previous month" needs to be converted to a date range using the date reference.

Step 3: Convert the time period to a date range
- The date reference provides the start date as '01/06/2020' and the end date as '15/07/2026'.
- The previous month of the end dat

In [72]:
%%time
user_query = 'what is sales of phone in past 6 months'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in past 6 months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'past 6 months': [{'CONVERTED TIME ELEMENT': 'past 6 months', 'DATE RANGE': '2026/01/15 - 2026/07/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query asks for "sales of phone in the past 6 months."
- Components identified: sales, phone, and past 6 months.

Step 2: Match components to the context
- Sales: Found in the MEASURE section of the context as "Sales."
- Phone: Found in the FILTER section of the context as "Phone" under the parent "Sub-Category."
- Past 6 months: This is a date component that needs to be converted using the date reference.

Step 3: Convert date com

In [58]:
%%time
user_query = 'what will be sales of phone in Q1 2023'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales of phone in Q1 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query asks for "sales of phone in Q1 2023". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: Q1 2023

Step 2: Match components to the context
- Measure: Sales is present in the context as {'ENTITY': 'Sales', 'other names': ['sales', 'sale']}
- Filter: Phone is present in the context as {'ENTITY': 'Phone', 'other names': ['phone', 'phones', 'mobile phones'], 'parent': 'Sub-C

In [41]:
%%time
user_query = 'what will be sales of phone in 01.21'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales of phone in 01.21
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01.21': [{'CONVERTED TIME ELEMENT': 'january 21, 2023', 'DATE RANGE': '2023/01/01 - 2023/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01.21". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 01.21 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-

In [52]:
%%time
user_query = 'quantity of binders in last 15 months'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 15 months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 15 months': [{'CONVERTED TIME ELEMENT': 'last 15 months', 'DATE RANGE': '2022/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last 15 months" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "last 15 months" can be 

In [50]:
%%time
user_query = 'quantity of binders in last 3 years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 3 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 3 years': [{'CONVERTED TIME ELEMENT': 'last 3 years', 'DATE RANGE': '2020/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder" from the context.
- The query also mentions "last 3 years" which is a date component.

Step 2: Match the components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" entity under the "FILTER" category in the context.

Step 3: Con

In [49]:
%%time
user_query = 'quantity of binders in last 2.5 years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 2.5 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2.5 years': [{'CONVERTED TIME ELEMENT': 'last 2.5 years', 'DATE RANGE': '2021/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last 2.5 years" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "last 2.5 years" can be 

In [48]:
%%time
user_query = 'quantity of binders in last two and half years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last two and half years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last two and half years': [{'CONVERTED TIME ELEMENT': 'last two and half years', 'DATE RANGE': '2021/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last two and half years" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context unde

In [47]:
%%time
user_query = 'quantity of binders in last 30 months'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 30 months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 30 months': [{'CONVERTED TIME ELEMENT': 'last 30 months', 'DATE RANGE': '2021/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last 30 months" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "last 30 months" can be 

In [46]:
%%time
user_query = 'quantity of binders in last 24 months'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 24 months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 24 months': [{'CONVERTED TIME ELEMENT': 'last 24 months', 'DATE RANGE': '2021/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last 24 months" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "last 24 months" can be 

In [45]:
%%time
user_query = 'quantity of binders in last 18 months'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 18 months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 18 months': [{'CONVERTED TIME ELEMENT': 'last 18 months', 'DATE RANGE': '2022/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "last 18 months" which is a date variable.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "last 18 months" can be 

In [44]:
%%time
user_query = 'quantity of binders in last 2 years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in last 2 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 years': [{'CONVERTED TIME ELEMENT': 'last 2 years', 'DATE RANGE': '2021/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder" from the context.
- The query also mentions "last 2 years" which is a date component.

Step 2: Match the components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" entity under the "FILTER" category in the context.

Step 3: Con

In [43]:
%%time
user_query = 'sales in previous year'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in previous year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'previous year': [{'CONVERTED TIME ELEMENT': 'previous year', 'DATE RANGE': '2022/01/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "previous year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "previous year" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3: Convert the date component
- The date reference provided has a start_date of '01/01/2020' and an end_date of '15/09/2023'.
- Since the query asks for sales in the "previous year",

In [35]:
%%time
user_query = 'what will be sales of phone in 03.22'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales of phone in 03.22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'03.22': [{'CONVERTED TIME ELEMENT': 'March 2022', 'DATE RANGE': '2022/03/01 - 2022/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 03.22". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 03.22 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Catego

In [25]:
%%time
user_query = 'what will be sales of phone in 07/22'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales of phone in 07/22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'07/22': [{'CONVERTED TIME ELEMENT': 'July 2022', 'DATE RANGE': '2022/07/01 - 2022/07/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 07/22". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 07/22 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Category'.
- 

In [36]:
%%time
user_query = 'sales of phone in 06/2024'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of phone in 06/2024
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'06/2024': [{'CONVERTED TIME ELEMENT': 'June 2024', 'DATE RANGE': '2024/06/01 - 2024/06/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "phone" which is a filter under the sub-category.
- The query mentions "06/2024" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phone" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "06/2024" can be matched to the "Order Date" entity in t

In [37]:
%%time
user_query = 'what will be sales of phone in 04/24'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales of phone in 04/24
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'04/24': [{'CONVERTED TIME ELEMENT': 'april, 2024', 'DATE RANGE': '2024/04/01 - 2024/04/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 04/24". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 04/24 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Category'.


In [38]:
%%time
user_query = 'show me sales of phone in 05/23'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  show me sales of phone in 05/23
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'05/23': [{'CONVERTED TIME ELEMENT': 'may, 2023', 'DATE RANGE': '2023/05/01 - 2023/05/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 05/23". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 05/23 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Category'.
- Date 

In [39]:
%%time
user_query = 'show me the bottom 10 segments basis sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  show me the bottom 10 segments basis sales
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '10'}]}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "bottom 10 segments" - Rank adjective "bottom" and rank value "10"
- "basis sales" - Measure "Sales"

Step 2: Match the components to the context
- "bottom 10 segments" - Matches with "Segment" in the context under "DIMENSION" with the rank adjective "bottom" and rank value "10"
- "basis sales" - Matches with "Sales" in the context under "MEASURE"

Step 3: Convert the components into structured output
- "bottom 10 segments" - Create a "DIMENSION" object with "Segment" as the ENTITY and a "RANK" object containing the rank adjective 

In [23]:
%%time
user_query = 'top 2 and bottom 3 segments by sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 and bottom 3 segments by sales
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}, {'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '3'}]}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "top 2": Rank adjective and rank value
- "bottom 3": Rank adjective and rank value
- "segments": Dimension (matches with 'Segment' in the context)
- "by sales": Measure (matches with 'Sales' in the context)

Step 2: Match the components to the context
- "segments" matches with 'Segment' in the context under 'DIMENSION'
- "sales" matches with 'Sales' in the context under 'MEASURE'

Step 3: Convert the query into structured output
- Create a 'DIMENSION' key and add 'segments' as an array with 'S

In [24]:
%%time
user_query = 'top 2 segments and bottom 3 sub-category basis quantity'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 segments and bottom 3 sub-category basis quantity
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}]}], 'sub-category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '3'}]}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "top 2 segments": Rank adjective "top", rank value "2", and entity "segments"
- "bottom 3 sub-category": Rank adjective "bottom", rank value "3", and entity "sub-category"
- "basis quantity": Measure "quantity"

Step 2: Match the components to the context
- "segments" matches the entity "Segment" in the context with other names ['segment', 'segments', 'units', 'divisions'].
- "sub-category" matches the entity "Sub

In [25]:
%%time
user_query = 'quantity across segments except consumer and corporate in dubai'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity across segments except consumer and corporate in dubai
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'EXCLUDE': 'True', 'PARENT': 'Segment'}], 'corporate': [{'ENTITY': 'Corporate', 'EXCLUDE': 'True', 'PARENT': 'Segment'}], 'dubai': [{'ENTITY': 'Dubai', 'PARENT': 'Country'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Quantity: Measure
- Segments: Dimension
- Consumer: Filter (to be excluded)
- Corporate: Filter (to be excluded)
- Dubai: Filter

Step 2: Match the components to the context
- Quantity: Matches with 'Quantity' in the context
- Segments: Matches with 'Segment' in the context
- Consumer: Matches with 'Consumer' in the context (parent: Segment)
- C

In [26]:
%%time
user_query = 'in corporate share of phone and binder basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  in corporate share of phone and binder basis discount
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Corporate: Segment
- Share: Derived Measure (Ratio)
- Phone and Binder: Sub-Category
- Discount: Measure

Step 2: Match the components to the context
- Corporate: {'ENTITY': 'Corporate', 'other names': ['corporates', 'corporate'], 'parent': 'Segment'}
- Share: {'ENTITY': 'Ratio', 'other names': ['ratio'

In [27]:
%%time
user_query = 'list of under performing segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  list of under performing segments
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ADJECTIVE': ['under performing'], 'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '1'}], 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the main components of the query
- The query asks for a "list of under performing segments."

Step 2: Match the components to the context
- In the context, we have a 'DIMENSION' with an 'ENTITY' called 'Segment' and its other names as ['segment', 'segments', 'units', 'divisions'].

Step 3: Identify the adjectives and tone
- The query has an adjective "under performing" which indicates a negative tone.

Step 4: Identify the ranking
- The query asks for a list, which implies a ranking. Since it's asking for under performing segments, we can assume it's looking fo

In [40]:
%%time
user_query = 'when was the first time sales of segments was 0'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  when was the first time sales of segments was 0
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'when': [{'CONVERTED TIME ELEMENT': 'when', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '0', 'COMPARSION OPERATOR': '='}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "when" which indicates a date variable.
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query specifies a condition for sales to be "0".

Step 2: Match the components to the context
- "when" can be matched to the "Order Date" entity in the DATE VARIABLE context.
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "segment

In [29]:
%%time
user_query = 'sales of segments from beginning'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segments from beginning
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'beginning': [{'CONVERTED TIME ELEMENT': 'beginning', 'DATE RANGE': '01/01/2020 - 15/09/2023', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which can be matched to the "Sales" entity in the MEASURE context.
- The query mentions "segments" which can be matched to the "Segment" entity in the DIMENSION context.
- The query mentions "from beginning" which indicates a date range starting from the earliest date available.

Step 2: Match the components to the context
- "sales" is matched to the "Sales" entity in the MEASURE context.
- "segments" is matched to the "Segment" entity 

In [30]:
%%time
user_query = 'sales in 20/01/2020'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 20/01/2020
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'20/01/2020': [{'CONVERTED TIME ELEMENT': 'january 20, 2020', 'DATE RANGE': '2020/01/20 - 2020/01/20', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "20/01/2020" which is a date component.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date component "20/01/2020" needs to be converted to a proper date range.

Step 3: Convert the date component
- The date component "20/01/2020" refers to January 20, 2020.
- To convert it to a date range, we can use the format "YYYY/MM/DD". So, the date range for thi

In [31]:
%%time
user_query = 'sales in last one and half years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in last one and half years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half years': [{'CONVERTED TIME ELEMENT': 'last one and half years', 'DATE RANGE': '2022/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "last one and half years" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "last one and half years" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3: Convert the date component
- The date component "last one and half years" needs to be converted into a date range.
- To do this, 

In [32]:
%%time
user_query = 'trend of sales in dubai now'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  trend of sales in dubai now
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'trend': [{'CONVERTED TIME ELEMENT': 'now', 'DATE RANGE': '2023/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'dubai': [{'ENTITY': 'Dubai', 'PARENT': 'Country'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "trend of sales" which refers to the measure 'Sales' and the date variable 'Order Date'.
- The query also mentions "in Dubai" which refers to the filter 'Dubai'.

Step 2: Match components to the context
- 'Sales' can be matched to the measure 'Sales' in the context.
- 'Order Date' can be matched to the date variable 'Order Date' in the context.
- 'Dubai' can be matched to the filter 'Dubai' in the context.

Step 3: Convert date components
- The query

In [33]:
%%time
user_query = 'sales in the week of 01 March 2022'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in the week of 01 March 2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'week of 01 March 2022': [{'CONVERTED TIME ELEMENT': 'week of 1 March 2022', 'DATE RANGE': '2022/02/28 - 2022/03/06', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "week of 01 March 2022" which is a date component.

Step 2: Match components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date component "week of 01 March 2022" needs to be converted to a date range.

Step 3: Convert date component
- The date component "week of 01 March 2022" refers to the week starting from 1st March 2022. 
- To find the date range for 

In [34]:
%%time
user_query = 'top most selling segments in 2020 vs 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top most selling segments in 2020 vs 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2020 vs 2021': [{'CONVERTED TIME ELEMENT': '2020 vs 2021', 'DATE RANGE': '2020/01/01 - 2020/12/31, 2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '1'}]}]}, 'MEASURE': {'selling': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "top most selling": Rank adjective "top" and Measure "Sales"
- "segments": Dimension "Segment"
- "2020 vs 2021": Date Variable "Order Date" with a comparison between two time periods

Step 2: Match the components to the context
- "top most selling" matches with the Measure "Sales"
- "segments" matches with the Dimension "Segment"
- "2020 vs 2021" matches with the Dat

In [35]:
%%time
user_query = 'sales from 100 to 200'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales from 100 to 200
----------------------------------------------------------------------------------------------------
{'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '100', 'COMPARSION OPERATOR': '>='}, {'COMPARISON VALUE': '200', 'COMPARSION OPERATOR': '<='}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions a range "from 100 to 200" which is a constraint on the sales measure.

Step 2: Match the components to the context
- In the context, we can find the "Sales" entity under the 'MEASURE' category with other names like 'sales' and 'sale'.
- There is no date component mentioned in the query, so the date reference is not utilized here.

Step 3: Convert the query into a structured output
- Since the query is about sales and has a range from 100 to 200, we can 

In [36]:
%%time
user_query = 'sales from 2021 to 2023'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales from 2021 to 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'sales': [{'CONVERTED TIME ELEMENT': 'from 2021 to 2023', 'DATE RANGE': '2021/01/01 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions a date range "from 2021 to 2023" which is a date variable.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date range "from 2021 to 2023" needs to be converted to a date range.

Step 3: Convert the date range
- The date range "from 2021 to 2023" indicates that we want to include all dates between 2021-01-01 and 2023-09-15.
- The date reference provided 

In [37]:
%%time
user_query = 'what is purchase across segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is purchase across segments
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "purchase" which can be matched to the context as "Purchase Vol".
- The query mentions "across segments" which can be matched to the context as "Segment".

Step 2: Match the components to the context
- "Purchase Vol" is a measure in the context, so it will be placed under the "MEASURE" section in the structured output.
- "Segment" is a dimension in the context, so it will be placed under the "DIMENSION" section in the structured output.

Step 3:

In [38]:
%%time
user_query = 'discount rate of phone and binders'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount rate of phone and binders
----------------------------------------------------------------------------------------------------
{'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "discount rate" which is a measure.
- The query also mentions "phone" and "binders" which are filters.

Step 2: Match components to the context
- "discount rate" can be matched to the "Discount" entity in the context.
- "phone" can be matched to the "Phone" entity in the context, which has a parent "Sub-Category".
- "binders" can be matched to the "Binder" entity in the context, which also has a parent "Sub-Category".



In [39]:
%%time
user_query = 'discount rate of overall sub-category in corporate'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount rate of overall sub-category in corporate
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'sub-category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount rate" which is a measure.
- It also mentions "overall sub-category" which is a dimension.
- The query specifies "corporate" as a filter.

Step 2: Match the components to the context
- "discount rate" can be matched to the "Discount" entity in the context.
- "overall sub-category" can be matched to the "Sub-Category" entity in the contex

In [40]:
%%time
user_query = 'maximum sales of phone for consumer segment'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  maximum sales of phone for consumer segment
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segment': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ADJECTIVE': ['maximum'], 'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'TONE': 'positive'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "maximum sales": Measure (Sales) with an adjective (maximum)
- "phone": Filter (Phone)
- "consumer segment": Filter (Consumer)

Step 2: Match the components to the context
- "maximum sales": Matched to "Sales" in the context with the adjective "maximum"
- "phone": Matched to "Phone" in the co

In [41]:
%%time
user_query = 'forecast of sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  forecast of sales
----------------------------------------------------------------------------------------------------
{'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query is "forecast of sales". The main component here is "sales".

Step 2: Match the components to the context
- In the context, we can find "Sales" under the "MEASURE" category with other names such as "sales" and "sale". So, we can match the "sales" component in the query to the "Sales" entity in the context.

Step 3: Identify the date component in the query
- The query does not have any explicit date component mentioned. However, since it is a forecast, we can assume that the forecast is for the future period after the end_date mentioned in the date reference.

Step 4: Utilize the da

In [42]:
%%time
user_query = 'quantity across segments except consumer'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity across segments except consumer
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'EXCLUDE': 'True', 'PARENT': 'Segment'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "quantity across segments except consumer". 
- The main components are "quantity" and "segments", with an exclusion of "consumer".

Step 2: Match the components to the context
- "quantity" can be matched to the "Quantity" entity in the MEASURE context.
- "segments" can be matched to the "Segment" entity in the DIMENSION context.
- "consumer" can be matc

In [43]:
%%time
user_query = 'which segment has highest purchase'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  which segment has highest purchase
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segment': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '1'}]}]}, 'MEASURE': {'purchase': [{'ADJECTIVE': ['highest'], 'ENTITY': 'Purchase Vol', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'TONE': 'positive'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for the segment with the highest purchase. 
- Components: segment, highest, purchase

Step 2: Match the components to the context
- Segment: Found in the context under DIMENSION with the ENTITY "Segment"
- Purchase: Found in the context under MEASURE with the ENTITY "Purchase Vol"
- Highest: This is an adjective that indicates a ranking or comparison.

Step 3: Convert the query into structured outpu

In [44]:
%%time
user_query = 'top 2 segments basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 segments basis discount
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}]}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "top 2 segments" which indicates a ranking and a dimension.
- The query also mentions "basis discount" which indicates a measure.

Step 2: Match the components to the context
- In the context, "segments" can be matched to the "Segment" entity under the "DIMENSION" category.
- "discount" can be matched to the "Discount" entity under the "MEASURE" category.

Step 3: Convert the query components into structured output
- For the dimension "segments", we need to include the "ENTITY" as "Segment" and the ranking information as "R

In [45]:
%%time
user_query = 'binder share of discount by consumer'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  binder share of discount by consumer
----------------------------------------------------------------------------------------------------


SyntaxError: closing parenthesis '}' does not match opening parenthesis '[' on line 10 (<string>, line 20)

In [46]:
%%time
user_query = 'binder and phone share basis sales for corporate'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  binder and phone share basis sales for corporate
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Binder', 'Phone']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Binder
- Phone
- Share
- Sales
- Corporate

Step 2: Match the components to the context
- Binder: Sub-Category (Filter)
- Phone: Sub-Category (Filter)
- Share: Ratio (Derived Measure)
- Sales: Sales (Measure)
- Corporate: Segment (Filter)

Step 3: Convert the query into structured

In [47]:
%%time
user_query = 'in corporate, share of phone and binder basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  in corporate, share of phone and binder basis discount
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Corporate: Segment
- Share: Derived Measure (Ratio)
- Phone and Binder: Sub-Category
- Discount: Measure

Step 2: Match the components to the

In [48]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'phone to binder ratio of discount in corporate'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  phone to binder ratio of discount in corporate
----------------------------------------------------------------------------------------------------
{'DATE': {'': []}, 'DERIVED MEASURE': {'ratio': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'DIMENSION': {'corporate': [{'ENTITY': 'Corporate', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Phone
- Binder
- Ratio
- Discount
- Corporate

Step 2: Match the components to the co

In [49]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'segments with discount rate greater than 100k'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  segments with discount rate greater than 100k
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '100k', 'COMPARSION OPERATOR': '>'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "segments" which can be matched to the "Segment" entity in the context under DIMENSION.
- The query also mentions "discount rate" which can be matched to the "Discount" entity in the context under MEASURE.
- The query specifies a condition "greater than 100k" which is a comparison constraint.

Step 2: Match the components to the context
- "segments" is matched to the "Segment" entity in the context under DIMENSION.
- "discount rate" is matched to the "Discount" entity in the co

In [50]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'category with discount rate greater than 10k and quantity less than 2k'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  category with discount rate greater than 10k and quantity less than 2k
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'category': [{'ENTITY': 'Sub-Category'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '10k', 'COMPARSION OPERATOR': '>'}]}], 'quantity': [{'ENTITY': 'Quantity', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '2k', 'COMPARSION OPERATOR': '<'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "category" is a dimension component
- "discount rate" is a measure component
- "greater than 10k" is a measure constraint
- "quantity" is a measure component
- "less than 2k" is a measure constraint

Step 2: Match the components to the context
- "category" matches with "Sub-Category" in the context
- "discount rate" matches with "Discount" in the conte

In [51]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'why did discount of phones drop'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why did discount of phones drop". The components in the query are "discount", "phones", and "drop".

Step 2: Match the components to the context
- "discount" can be matched to the "Discount" entity in the MEASURE context.
- "phones" can be matched to the "Phone" entity in the FILTER context.
- "drop" indicates a negative tone and can be associated with the "Discount" entity.

Step 3: Identify the derived measure
- The query asks "why", which can be matched to the "Why

In [52]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'correlation of sales and purchase for phone'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  correlation of sales and purchase for phone
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'correlation': [{'APPLIED MEASURE': [{'sales': 'Sales'}, {'purchase': 'Purchase Vol'}], 'ENTITY': 'correlation'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "correlation", which is a derived measure.
- It also mentions "sales" and "purchase", which are measures.
- The term "phone" is mentioned, which is a filter.

Step 2: Match the components to the context
- "correlation" can be matched to the derived measure "correlation" in the context.
- "sales" can be matched to the measure "Sales" in the context.
- "purchase" can be matched to the measure "Purchase Vol" in the context.
- "phone" can be matched to the filter "Pho

In [53]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'what will be the sales in q1 24'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be the sales in q1 24
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 24': [{'CONVERTED TIME ELEMENT': 'quarter 1 2024', 'DATE RANGE': '2024/01/01 - 2024/03/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" in "q1 24".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "q1 24" is a date component that needs to be converted.

Step 3: Convert the date component
- "q1 24" can be interpreted as "quarter 1 2024".
- The date range for quarter 1 2024 is "2024/01/01 - 2024/03/31".

Step 4: Check if the date reference is needed
- The date reference provided is {'start_date': '01/01/2020', 'end_date': '15/09/2023'}.
- Since the query 

In [54]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'sales in q1 and q2 2021'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in q1 and q2 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 and q2 2021': [{'CONVERTED TIME ELEMENT': 'quarter 1 and quarter 2 2021', 'DATE RANGE': '2021/01/01 - 2021/03/31, 2021/04/01 - 2021/06/30', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "q1 and q2 2021" which is a date component.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the 'MEASURE' category.
- For the date component "q1 and q2 2021", we need to convert it into a proper date range.

Step 3: Convert the date component
- "q1 and q2 2021" refers to the first two quarters of the year 2021.
- Quarter 1 (Q1) of 2021 ra

In [55]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'how many segments contributing to growth of sales in p3m vs pp'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how many segments contributing to growth of sales in p3m vs pp
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'p3m': [{'CONVERTED TIME ELEMENT': 'previous 3 months', 'DATE RANGE': '2023/06/15-2023/09/15', 'ENTITY': 'Order Date'}], 'pp': [{'CONVERTED TIME ELEMENT': 'previous 1 month', 'DATE RANGE': '2023/09/15-2023/10/15', 'ENTITY': 'Order Date'}]}, 'DERIVED MEASURE': {'contributing to growth': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'ENTITY': 'contribution_to_growth'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "segments": Dimension
- "contributing to growth": Derived Measure
- "sales": Measure
- "p3m": Date Variable
- "pp": Date Variable

Step 2: Match the components to the context
- "segments" matches to "Segment" in the context under DIMEN

In [56]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'how has sales trended in first week of 2021'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how has sales trended in first week of 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'first week of 2021': [{'CONVERTED TIME ELEMENT': 'first week of 2021', 'DATE RANGE': '2021/01/01 - 2021/01/07', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for the sales trend in the first week of 2021.

Step 2: Match the components to the context
- From the context, we can identify that "sales" is a measure and is represented by the entity "Sales".
- The date component "first week of 2021" is not directly available in the context, but we can derive it using the date reference.

Step 3: Convert the date component
- The query asks for the sales trend in the first week of 2021. We can derive this date range by considering the

In [57]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'what will be sales in 1st 5 days of 2024'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales in 1st 5 days of 2024
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st 5 days of 2024': [{'CONVERTED TIME ELEMENT': 'first 5 days of january 2024', 'DATE RANGE': '2024/01/01 - 2024/01/05', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" which can be matched to the "Sales" entity in the context under MEASURE.
- The query specifies a time period "1st 5 days of 2024" which needs to be converted into a date range.

Step 2: Match the components to the context
- "sales" is matched to the "Sales" entity under MEASURE in the context.

Step 3: Convert the date component
- The date component "1st 5 days of 2024" can be converted into a date range "2024/01/01 - 2024/01/05" as it refers to the fi

In [58]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'sales and purchase across yoy'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales and purchase across yoy
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'yoy': [{'CONVERTED TIME ELEMENT': 'year over year', 'DATE RANGE': '2022/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol'}], 'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" and "purchase" which are measures.
- The query also mentions "yoy" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "purchase" can be matched to the "Purchase Vol" entity in the context.
- "yoy" can be matched to the "Order Date" entity in the context with the "year over year" time element.

Step 3: Convert the date component
- The date reference provided is {'star

In [59]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'sales and purchase during last one year'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales and purchase during last one year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one year': [{'CONVERTED TIME ELEMENT': 'last one year', 'DATE RANGE': '2022/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol'}], 'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" and "purchase" which are measures.
- The query also mentions "last one year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "purchase" can be matched to the "Purchase Vol" entity in the context.
- "last one year" can be matched to the "Order Date" entity in the context.

Step 3: Convert the date component
- The date reference provided is {'star

In [60]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'In q1 2023, what was sales'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In q1 2023, what was sales
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" in "q1 2023".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "q1 2023" is a date component that needs to be converted.

Step 3: Convert the date component
- "q1 2023" is a date component that represents quarter 1 of the year 2023.
- To convert this, we can use the date reference provided, which has a start_date of '01/01/2020' and an end_date of '15/09/2023'.
- Since quarter 1 of 2023 falls within the date referenc

In [61]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'What are drivers of sales growth'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What are drivers of sales growth
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'sales growth': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'contribution_to_growth', 'RATIO': [{'growth rate': 'Growth Rate'}]}]}, 'DATE VARIABLE': {'order date': [{'ENTITY': 'Order Date', 'TIDE': [{'YEAR': [{'YEAR VALUE': '2023', 'YEAR RANGE': [{'END YEAR': '2023', 'START YEAR': '2023'}]}]}]}]}, 'FILTER': {'consumers': [{'ENTITY': 'Consumer', 'PARENT': 'Segment'}], 'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}], 'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporates': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'india': [{'ENTITY': 'India', 'PARENT': 'Country'}], 'dubai': [{'ENTITY': 'Dubai', 'PARENT': 'Country'}]}, 'DIMENSION': {'sub-category': [{'ENTITY': 'Sub-Category', 'SECTION': [{'ENTITY'

In [62]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'why did discount of phones decrease'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones decrease
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['decreased'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why did discount of phones decrease". The components in the query are "discount", "phones", and "decreased".

Step 2: Match the components to the context
- "discount" can be matched to the "Discount" entity in the MEASURE context.
- "phones" can be matched to the "Phone" entity in the FILTER context.
- "decreased" is a negative tone and can be associated with the "Discount" entity.

Step 3: Identify the derived measure
- The query asks "why", which can be matche

In [63]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'why did sales of phones drop'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did sales of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why did sales of phones drop". The components in the query are "sales", "phones", and "drop".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phones" can be matched to the "Phone" entity in the FILTER context.
- "drop" is a verb and cannot be directly matched to a context entity.

Step 3: Identify the derived measure
- The query asks "why", which can be matched to the "Why" entity in the DERIVED MEASURE context.

Step 4: Create the structured output
- 

In [64]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'why did discount of phones drop'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why did discount of phones drop". The components in the query are "discount", "phones", and "drop".

Step 2: Match the components to the context
- "discount" can be matched to the "Discount" entity in the MEASURE context.
- "phones" can be matched to the "Phone" entity in the FILTER context.
- "drop" indicates a negative tone and can be associated with the "Discount" entity.

Step 3: Identify the derived measure
- The query asks "why", which can be matched to the "Why

In [65]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'why discount of phones dropped'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why discount of phones dropped
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why discount of phones dropped". The components in the query are "discount", "phones", and "dropped".

Step 2: Match the components to the context
- "discount" can be matched to the "Discount" entity in the MEASURE context.
- "phones" can be matched to the "Phone" entity in the FILTER context.
- "dropped" indicates a negative tone and can be associated with the "Discount" entity.

Step 3: Identify the derived measure
- The query asks "why", which can be matched to the 

In [66]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = 'discount of phones dropped, why?'
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones dropped, why?
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "dropped" which is an adjective describing the measure.
- The query mentions "why?" which is a derived measure.

Step 2: Match the components to the context
- "discount" can be matched to the "Discount" entity in the context under MEASURE.
- "phones" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Catego

In [67]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "Phones least sold in 2021"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Phones least sold in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021/01/01 - 2021/12/31', 'DATE RANGE': '2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'least sold': [{'ADJECTIVE': ['least'], 'ENTITY': 'Sales', 'QUANTITY': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "phones least sold in 2021", which refers to the Sub-Category "Phone" and the measure "Sales" with the adjective "least sold" and a negative tone.
- The query also mentions the date component "2021", which is a year.

Step 2: Match the components to the context
- The Sub-Category "Phone" can be matched to the c

In [68]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "Phones most sold in 2021"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Phones most sold in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021/01/01 - 2021/12/31', 'DATE RANGE': '2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'most sold': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Phones" which is a FILTER component.
- The query mentions "most sold" which is a MEASURE component.
- The query mentions "2021" which is a DATE VARIABLE component.

Step 2: Match the components to the context
- "Phones" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "most sold" can be matched to the "Sales" entity in the context under MEASURE.
- "2021" can be matched to the

In [69]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "In 02/19, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In 02/19, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'02/19': [{'CONVERTED TIME ELEMENT': 'february 2019', 'DATE RANGE': '2019/02/01 - 2019/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "02/19" which is a date component.
- The query also mentions "What was the sales of phone?" which are two separate components - "02/19" and "Sales of phone".

Step 2: Match the components to the context
- "02/19" can be matched to the "Order Date" entity in the context under "DATE VARIABLE".
- "Sales" can be matched to the "Sales" entity in the context under "MEASURE".
- "Phone" can be matched to the "Phone" entity in th

In [70]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "In Feb 2019, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In Feb 2019, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Feb 2019': [{'CONVERTED TIME ELEMENT': 'February 2019', 'DATE RANGE': '2019/02/01 - 2019/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Feb 2019" which is a date component.
- The query also mentions "sales" which is a measure component.
- The query mentions "phone" which is a filter component.

Step 2: Match the components to the context
- "Feb 2019" can be matched to the "Order Date" entity in the context.
- "sales" can be matched to the "Sales" entity in the context.
- "phone" can be matched to the "Phone" entity in the context.

Step 3: Convert

In [71]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "worst performing sub category in 2021"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  worst performing sub category in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021/01/01 - 2021/12/31', 'DATE RANGE': '2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Sub-Category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': 'worst', 'RANK VALUE': '1'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "worst performing sub category" which indicates a ranking and a filter.
- The query also mentions "2021" which is a date component.

Step 2: Match the components to the context
- "sub category" can be matched to the "Sub-Category" entity in the context under FILTER.
- "worst performing" can be matched to the "Sub-Category" entity in the context under FILTER with the RANK ADJECTIVE "worst" and RANK VALUE "1".
- "

In [72]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "What was phone sales in jun '20'?"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What was phone sales in jun '20'?
----------------------------------------------------------------------------------------------------


SyntaxError: EOL while scanning string literal (<string>, line 3)

In [73]:
%%time
import ast  # cell-local import: provides a literal parser that does not execute code

user_query = "Growth rate of sales share of phone"
print('user query: ', user_query)
print('-'*100)
# predict_template_query_v1 is defined elsewhere in this notebook; it returns a pair:
# text that parses to the structured dict, and text containing the "Step 1: ..." rationale.
output, raw = predict_template_query_v1(user_query=user_query)
# Parse the generated text as a Python literal instead of eval(), which would execute
# any expression it contains. strip() drops leading whitespace that literal_eval
# rejects on Python < 3.10. Malformed text still raises SyntaxError.
print(ast.literal_eval(output.strip()))
print('-'*100)
# Text after the first '\nStep 1:' marker; raises IndexError if the marker is missing.
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Growth rate of sales share of phone
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'growth rate': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Growth Rate'}], 'sales share': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "growth rate" and "sales share" as derived measures.
- The query also mentions "phone" as a filter.

Step 2: Match the components to the context
- "growth rate" can be matched to the "Growth Rate" entity in the context.
- "sales share" can be matched to the

In [74]:
%%time
user_query = "which are the 5 top selling sub category by sales in 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  which are the 5 top selling sub category by sales in 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2023': [{'CONVERTED TIME ELEMENT': '2023/01/01 - 2023/09/15', 'DATE RANGE': '2023/01/01 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'DERIVED MEASURE': {'top 5': [{'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '5'}], 'SUB-CATEGORY': [{'SUB-CATEGORY': [{'sales': 'sales'}]}]}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}], 'sub-category': [{'ENTITY': 'Sub-Category', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "top 5": Derived Measure (rank adjective and rank value)
- "sub-category": Filter (parent is Sub-Category)
- "sales": Measure (Sales entity)
- "2023": Date Variable

Step 2: Match the components 

In [75]:
%%time
user_query = "how does the sales change for phone in the last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how does the sales change for phone in the last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last year': [{'CONVERTED TIME ELEMENT': 'last year', 'DATE RANGE': '2022/01/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "how does the sales change for phone in the last year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone

In [76]:
%%time
user_query = "what is sales of phone in q1 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: q1 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub

In [77]:
%%time
user_query = "what is sales of phone in q3"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q3
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q3': [{'CONVERTED TIME ELEMENT': 'quarter 3', 'DATE RANGE': '2023/04/01 - 2023/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q3". 
- The components are "sales", "phone", and "q3".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "q3" is a date component that needs to be converted.

Step 3: Convert the date component
- "q3" is a date component that represents quarter 3.
- We need to convert it to a date ra

In [78]:
%%time
user_query = "what is sales of phone in q1"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1': [{'CONVERTED TIME ELEMENT': 'quarter 1', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1". 
- The components are "sales", "phone", and "q1".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "q1" is a date component that needs to be converted.

Step 3: Convert the date component
- "q1" is a date component that represents quarter 1.
- We need to convert it to a date ra

In [79]:
%%time
user_query = "what as sales of phone in last one and half years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what as sales of phone in last one and half years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half years': [{'CONVERTED TIME ELEMENT': 'last one and half years', 'DATE RANGE': '2022/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" of "phone" in the "last one and half years".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phone" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "last one and half years" needs to be converted to a date range.

Step 3: Convert the date component
- "l

In [80]:
%%time
user_query = "what is sales of phone now"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone now
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'now': [{'CONVERTED TIME ELEMENT': 'now', 'DATE RANGE': '2023/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone now". 
- The components are "sales" (measure), "phone" (filter), and "now" (date variable).

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phone" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "now" is a date variable that needs to be converted.

Step 3: Convert the date component
- "now" is a date variab

In [81]:
%%time
user_query = "what is sales of phone in 1st quarter 2022"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 1st quarter 2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st quarter 2022': [{'CONVERTED TIME ELEMENT': 'quarter 1 2022', 'DATE RANGE': '2022/01/01 - 2022/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 1st quarter 2022". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 1st quarter 2022 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phone

In [82]:
%%time
user_query = "what is sales of phone in 1st quarter 2019"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 1st quarter 2019
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st quarter 2019': [{'CONVERTED TIME ELEMENT': '1st quarter 2019', 'DATE RANGE': '2019/01/01 - 2019/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 1st quarter 2019". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 1st quarter 2019 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'pho

In [83]:
%%time
user_query = "what is sales of phone in q1 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: q1 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub

In [84]:
%%time
user_query = "what is sales of phone in Feb 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in Feb 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Feb 2023': [{'CONVERTED TIME ELEMENT': 'February 2023', 'DATE RANGE': '2023/02/01 - 2023/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in Feb 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Feb 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent '

In [85]:
%%time
user_query = "what is sales of phone in Sep 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in Sep 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Sep 2023': [{'CONVERTED TIME ELEMENT': 'September 2023', 'DATE RANGE': '2023/09/01 - 2023/09/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in Sep 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Sep 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 

In [86]:
%%time
user_query = "what is sales of phone in last one and half month"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last one and half month
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half month': [{'CONVERTED TIME ELEMENT': 'last one and half month', 'DATE RANGE': '2023/08/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last one and half month". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: Last one and half month (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' wi

In [87]:
%%time
user_query = "what is sales of phone in 01/2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/2023': [{'CONVERTED TIME ELEMENT': 'january 2023', 'DATE RANGE': '2023/01/01 - 2023/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/2023". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as {'ENTITY': 'Sales', 'other names': ['sales', 'sale']}
- Filter: Phone is present in the context as {'ENTITY': 'Phone', 'other names': ['phone', 'phones', 'mobile phones'], 'p

In [88]:
%%time
user_query = "what is sales of phone in 01/22"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/22': [{'CONVERTED TIME ELEMENT': 'january 2022', 'DATE RANGE': '2022/01/01 - 2022/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/22". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/22 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Category'.
- Da

In [89]:
%%time
user_query = "what is sales of phone in last 2.5 years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last 2.5 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2.5 years': [{'CONVERTED TIME ELEMENT': 'last 2.5 years', 'DATE RANGE': '2021/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last 2.5 years."
- The components are: sales, phone, and last 2.5 years.

Step 2: Match the components to the context
- Sales can be matched to the "Sales" entity in the MEASURE context.
- Phone can be matched to the "Phone" entity in the FILTER context.
- Last 2.5 years can be matched to the "Order Date" entity in the DATE VARIABLE context.

Step 3: Convert the date component
- The date re

In [90]:
%%time
user_query = "what is sales of phone in last week"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last week
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last week': [{'CONVERTED TIME ELEMENT': 'last week', 'DATE RANGE': '2023/09/01 - 2023/09/07', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last week". 
- The components are "sales", "phone", and "last week".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "last week" is a date component that needs to be converted.

Step 3: Convert the date component
- "last week" is a date component that needs to be conv

In [91]:
# Schema context, held as a JSON-style string. Each top-level key (MEASURE, DIMENSION,
# FILTER, DERIVED MEASURE, DATE VARIABLE) maps to a list of entities, each with its
# "other names" synonyms; FILTER entries additionally name their "parent" dimension.
# NOTE(review): the query cells below never pass `context` explicitly - presumably
# predict_template_query_v1 reads it as a global; confirm against its definition.
# NOTE(review): some synonyms are shared between entities ("section" under Sub-Category
# and Parts, "divisions" under Segment and Parts, "growth"/"grown" under
# contribution_to_growth and Growth Rate) - verify this ambiguity is intended.
context = """{
    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},
                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},
                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},
                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],
    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},
                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},
                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},
                  {"ENTITY": "Country", "other names": ["country", "countries"]}],
    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},
               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"], "parent": "Sub-Category"},
               {"ENTITY": "Binder", "other names": ["binders", "binder"], "parent": "Sub-Category"},
               {"ENTITY": "Corporate", "other names": ["corporates", "corporate"], "parent": "Segment"},
               {"ENTITY": "India", "other names": ["india"], "parent": "Country"},
               {"ENTITY": "Dubai", "other names": ["dubai"], "parent": "Country"}],
    "DERIVED MEASURE": [{"ENTITY": "Ratio",
             "other names": ["ratio", "share", "contribution", "percentage", "proportion", "contributing"]},
            {"ENTITY": "Why", "other names": ["why", "cause of", "reason for", "diagnose"]},
            {"ENTITY": "contribution_to_growth", "other names": ["contribution to growth", "growth", "grown"]},
            {"ENTITY": "kda_transactional", "other names": ["kda", "key drivers", "key driver", "drivers", "driver"]},
            {"ENTITY": "Growth Rate", "other names": ["growth rate", "growth", "grown"]},
            {"ENTITY": "correlation",
             "other names": ["associate", "associated", "association", "associations", "correlate", "correlated",
                             "correlation", "correlations", "relate", "related", "relation", "relations",
                             "relationship",
                             "relationships"]}
            ],
    "DATE VARIABLE": [{"ENTITY": "Order Date", "other names": ["order date", "date", "trend", "time", "when", "mom", "yoy"]}]
    }"""

In [92]:
# Start and end dates kept as strings; the end value's leading "20" means they are
# written day-first (dd/mm/yyyy).
# NOTE(review): presumably read by predict_template_query_v1 as the date window for
# relative expressions such as "last one year" - confirm against its definition.
date_input = dict(start_date="01/08/2020", end_date="20/12/2023")

In [93]:
%%time
user_query = "what is sales of phone from beginning"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone from beginning
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'beginning': [{'CONVERTED TIME ELEMENT': 'beginning', 'DATE RANGE': '01/01/2020 - 20/12/2023', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone from beginning."
- The components in the query are "sales," "phone," and "beginning."

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "beginning" can be matched to the "Order Date" entity in the DATE VARIABLE context.

Step 3: Convert the date component
- The query ask

In [94]:
%%time
user_query = "what is sales of phone in last one year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last one year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one year': [{'CONVERTED TIME ELEMENT': 'last one year', 'DATE RANGE': '2022/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last one year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Last one year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile p

In [95]:
%%time
user_query = "what is sales of phone in last 2 year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last 2 year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 year': [{'CONVERTED TIME ELEMENT': 'last 2 year', 'DATE RANGE': '2021/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last 2 year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last 2 year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] an

In [127]:
%%time
user_query = "what is sales of phone in last four year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last four year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four year': [{'CONVERTED TIME ELEMENT': 'last four year', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last four year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last four year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mob

In [125]:
%%time
user_query = "what is sales of phone in second month of next year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in second month of next year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'second month of next year': [{'CONVERTED TIME ELEMENT': 'second month of next year', 'DATE RANGE': '2024/02/01 - 2024/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in second month of next year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Second month of next year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the conte

In [124]:
%%time
user_query = "what is sales of phone in second month of second last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in second month of second last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'second month of second last year': [{'CONVERTED TIME ELEMENT': 'second month of second last year', 'DATE RANGE': '2021/02/01 - 2021/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in second month of second last year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Second month of second last year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Fi

In [98]:
%%time
user_query = "what is sales of phone in last five years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last five years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last five years': [{'CONVERTED TIME ELEMENT': 'last five years', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last five years". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last five years (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones',

In [123]:
%%time
user_query = "sales of segment in sixth month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segment in sixth month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'sixth month of last year': [{'CONVERTED TIME ELEMENT': 'sixth month of last year', 'DATE RANGE': '2022/06/01 - 2022/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "sixth month of last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "

In [122]:
%%time
user_query = "sales of segment in 6th month of this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segment in 6th month of this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'6th month of this year': [{'CONVERTED TIME ELEMENT': '6th month of this year', 'DATE RANGE': '2023/06/01 - 2023/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "6th month of this year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "6th mont

In [99]:
%%time
user_query = "sales of segment in 2nd month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segment in 2nd month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2nd month of last year': [{'CONVERTED TIME ELEMENT': '2nd month of last year', 'DATE RANGE': '2022/02/01 - 2022/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "2nd month of last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "2nd mont

In [100]:
%%time
user_query = "quantity of binders in fifth month of this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in fifth month of this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'fifth month of this year': [{'CONVERTED TIME ELEMENT': 'fifth month of this year', 'DATE RANGE': '2023/05/01 - 2023/05/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "fifth month of this year", which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" enti

In [101]:
%%time
user_query = "discount of binders this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of binders this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'this year': [{'CONVERTED TIME ELEMENT': 'this year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "discount" which is a measure.
- The query mentions "binders" which is a filter with parent "Sub-Category".
- The query mentions "this year" which is a date variable.

Step 2: Match components to context
- "discount" matches the entity "Discount" in the context under "MEASURE".
- "binders" matches the entity "Binder" in the context under "FILTER" with parent "Sub-Category".
- "this year" matches the entity "Order Date" in th

In [120]:
%%time
user_query = "discount of phones in last 5 years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones in last 5 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 5 years': [{'CONVERTED TIME ELEMENT': 'last 5 years', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "last 5 years" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER with parent "Sub-Category".
- "last 5 yea

In [102]:
%%time
user_query = "discount of phones in last four years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones in last four years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four years': [{'CONVERTED TIME ELEMENT': 'last four years', 'DATE RANGE': '2020/01/08 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "last four years" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context.
- "phones" can be matched to the entity "Phone" in the context, which has a parent "Sub-Category".
- "last four years" 

In [103]:
%%time
user_query = "quantity of phones in feb 2021 vs mar 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of phones in feb 2021 vs mar 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'feb 2021': [{'CONVERTED TIME ELEMENT': 'february 2021', 'DATE RANGE': '2021/02/01 - 2021/02/28', 'ENTITY': 'Order Date'}], 'mar 2023': [{'CONVERTED TIME ELEMENT': 'march 2023', 'DATE RANGE': '2023/03/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Quantity: Measure
- Phones: Filter (Sub-Category)
- Feb 2021 and Mar 2023: Date Variable

Step 2: Match the components to the context
- Quantity: Found in context as "Quantity"
- Phones: Found in context as "Phone" under "Sub-Categ

In [104]:
%%time
user_query = "quantity of binders in fifth month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in fifth month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'fifth month of last year': [{'CONVERTED TIME ELEMENT': 'fifth month of last year', 'DATE RANGE': '2022/05/01 - 2022/05/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "fifth month of last year" which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" entit

In [105]:
%%time
user_query = "discount of phones in last 1 year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones in last 1 year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 1 year': [{'CONVERTED TIME ELEMENT': 'last 1 year', 'DATE RANGE': '2022/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter under the Sub-Category.
- The query mentions "last 1 year" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER.
- "last 1 year" can be matched to the entity "Ord

In [106]:
%%time
user_query = "sales of phones in last four months"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of phones in last four months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four months': [{'CONVERTED TIME ELEMENT': 'last four months', 'DATE RANGE': '2023/08/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "phones" which is a filter under the sub-category.
- The query mentions "last four months" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phones" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "last four months" 

In [107]:
%%time
user_query = "sales of segments current year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segments current year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'current year': [{'CONVERTED TIME ELEMENT': 'current year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "segments" which is a dimension.

Step 2: Match the components to the context
- In the context, "sales" is matched to the entity "Sales" under the "MEASURE" category.
- "segments" is matched to the entity "Segment" under the "DIMENSION" category.

Step 3: Identify the date component in the query
- The query mentions "current year" which is a date component.

Step 4: Convert the date comp

In [108]:
%%time
user_query = "sales of segments this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segments this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'this year': [{'CONVERTED TIME ELEMENT': 'this year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query mentions "this year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "segments" can be matched to the "Segment" entity in the context under DIMENSION.

Step 3: Convert the date component
- "this year" is a date variable that needs to be converted using the 

In [109]:
%%time
user_query = "sales of segments last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segments last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last year': [{'CONVERTED TIME ELEMENT': 'last year', 'DATE RANGE': '2022/01/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query mentions "last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "segments" can be matched to the "Segment" entity in the context under DIMENSION.

Step 3: Convert the date component
- "last year" is a date variable that needs to be converted using the 

In [110]:
%%time
user_query = "sales in the last 2 weeks"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in the last 2 weeks
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 weeks': [{'CONVERTED TIME ELEMENT': 'last 2 weeks', 'DATE RANGE': '2023/12/07 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "last 2 weeks" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "last 2 weeks" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3: Convert the date component
- The date reference provided has a start_date of '01/08/2020' and an end_date of '20/12/2023'.
- Since the query asks for sales in the "last 2 weeks", w

In [111]:
%%time
user_query = "discount of segments in last three weeks"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of segments in last three weeks
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last three weeks': [{'CONVERTED TIME ELEMENT': 'last three weeks', 'DATE RANGE': '2023/11/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which can be matched to the "Discount" entity in the MEASURE context.
- The query mentions "segments" which can be matched to the "Segment" entity in the DIMENSION context.
- The query mentions "last three weeks" which is a date component that needs to be converted.

Step 2: Match the components to the context
- "discount" is matched to the "Discount" entity in the MEASURE context.
- "segments" is matched 

In [112]:
%%time
user_query = "quantity of binders in 10th month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in 10th month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'10th month of last year': [{'CONVERTED TIME ELEMENT': 'last year 10th month', 'DATE RANGE': '2022/10/01 - 2022/10/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "10th month of last year" which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "Quantity" is matched to the "Quantity" entity in the context under "MEASURE".
- "Binders" is matched to the "Binder" entity in the context und

In [113]:
%%time
user_query = "what is sales of phone in 01/2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/2023': [{'CONVERTED TIME ELEMENT': 'january 2023', 'DATE RANGE': '2023/01/01 - 2023/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/2023". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as {'ENTITY': 'Sales', 'other names': ['sales', 'sale']}
- Filter: Phone is present in the context as {'ENTITY': 'Phone', 'other names': ['phone', 'phones', 'mobile phones'], 'p

In [114]:
%%time
user_query = "sales in 02/22"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 02/22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'02/22': [{'CONVERTED TIME ELEMENT': 'february 2022', 'DATE RANGE': '2022/02/01 - 2022/02/28', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "02/22" which is a date component.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date component "02/22" needs to be converted to a proper date range.

Step 3: Convert the date component
- The date component "02/22" can be interpreted as "February 2022".
- To convert it into a date range, we can use the format "YYYY/MM/DD". So, the date range for February 2022 woul

In [115]:
%%time
user_query = "discount of phones in 04/23"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones in 04/23
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'04/23': [{'CONVERTED TIME ELEMENT': 'april, 2023', 'DATE RANGE': '2023/04/01 - 2023/04/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "04/23" which is a date component.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER.
- "04/23" needs to be converted to a date range and matched t

In [116]:
%%time
user_query = "quantity of binders in 04/2022"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in 04/2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'04/2022': [{'CONVERTED TIME ELEMENT': 'april 2022', 'DATE RANGE': '2022/04/01 - 2022/04/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "04/2022" which is a date variable.

Step 2: Match components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "04/2022" can be matched to the "Order Date" entity in th

In [117]:
%%time
user_query = "discount of segments in 07/23"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of segments in 07/23
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'07/23': [{'CONVERTED TIME ELEMENT': 'July 2023', 'DATE RANGE': '2023/07/01 - 2023/07/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which can be matched to the 'Discount' entity in the MEASURE context.
- The query mentions "segments" which can be matched to the 'Segment' entity in the DIMENSION context.
- The query mentions "07/23" which is a date component.

Step 2: Match the components to the context
- 'Discount' in the query matches the 'Discount' entity in the MEASURE context.
- 'Segments' in the query matches the 'Segment' entity in the DIMENSION context.

St