### Distilling step-by-step fine-tuning: enhanced rationale placed before the MQL in the fine-tuning prompt

In [1]:
!sudo pip install -q transformers --upgrade

In [2]:
import transformers
transformers.__version__

'4.34.1'

In [3]:
#!sudo pip install -q accelerate peft==0.4.0 bitsandbytes trl==0.4.7

In [4]:
import ast
import os

import pandas as pd
import torch
from datasets import load_dataset
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [5]:
df = pd.read_csv('/data/mistral/query-to-mql/exp-6/training-data.csv')

In [6]:
df.columns

Index(['Query', 'MQL', 'Rationale'], dtype='object')

In [7]:
df.shape

(37, 3)

In [8]:
# The model that you want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# The instruction dataset to use
#dataset_name = ""

# Fine-tuned model name
#new_model = "mistral-ft-peft-on-template_and_user_query-data"

In [9]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [10]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = True

In [43]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/data/mistral/query-to-mql/exp-6/oct-28"

# Number of training epochs (superseded by max_steps below while it is > 0)
num_train_epochs = 15

# Enable fp16/bf16 training (set bf16 to True with an A100)
# fp16 mixed precision is used on top of the 4-bit quantised base model
# (use_4bit is True in the bitsandbytes parameters cell).
fp16 = True
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
# NOTE(review): this flag is not passed to TrainingArguments in the training
# cell of this notebook - confirm whether that is intentional.
gradient_checkpointing = True


# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs).
# Kept equal to the value the TrainingArguments cell passes (1000), which is
# the run that produces the "checkpoint-1000" directory loaded for inference.
max_steps = 1000

# Ratio of steps for a linear warmup (from 0 to learning rate)
# NOTE(review): the plain "constant" schedule has no warmup phase; use
# "constant_with_warmup" if a warmup is actually wanted - confirm.
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 20

# Log every X updates steps (same value the TrainingArguments cell passes)
logging_steps = 5

In [44]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
# NOTE: the SFTTrainer cell in this notebook passes max_seq_length=2048 as a
# literal, so this value is not what the trainer receives.
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
# NOTE: the base-model loading cell passes device_map="auto" as a literal
# rather than this mapping.
device_map = {"": 0}

In [13]:
# Training prompt. Layout: context + date reference + user query, then the
# rationale, then the target MQL wrapped in [MQL] ... [/MQL] tags.
# The placeholders are filled per row by create_fine_tuning_dataset.
promt_template = """Given the context : {context} and date reference: {date_input}, the query: {user_query}, lets use the below reasoning and convert query into a structured output
reasoning: {rationale}
Converted structured output is:.
[MQL]
{mql}
[/MQL]
"""

In [14]:
df.columns

Index(['Query', 'MQL', 'Rationale'], dtype='object')

In [5]:
context = """{
    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},
                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},
                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},
                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],
    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},
                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},
                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},
                  {"ENTITY": "Country", "other names": ["country", "countries"]}],
    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},
               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"], "parent": "Sub-Category"},
               {"ENTITY": "Binder", "other names": ["binders", "binder"], "parent": "Sub-Category"},
               {"ENTITY": "Corporate", "other names": ["corporates", "corporate"], "parent": "Segment"},
               {"ENTITY": "India", "other names": ["india"], "parent": "Country"},
               {"ENTITY": "Dubai", "other names": ["dubai"], "parent": "Country"}],
    "DERIVED MEASURE": [{"ENTITY": "Ratio",
             "other names": ["ratio", "share", "contribution", "percentage", "proportion", "contributing"]},
            {"ENTITY": "Why", "other names": ["why", "cause of", "reason for", "diagnose"]},
            {"ENTITY": "contribution_to_growth", "other names": ["contribution to growth", "growth", "grown"]},
            {"ENTITY": "kda_transactional", "other names": ["kda", "key drivers", "key driver", "drivers", "driver"]},
            {"ENTITY": "Growth Rate", "other names": ["growth rate", "growth", "grown"]},
            {"ENTITY": "correlation",
             "other names": ["associate", "associated", "association", "associations", "correlate", "correlated",
                             "correlation", "correlations", "relate", "related", "relation", "relations",
                             "relationship",
                             "relationships"]}
            ],
    "DATE VARIABLE": [{"ENTITY": "Order Date", "other names": ["order date", "date", "trend", "time", "when", "mom", "yoy"]}]
    }"""

In [6]:
date_input = {
    "start_date": "01/01/2020",
    "end_date": "15/09/2023"
}

In [17]:
def create_fine_tuning_dataset(row):
    """Render a single training prompt from one row of the training frame.

    Reads the ``MQL``, ``Query`` and ``Rationale`` fields of ``row`` and
    substitutes them, together with the notebook-level ``context`` and
    ``date_input``, into ``promt_template``.

    Args:
        row: A mapping-like row exposing ``'MQL'``, ``'Query'`` and
            ``'Rationale'``.

    Returns:
        The fully formatted prompt string.
    """
    placeholders = {
        "mql": row['MQL'],
        "user_query": row['Query'],
        "rationale": row['Rationale'],
        "context": context,
        "date_input": date_input,
    }
    return promt_template.format(**placeholders)

In [18]:
df['fine_tuning_dataset']=df.apply(create_fine_tuning_dataset, axis=1)

In [19]:
# Keep only the rendered prompt column. Selecting the column (instead of
# dropping the others in place) leaves the same one-column frame but makes the
# cell safe to re-run: a second in-place drop would raise KeyError.
df = df[['fine_tuning_dataset']]
df.shape

(37, 1)

In [20]:
train_dataset = Dataset.from_pandas(df)

In [21]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 37
})

In [22]:
train_dataset['fine_tuning_dataset'][0]

'Given the context : {\n    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},\n                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},\n                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},\n                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],\n    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},\n                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},\n                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},\n                  {"ENTITY": "Country", "other names": ["country", "countries"]}],\n    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},\n               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"

In [23]:
# Build the bitsandbytes quantisation configuration used for QLoRA.
# The dtype name from the bitsandbytes parameters cell (a string such as
# "float16") is resolved to the matching attribute of the torch module.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Passed as quantization_config when the base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [24]:
bnb_4bit_quant_type

'nf4'

In [25]:
compute_dtype

torch.float16

In [26]:
# Check GPU compatibility with bfloat16
# Only runs when 4-bit loading is on and the compute dtype is float16: if the
# CUDA device reports a compute-capability major version of 8 or higher, a
# hint to switch to bf16 is printed. Nothing is changed automatically.
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [7]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [8]:
!sudo pip install -q pynvml

In [9]:
from pynvml.smi import nvidia_smi
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 15972.9375,
    'unit': 'MiB'}}]}

In [15]:
#!df -H

In [10]:
torch.cuda.is_available()

True

In [31]:
# del model
# torch.cuda.empty_cache()

In [32]:
# Load base model
# The quantisation settings come from bnb_config; device placement is
# delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
#     torch_dtype=torch.bfloat16,
    device_map="auto"
)
# Turn off the generation cache on the config for training.
model.config.use_cache = False
# NOTE(review): pretraining_tp looks carried over from a LLaMA recipe -
# confirm it has any effect for a Mistral model.
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Load the tokenizer that belongs to the base model named in model_name
# (a Mistral checkpoint in this notebook), using the slow implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                          # add_eos_token=True,
                                          use_fast=False)
# Reuse the EOS token as the padding token and pad on the right.
# NOTE(review): with add_eos_token left disabled and pad == EOS, the training
# examples may never teach the model to emit EOS - confirm that generations
# terminate as intended.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [34]:
max([len(tokenizer.encode(df['fine_tuning_dataset'][i])) for i in range(df.shape[0])])

1631

In [35]:
# LoRA configuration
# Adapters are attached only to the q_proj and v_proj modules; rank, alpha and
# dropout come from the QLoRA parameters cell.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules = ["q_proj", "v_proj"],
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [45]:
# Set training parameters
# NOTE: logging_steps and max_steps are passed as literals here (5 and 1000)
# rather than through the logging_steps / max_steps variables of the
# TrainingArguments parameters cell - keep the two places in sync.
# NOTE(review): gradient_checkpointing and per_device_eval_batch_size are
# defined in that cell but not passed here - confirm this is intentional.
# No evaluation dataset is configured; the eval-related options stay commented out.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
#     eval_steps=50, # requires when eval_dataset is defined
#     per_device_eval_batch_size=1, # Batch size for evaluation
#     evaluation_strategy="steps", # requires when eval_dataset is defined
    logging_strategy="steps",
    logging_steps=5,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=1000,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
#     load_best_model_at_end=True,
#     save_total_limit=1,
#     metric_for_best_model="eval_loss",
#     greater_is_better=False
)

In [46]:
## Getting FLOPs of model

# Ask the model for its floating-point-operation estimate on a dummy input of
# shape (1, 2048) and scale it by the configured gradient accumulation steps.
# Only the shape of the dummy tensor is meaningful here; its values are zeros.
model_flops = (
  model.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 2048)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
# Both figures are reported divided by 1e9 (GB and GFLOPs respectively).
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 5.185232896 GB
Flops 87710.798708736 GFLOPs


In [38]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 37
})

In [39]:
#print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [47]:
# Set supervised fine-tuning parameters
# The trainer reads the prompt text from the "fine_tuning_dataset" column and
# wraps the quantised base model with the LoRA adapters described by peft_config.
# NOTE: max_seq_length is passed as the literal 2048 (the longest tokenised
# example measured earlier in this notebook is 1631 tokens), not through the
# max_seq_length variable of the SFT parameters cell.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
#     eval_dataset=val_dataset,
    peft_config=peft_config,
    dataset_text_field="fine_tuning_dataset",
    max_seq_length=2048,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/37 [00:00<?, ? examples/s]

In [None]:
# Train model
trainer.train()

Step,Training Loss
5,0.8783
10,0.771
15,0.545
20,0.3367
25,0.2447
30,0.206
35,0.157
40,0.1454
45,0.1251
50,0.1234


In [None]:
# Fine-tuned model name
new_model_name = "mistral-ft-peft-v1-lr-64-with-more-data"

In [None]:
# Save trained model
# The target directory is the name assigned to new_model_name in the previous
# cell; the earlier `new_model` variable only exists as a commented-out line,
# so referencing it raised NameError.
trainer.model.save_pretrained(new_model_name)

In [59]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
              (v_proj)

In [12]:
from peft import PeftModel, PeftConfig

In [13]:
new_model_name = "/data/mistral/query-to-mql/exp-6/oct-28/checkpoint-1000"

In [45]:
del model
# del trainer
torch.cuda.empty_cache()

In [14]:
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 15972.9375,
    'unit': 'MiB'}}]}

In [15]:
# del model
torch.cuda.empty_cache()

from peft import AutoPeftModelForCausalLM

# Load the adapter checkpoint stored at new_model_name in bfloat16, then merge
# the adapter into the underlying model so `model` can be used directly for
# generation.
model = AutoPeftModelForCausalLM.from_pretrained(new_model_name, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [19]:
# Inference prompt: the head of the training prompt (promt_template) up to and
# including the "reasoning:" cue, so the model continues with its rationale
# and then the [MQL] block.
# NOTE(review): the training template renders "reasoning: {rationale}" on one
# line, while this prompt ends with "reasoning:" followed by a newline -
# confirm this matches how the rationales in the training data begin.
query_template_v1 = """Given the context : {context} and date reference: {date_input}, the query: {user_query}, lets use the below reasoning and convert query into a structured output
reasoning:
"""

In [20]:
#model.to('cuda')

In [21]:
def predict_template_query_v1(user_query):
    """Run the fine-tuned model on ``user_query`` and extract the generated MQL.

    The prompt is ``query_template_v1`` filled with the notebook-level
    ``context`` and ``date_input``. The model is expected to continue it with
    its reasoning followed by the structured output between ``[MQL]`` and
    ``[/MQL]`` markers.

    Args:
        user_query: Natural-language query to convert.

    Returns:
        A ``(mql_text, raw_text)`` tuple. ``mql_text`` is the text between the
        first ``[MQL]`` marker and the following ``[/MQL]`` marker; ``raw_text``
        is the full decoded sequence, prompt included.

    Raises:
        ValueError: If the decoded text contains no ``[MQL]`` marker, e.g.
            because generation stopped at ``max_length`` first. The message
            carries the tail of the generation so the failure can be inspected
            (previously this surfaced as a bare IndexError and the generated
            text was lost).
    """
    prompt = query_template_v1.format(context=context,
                                      user_query=user_query,
                                      date_input=date_input)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # max_length bounds prompt tokens and generated tokens together.
    outputs = model.generate(input_ids=input_ids.to('cuda'), max_length=1700,
                             pad_token_id=tokenizer.eos_token_id)
    raw_text = tokenizer.decode(outputs[0])

    segments = raw_text.split('[MQL]\n')
    if len(segments) < 2:
        raise ValueError(
            "Model output contains no '[MQL]' block; tail of generation: "
            + repr(raw_text[-500:])
        )
    return segments[1].split('\n[/MQL]')[0], raw_text

In [22]:
%%time
user_query = 'show me the bottom 10 segments basis sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  show me the bottom 10 segments basis sales
----------------------------------------------------------------------------------------------------


  next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)


{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '10'}]}]}, 'MEASURE': {'basis sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "show me the bottom 10 segments basis sales", we can identify the following components:
  - "bottom 10": a rank adjective and rank value
  - "segments": a dimension
  - "basis sales": a measure

Step 2: Match the components to the context
- We can match the identified components to the context as follows:
  - "segments" can be matched to the "Segment" entity in the DIMENSION section of the context.
  - "basis sales" can be matched to the "Sales" entity in the MEASURE section of the context.

Step 3: Create the structured output
- Based on the matched components, we can create the structured output as follows:
  - For the "DIMENSION" key, we include the "Segment" enti

In [23]:
%%time
user_query = 'top 2 and bottom 3 segments by sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 and bottom 3 segments by sales
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}, {'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '3'}]}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "top 2 and bottom 3 segments by sales", we can identify the following components:
  - "top 2" and "bottom 3" are rank adjectives and rank values.
  - "segments" refers to a dimension.
  - "sales" refers to a measure.

Step 2: Match the components to the context
- From the context, we can match the following components:
  - "segments" can be matched to the "Segment" entity under the "DIMENSION" category.
  - "sales" can be matched to the "Sales" entity under the "MEASURE" category.

In [24]:
%%time
user_query = 'top 2 segments and bottom 3 sub-category basis quantity'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 segments and bottom 3 sub-category basis quantity
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}]}], 'sub-category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '3'}]}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the main components of the query
- In the query "top 2 segments and bottom 3 sub-category basis quantity", we can identify the following components:
  - "top 2 segments": This refers to the top 2 entities in the "Segment" dimension.
  - "bottom 3 sub-category": This refers to the bottom 3 entities in the "Sub-Category" dimension.
  - "basis quantity": This indicates that the ranking should be based on the "Quantity" measure.

Step 2: Match the

In [25]:
%%time
user_query = 'quantity across segments except consumer and corporate in dubai'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity across segments except consumer and corporate in dubai
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'EXCLUDE': 'True', 'PARENT': 'Segment'}], 'corporate': [{'ENTITY': 'Corporate', 'EXCLUDE': 'True', 'PARENT': 'Segment'}], 'dubai': [{'ENTITY': 'Dubai', 'PARENT': 'Country'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity", which is a measure.
- It also mentions "segments", which is a dimension.
- The query specifies "except consumer and corporate", which are filters with exclusion.
- The location "Dubai" is also mentioned, which is another filter.

Step 2: Match the components to the context
- "quantity" can be matched to the "Quanti

In [18]:
%%time
user_query = 'in corporate share of phone and binder basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  in corporate share of phone and binder basis discount
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "in corporate share of phone and binder basis discount", we can identify the following components:
  - Corporate: Segment
  - Share: Derived Measure (Ratio)
  - Phone: Sub-Category
  - Binder: Sub-Category
  - Discount: Measure

Step 2: Match the components to the context
- Corporate: Match

In [20]:
%%time
user_query = 'list of under performing segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  list of under performing segments
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ADJECTIVE': ['under performing'], 'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '1'}], 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the main components in the query
- The query asks for a "list of under performing segments."

Step 2: Match the components to the context
- In the context, we have a 'DIMENSION' with an 'ENTITY' called 'Segment' and its other names as ['segment', 'segments', 'units', 'divisions'].

Step 3: Identify the adjectives used in the query
- The query uses the adjective 'under performing'.

Step 4: Identify the tone of the query
- The query has a negative tone as it asks for a list of under performing segments.

Step 5: Create the structured output
- Based on the identi

In [21]:
%%time
user_query = 'when was the first time sales of segments was 0'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  when was the first time sales of segments was 0
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'when': [{'CONVERTED TIME ELEMENT': 'when', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '0', 'COMPARSION OPERATOR': '='}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "when", which indicates a time-related question.
- The query mentions "sales", which is a measure.
- The query mentions "segments", which is a dimension.
- The query mentions "0", which is a comparison value.

Step 2: Match the components to the context
- "when" can be matched to the "Order Date" entity in the DATE VARIABLE section of the context.
- "sales" can be matched to the "Sales" entity in the M

In [22]:
%%time
user_query = 'sales of segments from beginning'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales of segments from beginning
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'beginning': [{'CONVERTED TIME ELEMENT': 'beginning', 'DATE RANGE': '01/01/2020 - 30/09/2023', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query mentions "from beginning" which refers to a date reference.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under the 'MEASURE' category.
- "segments" can be matched to the "Segment" entity in the context under the 'DIMENSION' category.
- "beginning" can be matched to the 'Order Date' entity in

In [23]:
%%time
user_query = 'sales in 20/01/2020'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 20/01/2020
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'20/01/2020': [{'CONVERTED TIME ELEMENT': '20 January 2020', 'DATE RANGE': '2020/01/20 - 2020/01/20', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales in 20/01/2020", we can identify the following components:
  - "sales" is a measure
  - "20/01/2020" is a date

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "sales" can be matched to the "Sales" entity under the "MEASURE" category
  - "20/01/2020" can be matched to the "Order Date" entity under the "DATE VARIABLE" category

Step 3: Convert the query into a structured output
- Based on the matched components, we can now cre

In [24]:
%%time
user_query = 'sales in last one and half years'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in last one and half years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half years': [{'CONVERTED TIME ELEMENT': 'last one and half years', 'DATE RANGE': '2019/04/01 - 2023/09/30', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales in last one and half years", we can identify the following components:
  - "sales" is a measure
  - "last one and half years" is a date variable

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "sales" can be matched to the "Sales" entity under the 'MEASURE' category
  - "last one and half years" can be matched to the "Order Date" entity under the 'DATE VARIABLE' category

Step 3: Convert th

In [25]:
%%time
user_query = 'trend of sales in dubai now'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  trend of sales in dubai now
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'trend': [{'CONVERTED TIME ELEMENT': 'now', 'DATE RANGE': '2023/09/30 - 2023/09/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'dubai': [{'ENTITY': 'Dubai', 'PARENT': 'Country'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "trend of sales in dubai now", we can identify the following components:
  - "trend" refers to a date variable
  - "sales" refers to a measure
  - "dubai" refers to a filter
  - "now" refers to a specific time

Step 2: Match the components to the context
- Match "trend" to the "Order Date" entity in the DATE VARIABLE section of the context.
- Match "sales" to the "Sales" entity in the MEASURE section of the context.
- Match "dubai" to the "Dubai" entit

In [26]:
%%time
user_query = 'sales in the week of 01 March 2022'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in the week of 01 March 2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'week of 01 March 2022': [{'CONVERTED TIME ELEMENT': 'week of 01 March 2022', 'DATE RANGE': '2022/02/28 - 2022/03/06', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales in the week of 01 March 2022", we can identify the following components:
  - "sales" is a measure
  - "week of 01 March 2022" is a date variable

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "sales" can be matched to the "Sales" entity under the 'MEASURE' category
  - "week of 01 March 2022" can be matched to the "Order Date" entity under the 'DATE VARIABLE' category

Step 3: Convert the qu

In [63]:
%%time
user_query = 'top most selling segments in 2020 vs 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top most selling segments in 2020 vs 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2020 vs 2021': [{'CONVERTED TIME ELEMENT': '2020 vs 2021', 'DATE RANGE': '2020/01/01 - 2020/12/31, 2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '1'}]}]}, 'MEASURE': {'selling': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "top most selling segments" which indicates a ranking of segments based on sales.
- The query also mentions "in 2020 vs 2021" which indicates a comparison between two time periods.

Step 2: Match the components to the context
- "segments" can be matched to the "Segment" entity in the DIMENSION section of the context.
- "selling" can be matched to t

In [28]:
%%time
user_query = 'sales from 100 to 200'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales from 100 to 200
----------------------------------------------------------------------------------------------------
{'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '100', 'COMPARSION OPERATOR': '>='}, {'COMPARISON VALUE': '200', 'COMPARSION OPERATOR': '<='}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales from 100 to 200", the components are "sales", "100", and "200".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE section of the context.
- "100" and "200" are numerical values that will be used as constraints for the "Sales" entity.

Step 3: Determine the structured output format
- Since the query is asking for sales within a specific range, we need to use the "MEASURE CONSTRAINT" format in the structured output.

Step 4: Create the structured outp

In [29]:
%%time
user_query = 'sales from 2021 to 2023'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales from 2021 to 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021 to 2023': [{'CONVERTED TIME ELEMENT': '2021 to 2023', 'DATE RANGE': '2021/01/01 - 2023/12/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales from 2021 to 2023", we can identify the following components:
  1. "sales" - a measure
  2. "2021 to 2023" - a date range

Step 2: Match the components to the context
- For each component identified in Step 1, we need to match it with the appropriate entity in the context.

1. "sales" - Looking at the context under 'MEASURE', we can find that 'Sales' is an entity with other names such as 'sales' and 'sale'. Since 'sales' is present in the query, we can assume it is the preferred name in this conte

In [30]:
%%time
user_query = 'what is purchase across segments'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is purchase across segments
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "what is purchase across segments", the components are "purchase" and "segments".

Step 2: Match the components to the context
- "purchase" can be matched to the 'Purchase Vol' entity in the MEASURE section of the context.
- "segments" can be matched to the 'Segment' entity in the DIMENSION section of the context.

Step 3: Create the structured output
- Based on the matched components, we can create the structured output by organizing the identified entities under the appropriate sections (DIMENSION and MEASURE).

Rationale:
- The query is asking for the purchase value across d

In [31]:
%%time
user_query = 'discount rate of phone and binders'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount rate of phone and binders
----------------------------------------------------------------------------------------------------
{'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "discount rate of phone and binders", we can identify the following components:
  - "discount rate" is a measure
  - "phone" is a filter related to the sub-category
  - "binders" is also a filter related to the sub-category

Step 2: Match the components to the context
- Match "discount rate" to the "Discount" entity in the context under "MEASURE"
- Match "phone" to the "Phone" entity in the context under "FILTER" with par

In [32]:
%%time
user_query = 'discount rate of overall sub-category in corporate'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount rate of overall sub-category in corporate
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'sub-category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "discount rate of overall sub-category in corporate", we can identify the following components:
  - Discount rate: Measure
  - Sub-category: Dimension
  - Corporate: Filter

Step 2: Match the components to the context
- Match the identified components to the context provided:
  - Discount rate: {'ENTITY': 'Discount', 'other names': ['discount', 'discou

In [33]:
%%time
user_query = 'maximum sales of phone for consumer segment'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  maximum sales of phone for consumer segment
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segment': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ADJECTIVE': ['maximum'], 'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'TONE': 'positive'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "maximum sales", which is a measure with an adjective.
- "Phone" is mentioned, which is a filter related to a sub-category.
- "Consumer segment" is mentioned, which is another filter related to a segment.

Step 2: Match the components to the context
- "Maximum sales" can be

In [34]:
%%time
user_query = 'forecast of sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  forecast of sales
----------------------------------------------------------------------------------------------------
{'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the main components in the query
- In the query "forecast of sales", the main component is "sales".

Step 2: Match the component to the context
- In the given context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.

Step 3: Create the structured output
- Since the query is asking for a forecast of sales, we need to include the "Sales" entity in the structured output. We do this by creating a "MEASURE" dictionary with a key "sales" and a list containing a dictionary with the "ENTITY" key and the value "Sales".

Step 4: Add measure constraints
- In this case, there are no specific constraints m

In [35]:
%%time
user_query = 'quantity across segments except consumer'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity across segments except consumer
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'FILTER': {'consumer': [{'ENTITY': 'Consumer', 'EXCLUDE': 'True', 'PARENT': 'Segment'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "quantity" which is a measure.
- It also mentions "segments" which refers to a dimension.
- The query specifies "except consumer" which is a filter.

Step 2: Match the components to the context
- In the context, "quantity" is found under 'MEASURE' with the ENTITY 'Quantity'.
- "segments" is found under 'DIMENSION' with the ENTITY 'Segment'.
- "consumer" is found under 'FILTER' with the ENTITY 'Consumer' and parent 'Segment'.

Step 3: Create the structured output
- For the dimensi

In [36]:
%%time
user_query = 'which segment has highest purchase'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  which segment has highest purchase
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segment': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '1'}]}]}, 'MEASURE': {'purchase': [{'ADJECTIVE': ['highest'], 'ENTITY': 'Purchase Vol', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'TONE': 'positive'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "which segment has the highest purchase", we can identify the following components:
  - "segment" as a dimension
  - "highest purchase" as a measure with an adjective

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "segment" matches with the "Segment" entity under the "DIMENSION" category
  - "highest purchase" matches with 

In [37]:
%%time
user_query = 'top 2 segments basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  top 2 segments basis discount
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment', 'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '2'}]}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "top 2 segments basis discount", we can identify the following components:
  - "top 2": a rank adjective and rank value
  - "segments": a dimension
  - "discount": a measure

Step 2: Match the components to the context
- We match the components to the context provided:
  - "segments" can be matched to the "Segment" entity in the 'DIMENSION' section of the context.
  - "discount" can be matched to the "Discount" entity in the 'MEASURE' section of the context.

Step 3: Create the structured output
- Based on the matched components,

In [38]:
%%time
user_query = 'binder share of discount by consumer'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  binder share of discount by consumer
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'consumer': [{'ENTITY': 'Consumer', 'PARENT': 'Segment'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "binder share of discount by consumer", we can identify the following components:
  - Binder: Sub-Category
  - Share: Derived Measure (Ratio)
  - Discount: Measure
  - Consumer: Filter (Segment)

Step 2: Match the c

In [39]:
%%time
user_query = 'binder and phone share basis sales for corporate'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  binder and phone share basis sales for corporate
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Binder', 'Phone']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "binder" and "phone," which are related to the "Sub-Category" dimension.
- It also mentions "corporate," which is related to the "Segment" dimension.
- The query asks for the "share" based on "sales."

Step 2: Match the components to the context
- "Binder" and "Phone" are matched to the "Sub-Category"

In [40]:
%%time
user_query = 'in corporate, share of phone and binder basis discount'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  in corporate, share of phone and binder basis discount
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'share': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "in corporate, share of phone and binder basis discount", we can identify the following components:
  - Corporate (Segment)
  - Share (Derived Measure: Ratio)
  - Phone (Sub-Category)
  - Binder (Sub-Category)
  - Discount (Measure)

Step 2: Match the components to the context
- Corporate:

In [41]:
%%time
user_query = 'phone to binder ratio of discount in corporate'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  phone to binder ratio of discount in corporate
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'ratio': [{'APPLIED MEASURE': [{'discount': 'Discount'}], 'ENTITY': 'Ratio', 'RATIO FILTER': ['Phone', 'Binder']}]}, 'FILTER': {'binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}], 'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Phone
- Binder
- Ratio
- Discount
- Corporate

Step 2: Match the components to the context
- Phone: Sub-Category (Filter)
- Binder: Sub-Category (Filter)
- Ratio: Derived Measure
- Discount: Measure
- Corporate: Segment (Filter)

Step 3: Structure the output based on the identified components and their context

In [42]:
%%time
user_query = 'segments with discount rate greater than 100k'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  segments with discount rate greater than 100k
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '100k', 'COMPARSION OPERATOR': '>'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "segments with discount rate greater than 100k", we can identify the following components:
  - "segments" refers to a dimension
  - "discount rate" refers to a measure
  - "greater than 100k" is a constraint on the measure

Step 2: Match the components to the context
- Looking at the context, we can find the following matches:
  - "segments" matches with the "Segment" entity under "DIMENSION"
  - "discount rate" matches with the "Discount" entity under "MEASURE"

Step 3: Create the s

In [43]:
%%time
user_query = 'category with discount rate greater than 10k and quantity less than 2k'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  category with discount rate greater than 10k and quantity less than 2k
----------------------------------------------------------------------------------------------------
{'DIMENSION': {'category': [{'ENTITY': 'Sub-Category'}]}, 'MEASURE': {'discount rate': [{'ENTITY': 'Discount', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '10k', 'COMPARSION OPERATOR': '>'}]}], 'quantity': [{'ENTITY': 'Quantity', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '2k', 'COMPARSION OPERATOR': '<'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "category with discount rate greater than 10k and quantity less than 2k", we can identify the following components:
  - "category" refers to a dimension
  - "discount rate" refers to a measure
  - "greater than 10k" is a constraint on the discount rate measure
  - "quantity" refers to another measure
  - "less than 2k" is a constraint on t

In [44]:
%%time
user_query = 'why did discount of phones drop'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "why did discount of phones drop", the components are "why", "discount", "phones", and "drop".

Step 2: Match the components to the context
- "why" can be matched to the derived measure "Why" in the context.
- "discount" can be matched to the measure "Discount" in the context.
- "phones" can be matched to the filter "Phone" in the context, which has a parent "Sub-Category".
- "drop" is an adjective that indicates a negative tone.

Step 3: Create the structured output
- F

In [45]:
%%time
user_query = 'correlation of sales and purchase for phone'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  correlation of sales and purchase for phone
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'correlation': [{'APPLIED MEASURE': [{'sales': 'Sales'}, {'purchase': 'Purchase Vol'}], 'ENTITY': 'correlation'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "correlation of sales and purchase for phone", we can identify the following components:
  - "correlation" is a derived measure
  - "sales" is a measure
  - "purchase" is a measure
  - "phone" is a filter

Step 2: Match the components to the context
- Match "correlation" to the derived measure "correlation" in the context
- Match "sales" to the measure "Sales" in the context
- Match "purchase" to the measure "Purchase Vol" in the context
- Match "phone" to the filter "P

In [46]:
%%time
user_query = 'what will be the sales in q1 24'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be the sales in q1 24
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 24': [{'CONVERTED TIME ELEMENT': 'quarter 1 2024', 'DATE RANGE': '2024/01/01 - 2024/03/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "what will be the sales in q1 24", we can identify the following components:
  - "sales" as a measure
  - "q1 24" as a date variable

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "sales" can be matched to the "Sales" entity under the "MEASURE" category
  - "q1 24" can be matched to the "Order Date" entity under the "DATE VARIABLE" category

Step 3: Convert the date component
- The date component "q1 24" needs to be convert

In [47]:
%%time
user_query = 'sales in q1 and q2 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in q1 and q2 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 and q2 2021': [{'CONVERTED TIME ELEMENT': 'quarter 1 and quarter 2 2021', 'DATE RANGE': '2021/01/01 - 2021/03/31, 2021/04/01 - 2021/06/30', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales in q1 and q2 2021", the components are "sales" and "q1 and q2 2021".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity under the "MEASURE" category in the context.
- "q1 and q2 2021" is a time-related component and can be matched to the "Order Date" entity under the "DATE VARIABLE" category in the context.

Step 3: Convert the time component to a date range
- "q1 and q2 2021" can be converted to two date ranges: "

In [48]:
%%time
user_query = 'how many segments contributing to growth of sales in p3m vs pp'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how many segments contributing to growth of sales in p3m vs pp
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'p3m': [{'CONVERTED TIME ELEMENT': 'previous 3 months', 'DATE RANGE': 'date range', 'ENTITY': 'Order Date'}], 'pp': [{'CONVERTED TIME ELEMENT': 'previous period', 'DATE RANGE': 'date range', 'ENTITY': 'Order Date'}]}, 'DERIVED MEASURE': {'contributing to growth': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'ENTITY': 'contribution_to_growth'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "how many segments": Dimension - Segment
- "contributing to growth": Derived Measure - contribution_to_growth
- "of sales": Measure - Sales
- "in p3m vs pp": Date Variable - Order Date

Step 2: Convert the query i

In [49]:
%%time
user_query = 'how has sales trended in first week of 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how has sales trended in first week of 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'first week of 2021': [{'CONVERTED DATE RANGE': '01/01/2021 - 07/01/2021', 'DATE RANGE': '01/01/2021 - 07/01/2021', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "how has sales trended in first week of 2021", we can identify the following components:
  - Sales: A measure
  - First week of 2021: A date range

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - Sales: Matches with the 'Sales' entity in the 'MEASURE' section of the context.
  - First week of 2021: This is a date range, which can be converted into a date range in the context. Since the context 

In [50]:
%%time
user_query = 'what will be sales in 1st 5 days of 2024'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what will be sales in 1st 5 days of 2024
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st 5 days of 2024': [{'CONVERTED TIME ELEMENT': 'first 5 days of january 2024', 'DATE RANGE': '2024/01/01 - 2024/01/05', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "what will be sales in 1st 5 days of 2024", the components are "sales" and "1st 5 days of 2024".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity under the "MEASURE" category in the context.
- "1st 5 days of 2024" can be matched to the "Order Date" entity under the "DATE VARIABLE" category in the context.

Step 3: Convert the components into structured output format
- For the "sales" component, create a dictionary with the 

In [51]:
%%time
user_query = 'sales and purchase across yoy'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales and purchase across yoy
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'yoy': [{'CONVERTED TIME ELEMENT': 'year over year', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol'}], 'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "sales and purchase across yoy", we can identify the following components:
  - sales
  - purchase
  - yoy (year over year)

Step 2: Match the components to the context
- We will now match these components to the given context:
  - sales: This can be matched to the 'Sales' entity under 'MEASURE'.
  - purchase: This can be matched to the 'Purchase Vol' entity under 'MEASURE'.
  - yoy: This can be matched to the 'Order Date' entity under 'DATE VARIABLE' as it is related to time and has 'yoy' as one of its o

In [52]:
%%time
user_query = 'sales and purchase during last one year'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales and purchase during last one year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one year': [{'CONVERTED TIME ELEMENT': 'last one year', 'DATE RANGE': '2019/09/01 - 2020/08/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'purchase': [{'ENTITY': 'Purchase Vol'}], 'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" and "purchase," which are measures.
- The query mentions "during last one year," which is a date variable.

Step 2: Match the components to the context
- In the context, "sales" is found under the 'MEASURE' category with the entity 'Sales'.
- In the context, "purchase" is found under the 'MEASURE' category with the entity 'Purchase Vol'.
- In the context, "last one year" is found under the 'DATE VARIABLE' category with the entity '

In [53]:
%%time
user_query = 'In q1 2023, what was sales'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In q1 2023, what was sales
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED DATE RANGE': '01/01/2023 - 31/03/2023', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "In q1 2023, what was sales", the components are "q1 2023" (date), "what was sales" (measure), and "in q1 2023" (date reference).

Step 2: Match the components to the context
- "q1 2023" can be matched to the "Order Date" entity in the DATE VARIABLE section of the context.
- "sales" can be matched to the "Sales" entity in the MEASURE section of the context.

Step 3: Convert the query into structured output format
- For the "q1 2023" component, create a "DATE VARIABLE" key with a dictionary containing the "Order Date" entity and its corre

In [54]:
%%time
user_query = 'What are drivers of sales growth'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What are drivers of sales growth
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'contribution_to_growth': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'contribution_to_growth', 'RANK': [{'RANK ADJECTIVE': '', 'RANK VALUE': ''}]}]}, 'DATE VARIABLE': {'order date': [{'ENTITY': 'Order Date', 'TIME FRAME': [{'END DATE': '', 'START DATE': ''}]}]}, 'FILTER': {'corporate': [{'ENTITY': 'Corporate', 'PARENT': 'Segment'}], 'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Drivers of sales growth

Step 2: Match the components to the context
- S

In [55]:
%%time
user_query = 'why did discount of phones decrease'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones decrease
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['decreased'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "why did discount of phones decrease", the components are "why", "discount", "phones", and "decrease".

Step 2: Match the components to the context
- "why" can be matched to the derived measure "Why" in the context.
- "discount" can be matched to the measure "Discount" in the context.
- "phones" can be matched to the filter "Phone" in the context, which has a parent "Sub-Category".
- "decrease" is an adjective that indicates a negative tone.

Step 3: Create the str

In [56]:
%%time
user_query = 'why did sales of phones drop'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did sales of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Sales', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "why did sales of phones drop", the components are "why", "sales", "phones", and "drop".

Step 2: Match the components to the context
- "why" can be matched to the derived measure "Why" in the context.
- "sales" can be matched to the measure "Sales" in the context.
- "phones" can be matched to the filter "Phone" in the context, which has a parent "Sub-Category".
- "drop" is an adjective that indicates a negative tone.

Step 3: Create the structured output
- For the derived measur

In [57]:
%%time
user_query = 'why did discount of phones drop'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why did discount of phones drop
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "why did discount of phones drop", the components are "why", "discount", "phones", and "drop".

Step 2: Match the components to the context
- "why" can be matched to the derived measure "Why" in the context.
- "discount" can be matched to the measure "Discount" in the context.
- "phones" can be matched to the filter "Phone" in the context, which has a parent "Sub-Category".
- "drop" is an adjective that indicates a negative tone.

Step 3: Create the structured output
- F

In [58]:
%%time
user_query = 'why discount of phones dropped'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why discount of phones dropped
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "why discount of phones dropped", the components are "why", "discount", "phones", and "dropped".

Step 2: Match the components to the context
- "why" can be matched to the derived measure "Why" in the context.
- "discount" can be matched to the measure "Discount" in the context.
- "phones" can be matched to the filter "Phone" in the context, which has a parent "Sub-Category".
- "dropped" is an adjective that indicates a negative tone.

Step 3: Create the structured output

In [59]:
%%time
user_query = 'discount of phones dropped, why?'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones dropped, why?
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ADJECTIVE': ['dropped'], 'ENTITY': 'Discount', 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "discount of phones dropped, why?", we can identify the following components:
  - "discount" is a measure
  - "phones" is a filter related to the sub-category
  - "dropped" is an adjective describing the measure
  - "why" is a derived measure asking for a reason

Step 2: Match the components to the context
- Match "discount" to the "Discount" entity in the context under "MEASURE"
- Match "phones" to the "Phone" entity in the context under "FILTER" with parent "Sub-Categ

In [60]:
%%time
user_query = "What was phone sales in q1 '23'?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What was phone sales in q1 '23'?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {"q1 '23'": [{'CONVERTED DATE RANGE': '01/01/2023 - 31/03/2023', 'DATE RANGE': '01/01/2023 - 31/03/2023', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Phone
- Sales
- Q1 '23'

Step 2: Match the components to the context
- Phone: Sub-Category (Filter)
- Sales: Sales (Measure)
- Q1 '23': Date Variable

Step 3: Convert the query into structured output format
- For the Filter section, add the identified component (Phone) with its parent (Sub-Category).
- For the Measure section, add the identified component (Sales) with its parent (Sales).
- For the Date Variable section, add the identif

In [61]:
%%time
user_query = "Phones least sold in 2021"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Phones least sold in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021', 'DATE RANGE': '2021', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'least sold': [{'ADJECTIVE': ['least'], 'ENTITY': 'Sales', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'RANK': [{'RANK ADJECTIVE': 'bottom', 'RANK VALUE': '1'}], 'TONE': 'negative'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Phones" which is a Sub-Category.
- The query mentions "least sold" which is a measure with a rank constraint.
- The query mentions "in 2021" which is a date reference.

Step 2: Match the components to the context
- "Phones" matches to the "Sub-Category" entity in the context und

In [62]:
%%time
user_query = "Phones most sold in 2021"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Phones most sold in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021', 'DATE RANGE': '2021', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sold': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Phones" which is a sub-category.
- The query mentions "most sold" which is a measure.
- The query mentions "in 2021" which is a date reference.

Step 2: Match the components to the context
- "Phones" can be matched to the "Sub-Category" entity in the context under the "FILTER" category.
- "most sold" can be matched to the "Sales" entity in the context under the "MEASURE" category.
- "in 2021" can be matched to the "Order Date" entity in the context under the "DA

In [64]:
%%time
user_query = "In 02/19, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In 02/19, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'02/19': [{'CONVERTED DATE FORMAT': '02/19/2020', 'DATE RANGE': '2020/02/19 - 2020/02/19', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "In 02/19, What was the sales of phone?", we can identify the following components:
  - 02/19: Date
  - sales: Measure
  - phone: Filter

Step 2: Match the components to the context
- Match the identified components to the context provided:
  - 02/19: This date can be matched to the 'Order Date' entity in the DATE VARIABLE section of the context.
  - sales: This measure can be matched to the 'Sales' entity in the MEASURE section o

In [65]:
%%time
user_query = "In Feb 2019, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In Feb 2019, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Feb 2019': [{'CONVERTED DATE FORMAT': '02/01/2019', 'DATE RANGE': '2019/02/01 - 2019/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "In Feb 2019, What was the sales of phone?", we can identify the following components:
  - Date: "Feb 2019"
  - Query object: "sales of phone"

Step 2: Match the components to the context
- Match the identified components to the context provided:
  - Date: Match 'Feb 2019' to 'Order Date' in the context under 'DATE VARIABLE'.
  - Query object: Match 'sales' to 'Sales' in the context under 'MEASURE'.
  - Phone: Match 'phone' 

In [66]:
%%time
user_query = "worst performing sub category in 2021"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  worst performing sub category in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021', 'DATE RANGE': '2021', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'sub category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': 'worst', 'RANK VALUE': '1'}]}]}, 'MEASURE': {'performing': [{'ENTITY': 'Purchase Vol'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "worst performing sub category", which indicates a dimension (sub-category) and a measure (purchase volume).
- The query also specifies the time frame as "in 2021", which indicates a date variable (order date).

Step 2: Match the components to the context
- "sub-category" can be matched to the "Sub-Category" entity in the context under the "DIMENSION" category.
- "purchase volume" can be ma

In [68]:
%%time
user_query = "What was phone sales in jun '20'?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What was phone sales in jun '20'?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {"jun '20'": [{'CONVERTED TIME ELEMENT': 'June 2020', 'DATE RANGE': '2020/06/01 - 2020/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "What was phone sales in jun '20'?" which indicates that we need to find the sales of a specific sub-category (phone) during a particular time (jun '20').

Step 2: Match the components to the context
- "Phone" can be matched to the "Sub-Category" entity in the context under the "FILTER" category.
- "sales" can be matched to the "Sales" entity in the context under the "MEASURE" category.
- "jun '20'" can be matched to the "Orde

In [69]:
%%time
user_query = "Growth rate of sales share of phone"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Growth rate of sales share of phone
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'growth rate': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'ENTITY': 'Growth Rate'}], 'share': [{'ENTITY': 'Ratio', 'FILTER': [{'PARENT': 'Sub-Category', 'ENTITY': 'Phone'}]}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "Growth rate of sales share of phone", we can identify the following components:
  - "Growth rate" is a derived measure.
  - "sales" is a measure.
  - "share" is a derived measure.
  - "phone" is a filter.

Step 2: Match the components to the context
- Match "Growth rate" to the derived measure "Growth Rate" in the context.
- Match "sales" to the measure "Sales" in the context.
- Match "share" to the derived measure "Ratio" in the context.
- 

In [27]:
%%time
user_query = "which are the 5 top selling sub category by sales in 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  which are the 5 top selling sub category by sales in 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2023': [{'CONVERTED TIME ELEMENT': '2023', 'DATE RANGE': '2023', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}], 'Binder': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "which are the 5 top selling sub category by sales in 2023", we can identify the following components:
  - Top selling sub-categories: Sub-Category
  - Sales: Sales
  - 2023: Date Variable

Step 2: Match the components to the context
- We match the identified components to the context provided:
  - Sub-Category: Found in the "FILTER" section with the entity "Phone" and "Binder"

In [26]:
%%time
user_query = "how does the sales change for phone in the last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how does the sales change for phone in the last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last year': [{'CONVERTED TIME ELEMENT': 'last year', 'DATE RANGE': '2020/01/01 - 2023/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- In the query "how does the sales change for phone in the last year", we can identify the following components:
  - "sales" is a measure
  - "phone" is a filter related to the sub-category
  - "last year" is a date variable

Step 2: Match the components to the context
- From the given context, we can match the identified components as follows:
  - "sales" can be matched to the "Sales" entity under the 'MEASURE' category
  - "p