### Distilling step-by-step fine-tuning approach: trying an enhanced rationale with explicit reasoning for date conversion

In [1]:
!sudo pip install -q transformers --upgrade

In [8]:
!sudo pip install -q peft

In [2]:
import transformers
# Display the installed transformers version so the recorded outputs can be tied to a specific release
transformers.__version__

'4.34.1'

In [3]:
#!sudo pip install -q accelerate peft==0.4.0 bitsandbytes trl==0.4.7

In [9]:
import ast
import os

import pandas as pd
import torch
from datasets import load_dataset
from datasets import Dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

In [4]:
# Hugging Face hub id of the base model; in the cells shown here it is only used to load the matching tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# The instruction dataset to use (left disabled)
#dataset_name = ""

# Fine-tuned model name (left disabled)
#new_model = "mistral-ft-peft-on-template_and_user_query-data"

In [5]:
# Entity catalogue that is inserted verbatim into the prompt as {context}. It is a JSON-formatted string,
# not a dict, so the prompt contains exactly the text written here.
# NOTE(review): some aliases are listed for more than one entity ("section" for Sub-Category and Parts,
# "divisions" for Segment and Parts, "growth"/"grown" for contribution_to_growth and Growth Rate), so an
# alias alone cannot always identify the entity — confirm this ambiguity is intended.
context = """{
    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},
                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},
                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},
                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],
    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},
                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},
                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},
                  {"ENTITY": "Country", "other names": ["country", "countries"]}],
    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},
               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"], "parent": "Sub-Category"},
               {"ENTITY": "Binder", "other names": ["binders", "binder"], "parent": "Sub-Category"},
               {"ENTITY": "Corporate", "other names": ["corporates", "corporate"], "parent": "Segment"},
               {"ENTITY": "India", "other names": ["india"], "parent": "Country"},
               {"ENTITY": "Dubai", "other names": ["dubai"], "parent": "Country"}],
    "DERIVED MEASURE": [{"ENTITY": "Ratio",
             "other names": ["ratio", "share", "contribution", "percentage", "proportion", "contributing"]},
            {"ENTITY": "Why", "other names": ["why", "cause of", "reason for", "diagnose"]},
            {"ENTITY": "contribution_to_growth", "other names": ["contribution to growth", "growth", "grown"]},
            {"ENTITY": "kda_transactional", "other names": ["kda", "key drivers", "key driver", "drivers", "driver"]},
            {"ENTITY": "Growth Rate", "other names": ["growth rate", "growth", "grown"]},
            {"ENTITY": "correlation",
             "other names": ["associate", "associated", "association", "associations", "correlate", "correlated",
                             "correlation", "correlations", "relate", "related", "relation", "relations",
                             "relationship",
                             "relationships"]}
            ],
    "DATE VARIABLE": [{"ENTITY": "Order Date", "other names": ["order date", "date", "trend", "time", "when", "mom", "yoy"]}]
    }"""

In [6]:
# Date window passed to the prompt as the "date reference"; both bounds are day-first (DD/MM/YYYY) strings.
date_input = dict(start_date="01/01/2020", end_date="15/09/2023")

In [10]:
# Check whether PyTorch can see a CUDA device before the model is loaded
torch.cuda.is_available()

False

In [11]:
# Load the tokenizer for `model_name` (a Mistral checkpoint), requesting the non-fast implementation
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                          # add_eos_token=True,
                                          use_fast=False)
# Reuse the EOS token as the padding token
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right-hand side of each sequence
tokenizer.padding_side = "right"

In [12]:
from peft import PeftModel, PeftConfig

In [13]:
# Checkpoint directory that is loaded with AutoPeftModelForCausalLM below.
# NOTE(review): absolute local path — adjust it for the machine the notebook runs on.
new_model_name = "/data/mistral/query-to-mql/exp-9/nov-01/checkpoint-2000"

In [16]:
from peft import AutoPeftModelForCausalLM

# Load the fine-tuned PEFT checkpoint in bfloat16; device_map="auto" lets the library decide where the weights are placed
model = AutoPeftModelForCausalLM.from_pretrained(new_model_name, device_map="auto", torch_dtype=torch.bfloat16)
# Merge the adapter weights into the base model and continue with the merged model for generation
model = model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [17]:
# Prompt template filled with the entity context, the date reference and the user query.
# It ends with the "[MQL]" marker line, which predict_template_query_v1 later splits on to locate the generated output.
query_template_v1 = """Given the context : {context} and date reference: {date_input}, the query: {user_query}, is converted into below shown structured output.
[MQL]
"""

In [18]:
def predict_template_query_v1(user_query, max_length=1700):
    """Generate the structured MQL output for a natural-language query.

    Fills ``query_template_v1`` with the module-level ``context`` and
    ``date_input`` plus ``user_query``, runs ``model.generate`` on the
    tokenized prompt and cuts the text between the ``[MQL]`` and ``[/MQL]``
    markers out of the decoded sequence.

    Args:
        user_query: The question to convert.
        max_length: Value passed to ``model.generate`` as ``max_length``.
            Defaults to the previously hard-coded 1700.

    Returns:
        A ``(mql_text, raw_output)`` tuple: the text found after the ``[MQL]``
        marker line and before ``[/MQL]`` (everything after the opening marker
        if the closing one is absent), and the full decoded sequence.

    Raises:
        ValueError: If the decoded text does not contain the ``[MQL]`` marker.
    """
    prompt = query_template_v1.format(context=context,
                                      user_query=user_query,
                                      date_input=date_input)
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # The model is loaded with device_map="auto", so it may not live on the CPU:
    # put the prompt on the same device before generating.
    input_ids = input_ids.to(model.device)
    outputs = model.generate(input_ids=input_ids, max_length=max_length,
                             pad_token_id=tokenizer.eos_token_id)
    output = tokenizer.decode(outputs[0])
    pieces = output.split('[MQL]\n')
    if len(pieces) < 2:
        # Fail with a readable message instead of a bare IndexError.
        raise ValueError("'[MQL]' marker not found in the generated text: %r" % output[-200:])
    return pieces[1].split('\n[/MQL]')[0], output

In [43]:
%%time
user_query = 'why sales changed in last 2 weeks of aug 2021'
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  why sales changed in last 2 weeks of aug 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 weeks of aug 2021': [{'CONVERTED TIME ELEMENT': 'last 2 weeks of aug 2021', 'DATE RANGE': '2021/08/16 - 2021/08/29', 'ENTITY': 'Order Date'}]}, 'DERIVED MEASURE': {'why': [{'ENTITY': 'Why'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks "why sales changed in last 2 weeks of aug 2021". The components in the query are "sales", "last 2 weeks of aug 2021", and "why".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "last 2 weeks of aug 2021" can be matched to the "Order Date" entity in the DATE VARIABLE context.
- "why" can be matched to the "Why" entity in the DERIVED MEASURE 

In [68]:
%%time
user_query = "Phones most sold in 2021"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Phones most sold in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021/01/01 - 2021/12/31', 'DATE RANGE': '2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'most sold': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Phones" which is a FILTER component.
- The query mentions "most sold" which is a MEASURE component.
- The query mentions "2021" which is a DATE VARIABLE component.

Step 2: Match the components to the context
- "Phones" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "most sold" can be matched to the "Sales" entity in the context under MEASURE.
- "2021" can be matched to the

In [69]:
%%time
user_query = "In 02/19, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In 02/19, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'02/19': [{'CONVERTED TIME ELEMENT': 'february 2019', 'DATE RANGE': '2019/02/01 - 2019/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "02/19" which is a date component.
- The query also mentions "What was the sales of phone?" which are two separate components - "02/19" and "Sales of phone".

Step 2: Match the components to the context
- "02/19" can be matched to the "Order Date" entity in the context under "DATE VARIABLE".
- "Sales" can be matched to the "Sales" entity in the context under "MEASURE".
- "Phone" can be matched to the "Phone" entity in th

In [70]:
%%time
user_query = "In Feb 2019, What was the sales of phone?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  In Feb 2019, What was the sales of phone?
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Feb 2019': [{'CONVERTED TIME ELEMENT': 'February 2019', 'DATE RANGE': '2019/02/01 - 2019/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "Feb 2019" which is a date component.
- The query also mentions "sales" which is a measure component.
- The query mentions "phone" which is a filter component.

Step 2: Match the components to the context
- "Feb 2019" can be matched to the "Order Date" entity in the context.
- "sales" can be matched to the "Sales" entity in the context.
- "phone" can be matched to the "Phone" entity in the context.

Step 3: Convert

In [71]:
%%time
user_query = "worst performing sub category in 2021"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  worst performing sub category in 2021
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2021': [{'CONVERTED TIME ELEMENT': '2021/01/01 - 2021/12/31', 'DATE RANGE': '2021/01/01 - 2021/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Sub-Category': [{'ENTITY': 'Sub-Category', 'RANK': [{'RANK ADJECTIVE': 'worst', 'RANK VALUE': '1'}]}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "worst performing sub category" which indicates a ranking and a filter.
- The query also mentions "2021" which is a date component.

Step 2: Match the components to the context
- "sub category" can be matched to the "Sub-Category" entity in the context under FILTER.
- "worst performing" can be matched to the "Sub-Category" entity in the context under FILTER with the RANK ADJECTIVE "worst" and RANK VALUE "1".
- "

In [72]:
%%time
user_query = "What was phone sales in jun '20'?"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  What was phone sales in jun '20'?
----------------------------------------------------------------------------------------------------


SyntaxError: EOL while scanning string literal (<string>, line 3)

In [73]:
%%time
user_query = "Growth rate of sales share of phone"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  Growth rate of sales share of phone
----------------------------------------------------------------------------------------------------
{'DERIVED MEASURE': {'growth rate': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Growth Rate'}], 'sales share': [{'APPLIED MEASURE': [{'sales': 'Sales'}], 'DERIVED MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}], 'ENTITY': 'Ratio'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "growth rate" and "sales share" as derived measures.
- The query also mentions "phone" as a filter.

Step 2: Match the components to the context
- "growth rate" can be matched to the "Growth Rate" entity in the context.
- "sales share" can be matched to the

In [74]:
%%time
user_query = "which are the 5 top selling sub category by sales in 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  which are the 5 top selling sub category by sales in 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2023': [{'CONVERTED TIME ELEMENT': '2023/01/01 - 2023/09/15', 'DATE RANGE': '2023/01/01 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'DERIVED MEASURE': {'top 5': [{'RANK': [{'RANK ADJECTIVE': 'top', 'RANK VALUE': '5'}], 'SUB-CATEGORY': [{'SUB-CATEGORY': [{'sales': 'sales'}]}]}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}], 'sub-category': [{'ENTITY': 'Sub-Category', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- "top 5": Derived Measure (rank adjective and rank value)
- "sub-category": Filter (parent is Sub-Category)
- "sales": Measure (Sales entity)
- "2023": Date Variable

Step 2: Match the components 

In [75]:
%%time
user_query = "how does the sales change for phone in the last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  how does the sales change for phone in the last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last year': [{'CONVERTED TIME ELEMENT': 'last year', 'DATE RANGE': '2022/01/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "how does the sales change for phone in the last year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone

In [76]:
%%time
user_query = "what is sales of phone in q1 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: q1 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub

In [77]:
%%time
user_query = "what is sales of phone in q3"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q3
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q3': [{'CONVERTED TIME ELEMENT': 'quarter 3', 'DATE RANGE': '2023/04/01 - 2023/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q3". 
- The components are "sales", "phone", and "q3".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "q3" is a date component that needs to be converted.

Step 3: Convert the date component
- "q3" is a date component that represents quarter 3.
- We need to convert it to a date ra

In [78]:
%%time
user_query = "what is sales of phone in q1"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1': [{'CONVERTED TIME ELEMENT': 'quarter 1', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1". 
- The components are "sales", "phone", and "q1".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "q1" is a date component that needs to be converted.

Step 3: Convert the date component
- "q1" is a date component that represents quarter 1.
- We need to convert it to a date ra

In [79]:
%%time
user_query = "what as sales of phone in last one and half years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what as sales of phone in last one and half years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half years': [{'CONVERTED TIME ELEMENT': 'last one and half years', 'DATE RANGE': '2022/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales" of "phone" in the "last one and half years".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phone" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "last one and half years" needs to be converted to a date range.

Step 3: Convert the date component
- "l

In [80]:
%%time
user_query = "what is sales of phone now"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone now
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'now': [{'CONVERTED TIME ELEMENT': 'now', 'DATE RANGE': '2023/09/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone now". 
- The components are "sales" (measure), "phone" (filter), and "now" (date variable).

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phone" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "now" is a date variable that needs to be converted.

Step 3: Convert the date component
- "now" is a date variab

In [81]:
%%time
user_query = "what is sales of phone in 1st quarter 2022"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 1st quarter 2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st quarter 2022': [{'CONVERTED TIME ELEMENT': 'quarter 1 2022', 'DATE RANGE': '2022/01/01 - 2022/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 1st quarter 2022". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 1st quarter 2022 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phone

In [82]:
%%time
user_query = "what is sales of phone in 1st quarter 2019"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 1st quarter 2019
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'1st quarter 2019': [{'CONVERTED TIME ELEMENT': '1st quarter 2019', 'DATE RANGE': '2019/01/01 - 2019/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 1st quarter 2019". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: 1st quarter 2019 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'pho

In [83]:
%%time
user_query = "what is sales of phone in q1 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in q1 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'q1 2023': [{'CONVERTED TIME ELEMENT': 'quarter 1 2023', 'DATE RANGE': '2023/01/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in q1 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: q1 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub

In [84]:
%%time
user_query = "what is sales of phone in Feb 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in Feb 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Feb 2023': [{'CONVERTED TIME ELEMENT': 'February 2023', 'DATE RANGE': '2023/02/01 - 2023/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in Feb 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Feb 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent '

In [85]:
%%time
user_query = "what is sales of phone in Sep 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in Sep 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'Sep 2023': [{'CONVERTED TIME ELEMENT': 'September 2023', 'DATE RANGE': '2023/09/01 - 2023/09/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in Sep 2023". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Sep 2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 

In [86]:
%%time
user_query = "what is sales of phone in last one and half month"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in last one and half month
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one and half month': [{'CONVERTED TIME ELEMENT': 'last one and half month', 'DATE RANGE': '2023/08/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last one and half month". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: Last one and half month (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' wi

In [87]:
%%time
user_query = "what is sales of phone in 01/2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/2023': [{'CONVERTED TIME ELEMENT': 'january 2023', 'DATE RANGE': '2023/01/01 - 2023/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/2023". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as {'ENTITY': 'Sales', 'other names': ['sales', 'sale']}
- Filter: Phone is present in the context as {'ENTITY': 'Phone', 'other names': ['phone', 'phones', 'mobile phones'], 'p

In [88]:
%%time
user_query = "what is sales of phone in 01/22"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/22': [{'CONVERTED TIME ELEMENT': 'january 2022', 'DATE RANGE': '2022/01/01 - 2022/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/22". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/22 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] and parent 'Sub-Category'.
- Da

In [89]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last 2.5 years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last 2.5 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2.5 years': [{'CONVERTED TIME ELEMENT': 'last 2.5 years', 'DATE RANGE': '2021/03/15 - 2023/09/15', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last 2.5 years."
- The components are: sales, phone, and last 2.5 years.

Step 2: Match the components to the context
- Sales can be matched to the "Sales" entity in the MEASURE context.
- Phone can be matched to the "Phone" entity in the FILTER context.
- Last 2.5 years can be matched to the "Order Date" entity in the DATE VARIABLE context.

Step 3: Convert the date component
- The date re

In [90]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last week"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last week
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last week': [{'CONVERTED TIME ELEMENT': 'last week', 'DATE RANGE': '2023/09/01 - 2023/09/07', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last week". 
- The components are "sales", "phone", and "last week".

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "last week" is a date component that needs to be converted.

Step 3: Convert the date component
- "last week" is a date component that needs to be conv

In [91]:
# Entity catalogue, kept as a JSON-formatted string. Its top-level keys are the
# categories MEASURE, DIMENSION, FILTER, DERIVED MEASURE and DATE VARIABLE; each
# entry gives a canonical "ENTITY" name plus its "other names", and FILTER
# entries also carry a "parent".
# NOTE(review): `context` is already assigned in an earlier cell; this
# assignment replaces that value. Presumably predict_template_query_v1 reads it
# as a global -- confirm, and drop whichever copy is redundant.
context = """{
    "MEASURE": [{"ENTITY": "Discount", "other names": ["discount", "discount rate", "discount value", "deduction"]},
                {"ENTITY": "Purchase Vol", "other names": ["purchase", "purchase value", "purchase model"]},
                {"ENTITY": "Quantity", "other names": ["quantity", "volume"]},
                {"ENTITY": "Sales", "other names": ["sales", "sale"]}],
    "DIMENSION": [{"ENTITY": "Sub-Category", "other names": ["sub-category", "sub category", "categories", "section"]},
                  {"ENTITY": "Segment", "other names": ["segment", "segments", "units", "divisions"]},
                  {"ENTITY": "Parts", "other names": ["parts", "part", "section", "divisions"]},
                  {"ENTITY": "Country", "other names": ["country", "countries"]}],
    "FILTER": [{"ENTITY": "Consumer", "other names": ["consumers", "consumer"], "parent": "Segment"},
               {"ENTITY": "Phone", "other names": ["phone", "phones", "mobile phones"], "parent": "Sub-Category"},
               {"ENTITY": "Binder", "other names": ["binders", "binder"], "parent": "Sub-Category"},
               {"ENTITY": "Corporate", "other names": ["corporates", "corporate"], "parent": "Segment"},
               {"ENTITY": "India", "other names": ["india"], "parent": "Country"},
               {"ENTITY": "Dubai", "other names": ["dubai"], "parent": "Country"}],
    "DERIVED MEASURE": [{"ENTITY": "Ratio",
             "other names": ["ratio", "share", "contribution", "percentage", "proportion", "contributing"]},
            {"ENTITY": "Why", "other names": ["why", "cause of", "reason for", "diagnose"]},
            {"ENTITY": "contribution_to_growth", "other names": ["contribution to growth", "growth", "grown"]},
            {"ENTITY": "kda_transactional", "other names": ["kda", "key drivers", "key driver", "drivers", "driver"]},
            {"ENTITY": "Growth Rate", "other names": ["growth rate", "growth", "grown"]},
            {"ENTITY": "correlation",
             "other names": ["associate", "associated", "association", "associations", "correlate", "correlated",
                             "correlation", "correlations", "relate", "related", "relation", "relations",
                             "relationship",
                             "relationships"]}
            ],
    "DATE VARIABLE": [{"ENTITY": "Order Date", "other names": ["order date", "date", "trend", "time", "when", "mom", "yoy"]}]
    }"""

In [92]:
# Reference date window as strings. end_date is day-first ("20/12/2023"), so
# start_date is presumably 1 August 2020 -- TODO confirm the intended format.
# NOTE(review): presumably read as a global by predict_template_query_v1 -- confirm.
date_input = dict(start_date="01/08/2020", end_date="20/12/2023")

In [93]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone from beginning"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone from beginning
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'beginning': [{'CONVERTED TIME ELEMENT': 'beginning', 'DATE RANGE': '01/01/2020 - 20/12/2023', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone from beginning."
- The components in the query are "sales," "phone," and "beginning."

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the MEASURE context.
- "phone" can be matched to the "Phone" entity in the FILTER context.
- "beginning" can be matched to the "Order Date" entity in the DATE VARIABLE context.

Step 3: Convert the date component
- The query ask

In [94]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last one year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last one year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last one year': [{'CONVERTED TIME ELEMENT': 'last one year', 'DATE RANGE': '2022/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last one year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Last one year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile p

In [95]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last 2 year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last 2 year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 year': [{'CONVERTED TIME ELEMENT': 'last 2 year', 'DATE RANGE': '2021/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last 2 year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last 2 year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mobile phones'] an

In [127]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last four year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last four year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four year': [{'CONVERTED TIME ELEMENT': 'last four year', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last four year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last four year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones', 'mob

In [125]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in second month of next year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in second month of next year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'second month of next year': [{'CONVERTED TIME ELEMENT': 'second month of next year', 'DATE RANGE': '2024/02/01 - 2024/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in second month of next year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Second month of next year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the conte

In [124]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in second month of second last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in second month of second last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'second month of second last year': [{'CONVERTED TIME ELEMENT': 'second month of second last year', 'DATE RANGE': '2021/02/01 - 2021/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in second month of second last year". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: Second month of second last year (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Fi

In [98]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "what is sales of phone in last five years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  what is sales of phone in last five years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last five years': [{'CONVERTED TIME ELEMENT': 'last five years', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in last five years". We can identify the following components:
    - Measure: Sales
    - Filter: Phone (Sub-Category)
    - Date Variable: last five years (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as 'Sales' with other names ['sales', 'sale'].
- Filter: Phone is present in the context as 'Phone' with other names ['phone', 'phones',

In [123]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segment in sixth month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segment in sixth month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'sixth month of last year': [{'CONVERTED TIME ELEMENT': 'sixth month of last year', 'DATE RANGE': '2022/06/01 - 2022/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "sixth month of last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "

In [122]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segment in 6th month of this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segment in 6th month of this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'6th month of this year': [{'CONVERTED TIME ELEMENT': '6th month of this year', 'DATE RANGE': '2023/06/01 - 2023/06/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "6th month of this year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "6th mont

In [99]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segment in 2nd month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segment in 2nd month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'2nd month of last year': [{'CONVERTED TIME ELEMENT': '2nd month of last year', 'DATE RANGE': '2022/02/01 - 2022/02/28', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segment" which is a dimension.
- The query mentions "2nd month of last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context.
- "segment" can be matched to the "Segment" entity in the context.

Step 3: Convert the date component
- The query mentions "2nd mont

In [100]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "quantity of binders in fifth month of this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  quantity of binders in fifth month of this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'fifth month of this year': [{'CONVERTED TIME ELEMENT': 'fifth month of this year', 'DATE RANGE': '2023/05/01 - 2023/05/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "fifth month of this year", which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" enti

In [101]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "discount of binders this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  discount of binders this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'this year': [{'CONVERTED TIME ELEMENT': 'this year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "discount" which is a measure.
- The query mentions "binders" which is a filter with parent "Sub-Category".
- The query mentions "this year" which is a date variable.

Step 2: Match components to context
- "discount" matches the entity "Discount" in the context under "MEASURE".
- "binders" matches the entity "Binder" in the context under "FILTER" with parent "Sub-Category".
- "this year" matches the entity "Order Date" in th

In [120]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "discount of phones in last 5 years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  discount of phones in last 5 years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 5 years': [{'CONVERTED TIME ELEMENT': 'last 5 years', 'DATE RANGE': '2020/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "last 5 years" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER with parent "Sub-Category".
- "last 5 yea

In [102]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "discount of phones in last four years"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  discount of phones in last four years
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four years': [{'CONVERTED TIME ELEMENT': 'last four years', 'DATE RANGE': '2020/01/08 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "last four years" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context.
- "phones" can be matched to the entity "Phone" in the context, which has a parent "Sub-Category".
- "last four years" 

In [103]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "quantity of phones in feb 2021 vs mar 2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  quantity of phones in feb 2021 vs mar 2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'feb 2021': [{'CONVERTED TIME ELEMENT': 'february 2021', 'DATE RANGE': '2021/02/01 - 2021/02/28', 'ENTITY': 'Order Date'}], 'mar 2023': [{'CONVERTED TIME ELEMENT': 'march 2023', 'DATE RANGE': '2023/03/01 - 2023/03/31', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity', 'MEASURE CONSTRAINT': [{'COMPARISON VALUE': '', 'COMPARSION OPERATOR': ''}]}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- Quantity: Measure
- Phones: Filter (Sub-Category)
- Feb 2021 and Mar 2023: Date Variable

Step 2: Match the components to the context
- Quantity: Found in context as "Quantity"
- Phones: Found in context as "Phone" under "Sub-Categ

In [104]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "quantity of binders in fifth month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  quantity of binders in fifth month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'fifth month of last year': [{'CONVERTED TIME ELEMENT': 'fifth month of last year', 'DATE RANGE': '2022/05/01 - 2022/05/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "fifth month of last year" which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "quantity" is matched to the "Quantity" entity under the "MEASURE" category in the context.
- "binders" is matched to the "Binder" entit

In [105]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "discount of phones in last 1 year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  discount of phones in last 1 year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 1 year': [{'CONVERTED TIME ELEMENT': 'last 1 year', 'DATE RANGE': '2022/02/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter under the Sub-Category.
- The query mentions "last 1 year" which is a date variable.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER.
- "last 1 year" can be matched to the entity "Ord

In [106]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of phones in last four months"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of phones in last four months
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last four months': [{'CONVERTED TIME ELEMENT': 'last four months', 'DATE RANGE': '2023/08/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "phones" which is a filter under the sub-category.
- The query mentions "last four months" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "phones" can be matched to the "Phone" entity in the context under FILTER with parent "Sub-Category".
- "last four months" 

In [107]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segments current year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segments current year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'current year': [{'CONVERTED TIME ELEMENT': 'current year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "segments" which is a dimension.

Step 2: Match the components to the context
- In the context, "sales" is matched to the entity "Sales" under the "MEASURE" category.
- "segments" is matched to the entity "Segment" under the "DIMENSION" category.

Step 3: Identify the date component in the query
- The query mentions "current year" which is a date component.

Step 4: Convert the date comp

In [108]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segments this year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segments this year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'this year': [{'CONVERTED TIME ELEMENT': 'this year', 'DATE RANGE': '2023/01/01 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query mentions "this year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "segments" can be matched to the "Segment" entity in the context under DIMENSION.

Step 3: Convert the date component
- "this year" is a date variable that needs to be converted using the 

In [109]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales of segments last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales of segments last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last year': [{'CONVERTED TIME ELEMENT': 'last year', 'DATE RANGE': '2022/01/01 - 2022/12/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query mentions "segments" which is a dimension.
- The query mentions "last year" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "segments" can be matched to the "Segment" entity in the context under DIMENSION.

Step 3: Convert the date component
- "last year" is a date variable that needs to be converted using the 

In [110]:
%%time
# Run one query through the predictor and print the parsed structure, then the
# step-by-step rationale.
import ast  # cell-local import so this cell stands on its own

user_query = "sales in the last 2 weeks"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
# `output` is text returned by the predictor (presumably model-generated):
# parse it as a Python literal instead of executing it with eval().
print(ast.literal_eval(output))
print('-'*100)
# Show the rationale from its "Step 1:" heading; fall back to the full raw text
# rather than raising IndexError when the heading is missing.
rationale_parts = raw.split('\nStep 1:')
print(('Step 1:' + rationale_parts[1]) if len(rationale_parts) > 1 else raw)

user query:  sales in the last 2 weeks
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last 2 weeks': [{'CONVERTED TIME ELEMENT': 'last 2 weeks', 'DATE RANGE': '2023/12/07 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "last 2 weeks" which is a date variable.

Step 2: Match the components to the context
- "sales" can be matched to the "Sales" entity in the context under MEASURE.
- "last 2 weeks" can be matched to the "Order Date" entity in the context under DATE VARIABLE.

Step 3: Convert the date component
- The date reference provided has a start_date of '01/08/2020' and an end_date of '20/12/2023'.
- Since the query asks for sales in the "last 2 weeks", w

In [111]:
%%time
user_query = "discount of segments in last three weeks"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of segments in last three weeks
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'last three weeks': [{'CONVERTED TIME ELEMENT': 'last three weeks', 'DATE RANGE': '2023/11/20 - 2023/12/20', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which can be matched to the "Discount" entity in the MEASURE context.
- The query mentions "segments" which can be matched to the "Segment" entity in the DIMENSION context.
- The query mentions "last three weeks" which is a date component that needs to be converted.

Step 2: Match the components to the context
- "discount" is matched to the "Discount" entity in the MEASURE context.
- "segments" is matched 

In [112]:
%%time
user_query = "quantity of binders in 10th month of last year"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in 10th month of last year
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'10th month of last year': [{'CONVERTED TIME ELEMENT': 'last year 10th month', 'DATE RANGE': '2022/10/01 - 2022/10/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity of binders" which refers to the measure "Quantity" and the filter "Binder".
- The query also mentions "10th month of last year" which is a date component related to the "Order Date" entity.

Step 2: Match components to the context
- "Quantity" is matched to the "Quantity" entity in the context under "MEASURE".
- "Binders" is matched to the "Binder" entity in the context und

In [113]:
%%time
user_query = "what is sales of phone in 01/2023"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  what is sales of phone in 01/2023
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'01/2023': [{'CONVERTED TIME ELEMENT': 'january 2023', 'DATE RANGE': '2023/01/01 - 2023/01/31', 'ENTITY': 'Order Date'}]}, 'FILTER': {'Phone': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'Sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query asks for "sales of phone in 01/2023". We can identify the following components:
  - Measure: Sales
  - Filter: Phone (Sub-Category)
  - Date Variable: 01/2023 (Order Date)

Step 2: Match the components to the context
- Measure: Sales is present in the context as {'ENTITY': 'Sales', 'other names': ['sales', 'sale']}
- Filter: Phone is present in the context as {'ENTITY': 'Phone', 'other names': ['phone', 'phones', 'mobile phones'], 'p

In [114]:
%%time
user_query = "sales in 02/22"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  sales in 02/22
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'02/22': [{'CONVERTED TIME ELEMENT': 'february 2022', 'DATE RANGE': '2022/02/01 - 2022/02/28', 'ENTITY': 'Order Date'}]}, 'MEASURE': {'sales': [{'ENTITY': 'Sales'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "sales" which is a measure.
- The query also mentions "02/22" which is a date component.

Step 2: Match the components to the context
- In the context, "sales" can be matched to the "Sales" entity under the "MEASURE" category.
- The date component "02/22" needs to be converted to a proper date range.

Step 3: Convert the date component
- The date component "02/22" can be interpreted as "February 2022".
- To convert it into a date range, we can use the format "YYYY/MM/DD". So, the date range for February 2022 woul

In [115]:
%%time
user_query = "discount of phones in 04/23"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of phones in 04/23
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'04/23': [{'CONVERTED TIME ELEMENT': 'april, 2023', 'DATE RANGE': '2023/04/01 - 2023/04/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'phones': [{'ENTITY': 'Phone', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which is a measure.
- The query mentions "phones" which is a filter related to the sub-category.
- The query mentions "04/23" which is a date component.

Step 2: Match the components to the context
- "discount" can be matched to the entity "Discount" in the context under MEASURE.
- "phones" can be matched to the entity "Phone" in the context under FILTER.
- "04/23" needs to be converted to a date range and matched t

In [116]:
%%time
user_query = "quantity of binders in 04/2022"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  quantity of binders in 04/2022
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'04/2022': [{'CONVERTED TIME ELEMENT': 'april 2022', 'DATE RANGE': '2022/04/01 - 2022/04/30', 'ENTITY': 'Order Date'}]}, 'FILTER': {'binders': [{'ENTITY': 'Binder', 'PARENT': 'Sub-Category'}]}, 'MEASURE': {'quantity': [{'ENTITY': 'Quantity'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify components in the query
- The query mentions "quantity" which is a measure.
- The query mentions "binders" which is a filter related to the sub-category.
- The query mentions "04/2022" which is a date variable.

Step 2: Match components to the context
- "quantity" can be matched to the "Quantity" entity in the context under MEASURE.
- "binders" can be matched to the "Binder" entity in the context under FILTER.
- "04/2022" can be matched to the "Order Date" entity in th

In [117]:
%%time
user_query = "discount of segments in 07/23"
print('user query: ', user_query)
print('-'*100)
output, raw = predict_template_query_v1(user_query=user_query)
print(eval(output))
print('-'*100)
print('Step 1:' +raw.split('\nStep 1:')[1])

user query:  discount of segments in 07/23
----------------------------------------------------------------------------------------------------
{'DATE VARIABLE': {'07/23': [{'CONVERTED TIME ELEMENT': 'July 2023', 'DATE RANGE': '2023/07/01 - 2023/07/31', 'ENTITY': 'Order Date'}]}, 'DIMENSION': {'segments': [{'ENTITY': 'Segment'}]}, 'MEASURE': {'discount': [{'ENTITY': 'Discount'}]}}
----------------------------------------------------------------------------------------------------
Step 1: Identify the components in the query
- The query mentions "discount" which can be matched to the 'Discount' entity in the MEASURE context.
- The query mentions "segments" which can be matched to the 'Segment' entity in the DIMENSION context.
- The query mentions "07/23" which is a date component.

Step 2: Match the components to the context
- 'Discount' in the query matches the 'Discount' entity in the MEASURE context.
- 'Segments' in the query matches the 'Segment' entity in the DIMENSION context.

St