In [1]:
!sudo pip install git+https://github.com/huggingface/transformers

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-osydzm2d
  Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-osydzm2d
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Collecting tokenizers<0.15,>=0.14
[?25l  Downloading https://files.pythonhosted.org/packages/76/ee/7e35fb46c728989357e6ccb96df64c4364601cfbfdd6c25ccc872e6c16a0/tokenizers-0.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 6.7MB/s eta 0:00:01
Building wheels for collected packages: transformers
  Building wheel for transformers (PEP 517) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.35.0.dev0-cp38-none-any.whl size=7817286 sha256=9ddcadf7d7e766cb5a33afe02001ecd3998b318d96bb09a198315a44c6bc

In [2]:
import transformers
transformers.__version__

'4.35.0.dev0'

In [3]:
#!sudo pip install -q accelerate peft==0.4.0 bitsandbytes trl==0.4.7

In [4]:
import os
import torch
from datasets import load_dataset
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import pandas as pd
import torch

In [5]:
df = pd.read_csv('/data/mistral/query-to-mql/Live_Usage_queries_with_mql_formatted.csv')

In [6]:
df.shape

(17334, 9)

In [7]:
df.columns

Index(['query', 'mql', 'account_id', 'metadata', 'measure', 'dimension',
       'derived_measure', 'date', 'metadata_none_removed'],
      dtype='object')

In [8]:
g = torch.Generator().manual_seed(1234)

In [9]:
train_size = 1000

In [10]:
rows = torch.randint(0,df.shape[0],(1000,))

In [11]:
train_df = df.iloc[rows.tolist()]

In [12]:
val_df = df.drop(rows.tolist())

In [13]:
val_df.shape, train_df.shape

((16360, 9), (1000, 9))

In [14]:
val_df.to_csv('val_df_query_to_mql.csv', index=True)
train_df.to_csv('train_df_query_to_mql.csv', index=True)

In [15]:
# The model that you want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# The instruction dataset to use
#dataset_name = ""

# Fine-tuned model name
#new_model = "mistral-ft-peft-on-template_and_user_query-data"

In [16]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [17]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = True

In [18]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/data/mistral/query-to-mql/oct-19"

# Number of training epochs
num_train_epochs = 15

# Enable fp16/bf16 training (set bf16 to True with an A100)
# fp16 = False
fp16 = True # not using quantisation
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 8

# Batch size per GPU for evaluation
per_device_eval_batch_size = 8

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True


# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
max_steps = 1000

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 200

# Log every X updates steps
logging_steps = 200

In [19]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [21]:
promt_template_v1 = """<s>[INST]<<SYS>>
You are an assistant that helps to map the user question to the a particular JSON format which contains info asked by user and also maps it the below CONTEXT. You might also need to act as a time tagger expert to convert the date elements present in the question to a standard format and to find possible date ranges for the same.

CONTEXT:{context}

Step 1: Identify the n-grams match between question and context

        Map the n-gram or their lemma or their inflections from the question with the values in the passed context.
        Always consider the longest n-gram match, not the sub-string.
        If there are multiple matches for an n-gram with context, return all such ENTITY in response.
        If you are returning any match which is not exactly present with the context, make sure that it is a noun phrase and there is a high similarity between the match and the matched value in context. 


Step 2: Applying time tagger rules only if time elements are present in question

        Identify the TIME ELEMENTS in the input question and convert it to a standard format (if not already) by applying the general time tagging rules. If the TIME ELEMENT is already in a standard format, then no need to convert it.
        TIME ELEMENT can be either a temporal interval (across months, yoy, mom, qoq, wow, quarterly etc.) or a temporal expression (time points such as specific dates, relative expressions etc.).
        Calculate date range for each time points based on the following conditions:
        1. For relative time expressions, calculate the date range based on a reference date - By default the reference date is the end_date in date input: {date_input}
        2. To calculate the date range for "last X years", strictly follow below conditions:
                For "last 1 year", consider exactly one year before the reference year and set start date as January 1 and end date as Decemebr 31 of that year.
                For "last X years", where X is greater than 1, consider starting year = (reference year - X+1) and set start date as January 1 of starting year and end date as the reference date.
        3. To calculate the date range for "last X months", strictly follow below conditions:
                Consider reference month as the month in reference date.
                For "last 1 month", consider exactly one month before the reference month and set start date as first day and end date as last day of that month.
                For "last X months", where X is greater than 1, consider starting month = (reference month - X+1) and set start date as first day of starting month and end date as the reference date. (Example: if reference date is 14/09/2022, then last 3 months = 01/07/2022 - 14/09/2022)
        4. To calculate the date range for "last X quarters", strictly follow below conditions:
                For "last 1 quarter", consider exactly one quarter before the reference quarter and set start date as first day and end date as last day of that quarter .
                For "last X quarter", where X is greater than 1, consider starting quarter = (reference quarter - X+1) and set start date as first day of starting quarter and end date as the reference date.
        5. To calculate the date range for "last X weeks", strictly follow the below conditions:
                Consider reference week as the week in reference week.
                For "last 1 week", set start date as Monday and end date as Sunday of the previous week of reference week. (Example: if reference date is 14/09/2022, then last week = 05/09/2022 - 11/09/2022)
                For "last X weeks", set start date as Monday of reference week and set start date as the Monday of that week and end date as reference date. 
        6. Provide the date range of each time point in "start date - end date" format always.

<</SYS>>
User question is : {user_query}

Converted JSON is as shown below: 
[/INST]
[MQL]
{mql}
[/MQL]</s>"""

In [22]:
df.columns

Index(['query', 'mql', 'account_id', 'metadata', 'measure', 'dimension',
       'derived_measure', 'date', 'metadata_none_removed'],
      dtype='object')

In [23]:
eval(df['metadata_none_removed'][0])

[{'measure': 'PROFIT', 'measure_label': 'Profit in Dollars'},
 {'date': 'AFS_SHIFT_DATE_START', 'date_label': 'Shift Date Start'}]

In [24]:
df['query']

0                                          trend of profit
1                                  profit in next 3 months
2                           why profit change in july 2022
3                            why profit change in aug 2022
4                 why did scheduled hours increase in 2022
                               ...                        
17329    how many provider ids having worked shift grea...
17330    how many provider ids having worked shift grea...
17331    how many provider ids having worked shift grea...
17332    facilities having unfilled shifts greater than 20
17333    facilities having unfilled shifts greater than 20
Name: query, Length: 17334, dtype: object

In [25]:
def create_fine_tuning_dataset(row):
    mql = eval(row['mql'])[0]['mql']
    user_query = row['query']
    date_input = {"start_date": "01/01/2020", "end_date": "15/09/2023"}
    context = row['metadata_none_removed']
    formated = promt_template_v1.format(context=context,
                                        date_input = date_input,
                                        user_query=user_query,
                                        mql=mql)
    return formated

In [26]:
train_df['fine_tuning_dataset']=train_df.apply(create_fine_tuning_dataset, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['fine_tuning_dataset']=train_df.apply(create_fine_tuning_dataset, axis=1)


In [27]:
val_df['fine_tuning_dataset']=val_df.apply(create_fine_tuning_dataset, axis=1)

In [28]:
train_df.shape

(1000, 10)

In [29]:
train_df = train_df[['fine_tuning_dataset']]

In [30]:
train_df.shape

(1000, 1)

In [31]:
val_df = val_df[['fine_tuning_dataset']]
val_df.shape

(16360, 1)

In [32]:
val_df.reset_index(inplace=True)
train_df.reset_index(inplace=True)

In [33]:
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

In [34]:
train_dataset

Dataset({
    features: ['index', 'fine_tuning_dataset'],
    num_rows: 1000
})

In [35]:
val_dataset

Dataset({
    features: ['index', 'fine_tuning_dataset'],
    num_rows: 16360
})

In [36]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [37]:
bnb_4bit_quant_type

'nf4'

In [38]:
compute_dtype

torch.float16

In [39]:
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [40]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [41]:
!sudo pip install pynvml

Collecting pynvml
[?25l  Downloading https://files.pythonhosted.org/packages/5b/9c/adb8070059caaa15d5a572b66bccd95900d8c1b9fa54d6ecea6ae97448d1/pynvml-11.5.0-py3-none-any.whl (53kB)
[K     |████████████████████████████████| 61kB 5.6MB/s eta 0:00:011
[?25hInstalling collected packages: pynvml
Successfully installed pynvml-11.5.0


In [42]:
from pynvml.smi import nvidia_smi
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 15972.9375,
    'unit': 'MiB'}}]}

In [35]:
#!df -H

In [118]:
# del model
# torch.cuda.empty_cache()

In [43]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
#     torch_dtype=torch.bfloat16,
    device_map="auto"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [44]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                          # add_eos_token=True,
                                          use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [46]:
max([len(tokenizer.encode(train_df['fine_tuning_dataset'][i])) for i in range(1000)])

2094

In [48]:
val_dataset[:100]

{'index': [0,
  1,
  2,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  58,
  59,
  60,
  61,
  62,
  63,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104],
 'fine_tuning_dataset': ['<s>[INST]<<SYS>>\nYou are an assistant that helps to map the user question to the a particular JSON format which contains info asked by user and also maps it the below CONTEXT. You might also need to act as a time tagger expert to convert the date elements present in the question to a standard format and to find possible date ranges for the same.\n\nCONTEXT:[{\'meas

In [47]:
tokenizer.decode([2])

'</s>'

In [40]:
# LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules = ["q_proj", "v_proj"],
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [41]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    eval_steps=200, # requires when eval_dataset is defined
    per_device_eval_batch_size=1, # Batch size for evaluation
    evaluation_strategy="steps", # requires when eval_dataset is defined
    logging_strategy="steps",
    logging_steps=1,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=4000,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

In [42]:
## Getting FLOPs of model

model_flops = (
  model.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 512)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.551360512 GB
Flops 21843.947814912 GFLOPs


In [43]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 9223
})

In [44]:
#print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [45]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    dataset_text_field="fine_tuning_dataset",
    max_seq_length=256,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/9223 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

In [46]:
val_dataset[0]

{'fine_tuning_dataset': '<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nQ1 2018 quantity?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the Quantity in Quarter1 2018\n[/LUMINTEMPLATE]</s>'}

In [47]:
# Train model
trainer.train()

Step,Training Loss,Validation Loss
200,0.2807,0.364057
400,0.252,0.283258
600,0.214,0.252046
800,0.2419,0.233638
1000,0.2487,0.222283
1200,0.2265,0.208451
1400,0.215,0.207179
1600,0.2211,0.196577
1800,0.2477,0.198251
2000,0.1997,0.192909


TrainOutput(global_step=4000, training_loss=0.2516471045911312, metrics={'train_runtime': 16054.2987, 'train_samples_per_second': 1.993, 'train_steps_per_second': 0.249, 'total_flos': 1.3459230627363226e+17, 'train_loss': 0.2516471045911312, 'epoch': 3.47})

In [None]:
# Fine-tuned model name
new_model_name = "mistral-ft-peft-v1-lr-64-with-more-data"

In [None]:
# Save trained model
trainer.model.save_pretrained(new_model)

In [41]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_

In [43]:
from peft import PeftModel, PeftConfig

In [None]:
del model
torch.cuda.empty_cache()

from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(new_model_name, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()

In [None]:
output_merged_dir = os.path.join(new_model_name, "final_merged_checkpoint")
model.save_pretrained(output_merged_dir, safe_serialization=True)

In [45]:
## Getting FLOPs of model

model_flops = (
  model_to_merge.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 200)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.488974336 GB
Flops 4083.474432 GFLOPs


In [12]:
query_template_v2 = """[INST]<<SYS>>
You are an advanced template converter that converts user question to a specific template which answers the user question.

<</SYS>>

{user_query}
[/INST]
[LUMINTEMPLATE]"""

In [24]:
#model_to_merge.to('cuda')

In [23]:
!sudo pip install ninja

Collecting ninja
[?25l  Downloading https://files.pythonhosted.org/packages/0f/58/854ce5aab0ff5c33d66e1341b0be42f0330797335011880f7fbd88449996/ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145kB)
[K     |████████████████████████████████| 153kB 6.9MB/s eta 0:00:01
[?25hInstalling collected packages: ninja
Successfully installed ninja-1.11.1


In [24]:
import ninja
ninja.__version__

'1.11.1'

In [12]:
model = AutoModelForCausalLM.from_pretrained(output_merged_dir, device_map="auto", torch_dtype=torch.bfloat16)

KeyError: 'mistral'

In [15]:
def predict_template_query_v3(user_query):
    inp = query_template_v2.format(user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt")
    outputs = model.generate(input_ids=_inputs.to('cuda'), max_length= 150, pad_token_id=tokenizer.eos_token_id)
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[LUMINTEMPLATE]\n')[1]
    return output_new.split('\n[/LUMINTEMPLATE]')[0]

In [17]:
%%time
predict_template_query_v3('brands least profitable in 2021')

CPU times: user 5.09 s, sys: 93.8 ms, total: 5.18 s
Wall time: 5.18 s


'List of brands with lowest profit in 2021'

In [18]:
%%time
predict_template_query_v3('trend of work hours in 2023')

CPU times: user 5 s, sys: 143 ms, total: 5.14 s
Wall time: 5.15 s


'What is the trend of work hours in 2023'

In [44]:
model_to_merge = PeftModel.from_pretrained(
    model,
    '/data/sept-23/checkpoint-810')

In [49]:
model_to_merge = PeftModel.from_pretrained(
    model,
    'llama-2-7b-fine-tuned-peft-v2')

In [19]:
%%time
predict_template_query_v3('which are the 3 top selling category by sales in 2022')

CPU times: user 4.86 s, sys: 205 ms, total: 5.06 s
Wall time: 5.06 s


'Which are the top 3 sub category based on sales in 2022'

In [20]:
%%time
predict_template_query_v3('which are the 3 top selling category by profit in 2022')

CPU times: user 4.83 s, sys: 221 ms, total: 5.05 s
Wall time: 5.05 s


'Which are the top 3 sub category based on profit in 2022'

In [21]:
%%time
predict_template_query_v3("In 02/19 What was the sales of paper?")

CPU times: user 4.98 s, sys: 214 ms, total: 5.19 s
Wall time: 5.19 s


'What is the sales of paper in februaury 2019'

In [35]:
%%time
predict_template_query_v3("notebook sales in '20'?")

CPU times: user 5.07 s, sys: 134 ms, total: 5.2 s
Wall time: 5.2 s


'What is the sales of notebook in 2020'

In [36]:
%%time
predict_template_query_v3("monthly trend of sales")

CPU times: user 5.3 s, sys: 138 ms, total: 5.44 s
Wall time: 5.43 s


'What is the monthly trend of sales'

In [28]:
%%time
predict_template_query_v3("worst sub category in 2021")

CPU times: user 5.01 s, sys: 155 ms, total: 5.16 s
Wall time: 5.16 s


'top sub category basis sales contribution in 2021'

In [38]:
%%time
predict_template_query_v3("What was laptop sales in 4th quarter '11?")

CPU times: user 5 s, sys: 195 ms, total: 5.2 s
Wall time: 5.2 s


'What is the sales of laptop in quarter4 2011'

In [32]:
%%time
predict_template_query_v3("worst brands basis profit in 2021")

CPU times: user 4.98 s, sys: 148 ms, total: 5.13 s
Wall time: 5.13 s


'Which are the top 5 brands based on profit contribution in 2021'

In [17]:
prompt = query_template_v2.format(user_query='brands least profitable in 2021')

In [18]:
tokens = tokenizer.encode(prompt, return_tensors="pt")

In [22]:
%%time
outputs = model.generate(input_ids=tokens.to('cuda'), max_length= 180, )

CPU times: user 6.61 s, sys: 135 ms, total: 6.75 s
Wall time: 6.75 s


In [23]:
tokenizer.decode(outputs[0])

'<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nbrands least profitable in 2021\n[/INST]\n[LUMINTEMPLATE]\nList of brands with lowest profit in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on profit share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on market share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on sales share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5'

In [132]:
%%time
predict_template_query_v3("Growth rate of sales share of phone")

CPU times: user 1min 4s, sys: 30.3 s, total: 1min 34s
Wall time: 1min 34s


'What is the growth contribution to overall sales for phone'

In [133]:
%%time
predict_template_query_v3("What is the growth contribution to overall sales for phone in 2021")

CPU times: user 1min 2s, sys: 28.8 s, total: 1min 31s
Wall time: 1min 31s


'What is the growth contribution to overall sales for phone in 2021'

In [134]:
%%time
predict_template_query_v3("Which are the top 3 sub category based on share/contribution of sales across segment except paper")

CPU times: user 59.8 s, sys: 27.6 s, total: 1min 27s
Wall time: 1min 27s


'Which are the top 3 sub category based on contribution to overall sales except paper'

In [135]:
%%time
predict_template_query_v3("which are the 5 top selling sub category by sales in 2023")

CPU times: user 59.3 s, sys: 26.9 s, total: 1min 26s
Wall time: 1min 26s


'Which are the top sub category basis sales in 2023'

In [136]:
%%time
predict_template_query_v3("how does the sales change for phone in the last year")

CPU times: user 1min 4s, sys: 29.4 s, total: 1min 33s
Wall time: 1min 33s


'What is the trend of sales for phone in 2020'

In [137]:
%%time
predict_template_query_v3("what is the average monthly market share of paper for the last two and a half years")

CPU times: user 1min 2s, sys: 28.6 s, total: 1min 30s
Wall time: 1min 30s


'What is the average monthly share of paper sales over the last 2.5 years'

In [138]:
%%time
predict_template_query_v3("when was the monthly selling of paper recorded the lowest?")

CPU times: user 1min 3s, sys: 29 s, total: 1min 32s
Wall time: 1min 32s


'When was the last time that sales of paper across months was lowest'

In [49]:
%%time
predict_template_query_v3("What were the sales in '23?")

  next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)


CPU times: user 23.8 s, sys: 5.03 s, total: 28.8 s
Wall time: 28.8 s


'What is the sales in 2023'

In [50]:
%%time
predict_template_query_v3("sales number in year 2022?")

CPU times: user 22.7 s, sys: 5.03 s, total: 27.8 s
Wall time: 27.8 s


'What is the sales in 2022'

In [53]:
%%time
predict_template_query_v3("what was profit for the year 2020?")

CPU times: user 1min 4s, sys: 30.3 s, total: 1min 34s
Wall time: 1min 34s


'What is the sales in 2020'

In [49]:
%%time
predict_template_query_v3("what was sales in 11/20?")

CPU times: user 1min 10s, sys: 33.4 s, total: 1min 43s
Wall time: 1min 43s


'What is the sales in 2020'

In [51]:
%%time
predict_template_query_v3("Compare 1983 and 1984 sales and discount percentages.")

CPU times: user 1min 2s, sys: 29.1 s, total: 1min 31s
Wall time: 1min 31s


'What is the sales and discount percentage in 1983 vs 1984'

In [54]:
%%time
predict_template_query_v3("what was sales in twenty twenty two?")

CPU times: user 23.9 s, sys: 5.52 s, total: 29.4 s
Wall time: 29.4 s


'What is the sales in 2022'

In [52]:
%%time
predict_template_query_v3("what are profit each months")

CPU times: user 1min 7s, sys: 31.3 s, total: 1min 38s
Wall time: 1min 38s


'What is the monthly trend of profit'

In [45]:
%%time
predict_template_query_v3("When did the previous low in phone sales take place?")

CPU times: user 1min 5s, sys: 29.9 s, total: 1min 35s
Wall time: 1min 35s


'When was the last time that sales of phone was lowest'

In [46]:
%%time
predict_template_query_v3("When was phone sales lowest recently?")

CPU times: user 1min 8s, sys: 31.8 s, total: 1min 40s
Wall time: 1min 40s


'When was the last time that sales of phone was lowest'

In [67]:
%%time

inp = 'what are you doing'
inp = tokenizer.encode(inp, return_tensors='pt')
output = model.generate(input_ids=inp.to('cuda'), max_length= 200)
output = tokenizer.decode(outputs[0])

CPU times: user 1min 47s, sys: 50 s, total: 2min 37s
Wall time: 2min 37s


In [49]:
from tqdm import tqdm

In [50]:
df2 = pd.read_csv('template_and_user_query.csv')

In [51]:
val_df2 = df2.iloc[::10,:]
val_df2.reset_index(drop=True, inplace=True)
df2 = pd.concat([df2,val_df2]).drop_duplicates(keep=False).reset_index()
df2.drop(columns=['index'], inplace=True)

In [57]:
df2_new = df2[300:500]

In [60]:
%%time
output_list_300_500 = []
for i, row in tqdm(df2_new.iterrows()):
    inp = row['user query']
    try:
        output = predict_template_query_v3(inp)
    except:
        output = None
        print(f"Got exception while processing : {inp}")
    output_list_300_500.append(output)

200it [1:42:11, 30.66s/it]

CPU times: user 1h 21min 28s, sys: 20min 42s, total: 1h 42min 10s
Wall time: 1h 42min 11s





In [62]:
df = pd.DataFrame(output_list_300_500, columns=['predicted_template'])
df.to_csv('predict_df_300_500.csv', index=False)

In [61]:
output_list_300_500

['What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the s

In [86]:
output_list_200_300

['What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and 

In [81]:
output_list_100_200

['What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2

In [78]:
output_list

['What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub cat

In [40]:
def predict_template_query(user_query):
    question = query_template.format(user_query=user_query)
    _input = str({'question': question})
#     print(_input)
    _inputs = tokenizer.encode(_input, return_tensors="pt")
#     print(_inputs)
#     print(type(_inputs))
    outputs = model_to_merge.generate(input_ids=_inputs.to('cuda'), max_length= 200)
    output = tokenizer.decode(outputs[0])
    # print(output)
    # a = output.split("_LUMIN_ Answer",1)[1]
    #print(a)
    return output

In [None]:
model_to_merge.to('cuda')

In [63]:
def process_output(a):
    b=a.split("### Answer",1)[1]
    return b.split("\n")[1]

In [5]:
# inp = "why production dropped for bikes in 1st quarter 2022"
# predict_template_query(inp)

In [4]:
# inp = 'why sales of texas increase in 1st quarter 2023'
# a = predict_template_query(inp)

In [2]:
# inp = 'drivers of profit'
# predict_template_query(inp)

In [3]:
# inp = "why sales dropped for bikes in 1st quarter 2022"
# predict_template_query(inp).split('\n')[1]