In [2]:
!sudo pip install git+https://github.com/huggingface/transformers

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-xchwabxp
  Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-xchwabxp
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Collecting tokenizers<0.15,>=0.14
[?25l  Downloading https://files.pythonhosted.org/packages/59/15/c60dae8646210e148e8432fbb5a13d1f6fa8cefda6314ff6c4fc0b58b6ec/tokenizers-0.14.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 7.1MB/s eta 0:00:01
Building wheels for collected packages: transformers
  Building wheel for transformers (PEP 517) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.34.0.dev0-cp38-none-any.whl size=7746762 sha256=65971629c8ed8d586ca7ccf9e226ccbee544041a949a409f3ec9291b92b3

In [3]:
import os
import torch
from datasets import load_dataset
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import pandas as pd

In [4]:
df = pd.read_csv('template_and_user_query_v2.csv')

In [5]:
#val_df1 = df.iloc[::9,:]
val_df = df.iloc[::10,:]
val_df.reset_index(drop=True, inplace=True)
df = pd.concat([df,val_df]).drop_duplicates(keep=False).reset_index()
df.drop(columns=['index'], inplace=True)

In [6]:
#val_df = pd.concat([val_df1, val_df2])

In [7]:
#val_df.to_csv('val_df.csv', index=False)

In [8]:
#df = df.sample(frac=1).reset_index(drop=True)

In [9]:
# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/llama-2-7b-chat-hf"

# The instruction dataset to use
#dataset_name = ""

# Fine-tuned model name
new_model = "llama-2-7b-ft-peft-on-template_and_user_query-data"

In [59]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [60]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [61]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/data/oct-2"

# Number of training epochs
num_train_epochs = 15

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 8

# Batch size per GPU for evaluation
per_device_eval_batch_size = 8

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True


# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
max_steps = 1000

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 30

# Log every X updates steps
logging_steps = 30

In [62]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [15]:
prompt_template1 = """### _LUMIN_ Question:
{question}
### Context: {context}
### _LUMIN_ Answer: {answer}"""

In [16]:
promt_template_v2 = """[INST]<<SYS>>
You are an advanced template converter that converts user question to a specific template which answers the user question.

<</SYS>>

{user_query}
[/INST]
[LUMINTEMPLATE]
{template_query}
[/LUMINTEMPLATE]</s>"""

In [17]:
prompt_template = """### Question:
{user_query}

### Context:
Converting above _LUMIN_ Question to LUMIN specific template _LUMIN_ Answer.

### Answer:
{template_query}"""

In [18]:
context = "Converting above _LUMIN_ Question to LUMIN specific template _LUMIN_ Answer."

In [19]:
df.columns

Index(['question', 'user query'], dtype='object')

In [20]:
def create_fine_tuning_dataset(row):
    user_query = row['user query']
    template_query = row['question']
    formated = promt_template_v2.format(user_query=user_query,
                                      template_query=template_query)
    # data_point = str({'question': formated, "answer":template_query})
    return formated

In [21]:
df['fine_tuning_dataset']=df.apply(create_fine_tuning_dataset, axis=1)

In [22]:
val_df['fine_tuning_dataset']=val_df.apply(create_fine_tuning_dataset, axis=1)

In [23]:
val_df['fine_tuning_dataset'][2]

'[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nWhat are the 2020 sales figures for each subcategory?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the sales by sub category in 2020\n[/LUMINTEMPLATE]</s>'

In [24]:
df['fine_tuning_dataset'][0]

'[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nHow much were the sales in 2020?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the sales in 2020\n[/LUMINTEMPLATE]</s>'

In [25]:
df.drop(columns=['question','user query'], inplace=True)

In [26]:
df.shape

(2235, 1)

In [27]:
df.columns

Index(['fine_tuning_dataset'], dtype='object')

In [28]:
val_df.drop(columns=['question','user query'], inplace=True)

In [29]:
val_df.shape

(249, 1)

In [30]:
train_dataset = Dataset.from_pandas(df)
val_dataset = Dataset.from_pandas(val_df)

In [31]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 2235
})

In [32]:
val_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 249
})

In [63]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [64]:
bnb_4bit_quant_type

'nf4'

In [65]:
compute_dtype

torch.float16

In [66]:
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [None]:
!df

In [37]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [38]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                          # add_eos_token=True,
                                          use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`,  it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [39]:
max([len(tokenizer.encode(df['fine_tuning_dataset'][i])) for i in range(2000)])

123

In [42]:
tokenizer.decode([2])

'</s>'

In [67]:
# LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [68]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    eval_steps=30,
    per_device_eval_batch_size=1, # Batch size for evaluation
    evaluation_strategy="steps",
    logging_strategy="steps",
    logging_steps=1,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=1000,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

In [69]:
## Getting FLOPs of model

model_flops = (
  model.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 512)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.388315136 GB
Flops 20323.529392128 GFLOPs


In [70]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 2235
})

In [71]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    dataset_text_field="fine_tuning_dataset",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

Map:   0%|          | 0/2235 [00:00<?, ? examples/s]

Map:   0%|          | 0/249 [00:00<?, ? examples/s]

In [72]:
train_dataset[0]

{'fine_tuning_dataset': '[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nHow much were the sales in 2020?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the sales in 2020\n[/LUMINTEMPLATE]</s>'}

In [None]:
# Train model
trainer.train()

Step,Training Loss,Validation Loss
30,0.7295,0.681522
60,0.3574,0.338475
90,0.2926,0.2718
120,0.3089,0.251946
150,0.1945,0.257716
180,0.2732,0.222298
210,0.2314,0.220961
240,0.2555,0.209292
270,0.202,0.201845
300,0.2176,0.204883


In [None]:
# Fine-tuned model name
new_model = "mistral-7b-ft-peft-v1-lr-16-new-template-additional_data"

In [None]:
# Save trained model
trainer.model.save_pretrained(new_model)

In [41]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_

In [44]:
from peft import PeftModel, PeftConfig

In [49]:
model_to_merge = PeftModel.from_pretrained(
    model,
    'llama-2-7b-fine-tuned-peft-v2')

In [45]:
model_to_merge = PeftModel.from_pretrained(
    model,
    '/data/sept-23/checkpoint-840')

In [129]:
## Getting FLOPs of model

model_flops = (
  model_to_merge.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 200)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.488974336 GB
Flops 4083.474432 GFLOPs


In [45]:
query_template = """### Question:
{user_query}

### Context:
Converting above _LUMIN_ Question to LUMIN specific template _LUMIN_ Answer.

"""

In [46]:
query_template_v2 = """[INST]<<SYS>>
You are an advanced template converter that converts user question to a specific template which answers the user question.

<</SYS>>

{user_query}
[/INST]
[LUMINTEMPLATE]"""

In [47]:
model_to_merge.to('cuda')

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (

In [48]:
def predict_template_query_v3(user_query):
    inp = query_template_v2.format(user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt")
    outputs = model_to_merge.generate(input_ids=_inputs.to('cuda'), max_length= 180)
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[LUMINTEMPLATE]\n')[1]
    return output_new.split('\n[/LUMINTEMPLATE]')[0]

In [134]:
%%time
predict_template_query_v3("what was sales in 11/20?")

CPU times: user 1min 4s, sys: 29.6 s, total: 1min 34s
Wall time: 1min 34s


'What is the sales in november 2020'

In [151]:
%%time
predict_template_query_v3("what was sales in 12/20?")

CPU times: user 1min 2s, sys: 28.8 s, total: 1min 31s
Wall time: 1min 31s


'What is the sales in december 2020'

In [136]:
%%time
predict_template_query_v3("trend of weekly hours in 2023")

CPU times: user 1min 5s, sys: 29.7 s, total: 1min 35s
Wall time: 1min 35s


'What is the trend of weekly hours in 2023'

In [137]:
%%time
predict_template_query_v3("What was phone sales in november '23'?")

CPU times: user 1min 6s, sys: 30.4 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales of phone in november 2023'

In [138]:
%%time
predict_template_query_v3("which are the 5 top selling sub category by sales in 2023")

CPU times: user 1min 3s, sys: 29.2 s, total: 1min 32s
Wall time: 1min 32s


'Which are the top 5 sub category based on sales in 2023'

In [139]:
%%time
predict_template_query_v3("Which are the top 3 sub category based on share/contribution of sales across segment except paper")

CPU times: user 1min 1s, sys: 28.6 s, total: 1min 30s
Wall time: 1min 30s


'Which are the top 3 sub category based on share of contribution of sales except paper'

In [140]:
%%time
predict_template_query_v3("how does the sales change for phone in the last year")

CPU times: user 1min 6s, sys: 30.4 s, total: 1min 37s
Wall time: 1min 37s


'What is the trend of sales for phone in last year'

In [141]:
%%time
predict_template_query_v3("when was the monthly selling of paper recorded the lowest?")

CPU times: user 1min 5s, sys: 30.6 s, total: 1min 35s
Wall time: 1min 35s


'When was the monthly sales of paper recorded the lowest'

In [142]:
%%time
predict_template_query_v3("what is the average monthly market share of paper for the last two and a half years")

CPU times: user 1min 3s, sys: 29 s, total: 1min 32s
Wall time: 1min 32s


'What is the average monthly market share of paper for last 2.5 years'

In [56]:
%%time
predict_template_query_v3("What was phone sales in 2nd quarter '23'?")

CPU times: user 59.1 s, sys: 26.9 s, total: 1min 26s
Wall time: 1min 26s


'What is the sales of phone in quarter2 2023'

In [58]:
%%time
predict_template_query_v3("In 02/19 What was the sales of paper?")

CPU times: user 59.1 s, sys: 26.9 s, total: 1min 25s
Wall time: 1min 26s


'What is the sales of paper in februaury 2019'

In [59]:
%%time
predict_template_query_v3("brands least profitable in 2021")

CPU times: user 1min, sys: 27.3 s, total: 1min 27s
Wall time: 1min 27s


'Which are the top 5 sub category with lowest sales in 2021'

In [60]:
%%time
predict_template_query_v3("sub category least profitable in 2021")

CPU times: user 1min, sys: 27.5 s, total: 1min 28s
Wall time: 1min 28s


'Which is the top sub category based on sales in 2021'

In [63]:
%%time
predict_template_query_v3("worst performing category in 2021")

CPU times: user 1min 1s, sys: 27.8 s, total: 1min 28s
Wall time: 1min 28s


'top category based on sales in 2021'

In [145]:
%%time
predict_template_query_v3("What was phone sales in q1 '23'?")

CPU times: user 1min 6s, sys: 30.6 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales of phone in quarter1 2023'

In [146]:
%%time
predict_template_query_v3("What was phone sales in q2 '22 ?")

CPU times: user 1min 5s, sys: 30.2 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales of phone in quarter2 2022'

In [147]:
%%time
predict_template_query_v3("What was paper sales in jun '20'?")

CPU times: user 1min 6s, sys: 30.4 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales of paper in june 2020'

In [150]:
%%time
predict_template_query_v3("What was laptop sales in q1 2019?")

CPU times: user 1min 5s, sys: 30.7 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales of laptop in quarter1 2020'

In [57]:
%%time
predict_template_query_v3("Growth rate of sales share of phone")

CPU times: user 1min 3s, sys: 28.7 s, total: 1min 32s
Wall time: 1min 32s


'What is the growth rate of share of phone for sales'

In [58]:
%%time
predict_template_query_v3("What is the growth contribution to overall sales for phone in 2021")

CPU times: user 1min 1s, sys: 28.8 s, total: 1min 30s
Wall time: 1min 30s


'What is the growth contribution to overall sales for phone in 2021'

In [59]:
%%time
predict_template_query_v3("Which are the top 3 sub category based on share/contribution of sales across segment except paper")

CPU times: user 1min, sys: 27.5 s, total: 1min 27s
Wall time: 1min 27s


'Which are the top sub category based on share of sales except paper across segment'

In [60]:
%%time
predict_template_query_v3("which are the 5 top selling sub category by sales in 2023")

CPU times: user 1min 1s, sys: 28.5 s, total: 1min 30s
Wall time: 1min 30s


'Which are the top sub category based on sales in 2023'

In [61]:
%%time
predict_template_query_v3("how does the sales change for phone in the last year")

CPU times: user 1min 5s, sys: 30.3 s, total: 1min 35s
Wall time: 1min 35s


'What is the trend of sales for phone in 2020'

In [62]:
%%time
predict_template_query_v3("what is the average monthly market share of paper for the last two and a half years")

CPU times: user 1min 2s, sys: 28.9 s, total: 1min 31s
Wall time: 1min 31s


'What is the trend of share of paper across months'

In [63]:
%%time
predict_template_query_v3("when was the monthly selling of paper recorded the lowest?")

CPU times: user 1min 5s, sys: 30.5 s, total: 1min 35s
Wall time: 1min 35s


'When was the last time that monthly sales of paper was lowest'

In [64]:
%%time
predict_template_query_v3("What were the sales in '23?")

CPU times: user 1min 7s, sys: 30.9 s, total: 1min 38s
Wall time: 1min 38s


'What is the sales in 2023'

In [65]:
%%time
predict_template_query_v3("sales number in year 2022?")

CPU times: user 1min 6s, sys: 30.8 s, total: 1min 37s
Wall time: 1min 37s


'What is the sales in 2022'

In [76]:
%%time
predict_template_query_v3("what was sales of motorcycles in the year 2021?")

CPU times: user 1min, sys: 27.7 s, total: 1min 28s
Wall time: 1min 28s


'What is the sales of motorcycle in 2021'

In [72]:
%%time
predict_template_query_v3("what was profit for the year 2020?")

CPU times: user 1min 5s, sys: 30.9 s, total: 1min 36s
Wall time: 1min 36s


'What is the profit for 2020'

In [66]:
%%time
predict_template_query_v3("what was sales in 11/20?")

CPU times: user 1min 4s, sys: 29.3 s, total: 1min 33s
Wall time: 1min 33s


'What is the sales in 2020'

In [67]:
%%time
predict_template_query_v3("Compare 1983 and 1984 sales and discount percentages.")

CPU times: user 1min, sys: 28.8 s, total: 1min 29s
Wall time: 1min 29s


'What is the sales and discount percentage in 1983 vs 1984'

In [68]:
%%time
predict_template_query_v3("what was sales in twenty twenty two?")

CPU times: user 1min 7s, sys: 31.3 s, total: 1min 38s
Wall time: 1min 38s


'What is the sales in 2020'

In [69]:
%%time
predict_template_query_v3("what are profit each months")

CPU times: user 1min 9s, sys: 31.4 s, total: 1min 40s
Wall time: 1min 40s


'What is the monthly trend of profit'

In [70]:
%%time
predict_template_query_v3("When did the previous low in phone sales take place?")

CPU times: user 1min 6s, sys: 30.9 s, total: 1min 37s
Wall time: 1min 37s


'When was the last time that sales of phone was lowest'

In [71]:
%%time
predict_template_query_v3("When was phone sales lowest recently?")

CPU times: user 1min 8s, sys: 30.9 s, total: 1min 39s
Wall time: 1min 39s


'When was the last time that sales of phone was lowest'

In [74]:
%%time
predict_template_query_v3("What was phone sales in november '23'?")

CPU times: user 1min 3s, sys: 29.4 s, total: 1min 32s
Wall time: 1min 32s


'What is the sales of phone in 2023'

In [75]:
%%time
predict_template_query_v3("What was phone sales in 1st quarter '23'?")

CPU times: user 1min 1s, sys: 27.8 s, total: 1min 29s
Wall time: 1min 29s


'What is the sales of phone in 2023'

In [77]:
%%time
predict_template_query_v3("What was phone sales in q1 '23'?")

CPU times: user 1min 2s, sys: 28.8 s, total: 1min 31s
Wall time: 1min 31s


'What is the sales of phone in 2023'

In [79]:
%%time
predict_template_query_v3("trend of weekly hours in 2023")

CPU times: user 1min 2s, sys: 28.3 s, total: 1min 30s
Wall time: 1min 30s


'What is the monthly trend of hours in 2023'

In [None]:
%%time
predict_template_query_v3("trend of quantity in 2023")

In [82]:
%%time
predict_template_query_v3("What was paper sales in feb '23'?")

CPU times: user 1min 2s, sys: 28.8 s, total: 1min 30s
Wall time: 1min 30s


'What is the sales of paper in march 2023'

In [78]:
%%time
predict_template_query_v3("Sales numbers?")

CPU times: user 1min 5s, sys: 30.3 s, total: 1min 36s
Wall time: 1min 36s


'What is the sales'

In [85]:
%%time

inp = 'what was sales in 11/20?'
inp = tokenizer.encode(inp, return_tensors='pt')
output = model.generate(input_ids=inp.to('cuda'), max_length= 200)
output = tokenizer.decode(output[0])

CPU times: user 1min 38s, sys: 43.8 s, total: 2min 21s
Wall time: 2min 21s


In [None]:
output

CPU times: user 1min 47s, sys: 50 s, total: 2min 37s
Wall time: 2min 37s


In [49]:
from tqdm import tqdm

In [50]:
df2 = pd.read_csv('template_and_user_query.csv')

In [51]:
val_df2 = df2.iloc[::10,:]
val_df2.reset_index(drop=True, inplace=True)
df2 = pd.concat([df2,val_df2]).drop_duplicates(keep=False).reset_index()
df2.drop(columns=['index'], inplace=True)

In [57]:
df2_new = df2[300:500]

In [60]:
%%time
output_list_300_500 = []
for i, row in tqdm(df2_new.iterrows()):
    inp = row['user query']
    try:
        output = predict_template_query_v3(inp)
    except:
        output = None
        print(f"Got exception while processing : {inp}")
    output_list_300_500.append(output)

200it [1:42:11, 30.66s/it]

CPU times: user 1h 21min 28s, sys: 20min 42s, total: 1h 42min 10s
Wall time: 1h 42min 11s





In [62]:
df = pd.DataFrame(output_list_300_500, columns=['predicted_template'])
df.to_csv('predict_df_300_500.csv', index=False)

In [61]:
output_list_300_500

['What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the s

In [86]:
output_list_200_300

['What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and 

In [81]:
output_list_100_200

['What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2

In [78]:
output_list

['What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub cat

In [40]:
def predict_template_query(user_query):
    question = query_template.format(user_query=user_query)
    _input = str({'question': question})
#     print(_input)
    _inputs = tokenizer.encode(_input, return_tensors="pt")
#     print(_inputs)
#     print(type(_inputs))
    outputs = model_to_merge.generate(input_ids=_inputs.to('cuda'), max_length= 200)
    output = tokenizer.decode(outputs[0])
    # print(output)
    # a = output.split("_LUMIN_ Answer",1)[1]
    #print(a)
    return output

In [None]:
model_to_merge.to('cuda')

In [63]:
def process_output(a):
    b=a.split("### Answer",1)[1]
    return b.split("\n")[1]

In [5]:
# inp = "why production dropped for bikes in 1st quarter 2022"
# predict_template_query(inp)

In [4]:
# inp = 'why sales of texas increase in 1st quarter 2023'
# a = predict_template_query(inp)

In [2]:
# inp = 'drivers of profit'
# predict_template_query(inp)

In [3]:
# inp = "why sales dropped for bikes in 1st quarter 2022"
# predict_template_query(inp).split('\n')[1]