In [None]:
!

In [1]:
!sudo pip install git+https://github.com/huggingface/transformers

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-oyd4ml3g
  Running command git clone -q https://github.com/huggingface/transformers /tmp/pip-req-build-oyd4ml3g
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Collecting tokenizers<0.15,>=0.14
[?25l  Downloading https://files.pythonhosted.org/packages/76/ee/7e35fb46c728989357e6ccb96df64c4364601cfbfdd6c25ccc872e6c16a0/tokenizers-0.14.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 6.8MB/s eta 0:00:01
Building wheels for collected packages: transformers
  Building wheel for transformers (PEP 517) ... [?25ldone
[?25h  Created wheel for transformers: filename=transformers-4.35.0.dev0-cp38-none-any.whl size=7746364 sha256=3e7a6adc8963bf2f999aa1b7a57a322a527045c9a4b9c73dab8fec8b0105

In [2]:
import transformers
transformers.__version__

'4.35.0.dev0'

In [3]:
#!sudo pip install -q accelerate peft==0.4.0 bitsandbytes trl==0.4.7

In [4]:
import os
import torch
from datasets import load_dataset
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
import pandas as pd

In [5]:
df = pd.read_csv('/data/mistral/user_query_complete.csv')

In [6]:
df.head(2)

Unnamed: 0,question,user query
0,What is the Quantity in Quarter1 2018,Q1 2018 quantity?
1,What is the Quantity in Quarter1 2018,Quantity for 1st quarter 2018?


In [7]:
val_df = df.iloc[::32,:]
val_df.reset_index(drop=True, inplace=True)
df = pd.concat([df,val_df]).drop_duplicates(keep=False).reset_index()
df.drop(columns=['index'], inplace=True)

In [8]:
val_df.to_csv('val_df.csv', index=False)

In [9]:
# The model that you want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

# The instruction dataset to use
#dataset_name = ""

# Fine-tuned model name
#new_model = "mistral-ft-peft-on-template_and_user_query-data"

In [10]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [11]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = True

In [12]:
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "/data/mistral/with-more-data/oct-9"

# Number of training epochs
num_train_epochs = 15

# Enable fp16/bf16 training (set bf16 to True with an A100)
# fp16 = False
fp16 = True # not using quantisation
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 8

# Batch size per GPU for evaluation
per_device_eval_batch_size = 8

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True


# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
max_steps = 1000

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 200

# Log every X updates steps
logging_steps = 200

In [13]:
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [14]:
promt_template_v2 = """<s>[INST]<<SYS>>
You are an advanced template converter that converts user question to a specific template which answers the user question.

<</SYS>>

{user_query}
[/INST]
[LUMINTEMPLATE]
{template_query}
[/LUMINTEMPLATE]</s>"""

In [15]:
df.columns

Index(['question', 'user query'], dtype='object')

In [16]:
def create_fine_tuning_dataset(row):
    user_query = row['user query']
    template_query = row['question']
    formated = promt_template_v2.format(user_query=user_query,
                                      template_query=template_query)
    # data_point = str({'question': formated, "answer":template_query})
    return formated

In [17]:
df['fine_tuning_dataset']=df.apply(create_fine_tuning_dataset, axis=1)

In [18]:
val_df['fine_tuning_dataset']=val_df.apply(create_fine_tuning_dataset, axis=1)

In [19]:
val_df['fine_tuning_dataset'][0]

'<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nQ1 2018 quantity?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the Quantity in Quarter1 2018\n[/LUMINTEMPLATE]</s>'

In [20]:
df['fine_tuning_dataset'][0]

'<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nQuantity for 1st quarter 2018?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the Quantity in Quarter1 2018\n[/LUMINTEMPLATE]</s>'

In [21]:
df.drop(columns=['question','user query'], inplace=True)

In [22]:
df.columns

Index(['fine_tuning_dataset'], dtype='object')

In [23]:
val_df.drop(columns=['question','user query'], inplace=True)

In [24]:
val_df.shape

(337, 1)

In [25]:
train_dataset = Dataset.from_pandas(df)
val_dataset = Dataset.from_pandas(val_df)

In [26]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 9223
})

In [27]:
val_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 337
})

In [28]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

In [29]:
bnb_4bit_quant_type

'nf4'

In [30]:
compute_dtype

torch.float16

In [31]:
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [32]:
model_name = "mistralai/Mistral-7B-Instruct-v0.1"

In [33]:
!sudo pip install pynvml

Collecting pynvml
[?25l  Downloading https://files.pythonhosted.org/packages/5b/9c/adb8070059caaa15d5a572b66bccd95900d8c1b9fa54d6ecea6ae97448d1/pynvml-11.5.0-py3-none-any.whl (53kB)
[K     |████████████████████████████████| 61kB 5.6MB/s eta 0:00:011
[?25hInstalling collected packages: pynvml
Successfully installed pynvml-11.5.0


In [34]:
from pynvml.smi import nvidia_smi
nvsmi = nvidia_smi.getInstance()
nvsmi.DeviceQuery('memory.free, memory.total')

{'gpu': [{'fb_memory_usage': {'total': 16384.0,
    'free': 15972.9375,
    'unit': 'MiB'}}]}

In [35]:
#!df -H

In [118]:
del model
torch.cuda.empty_cache()

In [36]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
#     torch_dtype=torch.bfloat16,
    device_map="auto"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [37]:
# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,
                                          # add_eos_token=True,
                                          use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [38]:
max([len(tokenizer.encode(df['fine_tuning_dataset'][i])) for i in range(1800)])

126

In [39]:
tokenizer.decode([2])

'</s>'

In [40]:
# LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules = ["q_proj", "v_proj"],
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [41]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    eval_steps=200, # requires when eval_dataset is defined
    per_device_eval_batch_size=1, # Batch size for evaluation
    evaluation_strategy="steps", # requires when eval_dataset is defined
    logging_strategy="steps",
    logging_steps=1,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=4000,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
    load_best_model_at_end=True,
    save_total_limit=1,
    metric_for_best_model="eval_loss",
    greater_is_better=False
)

In [42]:
## Getting FLOPs of model

model_flops = (
  model.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 512)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.551360512 GB
Flops 21843.947814912 GFLOPs


In [43]:
train_dataset

Dataset({
    features: ['fine_tuning_dataset'],
    num_rows: 9223
})

In [44]:
#print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [45]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    dataset_text_field="fine_tuning_dataset",
    max_seq_length=256,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)



Map:   0%|          | 0/9223 [00:00<?, ? examples/s]

Map:   0%|          | 0/337 [00:00<?, ? examples/s]

In [46]:
val_dataset[0]

{'fine_tuning_dataset': '<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nQ1 2018 quantity?\n[/INST]\n[LUMINTEMPLATE]\nWhat is the Quantity in Quarter1 2018\n[/LUMINTEMPLATE]</s>'}

In [None]:
# Train model
trainer.train()

Step,Training Loss,Validation Loss
200,0.2807,0.364057
400,0.252,0.283258
600,0.214,0.252046
800,0.2419,0.233638
1000,0.2487,0.222283
1200,0.2265,0.208451
1400,0.215,0.207179
1600,0.2211,0.196577
1800,0.2477,0.198251
2000,0.1997,0.192909


In [None]:
# Fine-tuned model name
new_model_name = "mistral-ft-peft-v1-lr-64-with-more-data"

In [None]:
# Save trained model
trainer.model.save_pretrained(new_model)

In [41]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_

In [43]:
from peft import PeftModel, PeftConfig

In [None]:
del model
torch.cuda.empty_cache()

from peft import AutoPeftModelForCausalLM

model = AutoPeftModelForCausalLM.from_pretrained(new_model_name, device_map="auto", torch_dtype=torch.bfloat16)
model = model.merge_and_unload()

In [None]:
output_merged_dir = os.path.join(new_model_name, "final_merged_checkpoint")
model.save_pretrained(output_merged_dir, safe_serialization=True)

In [45]:
## Getting FLOPs of model

model_flops = (
  model_to_merge.floating_point_ops(
    {
       "input_ids": torch.zeros(
           (1, 200)
      )
    }
  )
  * training_arguments.gradient_accumulation_steps
)

#print(model)
print("Memory footprint", model.get_memory_footprint() / 1e9, "GB")
print("Flops", model_flops / 1e9, "GFLOPs")

Memory footprint 4.488974336 GB
Flops 4083.474432 GFLOPs


In [12]:
query_template_v2 = """[INST]<<SYS>>
You are an advanced template converter that converts user question to a specific template which answers the user question.

<</SYS>>

{user_query}
[/INST]
[LUMINTEMPLATE]"""

In [24]:
#model_to_merge.to('cuda')

In [23]:
!sudo pip install ninja

Collecting ninja
[?25l  Downloading https://files.pythonhosted.org/packages/0f/58/854ce5aab0ff5c33d66e1341b0be42f0330797335011880f7fbd88449996/ninja-1.11.1-py2.py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (145kB)
[K     |████████████████████████████████| 153kB 6.9MB/s eta 0:00:01
[?25hInstalling collected packages: ninja
Successfully installed ninja-1.11.1


In [24]:
import ninja
ninja.__version__

'1.11.1'

In [12]:
model = AutoModelForCausalLM.from_pretrained(output_merged_dir, device_map="auto", torch_dtype=torch.bfloat16)

KeyError: 'mistral'

In [15]:
def predict_template_query_v3(user_query):
    inp = query_template_v2.format(user_query=user_query)
    _inputs = tokenizer.encode(inp, return_tensors="pt")
    outputs = model.generate(input_ids=_inputs.to('cuda'), max_length= 150, pad_token_id=tokenizer.eos_token_id)
    output = tokenizer.decode(outputs[0])
    output_new = output.split('[LUMINTEMPLATE]\n')[1]
    return output_new.split('\n[/LUMINTEMPLATE]')[0]

In [17]:
%%time
predict_template_query_v3('brands least profitable in 2021')

CPU times: user 5.09 s, sys: 93.8 ms, total: 5.18 s
Wall time: 5.18 s


'List of brands with lowest profit in 2021'

In [18]:
%%time
predict_template_query_v3('trend of work hours in 2023')

CPU times: user 5 s, sys: 143 ms, total: 5.14 s
Wall time: 5.15 s


'What is the trend of work hours in 2023'

In [44]:
model_to_merge = PeftModel.from_pretrained(
    model,
    '/data/sept-23/checkpoint-810')

In [49]:
model_to_merge = PeftModel.from_pretrained(
    model,
    'llama-2-7b-fine-tuned-peft-v2')

In [19]:
%%time
predict_template_query_v3('which are the 3 top selling category by sales in 2022')

CPU times: user 4.86 s, sys: 205 ms, total: 5.06 s
Wall time: 5.06 s


'Which are the top 3 sub category based on sales in 2022'

In [20]:
%%time
predict_template_query_v3('which are the 3 top selling category by profit in 2022')

CPU times: user 4.83 s, sys: 221 ms, total: 5.05 s
Wall time: 5.05 s


'Which are the top 3 sub category based on profit in 2022'

In [21]:
%%time
predict_template_query_v3("In 02/19 What was the sales of paper?")

CPU times: user 4.98 s, sys: 214 ms, total: 5.19 s
Wall time: 5.19 s


'What is the sales of paper in februaury 2019'

In [35]:
%%time
predict_template_query_v3("notebook sales in '20'?")

CPU times: user 5.07 s, sys: 134 ms, total: 5.2 s
Wall time: 5.2 s


'What is the sales of notebook in 2020'

In [36]:
%%time
predict_template_query_v3("monthly trend of sales")

CPU times: user 5.3 s, sys: 138 ms, total: 5.44 s
Wall time: 5.43 s


'What is the monthly trend of sales'

In [28]:
%%time
predict_template_query_v3("worst sub category in 2021")

CPU times: user 5.01 s, sys: 155 ms, total: 5.16 s
Wall time: 5.16 s


'top sub category basis sales contribution in 2021'

In [38]:
%%time
predict_template_query_v3("What was laptop sales in 4th quarter '11?")

CPU times: user 5 s, sys: 195 ms, total: 5.2 s
Wall time: 5.2 s


'What is the sales of laptop in quarter4 2011'

In [32]:
%%time
predict_template_query_v3("worst brands basis profit in 2021")

CPU times: user 4.98 s, sys: 148 ms, total: 5.13 s
Wall time: 5.13 s


'Which are the top 5 brands based on profit contribution in 2021'

In [17]:
prompt = query_template_v2.format(user_query='brands least profitable in 2021')

In [18]:
tokens = tokenizer.encode(prompt, return_tensors="pt")

In [22]:
%%time
outputs = model.generate(input_ids=tokens.to('cuda'), max_length= 180, )

CPU times: user 6.61 s, sys: 135 ms, total: 6.75 s
Wall time: 6.75 s


In [23]:
tokenizer.decode(outputs[0])

'<s>[INST]<<SYS>>\nYou are an advanced template converter that converts user question to a specific template which answers the user question.\n\n<</SYS>>\n\nbrands least profitable in 2021\n[/INST]\n[LUMINTEMPLATE]\nList of brands with lowest profit in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on profit share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on market share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5 brands based on sales share in 2021\n[/LUMINTEMPLATE]\n\nWhich are the top 5'

In [132]:
%%time
predict_template_query_v3("Growth rate of sales share of phone")

CPU times: user 1min 4s, sys: 30.3 s, total: 1min 34s
Wall time: 1min 34s


'What is the growth contribution to overall sales for phone'

In [133]:
%%time
predict_template_query_v3("What is the growth contribution to overall sales for phone in 2021")

CPU times: user 1min 2s, sys: 28.8 s, total: 1min 31s
Wall time: 1min 31s


'What is the growth contribution to overall sales for phone in 2021'

In [134]:
%%time
predict_template_query_v3("Which are the top 3 sub category based on share/contribution of sales across segment except paper")

CPU times: user 59.8 s, sys: 27.6 s, total: 1min 27s
Wall time: 1min 27s


'Which are the top 3 sub category based on contribution to overall sales except paper'

In [135]:
%%time
predict_template_query_v3("which are the 5 top selling sub category by sales in 2023")

CPU times: user 59.3 s, sys: 26.9 s, total: 1min 26s
Wall time: 1min 26s


'Which are the top sub category basis sales in 2023'

In [136]:
%%time
predict_template_query_v3("how does the sales change for phone in the last year")

CPU times: user 1min 4s, sys: 29.4 s, total: 1min 33s
Wall time: 1min 33s


'What is the trend of sales for phone in 2020'

In [137]:
%%time
predict_template_query_v3("what is the average monthly market share of paper for the last two and a half years")

CPU times: user 1min 2s, sys: 28.6 s, total: 1min 30s
Wall time: 1min 30s


'What is the average monthly share of paper sales over the last 2.5 years'

In [138]:
%%time
predict_template_query_v3("when was the monthly selling of paper recorded the lowest?")

CPU times: user 1min 3s, sys: 29 s, total: 1min 32s
Wall time: 1min 32s


'When was the last time that sales of paper across months was lowest'

In [49]:
%%time
predict_template_query_v3("What were the sales in '23?")

  next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)


CPU times: user 23.8 s, sys: 5.03 s, total: 28.8 s
Wall time: 28.8 s


'What is the sales in 2023'

In [50]:
%%time
predict_template_query_v3("sales number in year 2022?")

CPU times: user 22.7 s, sys: 5.03 s, total: 27.8 s
Wall time: 27.8 s


'What is the sales in 2022'

In [53]:
%%time
predict_template_query_v3("what was profit for the year 2020?")

CPU times: user 1min 4s, sys: 30.3 s, total: 1min 34s
Wall time: 1min 34s


'What is the sales in 2020'

In [49]:
%%time
predict_template_query_v3("what was sales in 11/20?")

CPU times: user 1min 10s, sys: 33.4 s, total: 1min 43s
Wall time: 1min 43s


'What is the sales in 2020'

In [51]:
%%time
predict_template_query_v3("Compare 1983 and 1984 sales and discount percentages.")

CPU times: user 1min 2s, sys: 29.1 s, total: 1min 31s
Wall time: 1min 31s


'What is the sales and discount percentage in 1983 vs 1984'

In [54]:
%%time
predict_template_query_v3("what was sales in twenty twenty two?")

CPU times: user 23.9 s, sys: 5.52 s, total: 29.4 s
Wall time: 29.4 s


'What is the sales in 2022'

In [52]:
%%time
predict_template_query_v3("what are profit each months")

CPU times: user 1min 7s, sys: 31.3 s, total: 1min 38s
Wall time: 1min 38s


'What is the monthly trend of profit'

In [45]:
%%time
predict_template_query_v3("When did the previous low in phone sales take place?")

CPU times: user 1min 5s, sys: 29.9 s, total: 1min 35s
Wall time: 1min 35s


'When was the last time that sales of phone was lowest'

In [46]:
%%time
predict_template_query_v3("When was phone sales lowest recently?")

CPU times: user 1min 8s, sys: 31.8 s, total: 1min 40s
Wall time: 1min 40s


'When was the last time that sales of phone was lowest'

In [67]:
%%time

inp = 'what are you doing'
inp = tokenizer.encode(inp, return_tensors='pt')
output = model.generate(input_ids=inp.to('cuda'), max_length= 200)
output = tokenizer.decode(outputs[0])

CPU times: user 1min 47s, sys: 50 s, total: 2min 37s
Wall time: 2min 37s


In [49]:
from tqdm import tqdm

In [50]:
df2 = pd.read_csv('template_and_user_query.csv')

In [51]:
val_df2 = df2.iloc[::10,:]
val_df2.reset_index(drop=True, inplace=True)
df2 = pd.concat([df2,val_df2]).drop_duplicates(keep=False).reset_index()
df2.drop(columns=['index'], inplace=True)

In [57]:
df2_new = df2[300:500]

In [60]:
%%time
output_list_300_500 = []
for i, row in tqdm(df2_new.iterrows()):
    inp = row['user query']
    try:
        output = predict_template_query_v3(inp)
    except:
        output = None
        print(f"Got exception while processing : {inp}")
    output_list_300_500.append(output)

200it [1:42:11, 30.66s/it]

CPU times: user 1h 21min 28s, sys: 20min 42s, total: 1h 42min 10s
Wall time: 1h 42min 11s





In [62]:
df = pd.DataFrame(output_list_300_500, columns=['predicted_template'])
df.to_csv('predict_df_300_500.csv', index=False)

In [61]:
output_list_300_500

['What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper in 2020',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the sales of phone and paper across months',
 'What is the s

In [86]:
output_list_200_300

['What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and country across months',
 'What is the sales and discount percentage by sub category and 

In [81]:
output_list_100_200

['What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage by sub category and country in 2020',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2021',
 'What is the sales and discount percentage in 2020 vs 2

In [78]:
output_list

['What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales across months in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub category in 2020',
 'What is the sales by sub cat

In [40]:
def predict_template_query(user_query):
    question = query_template.format(user_query=user_query)
    _input = str({'question': question})
#     print(_input)
    _inputs = tokenizer.encode(_input, return_tensors="pt")
#     print(_inputs)
#     print(type(_inputs))
    outputs = model_to_merge.generate(input_ids=_inputs.to('cuda'), max_length= 200)
    output = tokenizer.decode(outputs[0])
    # print(output)
    # a = output.split("_LUMIN_ Answer",1)[1]
    #print(a)
    return output

In [None]:
model_to_merge.to('cuda')

In [63]:
def process_output(a):
    b=a.split("### Answer",1)[1]
    return b.split("\n")[1]

In [5]:
# inp = "why production dropped for bikes in 1st quarter 2022"
# predict_template_query(inp)

In [4]:
# inp = 'why sales of texas increase in 1st quarter 2023'
# a = predict_template_query(inp)

In [2]:
# inp = 'drivers of profit'
# predict_template_query(inp)

In [3]:
# inp = "why sales dropped for bikes in 1st quarter 2022"
# predict_template_query(inp).split('\n')[1]