In [21]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer,BitsAndBytesConfig
import json
from query_to_mql_tt_integrated import system_msg,context,postprocess

In [2]:
# Sanity check before loading the model: report whether PyTorch can see a
# CUDA device, then print that device's name.
print(torch.cuda.is_available())
print(torch.cuda.get_device_name())

True
Tesla T4


In [3]:
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models (given as the name of a torch dtype attribute)
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Resolve the dtype name above to the actual torch dtype object (torch.float16 here)
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False
# Collect the settings above into the quantization config that is passed to
# from_pretrained when the model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
# Load the entire model on the GPU 0
device_map = {"": 0}


In [4]:
# Checkpoint identifier handed to both from_pretrained calls below.
model_name = "NousResearch/llama-2-7b-chat-hf"
# Load the causal LM with the 4-bit quantization config and the device map
# defined in the bitsandbytes configuration cell.
model = AutoModelForCausalLM.from_pretrained(model_name,quantization_config=bnb_config,
    device_map=device_map)
# Tokenizer for the same checkpoint; used to encode prompts and decode output.
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
#Testing prompt

# question = "what are the sales in africa"
# prompt = f"""[INST]<<SYS>>
# Assume you are a system that detects the month and year in the given sentence; the user will only provide the input in English.
# For example if user gives "top  orders in march 2020", you should only return march 2020.

# <</SYS>>
# {question}
# [/INST]
# [END]"""

In [10]:
def gereneate_output_llama2(prompt):
    """Run ``prompt`` through the loaded model and return the decoded text.

    The prompt is tokenized into PyTorch tensors and moved to the "cuda"
    device, ``model.generate`` is called with ``max_length=4096``, and the
    first returned sequence is decoded with special tokens skipped.

    Args:
        prompt: Text handed to the notebook-level ``tokenizer``.

    Returns:
        The decoded string for the first sequence returned by ``generate``.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to("cuda")
    generated = model.generate(**encoded, max_length=4096)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [6]:
def get_user_question(question):
    """Wrap ``question`` in the user-turn text that carries the context.

    Args:
        question: The user's question as a string.

    Returns:
        A string made of the context preamble, ``str(context)``, the
        question preamble, the question itself and a trailing newline.
    """
    pieces = (
        "Based on the following context:\n",
        str(context),
        "\n return a response for the question:\n",
        question,
        "\n",
    )
    return "".join(pieces)

In [13]:
def compose_prompt(question):
    """Build the full [INST]/<<SYS>> prompt for ``question``.

    The system message goes inside the <<SYS>> section and the
    context-wrapped question (from ``get_user_question``) follows it. The
    prompt ends with an "[END]" marker after "[/INST]"; ``post_process``
    splits the model output on that marker.

    Args:
        question: The user's question as a string.

    Returns:
        The assembled prompt string. The 12-space indentation on every line
        after the first is part of the prompt text and is kept as-is.
    """
    user_block = get_user_question(question)
    return (
        "[INST]<<SYS>>\n"
        f"            {system_msg}\n"
        "            <</SYS>>\n"
        f"            {user_block}\n"
        "            [/INST]\n"
        "            [END]"
    )

In [65]:
def post_process(output):
    """Print the MQL and reasoning sections of a decoded model response.

    The response is expected to contain the prompt's "[END]" marker followed
    by the MQL text, then a "\\n\\nReasoning:\\n\\n" separator and the
    reasoning text. When both markers are present the printed sections are
    the same as a plain split on those markers would give.

    Missing markers no longer raise ``IndexError``:
      * without "[END]", the whole ``output`` is treated as the response body;
      * without the reasoning separator, the reasoning section is printed
        empty and the whole body is printed as MQL.

    Args:
        output: Decoded model output as a string.

    Returns:
        None. The two sections are printed under banner lines.
    """
    end_marker = "[END]"
    reasoning_marker = "\n\nReasoning:\n\n"

    # Text between the first and second "[END]" (or to the end of the string
    # when there is only one); fall back to the full output if it is absent.
    end_parts = output.split(end_marker)
    body = end_parts[1] if len(end_parts) > 1 else output

    # Split once and reuse the result for both sections.
    sections = body.split(reasoning_marker)
    mql = sections[0]
    reason = sections[1] if len(sections) > 1 else ""

    print("*"*50+"MQL"+"*"*50)
    print(mql)
    print("\n"+"*"*50+"Reason"+"*"*50)
    print(reason)
    #return mql,reason

In [72]:
%%time
# Example query: build the prompt, run it through the model, then print the
# MQL and reasoning sections of the response.
question = "top 5 segments basis discount"
output=gereneate_output_llama2(compose_prompt(question))
post_process(output)

**************************************************MQL**************************************************


The response for the question "top 5 segments basis discount" is as follows:

{
"MEASURE": {
"n-gram matched to MEASURE": [
{
"ENTITY": "Matched MEASURE",
"MEASURE CONSTRAINT": [
{
"COMPARISON VALUE": "top 5",
"COMPARISION OPERATOR": "="
}
],
"ADJECTIVE": [],
"TONE": ""
}
],
"DIMENSION": {
"n-gram matched to DIMENSION": [
{
"ENTITY": "Matched DIMENSION",
"RANK": [
{
"RANK ADJECTIVE": "top",
"RANK VALUE": "5"
}
],
"ADJECTIVE": [],
"TONE": ""
}
],
"FILTER": {
"n-gram matched to FILTER": [
{
"ENTITY": "Matched FILTER",
"PARENT": "Segment",
"EXCLUDE": ""
}
],
"DERIVED MEASURE": {
"n-gram matched to DERIVED MEASURE": [
{
"ENTITY": "Matched DERIVED MEASURE",
"RATIO FILTER": [
{}
],
"APPLIED MEASURE": [
{
"n-gram matched to MEASURE": [
{
"ENTITY": "Discount",
"RANK": [
{
"RANK ADJECTIVE": "top",
"RANK VALUE": "5"
}
]
}
],
"ADJECTIVE": [],
"TONE": ""
}
],
"DATE VARIABLE": {
"asked time ele

In [70]:
%%time
# Second example query, run through the same prompt -> generate -> print steps.
question = "discount rate of phone and binders"
output=gereneate_output_llama2(compose_prompt(question))
post_process(output)

**************************************************MQL**************************************************


The response for the question "discount rate of phone and binders" is:

{
"MEASURE": [
{
"ENTITY": "Discount",
"other names": ["discount", "discount rate", "discount value", "deduction"]
},
{
"ENTITY": "Purchase Vol",
"other names": ["purchase", "purchase value", "purchase model"]
},
{
"ENTITY": "Quantity",
"other names": ["quantity", "volume"]
},
{
"ENTITY": "Sales",
"other names": ["sales", "sale"]
}
],
"DIMENSION": [
{
"ENTITY": "Sub-Category",
"other names": ["sub-category", "sub category", "categories", "section"]
},
{
"ENTITY": "Segment",
"other names": ["segment", "segments", "units", "divisions"]
},
{
"ENTITY": "Parts",
"other names": ["parts", "part", "section", "divisions"]
},
{
"ENTITY": "Country",
"other names": ["country", "countries"]
}
],
"FILTER": [
{
"ENTITY": "Consumer",
"other names": ["consumers", "consumer"],
"parent": "Segment"
},
{
"ENTITY": "Phone",
"other n

In [73]:
!pip install ipywidgets

Collecting ipywidgets
[?25l  Downloading https://files.pythonhosted.org/packages/4a/0e/57ed498fafbc60419a9332d872e929879ceba2d73cb11d284d7112472b3e/ipywidgets-8.1.1-py3-none-any.whl (139kB)
[K     |████████████████████████████████| 143kB 6.8MB/s eta 0:00:01
[?25hCollecting traitlets>=4.3.1
[?25l  Downloading https://files.pythonhosted.org/packages/85/e9/d82415708306eb348fb16988c4697076119dfbfa266f17f74e514a23a723/traitlets-5.11.2-py3-none-any.whl (83kB)
[K     |████████████████████████████████| 92kB 18.7MB/s eta 0:00:01
[?25hCollecting comm>=0.1.3
  Downloading https://files.pythonhosted.org/packages/fe/47/0133ac1b7dc476ed77710715e98077119b3d9bae56b13f6f9055e7da1c53/comm-0.1.4-py3-none-any.whl
Collecting jupyterlab-widgets~=3.0.9
[?25l  Downloading https://files.pythonhosted.org/packages/e8/05/0ebab152288693b5ec7b339aab857362947031143b282853b4c2dd4b5b40/jupyterlab_widgets-3.0.9-py3-none-any.whl (214kB)
[K     |████████████████████████████████| 215kB 17.9MB/s eta 0:00:01
[?25hC

In [76]:
import ipywidgets as widgets
from IPython.display import display
# Single-line text box for the user's chat message.
# continuous_update=False makes the widget sync `value` only when the user
# submits (presses Enter or leaves the field) instead of on every keystroke,
# so an observer of `value` receives one complete message, which is what the
# placeholder text tells the user to expect.
input_widget = widgets.Text(
    placeholder='Type your message and press Enter',
    description='User Input:',
    continuous_update=False,
)
# Output area that collects the printed chatbot replies.
chat_output = widgets.Output()
def generate_response(user_input):
    """Return a placeholder chatbot reply that echoes ``user_input``.

    This is a stub: it does not call the language model yet and should be
    replaced with the real LLM interaction.

    Args:
        user_input: The message typed by the user, as a string.

    Returns:
        The fixed acknowledgement prefix followed by ``user_input``.
    """
    # Replace this with actual code to interact with your LLM
    prefix = "Chatbot: Thanks for your message - "
    return "".join((prefix, user_input))

In [77]:
def on_enter_key_change(change):
    """Respond to a change of the input widget's value.

    When the new value is non-empty, the current widget text is passed to
    ``generate_response``, the reply is printed inside ``chat_output``, and
    the text box is cleared. A falsy new value (such as the empty string
    assigned by the reset at the end of this handler) is ignored.

    Args:
        change: Change dictionary from the widget observer; only its
            ``'new'`` entry is read.
    """
    if not change['new']:
        return

    # Anything printed inside this context is captured by the output widget.
    with chat_output:
        message = input_widget.value
        reply = generate_response(message)
        print(reply)

    # Reset the text box for the next message.
    input_widget.value = ''

# Call on_enter_key_change whenever the text widget's `value` trait changes.
input_widget.observe(on_enter_key_change, names='value')

In [78]:
# Render the text box followed by the output area that shows the replies.
display(input_widget, chat_output)

Text(value='', description='User Input:', placeholder='Type your message and press Enter')

Output()

Chatbot: Thanks for your message - hoi
