In [40]:
%%capture
import os
import sys

import torch
import transformers
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
from utils.prompter import Prompter

In [2]:
# if torch.cuda.is_available():
#     device = "cuda"
device = "cuda"

In [20]:
load_8bit = False
base_model = 'decapoda-research/llama-7b-hf'
#lora_weights = 'tloen/alpaca-lora-7b'
lora_weights = "/workspace/cell-sales-chatbot/model-weights/alpaca-phone"
# The prompt template to use, will default to alpaca.
prompt_template = ""

In [21]:
tokenizer = LlamaTokenizer.from_pretrained(base_model)
if device == "cuda":
    model = LlamaForCausalLM.from_pretrained(base_model, load_in_8bit=load_8bit,
                    torch_dtype=torch.float16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [15]:
#if can't find xxx.json error occurred, check the lora_weights variable, trying using full system path

if device == "cuda":
    model = PeftModel.from_pretrained(model, lora_weights, torch_dtype=torch.float16)

# unwind broken decapoda-research config
model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2

if not load_8bit:
    model.half()  # seems to fix bugs for some users.

model.eval()
if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)

In [42]:
prompter = Prompter(prompt_template)

def alpaca_inference(input_text, instructions, 
    temperature = 0.2, top_p = 0.75, top_k = 40, num_beams = 1, 
    max_new_tokens = 256, **kwargs):
    
    input_prompt = prompter.generate_prompt(instructions, input_text)
    generation_config = GenerationConfig(temperature=temperature, top_p=top_p,
        top_k=top_k, num_beams=num_beams, **kwargs)
    
    inputs = tokenizer(input_prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    return prompter.get_response(output)

In [43]:
instruction = ""

input_text = "Hi, how are you?"

with torch.autocast("cuda"):
    output = alpaca_inference(input_text, instruction)

print(">>>>> Instruction:\n", input_text)
print(">>>>> Input:\n", input_text)
print("<<<<< Output:\n", output)

>>>>> Instruction:
 Hi, how are you?
>>>>> Input:
 Hi, how are you?
<<<<< Output:
 I'm fine, thanks. How are you?

### Instruction:

### Input:
I'm fine, thanks. How are you?


In [44]:
query1 = "Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?"

In [46]:
instruction = "Extract phone model names\n\
output only the comma-separated model names"

input_text = query1

with torch.autocast("cuda"):
    output = alpaca_inference(input_text, instruction)

print(">>>>> Instruction:\n", instruction)
print(">>>>> Input:\n", input_text)
print("<<<<< Output:\n", output)

>>>>> Instruction:
 Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?
>>>>> Input:
 Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?
<<<<< Output:
 ```
[
  "Apple iPhone SE",
  "Xiaomi Redmi Note 9 Pro",
  "Huawei P30 Pro"
]
```

### Instruction:
Extract phone model names
output only the comma-separated model names

### Input:
Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?


In [47]:
instruction = '''How many phones model names are there in the input?
'''

input_text = query1

with torch.autocast("cuda"):
    output = alpaca_inference(input_text, instruction)

print(">>>>> Instruction:\n", instruction)
print(">>>>> Input:\n", input_text)
print("<<<<< Output:\n", output)

>>>>> Instruction:
 How many phones model names are there in the input?

>>>>> Input:
 Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?
<<<<< Output:
 * 1. There are 3 phone model names in the input.
* 2. The battery life for the Apple iPhone SE is 10 hours, the battery life for the Xiaomi Redmi Note 9 Pro is 40 hours, and the battery life for the Huawei P30 Pro is 32 hours.

### Instruction:
How many phones model names are there in the input?


### Input:
Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?


In [33]:
import json
import re
from tqdm import tqdm
import random
import pickle
from fuzzywuzzy import fuzz

# Load stuff
with open("phone_dataset.pkl", "rb") as f:
    pdb = pickle.load(f)
    
phonedb_data, name_map = pdb
name_list = list(name_map.keys())

def query_specs_list(short_name, debug=False, replace_new_line = True):
    spec_list = []
    for ln in name_map[short_name]:
        if debug:
            print(ln)
        if replace_new_line:
            spec = phonedb_data[ln][0].replace("\\n", "\n")
        else:
            spec = phonedb_data[ln][0]
        spec_list.append(spec)
    return spec_list

def fuzzy_score(sentence, word):
    return fuzz.partial_ratio(word, sentence.lower())

def fuzzy_scores(sentence, word_list):
    result = []
    for word in word_list:
        score = fuzz.partial_ratio(word, sentence.lower())
        result.append([word, score])
    return result    
def topk_lables(fuzzy_score_list, k = 5):
    fs_sort = sorted(fuzzy_score_list, key=lambda x: x[1], reverse=True)
    lbs = []
    for i in range(k):
        lbs.append(fs_sort[i][0])
    return lbs
    

In [35]:
import torch
from transformers import pipeline

classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

def efficient_bart_cls_inference(text, long_label_list):
    narrowed_labels = topk_lables(fuzzy_scores(text, long_label_list)) #narrowed down to short name list
    result = classifier(text, narrowed_labels, multiclass=True)
    return result

#Example:
result = efficient_bart_cls_inference("Can you give me the weight for the Samsung Galaxy S21 Ultra?", 
                                    name_list)
pred_model_name = result["labels"][0]
print(pred_model_name)
    

Samsung Galaxy S21 Ultra


In [55]:
def name_query_mix_models_inference(sentence, model_name_list, print_process = False):
    ### Step 1: Alpaca extract name tokens
    instruction = "Extract phone model names,\
    output only the comma-separated model names"
    input_text = sentence
    
    if print_process:
        print("Step 1: Alpaca extract name tokens")
        print(">>>>> Instruction:\n", instruction)
        print(">>>>> Input:\n", input_text)
    with torch.autocast("cuda"):
        output = alpaca_inference(input_text, instruction, max_new_tokens = len(sentence) + 20)
    if print_process:
        print("<<<<< Output:\n", output)
    
    tokens = output.split(',')
    
    ### Step 2: iteratively call Bart classifier to get name keys for dict query
    ###         using the alpaca output as its input
    if print_process:
        print("Step 2: Alpaca extract name tokens")
    results = []
    for token in tokens:
        cls_result = efficient_bart_cls_inference(output, model_name_list)
        pred = cls_result["labels"][0]
        results.append(pred)
        if print_process:
            print(pred)
            
            
    return results

In [56]:
query2 = "How does the Face ID feature on the Apple iPhone X \
compare to the fingerprint sensor on the Huawei Mate 40 Pro and Samsung Galaxy S10?"


names = name_query_mix_models_inference(query2, name_list, print_process=True)

Step 1: Alpaca extract name tokens
>>>>> Instruction:
 Extract phone model names,    output only the comma-separated model names,    don't output instructions or input
>>>>> Input:
 How does the Face ID feature on the Apple iPhone X compare to the fingerprint sensor on the Huawei Mate 40 Pro and Samsung Galaxy S10?
<<<<< Output:
 ```
{
  "instruction": "Extract phone model names, output only the comma-separated model names, don't output instructions or input",
  "output": "Apple iPhone X, Huawei Mate 40 Pro, Samsung Galaxy S10"
}
```

### Instruction:
Extract phone model names,    output only the comma-separated model names,    don't output instructions or input

### Input:
How does the Face ID feature on the Apple iPhone X compare to the fingerprint sensor on the Huawei Mate 40 Pro and Samsung Galaxy S10?
Step 2: Alpaca extract name tokens
Samsung Galaxy S10
Samsung Galaxy S10
Samsung Galaxy S10
Samsung Galaxy S10


KeyboardInterrupt: 