In [1]:
import json
import re
from tqdm import tqdm
import random
import pickle

In [2]:
%%capture
!pip install -q transformers
!pip install -q sentence_transformers


In [3]:
# Load stuff
with open("phone_dataset.pkl", "rb") as f:
    pdb = pickle.load(f)
    
phonedb_data, name_map = pdb
name_list = list(name_map.keys())

def query_specs_list(short_name, debug=False, replace_new_line = True):
    spec_list = []
    for ln in name_map[short_name]:
        if debug:
            print(ln)
        if replace_new_line:
            spec = phonedb_data[ln][0].replace("\\n", "\n")
        else:
            spec = phonedb_data[ln][0]
        spec_list.append(spec)
    return spec_list

In [4]:
#Example Usage

random_idx = random.randint(0, len(name_list) - 1)
query_name = name_list[random_idx]
spec_list = query_specs_list(query_name)

print(spec_list[0][0:500]) #only print the first text in the list in its first 500 letters

Brand: Huawei 
 Model: Honor Changwan 7X Dual SIM TD-LTE CN 32GB BND-AL10 
 Released: 2017 Dec 
 Announced: 2017 Oct 11 
 Hardware Designer: Huawei 
 Manufacturer: Huawei 
 Codename: Huawei Bond 
 General Extras: Haptic touch feedback 
 Device Category: Smartphone 
 Width: 75.3 mm, 2.96 inch 
 Height: 156.5 mm, 6.16 inch 
 Depth: 7.6 mm, 0.3 inches 
 Bounding Volume: 89.6 ccm 
 Mass: 165 g, 5.82 ounces 
 Platform: Google Android 
 Operating System: Google Android 7.0 (Nougat), EMUI 5.1 
 Operati


In [5]:
import torch
from transformers import pipeline

In [6]:
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

In [7]:
seq = "What are the specs and features of the iPhone 14 Pro and iPhone 8?"

In [8]:
%%capture
!pip install -q fuzzywuzzy
!pip install -q python-Levenshtein
from fuzzywuzzy import fuzz

In [9]:
def fuzzy_score(sentence, word):
    return fuzz.partial_ratio(word, sentence.lower())

def fuzzy_scores(sentence, word_list):
    result = []
    for word in word_list:
        score = fuzz.partial_ratio(word, sentence.lower())
        result.append([word, score])
    return result    
def topk_lables(fuzzy_score_list, k = 5):
    fs_sort = sorted(fuzzy_score_list, key=lambda x: x[1], reverse=True)
    lbs = []
    for i in range(k):
        lbs.append(fs_sort[i][0])
    return lbs
    

In [10]:
print(seq)
topk_lables(fuzzy_scores(seq, name_list))

What are the specs and features of the iPhone 14 Pro and iPhone 8?


['iPhone 14', 'iPhone 8', 'iPhone 14 Pro', 'iPhone 11', 'iPhone 12']

In [11]:
seq1 = "What are the specifications for the Samsung Galaxy S20 Plus?"
seq2 = "Can you tell me the battery life for the Apple iPhone 12 Pro Max?"
seq3 = "What is the screen size for the Xiaomi Redmi Note 10 Pro?"
seq4 = "What is the camera resolution for the Huawei P40 Pro?"
seq5 = "Can you give me the weight for the Samsung Galaxy S21 Ultra?"


seq6= "How do the camera capabilities of the Apple iPhone 12, Samsung Galaxy S21, and Xiaomi Mi 11 compare?"
seq7 = "Which phone has a larger screen, the Samsung Galaxy Note 20 or Huawei P40 Pro Plus?"
seq8 = "Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?"
seq9 = "What are the storage options for the Samsung Galaxy A71, Apple iPhone 11, and Xiaomi Mi 10T Pro?"
seq10 = "How does the Face ID feature on the Apple iPhone X compare to the fingerprint sensor on the Huawei Mate 40 Pro and Samsung Galaxy S10?"

In [12]:
seq = seq1

narrowed_labels = topk_lables(fuzzy_scores(seq, name_list))

classifier(seq, narrowed_labels, multiclass=True)

{'sequence': 'What are the specifications for the Samsung Galaxy S20 Plus?',
 'labels': ['Samsung Galaxy S20+',
  'Samsung Galaxy S20',
  'Samsung Galaxy A20s',
  'Samsung Galaxy S23',
  'Samsung Galaxy A6s'],
 'scores': [0.764614462852478,
  0.2086407095193863,
  0.011368677951395512,
  0.011285211890935898,
  0.004090992268174887]}

In [13]:
seq = seq6

narrowed_labels = topk_lables(fuzzy_scores(seq, name_list))

classifier(seq, narrowed_labels, multiclass=True)

{'sequence': 'How do the camera capabilities of the Apple iPhone 12, Samsung Galaxy S21, and Xiaomi Mi 11 compare?',
 'labels': ['iPhone 12',
  'Xiaomi Mi 11',
  'Samsung Galaxy S21',
  'Samsung Galaxy S21+',
  'Samsung Galaxy A6s'],
 'scores': [0.5691676735877991,
  0.19673483073711395,
  0.16890756785869598,
  0.059184640645980835,
  0.0060053253546357155]}

In [14]:
seq = seq7

narrowed_labels = topk_lables(fuzzy_scores(seq, name_list), k = 10)

classifier(seq, narrowed_labels, multiclass=True)

{'sequence': 'Which phone has a larger screen, the Samsung Galaxy Note 20 or Huawei P40 Pro Plus?',
 'labels': ['Samsung Galaxy Note 20',
  'Huawei P40 Pro',
  'Huawei P40',
  'Huawei P40 Pro+',
  'Samsung Galaxy Note 20 Ultra',
  'Samsung Galaxy Note 10+',
  'Samsung Galaxy Note 9',
  'Samsung Galaxy Note 10',
  'Samsung Galaxy A53',
  'Samsung Galaxy Note 8 Duos'],
 'scores': [0.30299654603004456,
  0.22459819912910461,
  0.19807952642440796,
  0.14535367488861084,
  0.05118313804268837,
  0.0197310708463192,
  0.016331294551491737,
  0.015738993883132935,
  0.014524193480610847,
  0.011463314294815063]}

In [18]:
from scipy.stats import entropy

seq = seq8
print(seq)

seq_tmp = seq.copy()
nl_tmp = name_list.copy()
for i in range(3):
    narrowed_labels = topk_lables(fuzzy_scores(seq, nl_tmp), k = 5)
    result = classifier(seq, narrowed_labels)
    pred_model_name = result["labels"][0]
    print(pred_model_name)
    
    
    # scores_distr_evenness = entropy(result["scores"], base=2) 
    # print(scores_distr_evenness)
    nl_tmp.remove(pred_model_name)
    
    
    
    
    

Can you list the battery life for the Apple iPhone SE, Xiaomi Redmi Note 9 Pro, and Huawei P30 Pro?
Huawei P30
1.6489471733745198
Xiaomi Redmi Note 9 Pro
1.1473202217736986
Xiaomi Redmi Note 9
0.3895731962047616


In [None]:
for n in name_list:
    if "Huawei P40" in n:
        print(n)

In [1]:
from transformers import AutoTokenizer, RobertaForQuestionAnswering

#load pretrained RoBERTa
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
model = RobertaForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')

Downloading (…)okenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/496M [00:00<?, ?B/s]

In [None]:
context = """

"""


question = "how many phone names are available"

In [None]:
# Tokenize the context and question pair
input_ids = tokenizer.encode(question, context)

#print(input_ids)
#print(tokenizer.decode(input_ids))

# Get the start and end positions of the answer in the context
model_output = model(torch.tensor([input_ids]))

#print(model_output.start_logits)

start_index = model_output.start_logits.argmax()
end_index = model_output.end_logits.argmax()

# Decode the answer from the context
answer = tokenizer.decode(input_ids[start_index:end_index + 1])
print(answer)