
### Step 1: Import the dataset and install Transformer 

In [217]:
# load the sentence-bert model from the HuggingFace model hub
!pip install transformers
from transformers import AutoTokenizer, AutoModel
from torch.nn import functional as F
tokenizer = AutoTokenizer.from_pretrained('deepset/sentence_bert')
model = AutoModel.from_pretrained('deepset/sentence_bert')

import pandas as pd
data = 'https://github.com/waadalhoshan/datasets/raw/main/Promise_NFR_dataset_orginal.csv'
dataset = pd.read_csv(data, delimiter = ';')

labels_short = ['US', 'SE']
labels_long =  ['Usability', 'Security']

Requirement_Statements = []
selected_class = 2
original_classes = []
for index, row in dataset.iterrows():
  #original_classes.append(row['NFR'])
  #Requirement_Statements.append(row['RequirementText'])
  if row['class'] == labels_short[0]:
    original_classes.append(labels_long[0])
    Requirement_Statements.append(row['RequirementText'])
  if row['class'] == labels_short[1]:
    original_classes.append(labels_long[1])
    Requirement_Statements.append(row['RequirementText'])




Some weights of the model checkpoint at deepset/sentence_bert were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Step 2: Set the paramteris for the Zero-shot Classifier 

In [218]:
# run inputs through model and mean-pool over the sequence
# dimension to get sequence-level representations
def Zero_Shot_classifier(requirement):
  inputs = tokenizer.batch_encode_plus([requirement] + labels_long,
                                      return_tensors='pt',
                                      pad_to_max_length=True)
  input_ids = inputs['input_ids']
  attention_mask = inputs['attention_mask']
  output = model(input_ids, attention_mask=attention_mask)[0]
  sentence_rep = output[:1].mean(dim=1)
  label_reps = output[1:].mean(dim=1)

  # now find the labels with the highest cosine similarities to
  # the sentence
  similarities = F.cosine_similarity(sentence_rep, label_reps)
  closest = similarities.argsort(descending=True)
  for ind in closest:
    label = labels_long[ind]
    print(f'label: {labels_long[ind]} \t similarity: {similarities[ind]}')
    break
  print("================================")
  return label

## Step 3: Use the Zero-shot classifier to classify NFR requirements

In [219]:
predicated_classes = [Zero_Shot_classifier(req) for req in Requirement_Statements]




label: Usability 	 similarity: 0.25248658657073975
label: Usability 	 similarity: 0.34944167733192444
label: Security 	 similarity: 0.3803856372833252
label: Usability 	 similarity: 0.4210813343524933
label: Security 	 similarity: 0.19491997361183167
label: Usability 	 similarity: 0.49788233637809753
label: Usability 	 similarity: 0.41117972135543823
label: Usability 	 similarity: 0.3213362693786621
label: Security 	 similarity: 0.18559730052947998
label: Security 	 similarity: 0.13058319687843323
label: Security 	 similarity: 0.23520641028881073
label: Usability 	 similarity: 0.49192580580711365
label: Security 	 similarity: 0.523211658000946
label: Usability 	 similarity: 0.5026825666427612
label: Security 	 similarity: 0.20061571896076202
label: Security 	 similarity: 0.20670509338378906
label: Security 	 similarity: -0.06472525745630264
label: Usability 	 similarity: 0.4148028492927551
label: Usability 	 similarity: 0.16369697451591492
label: Usability 	 similarity: 0.3485089242458

## Step 4: Evaluate the performance of the zero-shot classifier

In [220]:

from sklearn.metrics import precision_recall_fscore_support
print(precision_recall_fscore_support(original_classes, predicated_classes, average='macro'))
print(precision_recall_fscore_support(original_classes, predicated_classes, average='micro'))
print(precision_recall_fscore_support(original_classes, predicated_classes, average='weighted'))

from  sklearn.metrics import f1_score
print(f1_score(original_classes, predicated_classes, average= None))

(0.7479386165826845, 0.7447987336047037, 0.7436507936507937, None)
(0.7443609022556391, 0.7443609022556391, 0.7443609022556391, None)
(0.7481771308711542, 0.7443609022556391, 0.7435493495643871, None)
[0.75714286 0.73015873]


Reference: https://colab.research.google.com/github/joeddav/blog/blob/master/_notebooks/2020-05-29-ZSL.ipynb#scrollTo=j-BVPo0T0ujS