In [1]:
!pip install transformers
!pip install sentencepiece
!pip install datasets

Collecting transformers
  Downloading transformers-4.9.2-py3-none-any.whl (2.6 MB)
[?25l[K     |▏                               | 10 kB 30.1 MB/s eta 0:00:01[K     |▎                               | 20 kB 26.4 MB/s eta 0:00:01[K     |▍                               | 30 kB 18.2 MB/s eta 0:00:01[K     |▌                               | 40 kB 15.2 MB/s eta 0:00:01[K     |▋                               | 51 kB 8.5 MB/s eta 0:00:01[K     |▊                               | 61 kB 8.8 MB/s eta 0:00:01[K     |▉                               | 71 kB 7.3 MB/s eta 0:00:01[K     |█                               | 81 kB 8.1 MB/s eta 0:00:01[K     |█▏                              | 92 kB 8.4 MB/s eta 0:00:01[K     |█▎                              | 102 kB 8.2 MB/s eta 0:00:01[K     |█▍                              | 112 kB 8.2 MB/s eta 0:00:01[K     |█▌                              | 122 kB 8.2 MB/s eta 0:00:01[K     |█▋                              | 133 kB 8.2 MB/s eta 

## Inference on test dataset

In [58]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
import numpy as np

In [10]:
# Tokenizer of the base checkpoint, requesting the fast implementation.
# NOTE(review): assumes the trained model loaded later uses this same tokenizer — confirm.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased",
    use_fast=True,
)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

### Load model

In [2]:
# Mount Google Drive (Colab only) so files under /content/drive become readable;
# the model checkpoint is loaded from there in a later cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Load the sequence-classification model from the checkpoint directory on the mounted Drive.
multi_model = AutoModelForSequenceClassification.from_pretrained(
    "/content/drive/MyDrive/labtwin-test/multi_model_trained"
)

In [6]:
# Sanity check: display the concrete model class that was loaded.
type(multi_model)

transformers.models.distilbert.modeling_distilbert.DistilBertForSequenceClassification

### Test on 1 example

In [17]:
# First sample passage (general-knowledge text about the BBC) used for a single-text prediction.
example_text = """
The British Broadcasting Corporation (BBC) is the national broadcaster of the United Kingdom. Headquartered at Broadcasting House in London, it is the world's oldest national broadcaster, and the largest broadcaster in the world by number of employees, employing over 22,000 staff in total, of whom more than 19,000 are in public-sector broadcasting.
"""

In [18]:
# Tokenize the single passage into PyTorch tensors (truncated to at most 512 tokens).
example_inputs = tokenizer(example_text, padding=True, truncation=True, max_length=512, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built for the forward pass.
with torch.no_grad():
    predictions = multi_model(**example_inputs).logits

# Class probabilities for the only sequence in the batch (shown as the cell output).
torch.softmax(predictions, dim=1).tolist()[0]

[0.9997023940086365,
 0.00011381757212802768,
 1.0845762517419644e-05,
 1.8507029380998574e-05,
 4.5009346649749205e-05,
 0.0001093574392143637]

In [36]:
# Second sample passage (lower-cased text about the antibiotic sparfloxacin); replaces the first example.
example_text = """
sparfloxacin is a fluoroquinolone antibiotic used in the treatment of bacterial infections. it has a controversial safety profile.it was patented in 1985 and approved for medical use in 1993. zagam is no longer available in the united states.
"""

In [37]:
# Tokenize the single passage into PyTorch tensors (truncated to at most 512 tokens).
example_inputs = tokenizer(example_text, padding=True, truncation=True, max_length=512, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built for the forward pass.
with torch.no_grad():
    predictions = multi_model(**example_inputs).logits

# Class probabilities for the only sequence in the batch (shown as the cell output).
torch.softmax(predictions, dim=1).tolist()[0]

[6.877542182337493e-05,
 4.998061922378838e-05,
 4.997218638891354e-05,
 3.0868199246469885e-05,
 3.581976125133224e-05,
 0.999764621257782]

In [None]:
# Class index order: 0='non_science', 1='reagent', 2='drug', 3='protein', 4='cell', 5='antibiotic'

## Inference on the full test dataset

In [64]:
def get_prediction(text):
    """
    Get the label prediction for a given text.

    Uses the module-level ``tokenizer`` and ``multi_model``.

    Args:
        text: Input handed to the tokenizer. Only the prediction for the
            first sequence of the resulting batch is returned.

    Returns:
        int: Index of the highest-scoring class for the first sequence.
    """

    # Tokenize into PyTorch tensors, truncating anything beyond 512 tokens.
    inputs = tokenizer(text, padding=True, truncation=True, max_length=512, return_tensors="pt")

    # Inference only: disable autograd so no computation graph is built
    # (and kept alive) for every text that is classified.
    with torch.no_grad():
        classification_logits = multi_model(**inputs).logits

    # Softmax is monotonic, so the argmax of the logits is also the argmax of
    # the probabilities; no need to materialise the probabilities here.
    class_label = int(torch.argmax(classification_logits, dim=1)[0])

    return class_label

In [51]:
# Read the test dataset from CSV; later cells use its 'text' and 'label' columns.
test_dataset_df = pd.read_csv('/content/test_dataset.csv')

In [49]:
### Get predictions for each text in the test dataset

In [65]:
# Texts to classify, as a plain Python list in the dataset's row order.
test_texts = test_dataset_df['text'].tolist()

In [68]:
# Class names, positioned by the class index that get_prediction returns.
label_names = ['non_science', 'reagent', 'drug', 'protein', 'cell', 'antibiotic']

test_predictions = []

for text in test_texts:

    # get the predicted class index
    res_index = get_prediction(text)

    # An index outside the known classes must not fall through: appending any
    # label for it would silently corrupt the evaluation, so fail loudly.
    if not 0 <= res_index < len(label_names):
        raise ValueError("Wrong output: unexpected class index {}".format(res_index))

    test_predictions.append(label_names[res_index])

In [70]:
# Ground-truth labels from the test dataset, in the same row order as the 'text' column.
true_labels = test_dataset_df['label'].tolist()

In [71]:
# Every test row must have exactly one prediction before any metric is computed.
assert len(true_labels) == len(test_predictions), (
    "label/prediction count mismatch: {} vs {}".format(len(true_labels), len(test_predictions))
)

In [74]:
# Spot check: true label of the test row at index 100.
true_labels[100]

'cell'

In [75]:
# Spot check: predicted label of the test row at index 100.
test_predictions[100]

'cell'

## Confusion Matrix

In [76]:
from sklearn.metrics import confusion_matrix

In [77]:
# Count matrix: rows are the true labels, columns the predicted labels,
# both following the order given in `labels`.
confusion = confusion_matrix(
    y_true=true_labels,
    y_pred=test_predictions,
    labels=['non_science', 'reagent', 'drug', 'protein', 'cell', 'antibiotic'],
)

In [78]:
# Header, a blank line, then the raw count matrix.
print('Confusion Matrix\n', confusion, sep='\n')

Confusion Matrix

[[278   6   2   1   1   1]
 [  4  32  11   3   0   2]
 [  0   6  38   1   4   4]
 [  2   2   2  78   5   1]
 [  1   2   1   2  59   0]
 [  0   0   4   0   1  48]]


In [79]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [80]:
# Overall accuracy first, surrounded by blank lines.
print('\nAccuracy: {:.2f}\n'.format(accuracy_score(true_labels, test_predictions)))

# Precision / recall / F1 are reported once per averaging strategy; looping
# over the strategies replaces nine near-identical print statements.
averaging_modes = ('micro', 'macro', 'weighted')

for position, mode in enumerate(averaging_modes):
    heading = mode.capitalize()
    is_last = position == len(averaging_modes) - 1

    print('{} Precision: {:.2f}'.format(heading, precision_score(true_labels, test_predictions, average=mode)))
    print('{} Recall: {:.2f}'.format(heading, recall_score(true_labels, test_predictions, average=mode)))
    # A blank line separates the groups; none is emitted after the final one.
    print('{} F1-score: {:.2f}{}'.format(heading, f1_score(true_labels, test_predictions, average=mode), '' if is_last else '\n'))



Accuracy: 0.89

Micro Precision: 0.89
Micro Recall: 0.89
Micro F1-score: 0.89

Macro Precision: 0.82
Macro Recall: 0.83
Macro F1-score: 0.82

Weighted Precision: 0.89
Weighted Recall: 0.89
Weighted F1-score: 0.89


In [86]:
from sklearn.metrics import classification_report

# Header, a blank line, then the per-class precision / recall / F1 / support table.
print('\nClassification Report\n', classification_report(true_labels, test_predictions), sep='\n')


Classification Report

              precision    recall  f1-score   support

  antibiotic       0.86      0.91      0.88        53
        cell       0.84      0.91      0.87        65
        drug       0.66      0.72      0.68        53
 non_science       0.98      0.96      0.97       289
     protein       0.92      0.87      0.89        90
     reagent       0.67      0.62      0.64        52

    accuracy                           0.89       602
   macro avg       0.82      0.83      0.82       602
weighted avg       0.89      0.89      0.89       602

