In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored under MyDrive can be read through ordinary paths.
# NOTE(review): google.colab is only importable inside a Colab runtime; this
# cell will fail elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 22.1 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 49.1 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 60.9 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.2 transformers-4.24.0


In [3]:
import torch
from transformers import AutoTokenizer, AutoModel

from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch.nn.functional as F
from torch import nn, optim

In [4]:
# Hugging Face checkpoint identifier shared by the tokenizer below and by the
# encoder created inside TextClassifier.
model_name = 'sentence-transformers/all-distilroberta-v1'

# Tokenizer matching the checkpoint (downloaded on first use).
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

class TextClassifier(nn.Module):
    """Transformer encoder followed by a two-layer classification head.

    The encoder is loaded from the module-level ``model_name`` checkpoint with
    ``return_dict=False``, so calling it yields a tuple. The head is
    dropout -> Linear(hidden_size, n_classes) -> dropout ->
    Linear(n_classes, n_classes) -> softmax over dimension 1.

    Attribute names (``bert``, ``drop``, ``L1``, ``L2``, ``out``) are kept
    as-is: a model pickled with this class refers to its submodules by these
    names.
    """

    def __init__(self, n_classes):
        """Build the encoder and the classification head.

        Args:
            n_classes: Number of output classes; used as the width of both
                linear layers' outputs.
        """
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name, return_dict=False)
        self.drop = nn.Dropout(p=0.3)
        encoder_width = self.bert.config.hidden_size
        self.L1 = nn.Linear(encoder_width, n_classes)
        self.L2 = nn.Linear(n_classes, n_classes)
        self.out = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask):
        """Return softmax scores for the given tokenized input.

        Args:
            input_ids: Token id tensor forwarded to the encoder.
            attention_mask: Attention mask tensor forwarded to the encoder.

        Returns:
            The softmax (taken over dimension 1) of the head's output.
            NOTE(review): presumably shaped (batch, n_classes) - confirm.
        """
        encoder_outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        # The encoder returns a 2-tuple here; only the second element (the
        # pooled output) feeds the classification head.
        _, pooled = encoder_outputs
        hidden = self.L1(self.drop(pooled))
        logits = self.L2(self.drop(hidden))
        return self.out(logits)

Downloading:   0%|          | 0.00/333 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [7]:
def pipeline(text, model=None, model_path='/content/drive/MyDrive/ML/model2.pt'):
    """Predict the category label for a piece of text.

    The text is tokenized with the module-level ``tokenizer`` (padded or
    truncated to 256 tokens), run through the classifier on the module-level
    ``device``, and the highest-scoring class is mapped to its label.

    Args:
        text: The input text to classify.
        model: Optional, already-loaded classifier. When omitted, the model is
            loaded from ``model_path`` (as the original did on every call).
            Pass a preloaded model to avoid re-reading the file for each
            prediction. The model is moved to ``device`` and switched to
            evaluation mode as a side effect.
        model_path: Path of the serialized model, used only when ``model`` is
            not given.

    Returns:
        A one-element list holding the predicted label string.
    """
    if model is None:
        # SECURITY: torch.load unpickles arbitrary Python objects; only load
        # model files from a trusted source.
        model = torch.load(model_path, map_location=torch.device('cpu'))
    model = model.to(device)
    # Evaluation mode disables dropout; without it the classifier's dropout
    # layers can stay active and make predictions non-deterministic.
    model.eval()

    encoding = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=256,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # Move the scores to host memory before converting: Tensor.numpy() only
    # works on CPU tensors, so this previously failed whenever device was CUDA.
    scores = output.detach().cpu().numpy()
    # Collapse the leading axis with max, then take the best-scoring class.
    best_index = int(np.argmax(np.max(scores, axis=0)))

    # NOTE(review): the label order must match the class order the model was
    # trained with - presumably a fitted label binarizer's classes; confirm.
    mlb_classes = ['clinical research', 'data management and statistics', 'manufacturing  operations', 'medical affairs  pharmaceutical physician', 'medical information and pharmacovigilance', 'pharmaceutical healthcare and medical sales', 'pharmaceutical marketing', 'pharmacy', 'qualityassurance', 'regulatory affairs', 'science']
    return [mlb_classes[best_index]]


In [13]:
# Sample input used to exercise pipeline() end to end. The string is passed
# verbatim, including its leading space and the '&amp;amp;' sequence.
# NOTE(review): '&amp;amp;' looks like a double-escaped HTML entity, presumably
# a scraping artifact - confirm whether it should be cleaned before inference.
text = ' information and education to customer accounts within a defined Sales territory including both Primary and Secondary care sector organisations. Effective and efficient administration and preparation for support provision. Provide direct and timely feedback to the National Sales Manager to support identification of ‘sales’ opportunities.  Participate in weekly Territory Planning meetings / calls with the National Sales Manager. Maintain a full and current knowledge of the company’s products and services. Represent the best interests of the Company internally and externally in a professional, supportive and courteous manner. Timely maintenance and administration of information databases to include: Company CRM system, &amp;amp; Company Business Planning System. Timely submission of expense claim information and forms to Head Office for processing and payment. Other tasks from time to time as directed by the National Sales Manager and / or European Sales Director. Role Specific Competencies'
# pipeline() returns a one-element list containing the predicted label.
output = pipeline(text)

In [21]:
# Display the prediction returned by pipeline(text) as the cell's output.
output

['pharmaceutical healthcare and medical sales']