In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Directory that holds the two Druglib.com CSV files. Set the DRUGLIB_DATA_DIR
# environment variable to point somewhere else; the fallback is the original
# author's local folder, so existing runs behave exactly as before.
# NOTE(review): the fallback directory name appears to contain an invisible
# character right after "analysis" -- confirm it matches the folder on disk.
DATA_DIR = Path(os.environ.get(
    'DRUGLIB_DATA_DIR',
    '/Users/z88756212/Documents/AI/python/medical sentiment analysis​/archive/Drug Reviews (Druglib.com)',
))

# Kept as plain strings so any code expecting str paths keeps working.
train_path = str(DATA_DIR / 'drugLibTrain_raw.csv')
test_path = str(DATA_DIR / 'drugLibTest_raw.csv')

# Read the raw train/test splits and print the training frame for inspection.
df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)
print(df_train)

      Unnamed: 0       urlDrugName  rating           effectiveness  \
0           2202         enalapril       4        Highly Effective   
1           3117  ortho-tri-cyclen       1        Highly Effective   
2           1146           ponstel      10        Highly Effective   
3           3947          prilosec       3    Marginally Effective   
4           1951            lyrica       2    Marginally Effective   
...          ...               ...     ...                     ...   
3102        1039           vyvanse      10        Highly Effective   
3103        3281            zoloft       1             Ineffective   
3104        1664           climara       2    Marginally Effective   
3105        2621         trileptal       8  Considerably Effective   
3106        2748          micardis       4    Moderately Effective   

                        sideEffects                               condition  \
0                 Mild Side Effects  management of congestive heart failure   
1

urlDrugName	藥物名稱

rating	1-10 分評價（可用來做情感分類）

effectiveness	藥物效果（Highly Effective, Moderately Effective, etc.）

sideEffects	副作用程度（Mild, Severe, None, etc.）

condition	患者服用藥物的疾病

benefitsReview	藥物的好處（用來判斷正面情緒）

sideEffectsReview	副作用評論（用來判斷負面情緒）

commentsReview	整體評論（可能包含正負面意見）

In [4]:
# Print the raw test frame for a quick visual inspection.
print(df_test)

      Unnamed: 0     urlDrugName  rating           effectiveness  \
0           1366          biaxin       9  Considerably Effective   
1           3724        lamictal       9        Highly Effective   
2           3824        depakene       4    Moderately Effective   
3            969         sarafem      10        Highly Effective   
4            696        accutane      10        Highly Effective   
...          ...             ...     ...                     ...   
1031         690        accutane       7  Considerably Effective   
1032        1071      proair-hfa      10        Highly Effective   
1033         681        accutane       8  Considerably Effective   
1034        2709         divigel      10        Highly Effective   
1035         729  claripel-cream       8  Considerably Effective   

                sideEffects                          condition  \
0         Mild Side Effects                    sinus infection   
1         Mild Side Effects                   bipol

In [5]:
# Keep only the numeric rating and the free-text overall comment, then drop
# rows where either value is missing.
# dropna() returns a new frame rather than mutating a column slice in place,
# and .copy() detaches the result from the raw frame so that the column
# assignments in later cells do not trigger SettingWithCopyWarning.
df_train = df_train[['rating', 'commentsReview']].dropna().copy()
df_test = df_test[['rating', 'commentsReview']].dropna().copy()

In [6]:
def map_sentiment(rating):
    """Bucket a numeric rating into a coarse sentiment label.

    Args:
        rating: Numeric review score.

    Returns:
        "negative" when rating <= 4, "positive" when rating >= 7, and
        "neutral" for everything else (including values that satisfy neither
        comparison, such as NaN).
    """
    if rating <= 4:
        return "negative"
    if rating >= 7:
        return "positive"
    return "neutral"

# Apply the same two steps to both splits: derive the sentiment label from the
# rating, then lower-case the comment and trim surrounding whitespace.
for frame in (df_train, df_test):
    frame['sentiment'] = frame['rating'].apply(map_sentiment)
    frame['commentsReview'] = frame['commentsReview'].str.lower().str.strip()

# Preview the result and show how many training rows fall into each class.
print(df_train.head())
print(df_train['sentiment'].value_counts())

   rating                                     commentsReview sentiment
0       4  monitor blood pressure , weight and asses for ...  negative
1       1  i hate this birth control, i would not suggest...  negative
2      10  i took 2 pills at the onset of my menstrual cr...  positive
3       3  i was given prilosec prescription at a dose of...  negative
4       2                                          see above  negative
sentiment
positive    2123
negative     656
neutral      316
Name: count, dtype: int64


In [7]:
# Drop the "neutral" rows so only the positive/negative classes remain.
# .copy() makes each filtered frame an independent object; the following cells
# add new columns to these frames, which would otherwise raise
# SettingWithCopyWarning on a boolean-mask slice.
df_train = df_train[df_train['sentiment'] != 'neutral'].copy()
df_test = df_test[df_test['sentiment'] != 'neutral'].copy()
print(df_train['sentiment'].value_counts())

sentiment
positive    2123
negative     656
Name: count, dtype: int64


In [8]:
import re
def clean_text(text):
    """Normalise a review string for tokenisation.

    The text is lower-cased, every character matched by the regex ``\\W``
    (i.e. anything that is not a word character) is replaced by a space, and
    the remaining words are re-joined with single spaces, which also removes
    leading and trailing whitespace.

    Args:
        text: The raw review string.

    Returns:
        The cleaned, single-spaced, lower-case string.
    """
    spaced = re.sub(r'\W', ' ', text.lower())
    # After the substitution the only separators left are plain spaces, so
    # split/join collapses runs of them and trims both ends.
    return ' '.join(spaced.split())

# Store the cleaned version of each comment in a new column on both splits.
for frame in (df_train, df_test):
    frame['cleaned_comments'] = frame['commentsReview'].apply(clean_text)

# Preview the cleaned text next to its sentiment label.
preview_columns = ['cleaned_comments', 'sentiment']
print(df_train[preview_columns].head())

                                    cleaned_comments sentiment
0  monitor blood pressure weight and asses for re...  negative
1  i hate this birth control i would not suggest ...  negative
2  i took 2 pills at the onset of my menstrual cr...  positive
3  i was given prilosec prescription at a dose of...  negative
4                                          see above  negative


In [9]:
# Encode the sentiment strings as integers: positive -> 1, negative -> 0.
label_ids = {'positive': 1, 'negative': 0}
for frame in (df_train, df_test):
    frame['sentiment_label'] = frame['sentiment'].map(label_ids)

print(df_train[['cleaned_comments', 'sentiment_label']].head())

                                    cleaned_comments  sentiment_label
0  monitor blood pressure weight and asses for re...                0
1  i hate this birth control i would not suggest ...                0
2  i took 2 pills at the onset of my menstrual cr...                1
3  i was given prilosec prescription at a dose of...                0
4                                          see above                0


In [10]:
from transformers import BertTokenizer

# Tokenizer matching the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Identical options for both splits: cut sequences longer than 512 tokens and
# pad the rest.
# NOTE(review): with padding=True every sequence in a call is padded to the
# longest one in that call, so the whole split shares one (possibly large)
# length -- worth confirming this is intended given the slow training steps.
tokenize_kwargs = dict(truncation=True, padding=True, max_length=512)
train_encodings = tokenizer(df_train['cleaned_comments'].tolist(), **tokenize_kwargs)
test_encodings = tokenizer(df_test['cleaned_comments'].tolist(), **tokenize_kwargs)

print(train_encodings.keys())



dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])


In [11]:
import torch

class SentimentDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with per-example labels.

    Args:
        encodings: Mapping from field name (e.g. ``input_ids``) to a sequence
            holding one entry per example.
        labels: Sequence with one label per example; its length defines the
            dataset length.
    """

    def __init__(self, encodings, labels):
        self.encodings, self.labels = encodings, labels

    def __len__(self):
        # The number of examples is taken from the labels, not the encodings.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, with the label under 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Pair each split's encodings with its integer labels (as plain Python lists).
train_labels = df_train['sentiment_label'].tolist()
test_labels = df_test['sentiment_label'].tolist()
train_dataset = SentimentDataset(train_encodings, train_labels)
test_dataset = SentimentDataset(test_encodings, test_labels)

In [12]:
from transformers import BertForSequenceClassification

# Use the CUDA GPU when one is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Pretrained BERT encoder with a two-class sequence-classification head.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [13]:
import torch

# Choose the compute device in order of preference: CUDA GPU, Apple-silicon
# MPS, then CPU. Checking CUDA first matters because this cell overrides the
# device chosen in the previous cell; testing only for MPS would push the
# model back to the CPU on a CUDA machine.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model.to(device)


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [14]:
from torch.utils.data import DataLoader

# Number of examples per mini-batch for both loaders.
batch_size = 8

# Training batches are reshuffled; the test loader keeps the dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)


In [15]:
# Use PyTorch's AdamW: the copy that used to live in `transformers` is
# deprecated in favour of this one and is not shipped by newer releases.
from torch.optim import AdamW

# weight_decay=0.0 is passed explicitly because torch's default is 0.01,
# whereas the previous transformers implementation defaulted to 0.0; this
# keeps the optimisation settings unchanged.
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8, weight_decay=0.0)

# Standalone cross-entropy criterion. The training loop below reads the loss
# returned by the model itself, so this object is only needed by code that
# computes the loss manually.
loss_fn = torch.nn.CrossEntropyLoss()




In [16]:
from tqdm import tqdm
from transformers import get_scheduler

# Linear learning-rate decay over every optimisation step, with no warm-up.
num_epochs = 3
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

for epoch in range(num_epochs):
    model.train()
    # The epoch label is fixed for the whole epoch, so set it once here
    # instead of on every batch.
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=True)
    running_loss = 0.0

    for step, batch in enumerate(loop, start=1):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        # Passing labels makes the model return the classification loss.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # Update the progress bar: the last-batch loss is noisy, so also show
        # the running mean over the epoch so far.
        batch_loss = loss.item()
        running_loss += batch_loss
        loop.set_postfix(loss=batch_loss, avg_loss=running_loss / step)


Epoch 1: 100%|██████████████████| 348/348 [4:10:33<00:00, 43.20s/it, loss=0.601]
Epoch 2: 100%|██████████████████| 348/348 [3:08:44<00:00, 32.54s/it, loss=0.231]
Epoch 3: 100%|███████████████████| 348/348 [3:27:51<00:00, 35.84s/it, loss=1.23]


In [19]:
from sklearn.metrics import accuracy_score, classification_report

# Switch to inference mode and collect predictions over the whole test set.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():  # no gradients are needed for evaluation
    for batch in test_loader:
        input_ids, attention_mask, labels = (
            batch[key].to(device) for key in ("input_ids", "attention_mask", "labels")
        )

        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        # Predicted class = index of the largest logit.
        predictions = logits.argmax(dim=-1)

        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Overall accuracy on the test split.
accuracy = accuracy_score(all_labels, all_preds)
print(f"測試準確率: {accuracy:.4f}")

# Per-class precision / recall / F1; label 0 is "negative", label 1 "positive".
report = classification_report(
    all_labels,
    all_preds,
    target_names=["negative", "positive"],
)
print(report)
      

測試準確率: 0.8022
              precision    recall  f1-score   support

    negative       0.69      0.46      0.55       240
    positive       0.83      0.93      0.87       670

    accuracy                           0.80       910
   macro avg       0.76      0.69      0.71       910
weighted avg       0.79      0.80      0.79       910

