In [134]:
import pandas as pd
import numpy as np
import re
import string
import nltk
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns
import colorlover as cl
import random
import shap

from wordcloud import WordCloud
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from plotly.subplots import make_subplots
from nltk.tokenize import TreebankWordTokenizer, TweetTokenizer as twt
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models import Word2Vec, Doc2Vec, KeyedVectors
from gensim.models.doc2vec import TaggedDocument
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV

In [135]:
def process_load_data(dataframes):
    data = pd.concat(dataframes, ignore_index=True)

    # Select columns to be retained and rename them
    selected_columns = [
        'id',
        'context',
        'annotations/0/events/0/event_type',
        'annotations/0/events/0/Trigger/text/0/0',
        'annotations/0/events/0/Treatment/Drug/text/0/0',
        'annotations/0/events/0/Effect/text/0/0'
    ]
    new_names = ['id', 'context', 'event_type', 'trigger_text', 'drug', 'drug_effect']
    df = data[selected_columns].rename(columns=dict(zip(selected_columns, new_names)))

    # Map sentiment labels to numeric values
    sentiment_mapping = {'Adverse_event': 1, 'Potential_therapeutic_event': 0}
    df['sentiment'] = df['event_type'].map(sentiment_mapping)

    return df

In [136]:
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

dataframes = [train_data, test_data]
df = process_load_data(dataframes)

Columns (112,114,121,125,157,168,186,188,206,208,217,229,231,232,233,235,237,241,244,246,247,249,250,251,268,270,271,280,282,286,288,289,291,305,310,312,313,315,316,318,319,323,324,327,328,329,331,333,335,336,338,342,344,348,350,352,353,355,356,358,359,361) have mixed types. Specify dtype option on import or set low_memory=False.


In [137]:
df.sample(5)

Unnamed: 0,id,context,event_type,trigger_text,drug,drug_effect,sentiment
3292,15588385_2,"After an extensive review of the literature, w...",Potential_therapeutic_event,control,amiodarone,,0
849,8438851_2,Idiosyncratic factors involving vasopressin re...,Adverse_event,contribute,vasopressin,impaired tissue perfusion,1
3723,15836666_1,Chronic fentanyl application induces adrenocor...,Adverse_event,induces,fentanyl,adrenocortical insufficiency,1
603,8239963_3,"Though hypotension, dry mouth, and constipatio...",Adverse_event,induced,clonidine,bradycardia,1
2968,12477460_1,Atypical endometriosis may act as a precancero...,Adverse_event,induced,tamoxifen,malignant transformation of endometriosis,1


#### Load pre-trained word vectors from a binary file located at the specified path. The file contains word vectors in a format compatible with Word2Vec. Only the first 100,000 word vectors are loaded.

In [138]:
model_path = './biowordvec/BioWordVec_PubMed_MIMICIII_d200.vec.bin'
model = KeyedVectors.load_word2vec_format(model_path, binary=True, limit=100000)

In [139]:
def average_word_embeddings(df, column, word_embeddings):
    embeddings = []
    for document in df[column]:
        for word in document.split():
            if word in word_embeddings:
                embeddings.append(word_embeddings[word])
    if len(embeddings) > 0:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros_like(word_embeddings.vector_size)

In [140]:
df['average_embeddings'] = df.apply(lambda row: average_word_embeddings(row, 'context', model), axis=1)

In [141]:
df.sample(2)

Unnamed: 0,id,context,event_type,trigger_text,drug,drug_effect,sentiment,average_embeddings
856,11568758_1,Lichenoid drug eruption to salsalate.,Adverse_event,eruption,salsalate,Lichenoid drug eruption,1,"[0.26032946, 0.12860788, 0.12851445, 0.0081430..."
787,10891991_5,PATIENTS: Three patients developed a reproduct...,Adverse_event,developed,valproate,reproductive endocrine disorder,1,"[0.21409237, 0.14898957, 0.11392334, 0.0940338..."


## MultinomialNB

In [142]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('sentiment', axis=1),
                                                    df['sentiment'], test_size=0.2, random_state=42)

scaler = MinMaxScaler(feature_range=(0, 1))

X_train_embeddings = np.array(X_train['average_embeddings'].tolist())
X_test_embeddings = np.array(X_test['average_embeddings'].tolist())

X_train_scaled = scaler.fit_transform(X_train_embeddings)
X_test_scaled = scaler.transform(X_test_embeddings)

pipeline = Pipeline([
    ('scaler', MinMaxScaler(feature_range=(0, 1))),
    ('classifier', MultinomialNB())
])

param_grid = {
    'classifier__alpha': [0.1, 1.0, 10.0],
}

grid_search = GridSearchCV(pipeline, param_grid, cv=5)
grid_search.fit(X_train_embeddings, y_train)

best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test_embeddings)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = classification_report(y_test, y_pred, zero_division=1)
print("Classification Report:")
print(report)


Accuracy: 0.9095607235142119
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.00      0.00        70
           1       0.91      1.00      0.95       704

    accuracy                           0.91       774
   macro avg       0.95      0.50      0.48       774
weighted avg       0.92      0.91      0.87       774



In [143]:
lines = report.split('\n')
data = [line.split() for line in lines[2:-5]]
columns = ['class', 'precision', 'recall', 'f1-score', 'support']
df_report = pd.DataFrame(data, columns=columns).replace({'class': {'0': 'PTE', '1': 'ADE'}})
df_report

Unnamed: 0,class,precision,recall,f1-score,support
0,PTE,1.0,0.0,0.0,70
1,ADE,0.91,1.0,0.95,704


### Validate MultinomialNB using a validation dataset ( @Savio discussion point - dev_df is not balanced, negative more than positive 

In [144]:
dev_df = pd.read_csv('data/dev.csv')

validation_dataframe = [dev_df]
df_v = process_load_data(validation_dataframe)

In [145]:
df_v['average_embeddings'] = df_v.apply(lambda row: average_word_embeddings(row, 'context', model), axis=1)

In [151]:
df_v.sample(5)

Unnamed: 0,id,context,event_type,trigger_text,drug,drug_effect,sentiment,average_embeddings
603,8551001_4,Thiopurine methyltransferase deficiency occurs...,Adverse_event,after,azathioprine,profound myelosuppression,1,"[0.2511576, 0.12727843, 0.2048807, 0.028870987..."
804,18775393_1,CONCLUSION: This report describes a case of a ...,Adverse_event,resulted,econazole,overanticoagulation and a life-threatening lar...,1,"[0.22775793, 0.18932436, 0.14468655, 0.0202081..."
329,9517515_2,Second cancers including various types of hema...,Adverse_event,reported,interferon alfa,Second cancers including various types of hema...,1,"[0.26094806, 0.15710987, 0.14052378, 0.0250206..."
3,4044222_1,"A 54-year-old man, treated with amiodarone, de...",Adverse_event,developed,amiodarone,thyrotoxicosis,1,"[0.23841712, 0.14187945, 0.1431606, 0.05480042..."
168,968449_2,Bone marrow chromosomes were studied in 2 pati...,Potential_therapeutic_event,therapy,chlorambucil,,0,"[0.27884442, 0.15842652, 0.14230281, 0.0185529..."


In [169]:
X_dev = df_v.drop('sentiment', axis=1)
y_dev = df_v['sentiment']

X_dev_embeddings = np.array(X_dev['average_embeddings'].tolist())
X_dev_scaled = scaler.transform(X_dev_embeddings)

y_dev_pred = best_model.predict(X_dev_scaled)

accuracy_dev = accuracy_score(y_dev, y_dev_pred)
print("Accuracy on dev set:", accuracy_dev)

report_dev = classification_report(y_dev, y_dev_pred, zero_division=1)
print("Classification Report on dev set:")
print(report_dev)

Accuracy on dev set: 0.8928199791883454
Classification Report on dev set:
              precision    recall  f1-score   support

           0       0.33      0.01      0.02       102
           1       0.89      1.00      0.94       859

    accuracy                           0.89       961
   macro avg       0.61      0.50      0.48       961
weighted avg       0.84      0.89      0.85       961



Here's a breakdown of the evaluation results:

*** For the test set:

- Accuracy: 0.9095607235142119
- Precision, recall, and F1-score for PTE (Potential Therapeutic Event) class (0): The precision is 1.00, recall is 0.00, and F1-score is 0.00. This indicates that the model correctly predicted all instances of ADE (Adverse Event) but failed to predict any instances of PTE.
- Precision, recall, and F1-score for ADE (Adverse Event) class (1): The precision is 0.91, recall is 1.00, and F1-score is 0.95. This indicates that the model performed well in predicting instances of ADE.

*** For the dev set:

- Accuracy: 0.8928199791883454
- Precision, recall, and F1-score for PTE (Potential Therapeutic Event) class (0): The precision is 0.33, recall is 0.01, and F1-score is 0.02. This indicates that the model predicted very few instances of PTE correctly, resulting in low precision, recall, and F1-score for this class.
- Precision, recall, and F1-score for ADE (Adverse Event) class (1): The precision is 0.89, recall is 1.00, and F1-score is 0.94. This indicates that the model performed well in predicting instances of ADE.

Based on the provided classification reports, the model is performing well in predicting instances of Adverse Event (ADE) with high precision, recall, and F1-score. However, the model is not performing well in predicting instances of Potential Therapeutic Event (PTE) as indicated by low precision, recall, and F1-score for the PTE class.

In the test set, the model correctly predicts all instances of ADE but fails to predict any instances of PTE. This is reflected in the high accuracy for the test set. However, it's important to note that the model's inability to predict PTE correctly is a limitation.

In the dev set, the model has low precision, recall, and F1-score for the PTE class, indicating that it struggles to accurately identify instances of PTE. On the other hand, the model performs well in predicting ADE instances, as reflected by the high precision, recall, and F1-score for the ADE class.

Overall, the model's performance is good in predicting ADE but needs improvement in predicting PTE.

## LDA

In [None]:
X_train_embeddings = X_train.apply(lambda x: average_word_embeddings(x, 'context', word_embeddings))
X_test_embeddings = X_test.apply(lambda x: average_word_embeddings(x, 'context', word_embeddings))

X_train_embeddings = np.stack(X_train_embeddings.values)
X_test_embeddings = np.stack(X_test_embeddings.values)


lda = LatentDirichletAllocation(n_components=10, random_state=42)
lda.fit(X_train_embeddings)

X_train_topics = lda.transform(X_train_embeddings)
X_test_topics = lda.transform(X_test_embeddings)
