In [1]:
import pandas as pd
from gensim.models import KeyedVectors

In [2]:
def process_load_data(dataframes):
    """Combine annotation frames into one tidy frame with a numeric label.

    Parameters
    ----------
    dataframes : iterable of pandas.DataFrame
        Frames to stack; each is expected to expose the six source columns
        listed in ``column_renames`` below (a missing column raises
        ``KeyError`` when the columns are selected).

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index and the columns ``id``,
        ``context``, ``event_type``, ``trigger_text``, ``drug``,
        ``drug_effect`` and ``sentiment``. ``sentiment`` is 1 for
        ``'Adverse_event'`` and 0 for ``'Potential_therapeutic_event'``;
        any other ``event_type`` value is left as NaN by ``Series.map``.
    """
    # Source column -> short name; insertion order fixes the output column order.
    column_renames = {
        'id': 'id',
        'context': 'context',
        'annotations/0/events/0/event_type': 'event_type',
        'annotations/0/events/0/Trigger/text/0/0': 'trigger_text',
        'annotations/0/events/0/Treatment/Drug/text/0/0': 'drug',
        'annotations/0/events/0/Effect/text/0/0': 'drug_effect',
    }
    # Numeric encoding of the event type.
    label_codes = {'Adverse_event': 1, 'Potential_therapeutic_event': 0}

    stacked = pd.concat(dataframes, ignore_index=True)
    trimmed = stacked[list(column_renames)].rename(columns=column_renames)

    # assign() appends the label as the last column of a new frame.
    return trimmed.assign(sentiment=trimmed['event_type'].map(label_codes))

In [3]:
def evaluate_model(y_true, y_pred, y_prob):
    """Print classification metrics and the confusion matrix for predictions.

    Parameters
    ----------
    y_true
        Ground-truth labels.
    y_pred
        Predicted labels, compared against ``y_true`` for accuracy,
        precision, recall, F1 and the confusion matrix.
    y_prob
        Scores passed to ``roc_auc_score`` together with ``y_true``.

    Returns
    -------
    None
        Results are only printed, each score rounded to two decimals.

    NOTE(review): ``accuracy_score``, ``precision_score``, ``recall_score``,
    ``f1_score``, ``roc_auc_score`` and ``confusion_matrix`` are not imported
    in the cells visible here -- presumably they come from ``sklearn.metrics``;
    confirm they are imported before this function is called.
    """
    # Every score is computed before anything is printed.
    scores = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
        'F1 score': f1_score(y_true, y_pred),
        'ROC AUC score': roc_auc_score(y_true, y_prob),
    }
    for label, value in scores.items():
        print(f'{label}: {value:.2f}')

    # Build the matrix first so a failure here prints no dangling header.
    matrix = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix: ")
    print(matrix)

In [4]:
def load_wordvec_model(model_path: str, binary: bool = True, limit: int = 100000):
    """Load word vectors stored in word2vec format.

    Parameters
    ----------
    model_path : str
        Location of the vector file, handed to
        ``KeyedVectors.load_word2vec_format``.
    binary : bool, default True
        Forwarded as the loader's ``binary`` flag.
    limit : int, default 100000
        Forwarded as the loader's ``limit``; per the gensim documentation
        this caps how many vectors are read from the file.

    Returns
    -------
    The object returned by ``KeyedVectors.load_word2vec_format``.
    """
    return KeyedVectors.load_word2vec_format(
        model_path,
        binary=binary,
        limit=limit,
    )