In [80]:
from simpletransformers.classification import ClassificationModel
from simpletransformers.ner import NERModel
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import pandas as pd
import numpy as np

1. prepare binary classification model

In [81]:
# Arguments handed to the binary sentence classifier (ClassificationModel).
train_args = {
    # Runtime / preprocessing
    "fp16": False,
    "sliding_window": True,
    "reprocess_input_data": True,
    "overwrite_output_dir": True,

    # Evaluation and logging cadence (in steps)
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 4,
    "logging_steps": 4,
    "evaluate_during_training_verbose": True,

    # Optimisation schedule
    "num_train_epochs": 5,
    "learning_rate": 3.5e-5,
    "train_batch_size": 32,
    "eval_batch_size": 8,
    # NOTE(review): both warmup_ratio and warmup_steps are set; presumably an
    # explicit non-zero warmup_steps takes precedence - confirm against the
    # simpletransformers documentation.
    "warmup_ratio": 0.1,
    "warmup_steps": 14,
    "weight_decay": 0.01,

    # Experiment tracking
    "wandb_project": "NPI_binary",

    # Checkpointing / caching
    "save_eval_checkpoints": False,
    "save_model_every_epoch": False,
    "use_cached_eval_features": True,

    # Reproducibility
    "manual_seed": 1,
}

# RoBERTa-type classifier built from the local './binary_classifier/outputs/'
# directory (presumably a previously fine-tuned checkpoint); CUDA is disabled.
binary_classifier_model = ClassificationModel(
    "roberta",
    "./binary_classifier/outputs/",
    use_cuda=False,
    args=train_args,
)

2. prepare NER model

In [82]:
# Arguments handed to the NER model that tags NPI terms.
train_args = {
    # Runtime / preprocessing
    "fp16": False,
    # NOTE(review): sliding_window looks like a classification-model option;
    # verify that NERModel actually honours it.
    "sliding_window": True,
    "reprocess_input_data": True,
    "overwrite_output_dir": True,

    # Evaluation and logging cadence (in steps)
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 4,
    "logging_steps": 4,

    # Optimisation schedule
    "num_train_epochs": 5,
    "learning_rate": 3.5e-5,
    "train_batch_size": 8,
    # NOTE(review): both warmup_ratio and warmup_steps are set; presumably an
    # explicit non-zero warmup_steps takes precedence - confirm against the
    # simpletransformers documentation.
    "warmup_ratio": 0.1,
    "warmup_steps": 6,

    # Experiment tracking
    "wandb_project": "NPI_NER",

    # Checkpointing / caching
    "save_eval_checkpoints": False,
    "save_model_every_epoch": False,
    "use_cached_eval_features": True,

    # Reproducibility
    "manual_seed": 1,
}

# BERT-type tagger built from the local './NER/outputs/' directory (presumably
# a previously fine-tuned checkpoint); CUDA is disabled. The label set is a
# plain BIO scheme: "B" begins a term, "I" continues it, "O" is outside.
ner_model = NERModel(
    "bert",
    "./NER/outputs/",
    use_cuda=False,
    args=train_args,
    labels=["O", "B", "I"],
)

3. prepare sentiment classification model

In [83]:
# Arguments handed to the sentiment (polarity) classifier.
train_args = {
    # Runtime / preprocessing
    "fp16": False,
    "reprocess_input_data": True,
    "overwrite_output_dir": True,

    # Evaluation and logging cadence (in steps)
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 4,
    "evaluate_during_training_verbose": True,
    "logging_steps": 4,

    # Optimisation schedule
    "num_train_epochs": 5,
    "learning_rate": 3.5e-5,
    "train_batch_size": 16,
    # NOTE(review): both warmup_ratio and warmup_steps are set; presumably an
    # explicit non-zero warmup_steps takes precedence - confirm against the
    # simpletransformers documentation.
    "warmup_ratio": 0.1,
    "warmup_steps": 16,
    "weight_decay": 0.001,
    "gradient_accumulation_steps": 2,

    # Experiment tracking
    "wandb_project": "NPI_sentiment",

    # Checkpointing / caching
    "save_eval_checkpoints": False,
    "save_model_every_epoch": False,
    "use_cached_eval_features": True,

    # Reproducibility
    "manual_seed": 1,
}

# XLNet-type two-class classifier built from the local './sentiment/outputs/'
# directory (presumably a previously fine-tuned checkpoint); CUDA is disabled.
sentiment_model = ClassificationModel(
    "xlnet",
    "./sentiment/outputs/",
    use_cuda=False,
    args=train_args,
    num_labels=2,
)

4. Perform aspect-based sentiment analysis (ABSA)

In [88]:
# Alternative short example sentence (currently unused):
# 'This experiment could help us understand the relationship between social isolation, school closure and other non-medical interventions, and reducing the number of cases.'
#
# Input passage for the pipeline. It is a single string literal: each trailing
# backslash joins the next source line, so the value contains no newlines.
text = "As the current pandemic situation evolves, urges have been made to focus efforts toward examining the mental health status of the general population. \
This large-scale epidemiological investigation assesses the prevalence of depression and anxiety associated with the mitigation strategies aimed at impeding viral \
transmission chains during the COVID-19 pandemic. A representative sample of 10084 adults participated in the study. The results reveal that the globally ubiquitous \
mitigation strategies involving lockdowns, social distancing, quarantine, and isolation are associated with two to threefold increases in anxiety and depression symptoms. \
Risk factors and protective factors associated with these psychiatric symptoms were identified. Finally, factors associated with adherence rates to these mitigation strategies \
were investigated. The presented time-sensitive findings provide health-policy makers and government officials with a foundation for making informed decisions concerning the mental \
health impacts of the contemporaneously in-practice disease containment strategies, and suggests ways of increasing adherence to these protocols while simultaneously protecting the general \
public against detrimental mental health effects. Of importance, a variety of NPIs are contemporaneously in practice across countries in all continents, including lockdown strategies such as \
closure of schools, social distancing, quarantine, and isolation. Consequently, accompanied by the realization that the end of the crisis is nowhere near, a different dimension of public health \
has emerged to require urgent attention; namely the mental health impacts associated with the strategies implemented towards viral control during the COVID-19 pandemic. \
Research also shows that social distancing, enhanced testing and public building closures have succeeded in reducing the incidence of cases, compared to wearing masks. \
Don't implement border ban any more! Social distancing is the best choice! \
Social distance is as important as contact tracking."

# Split the passage into individual sentences with NLTK's sent_tokenize;
# each sentence is classified separately in the following cells.
sentences = sent_tokenize(text)

5. Use the binary classifier to decide whether each sentence contains an evaluation of a specific NPI

In [89]:
# Classify every sentence; the raw model outputs are bound to `output` but not used here.
binary_pred, output = binary_classifier_model.predict(sentences)

# Pair each sentence with its predicted label so the two stay aligned.
sentence_df = pd.DataFrame({
    'sentence': sentences,
    'binary_predict': binary_pred,
})

# Keep only the sentences labelled 1, as a NumPy array of strings.
is_positive = sentence_df['binary_predict'] == 1
predict_sentences = np.asarray(sentence_df.loc[is_positive, 'sentence'])

print(predict_sentences)

HBox(children=(FloatProgress(value=0.0, max=13.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))


['The results reveal that the globally ubiquitous mitigation strategies involving lockdowns, social distancing, quarantine, and isolation are associated with two to threefold increases in anxiety and depression symptoms.'
 'Research also shows that social distancing, enhanced testing and public building closures have succeeded in reducing the incidence of cases, compared to wearing masks.'
 "Don't implement border ban any more!"
 'Social distancing is the best choice!'
 'Social distance is as important as contact tracking.']


6. Use the NER model to extract NPI names

In [90]:
def modify_sentences(raw_sentences):
    """Re-space sentences so that every token is separated by one space.

    Each sentence is tokenized with ``word_tokenize`` and the tokens are
    joined back with a single space, so punctuation becomes a stand-alone
    token (e.g. ``"choice!"`` -> ``"choice !"``).

    Args:
        raw_sentences: Any iterable of sentence strings (list, NumPy array,
            generator, ...).

    Returns:
        list[str]: One space-joined token string per input sentence, in the
        same order. An empty input yields an empty list.
    """
    # Iterate over the sentences directly instead of indexing by position:
    # this avoids rebinding the loop index to the token list and works for
    # any iterable, not only sequences that support len() and integer indexing.
    return [' '.join(word_tokenize(raw_sentence)) for raw_sentence in raw_sentences]


def get_prediction(predictions):
    """Flatten per-token prediction dicts into one label list per sentence.

    Args:
        predictions: Iterable with one entry per sentence; each entry is an
            iterable of dicts whose values are the predicted labels.

    Returns:
        list[list]: For every sentence, the dict values concatenated in
        iteration order.
    """
    return [
        [label for token_prediction in sentence_prediction
               for label in token_prediction.values()]
        for sentence_prediction in predictions
    ]


def extract_terms(sentences, y_pred):
    """Collect the B/I-tagged terms of every sentence.

    Each sentence is split on single spaces and its words are paired
    positionally with the labels in the matching entry of ``y_pred``:

    * ``"B"`` starts a new term (closing any term that is still open),
    * ``"I"`` extends the open term; an ``"I"`` with no open term is ignored,
    * ``"O"`` closes the open term,
    * any other label is ignored and leaves an open term open.

    Args:
        sentences: Iterable of sentence strings whose tokens are separated
            by single spaces.
        y_pred: Iterable of label sequences, one per sentence.

    Returns:
        list[tuple[str, list[str]]]: ``(sentence, [term1, term2, ...])``
        pairs, where each term is its words joined by a single space.
    """
    result = []  # (sentence, [term1, term2, ...]) pairs

    for sentence, prediction in zip(sentences, y_pred):
        words = sentence.split(' ')
        terms_list = []
        term = []  # words of the term currently being built; empty means "not inside a term"

        # zip() stops at the shorter sequence, so a prediction list with fewer
        # labels than words (e.g. an empty sentence or truncated model output)
        # no longer raises IndexError; the unlabelled tail is simply ignored.
        for word, label in zip(words, prediction):
            if label == "B":
                if term:  # B I B I O ... -> back-to-back terms
                    terms_list.append(term)
                term = [word]
            elif label == "I":
                if term:  # an "I" without a preceding "B" (O I O ...) is dropped
                    term.append(word)
            elif label == "O" and term:
                terms_list.append(term)
                term = []

        # Flush a term that runs to the end of the labelled words.
        if term:
            terms_list.append(term)

        result.append((sentence, [' '.join(t) for t in terms_list]))

    return result


# Re-space the positively classified sentences so tokens are separated by single spaces.
modified_sentences = modify_sentences(predict_sentences)
# Run the NER model; only the first returned value (the predictions) is used.
# NOTE(review): presumably one list of {word: label} dicts per sentence, which
# is the shape get_prediction consumes - confirm against NERModel.predict.
NER_pred, _ = ner_model.predict(modified_sentences)
# Reduce the predictions to plain label lists, one list per sentence.
y_pred = get_prediction(NER_pred)

# Pair each sentence with the terms assembled from its B/I labels.
NER_sentences = extract_terms(modified_sentences, y_pred)

print(NER_sentences)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


[('The results reveal that the globally ubiquitous mitigation strategies involving lockdowns , social distancing , quarantine , and isolation are associated with two to threefold increases in anxiety and depression symptoms .', ['lockdowns', 'social distancing', 'quarantine', 'isolation']), ('Research also shows that social distancing , enhanced testing and public building closures have succeeded in reducing the incidence of cases , compared to wearing masks .', ['social distancing', 'enhanced testing', 'public building closures', 'wearing masks']), ("Do n't implement border ban any more !", ['border ban']), ('Social distancing is the best choice !', ['Social distancing']), ('Social distance is as important as contact tracking .', ['Social distance', 'contact tracking'])]


7. Judge the sentiment polarity of each NPI in each sentence

In [91]:
# `display` is imported from IPython.display; importing it from
# IPython.core.display is deprecated.
from IPython.display import HTML, display

# Widen the notebook container so the long sentences in the table are easier to read.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Build one [sentence, term] pair per extracted term; the pairs are the
# inputs for the sentiment model.
polarity_pred_input = []

for sentence, terms_list in NER_sentences:
    for term in terms_list:
        polarity_pred_input.append([sentence, term])

# Name the columns in the constructor: assigning `.columns` afterwards raises
# ValueError when no terms were extracted, because the empty frame has zero columns.
polarity_pred_df = pd.DataFrame(polarity_pred_input, columns=['sentence', 'term_pred'])

# Only the first returned value (the predicted labels) is used.
# NOTE(review): behaviour of predict() on an empty input list is not
# established here - confirm before relying on the empty case end to end.
sentiment_pred, _ = sentiment_model.predict(polarity_pred_input)
polarity_pred_df['sentiment_pred'] = sentiment_pred
polarity_pred_df

HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))






Unnamed: 0,sentence,term_pred,sentiment_pred
0,The results reveal that the globally ubiquitou...,lockdowns,0
1,The results reveal that the globally ubiquitou...,social distancing,0
2,The results reveal that the globally ubiquitou...,quarantine,0
3,The results reveal that the globally ubiquitou...,isolation,0
4,"Research also shows that social distancing , e...",social distancing,1
5,"Research also shows that social distancing , e...",enhanced testing,1
6,"Research also shows that social distancing , e...",public building closures,1
7,"Research also shows that social distancing , e...",wearing masks,0
8,Do n't implement border ban any more !,border ban,0
9,Social distancing is the best choice !,Social distancing,1
