In [0]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sun Jun  7 07:49:23 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [1]:
# Mount Google Drive into the runtime's filesystem at /content/drive so that
# later cells can read data from it and write outputs to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install the transformers library, pinned to release 2.10.0.
!pip install transformers==2.10.0

Collecting transformers==2.10.0
[?25l  Downloading https://files.pythonhosted.org/packages/12/b5/ac41e3e95205ebf53439e4dd087c58e9fd371fd8e3724f2b9b4cdb8282e5/transformers-2.10.0-py3-none-any.whl (660kB)
[K     |▌                               | 10kB 26.3MB/s eta 0:00:01[K     |█                               | 20kB 2.2MB/s eta 0:00:01[K     |█▌                              | 30kB 2.8MB/s eta 0:00:01[K     |██                              | 40kB 3.1MB/s eta 0:00:01[K     |██▌                             | 51kB 2.5MB/s eta 0:00:01[K     |███                             | 61kB 2.8MB/s eta 0:00:01[K     |███▌                            | 71kB 3.0MB/s eta 0:00:01[K     |████                            | 81kB 3.4MB/s eta 0:00:01[K     |████▌                           | 92kB 3.7MB/s eta 0:00:01[K     |█████                           | 102kB 3.5MB/s eta 0:00:01[K     |█████▌                          | 112kB 3.5MB/s eta 0:00:01[K     |██████                          | 1

In [0]:
# Install simpletransformers (provides ClassificationModel used below).
# NOTE(review): this install is not version-pinned, unlike transformers above;
# pin it to the release this notebook was run with so a re-run is reproducible.
!pip install simpletransformers

In [2]:
!pip install wandb

Installing collected packages: smmap, gitdb, GitPython, sentry-sdk, graphql-core, gql, subprocess32, configparser, shortuuid, docker-pycreds, pathtools, watchdog, wandb
Successfully installed GitPython-3.1.3 configparser-5.0.0 docker-pycreds-0.4.0 gitdb-4.0.5 gql-0.2.0 graphql-core-1.1 pathtools-0.1.2 sentry-sdk-0.14.4 shortuuid-1.0.1 smmap-3.0.4 subprocess32-3.5.4 wandb-0.9.0 watchdog-0.10.2


In [3]:
# Log in to Weights & Biases from the shell.
# NOTE(review): the interactive prompt echoes the pasted API key into the cell
# output - clear this cell's output before sharing or committing the notebook.
!wandb login

[34m[1mwandb[0m: You can find your API key in your browser here: https://app.wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter: 466ed0b4b2a230df648b8b1636be583c783c283e
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[32mSuccessfully logged in to Weights & Biases![0m


In [1]:
cd "/content/drive/My Drive/Colab Notebooks/binary_classifier"

/content/drive/My Drive/Colab Notebooks/binary_classifier


In [0]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from simpletransformers.classification import ClassificationModel
import logging
import sklearn



1. Input Data

In [3]:
# Folder (relative to the working directory) that holds the labelled spreadsheets.
prefix = 'data/'

# Positive examples: keep only the sentence column, drop repeated sentences,
# and tag every remaining row with label 1.
raw_data = pd.read_excel(prefix + "positive_data.xlsx")
pos_data = raw_data[['Sentence']].drop_duplicates(subset="Sentence").assign(Label=1)

# Negative examples: every row is tagged with label 0.
neg_data = pd.read_excel(prefix + "negative_data.xlsx").assign(Label=0)

# Stack positives on top of negatives, switch to the 'text' / 'labels' column
# names, and renumber the rows from 0.
dataframe = (
    pd.concat([pos_data[['Sentence', 'Label']], neg_data[['Sentence', 'Label']]])
    .rename(columns={"Sentence": "text", "Label": "labels"})
    .reset_index(drop=True)
)

# Class balance, then a preview of the first rows.
print(dataframe.groupby('labels').count())
print(dataframe.head())

        text
labels      
0        239
1        303
                                                text  labels
0  Stopping mass gatherings is predicted to have ...       1
1  Adding household quarantine to isolation of th...       1
2  Preventing mass gatherings is expected to have...       1
3  At the same time, by implementing population-w...       1
4  As evidence suggests COVID-19 could be transmi...       1


2. Prepare the Training and Testing Data sets

In [4]:
# Stratified 70/30 split of the labelled sentences with a fixed seed; each half
# gets a fresh 0-based index as it is unpacked.
train_df, eval_df = (
    part.reset_index(drop=True)
    for part in train_test_split(
        dataframe,
        test_size=0.3,
        shuffle=True,
        stratify=dataframe['labels'],
        random_state=1,
    )
)

# Class balance and preview of the training half.
print(train_df.groupby('labels').count())
print(train_df.head())

# Class balance and preview of the evaluation half.
print(eval_df.groupby('labels').count())
print(eval_df.head())

        text
labels      
0        167
1        212
                                                text  labels
0  Internal travel restrictions in England, Scotl...       1
1  The closure of public places and restrictions ...       1
2  Our goal is to experimentally demonstrate the ...       0
3  Quarantine at home can play an important role ...       1
4  This experiment could help us understand the r...       0
        text
labels      
0         72
1         91
                                                text  labels
0      Will wearing masks help control the epidemic?       0
1  During the 1918–19 pandemic, excess death rate...       1
2  Given that influenza patients can already tran...       1
3  In controlled trials using hand sanitizers and...       1
4  Travel bans and home quarantine are equally im...       1


3. Set the hyperparameters of the model

In [0]:
# Log at INFO level overall, but only WARNING and above for the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Options passed to ClassificationModel through its `args` parameter.
train_args = dict(
    # Precision and input handling.
    fp16=False,
    sliding_window=True,
    reprocess_input_data=True,
    overwrite_output_dir=True,

    # Evaluation and logging cadence during training (both every 4 steps).
    evaluate_during_training=True,
    evaluate_during_training_steps=4,
    logging_steps=4,
    evaluate_during_training_verbose=True,

    # Optimisation schedule.
    num_train_epochs=5,
    learning_rate=3.5e-5,
    train_batch_size=32,
    eval_batch_size=8,

    # NOTE(review): both a warmup ratio and an explicit warmup step count are
    # set; presumably only one of them takes effect - confirm against the
    # simpletransformers documentation.
    warmup_ratio=0.1,
    warmup_steps=14,

    weight_decay=0.01,

    # Weights & Biases project name.
    wandb_project="NPI_binary",

    # Checkpointing and caching.
    save_eval_checkpoints=False,
    save_model_every_epoch=False,
    use_cached_eval_features=True,

    # Fixed seed.
    manual_seed=1,
)



4. Train the model using RoBERTa

In [8]:
# Build a ClassificationModel of type 'roberta' from the 'roberta-large'
# weights, configured with the options in train_args.
model = ClassificationModel('roberta', 'roberta-large', args=train_args)

# Fine-tune on train_df. eval_df is supplied for evaluation during training,
# and the two extra keyword arguments register additional metric functions
# under the names 'acc' and 'f1_score'.
model.train_model(
    train_df,
    eval_df=eval_df,
    acc=sklearn.metrics.accuracy_score,
    f1_score=sklearn.metrics.f1_score,
)


INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_model: Sliding window enabled


HBox(children=(FloatProgress(value=0.0, max=379.0), HTML(value='')))

INFO:simpletransformers.classification.classification_model: 379 features created from 379 samples.





HBox(children=(FloatProgress(value=0.0, description='Epoch', max=5.0, style=ProgressStyle(description_width='i…

INFO:wandb.run_manager:system metrics and metadata threads started
INFO:wandb.run_manager:checking resume status, waiting at most 10 seconds
INFO:wandb.run_manager:resuming run from id: UnVuOnYxOjFyMWhxcWR0Ok5QSV9iaW5hcnk6cnlhbmxlZQ==
INFO:wandb.run_manager:upserting run before process can begin, waiting at most 10 seconds
INFO:wandb.run_manager:saving pip packages
INFO:wandb.run_manager:initializing streaming files api
INFO:wandb.run_manager:unblocking file change observer, beginning sync with W&B servers


HBox(children=(FloatProgress(value=0.0, description='Current iteration', max=12.0, style=ProgressStyle(descrip…

INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/requirements.txt


Running loss: 0.720506

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/media/graph/graph_0_summary_f7c22e28.graph.json
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/media
INFO:wandb.run_manager:file/dir created: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/media/graph


Running loss: 0.689440

INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_model: Sliding window enabled
INFO:simpletransformers.classification.classification_model: 163 features created from 163 samples.
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 91, 'tn': 0, 'fp': 72, 'fn': 0, 'acc': 0.558282208588957, 'f1_score': 0.7165354330708662, 'eval_loss': 0.7012162989094144}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-hist

Running loss: 0.674351

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.0, 'tp': 91, 'tn': 0, 'fp': 72, 'fn': 0, 'acc': 0.558282208588957, 'f1_score': 0.7165354330708662, 'eval_loss': 0.6812214851379395}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content

Running loss: 0.697847

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json


Running loss: 0.672001

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:simpletransformers.classification.classification_model:{'mcc': 0.497169925818894, 'tp': 59, 'tn': 61, 'fp': 11, 'fn': 32, 'acc': 0.7361963190184049, 'f1_score': 0.732919254658385, 'eval_loss': 0.6371492658342633}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir mo




INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.497169925818894, 'tp': 59, 'tn': 61, 'fp': 11, 'fn': 32, 'acc': 0.7361963190184049, 'f1_score': 0.732919254658385, 'eval_loss': 0.6371492658342633}


HBox(children=(FloatProgress(value=0.0, description='Current iteration', max=12.0, style=ProgressStyle(descrip…

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.518020

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6729736387991127, 'tp': 91, 'tn': 43, 'fp': 29, 'fn': 0, 'acc': 0.8220858895705522, 'f1_score': 0.8625592417061612, 'eval_loss': 0.45261656031722114}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir 

Running loss: 0.283157

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json


Running loss: 0.313188

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.7143734811331993, 'tp': 79, 'tn': 61, 'fp': 11, 'fn': 12, 'acc': 0.8588957055214724, 'f1_score': 0.8729281767955802, 'eval_loss': 0.2865211321484475}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir 

Running loss: 0.323972

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.7803618689202122, 'tp': 91, 'tn': 53, 'fp': 19, 'fn': 0, 'acc': 0.8834355828220859, 'f1_score': 0.9054726368159205, 'eval_loss': 0.23870935574883506}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir 




INFO:simpletransformers.classification.classification_model:{'mcc': 0.7803618689202122, 'tp': 91, 'tn': 53, 'fp': 19, 'fn': 0, 'acc': 0.8834355828220859, 'f1_score': 0.9054726368159205, 'eval_loss': 0.23870935574883506}


HBox(children=(FloatProgress(value=0.0, description='Current iteration', max=12.0, style=ProgressStyle(descrip…

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.014368

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.8438701778209865, 'tp': 90, 'tn': 60, 'fp': 12, 'fn': 1, 'acc': 0.9202453987730062, 'f1_score': 0.9326424870466321, 'eval_loss': 0.21306446939706802}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir

Running loss: 0.124465

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.7762543502051171, 'tp': 72, 'tn': 71, 'fp': 1, 'fn': 19, 'acc': 0.8773006134969326, 'f1_score': 0.8780487804878049, 'eval_loss': 0.5355195770916041}


Running loss: 0.401122

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.014196

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.8438701778209865, 'tp': 90, 'tn': 60, 'fp': 12, 'fn': 1, 'acc': 0.9202453987730062, 'f1_score': 0.9326424870466321, 'eval_loss': 0.25352892758590834}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classifi




INFO:simpletransformers.classification.classification_model:{'mcc': 0.8438701778209865, 'tp': 90, 'tn': 60, 'fp': 12, 'fn': 1, 'acc': 0.9202453987730062, 'f1_score': 0.9326424870466321, 'eval_loss': 0.25352892758590834}


HBox(children=(FloatProgress(value=0.0, description='Current iteration', max=12.0, style=ProgressStyle(descrip…

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.005306

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json


Running loss: 0.001349

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.9131885679762729, 'tp': 89, 'tn': 67, 'fp': 5, 'fn': 2, 'acc': 0.9570552147239264, 'f1_score': 0.9621621621621622, 'eval_loss': 0.1753346524866564}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir mo

Running loss: 0.001629

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.8762398043700634, 'tp': 85, 'tn': 68, 'fp': 4, 'fn': 6, 'acc': 0.9386503067484663, 'f1_score': 0.9444444444444444, 'eval_loss': 0.21723555525143942}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl


Running loss: 0.028561

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.123232

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json


Running loss: 0.013117

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.9140870993769628, 'tp': 90, 'tn': 66, 'fp': 6, 'fn': 1, 'acc': 0.9570552147239264, 'f1_score': 0.9625668449197862, 'eval_loss': 0.2151941139073599}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classifica




INFO:simpletransformers.classification.classification_model:{'mcc': 0.9140870993769628, 'tp': 90, 'tn': 66, 'fp': 6, 'fn': 1, 'acc': 0.9570552147239264, 'f1_score': 0.9625668449197862, 'eval_loss': 0.2151941139073599}


HBox(children=(FloatProgress(value=0.0, description='Current iteration', max=12.0, style=ProgressStyle(descrip…

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.000361

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.9021756621578736, 'tp': 90, 'tn': 65, 'fp': 7, 'fn': 1, 'acc': 0.950920245398773, 'f1_score': 0.9574468085106383, 'eval_loss': 0.22175848342123486}


Running loss: 0.000364

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.028194

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl


Running loss: 0.000283

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.9128717482439989, 'tp': 88, 'tn': 68, 'fp': 4, 'fn': 3, 'acc': 0.9570552147239264, 'f1_score': 0.9617486338797815, 'eval_loss': 0.21224212433610642}


Running loss: 0.000341

INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json


Running loss: 0.000431

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model:{'mcc': 0.9128717482439989, 'tp': 88, 'tn': 68, 'fp': 4, 'fn': 3, 'acc': 0.9570552147239264, 'f1_score': 0.9617486338797815, 'eval_loss': 0.22090506364613594}
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir m




INFO:simpletransformers.classification.classification_model:{'mcc': 0.9128717482439989, 'tp': 88, 'tn': 68, 'fp': 4, 'fn': 3, 'acc': 0.9570552147239264, 'f1_score': 0.9617486338797815, 'eval_loss': 0.22090506364613594}





INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-history.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-summary.json
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.
INFO:wandb.run_manager:shutting down system stats and metadata service
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-events.jsonl
INFO:wandb.run_manager:file/dir modified: /content/drive/My Drive/Colab Notebooks/binary_classifier/wandb/run-20200608_152655-1r1hqqdt/wandb-metadata.json
INFO:wandb.run_manager:stopping streamin

5. Evaluate the model

In [9]:
# Score the fine-tuned model on the held-out split, passing the same two extra
# metric functions ('acc', 'f1_score') that were used during training.
result, model_outputs, wrong_predictions = model.eval_model(
    eval_df,
    acc=sklearn.metrics.accuracy_score,
    f1_score=sklearn.metrics.f1_score,
)

# Show the metrics dictionary.
print(result)

INFO:simpletransformers.classification.classification_model: Features loaded from cache at cache_dir/cached_dev_roberta_128_2_163


HBox(children=(FloatProgress(value=0.0, max=21.0), HTML(value='')))

INFO:simpletransformers.classification.classification_model:{'mcc': 0.9128717482439989, 'tp': 88, 'tn': 68, 'fp': 4, 'fn': 3, 'acc': 0.9570552147239264, 'f1_score': 0.9617486338797815, 'eval_loss': 0.22090506364613594}



{'mcc': 0.9128717482439989, 'tp': 88, 'tn': 68, 'fp': 4, 'fn': 3, 'acc': 0.9570552147239264, 'f1_score': 0.9617486338797815, 'eval_loss': 0.22090506364613594}


6. Loading Saved Models

In [0]:
# Re-create the classifier from the files saved under 'outputs/', reusing the
# same option dictionary that was used for training.
binary_classifier = ClassificationModel('roberta', 'outputs/', args=train_args)

7. Prediction

In [11]:
# Classify a hand-picked list of 17 sentences with the reloaded model.
# predict() returns a pair; only the first element (the predicted labels) is
# kept, the second is discarded into `_`.
# NOTE(review): the first and third sentences below are identical - confirm
# whether the repetition is intentional.
binary_pred, _ = binary_classifier.predict([
    'Aggressive disease containment efforts, including isolation of the source of infection, contact tracing and quarantine, social distancing, and personal protection and prevention, have considerably changed the course of Covid-19 outbreak in Wuhan, when there was neither effective drug nor vaccine for this new infectious disease with high transmission.',
    'If the infected are quarantined, cities will not accept more cases.',
    'Aggressive disease containment efforts, including isolation of the source of infection, contact tracing and quarantine, social distancing, and personal protection and prevention, have considerably changed the course of Covid-19 outbreak in Wuhan, when there was neither effective drug nor vaccine for this new infectious disease with high transmission.',
    'Shutting down public schools in New York City, for example, would deprive tens of thousands of kids of urgently needed school meals.',
    'Experts advise people to use hand sanitizers instead of masks because they can take up scarce medical resources.',
    'Experiments have shown that frequent hand washing does not reduce the spread of the virus from person to person as effectively as wearing a mask.',
    'In Singapore, for example, the government quickly canceled public events while private companies handed out free hand sanitizer. As a result, the number of cases has grown much more slowly than in nearby countries.',
    'And overall, the ban “non-essential” mass gatherings makes sense from a public health perspective.',
    'Without the intervention of contact reductions, in the longer term, the epidemics would increase exponentially. ',
    'Closely watching these contacts after exposure to an infected person will help the contacts to get care and treatment, and will prevent further transmission of the virus.',
    'Timely bans on public gatherings and closure of public places, including theaters and churches, were suggested to have had a positive effect on reducing the excess death rate during the 1918 pandemic in the United States (5,48). ',
    'Telecommuting, shift work and extended leave are some of the things that are not thought to mitigate the pandemic.',
    'Amongst the set of control measures tested, refraining from social activities with various compliance levels was relatively ineffective. ',
    'This increase in use of face masks by the general public exacerbates the global supply shortage of face masks, with prices soaring and risks supply constraints to frontline health-care professionals.',
    'This paper talked about isolation of the source of infection, contact tracing and quarantine, social distancing, and personal protection and prevention.',
    'The government issue the social distancing and wearing masks measures.',
    'Of importance, a variety of NPIs are contemporaneously in practice across countries in all continents, including lockdown strategies such as closure of schools, social distancing, quarantine, and isolation.'
])
# Print the predicted label for each sentence, in input order.
print(binary_pred)

INFO:simpletransformers.classification.classification_model: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_model: Sliding window enabled


HBox(children=(FloatProgress(value=0.0, max=17.0), HTML(value='')))

INFO:simpletransformers.classification.classification_model: 17 features created from 17 samples.





HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


[1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0]
