# Toxic Language Detection on Jigsaw Dataset

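In this notebook we train a multi-label text classifier with `simpletransformers`: first on a tiny toy dataset to show the workflow, then on the Jigsaw toxic-comment data.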

## Toy Example

In [3]:
import pandas as pd

from simpletransformers.classification import MultiLabelClassificationModel

In [None]:
# Toy corpus: every row is [sentence, six-slot multi-hot label vector].
# Rows 0-2 only use slots 0-1, rows 3-5 slots 2-3, and rows 6-8 slots 4-5.
train_data = [
    ["Pizza and pasta are Italian food", [1, 1, 0, 0, 0, 0]],
    ["Before start cooking find a good recipe", [1, 0, 0, 0, 0, 0]],
    ["Cooking is one of my hobbies", [0, 1, 0, 0, 0, 0]],
    ["I like football", [0, 0, 1, 1, 0, 0]],
    ["I hate tennis", [0, 0, 1, 0, 0, 0]],
    ["This year the Olympic Games are held in Tokyo", [0, 0, 0, 1, 0, 0]],
    ["Natural Language Processing deals with talking machines", [0, 0, 0, 0, 1, 1]],
    ["Textual entailment and semantic similarity are NLP tasks", [0, 0, 0, 0, 1, 0]],
    ["NLU stands for natural language understanding", [0, 0, 0, 0, 0, 1]],
]

# The evaluation rows repeat training rows 2, 4 and 6 (one per slot pair).
# Label lists are copied so the two datasets never share a list object.
eval_data = [
    [sentence, list(label_vector)]
    for sentence, label_vector in (train_data[row] for row in (2, 4, 6))
]

frame_columns = ["text", "labels"]
train_df = pd.DataFrame(data=train_data, columns=frame_columns)
eval_df = pd.DataFrame(data=eval_data, columns=frame_columns)

In [None]:
# Settings dictionary later passed as `args=` to the toy classifier.
args = dict(
    # filesystem locations
    output_dir="outputs/",
    cache_dir="cache_dir/",
    # precision
    fp16=False,
    fp16_opt_level="O1",
    # sequence length and batching
    max_seq_length=128,
    train_batch_size=32,
    gradient_accumulation_steps=1,
    eval_batch_size=8,
    # optimisation schedule (a single epoch for the toy run)
    num_train_epochs=1,
    weight_decay=0,
    learning_rate=4e-5,
    adam_epsilon=1e-8,
    warmup_ratio=0.06,
    warmup_steps=0,
    max_grad_norm=1.0,
    # logging / checkpoint cadence
    logging_steps=50,
    save_steps=2000,
    # run-control flags
    overwrite_output_dir=True,
    reprocess_input_data=False,
    evaluate_during_training=False,
    # disabled: process_count = cpu_count() - 2 if cpu_count() > 2 else 1
    n_gpu=1,
)

In [None]:
# Build a RoBERTa-base multi-label classifier with six labels and CUDA disabled,
# configured from the `args` dictionary.
model_options = {"num_labels": 6, "use_cuda": False, "args": args}
model = MultiLabelClassificationModel("roberta", "roberta-base", **model_options)

# Fit it on the toy training frame.
model.train_model(train_df)

In [None]:
# Run the evaluation pass over the toy frame, then unpack its three return values.
evaluation = model.eval_model(eval_df)
result, model_outputs, wrong_predictions = evaluation
print(result)

In [None]:
# model_outputs is the second value returned by eval_model for the toy frame;
# presumably one row of per-label scores per evaluation example — TODO confirm.
print(model_outputs)

In [None]:
# predict() takes a list of texts, so the single sentence is wrapped in a list.
sample_texts = ["This class is about natural language."]
predictions, raw_outputs = model.predict(sample_texts)

# One value per line: the predictions first, then the raw outputs.
print(predictions, raw_outputs, sep="\n")

## Training a Text Classifier to Detect Toxic Language

In [None]:
# Jigsaw training CSV; the path is relative to the notebook's working directory.
train_csv_path = "../datasets/toxic_language/train.csv"
train = pd.read_csv(train_csv_path)

# Preview the first ten rows.
train.head(n=10)

In [None]:
# The six binary flag columns read from the CSV; their order here fixes the
# meaning of each position in the multi-hot "labels" vector.
LABEL_COLUMNS = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# Guard so the cell is idempotent: after one run the flag columns are gone, and
# re-running without the guard would raise a KeyError.
if "labels" not in train.columns:
    train = (
        train
        # one list of six flags per row, in LABEL_COLUMNS order
        # (tolist() yields plain Python numbers rather than NumPy scalars)
        .assign(labels=train[LABEL_COLUMNS].to_numpy().tolist())
        # the classifier is fed frames with "text" and "labels" columns
        .rename(columns={"comment_text": "text"})
        # drop the id and the now-redundant individual flag columns
        .drop(columns=["id", *LABEL_COLUMNS])
    )

train.head(10)

In [None]:
# Settings dictionary later passed as `args=` to the Jigsaw classifier.
args = dict(
    # filesystem locations
    output_dir="outputs/",
    cache_dir="cache_dir/",
    # precision
    fp16=False,
    fp16_opt_level="O1",
    # sequence length and batching
    max_seq_length=128,
    train_batch_size=32,
    gradient_accumulation_steps=1,
    eval_batch_size=8,
    # optimisation schedule (ten epochs for the full dataset)
    num_train_epochs=10,
    weight_decay=0,
    learning_rate=4e-5,
    adam_epsilon=1e-8,
    warmup_ratio=0.06,
    warmup_steps=0,
    max_grad_norm=1.0,
    # logging / checkpoint cadence
    logging_steps=50,
    save_steps=2000,
    # run-control flags
    overwrite_output_dir=True,
    reprocess_input_data=False,
    evaluate_during_training=False,
    # disabled: process_count = cpu_count() - 2 if cpu_count() > 2 else 1
    n_gpu=1,
    # experiment-tracking project name
    wandb_project="test-master",
)

In [None]:
# Fresh RoBERTa-base multi-label classifier (six labels, CUDA disabled),
# configured from the `args` dictionary.
classifier_settings = dict(num_labels=6, use_cuda=False, args=args)
model = MultiLabelClassificationModel(
    "roberta",
    "roberta-base",
    **classifier_settings,
)

# Fit it on the prepared Jigsaw frame.
model.train_model(train)

In [None]:
# Evaluate the model
# The toy `eval_df` holds topic labels (cooking / sports / NLP), so scoring the
# toxicity model against it yields meaningless metrics. Score Jigsaw rows instead.
# NOTE(review): these rows come from `train`, which the model was fitted on, so
# the numbers are an optimistic sanity check, not a generalisation estimate —
# TODO hold out a validation split before training and evaluate on that.
EVAL_SAMPLE_SIZE = 1000
toxic_eval_df = train.sample(
    n=min(EVAL_SAMPLE_SIZE, len(train)),  # never ask for more rows than exist
    random_state=42,  # fixed seed so re-runs score the same rows
).reset_index(drop=True)

result, model_outputs, wrong_predictions = model.eval_model(toxic_eval_df)
print(result)

In [None]:
# model_outputs is the second value returned by eval_model; only the first ten
# entries are printed to keep the cell output short.
print(model_outputs[:10])

In [None]:
# predict() takes a list of texts, so the single comment is wrapped in a list.
sample_comments = ["You fucking idiot!!"]
predictions, raw_outputs = model.predict(sample_comments)

# One value per line: the predictions first, then the raw outputs.
print(predictions, raw_outputs, sep="\n")

## A Quick Demo

In [4]:
def load_model(
    model_architecture: str,
    directory: str = "outputs/",
    num_labels: int = 6,
    use_cuda: bool = False,
    **kwargs
):
    """Build a ``MultiLabelClassificationModel`` from a saved model directory.

    Args:
        model_architecture: Model type passed as the constructor's first
            argument (e.g. ``"roberta"``).
        directory: Path passed as the constructor's second argument;
            defaults to ``"outputs/"``.
        num_labels: Number of output labels the classifier is built with.
        use_cuda: Forwarded as the constructor's ``use_cuda`` flag.
        **kwargs: Extra settings, forwarded as the ``args`` dictionary.

    Returns:
        The constructed ``MultiLabelClassificationModel``.
    """
    # Forward the caller's num_labels; it used to be pinned to 6 here, which
    # silently ignored the parameter.
    return MultiLabelClassificationModel(
        model_architecture,
        directory,
        num_labels=num_labels,
        use_cuda=use_cuda,
        args=kwargs,
    )

In [5]:
# Rebinds the global `model` to a RoBERTa classifier loaded from load_model's
# default directory "outputs/" — the same path both training configs above use
# as their output_dir.
model = load_model("roberta")

In [6]:
from IPython.core.magic import register_cell_magic

@register_cell_magic
def detect_toxic_lang(line, text):
    """Cell magic that classifies the cell body with the global ``model``.

    Args:
        line: Remainder of the ``%%detect_toxic_lang`` line; unused.
        text: Cell body, passed to the classifier as a single document.

    Returns:
        The prediction for ``text`` (the first element of the predictions
        returned by ``model.predict``). The value is returned, not printed;
        the notebook displays it as the cell's output.
    """
    # The raw outputs are not needed here, so they are discarded.
    predictions, _ = model.predict([text])
    return predictions[0]

In [7]:
%%detect_toxic_lang
Hello, sir. How is your day?

Converting to features started. Cache is not used.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




[0, 1, 1, 0, 0, 0]

In [8]:
%%detect_toxic_lang
Only nigga that I trust is me

Converting to features started. Cache is not used.


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




[0, 1, 1, 0, 0, 0]