Загружаем скачанный классификатор токсичности:

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
  
tokenizer = AutoTokenizer.from_pretrained("trained_roberta/")

model = AutoModelForSequenceClassification.from_pretrained("trained_roberta/").cuda()

TOXIC_CLASS=-1
TOKENIZATION_TYPE='sentencepiece'




Ниже функции для применения классификатора

In [2]:
from torch import softmax, sigmoid
import numpy as np

def logits_to_toxic_probas(logits):
    if logits.shape[-1] > 1:
        activation = lambda x: softmax(x, -1)
    else:
        activation = sigmoid
    return activation(logits)[:, TOXIC_CLASS].cpu().detach().numpy()


def is_word_start(token):
    if TOKENIZATION_TYPE == 'sentencepiece':
        return token.startswith('▁')
    if TOKENIZATION_TYPE == 'bert':
        return not token.startswith('##')
    raise ValueError("Unknown tokenization type")


def normalize(sentence, max_tokens_per_word=20):
    sentence = ''.join(map(lambda c: c if c.isalpha() else ' ', sentence.lower()))
    ids = tokenizer(sentence)['input_ids']
    tokens = tokenizer.convert_ids_to_tokens(ids)[1:-1]
    
    result = []
    num_continuation_tokens = 0
    for token in tokens:
        if not is_word_start(token):
            num_continuation_tokens += 1
            if num_continuation_tokens < max_tokens_per_word:
                result.append(token.lstrip('#▁'))
        else:
            num_continuation_tokens = 0
            result.extend([' ', token.lstrip('▁#')])
    
    return ''.join(result).strip()

def iterate_batches(data, batch_size=40):
    batch = []
    for x in data:
        batch.append(x)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if len(batch) > 0:
        yield batch

from tqdm.auto import tqdm
def predict_toxicity(sentences, batch_size=5, threshold=0.5, return_scores=False, verbose=True, device='cuda'):
    results = []
    tqdm_fn = tqdm if verbose else lambda x, total: x
    for batch in tqdm_fn(iterate_batches(sentences, batch_size), total=np.ceil(len(sentences) / batch_size)):
        normlized = [normalize(sent, max_tokens_per_word=5) for sent in batch]
        tokenized = tokenizer(normlized, return_tensors='pt', padding=True, max_length=512, truncation=True)
        
        logits = model.to(device)(**{key: val.to(device) for key, val in tokenized.items()}).logits
        preds = logits_to_toxic_probas(logits)
        if not return_scores:
            preds = preds >= threshold
        results.extend(preds)
    return results


Читаем тестовый набор

In [3]:
texts = []
with open('public_testset.short.txt', 'rt') as f:
    for line in f:
        texts.append(normalize(line)) 

Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors


Вычисляем токсичность отдельных слов

In [4]:
import torch

words = set()
for text in texts:
    words.update(text.split())
words = sorted(words)

with torch.inference_mode():
    word_toxicities = predict_toxicity(texts, batch_size=100, return_scores=True)
    
toxicity = dict(zip(words, word_toxicities))


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))




Ниже читаем эмбеддинги слов и описываем функции их обработки

In [5]:
import gensim
from pymystem3 import Mystem

stemmer = Mystem()

In [6]:
embs_file = np.load('embeddings_with_lemmas.npz', allow_pickle=True)
embs_vectors = embs_file['vectors']
embs_vectors_normed = embs_vectors / np.linalg.norm(embs_vectors, axis=1, keepdims=True)
embs_voc = embs_file['voc'].item()

embs_voc_by_id = [None for i in range(len(embs_vectors))]
for word, idx in embs_voc.items():
    if embs_voc_by_id[idx] is None:
        embs_voc_by_id[idx] = word

In [7]:
def get_w2v_indicies(a):
    res = []
    if isinstance(a, str):
        a = a.split()
    for w in a:
        if w in embs_voc:
            res.append(embs_voc[w])
        else:
            lemma = stemmer.lemmatize(w)[0]
            res.append(embs_voc.get(lemma, None))
    return res

def calc_embs(words):
    words = ' '.join(map(normalize, words))
    inds = get_w2v_indicies(words)
    return [None if i is None else embs_vectors[i] for i in inds]

Вычисление эмбеддинговых расстояний, как в score.py

In [8]:
def count_none(array):
    res = 0
    for el in array:
        if el is None:
            res += 1
    return res


def greedy_match_embs(a, b, dots=None):
    if len(a) == 0:
        return len(b)
    if len(b) == 0:
        return len([x for x in a if x is not None])
    # compute dot-product on initial run
    if dots is None:
        a_none_count = count_none(a)
        b_none_count = count_none(b)
        
        if a_none_count + b_none_count > 0:
            # None values don't match anything except other None values
            return max(b_none_count - a_none_count, 0) + greedy_match_embs(
                [x for x in a if x is not None],
                [x for x in b if x is not None]
            )
        # scale embeddings so that their dot product turns into cosine
        a = np.array(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.array(b) / np.linalg.norm(b, axis=1, keepdims=True)
        dots = np.dot(a, b.T)
    # select the closest embeddings
    # note: assume None embeddings are filtered out at this point
    a_closest, b_closest = np.unravel_index(np.argmax(dots), dots.shape)
    min_dist = (1 - dots[a_closest, b_closest]) / 2
    
    # exclude the matched embeddings from the subsequent iterations
    remaining_a_inds = np.arange(len(a)) != a_closest
    remaining_b_inds = np.arange(len(b)) != b_closest
    
    return min_dist + greedy_match_embs(
        a[remaining_a_inds], 
        b[remaining_b_inds], 
        dots[remaining_a_inds][:, remaining_b_inds]
    )


def calc_semantic_distance(a, b):
    a_embs = calc_embs(a)
    b_embs = calc_embs(b)
    return np.maximum(greedy_match_embs(a_embs, b_embs), 0)
    

Функция находит самое близкое нетоксичное слово по предпосчитанным эмбеддингам слов

In [15]:
from functools import lru_cache

@lru_cache()
def find_closest_nontoxic(word, threshold=0.5, measure='cosine'):
    threshold = min(toxicity.get(word, threshold), threshold)
    word = normalize(word)
    word_emb = calc_embs([word])
    if word_emb is None or word_emb[0] is None:
        return None
    word_emb = word_emb[0]
    if measure == 'cosine':
        word_emb /= np.linalg.norm(word_emb)
        dot = embs_vectors_normed.dot(word_emb)
    elif measure == 'l2':
        dot = -((embs_vectors - word_emb[None]) ** 2).sum(axis=1)
    
    for i in np.argsort(dot)[::-1]:
        other_word = embs_voc_by_id[i]
        if other_word != word and toxicity.get(other_word, 1.0) <= threshold:
            return other_word
    return None

Итеративно пытаемся заменить самое токсичное слово на его нетоксичный аналог

In [18]:
def detox_iterative(line):
    result = normalize(line).split()
    while len(result) > 0 and predict_toxicity([' '.join(result)], verbose=False)[0]:
        most_toxic = np.argmax(list(map(lambda x: toxicity.get(x, 0.0), result)))
        
        new_variant = find_closest_nontoxic(result[most_toxic]) or ''
        augmented_result = result[:most_toxic] + [new_variant] + result[most_toxic + 1:]
        
        comparison = predict_toxicity(
            [' '.join(result), ' '.join(augmented_result)],
            verbose=False,
            return_scores=True,
            batch_size=10
        )
        # 0.1 тут 
        if not new_variant or comparison[0] <= comparison[1] + 0.1:
            result = result[:most_toxic] + result[most_toxic + 1:]
        else:
            result = augmented_result
    
    return ' '.join(result)

In [19]:
fixed_texts = list(map(detox_iterative, tqdm(texts)))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2500.0), HTML(value='')))




запишем результат в файл

In [20]:
with open('baseline_fixed.txt', 'wt') as f:
    for text in fixed_texts:
        print(text, file=f)

Скор, если никак не изменять комментарии:

In [21]:
!CUDA_VISIBLE_DEVICES=2 python3.7 score.py public_testset.short.txt public_testset.short.txt  --embeddings embeddings_with_lemmas.npz --model ./trained_roberta/ --device cuda --score -

Loading tokenizer
Loading model
Loading texts
Loading embeddings
Scoring
 18%|███████▎                                | 92/500.0 [00:03<00:15, 26.27it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (533 > 512). Running this sequence through the model will result in indexing errors
100%|███████████████████████████████████████| 500/500.0 [00:21<00:00, 23.74it/s]
2500it [00:12, 194.21it/s]
28.07999973873593


Скор бейзлайна:

In [22]:
!CUDA_VISIBLE_DEVICES=2 python3.7 score.py public_testset.short.txt baseline_fixed.txt  --embeddings embeddings_with_lemmas.npz --model ./trained_roberta/ --device cuda --score -

Loading tokenizer
Loading model
Loading texts
Loading embeddings
Scoring
 37%|██████████████▍                        | 185/500.0 [00:05<00:09, 34.14it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (562 > 512). Running this sequence through the model will result in indexing errors
100%|███████████████████████████████████████| 500/500.0 [00:13<00:00, 36.94it/s]
2500it [00:34, 73.31it/s] 
57.84865188082298
