In [1]:
import numpy as np
import pandas as pd
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras.layers import Input,Dense,Concatenate,concatenate,Dropout,Embedding,Bidirectional,GlobalMaxPooling1D,GlobalAveragePooling1D,SpatialDropout1D,Add,LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.losses import BinaryCrossentropy
from transformers import AutoTokenizer,TFBertModel
from nltk.tokenize.treebank import TreebankWordTokenizer

In [2]:
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras import initializers


class AttentionWeightedAverage(Layer):
    """
    Computes a weighted average of the different channels across timesteps.
    Uses 1 parameter per channel to compute the attention value for
    a single timestep.

    Parameters
    ----------
    return_attention : bool, default False
        When True, ``call`` returns ``[weighted_average, attention_weights]``
        instead of only the weighted average.
    **kwargs
        Forwarded unchanged to ``Layer.__init__`` (e.g. ``name``).

    Input must be a rank-3 tensor; the average is taken over axis 1 and the
    last axis is preserved.
    """

    def __init__(self, return_attention=False, **kwargs):
        super(AttentionWeightedAverage, self).__init__(**kwargs)
        self.init = initializers.get('uniform')
        # compute_output_shape() reads this flag, so it must always be defined.
        self.return_attention = return_attention

    def build(self, input_shape):
        """Create the single (channels, 1) projection used to score timesteps."""
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 trainable=True,
                                 initializer=self.init)
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, x, mask=None):
        """
        Return the attention-weighted sum of ``x`` over axis 1.

        ``mask`` is accepted for API compatibility but is not applied.
        """
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())

        # broadcast the per-timestep weight across the channel axis
        weighted_input = x * K.expand_dims(att_weights)

        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result

    def get_output_shape_for(self, input_shape):
        """Legacy alias that delegates to ``compute_output_shape``."""
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        """Shape of the averaged output, plus the weights' shape when requested."""
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0],
                                                   input_shape[1])]
        return (input_shape[0], output_len)

    def get_config(self):
        """Include ``return_attention`` so the layer can be re-created from config."""
        config = super(AttentionWeightedAverage, self).get_config()
        config['return_attention'] = self.return_attention
        return config

In [3]:
class Final_Model:

    def __init__(self):
        """
        Load the tokenizers, build the four ensemble members with their
        trained weights, and prepare the character tables used by preprocess().

        All artefacts are read from fixed paths under /content.
        """
        # 237 + 2: NOTE(review) the "+ 2" presumably reserves room for two
        # special tokens around a 237-token comment -- confirm against training.
        self.MAX_LEN = 237 + 2
        self.treebankwordtokenizer = TreebankWordTokenizer()
        self.tokenizer = AutoTokenizer.from_pretrained('/content/bert-base-cased')
        self.bert = TFBertModel.from_pretrained('/content/bert-base-cased')
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load a tokenizer pickle that comes from a trusted source.
        # The context manager closes the handle, which the bare open() leaked.
        with open('/content/models/tokenizer-v2.pkl','rb') as tokenizer_file:
            self.tok = pickle.load(tokenizer_file)
        self.vocab_size = len(self.tok.word_index)
        # The builders below read MAX_LEN, bert and vocab_size, so they must
        # run after those attributes are set.
        self.bert_model = self.get_bert()
        self.model1 = self.get_model1()
        self.model2 = self.get_model2()
        self.model3 = self.get_model3()
        self.bce =  BinaryCrossentropy()


        ## symbols and emojis that are kept in the text (surrounded by spaces)
        self.symbols_to_isolate = '.,?!-;*"…:—()%#$&_/@＼・ω+=”“[]^–>\\°<~•≠™ˈʊɒ∞§{}·τα❤☺ɡ|¢→̶`❥━┣┫┗Ｏ►★©―ɪ✔®\x96\x92●£♥➤´¹☕≈÷♡◐║▬′ɔː€۩۞†μ✒➥═☆ˌ◄½ʻπδηλσερνʃ✬ＳＵＰＥＲＩＴ☻±♍µº¾✓◾؟．⬅℅»Вав❣⋅¿¬♫ＣＭβ█▓▒░⇒⭐›¡₂₃❧▰▔◞▀▂▃▄▅▆▇↙γ̄″☹➡«φ⅓„✋：¥̲̅́∙‛◇✏▷❓❗¶˚˙）сиʿ✨。ɑ\x80◕！％¯−ﬂﬁ₁²ʌ¼⁴⁄₄⌠♭✘╪▶☭✭♪☔☠♂☃☎✈✌✰❆☙○‣⚓年∎ℒ▪▙☏⅛ｃａｓǀ℮¸ｗ‚∼‖ℳ❄←☼⋆ʒ⊂、⅔¨͡๏⚾⚽Φ×θ￦？（℃⏩☮⚠月✊❌⭕▸■⇌☐☑⚡☄ǫ╭∩╮，例＞ʕɐ̣Δ₀✞┈╱╲▏▕┃╰▊▋╯┳┊≥☒↑☝ɹ✅☛♩☞ＡＪＢ◔◡↓♀⬆̱ℏ\x91⠀ˤ╚↺⇤∏✾◦♬³の｜／∵∴√Ω¤☜▲↳▫‿⬇✧ｏｖｍ－２０８＇‰≤∕ˆ⚜☁'
        ## symbols and emojis that are removed from the text
        self.symbols_to_delete = '\n🍕\r🐵😑\xa0\ue014\t\uf818\uf04a\xad😢🐶️\uf0e0😜😎👊\u200b\u200e😁عدويهصقأناخلىبمغر😍💖💵Е👎😀😂\u202a\u202c🔥😄🏻💥ᴍʏʀᴇɴᴅᴏᴀᴋʜᴜʟᴛᴄᴘʙғᴊᴡɢ😋👏שלוםבי😱‼\x81エンジ故障\u2009🚌ᴵ͞🌟😊😳😧🙀😐😕\u200f👍😮😃😘אעכח💩💯⛽🚄🏼ஜ😖ᴠ🚲‐😟😈💪🙏🎯🌹😇💔😡\x7f👌ἐὶήιὲκἀίῃἴξ🙄Ｈ😠\ufeff\u2028😉😤⛺🙂\u3000تحكسة👮💙فزط😏🍾🎉😞\u2008🏾😅😭👻😥😔😓🏽🎆🍻🍽🎶🌺🤔😪\x08‑🐰🐇🐱🙆😨🙃💕𝘊𝘦𝘳𝘢𝘵𝘰𝘤𝘺𝘴𝘪𝘧𝘮𝘣💗💚地獄谷улкнПоАН🐾🐕😆ה🔗🚽歌舞伎🙈😴🏿🤗🇺🇸мυтѕ⤵🏆🎃😩\u200a🌠🐟💫💰💎эпрд\x95🖐🙅⛲🍰🤐👆🙌\u2002💛🙁👀🙊🙉\u2004ˢᵒʳʸᴼᴷᴺʷᵗʰᵉᵘ\x13🚬🤓\ue602😵άοόςέὸתמדףנרךצט😒͝🆕👅👥👄🔄🔤👉👤👶👲🔛🎓\uf0b7\uf04c\x9f\x10成都😣⏺😌🤑🌏😯ех😲Ἰᾶὁ💞🚓🔔📚🏀👐\u202d💤🍇\ue613小土豆🏡❔⁉\u202f👠》कर्मा🇹🇼🌸蔡英文🌞🎲レクサス😛外国人关系Сб💋💀🎄💜🤢َِьыгя不是\x9c\x9d🗑\u2005💃📣👿༼つ༽😰ḷЗз▱ц￼🤣卖温哥华议会下降你失去所有的钱加拿大坏税骗子🐝ツ🎅\x85🍺آإشء🎵🌎͟ἔ油别克🤡🤥😬🤧й\u2003🚀🤴ʲшчИОРФДЯМюж😝🖑ὐύύ特殊作戦群щ💨圆明园קℐ🏈😺🌍⏏ệ🍔🐮🍁🍆🍑🌮🌯🤦\u200d𝓒𝓲𝓿𝓵안영하세요ЖљКћ🍀😫🤤ῦ我出生在了可以说普通话汉语好极🎼🕺🍸🥂🗽🎇🎊🆘🤠👩🖒🚪天一家⚲\u2006⚭⚆⬭⬯⏖新✀╌🇫🇷🇩🇪🇮🇬🇧😷🇨🇦ХШ🌐\x1f杀鸡给猴看ʁ𝗪𝗵𝗲𝗻𝘆𝗼𝘂𝗿𝗮𝗹𝗶𝘇𝗯𝘁𝗰𝘀𝘅𝗽𝘄𝗱📺ϖ\u2000үսᴦᎥһͺ\u2007հ\u2001ɩｙｅ൦ｌƽｈ𝐓𝐡𝐞𝐫𝐮𝐝𝐚𝐃𝐜𝐩𝐭𝐢𝐨𝐧Ƅᴨןᑯ໐ΤᏧ௦Іᴑ܁𝐬𝐰𝐲𝐛𝐦𝐯𝐑𝐙𝐣𝐇𝐂𝐘𝟎ԜТᗞ౦〔Ꭻ𝐳𝐔𝐱𝟔𝟓𝐅🐋ﬃ💘💓ё𝘥𝘯𝘶💐🌋🌄🌅𝙬𝙖𝙨𝙤𝙣𝙡𝙮𝙘𝙠𝙚𝙙𝙜𝙧𝙥𝙩𝙪𝙗𝙞𝙝𝙛👺🐷ℋ𝐀𝐥𝐪🚶𝙢Ἱ🤘ͦ💸ج패티Ｗ𝙇ᵻ👂👃ɜ🎫\uf0a7БУі🚢🚂ગુજરાતીῆ🏃𝓬𝓻𝓴𝓮𝓽𝓼☘﴾̯﴿₽\ue807𝑻𝒆𝒍𝒕𝒉𝒓𝒖𝒂𝒏𝒅𝒔𝒎𝒗𝒊👽😙\u200cЛ‒🎾👹⎌🏒⛸公寓养宠物吗🏄🐀🚑🤷操美𝒑𝒚𝒐𝑴🤙🐒欢迎来到阿拉斯ספ𝙫🐈𝒌𝙊𝙭𝙆𝙋𝙍𝘼𝙅ﷻ🦄巨收赢得白鬼愤怒要买额ẽ🚗🐳𝟏𝐟𝟖𝟑𝟕𝒄𝟗𝐠𝙄𝙃👇锟斤拷𝗢𝟳𝟱𝟬⦁マルハニチロ株式社⛷한국어ㄸㅓ니͜ʖ𝘿𝙔₵𝒩ℯ𝒾𝓁𝒶𝓉𝓇𝓊𝓃𝓈𝓅ℴ𝒻𝒽𝓀𝓌𝒸𝓎𝙏ζ𝙟𝘃𝗺𝟮𝟭𝟯𝟲👋🦊多伦🐽🎻🎹⛓🏹🍷🦆为和中友谊祝贺与其想象对法如直接问用自己猜本传教士没积唯认识基督徒曾经让相信耶稣复活死怪他但当们聊些政治题时候战胜因圣把全堂结婚孩恐惧且栗谓这样还♾🎸🤕🤒⛑🎁批判检讨🏝🦁🙋😶쥐스탱트뤼도석유가격인상이경제황을렵게만들지않록잘관리해야합다캐나에서대마초와화약금의품런성분갈때는반드시허된사용🔫👁凸ὰ💲🗯𝙈Ἄ𝒇𝒈𝒘𝒃𝑬𝑶𝕾𝖙𝖗𝖆𝖎𝖌𝖍𝖕𝖊𝖔𝖑𝖉𝖓𝖐𝖜𝖞𝖚𝖇𝕿𝖘𝖄𝖛𝖒𝖋𝖂𝕴𝖟𝖈𝕸👑🚿💡知彼百\uf005𝙀𝒛𝑲𝑳𝑾𝒋𝟒😦𝙒𝘾𝘽🏐𝘩𝘨ὼṑ𝑱𝑹𝑫𝑵𝑪🇰🇵👾ᓇᒧᔭᐃᐧᐦᑳᐨᓃᓂᑲᐸᑭᑎᓀᐣ🐄🎈🔨🐎🤞🐸💟🎰🌝🛳点击查版🍭𝑥𝑦𝑧ＮＧ👣\uf020っ🏉ф💭🎥Ξ🐴👨🤳🦍\x0b🍩𝑯𝒒😗𝟐🏂👳🍗🕉🐲چی𝑮𝗕𝗴🍒ꜥⲣⲏ🐑⏰鉄リ事件ї💊「」\uf203\uf09a\uf222\ue608\uf202\uf099\uf469\ue607\uf410\ue600燻製シ虚偽屁理屈Г𝑩𝑰𝒀𝑺🌤𝗳𝗜𝗙𝗦𝗧🍊ὺἈἡχῖΛ⤏🇳𝒙ψՁմեռայինրւդձ冬至ὀ𝒁🔹🤚🍎𝑷🐂💅𝘬𝘱𝘸𝘷𝘐𝘭𝘓𝘖𝘹𝘲𝘫کΒώ💢ΜΟΝΑΕ🇱♲𝝈↴💒⊘Ȼ🚴🖕🖤🥘📍👈➕🚫🎨🌑🐻𝐎𝐍𝐊𝑭🤖🎎😼🕷ｇｒｎｔｉｄｕｆｂｋ𝟰🇴🇭🇻🇲𝗞𝗭𝗘𝗤👼📉🍟🍦🌈🔭《🐊🐍\uf10aლڡ🐦\U0001f92f\U0001f92a🐡💳ἱ🙇𝗸𝗟𝗠𝗷🥜さようなら🔼'
        ## punctuation and symbols
        # '\\×' replaces the invalid escape '\×'; the runtime string is unchanged.
        self.punct = "/-'?!.,#$%\'()*+-/:;<=>@[\\]^_`{|}~`" + '""“”’' + '∞θ÷α•à−β∅³π‘₹´°£€\\×™√²—–&'
        ## small-capital letters mapped to their ASCII lowercase equivalents
        self.small_caps_mapping = { 
            "ᴀ": "a", "ʙ": "b", "ᴄ": "c", "ᴅ": "d", "ᴇ": "e", "ғ": "f", "ɢ": "g", "ʜ": "h", "ɪ": "i", 
            "ᴊ": "j", "ᴋ": "k", "ʟ": "l", "ᴍ": "m", "ɴ": "n", "ᴏ": "o", "ᴘ": "p", "ǫ": "q", "ʀ": "r", 
            "s": "s", "ᴛ": "t", "ᴜ": "u", "ᴠ": "v", "ᴡ": "w", "x": "x", "ʏ": "y", "ᴢ": "z"}
        ## special signs (attribute name keeps its original spelling for compatibility)
        self.specail_signs = { "…": "...", "₂": "2"}
        ## special quote-like chars
        self.specials = ["’", "‘", "´", "`"]
        # str.translate tables: pad kept symbols with spaces, drop deleted ones
        self.isolate_dict = {ord(c):f' {c} ' for c in self.symbols_to_isolate}
        self.remove_dict = {ord(c):f'' for c in self.symbols_to_delete}
        

    def predict(self,txt,y_true=None):
        txt = self.preprocess(txt)

        seq = self.bert_get_sequence(txt)
        bert_output = self.bert_model.predict([seq,(seq>0).astype(int)])[:,0][0] * 0.5

        seq = self.get_sequence(txt)
        model1_out = self.model1.predict(seq)[:,0][0] * 0.22
        model2_out = self.model2.predict(seq)[:,0][0] * 0.22
        model3_out = self.model3.predict(seq)[:,0][0] * 0.06

        y_pred = model1_out + model2_out + model3_out + bert_output
        if y_true!=None:
            loss = self.loss(y_true,y_pred)
            return y_pred,loss
        return y_pred

    def loss(self,y_true,y_pred):
        return self.bce(np.array(y_true).reshape(-1,1),np.array(y_pred).reshape(-1,1)).numpy()

    def preprocess(self,x):
        """
        This function apply required preprocession on given text data
        Parameter:
        x : text, string
        """
        def handle_punctuation(x):
            """
            This function handle all the punctuation remove or isolate both
            """
            x = x.translate(self.remove_dict)
            x = x.translate(self.isolate_dict)
            return x
        def handle_contractions(x):
            """
            this function handle contractions
            """
            x = self.treebankwordtokenizer.tokenize(x)
            return x
        def fix_quote(x):
            """
            This function handle single quotes
            """
            x = [x_[1:] if x_.startswith("'") else x_ for x_ in x]
            x = ' '.join(x)
            return x

        x = handle_punctuation(x)
        x = handle_contractions(x)
        x = fix_quote(x)
        return x
    
    def bert_get_sequence(self,txt):
        """
        This function return sequence data for given text
        Parameter:
        txt: text, String
        max_len: maximum length of sequence
        tokenixer bert pre-trained tokenizer
        """
        return np.array(
            self.tokenizer(
                txt,
                return_attention_mask=False,
                return_token_type_ids=False,
                max_length=self.MAX_LEN,
                truncation=True,
                padding='max_length'
            )['input_ids']
        ).reshape(1,-1)
    
    def get_sequence(self,txt):
        """
        This function return sequence data for given text
        Parameter:
        txt: text, String
        max_len: maximum length of sequence
        """
        txt = self.tok.texts_to_sequences([txt])
        txt = pad_sequences(txt,maxlen=self.MAX_LEN,padding='pre')
        return txt

    def get_bert(self):
        """
        Build the BERT-based classifier and load its trained weights.

        self.bert is followed by a 128-unit ReLU layer that feeds two heads:
        a 1-unit sigmoid head and an 8-unit linear head. The heads are
        concatenated into a 9-column output with the sigmoid value first.

        Return:
        Keras Model taking [input_ids, attention_mask], each of length
        self.MAX_LEN, with weights loaded from the .h5 checkpoint

        NOTE: the order in which layers are created is kept exactly as-is;
        changing it could alter how the saved weights map onto the layers.
        """
        input_ids = Input((self.MAX_LEN,),dtype=tf.int32)
        attention_mask = Input((self.MAX_LEN,),dtype=tf.int32)
        # element [1] of the encoder output -- presumably the pooled sentence
        # representation; TODO confirm for the installed transformers version
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)[1]
        x = Dense(128,activation='relu')(bert_out)
        # the 8-unit head branches off before dropout, the sigmoid head after it
        # (what the 8 auxiliary outputs represent is not visible here)
        output2 = Dense(8)(x)
        x = Dropout(0.1)(x)
        output1 = Dense(1,activation='sigmoid')(x)
        # sigmoid output first, so callers read the probability from column 0
        output = concatenate([output1,output2])
        model = Model([input_ids,attention_mask],output)
        model.load_weights('/content/models/BERT-linear-cust-loss-v2-14epoch.h5')
        return model

    def get_model1(self):
        """
        Build the stacked bidirectional-LSTM model and load its trained weights.

        Architecture: 600-d embedding -> spatial dropout -> two stacked
        BiLSTM(128) layers -> concatenated global max and average pooling ->
        two residual 512-unit ReLU blocks -> a 1-unit sigmoid head
        concatenated with a 16-unit linear head (sigmoid value first).

        Return:
        Keras Model taking a (self.MAX_LEN,) index sequence, with weights
        loaded from the .h5 checkpoint
        """
        input = Input((self.MAX_LEN,))
        x = Embedding(self.vocab_size + 2,600)(input)
        x = SpatialDropout1D(0.3)(x)
        # CuDNNLSTM variant left disabled; plain LSTM layers are used instead
        # x = Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128,return_sequences=True))(x)
        # x = Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128,return_sequences=True))(x)
        x = Bidirectional(LSTM(128,return_sequences=True))(x)
        x = Bidirectional(LSTM(128,return_sequences=True))(x)
        gmax = GlobalMaxPooling1D()(x)
        gavg = GlobalAveragePooling1D()(x)
        # two pooled 256-wide vectors -> 512, matching the Dense(512) residual blocks
        x = Concatenate(1)([gmax,gavg])
        d = Dense(512,activation='relu')(x)
        x = Add()([x,d])
        d = Dense(512,activation='relu')(x)
        x = Add()([x,d])
        output1 = Dense(1,activation='sigmoid')(x)
        # 16 auxiliary outputs (their meaning is not visible here)
        output2 = Dense(16)(x)
        # sigmoid output first, so callers read the probability from column 0
        output = concatenate([output1,output2])
        model = Model(input,output)
        model.load_weights('/content/models/stacked-lstm-glove-v2.h5')
        return model

    def get_model2(self):
        """
        Build the BiLSTM + attention model and load its trained weights.

        Architecture: 600-d embedding -> two BiLSTM(256) layers, each preceded
        by spatial dropout -> per-timestep ReLU -> AttentionWeightedAverage ->
        Dense(256) and Dense(128) with dropout 0.5 around them -> a 1-unit
        sigmoid head concatenated with a 16-unit linear head (sigmoid first).

        Return:
        Keras Model taking a (self.MAX_LEN,) index sequence, with weights
        loaded from the .h5 checkpoint
        """
        input = Input((self.MAX_LEN,))
        x = Embedding(self.vocab_size + 2,600)(input)
        x = SpatialDropout1D(0.2)(x)
        x = Bidirectional(LSTM(256,return_sequences=True))(x)
        x = SpatialDropout1D(0.2)(x)
        x = Bidirectional(LSTM(256,return_sequences=True))(x)
        x = tf.keras.layers.TimeDistributed(tf.keras.layers.Activation('relu'))(x)
        # collapse the timestep axis with a learned attention-weighted average
        x = AttentionWeightedAverage()(x)
        x = Dropout(0.5)(x)
        x = Dense(256,activation='relu')(x)
        x = Dropout(0.5)(x)
        x = Dense(128,activation='relu')(x)
        x = Dropout(0.5)(x)
        output1 = Dense(1,activation='sigmoid')(x)
        # 16 auxiliary outputs (their meaning is not visible here)
        output2 = Dense(16)(x)
        # sigmoid output first, so callers read the probability from column 0
        output = tf.keras.layers.concatenate([output1,output2])
        model = Model(input,output)
        model.load_weights('/content/models/bi-lstm-attention-v2.h5')
        return model

    def get_model3(self):
        """
        Build the two-level BiLSTM model with attention and pooling at each
        level, and load its trained weights.

        Architecture: 600-d embedding -> spatial dropout -> BiLSTM(128) ->
        BiLSTM(128); after each BiLSTM the sequence is summarised by
        attention, global max pooling and global average pooling. The six
        summaries are concatenated and passed through two residual
        1536-unit ReLU blocks, then a 1-unit sigmoid head concatenated with a
        9-unit linear head (sigmoid first).

        Return:
        Keras Model taking a (self.MAX_LEN,) index sequence, with weights
        loaded from the .h5 checkpoint
        """
        input = Input((self.MAX_LEN,))
        x = Embedding(self.vocab_size + 2,600)(input)
        x = SpatialDropout1D(0.3)(x)
        # CuDNNLSTM variant left disabled; plain LSTM is used instead
        # x = Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128,return_sequences=True))(x)
        x = Bidirectional(LSTM(128,return_sequences=True))(x)
        # summaries of the first recurrent level
        att1 = AttentionWeightedAverage()(x)
        gmax1 = GlobalMaxPooling1D()(x)
        gavg1 = GlobalAveragePooling1D()(x)
        # x = Bidirectional(tf.compat.v1.keras.layers.CuDNNLSTM(128,return_sequences=True))(x)
        x = Bidirectional(LSTM(128,return_sequences=True))(x)
        # summaries of the second recurrent level
        att2 = AttentionWeightedAverage()(x)
        gmax2 = GlobalMaxPooling1D()(x)
        gavg2 = GlobalAveragePooling1D()(x)
        # six 256-wide summaries -> 1536 = 512*3, matching the residual Dense width
        x = Concatenate(1)([att1,att2,gmax1,gavg1,gmax2,gavg2])
        d = Dense(512*3,activation='relu')(x)
        x = Add()([x,d])
        d = Dense(512*3,activation='relu')(x)
        x = Add()([x,d])
        output1 = Dense(1,activation='sigmoid')(x)
        # 9 auxiliary outputs (their meaning is not visible here)
        output2 = Dense(9)(x)
        # sigmoid output first, so callers read the probability from column 0
        output = tf.keras.layers.concatenate([output1,output2])
        model = Model(input,output)
        model.load_weights('/content/models/bi-lstm-bi-attention-linear-v2.h5')
        return model


In [4]:
# Instantiating the class loads both tokenizers and builds all four models
# with their saved weights, so this cell does all the heavy loading.
model = Final_Model()

Some layers from the model checkpoint at /content/bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at /content/bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Cause: while/else statement not yet supported
Cause: while/else statement not yet supported
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Predictions on randomly chosen comments from the training data:

In [5]:
# Comment passed with label 1 (toxic).
proba, loss = model.predict(
    txt='haha you guys are a bunch of losers.',
    y_true=1,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.995
Loss is 0.005


In [6]:
# Comment passed with label 0 (non-toxic).
proba, loss = model.predict(
    txt="I guess that's the reason  Canada's PM Trudeau loves China and wants more trade with China.",
    y_true=0,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.0
Loss is 0.0


In [7]:
# Comment passed with label 1 (toxic).
proba, loss = model.predict(
    txt='Unfortunately, because both teams suck, aCSU fan could say the same thing.',
    y_true=1,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.725
Loss is 0.321


In [8]:
# Comment passed with label 0 (non-toxic).
proba, loss = model.predict(
    txt='Just by accepting her senate seat in the shamefully, and shamelessly, nepotistic manner in which she did so from her father, BugEyes lost any potential respect from me, and her woeful record in the US Senate on behalf of her corporate donors (and masters) has only lowered my opinion of her since.',
    y_true=0,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.022
Loss is 0.022


In [9]:
# Comment passed with label 1 (toxic).
proba, loss = model.predict(
    txt="The Globe is pathetic, First Nations Unite Against TransMountain? And in the same article it's reported 51 First Nations have agreements with Kinder Morgan... So First Nations are united in support of job creation and the vocal minority is against it... click-bate from the Globe.",
    y_true=1,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.487
Loss is 0.72


In [10]:
# Comment passed with label 0 (non-toxic).
proba, loss = model.predict(
    txt="Homer is mostly white people.  There's no argument there.  There are people of other racial and ethnic backgrounds there, but I've never known this to be an issue.  Most of us don't pay any attention to these things.  They are our neighbors, friends and coworkers.....not segregated by anything really.  I've lived here 15 years, and we live quite peacefully most of the time.  Everyone lends another a helping hand, and I've never seen the so called bullying and hatred that others claim exists.  Intolerant people?  Never.  To each his own is what I see here.",
    y_true=0,
)
print(f"Toxicity of comment is {np.round(proba,3)}")
print(f"Loss is {np.round(loss,3)}")

Toxicity of comment is 0.022
Loss is 0.022


Kaggle Submission:
* Score: 0.93622
* Rank: 632 (top 20.28%)

![](https://drive.google.com/uc?export=view&id=1SgCrKN8SoQDycdiDFE3_pOoZgJXxkuDm)

### References:
* [keras.io](https://keras.io)
* [tensorflow.org](https://tensorflow.org)
* https://huggingface.co/
* https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification
* https://github.com/jiaruxu233/Jigsaw-Unintended-Bias-in-Toxicity-Classification/blob/master/Harness_the_Beast.ipynb
* https://www.kaggle.com/sandeepkumar121995/keras-bi-gru-lstm-attention-fasttext
* https://github.com/minimaxir/textgenrnn/blob/master/textgenrnn/AttentionWeightedAverage.py
* https://www.researchgate.net/publication/341238064_Avoiding_Unintended_Bias_in_Toxicity_Classification_with_Neural_Networks
* https://www.theseus.fi/bitstream/handle/10024/226938/Quan_Do.pdf
* https://fasttext.cc/
* https://nlp.stanford.edu/projects/glove/
* https://www.nltk.org/