In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [3]:
import pandas as pd
test = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/dreaddit-test.csv")
test.describe()

Unnamed: 0,id,label,confidence,social_timestamp,social_karma,syntax_ari,lex_liwc_WC,lex_liwc_Analytic,lex_liwc_Clout,lex_liwc_Authentic,lex_liwc_Tone,lex_liwc_WPS,lex_liwc_Sixltr,lex_liwc_Dic,lex_liwc_function,lex_liwc_pronoun,lex_liwc_ppron,lex_liwc_i,lex_liwc_we,lex_liwc_you,lex_liwc_shehe,lex_liwc_they,lex_liwc_ipron,lex_liwc_article,lex_liwc_prep,lex_liwc_auxverb,lex_liwc_adverb,lex_liwc_conj,lex_liwc_negate,lex_liwc_verb,lex_liwc_adj,lex_liwc_compare,lex_liwc_interrog,lex_liwc_number,lex_liwc_quant,lex_liwc_affect,lex_liwc_posemo,lex_liwc_negemo,lex_liwc_anx,lex_liwc_anger,...,lex_liwc_motion,lex_liwc_space,lex_liwc_time,lex_liwc_work,lex_liwc_leisure,lex_liwc_home,lex_liwc_money,lex_liwc_relig,lex_liwc_death,lex_liwc_informal,lex_liwc_swear,lex_liwc_netspeak,lex_liwc_assent,lex_liwc_nonflu,lex_liwc_filler,lex_liwc_AllPunc,lex_liwc_Period,lex_liwc_Comma,lex_liwc_Colon,lex_liwc_SemiC,lex_liwc_QMark,lex_liwc_Exclam,lex_liwc_Dash,lex_liwc_Quote,lex_liwc_Apostro,lex_liwc_Parenth,lex_liwc_OtherP,lex_dal_max_pleasantness,lex_dal_max_activation,lex_dal_max_imagery,lex_dal_min_pleasantness,lex_dal_min_activation,lex_dal_min_imagery,lex_dal_avg_activation,lex_dal_avg_imagery,lex_dal_avg_pleasantness,social_upvote_ratio,social_num_comments,syntax_fk_grade,sentiment
count,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,...,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0,715.0
mean,13861.372028,0.516084,0.717895,1517570000.0,24.627972,4.705624,85.667133,34.743399,38.410839,69.917413,32.375706,17.861091,14.885566,92.513706,58.890937,20.054713,14.110042,9.360406,0.780629,0.775259,2.61986,0.57351,5.929762,4.828923,13.498098,10.234196,6.169497,7.735692,2.277483,19.270545,4.529538,2.336797,1.520993,1.375986,2.320895,6.050853,2.606573,3.360769,0.980853,0.880308,...,1.975552,6.328685,6.415217,1.765329,0.768364,0.573035,0.720657,0.133413,0.143301,0.820769,0.251287,0.21958,0.128378,0.112238,0.054895,16.733203,6.198294,3.56842,0.185538,0.099189,0.460196,0.162937,0.468797,0.425399,3.325063,0.737287,1.102014,2.79917,2.713098,2.944056,1.081046,1.117715,1.00028,1.723382,1.529951,1.878992,0.856294,8.951049,5.497574,0.045345
std,17765.755427,0.500091,0.325688,15539440.0,113.086473,3.244652,31.19732,26.33158,30.834289,31.682491,34.495223,7.002507,5.154319,4.458381,6.213774,5.54713,4.739893,4.440333,1.592174,1.986996,3.636421,1.348621,3.233104,2.639801,3.644833,3.624511,3.186709,2.914734,1.882514,4.499541,2.703421,1.910232,1.518803,1.921668,1.877378,3.236959,2.364876,2.810139,1.532993,1.332447,...,1.86584,3.349923,3.800014,2.376354,1.41437,1.13096,1.728165,0.616299,0.493143,1.314436,0.692429,0.707232,0.433971,0.381352,0.293046,9.101795,3.147372,2.759322,0.614649,0.48257,1.185406,0.741345,2.091756,1.247314,2.838065,1.538606,6.207591,0.158079,0.172314,0.13137,0.107884,0.089972,0.00748,0.048894,0.100724,0.056044,0.174352,12.891993,2.490971,0.194114
min,2.0,0.0,0.0,1483330000.0,0.0,-4.8,12.0,1.0,1.0,1.0,1.0,2.4,0.0,66.67,16.67,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3,2.1111,2.0,1.0,1.0,1.0,1.57467,1.28148,1.71854,0.2,0.0,-2.318095,-1.0
25%,848.0,0.0,0.6,1509572000.0,2.0,2.584422,65.0,11.975,10.34,46.4,1.23,13.4,11.355,90.25,55.215,16.145,11.11,6.51,0.0,0.0,0.0,0.0,3.7,3.03,11.24,7.84,3.75,5.77,0.98,16.205,2.6,1.045,0.0,0.0,1.07,3.74,1.005,1.32,0.0,0.0,...,0.0,4.0,3.57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.155,4.415,1.54,0.0,0.0,0.0,0.0,0.0,0.0,1.19,0.0,0.0,2.7143,2.625,3.0,1.0,1.0,1.0,1.69257,1.46426,1.841965,0.765,2.0,3.693035,-0.056181
50%,1778.0,1.0,0.8,1516658000.0,5.0,4.477286,81.0,30.37,30.24,84.47,16.15,16.8,14.29,93.33,59.3,20.0,13.79,9.73,0.0,0.0,0.0,0.0,5.51,4.57,13.33,10.14,6.0,7.69,2.0,19.4,4.3,2.08,1.32,1.01,2.0,5.71,2.06,2.88,0.0,0.0,...,1.64,6.06,6.02,1.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.69,5.63,3.33,0.0,0.0,0.0,0.0,0.0,0.0,2.83,0.0,0.0,2.8,2.7143,3.0,1.0,1.125,1.0,1.72066,1.52326,1.87817,0.91,5.0,5.327778,0.04246
75%,26201.5,1.0,1.0,1531001000.0,10.0,6.723889,101.5,53.48,61.58,96.945,60.395,21.0,18.05,95.675,62.77,23.665,17.165,12.43,1.09,0.0,4.64,0.77,7.87,6.435,16.06,12.35,8.295,9.495,3.33,22.22,6.02,3.3,2.33,2.045,3.47,7.925,3.91,4.92,1.545,1.47,...,3.03,8.53,8.745,2.81,1.195,0.94,0.715,0.0,0.0,1.35,0.0,0.0,0.0,0.0,0.0,19.415,7.25,5.26,0.0,0.0,0.0,0.0,0.0,0.0,4.915,1.165,0.0,3.0,2.8571,3.0,1.125,1.1429,1.0,1.75311,1.59257,1.910965,1.0,11.0,7.04938,0.154701
max,55783.0,1.0,1.0,1542583000.0,1687.0,22.535493,264.0,98.22,99.0,99.0,99.0,60.0,35.44,100.0,73.47,38.89,30.19,23.08,12.5,15.52,20.0,12.63,18.18,17.65,23.73,27.78,17.14,16.67,11.54,35.48,15.96,9.8,10.0,25.0,10.61,17.33,14.29,16.67,16.67,11.9,...,13.51,22.06,24.29,22.22,11.54,7.41,16.67,9.68,3.9,14.29,5.26,6.67,3.57,2.94,3.33,166.1,33.33,22.58,9.52,8.33,8.57,9.38,48.08,14.29,18.18,16.67,152.54,3.0,3.0,3.0,1.7778,1.7778,1.2,1.94642,2.04,2.14043,1.0,128.0,18.530103,0.8


In [4]:
train = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/dreaddit-train.csv")
train.describe()

Unnamed: 0,id,label,confidence,social_timestamp,social_karma,syntax_ari,lex_liwc_WC,lex_liwc_Analytic,lex_liwc_Clout,lex_liwc_Authentic,lex_liwc_Tone,lex_liwc_WPS,lex_liwc_Sixltr,lex_liwc_Dic,lex_liwc_function,lex_liwc_pronoun,lex_liwc_ppron,lex_liwc_i,lex_liwc_we,lex_liwc_you,lex_liwc_shehe,lex_liwc_they,lex_liwc_ipron,lex_liwc_article,lex_liwc_prep,lex_liwc_auxverb,lex_liwc_adverb,lex_liwc_conj,lex_liwc_negate,lex_liwc_verb,lex_liwc_adj,lex_liwc_compare,lex_liwc_interrog,lex_liwc_number,lex_liwc_quant,lex_liwc_affect,lex_liwc_posemo,lex_liwc_negemo,lex_liwc_anx,lex_liwc_anger,...,lex_liwc_motion,lex_liwc_space,lex_liwc_time,lex_liwc_work,lex_liwc_leisure,lex_liwc_home,lex_liwc_money,lex_liwc_relig,lex_liwc_death,lex_liwc_informal,lex_liwc_swear,lex_liwc_netspeak,lex_liwc_assent,lex_liwc_nonflu,lex_liwc_filler,lex_liwc_AllPunc,lex_liwc_Period,lex_liwc_Comma,lex_liwc_Colon,lex_liwc_SemiC,lex_liwc_QMark,lex_liwc_Exclam,lex_liwc_Dash,lex_liwc_Quote,lex_liwc_Apostro,lex_liwc_Parenth,lex_liwc_OtherP,lex_dal_max_pleasantness,lex_dal_max_activation,lex_dal_max_imagery,lex_dal_min_pleasantness,lex_dal_min_activation,lex_dal_min_imagery,lex_dal_avg_activation,lex_dal_avg_imagery,lex_dal_avg_pleasantness,social_upvote_ratio,social_num_comments,syntax_fk_grade,sentiment
count,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,...,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0,2838.0
mean,13751.999295,0.524313,0.808972,1518107000.0,18.262156,4.684272,85.996124,35.240941,40.948231,67.044249,33.428157,18.189475,14.858157,92.349292,58.598887,19.767699,13.98753,9.066254,0.764679,0.865772,2.708076,0.582667,5.76902,4.937636,13.356656,10.306311,6.045853,7.599056,2.262903,19.431293,4.362734,2.287287,1.608217,1.444151,2.246406,6.0974,2.697861,3.304732,0.914197,0.928534,...,2.004397,6.295144,6.199316,1.875694,0.785134,0.635113,0.786226,0.115662,0.141811,0.825655,0.246483,0.255201,0.119112,0.124767,0.049856,17.05365,6.066293,3.573312,0.218788,0.123428,0.519066,0.192491,0.486801,0.473513,3.176459,0.778961,1.444264,2.796964,2.70494,2.948414,1.088001,1.120099,1.000211,1.722759,1.5364,1.879385,0.843517,9.948555,5.448836,0.04074
std,17340.161897,0.499497,0.177038,15522090.0,79.419166,3.316435,32.334887,26.486189,31.587117,32.880644,35.33477,9.51606,5.531303,5.301902,6.852503,5.577772,4.792445,4.653986,1.564468,2.095719,3.611687,1.26514,3.22379,2.611391,3.649962,3.649751,3.182057,2.813245,1.894125,4.769641,2.688372,1.990455,1.540092,1.910695,1.790213,3.530977,2.541304,3.021693,1.468605,1.447357,...,1.796235,3.261277,3.781227,2.455234,1.352407,1.217066,1.716934,0.528442,0.555553,1.319571,0.743807,0.730756,0.484504,0.415866,0.328158,12.335859,3.219358,2.870731,0.807024,0.500141,2.342935,1.029934,1.805352,1.295909,2.723686,1.728138,10.057253,0.162649,0.174462,0.125843,0.117159,0.085227,0.0065,0.047835,0.102971,0.058932,0.174794,21.798032,2.535829,0.19549
min,4.0,0.0,0.428571,1483274000.0,0.0,-6.62,5.0,1.0,1.0,1.0,1.0,2.4,0.0,27.91,4.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.8571,2.0,1.0,1.0,1.0,1.4854,1.2,1.56115,0.14,0.0,-1.918,-1.0
25%,926.25,0.0,0.6,1509698000.0,2.0,2.464243,65.0,12.41,12.135,41.07,1.42,13.4,11.11,90.09,54.84,16.05,10.7175,6.06,0.0,0.0,0.0,0.0,3.33,3.12,11.0,7.81,3.85,5.77,0.9725,16.28,2.505,0.93,0.0,0.0,1.08,3.57,0.9625,1.14,0.0,0.0,...,0.6425,3.96,3.45,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.05,4.21,1.43,0.0,0.0,0.0,0.0,0.0,0.0,1.15,0.0,0.0,2.7143,2.625,3.0,1.0,1.0,1.0,1.69143,1.469745,1.841782,0.75,2.0,3.729973,-0.072222
50%,1891.5,1.0,0.8,1517066000.0,5.0,4.321886,81.0,29.42,33.52,80.71,25.77,16.775,14.29,93.43,59.26,19.835,13.79,9.38,0.0,0.0,0.95,0.0,5.36,4.76,13.45,10.065,5.8,7.66,1.95,19.32,4.08,1.89,1.39,1.03,1.96,5.56,2.17,2.7,0.0,0.0,...,1.69,5.97,5.68,1.16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.52,5.59,3.265,0.0,0.0,0.0,0.0,0.0,0.0,2.725,0.0,0.0,2.8,2.7,3.0,1.0,1.1429,1.0,1.72143,1.530295,1.87825,0.89,5.0,5.21,0.044821
75%,25473.75,1.0,1.0,1530898000.0,10.0,6.505657,101.0,55.0575,69.32,96.18,61.55,21.0,18.0225,95.77,63.025,23.5225,17.185,12.2,1.0475,0.7975,4.76,0.88,7.7775,6.59,15.79,12.5,8.0,9.38,3.37,22.41,5.88,3.39,2.44,2.2,3.2,8.0,3.9,4.84,1.45,1.47,...,3.03,8.275,8.47,2.78,1.23,1.075,1.02,0.0,0.0,1.33,0.0,0.0,0.0,0.0,0.0,19.7275,7.25,5.19,0.0,0.0,0.0,0.0,0.0,0.0,4.76,1.41,0.9075,3.0,2.8571,3.0,1.1429,1.1429,1.0,1.75176,1.59603,1.916243,1.0,10.0,6.855217,0.166667
max,55757.0,1.0,1.0,1542592000.0,1435.0,24.074231,310.0,99.0,99.0,99.0,99.0,233.0,65.12,100.0,77.78,44.44,30.77,30.0,12.96,25.0,23.47,12.86,21.43,15.53,29.63,25.0,21.43,20.69,12.9,40.0,18.18,15.15,17.86,25.0,11.63,37.5,20.0,37.5,25.0,15.79,...,14.55,21.43,27.27,20.0,13.33,13.79,14.93,8.24,11.11,13.04,8.33,11.11,8.7,3.85,11.11,420.41,47.06,41.86,20.0,10.53,105.41,40.0,55.56,12.5,25.0,41.94,391.84,3.0,3.0,3.0,1.9,1.5,1.2,2.0074,2.06667,2.15849,1.0,416.0,21.198919,1.0


In [5]:
train['subreddit'].unique()

array(['ptsd', 'assistance', 'relationships', 'survivorsofabuse',
       'domesticviolence', 'anxiety', 'homeless', 'stress',
       'almosthomeless', 'food_pantry'], dtype=object)

In [6]:
new_train = train[['subreddit','text','label','sentiment']]
new_train = new_train[new_train['subreddit']!='food_pantry']
index = new_train[new_train['subreddit']=='almosthomeless'].index
new_train.loc[index,'subreddit'] = 'homeless'

In [7]:
new_train['subreddit'].unique()

array(['ptsd', 'assistance', 'relationships', 'survivorsofabuse',
       'domesticviolence', 'anxiety', 'homeless', 'stress'], dtype=object)

In [8]:
train_x,train_y = new_train[['text','sentiment','label']], new_train[['subreddit']]

In [9]:
new_test = test[['subreddit','text','label','sentiment']]
new_test = new_test[new_test['subreddit']!='food_pantry']
index = new_test[new_test['subreddit']=='almosthomeless'].index
new_test.loc[index,'subreddit'] = 'homeless'

In [10]:
new_test['subreddit'].unique()

array(['relationships', 'anxiety', 'ptsd', 'assistance', 'homeless',
       'domesticviolence', 'survivorsofabuse', 'stress'], dtype=object)

In [11]:
test_x,test_y = new_test[['text','sentiment','label']],new_test[['subreddit']]

In [12]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Tokenize our training data
num_words = 100000
oov_token = '<UNK>'
pad_type = 'post'
trunc_type = 'post'

tokenizer = Tokenizer(num_words=num_words, oov_token=oov_token)
tokenizer.fit_on_texts(train_x['text'])

# Get our training data word index
word_index = tokenizer.word_index

# Encode training data sentences into sequences
train_sequences = tokenizer.texts_to_sequences(train_x['text'])

# Get max training sequence length
maxlen = max([len(x) for x in train_sequences])

# Pad the training sequences
train_padded = pad_sequences(train_sequences, padding=pad_type, truncating=trunc_type, maxlen=maxlen)

# Output the results of our work
print("Word index:\n", word_index)
print("\nTraining sequences:\n", train_sequences)
print("\nPadded training sequences:\n", train_padded)
print("\nPadded training shape:", train_padded.shape)
print("Training sequences data type:", type(train_sequences))
print("Padded Training sequences data type:", type(train_padded))

Word index:

Training sequences:
 [[20, 123, 20, 42, 28, 190, 10, 124, 131, 6441, 2, 71, 724, 4, 22, 836, 1078, 35, 6442, 6, 6443, 32, 9, 2, 1432, 3, 374, 39, 394, 8, 3252, 11, 1927, 8, 251, 154, 3836, 105, 69, 2843, 1256, 8, 6444, 6445, 6, 569, 50, 25, 3837, 1507, 31, 192, 484, 1199, 1257, 108, 2, 94, 63, 2079, 33, 151, 5, 863, 70, 2, 425, 39, 418, 10, 19, 648, 8, 3252, 53, 26, 6446, 8, 1683, 289, 4757, 65, 8, 111, 2, 55, 3253, 3, 22, 21, 630, 8, 7, 3252, 3, 7, 6447, 2, 55, 64, 6448, 461, 30, 7, 420, 2, 38, 441, 3, 17, 6, 466, 11, 6449, 286], [745, 79, 570, 725, 28, 226, 35, 19, 15, 5, 155, 197, 3, 261, 19, 16, 126, 470, 27, 414, 6, 674, 6450, 33, 6451, 3838, 4758, 4, 258, 21, 6, 364, 3, 68, 1433, 103, 3839, 6452, 3254, 33, 5, 230, 2286, 34, 6453, 33, 159, 5, 675, 471, 269, 13, 584, 3840, 1802, 3255, 2287, 22, 35, 539, 6, 3255, 674, 31, 45, 3255, 2287, 2, 54, 889, 800, 35, 25, 53, 68, 129, 31, 1011, 515, 19, 390, 364, 37, 6, 277, 25, 229, 75, 608, 129, 5, 364, 67, 26, 2539, 11, 3, 6, 

In [14]:
import pickle

# saving
with open('/content/drive/MyDrive/Colab Notebooks/tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # loading
# with open('tokenizer.pickle', 'rb') as handle:
#     tokenizer = pickle.load(handle)

In [16]:
test_sequences = tokenizer.texts_to_sequences(test_x['text'])
test_padded = pad_sequences(test_sequences, padding=pad_type, truncating=trunc_type, maxlen=maxlen)

In [17]:
vocab_size=13000

In [18]:
train_y['subreddit'] = train_y.subreddit.astype('category')

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train_y["subreddit"] = le.fit_transform(train_y["subreddit"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [None]:
test_y['subreddit'] = test_y.subreddit.astype('category')
le = LabelEncoder()
test_y["subreddit"] = le.fit_transform(test_y["subreddit"])

In [21]:
from keras.utils import to_categorical
test_labels = to_categorical(test_y['subreddit'])
train_labels = to_categorical(train_y['subreddit'])

In [25]:
embed_dim = 32  # Embedding size for each token
num_heads = 8  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.4)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
history = model.fit(train_padded,train_x['label'], batch_size=5, epochs=25, validation_data=(test_padded, test_x['label']))

In [29]:
reconstructed_model = keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/StressDetector')

In [28]:
model.save("/content/drive/MyDrive/Colab Notebooks/StressDetector")



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/StressDetector/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/StressDetector/assets


In [34]:
def stress_detect(msg):
  test_sequences = tokenizer.texts_to_sequences([msg])
  test_padded = pad_sequences(test_sequences, padding=pad_type, truncating=trunc_type, maxlen=maxlen)
  ans = model.predict(test_padded)
  return ans[0][0]

0.44148374