In [24]:
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten
from tensorflow.keras.layers import Embedding
from keras_preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
from tokenizer import filter_to_top_x

In [25]:
df = pd.read_csv('data/reddit_depression_suicidewatch.csv')
df.head()

Unnamed: 0,text,label
0,I recently went through a breakup and she said...,depression
1,"I do not know how to navigate these feelings, ...",depression
2,"So I have been with my bf for 5 months , and h...",depression
3,I am so exhausted of this. Just when I think I...,SuicideWatch
4,I have been severly bullied since i was 5 till...,depression


In [26]:
counter = Counter(df['label'].tolist())
print(counter)

Counter({'depression': 10371, 'SuicideWatch': 9992})


In [27]:
top_10_varieties = {i[0]: idx for idx, i in enumerate(counter.most_common(10))}
print(top_10_varieties)

{'depression': 0, 'SuicideWatch': 1}


In [28]:
df = df[df['label'].map(lambda x: x in top_10_varieties)]
df.head()

Unnamed: 0,text,label
0,I recently went through a breakup and she said...,depression
1,"I do not know how to navigate these feelings, ...",depression
2,"So I have been with my bf for 5 months , and h...",depression
3,I am so exhausted of this. Just when I think I...,SuicideWatch
4,I have been severly bullied since i was 5 till...,depression


In [29]:
description_list = df['text'].tolist()
print(description_list[1])

I do not know how to navigate these feelings, not that its a new feeling by any stretch. I just do not understand how I can go on from being so pleasantly fine to suicidal and self loathing within in minutes and not be able to step out of it. I have been suffering from poor mental health for over 10 years and I am about to turn 26 next week and I feel so ashamed for how serious my negative thoughts are. Covid was a hard transition especially having moved to a new city a few months before everything started locking down, but now I am here and settled and I have a loving partner, a great connection to their family, a really fun job and I finished my degree. But when I stop and look around I feel like nothing will ever be good enough for me and I will never be good enough, I will always have skin problems and bad teeth and acne. I will never be the artist I used to be (I have a Bachelor in fine art) because I do not have the same drive or passionate that I used to have for it. I will neve

In [30]:
mapped_list, word_list = filter_to_top_x(description_list, 2500, 10)
print(mapped_list[1])

[5, 28, 48, 218, 353, 6, 81, 258, 113, 96, 85, 8, 5, 278, 48, 12, 64, 22, 55, 67, 15, 459, 209, 247, 950, 7, 820, 18, 231, 942, 35, 0, 3, 1, 37, 414, 55, 953, 213, 293, 9, 117, 544, 74, 29, 561, 1535, 316, 275, 20, 15, 1216, 9, 48, 893, 755, 179, 38, 1921, 14, 164, 639, 191, 539, 258, 995, 184, 201, 178, 94, 205, 177, 11, 50, 108, 1, 898, 593, 388, 1478, 186, 99, 51, 450, 135, 1448, 875, 100, 47, 203, 273, 173, 20, 17, 105, 30, 136, 18, 91, 193, 9, 4, 30, 59, 18, 91, 193, 30, 88, 1, 1370, 338, 122, 1255, 30, 59, 18, 248, 18, 63, 1, 7, 459, 1483, 62, 31, 5, 1, 212, 697, 27, 6, 248, 1, 9, 3, 30, 59, 1, 710, 1922, 482, 530, 173, 31, 542, 822, 7, 1483, 30, 59, 1, 287, 16, 99, 6, 19, 31, 70, 679, 322, 7, 331, 12, 20, 2, 13, 0, 173, 4, 25, 732, 46, 128, 8, 292, 6, 43, 290, 48, 217, 251, 86, 42, 5, 160, 169, 193, 35, 649, 5, 51, 28, 34, 364, 9, 307, 8, 208, 159, 13, 35, 1643, 27, 505, 1640, 1082]


In [31]:
print(word_list)

{'of': 0, 'have': 1, 'is': 2, 'it': 3, 'me': 4, 'do': 5, 'that': 6, 'in': 7, 'just': 8, 'for': 9, 'i': 10, 'but': 11, 'can': 12, 'this': 13, 'was': 14, 'so': 15, 'with': 16, 'like': 17, 'be': 18, 'want': 19, 'feel': 20, 'you': 21, 'on': 22, '?': 23, 'life': 24, 'all': 25, 'myself': 26, 'or': 27, 'know': 28, 'about': 29, 'will': 30, 'because': 31, 'at': 32, 'get': 33, 'what': 34, 'out': 35, 'up': 36, 'been': 37, 'are': 38, 'would': 39, 'if': 40, 'even': 41, 'they': 42, 'no': 43, 'as': 44, 'people': 45, 'time': 46, 'when': 47, 'how': 48, 'had': 49, 'now': 50, 'really': 51, 'going': 52, 'one': 53, 'she': 54, 'from': 55, 'think': 56, 'her': 57, 'them': 58, 'never': 59, 'there': 60, 'much': 61, ')': 62, '(': 63, 'go': 64, 'did': 65, 'only': 66, 'being': 67, 'more': 68, 'day': 69, 'could': 70, 'things': 71, 'friends': 72, 'My': 73, 'years': 74, 'he': 75, 'has': 76, 'an': 77, 'help': 78, 'who': 79, 'anymore': 80, 'its': 81, 'anything': 82, 'some': 83, 'better': 84, 'any': 85, 'way': 86, 'depr

In [32]:
varietal_list_o = [top_10_varieties[i] for i in df['label'].tolist()]
varietal_list = to_categorical(varietal_list_o)
print(varietal_list[0:5])

[[1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]]


In [33]:
max_review_length = 150

mapped_list = pad_sequences(mapped_list, maxlen=max_review_length)
train_x, test_x, train_y, test_y = train_test_split(mapped_list, varietal_list, test_size=0.2)
print(train_y)

[[1. 0.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]
 [0. 1.]
 [0. 1.]]


In [34]:
embedding_vector_length = 64
model = Sequential()

model.add(Embedding(2500, embedding_vector_length, input_length=max_review_length))
model.add(Conv1D(50, 5))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(max(varietal_list_o) + 1, activation='softmax'))

In [35]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [36]:
history = model.fit(train_x, train_y, epochs=50, batch_size=64)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [37]:
y_score = model.predict(test_x)
y_score = [[1 if i == max(sc) else 0 for i in sc] for sc in y_score]
n_right = 0
for i in range(len(y_score)):
    if all(y_score[i][j] == test_y[i][j] for j in range(len(y_score[i]))):
        n_right += 1
print(n_right / len(y_score))

0.6410508224895655


In [38]:
y_score = model.predict(test_x)
correct = 0

for i in range(len(y_score)):
    # print(y_score[i])
    # print(test_y[i])
    if test_y[i][0] > test_y[i][1] and y_score[i][0] > y_score[i][1]:
        correct += 1


print(correct / len(y_score))

0.33022342253866926
