In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import spacy
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

import re

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GRU, Input, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator, pad_sequences
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor

In [6]:
depression = pd.read_csv('../data/depression_30k.csv')
anxiety = pd.read_csv('../data/anxiety_20k.csv')
basetext = pd.read_csv('../data/basetext.csv')

In [7]:
depression.drop(columns='Unnamed: 0',inplace=True)
anxiety.drop(columns='Unnamed: 0',inplace=True)
basetext.drop(columns='Unnamed: 0',inplace=True)

In [8]:
depression.head()

Unnamed: 0,title,author,selftext,created_utc,subreddit
0,Lifes unfair for us lonely ones,maxpaxxo,When I was 12-13 and under I still had good so...,1602713897,depression
1,Disconnected,Alexmc4444,Does anyone else feel like what has happened i...,1602713800,depression
2,I can’t ever focus and it’s getting worse.,aj2morrow,"I’m just so tired all the time, and because of...",1602713545,depression
3,"Need a word of courage today, please help me :,)",Lynx-Murky,"i have so many things i want/need to do, i wan...",1602713514,depression
4,"I finally got my anxiety under control, but no...",throwaway_87653,Just rambling. I’ve been struggling with anxie...,1602713437,depression


In [9]:
anxiety.head()

Unnamed: 0,title,author,selftext,created_utc,subreddit
0,Fate has a sense of humor,themangoberri,Just reflecting on the fact that so many of th...,1602713908,Anxiety
1,Someone shake me and say it’s all okay!,BigBrainSmallBones,It just feels so overwhelming some times. Long...,1602712822,Anxiety
2,I strangled my girlfriend to death at the age ...,RecordSerious,"\n\nExcuse my English and, yes, this is a thr...",1602712789,Anxiety
3,Is anyone else unable to do literally anything...,Maybe_someday_14,I've noticed that if I have something I'm anxi...,1602712336,Anxiety
4,Lists,Pleb_On_The_Web_2020,Are you all making more or less lists in 2020?...,1602712000,Anxiety


In [10]:
basetext.head()

Unnamed: 0,title,author,selftext,created_utc,subreddit
0,Why is it that the person who beats themself u...,ToesyToeNails,[removed],1602713864,CasualConversation
1,Dealing with sadness,willhound71,Hi I’m Will and I’ve been a lurker for a while...,1602713155,CasualConversation
2,"My life has never been better, and I feel as t...",mrsleveman,"Hi :). I live in the UK and I'm 18, currently ...",1602713095,CasualConversation
3,It‘s my cake day!!!! :o,sinah-mv,I love Reddit and will probably spend too much...,1602713014,CasualConversation
4,Can I have weed dealer I colorado about 15 min...,WALMART_RAPIST,[removed],1602712660,CasualConversation


In [12]:
analyzer = SentimentIntensityAnalyzer()

In [15]:
analyzer.polarity_scores(depression.sample(200)['selftext'])

{'neg': 0.175, 'neu': 0.689, 'pos': 0.136, 'compound': -1.0}

In [16]:
sample_df = pd.concat([depression.sample(500),anxiety.sample(500),basetext.sample(500)])

In [17]:
sample_df['subreddit'].unique()

array(['depression', 'Anxiety', 'CasualConversation', 'happy'],
      dtype=object)

In [20]:
X = sample_df['title'].replace('[removed]','').map(lambda x: str(x))
y = sample_df['subreddit'].map({'Anxiety':0,'depression':1,'happy':2,'CasualConversation':2})

In [21]:
y.unique()

array([1, 0, 2], dtype=int64)

### Sample Word Embeddings to SKlearn Logistic

In [22]:
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y)

In [23]:
import numpy as np
import spacy
from sklearn.base import BaseEstimator, TransformerMixin

class WordVectorTransformer(TransformerMixin,BaseEstimator):
    def __init__(self, model="en_trf_bertbaseuncased_lg"):    #put bert embeddings here
        self.model = model
    def fit(self,X,y=None):
        return self
    def transform(self,X):
        nlp = spacy.load(self.model)
        return np.concatenate([nlp(doc).vector.reshape(1,-1) for doc in X])

In [24]:
from sklearn.pipeline import make_pipeline

In [25]:
pipe = make_pipeline(WordVectorTransformer(), LogisticRegression())

In [166]:
pipe.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Pipeline(steps=[('wordvectortransformer', WordVectorTransformer()),
                ('logisticregression', LogisticRegression())])

In [167]:
pipe.score(X_train,y_train), pipe.score(X_test,y_test)

(0.9911111111111112, 0.5253333333333333)

In [137]:
pipe2 = make_pipeline(CountVectorizer(), LogisticRegression())

In [151]:
pipe2.fit(X_train,y_train)

Pipeline(steps=[('countvectorizer', CountVectorizer()),
                ('logisticregression', LogisticRegression())])

In [152]:
pipe2.score(X_train,y_train), pipe.score(X_test,y_test)

(0.968, 0.692)

In [95]:
analyzer.polarity_scores(neutral['selftext'].sample(5))

{'neg': 0.08, 'neu': 0.593, 'pos': 0.327, 'compound': 0.9989}

In [96]:
analyzer.polarity_scores(depression['selftext'].sample(5))

{'neg': 0.225, 'neu': 0.707, 'pos': 0.068, 'compound': -0.9916}

### Keras SKlearn Classifier

In [26]:
from tensorflow.keras.callbacks import EarlyStopping

In [27]:
callback = EarlyStopping(monitor = 'loss', patience = 5)

In [28]:
#define a network
def network_model():
    model2 = Sequential()
    model2.add(Dense(64, activation = 'relu'))
    model2.add(Dense(64, activation='relu'))
    model2.add(Dense(3,activation='softmax'))

    model2.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])
    
    return model2

In [41]:
#create the keras regressor
clf = KerasClassifier(build_fn = network_model, 
                     nb_epoch=50, 
                     )

In [42]:
pipe3 = make_pipeline(WordVectorTransformer(), clf)

In [43]:
pipe3.fit(X_train,y_train)



Pipeline(steps=[('wordvectortransformer', WordVectorTransformer()),
                ('kerasclassifier',
                 <tensorflow.python.keras.wrappers.scikit_learn.KerasClassifier object at 0x0000022B8F4578B0>)])

In [44]:
pipe3.score(X_train, y_train)



0.5893333554267883

In [45]:
pipe3.score(X_test,y_test)



0.4880000054836273

### BERT Transform to Keras Model (No Wrap)

In [29]:
bertvect = WordVectorTransformer()

In [30]:
X_train_bvect = bertvect.fit_transform(X_train)
X_test_bvect = bertvect.transform(X_test)

In [31]:
y_train_vect = tf.keras.utils.to_categorical(y_train)
y_test_vect = tf.keras.utils.to_categorical(y_test)

In [45]:
model_k = Sequential()
model_k.add(Flatten())
model_k.add(Dense(32,activation='relu'))
model_k.add(Dropout(0.5))
model_k.add(Dense(64,activation='relu'))
model_k.add(Dropout(0.5))
model_k.add(Dense(128,activation='relu'))
model_k.add(Dropout(0.4))

model_k.add(Dense(3, activation='softmax'))

In [46]:
model_k.compile(
    loss = 'categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [47]:
history_k = model_k.fit(X_train_bvect,y_train_vect,validation_data = (X_test_bvect,y_test_vect),epochs=150)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78