In [None]:
import pandas as pd
import sqlite3

# Load every text with its label from the SQLite database, indexed by row id.
# A failed connect is allowed to raise: printing the error and carrying on with
# `connection = None` only postpones the failure to the query below, where the
# message no longer points at the real cause.
connection = sqlite3.connect("data/mental_health.db")
try:
    df = pd.read_sql_query('''
    select id, text, label
    from texts
    ''', connection, index_col="id")
finally:
    # Release the database handle once the frame is in memory; the cell that
    # writes results back opens its own connection.
    connection.close()

df.head()

In [None]:
# Work on a reproducible random subset (fixed seed) to keep the spaCy parsing
# time manageable. The size is capped at the number of available rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# (sampling without replacement).
df = df.sample(n=min(1000, len(df)), random_state=1)

Measured runtime on ii711 at 4 GHz: 4m 24s

In [None]:
import spacy

# Load the large English spaCy pipeline and parse every text into a Doc.
nlp = spacy.load("en_core_web_lg")

# nlp.pipe streams the texts through the pipeline in batches, which is the
# documented way to process many texts and avoids the per-call overhead of
# invoking nlp() once per row. The resulting list is assigned positionally,
# so each row receives the Doc built from its own text.
df["nlp"] = list(nlp.pipe(df["text"]))
df.head()

### POS-Rule-Based Detections

In [None]:
# Empty result table: one row per detected (document, sentence, topic) triple,
# plus the sentiment scores that later cells fill in.
topics_df = pd.DataFrame(
    columns=[
        # where the topic was found
        "document", "sentence", "topic",
        # TextBlob scores
        "tb_polarity", "tb_subjectivity",
        # VADER (SentimentIntensityAnalyzer) scores
        "sia_neg", "sia_neu", "sia_pos", "sia_compound",
    ]
)

In [None]:
from tqdm import tqdm

# Rule-based topic detection: every token that is both the nominal subject
# ("nsubj") of its sentence and tagged as a NOUN becomes a topic row.
#
# Rows are collected in a plain list and converted to a DataFrame once at the
# end. Growing the frame with pd.concat inside the loop copies the whole frame
# on every row (quadratic), and re-running the cell would append duplicates to
# the rows already present.
topic_records = []

# total= lets tqdm show a real progress bar; a bare iterrows() generator has
# no length.
for index, row in tqdm(df.iterrows(), total=len(df)):
    for sentence in row["nlp"].sents:
        # NOTE(review): each sentence is parsed again on its own, so the
        # dependency labels come from the isolated sentence rather than from
        # the full-document parse stored in row["nlp"]. Kept as-is to preserve
        # the detections; confirm whether the re-parse is intentional.
        sentence_document = nlp(sentence.text)
        for token in sentence_document:
            if token.dep_ == "nsubj" and token.pos_ == "NOUN":
                topic_records.append({
                    "document": index,
                    "sentence": sentence.text,
                    "topic": token.text.lower(),
                })

# Reuse the column layout of the (empty) result table so the sentiment columns
# exist and are filled by the following cells. Rows are in document/sentence
# order with a 0..n-1 index.
topics_df = pd.DataFrame(topic_records, columns=topics_df.columns)

topics_df.head()

### Sentiment Detections

In [None]:
from textblob import TextBlob

# Score every sentence with TextBlob and store polarity and subjectivity.
#
# The scores are written to the DataFrame columns directly. Assigning to the
# `row` yielded by iterrows() only changes a per-row copy, so the values would
# never reach topics_df.
tb_sentiments = [
    TextBlob(sentence).sentiment for sentence in tqdm(topics_df["sentence"])
]

# List assignment is positional: entry k belongs to the k-th row.
topics_df["tb_polarity"] = [sentiment.polarity for sentiment in tb_sentiments]
topics_df["tb_subjectivity"] = [sentiment.subjectivity for sentiment in tb_sentiments]

topics_df.head()

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Score every sentence with VADER and store its neg / neu / pos / compound
# values in the matching sia_* columns.
sia = SentimentIntensityAnalyzer()

# The scores are computed once per sentence and then written to the DataFrame
# columns. Assigning to the `row` yielded by iterrows() only changes a per-row
# copy, so the values would never reach topics_df.
sia_scores = [
    sia.polarity_scores(sentence) for sentence in tqdm(topics_df["sentence"])
]

# List assignment is positional: entry k belongs to the k-th row.
topics_df["sia_neg"] = [scores["neg"] for scores in sia_scores]
topics_df["sia_neu"] = [scores["neu"] for scores in sia_scores]
topics_df["sia_pos"] = [scores["pos"] for scores in sia_scores]
topics_df["sia_compound"] = [scores["compound"] for scores in sia_scores]

topics_df.head()

In [None]:
import sqlite3

# Append the detected topics and their sentiment scores to the "sentiments"
# table. A failed connect is allowed to raise: printing the error and carrying
# on with `connection = None` only postpones the failure to to_sql below.
# NOTE: if_exists="append" means re-running this cell inserts the rows again.
connection = sqlite3.connect("data/mental_health.db")
try:
    topics_df.to_sql("sentiments", connection, if_exists="append", index=False)
    connection.commit()
finally:
    # Always release the database handle, even if the write fails.
    connection.close()