# Setup

In [None]:
import sqlite3
import pandas as pd
import datetime
from wordcloud import WordCloud, STOPWORDS 
import sys
from os import path
import numpy as np
from PIL import Image
from textblob import TextBlob
import nltk
import re
import matplotlib
import matplotlib.pyplot as plt

# Grab text messages

### Grab your iMessages courtesy of https://stmorse.github.io/journal/iMessage.html 

In [None]:
# path to the local iMessage database
DB_PATH = '/Users/wbr/Library/Messages/chat.db'

# sqlite3.connect() creates a new, empty database when the file is missing,
# which would only show up later as a confusing "no such table" error.
# Fail right away instead.
if not path.exists(DB_PATH):
    raise FileNotFoundError('iMessage database not found: ' + DB_PATH)

conn = sqlite3.connect(DB_PATH)
c = conn.cursor()

In [None]:
# Chat ids in this chat.db:
#   194 -> Family group chat
#   198 -> Katie chat (the one queried here)
CHAT_ID = 198

# The query adds the Unix-epoch offset of 2001-01-01 to the `date` column and
# interprets the sum as Unix seconds to get a UTC timestamp string.
# The chat id is bound as a parameter so it can be changed in one place
# without editing the SQL text.
cmd1 = """
    SELECT ROWID, text, handle_id,
           datetime(date + strftime('%s', '2001-01-01'), 'unixepoch') AS date_utc
    FROM message T1
    INNER JOIN chat_message_join T2
        ON T2.chat_id = ?
        AND T1.ROWID = T2.message_id
    ORDER BY T1.date
"""
c.execute(cmd1, (CHAT_ID,))
df = pd.DataFrame(c.fetchall(), columns=['id', 'text', 'sender', 'time'])

In [None]:
# preview the first few rows of the message DataFrame
df.head()

# Plot a histogram of term frequency over time

In [None]:
# Treat empty-string messages as missing, then drop every row without text.
# The replaced column is assigned back to df: calling replace(inplace=True)
# on the df['text'] selection is not guaranteed to modify df itself.
df['text'] = df['text'].replace('', np.nan)
df.dropna(subset=['text'], inplace=True)
# join all remaining messages into one space-separated string
conc_messages = ' '.join(df['text'])


In [None]:
# Make a df containing only the texts that mention 'pizza'.
#  - case=False also matches "Pizza" / "PIZZA" (e.g. at the start of a message)
#  - na=False treats rows with missing text as "no match" instead of NaN
#  - .copy() yields an independent frame, so later column assignments do not
#    write into a slice of df (SettingWithCopyWarning)
df_pizza = df[df['text'].str.contains('pizza', case=False, na=False)].copy()

In [None]:
# Parse the timestamp strings into datetimes so the .dt accessor can be used
# when grouping by month for the histogram.
# pd.to_datetime is used because pandas >= 2.0 rejects the unit-less
# astype("datetime64"); assign() returns a new frame rather than writing
# into what may be a slice of df.
df_pizza = df_pizza.assign(time=pd.to_datetime(df_pizza["time"]))

In [None]:
# global matplotlib font settings (bold, size 22) applied to all plots
font = dict(weight='bold', size=22)
matplotlib.rc('font', **font)

In [None]:
# %matplotlib inline

# The figure size must be configured before the figure is created; setting
# it after .plot() only affects figures created later.
plt.rcParams['figure.figsize'] = [30, 20]

# October 2016 was all about the pizza
# Count the pizza messages per (year, month). size() gives a single bar per
# month, whereas count() would draw one bar per DataFrame column.
pizza_time = pd.to_datetime(df_pizza["time"])
pizza_per_month = df_pizza.groupby(
    [pizza_time.dt.year.rename("year"), pizza_time.dt.month.rename("month")]
).size()
pizza_per_month.plot(kind="bar")
plt.show()

# Make a Wordcloud

### wordcloud source https://github.com/nikhilkumarsingh/wordcloud-example/blob/master/mywc.py

In [None]:
# directory that holds the mask image
currdir = '/Users/WBR/walter/python_psc290/hw'
# read the cloud-shaped mask image into a numpy array
mask_file = path.join(currdir, "cloud.png")
mask = np.array(Image.open(mask_file))

In [None]:
def create_wordcloud(text, mask_path=None, output_path=None, max_words=150):
    """Render a word cloud of *text* and save it as an image file.

    Args:
        text: The text whose words are drawn.
        mask_path: Image used as the mask of the cloud. Defaults to
            "cloud.png" inside the module-level ``currdir``.
        output_path: File the rendered cloud is written to. Defaults to
            "wc.png" inside the module-level ``currdir``.
        max_words: Value passed to ``WordCloud(max_words=...)``.
    """
    # resolve the defaults at call time so the original one-argument call
    # keeps reading from / writing to currdir
    if mask_path is None:
        mask_path = path.join(currdir, "cloud.png")
    if output_path is None:
        output_path = path.join(currdir, "wc.png")

    # copy the mask pixels into a numpy array; the context manager closes
    # the image file as soon as the data has been read
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)

    # create wordcloud object, ignoring the package's default stopwords
    wc = WordCloud(background_color="white",
                   max_words=max_words,
                   mask=mask,
                   stopwords=set(STOPWORDS))

    # generate the cloud and save it
    wc.generate_from_text(text)
    wc.to_file(output_path)

In [None]:
# build the word cloud from all concatenated messages (saved as wc.png in currdir)
create_wordcloud(conc_messages)

# Trying textblob

In [None]:
# create a TextBlob object from the concatenated messages
da_words = TextBlob(conc_messages)
# part-of-speech tag the text; the result is used below as (word, tag) pairs
tagged = da_words.tags

In [None]:
# Get a list of the distinct verbs from the tagged words.
# Every Penn Treebank verb tag starts with "VB" (VB, VBD, VBG, VBN, VBP,
# VBZ), so a prefix match keeps inflected forms, not just the base form.
# Words are lower-cased because the keys of TextBlob.word_counts, which the
# verbs are matched against below, are lower-cased.
verbs_from_texts = sorted(
    {word.lower() for word, tag in tagged if tag.startswith('VB')}
)

In [None]:
# tabulate every word of the text together with its number of occurrences
items = da_words.word_counts.items()
df_wordcounts = pd.DataFrame(data=[*items], columns=['word', 'count'])

In [None]:
# keep only the word-count rows whose word is one of the verbs
is_verb = df_wordcounts['word'].isin(verbs_from_texts)
df_verbs = df_wordcounts[is_verb]

In [None]:
# preview the first few rows of the verb counts
df_verbs.head()

In [None]:
# show the verbs ordered from most to least frequent
df_verbs.sort_values(by='count', ascending=False)