In [None]:
# Create initial tweets database from csv
import csv, sqlite3

# Rebuild the tweets table from scratch so re-running this cell never duplicates rows.
con = sqlite3.connect("db/db_tweets.db")
try:
    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS tweets;")
    cur.execute("CREATE TABLE IF NOT EXISTS tweets (party, handle, tweet);")

    # newline='' hands line-ending handling to the csv module, which its docs
    # require so that newlines inside quoted fields are interpreted correctly.
    with open('original_data/ExtractedTweets.csv', 'rt', encoding='utf-8', newline='') as fin:
        dr = csv.DictReader(fin)
        to_db = [(i['Party'], i['Handle'], i['Tweet']) for i in dr]

    cur.executemany("INSERT INTO tweets (party, handle, tweet) VALUES (?, ?, ?);", to_db)
    con.commit()
finally:
    # Close the connection even when reading the CSV or inserting fails.
    con.close()

In [None]:
# Create initial stop-words database
import csv, sqlite3

# Rebuild the stop-word table from scratch so re-running this cell never duplicates rows.
con = sqlite3.connect("db/db_stop-words.db")
try:
    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS stopwords;")
    cur.execute("CREATE TABLE IF NOT EXISTS stopwords (word);")

    with open('original_data/stop-word-list.csv', newline='') as csvfile:
        dr = csv.reader(csvfile)
        to_db = []
        for i in dr:
            # Accumulate across rows: plain assignment here kept only the words
            # of the last row read. Blank cells are skipped.
            to_db.extend((j.strip(), ) for j in i if j.strip())

    cur.executemany("INSERT INTO stopwords (word) VALUES (?);", to_db)
    con.commit()
finally:
    # Close the connection even when reading the CSV or inserting fails.
    con.close()

In [None]:
import sqlite3

# Print every word stored in the stopwords table, one per line.
con = sqlite3.connect("db/db_stop-words.db")
cur = con.cursor()

stored_rows = cur.execute("SELECT word FROM stopwords").fetchall()
for (stop_word,) in stored_rows:
    print(stop_word)

con.commit()
con.close()

In [None]:
# Count and plot the tweets in which each party mentions the opposing party
import sqlite3
import matplotlib.pyplot as plt
import re

# Module-level connection and cursor for the tweets database; analyze_mud
# below reads `cur` as a global.
conn = sqlite3.connect('db/db_tweets.db')
cur = conn.cursor()

def plot_mud(mud_d, mud_r):
    """Show a two-bar chart comparing the two party counts.

    mud_d -- bar height drawn under the "Democrats" label
    mud_r -- bar height drawn under the "Republicans" label
    """
    party_labels = ("Democrats", "Republicans")
    bar_heights = (mud_d, mud_r)
    bar_colors = ("lightblue", "#ff6666")

    figure, axes = plt.subplots()
    axes.bar(party_labels, bar_heights, color=bar_colors)
    axes.set_title('Instances of political parties mentioning opposing party')
    axes.set_ylabel('# of instances')
    axes.set_xlabel('Political Party')
    figure.tight_layout()
    plt.show()

def analyze_mud(plot):
    """Count opposing-party mentions per party and optionally chart them.

    plot -- when truthy, both counts are handed to plot_mud
    """
    def count_for(party_query, is_dem):
        # Run one party's query on the shared cursor and score its tweets.
        cur.execute(party_query)
        return get_mud(cur.fetchall(), is_dem)

    mud_d = count_for("SELECT tweet FROM tweets WHERE party='Democrat'", True)
    mud_r = count_for("SELECT tweet FROM tweets WHERE party='Republican'", False)

    if not plot:
        return
    plot_mud(mud_d, mud_r)
    
def get_mud(tweets, dem):
    """Count the tweets that mention the opposing party.

    tweets -- iterable of rows whose first element is the tweet text
    dem -- True to search for Republican-related terms (tweets written by
           Democrats), False to search for Democrat-related terms

    Returns the number of rows with at least one case-insensitive match.
    Rows whose text is None or empty are skipped.
    """
    # Between "right"/"left" and "wing", accept either word characters (the
    # original behaviour, e.g. "rightwing") or a hyphen/whitespace separator,
    # so "right-wing" and "left wing" are counted as well.
    if dem:
        pattern = (
            r"(?!\brepublic\b)\brepub+|\bgop\b|\bconservative+"
            r"|\bright(?:\w*|[\s-]+)wing\b"
        )
    else:
        # NOTE(review): \bdem+ matches any word starting with "dem"
        # (e.g. "demand"), not only party names -- confirm that is intended.
        pattern = (
            r"(?!\bdemarest)(?!\bdemocracy\b)\bdem+|\bdnc\b|\bliberal+|\blibs+"
            r"|\bleft(?:\w*|[\s-]+)wing\b"
        )
    matcher = re.compile(pattern, re.I)
    return sum(1 for tweet in tweets if tweet[0] and matcher.search(tweet[0]))

# Run the analysis and show the chart; close the connection even if it raises.
try:
    analyze_mud(True)
finally:
    conn.close()

In [None]:
# Plot the ten most frequent non-stop-words in each party's tweets
import sqlite3
import matplotlib.pyplot as plt
import re

# Module-level connection and cursor for the tweets database; analyze_words
# below reads `cur` as a global.
conn = sqlite3.connect('db/db_tweets.db')
cur = conn.cursor()

def plot_words(words, counts, fig_title, fig_color):
    """Show a horizontal bar chart of word counts.

    words -- bar labels, one per bar
    counts -- bar lengths, parallel to `words`
    fig_title -- text substituted into the chart title
    fig_color -- colour applied to the bars
    """
    heading = 'Top ten words used in tweets from {}'.format(fig_title)

    figure, axes = plt.subplots()
    axes.barh(words, counts, color=fig_color)
    axes.set_title(heading)
    axes.set_ylabel('Word')
    axes.set_xlabel('# of instances')
    figure.tight_layout()
    plt.show()

def analyze_words():
    """Chart the most-used words for Democrats, then for Republicans."""
    stopwords = get_stop_words()

    # (query, chart label, bar colour) for each party, in display order.
    party_runs = (
        ("SELECT tweet FROM tweets WHERE party='Democrat'", "Democrats", "lightblue"),
        ("SELECT tweet FROM tweets WHERE party='Republican'", "Republicans", "#ff6666"),
    )
    for party_query, label, bar_color in party_runs:
        cur.execute(party_query)
        analyze(cur.fetchall(), stopwords, label, bar_color)
    
def get_stop_words():
    """Return the values of the `word` column of the stopwords table as a list.

    Opens its own connection to db/db_stop-words.db and always closes it,
    even when the query raises (for example if the table does not exist).
    """
    con = sqlite3.connect("db/db_stop-words.db")
    try:
        rows = con.execute("SELECT word FROM stopwords").fetchall()
    finally:
        con.close()
    return [row[0] for row in rows]
    
def filter_word(word, stopwords):
    """Return True if `word` should be counted, False if it should be ignored.

    word -- a whitespace-delimited token from a tweet
    stopwords -- container of words to ignore; anything supporting `in`
                 works, and a set makes the lookup constant-time

    The token is lower-cased and stripped before comparison. 'rt', '&amp;'
    and '-' are always ignored.
    """
    token = word.lower().strip()
    # Check the fixed tokens and the stop words separately instead of copying
    # the whole stop-word list into a new list on every call.
    return token not in ('rt', '&amp;', '-') and token not in stopwords
    
def analyze(tweets, stopwords, fig_title, fig_color):
    """Count the words in `tweets` that pass filter_word and chart the top ten.

    tweets -- iterable of rows whose first element is the tweet text
    stopwords -- iterable of words to ignore
    fig_title -- forwarded to plot_words
    fig_color -- forwarded to plot_words
    """
    # Build the lookup once rather than per word; filter_word only needs to
    # iterate over it or test membership.
    stopword_set = frozenset(stopwords)

    word_counts = {}
    for tweet in tweets:
        # A missing tweet text (None) is treated as empty instead of crashing.
        for word in (tweet[0] or "").split():
            if filter_word(word, stopword_set):
                # NOTE(review): counting uses the raw token, so "Tax" and
                # "tax" are tallied separately even though filtering is
                # case-insensitive -- confirm that is intended.
                word_counts[word] = word_counts.get(word, 0) + 1

    # Highest count first; ties fall back to reverse word order, exactly as
    # sorting (count, word) tuples in reverse does.
    ranked = sorted(word_counts.items(), key=lambda item: (item[1], item[0]), reverse=True)
    top_ten = ranked[:10]

    word_list = [word for word, _ in top_ten]
    word_count = [count for _, count in top_ten]

    plot_words(word_list, word_count, fig_title, fig_color)

# Run the analysis and show the charts; close the connection even if it raises.
try:
    analyze_words()
finally:
    conn.close()