In [1]:
import os
import pandas as pd
import numpy as np

import nltk as nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

from dotenv import load_dotenv
from sqlalchemy import create_engine

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/stuartperry/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Read the database URL from the environment; load_dotenv() first pulls in
# any values defined in a local .env file so the URL never lives in the notebook.
load_dotenv()
db_connection = os.getenv("KICKSTARTER_DB_URL")

# os.getenv returns None when the variable is missing. Fail here with a
# message that names the variable instead of letting create_engine raise
# an argument error about the URL further down.
if not db_connection:
    raise RuntimeError(
        "KICKSTARTER_DB_URL is not set; define it in the environment or in a .env file."
    )

# init database engine
engine = create_engine(db_connection)

In [3]:
# Fetch the text columns for every campaign whose country code is in the
# allow-list below; country is used here as a stand-in for "blurb is mostly English".
query = """
        SELECT  blurb, country, name, slug, state
        FROM kickstarters
        WHERE country in ('US', 'GB', 'CA', 'AU', 'NZ', 'JP', 'SG', 'PL', 'NL', 'IE', 'GR', 'AT')
        """

# Read the result set, discard rows with a missing value in any column,
# and renumber the index from 0.
text_data = (
    pd.read_sql(query, engine)
    .dropna()
    .reset_index(drop=True)
)
text_data.head()

Unnamed: 0,blurb,country,name,slug,state
0,A cookbook for lyke minded poeple who love sim...,US,Lyke Minded Cookbook,lyke-minded-cookbook,successful
1,"A Chef's Guide to Hunting, Preparing and Cooki...",US,The Hog Book by Jesse Griffiths,the-hog-book-by-jesse-griffiths,successful
2,A selection of the recipes we love from the co...,GB,Gloria & Lil's: the recipe book.,gloria-and-lils-the-recipe-book,successful
3,A Book Where Interesting Food Things Happen: 1...,CA,The Depanneur Cookbook,the-depanneur-cookbook,successful
4,Learn the homestead kitchen skills you need to...,AU,Real Food Cookbook: A Year in an Off-Grid Kitchen,real-food-cookbook-a-year-in-an-off-grid-kitchen,successful


In [4]:
# Score every blurb with VADER. polarity_scores returns one dict per blurb
# with the keys neg / neu / pos / compound.
analyzer = SentimentIntensityAnalyzer()

# Expand the per-row dicts into one column per score, aligned on text_data's index.
sentiment = pd.DataFrame(
    text_data['blurb'].map(analyzer.polarity_scores).tolist(),
    index=text_data.index,
)

# Attach the scores to the original frame. Dropping any score columns left
# over from a previous run keeps this cell idempotent: re-running it replaces
# the scores instead of appending a second set of identically named columns.
text_data = text_data.drop(columns=sentiment.columns, errors='ignore').join(sentiment)
text_data.head()

Unnamed: 0,blurb,country,name,slug,state,neg,neu,pos,compound
0,A cookbook for lyke minded poeple who love sim...,US,Lyke Minded Cookbook,lyke-minded-cookbook,successful,0.0,0.595,0.405,0.872
1,"A Chef's Guide to Hunting, Preparing and Cooki...",US,The Hog Book by Jesse Griffiths,the-hog-book-by-jesse-griffiths,successful,0.0,1.0,0.0,0.0
2,A selection of the recipes we love from the co...,GB,Gloria & Lil's: the recipe book.,gloria-and-lils-the-recipe-book,successful,0.0,0.769,0.231,0.6369
3,A Book Where Interesting Food Things Happen: 1...,CA,The Depanneur Cookbook,the-depanneur-cookbook,successful,0.0,0.787,0.213,0.4019
4,Learn the homestead kitchen skills you need to...,AU,Real Food Cookbook: A Year in an Off-Grid Kitchen,real-food-cookbook-a-year-in-an-off-grid-kitchen,successful,0.0,1.0,0.0,0.0
