In [1]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [3]:
# Load the raw review table from the CSV that sits next to the notebook.
reviews = pd.read_csv(filepath_or_buffer='amazon_baby.csv')

In [4]:
# Peek at the first five rows of the freshly loaded frame.
reviews.head(n=5)

Unnamed: 0,name,review,rating
0,Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5


In [5]:
def text2worddict(text):
    """Build a bag-of-words mapping for ``text``.

    The text is lower-cased and split into maximal runs of word characters
    (letters, digits and underscore); everything else acts as a separator.

    Parameters
    ----------
    text : str
        Raw text to tokenise.

    Returns
    -------
    dict
        Maps each distinct token to the number of times it occurs, with keys
        in order of first appearance.
    """
    counts = {}
    tokens = re.findall(r'[\w]+', text.lower())
    for token in tokens:
        # Tokens not seen yet start from zero.
        counts[token] = counts.get(token, 0) + 1
    return counts

In [6]:
# Per-review bag of words. Any missing review (NaN) would be turned into the
# literal string 'nan' by astype(str) and counted as a real word, so missing
# values are replaced with '' first and produce an empty dict instead.
reviews['word_count'] = (
    reviews['review']
    .fillna('')
    .astype(str)
    .apply(text2worddict)
)

In [7]:
# Drop the 3-star reviews. The explicit copy makes the result an independent
# frame, so the column assignments in later cells do not write to a slice of
# the original table (which triggers pandas' SettingWithCopyWarning).
reviews = reviews[reviews['rating'] != 3].copy()

In [8]:
# Binary label: 1 when the rating is 4 or higher, otherwise 0.
reviews['sentiment'] = reviews['rating'].ge(4).astype(int)

In [9]:
# Check the new word_count and sentiment columns on the first five rows.
reviews.head(n=5)

Unnamed: 0,name,review,rating,word_count,sentiment
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,"{'it': 3, 'came': 1, 'early': 1, 'and': 3, 'wa...",1
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,"{'very': 1, 'soft': 1, 'and': 2, 'comfortable'...",1
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,"{'this': 4, 'is': 4, 'a': 2, 'product': 2, 'we...",1
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,"{'all': 2, 'of': 1, 'my': 1, 'kids': 2, 'have'...",1
5,Stop Pacifier Sucking without tears with Thumb...,"When the Binky Fairy came to our house, we did...",5,"{'when': 2, 'the': 6, 'binky': 3, 'fairy': 3, ...",1


In [10]:
# Target: the 0/1 sentiment label.
y = reviews['sentiment']

# Features: one column per distinct word, built from the per-review count dicts.
dictVectorizer = DictVectorizer()
X = dictVectorizer.fit_transform(reviews['word_count'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# Full-vocabulary sentiment classifier.
# The solver is pinned explicitly: the recorded run used the library default
# (shown as solver='warn', which ran LibLinear), and that default depends on
# the scikit-learn version. Naming it keeps the fit reproducible and removes
# the default-solver warning.
sentiment_model1 = LogisticRegression(solver='liblinear', verbose=1, max_iter=1000)
sentiment_model1.fit(X_train, y_train)



[LibLinear]

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=1,
                   warm_start=False)

In [12]:
# Score the full-vocabulary model on the held-out rows.
y_preds1 = sentiment_model1.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_preds1))

              precision    recall  f1-score   support

           0       0.81      0.73      0.77      5362
           1       0.95      0.97      0.96     27989

    accuracy                           0.93     33351
   macro avg       0.88      0.85      0.86     33351
weighted avg       0.93      0.93      0.93     33351



In [13]:
# Hand-picked sentiment keywords used as the only features of the reduced model.
selected_words = [
    'awesome',
    'great',
    'fantastic',
    'amazing',
    'love',
    'horrible',
    'bad',
    'terrible',
    'awful',
    'wow',
    'hate',
]

In [14]:
def word_count(word):
    """Look up how often ``word`` occurs in each review.

    Reads the module-level ``reviews`` frame and its ``word_count`` column of
    per-review count dicts.

    Parameters
    ----------
    word : str
        Token to look up in every review's count dict.

    Returns
    -------
    pandas.Series
        One value per row of ``reviews``: the stored count for ``word``, or 0
        when the review does not contain it.
    """
    return reviews['word_count'].apply(lambda counts: counts.get(word, 0))

In [15]:
# Add one count column per selected keyword, all in a single step.
# assign() returns a new frame, so nothing is written into a filtered slice
# (no chained-assignment warning), no loop variable is left behind in the
# notebook namespace, and re-running the cell simply overwrites the same
# columns in the same order.
reviews = reviews.assign(**{word: word_count(word) for word in selected_words})

In [16]:
# Confirm the keyword count columns on the first five rows.
reviews.head(n=5)

Unnamed: 0,name,review,rating,word_count,sentiment,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
1,Planetwise Wipe Pouch,it came early and was not disappointed. i love...,5,"{'it': 3, 'came': 1, 'early': 1, 'and': 3, 'wa...",1,0,0,0,0,1,0,0,0,0,0,0
2,Annas Dream Full Quilt with 2 Shams,Very soft and comfortable and warmer than it l...,5,"{'very': 1, 'soft': 1, 'and': 2, 'comfortable'...",1,0,0,0,0,0,0,0,0,0,0,0
3,Stop Pacifier Sucking without tears with Thumb...,This is a product well worth the purchase. I ...,5,"{'this': 4, 'is': 4, 'a': 2, 'product': 2, 'we...",1,0,0,0,0,2,0,0,0,0,0,0
4,Stop Pacifier Sucking without tears with Thumb...,All of my kids have cried non-stop when I trie...,5,"{'all': 2, 'of': 1, 'my': 1, 'kids': 2, 'have'...",1,0,1,0,0,1,0,0,0,0,0,0
5,Stop Pacifier Sucking without tears with Thumb...,"When the Binky Fairy came to our house, we did...",5,"{'when': 2, 'the': 6, 'binky': 3, 'fairy': 3, ...",1,0,1,0,0,0,0,0,0,0,0,0


In [17]:
# Total occurrences of each selected keyword over all reviews, most frequent first.
(
    reviews[selected_words]
    .sum()
    .sort_values(ascending=False)
)

great        55791
love         41994
bad           4183
awesome       3892
amazing       2628
fantastic     1664
terrible      1146
horrible      1110
hate          1107
awful          687
wow            425
dtype: int64

In [18]:
# Reduced model: the only features are the selected keyword count columns.
X_sw = reviews[selected_words]
y_sw = reviews['sentiment']

# Same test fraction and seed as the full-vocabulary split.
X_train_sw, X_test_sw, y_train_sw, y_test_sw = train_test_split(
    X_sw, y_sw, test_size=0.2, random_state=0
)

# The solver is pinned explicitly: the recorded output shows solver='warn',
# i.e. a version-dependent library default. Naming it keeps the fitted
# coefficients reproducible and removes the default-solver warning.
sel_words_model = LogisticRegression(solver='liblinear', verbose=1, max_iter=1000)
sel_words_model.fit(X_train_sw, y_train_sw)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [19]:
# Learned weights of the keyword model. The model was fitted on
# reviews[selected_words], so the columns of this array follow the order of
# selected_words.
parameters = sel_words_model.coef_
parameters

array([[ 1.17190339,  0.86596357,  0.90039213,  1.05418113,  1.38092967,
        -2.2690806 , -0.97524971, -2.209991  , -2.05579658, -0.0973475 ,
        -1.43351813]])

In [20]:
# Hard 0/1 predictions of the keyword model on its held-out rows.
y_pred_sw = sel_words_model.predict(X=X_test_sw)

In [21]:
# Per-class precision / recall / F1 for the keyword model.
print(classification_report(y_true=y_test_sw, y_pred=y_pred_sw))

              precision    recall  f1-score   support

           0       0.71      0.07      0.13      5362
           1       0.85      0.99      0.92     27989

    accuracy                           0.85     33351
   macro avg       0.78      0.53      0.52     33351
weighted avg       0.83      0.85      0.79     33351



In [22]:
# Fraction of reviews labelled 1: the mean of a 0/1 column is the share of ones.
reviews['sentiment'].mean()

0.8411233448474381

In [23]:
# Keep only the reviews of a single product for a closer look.
rev1 = reviews.loc[reviews['name'].eq('Baby Trend Diaper Champ')]
rev1.head(n=5)

Unnamed: 0,name,review,rating,word_count,sentiment,awesome,great,fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate
312,Baby Trend Diaper Champ,Ok - newsflash. Diapers are just smelly. We\...,4,"{'ok': 1, 'newsflash': 1, 'diapers': 2, 'are':...",1,0,0,0,0,0,0,0,0,0,0,0
314,Baby Trend Diaper Champ,"My husband and I selected the Diaper ""Champ"" m...",1,"{'my': 2, 'husband': 1, 'and': 8, 'i': 3, 'sel...",0,0,0,0,0,0,0,0,0,0,0,0
315,Baby Trend Diaper Champ,Excellent diaper disposal unit. I used it in ...,5,"{'excellent': 1, 'diaper': 1, 'disposal': 1, '...",1,0,0,0,0,0,0,0,0,0,0,0
316,Baby Trend Diaper Champ,We love our diaper champ. It is very easy to ...,5,"{'we': 3, 'love': 1, 'our': 2, 'diaper': 4, 'c...",1,0,0,0,0,1,0,0,0,0,0,0
317,Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5,"{'two': 2, 'girlfriends': 1, 'and': 12, 'famil...",1,0,0,0,1,0,1,0,0,0,1,0


In [30]:
# NOTE(review): X2 is not assigned in any visible cell (execution counts jump
# from 23 to 30). Unless another cell defines it, this line raises NameError
# after Restart & Run All. Presumably it was a feature matrix built from rev1
# in a removed cell; restore that definition or delete this cell. TODO confirm.
X2.shape

(298, 2582)

In [36]:
# Keyword features and labels for the single-product subset.
X2_sw = rev1[selected_words]
y2_sw = rev1['sentiment']

# Class probabilities from the keyword model, one row per review in rev1.
y2_pred_sw = sel_words_model.predict_proba(X2_sw)

In [58]:
# Take the second probability column (presumably class 1, the positive label;
# confirm against sel_words_model.classes_) and index it by the review text.
y2_pred_sw_df = pd.DataFrame(data=y2_pred_sw[:, 1], index=rev1['review'])

# Highest predicted probability first.
y2_pred_sw_df.sort_values(by=0, ascending=False)

Unnamed: 0_level_0,0
review,Unnamed: 1_level_1
"I LOVE LOVE LOVE this product! It is SO much easier to use than the Diaper Genie, (you need a PHD in poopy to figure out how to use the darn thing!) and it even takes the same bags as my kitchen trash can, shich is super convenient, and cost efficient as I can buy them in bulk.The only reason for not rating it a 5 star was that I did have one small problem with it. The foam gasket in the barrell which keeps the poopy smell inside the unit ripped somehow, and it got VERY stinky. HOWEVER, I contacted the manufacturer though their website, and received an email back the same day stating that this was unusual, and that replacement gaskets were on their way to me. They arrived inside of a week and after replacing, it works great again! (They even sent me extras should it happen again)I HIGHLY reccomend this diaper pail over ANY competitors, you will not be sorry!",0.998251
"Love it, love it, love it! This lives up to the hype. You cannot smell diapers-unless of course you don\'t change it or clean out. You can also use any trash bags you already have around the house, which is why we purchased this particular diaper pail. It can be hard to open if you have long nails, but I have found that if you just use the side of your finger instead of your fingertips then it\'s no problem. I have recommended this to all new parents!",0.995853
"I received my Diaper Champ at my baby shower for the birth of my first son 11 months ago. I use it faithfully every day and love the ease and convenience of only having to change the bag once a week! I love that you can use regular kitchen-size trash bags and don\'t need to purchase any special expensive bags. One thing you might want to be careful of, however...make sure you do not throw loose baby wipes into the Diaper Champ or else the flip mechanism can become jammed and after time will not seal properly due to having to pull out wipes that are stuck. I love my diaper champ so much, I have asked for a second one for my upcoming baby shower for my second son.",0.995853
"Let me just say, I LOVE THIS PRODUCT!! I used the diaper genie from the time my daughter was born until the time she was 16 months. That was all I could take. Constantly buying expensive refills, emptying it every couple of days, juggling a wiggly baby while trying to open, lift, push, spin, and close the genie was just too much. Then I was shopping at Babies R Us and in the STORE\'s changing room is the Diaper Champ. It was easy, didn\'t smell, and used regular trash bags. I was sold.After using the Diaper Champ for 2 months now, I am confident I made the right choice. Yes, when it gets too full, you have to change the bag or the weight will get stuck (duh!). Yes, if you don\'t wrap up the poopy wipies in the dirty diaper, you will have to clean poop from the chute (just wrap it up). Yes, poop does smell (not like roses), but my daughter\'s room doesn\'t smell like poop because the Diaper Champ does a great job of containing odor. You do need to disenfect it when you change the bag, but I would recommend that with any product that is a holding tank for feces (you have to disinfect your toilets, too).I have recommended the Diaper Champ over the genie to every one of my friends. It is fantastic!!",0.988857
"I have been using this diaper pail for 41/2 months now and just love it. It is taller than other diaper pails so you don\'t have to bend so far down to dispose of the diaper AND you can use regular kitchen garbage bags. I didn\'t want to have to buy special bags which are more expensive and just one more thing you can run out of. My son is still exclusively breastfed, so I don\'t know if the Diaper Champ will continue doing such a great job once he is on solids, but so far, it has been great; even in the VERY hot weather it has contained all diaper odors! I am very happy with it and would gladly recommend it, especially if you are the least bit tall.",0.988471
I love this diaper pale and wouldn\'t dream of trying anything else. It is great that you can use regular trashbags and not have to buy expensive cartridges. So far it has done great on containing the odor and I like how easy it is to use.,0.988471
Works great - no smells. LOVE that it uses regular garbage bags. Easy to change and keep clean. Great option instead of the Diaper Genie that requires special bags and is so wasteful since it wraps each diaper in plastic.,0.988471
I love this diaper pail. It keeps the diapers from stinking up your room/house. I also love that I can use regualer old kitchen bags and dont\' have to fork over extra bucks for special bags. The reason I gave it 4 stars is that I wish that it held more diapers.,0.983703
I\'ve worked with kids more than half my life. First as a babysitter /nanny and now as a mother. I\'ve used diaper genies and they\'ve all been really smelly. I never had to deal with changing cartridges but I can imagine how annoying it could be to open that stench-filled object and take out the poop sausage. I love my diaper champ. It\'s so easy to use. I never had a problem with it squishing my finger because I always remember that the lid is heavy. No smell escapes from it and I keep it in my bathroom. We\'ll see if I still love it once my DD\'s soiled diapers become stinky.,0.983703
"I have a 10 year old daughter and an 8 month old son. There was nothing like this when I had my first child and I really looked into the champ v\'s genie before I brought this and I have to say that I truly LOVE it. I change the bag once a week. My kids have to share a room so if it was smelly I would hear about it. I just spray with a little Lysol at the time of bag change and it is as good as new. Aside form that it is so easy to use with one hand, anyone with a wiggly baby will appreciate that. In fact I love it so much I am going to take one all the way to Australia with me for my niece since she can\'t get them over there. I am the owner of a used baby store in NJ, (I have a champ there too) and this is one of my hottest items, I have several diaper genies there that just do not sell.You will not be sorry you brought the Diaper Champ.",0.983703
