# YELP REVIEW

In [1]:
import pandas as pd
import nltk
import re
from nltk.tokenize import word_tokenize

In [2]:
data = pd.read_csv("Dataset/Yelp Review.csv")
data = data[["text"]]
data.rename(columns={'text':'Reviews'}, inplace=True)
data.head()

Unnamed: 0,Reviews
0,I was not expecting this concert to be as much...
1,brought mama for a belated birthday present. v...
2,I attended this show last night and was under ...
3,My husband got us tickets and I could not beli...
4,J. Lo was phenomenal!! Such a great show! She ...


In [3]:
data["Tokens"] = data["Reviews"].apply(word_tokenize)
data.head()

Unnamed: 0,Reviews,Tokens
0,I was not expecting this concert to be as much...,"[I, was, not, expecting, this, concert, to, be..."
1,brought mama for a belated birthday present. v...,"[brought, mama, for, a, belated, birthday, pre..."
2,I attended this show last night and was under ...,"[I, attended, this, show, last, night, and, wa..."
3,My husband got us tickets and I could not beli...,"[My, husband, got, us, tickets, and, I, could,..."
4,J. Lo was phenomenal!! Such a great show! She ...,"[J, ., Lo, was, phenomenal, !, !, Such, a, gre..."


In [4]:
data["POS Tagging"] = data["Tokens"].apply(nltk.pos_tag)
data.head()

Unnamed: 0,Reviews,Tokens,POS Tagging
0,I was not expecting this concert to be as much...,"[I, was, not, expecting, this, concert, to, be...","[(I, PRP), (was, VBD), (not, RB), (expecting, ..."
1,brought mama for a belated birthday present. v...,"[brought, mama, for, a, belated, birthday, pre...","[(brought, JJ), (mama, NN), (for, IN), (a, DT)..."
2,I attended this show last night and was under ...,"[I, attended, this, show, last, night, and, wa...","[(I, PRP), (attended, VBD), (this, DT), (show,..."
3,My husband got us tickets and I could not beli...,"[My, husband, got, us, tickets, and, I, could,...","[(My, PRP$), (husband, NN), (got, VBD), (us, P..."
4,J. Lo was phenomenal!! Such a great show! She ...,"[J, ., Lo, was, phenomenal, !, !, Such, a, gre...","[(J, NNP), (., .), (Lo, NNP), (was, VBD), (phe..."


In [5]:
def filter(x, tag):
    ans = set()
    
    for el in x:
        if el[1].startswith(tag):
            ans.add(el)
            
    return list(ans)

data["Nouns"] = data["POS Tagging"].apply(filter, args=("NN",))
data["Adjectives"] = data["POS Tagging"].apply(filter, args=("JJ",))

data.head()

Unnamed: 0,Reviews,Tokens,POS Tagging,Nouns,Adjectives
0,I was not expecting this concert to be as much...,"[I, was, not, expecting, this, concert, to, be...","[(I, PRP), (was, VBD), (not, RB), (expecting, ...","[(Versace, NNP), (daaaays, NNS), (performance,...","[(moves-, JJ), (couple, JJ), (beautiful, JJ), ..."
1,brought mama for a belated birthday present. v...,"[brought, mama, for, a, belated, birthday, pre...","[(brought, JJ), (mama, NN), (for, IN), (a, DT)...","[(mama, NN), (costume, NN), (JLo, NNP), (prese...","[(little, JJ), (overall, JJ), (new, JJ), (ligh..."
2,I attended this show last night and was under ...,"[I, attended, this, show, last, night, and, wa...","[(I, PRP), (attended, VBD), (this, DT), (show,...","[(etc, NN), (music, NN), (beware, NN), (straw,...","[(few, JJ), (first, JJ), (longer, JJR), (nice,..."
3,My husband got us tickets and I could not beli...,"[My, husband, got, us, tickets, and, I, could,...","[(My, PRP$), (husband, NN), (got, VBD), (us, P...","[(...., NNP), (stage, NN), (.The, NNP), (omg, ...","[(right, JJ), (high, JJ), (multiple, JJ), (ama..."
4,J. Lo was phenomenal!! Such a great show! She ...,"[J, ., Lo, was, phenomenal, !, !, Such, a, gre...","[(J, NNP), (., .), (Lo, NNP), (was, VBD), (phe...","[(Lo, NNP), (energy, NN), (seats, NNS), (hits,...","[(phenomenal, JJ), (musical, JJ), (Such, JJ), ..."


In [6]:
def combination(x,y):
    lst = []
    for i in range(len(x)):
        for j in range(len(y)):
            lst.append((re.sub(r'[^A-Za-z]', '', x[i][0].lower()),re.sub(r'[^A-Za-z]', '', y[j][0].lower())))
            
    return lst

data["Pairs"] = data.apply(lambda x: combination(x["Nouns"], x["Adjectives"]), axis=1)

data.head()

Unnamed: 0,Reviews,Tokens,POS Tagging,Nouns,Adjectives,Pairs
0,I was not expecting this concert to be as much...,"[I, was, not, expecting, this, concert, to, be...","[(I, PRP), (was, VBD), (not, RB), (expecting, ...","[(Versace, NNP), (daaaays, NNS), (performance,...","[(moves-, JJ), (couple, JJ), (beautiful, JJ), ...","[(versace, moves), (versace, couple), (versace..."
1,brought mama for a belated birthday present. v...,"[brought, mama, for, a, belated, birthday, pre...","[(brought, JJ), (mama, NN), (for, IN), (a, DT)...","[(mama, NN), (costume, NN), (JLo, NNP), (prese...","[(little, JJ), (overall, JJ), (new, JJ), (ligh...","[(mama, little), (mama, overall), (mama, new),..."
2,I attended this show last night and was under ...,"[I, attended, this, show, last, night, and, wa...","[(I, PRP), (attended, VBD), (this, DT), (show,...","[(etc, NN), (music, NN), (beware, NN), (straw,...","[(few, JJ), (first, JJ), (longer, JJR), (nice,...","[(etc, few), (etc, first), (etc, longer), (etc..."
3,My husband got us tickets and I could not beli...,"[My, husband, got, us, tickets, and, I, could,...","[(My, PRP$), (husband, NN), (got, VBD), (us, P...","[(...., NNP), (stage, NN), (.The, NNP), (omg, ...","[(right, JJ), (high, JJ), (multiple, JJ), (ama...","[(, right), (, high), (, multiple), (, amazing..."
4,J. Lo was phenomenal!! Such a great show! She ...,"[J, ., Lo, was, phenomenal, !, !, Such, a, gre...","[(J, NNP), (., .), (Lo, NNP), (was, VBD), (phe...","[(Lo, NNP), (energy, NN), (seats, NNS), (hits,...","[(phenomenal, JJ), (musical, JJ), (Such, JJ), ...","[(lo, phenomenal), (lo, musical), (lo, such), ..."


In [7]:
temp = data["Pairs"].tolist()
pairs_list = []

for lst in temp:
    pairs_list.extend(lst)

pairs_list

[('versace', 'moves'),
 ('versace', 'couple'),
 ('versace', 'beautiful'),
 ('versace', 'live'),
 ('versace', 'voice'),
 ('versace', 'different'),
 ('versace', 'too'),
 ('versace', 'rican'),
 ('versace', 'gotten'),
 ('versace', 'colorful'),
 ('versace', 'impressed'),
 ('versace', 'worth'),
 ('versace', 'magical'),
 ('versace', 'chronological'),
 ('versace', 'entire'),
 ('versace', 'great'),
 ('versace', 'energetic'),
 ('versace', 'mobile'),
 ('versace', 'much'),
 ('versace', 'sexy'),
 ('daaaays', 'moves'),
 ('daaaays', 'couple'),
 ('daaaays', 'beautiful'),
 ('daaaays', 'live'),
 ('daaaays', 'voice'),
 ('daaaays', 'different'),
 ('daaaays', 'too'),
 ('daaaays', 'rican'),
 ('daaaays', 'gotten'),
 ('daaaays', 'colorful'),
 ('daaaays', 'impressed'),
 ('daaaays', 'worth'),
 ('daaaays', 'magical'),
 ('daaaays', 'chronological'),
 ('daaaays', 'entire'),
 ('daaaays', 'great'),
 ('daaaays', 'energetic'),
 ('daaaays', 'mobile'),
 ('daaaays', 'much'),
 ('daaaays', 'sexy'),
 ('performance', 'moves'

In [8]:
seen = set()
repeated = {}

for pair in pairs_list:
    if pair in seen:
        if pair in repeated:
            repeated[pair] += 1
        else:
            repeated[pair] = 2
    else:
        seen.add(pair)

In [9]:
repeated = {k:v for k,v in sorted(repeated.items(), key=lambda x:x[1], reverse=True)}

repeated

{('show', 'great'): 10,
 ('jlo', 'great'): 9,
 ('jlo', 'good'): 9,
 ('seats', 'good'): 7,
 ('seats', 'great'): 7,
 ('show', 'good'): 6,
 ('jlo', 'huge'): 6,
 ('show', 'amazing'): 6,
 ('fan', 'huge'): 6,
 ('time', 'great'): 6,
 ('jlo', 'beautiful'): 5,
 ('jlo', 'much'): 5,
 ('jlo', 'able'): 5,
 ('jlo', 'more'): 5,
 ('concert', 'good'): 5,
 ('show', 'high'): 5,
 ('show', 'huge'): 5,
 ('jlo', 'old'): 5,
 ('jlo', 'cool'): 5,
 ('time', 'huge'): 5,
 ('stage', 'great'): 5,
 ('vegas', 'great'): 5,
 ('jlo', 'many'): 5,
 ('jlo', 'sexy'): 4,
 ('jlo', 'last'): 4,
 ('seats', 'able'): 4,
 ('concert', 'first'): 4,
 ('concert', 'able'): 4,
 ('show', 'old'): 4,
 ('time', 'beautiful'): 4,
 ('jlo', 'amazing'): 4,
 ('people', 'able'): 4,
 ('people', 'good'): 4,
 ('time', 'able'): 4,
 ('time', 'good'): 4,
 ('jlo', 'hard'): 4,
 ('show', 'best'): 4,
 ('vegas', 'good'): 4,
 ('show', 'first'): 4,
 ('concert', 'bad'): 4,
 ('show', 'bad'): 4,
 ('people', 'bad'): 4,
 ('music', 'more'): 4,
 ('lo', 'amazing'): 4,
 

In [10]:
nouns = set()
top5_pairs = []

for pair in list(repeated.keys()):
    if len(top5_pairs) == 5:
        break
    if pair[0] not in nouns:
        top5_pairs.append(pair)
        nouns.add(pair[0])
        
top5_pairs

[('show', 'great'),
 ('jlo', 'great'),
 ('seats', 'good'),
 ('fan', 'huge'),
 ('time', 'great')]