In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import io

# Import data

In [4]:
data = []

with open('./HP11.txt',"r") as myfile:
    for line in myfile:
        line = line.replace('\n', '')
        if line == '':
            continue
        else:
            data.append(str(line))
    
myfile.close()

In [5]:
data

["Harry Potter and the Sorcerer's Stone ",
 'CHAPTER ONE ',
 'THE BOY WHO LIVED ',
 "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. ",
 'Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. ',
 "The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out 

# Preprocessing & cleaning

In [6]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem.porter import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer

porter=PorterStemmer()
wordnet_lemmatizer = WordNetLemmatizer()

nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Vivek.Sasikumar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [7]:
from nltk.corpus import stopwords
nltk.download('stopwords')
stop = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Vivek.Sasikumar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
import string
exclude = set(string.punctuation)

In [9]:
def clean(doc):
    stop_free = " ".join([i for i in doc.lower().split() if i not in stop])
    punc_free = "".join(ch for ch in stop_free if ch not in exclude)
    normalized = " ".join(wordnet_lemmatizer.lemmatize(word) for word in punc_free.split())
    return normalized

In [10]:
doc_clean = [clean(line).split() for line in data] 

In [11]:
doc_clean

[['harry', 'potter', 'sorcerer', 'stone'],
 ['chapter', 'one'],
 ['boy', 'lived'],
 ['mr',
  'mr',
  'dursley',
  'number',
  'four',
  'privet',
  'drive',
  'proud',
  'say',
  'perfectly',
  'normal',
  'thank',
  'much',
  'last',
  'people',
  'expect',
  'involved',
  'anything',
  'strange',
  'mysterious',
  'hold',
  'nonsense'],
 ['mr',
  'dursley',
  'director',
  'firm',
  'called',
  'grunnings',
  'made',
  'drill',
  'big',
  'beefy',
  'man',
  'hardly',
  'neck',
  'although',
  'large',
  'mustache',
  'mr',
  'dursley',
  'thin',
  'blonde',
  'nearly',
  'twice',
  'usual',
  'amount',
  'neck',
  'came',
  'useful',
  'spent',
  'much',
  'time',
  'craning',
  'garden',
  'fence',
  'spying',
  'neighbor',
  'dursleys',
  'small',
  'son',
  'called',
  'dudley',
  'opinion',
  'finer',
  'boy',
  'anywhere'],
 ['dursleys',
  'everything',
  'wanted',
  'also',
  'secret',
  'greatest',
  'fear',
  'somebody',
  'would',
  'discover',
  'it',
  'think',
  'could',

# Latent Dirichlet Allocation (LDA) using Gensim

In [12]:
import gensim
from gensim import corpora
dictionary = corpora.Dictionary(doc_clean)

doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]

In [13]:
doc_term_matrix

[[(0, 1), (1, 1), (2, 1), (3, 1)],
 [(4, 1), (5, 1)],
 [(6, 1), (7, 1)],
 [(8, 1),
  (9, 1),
  (10, 1),
  (11, 1),
  (12, 1),
  (13, 1),
  (14, 1),
  (15, 1),
  (16, 2),
  (17, 1),
  (18, 1),
  (19, 1),
  (20, 1),
  (21, 1),
  (22, 1),
  (23, 1),
  (24, 1),
  (25, 1),
  (26, 1),
  (27, 1),
  (28, 1)],
 [(6, 1),
  (10, 2),
  (16, 2),
  (17, 1),
  (29, 1),
  (30, 1),
  (31, 1),
  (32, 1),
  (33, 1),
  (34, 1),
  (35, 2),
  (36, 1),
  (37, 1),
  (38, 1),
  (39, 1),
  (40, 1),
  (41, 1),
  (42, 1),
  (43, 1),
  (44, 1),
  (45, 1),
  (46, 1),
  (47, 1),
  (48, 1),
  (49, 1),
  (50, 1),
  (51, 1),
  (52, 1),
  (53, 2),
  (54, 1),
  (55, 1),
  (56, 1),
  (57, 1),
  (58, 1),
  (59, 1),
  (60, 1),
  (61, 1),
  (62, 1),
  (63, 1),
  (64, 1)],
 [(1, 5),
  (6, 1),
  (10, 1),
  (16, 3),
  (26, 1),
  (40, 1),
  (41, 4),
  (54, 1),
  (56, 1),
  (57, 1),
  (65, 1),
  (66, 1),
  (67, 1),
  (68, 1),
  (69, 1),
  (70, 1),
  (71, 1),
  (72, 1),
  (73, 1),
  (74, 1),
  (75, 1),
  (76, 1),
  (77, 1),
  (78,

In [14]:
Lda = gensim.models.ldamodel.LdaModel

ldamodel = Lda(doc_term_matrix, num_topics=20, id2word = dictionary, passes=50)

In [15]:
import pyLDAvis
import pyLDAvis.gensim 

In [16]:
lda_display = pyLDAvis.gensim.prepare(ldamodel, doc_term_matrix, dictionary, sort_topics=False)
pyLDAvis.display(lda_display)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [43]:
for idx, topic in ldamodel.print_topics(-1):
    print('Topic: {} Word: {}'.format(idx, topic))

Topic: 0 Word: 0.034*"harry" + 0.016*"looked" + 0.014*"next" + 0.012*"stared" + 0.008*"one" + 0.008*"like" + 0.008*"quidditch" + 0.008*"bright" + 0.008*"around" + 0.007*"platform"
Topic: 1 Word: 0.020*"harry" + 0.014*"hundred" + 0.012*"answer" + 0.012*"now" + 0.011*"chapter" + 0.011*"table" + 0.010*"ron" + 0.010*"them" + 0.009*"hut" + 0.008*"unicorn"
Topic: 2 Word: 0.031*"let" + 0.026*"went" + 0.023*"snape" + 0.022*"then" + 0.016*"room" + 0.015*"bed" + 0.014*"troll" + 0.013*"in" + 0.012*"could" + 0.009*"easy"
Topic: 3 Word: 0.038*"harry" + 0.015*"turned" + 0.013*"one" + 0.013*"him" + 0.012*"hermione" + 0.010*"looked" + 0.010*"hagrid" + 0.009*"hall" + 0.009*"another" + 0.009*"last"
Topic: 4 Word: 0.022*"slytherin" + 0.020*"said" + 0.019*"gryffindor" + 0.018*"point" + 0.016*"house" + 0.014*"harry" + 0.014*"we" + 0.013*"snape" + 0.012*"nothing" + 0.012*"and"
Topic: 5 Word: 0.037*"harry" + 0.033*"ron" + 0.016*"hermione" + 0.012*"one" + 0.010*"malfoy" + 0.008*"first" + 0.008*"note" + 0.007*

In [47]:
example = doc_clean[100]
print(example,'\n')
example_lda = dictionary.doc2bow(example)

for index, score in sorted(ldamodel[example_lda], key=lambda tup: -1*tup[1]):
    print("Score: {}\t Topic: {}".format(score, ldamodel.print_topic(index, 5)))

['hagrid', 'said', 'dumbledore', 'sounding', 'relieved', 'at', 'last', 'get', 'motorcycle'] 

Score: 0.9049884080886841	 Topic: 0.039*"said" + 0.029*"professor" + 0.019*"dumbledore" + 0.016*"quirrell" + 0.014*"mcgonagall"


# Topic modelling

In [17]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [18]:
def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

In [19]:
sent = preprocess(data[4])
sent

[('Mr.', 'NNP'),
 ('Dursley', 'NNP'),
 ('was', 'VBD'),
 ('the', 'DT'),
 ('director', 'NN'),
 ('of', 'IN'),
 ('a', 'DT'),
 ('firm', 'NN'),
 ('called', 'VBN'),
 ('Grunnings', 'NNP'),
 (',', ','),
 ('which', 'WDT'),
 ('made', 'VBD'),
 ('drills', 'NNS'),
 ('.', '.'),
 ('He', 'PRP'),
 ('was', 'VBD'),
 ('a', 'DT'),
 ('big', 'JJ'),
 (',', ','),
 ('beefy', 'JJ'),
 ('man', 'NN'),
 ('with', 'IN'),
 ('hardly', 'RB'),
 ('any', 'DT'),
 ('neck', 'NN'),
 (',', ','),
 ('although', 'IN'),
 ('he', 'PRP'),
 ('did', 'VBD'),
 ('have', 'VB'),
 ('a', 'DT'),
 ('very', 'RB'),
 ('large', 'JJ'),
 ('mustache', 'NN'),
 ('.', '.'),
 ('Mrs.', 'NNP'),
 ('Dursley', 'NNP'),
 ('was', 'VBD'),
 ('thin', 'JJ'),
 ('and', 'CC'),
 ('blonde', 'NN'),
 ('and', 'CC'),
 ('had', 'VBD'),
 ('nearly', 'RB'),
 ('twice', 'RB'),
 ('the', 'DT'),
 ('usual', 'JJ'),
 ('amount', 'NN'),
 ('of', 'IN'),
 ('neck', 'NN'),
 (',', ','),
 ('which', 'WDT'),
 ('came', 'VBD'),
 ('in', 'IN'),
 ('very', 'RB'),
 ('useful', 'JJ'),
 ('as', 'IN'),
 ('she', 'P

In [20]:
pattern = 'NP: {<DT>?<JJ>*<NN>}'

cp = nltk.RegexpParser(pattern)
cs = cp.parse(sent)
print(cs)

(S
  Mr./NNP
  Dursley/NNP
  was/VBD
  (NP the/DT director/NN)
  of/IN
  (NP a/DT firm/NN)
  called/VBN
  Grunnings/NNP
  ,/,
  which/WDT
  made/VBD
  drills/NNS
  ./.
  He/PRP
  was/VBD
  a/DT
  big/JJ
  ,/,
  (NP beefy/JJ man/NN)
  with/IN
  hardly/RB
  (NP any/DT neck/NN)
  ,/,
  although/IN
  he/PRP
  did/VBD
  have/VB
  a/DT
  very/RB
  (NP large/JJ mustache/NN)
  ./.
  Mrs./NNP
  Dursley/NNP
  was/VBD
  thin/JJ
  and/CC
  (NP blonde/NN)
  and/CC
  had/VBD
  nearly/RB
  twice/RB
  (NP the/DT usual/JJ amount/NN)
  of/IN
  (NP neck/NN)
  ,/,
  which/WDT
  came/VBD
  in/IN
  very/RB
  useful/JJ
  as/IN
  she/PRP
  spent/VBD
  so/RB
  much/JJ
  of/IN
  her/PRP$
  (NP time/NN)
  (NP craning/NN)
  over/IN
  (NP garden/NN)
  fences/NNS
  ,/,
  spying/VBG
  on/IN
  the/DT
  neighbors/NNS
  ./.
  The/DT
  Dursleys/NNP
  had/VBD
  (NP a/DT small/JJ son/NN)
  called/VBN
  Dudley/NNP
  and/CC
  in/IN
  their/PRP$
  (NP opinion/NN)
  there/EX
  was/VBD
  (NP no/DT finer/NN)
  boy/RB
  anywhere

In [21]:
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint
iob_tagged = tree2conlltags(cs)
pprint(iob_tagged)

[('Mr.', 'NNP', 'O'),
 ('Dursley', 'NNP', 'O'),
 ('was', 'VBD', 'O'),
 ('the', 'DT', 'B-NP'),
 ('director', 'NN', 'I-NP'),
 ('of', 'IN', 'O'),
 ('a', 'DT', 'B-NP'),
 ('firm', 'NN', 'I-NP'),
 ('called', 'VBN', 'O'),
 ('Grunnings', 'NNP', 'O'),
 (',', ',', 'O'),
 ('which', 'WDT', 'O'),
 ('made', 'VBD', 'O'),
 ('drills', 'NNS', 'O'),
 ('.', '.', 'O'),
 ('He', 'PRP', 'O'),
 ('was', 'VBD', 'O'),
 ('a', 'DT', 'O'),
 ('big', 'JJ', 'O'),
 (',', ',', 'O'),
 ('beefy', 'JJ', 'B-NP'),
 ('man', 'NN', 'I-NP'),
 ('with', 'IN', 'O'),
 ('hardly', 'RB', 'O'),
 ('any', 'DT', 'B-NP'),
 ('neck', 'NN', 'I-NP'),
 (',', ',', 'O'),
 ('although', 'IN', 'O'),
 ('he', 'PRP', 'O'),
 ('did', 'VBD', 'O'),
 ('have', 'VB', 'O'),
 ('a', 'DT', 'O'),
 ('very', 'RB', 'O'),
 ('large', 'JJ', 'B-NP'),
 ('mustache', 'NN', 'I-NP'),
 ('.', '.', 'O'),
 ('Mrs.', 'NNP', 'O'),
 ('Dursley', 'NNP', 'O'),
 ('was', 'VBD', 'O'),
 ('thin', 'JJ', 'O'),
 ('and', 'CC', 'O'),
 ('blonde', 'NN', 'B-NP'),
 ('and', 'CC', 'O'),
 ('had', 'VBD', 'O

In [24]:
ne_tree = nltk.ne_chunk(pos_tag(word_tokenize(data[4])))
print(ne_tree)

(S
  (PERSON Mr./NNP)
  (PERSON Dursley/NNP)
  was/VBD
  the/DT
  director/NN
  of/IN
  a/DT
  firm/NN
  called/VBN
  (PERSON Grunnings/NNP)
  ,/,
  which/WDT
  made/VBD
  drills/NNS
  ./.
  He/PRP
  was/VBD
  a/DT
  big/JJ
  ,/,
  beefy/JJ
  man/NN
  with/IN
  hardly/RB
  any/DT
  neck/NN
  ,/,
  although/IN
  he/PRP
  did/VBD
  have/VB
  a/DT
  very/RB
  large/JJ
  mustache/NN
  ./.
  Mrs./NNP
  (PERSON Dursley/NNP)
  was/VBD
  thin/JJ
  and/CC
  blonde/NN
  and/CC
  had/VBD
  nearly/RB
  twice/RB
  the/DT
  usual/JJ
  amount/NN
  of/IN
  neck/NN
  ,/,
  which/WDT
  came/VBD
  in/IN
  very/RB
  useful/JJ
  as/IN
  she/PRP
  spent/VBD
  so/RB
  much/JJ
  of/IN
  her/PRP$
  time/NN
  craning/NN
  over/IN
  garden/NN
  fences/NNS
  ,/,
  spying/VBG
  on/IN
  the/DT
  neighbors/NNS
  ./.
  The/DT
  (ORGANIZATION Dursleys/NNP)
  had/VBD
  a/DT
  small/JJ
  son/NN
  called/VBN
  (PERSON Dudley/NNP)
  and/CC
  in/IN
  their/PRP$
  opinion/NN
  there/EX
  was/VBD
  no/DT
  finer/NN
  boy/RB


In [25]:
import spacy
from spacy import displacy
from spacy.gold import GoldParse, Doc
from spacy.vocab import Vocab
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()

In [26]:
#Create a longer sentence. Since we need a paragraph!

long_sentence = data[4]+data[5]+data[6]+data[7]+data[8]+data[9]

In [27]:
long_sentence

'Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn\'t think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley\'s sister, but they hadn\'t met for several years; in fact, Mrs. Dursley pretended she didn\'t have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street. 

In [48]:
vocab = Vocab(tag_map={'DL': {'pos': 'NOUN'}})
doc = Doc(Vocab(), words=['Harry', 'Dursley', 'Dudley','Petunia' ])
gold = GoldParse(doc, entities=['DL', 'DL', 'DL', 'DL', 'DL'])

In [49]:
article = nlp(long_sentence)
len(article.ents)

33

In [50]:
labels = [x.label_ for x in article.ents]
Counter(labels)

Counter({'PERSON': 15,
         'ORG': 9,
         'DATE': 3,
         'NORP': 1,
         'CARDINAL': 1,
         'ORDINAL': 2,
         'LOC': 1,
         'FAC': 1})

In [51]:
items = [x.text for x in article.ents]
Counter(items).most_common(5)

[('Dursley', 14),
 ('Dudley', 5),
 ('Potters', 4),
 ('Privet Drive', 2),
 ('Potter', 1)]

In [52]:
sentences = [x for x in article.sents]
print(sentences[5])

They didn't think they could bear it if anyone found out about the Potters.


In [53]:
displacy.render(nlp(str(sentences)), jupyter=True, style='ent')

In [56]:
displacy.render(nlp(str(sentences[0])), style='dep', jupyter = True, options = {'distance':200})

In [35]:
[(x.orth_,x.pos_, x.lemma_) for x in [y 
                                      for y
                                      in nlp(str(sentences)) 
                                      if not y.is_stop and y.pos_ != 'PUNCT']]

[('Mr.', 'PROPN', 'mr.'),
 ('Dursley', 'PROPN', 'dursley'),
 ('director', 'NOUN', 'director'),
 ('firm', 'NOUN', 'firm'),
 ('called', 'VERB', 'call'),
 ('Grunnings', 'PROPN', 'grunnings'),
 ('drills', 'NOUN', 'drill'),
 ('He', 'PRON', '-PRON-'),
 ('big', 'ADJ', 'big'),
 ('beefy', 'ADJ', 'beefy'),
 ('man', 'NOUN', 'man'),
 ('hardly', 'ADV', 'hardly'),
 ('neck', 'NOUN', 'neck'),
 ('large', 'ADJ', 'large'),
 ('mustache', 'NOUN', 'mustache'),
 ('Mrs.', 'PROPN', 'mrs.'),
 ('Dursley', 'PROPN', 'dursley'),
 ('thin', 'ADJ', 'thin'),
 ('blonde', 'ADJ', 'blonde'),
 ('nearly', 'ADV', 'nearly'),
 ('twice', 'ADJ', 'twice'),
 ('usual', 'ADJ', 'usual'),
 ('neck', 'NOUN', 'neck'),
 ('came', 'VERB', 'come'),
 ('useful', 'ADJ', 'useful'),
 ('spent', 'VERB', 'spend'),
 ('time', 'NOUN', 'time'),
 ('craning', 'VERB', 'crane'),
 ('garden', 'NOUN', 'garden'),
 ('fences', 'NOUN', 'fence'),
 ('spying', 'VERB', 'spy'),
 ('neighbors', 'NOUN', 'neighbor'),
 ('The', 'DET', 'the'),
 ('Dursleys', 'PROPN', 'dursleys'

In [36]:
dict([(str(x), x.label_) for x in nlp(str(sentences[:])).ents])

{'Dursley': 'PERSON',
 'Dudley': 'ORG',
 'Potters': 'ORG',
 'Potter': 'PERSON',
 'several years': 'DATE',
 'unDursleyish': 'NORP',
 'Tuesday': 'DATE',
 'half past eight': 'DATE',
 'four': 'CARDINAL',
 'first': 'ORDINAL',
 'second': 'ORDINAL',
 'Privet Drive': 'FAC'}

In [37]:
displacy.render(nlp(str(sentences)), jupyter=True, style='ent')

In [62]:
from textblob import TextBlob

for sentence in data[4:8]:
    print(sentence,'\n')
    senti = TextBlob(sentence)
    print(senti.sentiment,'\n')

Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.  

Sentiment(polarity=-0.02485347985347985, subjectivity=0.36144688644688644) 

The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursley pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered t

In [64]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

for sentence in data[4:8]:
    print(sentence,'\n')
    senti = analyzer.polarity_scores(sentence)
    print(senti,'\n')

Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.  

{'neg': 0.026, 'neu': 0.936, 'pos': 0.038, 'compound': 0.2484} 

The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursley pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what

In [68]:
#from examples in nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer

sentences = ["VADER is smart, handsome, and funny.",
             "VADER is smart, handsome, and funny!", 
             "VADER is very smart, handsome, and funny.", 
             "VADER is VERY SMART, handsome, and FUNNY.", 
             "VADER is VERY SMART, handsome, and FUNNY!!!",
             "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",
             "VADER is SHIT, really ugly, and repugnant!!!"]

analyzer = SentimentIntensityAnalyzer()

for sentence in sentences:
    print(sentence)
    senti = analyzer.polarity_scores(sentence)
    print(senti,'\n')

VADER is smart, handsome, and funny.
{'neg': 0.0, 'neu': 0.254, 'pos': 0.746, 'compound': 0.8316} 

VADER is smart, handsome, and funny!
{'neg': 0.0, 'neu': 0.248, 'pos': 0.752, 'compound': 0.8439} 

VADER is very smart, handsome, and funny.
{'neg': 0.0, 'neu': 0.299, 'pos': 0.701, 'compound': 0.8545} 

VADER is VERY SMART, handsome, and FUNNY.
{'neg': 0.0, 'neu': 0.246, 'pos': 0.754, 'compound': 0.9227} 

VADER is VERY SMART, handsome, and FUNNY!!!
{'neg': 0.0, 'neu': 0.233, 'pos': 0.767, 'compound': 0.9342} 

VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!
{'neg': 0.0, 'neu': 0.294, 'pos': 0.706, 'compound': 0.9469} 

VADER is SHIT, really ugly, and repugnant!!!
{'neg': 0.638, 'neu': 0.362, 'pos': 0.0, 'compound': -0.869} 

