In [1]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

#2. Import os
#Make the input data files available in the computer directory.
#In this case, running this will list the files

import os
print(os.listdir("/"))
nltk.download('subjectivity')
nltk.download('lexicon')



['$RECYCLE.BIN', '$WinREAgent', 'apache-jena-fuseki-4.3.2', 'Documents and Settings', 'DumpStack.log.tmp', 'EUMONBMP.SYS', 'hiberfil.sys', 'Intel', 'OneDriveTemp', 'pagefile.sys', 'PerfLogs', 'Program Files', 'Program Files (x86)', 'ProgramData', 'Project.log', 'Python', 'Python310', 'Recovery', 'swapfile.sys', 'System Volume Information', 'Users', 'Windows']


[nltk_data] Downloading package subjectivity to
[nltk_data]     C:\Users\adria\AppData\Roaming\nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!
[nltk_data] Error loading lexicon: Package 'lexicon' not found in
[nltk_data]     index


False

In [2]:
#3. Make the creation of data in subjective and objective point of view of the sentences.
#Read 100 obj phases and 100 subj phases.

n_instances = 100
subjective_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
objective_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
len(subjective_docs), len(objective_docs)


(100, 100)

In [3]:
#4.Deviding the 2 kinds of sentences to maintain an evenly balanced distribution.
subjective_docs[0]
objective_docs[0]


(['the',
  'movie',
  'begins',
  'in',
  'the',
  'past',
  'where',
  'a',
  'young',
  'boy',
  'named',
  'sam',
  'attempts',
  'to',
  'save',
  'celebi',
  'from',
  'a',
  'hunter',
  '.'],
 'obj')

In [4]:
train_subjective_docs = subjective_docs[:80]
test_subjective_docs = subjective_docs[80:100]
train_objective_docs = objective_docs[:80]
test_objective_docs = objective_docs[80:100]
training_docs = train_subjective_docs+train_objective_docs
testing_docs = test_subjective_docs+test_objective_docs

sentim_analyzer = SentimentAnalyzer()
total_neg_words = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])


In [5]:
#5. I created unigrams for then the simple strokes of the words that are used, managing the negation.
#For unigram we will have 3 characteristics and all 3 are independent of each other.

#
unigram_feats = sentim_analyzer.unigram_word_feats(total_neg_words, min_freq=4)
len(unigram_feats)

sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

In [6]:
#6.We are ready to train our classifier on the training set and generate the #assessment results:
#Training classify

training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(testing_docs)

In [7]:
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)

Training classifier


In [8]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [9]:
#7. Test Phrase List Definition
#Manipulate paragraphs

sentences = ["Brad Pitt is smart, handsome, and funny!", #  positive phrases example
             "The movie was good.",
             "The movie was kind of good."
             "However, Johnny Depp is charming ",
             "At least it isn't a horrible movie.",
             "Action movies with Angelina has never been this good like this one.",
             ":) and :D"

]

In [10]:
more_sentences = [
    "Interstellar est imposant, mais il n'est pas impressionnant. ", # negative phrases example
    "The plot was good, but the characters are uncompelling and the dialog is not great.",
    "Most cartoons movies are childish.",
    "A really bad, horrible book.",
    "Drama has never been good.",
    ":( "
 ]

sentences.extend(more_sentences)

In [11]:
paragraph = "It was one of the worst movies I've seen, despite good reviews. \
 Unbelievably bad acting!! Poor direction. VERY poor production. "

In [12]:
from nltk import tokenize
lines_list = tokenize.sent_tokenize(paragraph)
sentences.extend(lines_list)

In [13]:
total = SentimentIntensityAnalyzer()
for sentence in sentences:
     print(sentence)
     loading = total.polarity_scores(sentence)
     for k in sorted(loading):
         print('{0}: {1}, '.format(k, loading[k]), end='')
     print()

Brad Pitt is smart, handsome, and funny!
compound: 0.8439, neg: 0.0, neu: 0.306, pos: 0.694, 
The movie was good.
compound: 0.4404, neg: 0.0, neu: 0.508, pos: 0.492, 
The movie was kind of good.However, Johnny Depp is charming 
compound: 0.5859, neg: 0.0, neu: 0.703, pos: 0.297, 
At least it isn't a horrible movie.
compound: 0.431, neg: 0.0, neu: 0.637, pos: 0.363, 
Action movies with Angelina has never been this good like this one.
compound: 0.7073, neg: 0.0, neu: 0.63, pos: 0.37, 
:) and :D
compound: 0.7925, neg: 0.0, neu: 0.124, pos: 0.876, 
Interstellar est imposant, mais il n'est pas impressionnant. 
compound: 0.0, neg: 0.0, neu: 1.0, pos: 0.0, 
The plot was good, but the characters are uncompelling and the dialog is not great.
compound: -0.7042, neg: 0.327, neu: 0.579, pos: 0.094, 
Most cartoons movies are childish.
compound: -0.296, neg: 0.355, neu: 0.645, pos: 0.0, 
A really bad, horrible book.
compound: -0.8211, neg: 0.791, neu: 0.209, pos: 0.0, 
Drama has never been good.
com