# Sentiment Analysis

# 1-Importing Data & Preprocessing

In [10]:
import nltk
import os

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# Fetch the NLTK resources used by preprocess_text (no re-download when already up to date).
nltk.download('stopwords')
nltk.download('punkt')
# NLTK >= 3.8.2 makes word_tokenize load 'punkt_tab' instead of the pickled
# 'punkt' models; fetching both keeps the notebook working on old and new versions.
nltk.download('punkt_tab')

def preprocess_text(text):
    """Turn raw text into a bag-of-words feature dict for an NLTK classifier.

    The text is lower-cased and tokenized with ``word_tokenize``, English
    stop words are dropped, and every remaining token is reduced to its
    Porter stem. Punctuation tokens are not filtered out.

    Returns a dict mapping each distinct stem to ``True``.
    """
    tokens = word_tokenize(text.lower())

    english_stop_words = set(stopwords.words('english'))
    porter = PorterStemmer()

    # Only the presence of a stem matters, so repeated stems collapse into one key.
    stems = (
        porter.stem(token)
        for token in tokens
        if token not in english_stop_words
    )
    return dict.fromkeys(stems, True)

def load_training_set(
    folder_train_path=r"C:\Users\Hp\Desktop\mes projets\NLP and  text analysis\feeling_analysis\mini_dataset\train",
    encoding=None,
):
    """Load the labelled training documents as classifier feature sets.

    Args:
        folder_train_path: Directory containing a ``positive`` and a
            ``negative`` sub-folder, each holding one text file per document.
            Defaults to the original author's local dataset location.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default encoding; pass e.g. ``"utf-8"`` if
            the files are known to use it.

    Returns:
        A list of ``[features, label]`` pairs, where ``features`` is the dict
        produced by ``preprocess_text`` and ``label`` is ``'positive'`` or
        ``'negative'``. All positive documents come before the negative ones.

    Raises:
        OSError: If a label folder or one of its files cannot be read.
    """
    training = []
    # The sub-folder name doubles as the class label, so one loop serves both classes.
    for label in ('positive', 'negative'):
        label_folder = os.path.join(folder_train_path, label)
        for text_name in os.listdir(label_folder):
            text_path = os.path.join(label_folder, text_name)
            with open(text_path, 'r', encoding=encoding) as file:
                contenu = file.read()
            training.append([preprocess_text(contenu), label])
    return training

# Build the labelled training feature sets once; later cells reuse `training`.
training = load_training_set()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [16]:
# Peek at the first two [features, label] pairs to sanity-check the preprocessing.
print(training[:2])

[[{'bromwel': True, 'high': True, 'cartoon': True, 'comedi': True, '.': True, 'ran': True, 'time': True, 'program': True, 'school': True, 'life': True, ',': True, '``': True, 'teacher': True, "''": True, '35': True, 'year': True, 'teach': True, 'profess': True, 'lead': True, 'believ': True, "'s": True, 'satir': True, 'much': True, 'closer': True, 'realiti': True, 'scrambl': True, 'surviv': True, 'financi': True, 'insight': True, 'student': True, 'see': True, 'right': True, 'pathet': True, "'": True, 'pomp': True, 'petti': True, 'whole': True, 'situat': True, 'remind': True, 'knew': True, 'saw': True, 'episod': True, 'repeatedli': True, 'tri': True, 'burn': True, 'immedi': True, 'recal': True, '.........': True, '..........': True, 'classic': True, 'line': True, ':': True, 'inspector': True, "'m": True, 'sack': True, 'one': True, 'welcom': True, 'expect': True, 'mani': True, 'adult': True, 'age': True, 'think': True, 'far': True, 'fetch': True, 'piti': True, "n't": True, '!': True}, 'po

# Train The Classifier

In [11]:
from nltk.classify import NaiveBayesClassifier

# Fit a Naive Bayes model on the [features, label] pairs built by load_training_set().
classifier = NaiveBayesClassifier.train(training)

In [12]:
# Print the stems the classifier found most informative (at most n=25 rows).
classifier.show_most_informative_features(n=25)

Most Informative Features
                   worst = True           negati : positi =     25.7 : 1.0
                   trier = True           positi : negati =     16.3 : 1.0
                 william = True           positi : negati =     13.7 : 1.0
                     lar = True           positi : negati =     11.7 : 1.0
                  unless = True           negati : positi =     10.3 : 1.0
                   peter = True           negati : positi =      9.7 : 1.0
                    busi = True           positi : negati =      9.4 : 1.0
                   jungl = True           negati : positi =      9.0 : 1.0
                   zombi = True           negati : positi =      9.0 : 1.0
               brilliant = True           positi : negati =      8.3 : 1.0
                 element = True           positi : negati =      7.8 : 1.0
                    crap = True           negati : positi =      7.7 : 1.0
                 fantast = True           positi : negati =      7.7 : 1.0

# Load the Test Data

In [13]:
def load_test_set(
    folder_test_path=r"C:\Users\Hp\Desktop\mes projets\NLP and  text analysis\feeling_analysis\mini_dataset\test",
    encoding=None,
):
    """Load the labelled test documents as classifier feature sets.

    Args:
        folder_test_path: Directory containing a ``positive`` and a
            ``negative`` sub-folder, each holding one text file per document.
            Defaults to the original author's local dataset location.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default encoding; pass e.g. ``"utf-8"`` if
            the files are known to use it.

    Returns:
        A list of ``[features, label]`` pairs, where ``features`` is the dict
        produced by ``preprocess_text`` and ``label`` is ``'positive'`` or
        ``'negative'``. All positive documents come before the negative ones.

    Raises:
        OSError: If a label folder or one of its files cannot be read.
    """
    test_set = []
    # The sub-folder name doubles as the class label, so one loop serves both classes.
    for label in ('positive', 'negative'):
        label_folder = os.path.join(folder_test_path, label)
        for text_name in os.listdir(label_folder):
            text_path = os.path.join(label_folder, text_name)
            with open(text_path, 'r', encoding=encoding) as file:
                contenu = file.read()
            test_set.append([preprocess_text(contenu), label])
    return test_set

# Build the labelled test feature sets; the evaluation cell reads `test`.
test = load_test_set()

# Evaluate The Model

In [14]:
import nltk

# Prerequisites from earlier cells: `classifier` (the trained Naive Bayes model)
# and `test` (the labelled feature sets returned by load_test_set()).

# Share of test documents whose predicted label matches the stored label
accuracy = nltk.classify.accuracy(classifier, test)
print(accuracy)

0.7254901960784313


# Predict the class of a text

In [15]:
# Ad-hoc sentence to classify
text="i'm in a bad situation actually , i can't breathe"
# Apply the same preprocessing as the training documents so the feature keys match
text_to_predict=preprocess_text(text)
# Make the prediction using the trained classifier
predicted_class = classifier.classify(text_to_predict)
print(predicted_class)

negative


# We can use this model to distinguish positive from negative user feedback on a website

We can start by web-scraping a website to extract its users' comments.

In [16]:
import requests
import bs4
from bs4 import BeautifulSoup

# Placeholder: set this to the page whose user comments should be scraped.
# requests rejects an empty URL, so the cell fails until it is filled in.
url=""
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response=requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of parsing an error page as comments.
response.raise_for_status()
html_content=response.content
# Parse the raw bytes with the lxml parser (the lxml package must be installed).
soup=BeautifulSoup(html_content,'lxml')



Once the text is extracted, we apply the `preprocess_text` function to each comment to prepare it for classification.

In [None]:
#preprocessing code


We can then count the positive and negative comments by keeping one counter per class.