In [1]:
# Mount Google Drive at '/content/drive' so the paths used later in this
# notebook (dataset CSV, saved model files) resolve inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Read the tweet-emotion CSV from the mounted Drive into a DataFrame.
data_path = '/content/drive/My Drive/tweet_emotions.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Peek at the data: print the first five rows (the default window of head()).
print(df.head(n=5))

     tweet_id   sentiment                                            content
0  1956967341       empty  @tiffanylue i know  i was listenin to bad habi...
1  1956967666     sadness  Layin n bed with a headache  ughhhh...waitin o...
2  1956967696     sadness                Funeral ceremony...gloomy friday...
3  1956967789  enthusiasm               wants to hang out with friends SOON!
4  1956968416     neutral  @dannycastillo We want to trade with someone w...


In [None]:
# Inspect how many tweets carry each emotion label in the raw data.
df.loc[:, 'sentiment'].value_counts()

sentiment
neutral       8638
worry         8459
happiness     5209
sadness       5165
love          3842
surprise      2187
fun           1776
relief        1526
hate          1323
empty          827
enthusiasm     759
boredom        179
anger          110
Name: count, dtype: int64

In [None]:
# Drop the rows labelled with any of these seven emotions; every other row
# (including rows with a missing label) is kept, exactly as a chain of `!=`
# comparisons would do.
df = df[~df['sentiment'].isin(
    ['anger', 'boredom', 'enthusiasm', 'empty', 'relief', 'fun', 'love']
)]

In [None]:
# Re-check the label distribution after the unwanted emotions were removed.
df.loc[:, 'sentiment'].value_counts()

sentiment
neutral      8638
worry        8459
happiness    5209
sadness      5165
surprise     2187
hate         1323
Name: count, dtype: int64

In [None]:
# Regex support plus the NLTK English stop-word corpus used by the text-cleaning step.
import re
import nltk
nltk.download('stopwords')  # downloads the 'stopwords' corpus package into the NLTK data folder
# Fixed: the name was misspelled as `stopwordsS`, which raises ImportError and
# leaves `stopwords` undefined for the cells that follow.
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# English stop words as a set, so the per-word membership checks done while
# cleaning tweets are hash lookups rather than list scans.
stop_words = {*stopwords.words('english')}

In [None]:
def clean_text(text):
    """Normalise a raw tweet into a lowercase, space-separated string of words.

    Steps, in order: lowercase; strip URLs; strip @mentions and '#' signs
    (the hashtag word itself is kept); strip digits; strip every character
    that is neither a word character nor whitespace; drop tokens that are in
    the module-level ``stop_words`` collection.

    Args:
        text: The raw tweet. Any value that is not a ``str`` (for example the
            float NaN pandas uses for a missing cell, or ``None``) is treated
            as empty text.

    Returns:
        The cleaned text, or ``''`` when nothing remains or the input was not
        a string.
    """
    # Missing cells arrive as float NaN / None when this runs through
    # Series.apply; calling .lower() on them would raise AttributeError.
    if not isinstance(text, str):
        return ''
    # Lowercase
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove @mentions and the '#' sign of hashtags
    text = re.sub(r'\@\w+|\#','', text)
    # Remove digits
    text = re.sub(r'\d+', '', text)
    # Remove punctuation (anything that is not a word character or whitespace)
    text = re.sub(r'[^\w\s]', '', text)
    # Remove stop words; split()/join also collapses runs of whitespace
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

# Clean every tweet's text. assign() returns a new DataFrame instead of writing
# a column into `df` in place; `df` is a boolean-mask selection of the loaded
# data at this point, and an in-place column write on such a selection can
# trigger pandas' SettingWithCopyWarning.
df = df.assign(content=df['content'].apply(clean_text))

In [None]:
# Balance the classes by keeping at most the first 1300 rows (in current row
# order, not a random sample) of each emotion, then stack the six subsets.
data_neu = df[df['sentiment'] == 'neutral'].head(1300)
data_wor = df[df['sentiment'] == 'worry'].head(1300)
data_hap = df[df['sentiment'] == 'happiness'].head(1300)
data_sad = df[df['sentiment'] == 'sadness'].head(1300)
data_sup = df[df['sentiment'] == 'surprise'].head(1300)
data_hat = df[df['sentiment'] == 'hate'].head(1300)

df = pd.concat(
    objs=[data_neu, data_wor, data_hap, data_sad, data_sup, data_hat],
)

In [None]:
# Number of rows left after balancing.
len(df)

7800

In [None]:
# Confirm the label distribution of the balanced subset.
df.loc[:, 'sentiment'].value_counts()

sentiment
neutral      1300
worry        1300
happiness    1300
sadness      1300
surprise     1300
hate         1300
Name: count, dtype: int64

In [None]:
# Build the label vector and the TF-IDF feature matrix from the cleaned tweets.
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split, so its vocabulary and IDF weights also see the held-out tweets —
# consider fitting on the training portion only.
y = df['sentiment']
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
X = tfidf_vectorizer.fit_transform(raw_documents=df['content'])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffled
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

In [None]:
# Create a multinomial Naive Bayes classifier with default settings and fit it
# on the training features and labels.
nb_model = MultinomialNB()
nb_model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out rows.
y_pred = nb_model.predict(X_test)

# Score the predictions: overall accuracy plus per-class precision/recall/F1.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", report)

Accuracy: 0.325
Classification Report:
               precision    recall  f1-score   support

   happiness       0.36      0.47      0.41       252
        hate       0.41      0.41      0.41       252
     neutral       0.26      0.16      0.20       272
     sadness       0.32      0.36      0.34       259
    surprise       0.29      0.28      0.28       254
       worry       0.28      0.28      0.28       271

    accuracy                           0.33      1560
   macro avg       0.32      0.33      0.32      1560
weighted avg       0.32      0.33      0.32      1560



In [None]:
import os
import joblib

# Persist the trained classifier and its fitted vectorizer to Drive so they can
# be reloaded without retraining.
model_path = '/content/drive/My Drive/NavieBayes/NB_model.joblib'
vectorizer_path = '/content/drive/My Drive/NavieBayes/tfidf_vectorizer.joblib'

# joblib.dump writes to the given path but does not create missing folders;
# make sure the shared target folder exists so the save cannot fail with
# FileNotFoundError on a fresh Drive.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

joblib.dump(nb_model, model_path)
joblib.dump(tfidf_vectorizer, vectorizer_path)


['/content/drive/My Drive/Colab Notebooks/YapayZekaDonemSonuUygulaması(Emotion detection from text)/NavieBayes/tfidf_vectorizer.joblib']

In [None]:
import joblib

# Restore the fitted vectorizer and the trained classifier from Drive.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
loaded_tfidf_vectorizer = joblib.load(
    filename='/content/drive/My Drive/NavieBayes/tfidf_vectorizer.joblib'
)
loaded_model = joblib.load(
    filename='/content/drive/My Drive/NavieBayes/NB_model.joblib'
)

In [4]:
# Classify one example sentence with the reloaded vectorizer and model.
text = "the weather is very nice today"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)


happiness


In [5]:
# Classify a short product-review style phrase.
text = "very nice product"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)

happiness


In [6]:
# Classify a sentence whose wording ("hurt") and meaning (laughing) pull in
# different directions.
text = "My stomach hurt from laughing"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)

worry


In [7]:
# Classify a greeting-style sentence.
text = "Happy Mothers Day mummy!"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)

happiness


In [8]:
# Classify a Turkish sentence.
# NOTE(review): the model was trained on the tweets loaded above, which look
# English — presumably few or none of these tokens are in the vocabulary, so
# treat this prediction with caution.
text = "Bugün harika bir gün!"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)

happiness


In [9]:
# Classify a sentence built around a strongly negative word.
text = "I disgust you"
# Vectorise a one-element batch of raw text into TF-IDF features.
text_vector = loaded_tfidf_vectorizer.transform([text])
# predict() returns one label per input row; unpack the single result.
(sentiment,) = loaded_model.predict(text_vector)
print(sentiment)

hate
