In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
file_path = '/content/drive/My Drive/tweet_emotions.csv'
df = pd.read_csv(file_path)

In [None]:
# İlk birkaç satırı görüntüleyin
df.head()

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...


In [None]:
df.shape[0]

40000

In [None]:
# Veri setinin yapısını görüntüleyin
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40000 entries, 0 to 39999
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   tweet_id   40000 non-null  int64 
 1   sentiment  40000 non-null  object
 2   content    40000 non-null  object
dtypes: int64(1), object(2)
memory usage: 937.6+ KB


In [None]:
# Duygu dağılımını inceleyin
df['sentiment'].value_counts()

sentiment
neutral       8638
worry         8459
happiness     5209
sadness       5165
love          3842
surprise      2187
fun           1776
relief        1526
hate          1323
empty          827
enthusiasm     759
boredom        179
anger          110
Name: count, dtype: int64

In [None]:
# Eksik verileri kontrol edin
print(df.isnull().sum())

tweet_id     0
sentiment    0
content      0
dtype: int64


In [None]:
# Tekrarlanan satırları kontrol edin
print(df.duplicated().sum())

0


In [None]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
stop_words = set(stopwords.words('english'))

In [None]:
def clean_text(text):
    # Küçük harfe çevir
    text = text.lower()
    # URL'leri kaldır
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Özel karakterleri ve sayıları kaldır
    text = re.sub(r'\@\w+|\#','', text)
    text = re.sub(r'\d+', '', text)
    # Noktalama işaretlerini kaldır
    text = re.sub(r'[^\w\s]', '', text)
    # Durak kelimeleri kaldır
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

# Metin verisini temizleyin
df['content'] = df['content'].apply(clean_text)

In [None]:
df.head()

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,know listenin bad habit earlier started freaki...
1,1956967666,sadness,layin n bed headache ughhhhwaitin call
2,1956967696,sadness,funeral ceremonygloomy friday
3,1956967789,enthusiasm,wants hang friends soon
4,1956968416,neutral,want trade someone houston tickets one


In [None]:
# Metin ve etiket sütunlarını seçin
X = df['content']
y = df['sentiment']

# Eğitim ve test setlerine bölün
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# TF-IDF vektörleştirici oluşturun
tfidf = TfidfVectorizer(max_features=5000)

# Eğitim verisini vektörleştirin
X_train_tfidf = tfidf.fit_transform(X_train)

# Test verisini vektörleştirin
X_test_tfidf = tfidf.transform(X_test)

In [None]:
# Lojistik regresyon modelini oluşturun
model = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Modeli eğitin
model.fit(X_train_tfidf, y_train)


In [None]:
# Test verisi ile tahmin yapın
y_pred = model.predict(X_test_tfidf)

# Doğruluk oranını hesaplayın
accuracy = accuracy_score(y_test, y_pred)
print(f'Doğruluk Oranı: {accuracy}')

# Sınıflandırma raporu oluşturun
print(classification_report(y_test, y_pred))

Doğruluk Oranı: 0.346625


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       anger       0.00      0.00      0.00        19
     boredom       0.00      0.00      0.00        31
       empty       0.33      0.01      0.01       162
  enthusiasm       0.00      0.00      0.00       163
         fun       0.15      0.02      0.04       338
   happiness       0.34      0.37      0.35      1028
        hate       0.49      0.16      0.25       268
        love       0.50      0.38      0.43       762
     neutral       0.33      0.56      0.42      1740
      relief       0.35      0.02      0.04       352
     sadness       0.34      0.25      0.29      1046
    surprise       0.32      0.05      0.09       425
       worry       0.33      0.48      0.39      1666

    accuracy                           0.35      8000
   macro avg       0.27      0.18      0.18      8000
weighted avg       0.34      0.35      0.31      8000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import joblib

# Modeli kaydedin
joblib.dump(model, '/content/drive/My Drive/LogisticRegresyon/logistic_regression_model.pkl')

# Vektörleştiriciyi kaydedin
joblib.dump(tfidf, '/content/drive/My Drive/LogisticRegresyon/tfidf_vectorizer.pkl')


['/content/drive/My Drive/Colab Notebooks/YapayZekaDonemSonuUygulaması(Emotion detection from text)/LogisticRegresyon/tfidf_vectorizer.pkl']

In [None]:
import joblib
# Modeli ve vektörleştiriciyi yükleyin
model = joblib.load('/content/drive/My Drive/LogisticRegresyon/logistic_regression_model.pkl')
tfidf = joblib.load('/content/drive/My Drive/LogisticRegresyon/tfidf_vectorizer.pkl')

In [8]:
# Yeni bir metin verisini tahmin edin
new_text = ["the weather is very nice today"]
new_text_tfidf = tfidf.transform(new_text)
prediction = model.predict(new_text_tfidf)
print(prediction)

['happiness']


In [9]:
# Yeni bir metin verisini tahmin edin
new_text = ["very nice product"]
new_text_tfidf = tfidf.transform(new_text)
prediction = model.predict(new_text_tfidf)
print(prediction)

['happiness']


In [10]:
# Yeni bir metin verisini tahmin edin
new_text = ["My stomach hurt from laughing"]
new_text_tfidf = tfidf.transform(new_text)
prediction = model.predict(new_text_tfidf)
print(prediction)

['worry']


In [11]:
# Yeni bir metin verisini tahmin edin
new_text = ["Happy Mothers Day mummy"]
new_text_tfidf = tfidf.transform(new_text)
prediction = model.predict(new_text_tfidf)
print(prediction)

['love']


In [12]:
# Yeni bir metin verisini tahmin edin
new_text = ["I disgust you"]
new_text_tfidf = tfidf.transform(new_text)
prediction = model.predict(new_text_tfidf)
print(prediction)

['neutral']
