In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, class_likelihood_ratios, classification_report


In [7]:
data = pd.DataFrame([("i love spending time with my friends and family", "positive"),
("that was the best meal i've ever had in my life", "positive"),
("i feel so grateful for everything i have in my life", "positive"),
("i received a promotion at work and i couldn't be happier", "positive"),
("watching a beautiful sunset always fills me with joy", "positive"),
("my partner surprised me with a thoughtful gift and it made my day", "positive"),
("i am so proud of my daughter for graduating with honors", "positive"),
("listening to my favorite music always puts me in a good mood", "positive"),
("i love the feeling of accomplishment after completing a challenging task", "positive"),
("i am excited to go on vacation next week", "positive"),
("i feel so overwhelmed with work and responsibilities", "negative"),
("the traffic during my commute is always so frustrating", "negative"),
("i received a parking ticket and it ruined my day", "negative"),
("i got into an argument with my partner and we're not speaking", "negative"),
("i have a headache and i feel terrible", "negative"),
("i received a rejection letter for the job i really wanted", "negative"),
("my car broke down and it's going to be expensive to fix", "negative"),
("i'm feeling sad because i miss my friends who live far away", "negative"),
("i'm frustrated because i can't seem to make progress on my project", "negative"),
("i'm disappointed because my team lost the game", "negative")],
                    columns=['text', 'sentiment'])

In [9]:
data = data.sample(frac=1).reset_index(drop=True)

In [10]:
data

Unnamed: 0,text,sentiment
0,i'm feeling sad because i miss my friends who ...,negative
1,i got into an argument with my partner and we'...,negative
2,i feel so overwhelmed with work and responsibi...,negative
3,the traffic during my commute is always so fru...,negative
4,watching a beautiful sunset always fills me wi...,positive
5,my car broke down and it's going to be expensi...,negative
6,i received a parking ticket and it ruined my day,negative
7,i feel so grateful for everything i have in my...,positive
8,i am so proud of my daughter for graduating wi...,positive
9,i'm disappointed because my team lost the game,negative


In [11]:
x = data['text']
y = data['sentiment']

In [None]:
# テキストをBag of Wordsに変換するためのクライアント
countvec = CountVectorizer()

In [None]:
# 実際のデータ x をBag of Wordsに変換
countvec_fit = countvec.fit_transform(x)

In [17]:
countvec_fit

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 180 stored elements and shape (20, 118)>

In [None]:
# countvec_fit.toarray()
# 　→ 各文章の「単語ごとの出現回数」を、普通の2次元配列（NumPy array）に変換
# columns=countvec.get_feature_names_out()
# 　→ 列名（語彙の単語一覧）を取得
# pd.DataFrame(...)
# 　→ 文章ごと・単語ごとの出現回数を、単語名付きの表（DataFrame）にする
bag_of_words = pd.DataFrame(countvec_fit.toarray(), columns=countvec.get_feature_names_out())

In [21]:
bag_of_words

Unnamed: 0,accomplishment,after,always,am,an,and,argument,at,away,be,...,vacation,ve,wanted,was,watching,we,week,who,with,work
0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,1,1,1,0,0,0,...,0,0,0,0,0,1,0,0,1,0
2,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,1
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,1,0
5,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
x_train, x_test, y_train, y_test = train_test_split(bag_of_words, y, test_size=0.3, random_state=42)

In [24]:
Ir = LogisticRegression(random_state=1).fit(x_train, y_train)

In [27]:
y_pred = Ir.predict(x_test)

In [28]:
y_pred

array(['positive', 'positive', 'positive', 'positive', 'positive',
       'positive'], dtype=object)

In [29]:
accuracy_score(y_pred, y_test)

0.3333333333333333

In [35]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         4
    positive       0.33      1.00      0.50         2

    accuracy                           0.33         6
   macro avg       0.17      0.50      0.25         6
weighted avg       0.11      0.33      0.17         6



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [36]:
from sklearn.naive_bayes import MultinomialNB

In [37]:
nb = MultinomialNB().fit(x_train, y_train)

In [38]:
y_pred_nb = nb.predict(x_test)
accuracy_score(y_pred_nb, y_test)

0.3333333333333333

In [39]:
print(classification_report(y_test, y_pred_nb))

              precision    recall  f1-score   support

    negative       0.50      0.25      0.33         4
    positive       0.25      0.50      0.33         2

    accuracy                           0.33         6
   macro avg       0.38      0.38      0.33         6
weighted avg       0.42      0.33      0.33         6



In [42]:
from sklearn.linear_model import  SGDClassifier
svm = SGDClassifier().fit(x_train, y_train)
y_pred_svm = svm.predict(x_test)
print(accuracy_score(y_pred_svm, y_test))

0.3333333333333333


In [43]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         4
    positive       0.33      1.00      0.50         2

    accuracy                           0.33         6
   macro avg       0.17      0.50      0.25         6
weighted avg       0.11      0.33      0.17         6



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
