In [None]:
!pip install vaderSentiment

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [None]:
# Mount Google Drive at /content/drive; the CSV paths read later in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd

In [None]:
# Module-level VADER analyzer instance; analyze_sentiment_vader() below reads
# this global rather than constructing a new analyzer per call.
analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment_vader(text) -> float:
    """Return the VADER compound sentiment score for ``text``.

    Args:
        text: The value to score. Anything that is not missing is converted
            with ``str()`` before being passed to the analyzer.

    Returns:
        The ``'compound'`` entry of ``analyzer.polarity_scores(...)``, or
        ``0.0`` (treated as neutral) when ``text`` is missing according to
        ``pd.isna`` (e.g. ``None`` or NaN).
    """
    # Missing text is scored as neutral. Return a float (not the int 0) so
    # both branches yield the same type and the resulting column dtype does
    # not depend on which rows happen to be missing.
    if pd.isna(text):
        return 0.0
    scores = analyzer.polarity_scores(str(text))
    return scores['compound']


In [None]:
# Read the merged posts CSV from the mounted Drive folder.
file_path1 = '/content/drive/My Drive/Colab Notebooks/merged_post.csv'
df1 = pd.read_csv(file_path1)

# Score 'processed_title' and 'processed_content' with VADER and store the
# results as two new columns on the posts frame.
df1 = df1.assign(
    title_sentiment=df1['processed_title'].apply(analyze_sentiment_vader),
    content_sentiment=df1['processed_content'].apply(analyze_sentiment_vader),
)


In [None]:
# Sentiment analysis for comments: read the merged comments CSV from the
# mounted Drive folder and score each 'processed_comment_body' with VADER.
file_path2 = '/content/drive/My Drive/Colab Notebooks/merged_comments.csv'
df2 = pd.read_csv(file_path2)

df2 = df2.assign(
    comment_sentiment=df2['processed_comment_body'].apply(analyze_sentiment_vader),
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the whitegrid chart style to every figure below.
sns.set_theme(style="whitegrid")


def plot_sentiment_histogram(scores, title, color):
    """Draw one sentiment-score histogram with a KDE overlay and show it.

    Args:
        scores: The sentiment scores to plot (a DataFrame column here).
        title: Figure title.
        color: Matplotlib color used for the bars and the KDE line.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(scores, bins=20, kde=True, color=color, ax=ax)
    ax.set(title=title, xlabel='Sentiment Score', ylabel='Count')
    plt.show()


# Figure 1: title sentiment
plot_sentiment_histogram(
    df1['title_sentiment'], 'Title Sentiment Score Distribution', 'orange'
)

# Figure 2: content sentiment
plot_sentiment_histogram(
    df1['content_sentiment'], 'Content Sentiment Score Distribution', 'skyblue'
)

# Figure 3: comment sentiment
plot_sentiment_histogram(
    df2['comment_sentiment'], 'Comment Sentiment Score Distribution', 'green'
)

# Figure 4: the three KDE curves overlaid on one set of axes.
# `fill=True` replaces the deprecated `shade=True` keyword of sns.kdeplot.
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(df1['title_sentiment'], fill=True, color='orange', label='Title Sentiment', ax=ax)
sns.kdeplot(df1['content_sentiment'], fill=True, color='skyblue', label='Content Sentiment', ax=ax)
sns.kdeplot(df2['comment_sentiment'], fill=True, color='green', label='Comment Sentiment', ax=ax)
ax.set(title='Combined Sentiment Score KDE', xlabel='Sentiment Score', ylabel='Density')
ax.legend()
plt.show()
