In [None]:
import joblib
from bertopic import BERTopic

# Countries and emotion categories for which a saved topic model is looked up.
nations = ['korea', 'china', 'japan']
emotion_labels = ['joy', 'sadness', 'anger', 'fear', 'disgust', 'surprise', 'neutral']

# Loaded models keyed by "<nation>_<emotion>"; combinations without a file are skipped.
models = {}

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    model_filename = f"/Users/gyungmin/VS_PRJ/DH/models/{nation}_{emotion}_bertopic_model"
    try:
        model = joblib.load(model_filename)
    except FileNotFoundError:
        # A missing file is reported and the remaining combinations are still tried.
        print(f"Model file not found: {model_filename}")
        continue
    models[f"{nation}_{emotion}"] = model
    print(f"Loaded model: {model_filename}")

# Show which models were actually loaded.
print(models.keys())



In [None]:
from matplotlib import pyplot as plt
# Jupyter Notebook에서 그래프를 인라인으로 표시
%matplotlib inline


# Draw the per-topic word-score bar chart (up to 20 topics) for every loaded model.
for nation in nations:
    for emotion in emotion_labels:
        nation_emotion_model = models.get(f"{nation}_{emotion}")
        if nation_emotion_model:
            fig = nation_emotion_model.visualize_barchart(top_n_topics=20)
            fig.update_layout(title_text=f"{nation} {emotion} Word Scores")
            fig.show()
        else:
            # Must be an f-string so the message names the missing nation/emotion
            # instead of printing the placeholder text literally.
            print(f"{nation}_{emotion} model is not loaded.")

In [None]:
import joblib
from matplotlib import pyplot as plt
from umap import UMAP
# Jupyter Notebook에서 그래프를 인라인으로 표시
%matplotlib inline

# One 2-component cosine UMAP instance, attached to every loaded model below.
# The original note describes this as a workaround for a visualization error.
# NOTE(review): confirm that visualize_topics actually reads `umap_model`; the
# same unfitted instance ends up shared by all models.
umap_model = UMAP(n_neighbors=15, n_components=2, metric='cosine')

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    nation_emotion_model = models.get(f"{nation}_{emotion}")
    if not nation_emotion_model:
        print(f"{nation}_{emotion} model is not loaded.")
        continue

    # Replace the model's UMAP with the shared one before plotting.
    nation_emotion_model.umap_model = umap_model

    try:
        fig = nation_emotion_model.visualize_topics()
        fig.update_layout(title_text=f"{nation} {emotion} visualize_topics")
        fig.show()
    except Exception as exc:
        # Best effort: report the failure and move on to the next model.
        print(f"Error visualizing {nation}_{emotion}: {exc}")

In [None]:
import joblib
from matplotlib import pyplot as plt
from umap import UMAP
# Jupyter Notebook에서 그래프를 인라인으로 표시
%matplotlib inline

# One 2-component cosine UMAP instance, attached to every loaded model below.
# The original note describes this as a workaround for a visualization error.
# NOTE(review): confirm that visualize_hierarchy actually reads `umap_model`.
umap_model = UMAP(n_neighbors=15, n_components=2, metric='cosine')

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    nation_emotion_model = models.get(f"{nation}_{emotion}")
    if not nation_emotion_model:
        print(f"{nation}_{emotion} model is not loaded.")
        continue

    # Replace the model's UMAP with the shared one before plotting.
    nation_emotion_model.umap_model = umap_model

    try:
        fig = nation_emotion_model.visualize_hierarchy(top_n_topics=20)
        fig.update_layout(title_text=f"{nation} {emotion} visualize_hierarchy")
        fig.show()
    except Exception as exc:
        # Best effort: report the failure and move on to the next model.
        print(f"Error visualizing {nation}_{emotion}: {exc}")

In [None]:
import joblib
from matplotlib import pyplot as plt
from umap import UMAP
# Jupyter Notebook에서 그래프를 인라인으로 표시
%matplotlib inline

# One 2-component cosine UMAP instance, attached to every loaded model below.
# The original note describes this as a workaround for a visualization error.
# NOTE(review): confirm that visualize_heatmap actually reads `umap_model`.
umap_model = UMAP(n_neighbors=15, n_components=2, metric='cosine')

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    nation_emotion_model = models.get(f"{nation}_{emotion}")
    if not nation_emotion_model:
        print(f"{nation}_{emotion} model is not loaded.")
        continue

    # Replace the model's UMAP with the shared one before plotting.
    nation_emotion_model.umap_model = umap_model

    try:
        fig = nation_emotion_model.visualize_heatmap(top_n_topics=20)
        fig.update_layout(title_text=f"{nation} {emotion} visualize_heatmap")
        fig.show()
    except Exception as exc:
        # Best effort: report the failure and move on to the next model.
        print(f"Error visualizing {nation}_{emotion}: {exc}")

In [None]:
import joblib
from matplotlib import pyplot as plt
from umap import UMAP
# Jupyter Notebook에서 그래프를 인라인으로 표시
%matplotlib inline

# One 2-component cosine UMAP instance, attached to every loaded model below.
# The original note describes this as a workaround for a visualization error.
# NOTE(review): confirm that visualize_distribution actually reads `umap_model`.
umap_model = UMAP(n_neighbors=15, n_components=2, metric='cosine')

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    nation_emotion_model = models.get(f"{nation}_{emotion}")
    if not nation_emotion_model:
        print(f"{nation}_{emotion} model is not loaded.")
        continue

    # Replace the model's UMAP with the shared one before plotting.
    nation_emotion_model.umap_model = umap_model

    try:
        # Only the first entry of the stored probabilities is plotted; if
        # `probabilities_` is missing or not indexable, the except below reports it.
        fig = nation_emotion_model.visualize_distribution(nation_emotion_model.probabilities_[0], min_probability=0.0015)
        fig.update_layout(title_text=f"{nation} {emotion} visualize_distribution")
        fig.show()
    except Exception as exc:
        # Best effort: report the failure and move on to the next model.
        print(f"Error visualizing {nation}_{emotion}: {exc}")

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from umap import UMAP
from bertopic import BERTopic
import joblib
import tqdm as notebook_tqdm

# Load the "lemma_"-prefixed topic models, one per nation/emotion combination.
nations = ['korea', 'china', 'japan']
emotion_labels = ['joy', 'sadness', 'anger', 'fear', 'disgust', 'surprise', 'neutral']

# Loaded models keyed by "<nation>_<emotion>"; combinations without a file are skipped.
models = {}

for nation, emotion in ((n, lbl) for n in nations for lbl in emotion_labels):
    model_filename = f"/Users/gyungmin/VS_PRJ/DH/models/lemma_{nation}_{emotion}_bertopic_model"
    try:
        model = joblib.load(model_filename)
    except FileNotFoundError:
        # A missing file is reported and the remaining combinations are still tried.
        print(f"Model file not found: {model_filename}")
        continue
    models[f"{nation}_{emotion}"] = model
    print(f"Loaded model: {model_filename}")

# Show which models were actually loaded.
print(models.keys())

# Sentence-embedding model used later to encode the comment bodies.
sentence_model = SentenceTransformer("all-MiniLM-L6-v2")

# Read the three per-country pickle files, in korea -> china -> japan order.
korea_df, china_df, japan_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "/Users/gyungmin/VS_PRJ/DH/models/reddit_korea.pickle",
        "/Users/gyungmin/VS_PRJ/DH/models/reddit_china.pickle",
        "/Users/gyungmin/VS_PRJ/DH/models/reddit_japan.pickle",
    )
)

# Drop rows whose body contains one of the deletion markers '삭제된' / '삭제됨'.
def remove_deleted_content(df):
    """Return the rows of ``df`` whose 'body' text has no deletion marker.

    Args:
        df: DataFrame with a string column named 'body'.

    Returns:
        The subset of ``df`` (original index preserved) where 'body' does not
        match the pattern "삭제된|삭제됨". Rows whose 'body' is missing are kept,
        because a missing value is not a deletion marker.
    """
    # na=False: without it a missing body yields NaN from str.contains, and the
    # negation / boolean mask below raises instead of filtering.
    is_deleted = df['body'].str.contains("삭제된|삭제됨", na=False)
    return df[~is_deleted]

# Strip deleted comments from each country's frame (same korea/china/japan order).
korea_df, china_df, japan_df = (
    remove_deleted_content(frame) for frame in (korea_df, china_df, japan_df)
)

# (nation, frame) pairs consumed by the per-country processing loop.
dataframes = list(zip(('korea', 'china', 'japan'), (korea_df, china_df, japan_df)))

# Date bounds and the emotion labels used when filtering the comments.
start_date, end_date = '2018-01-01', '2023-12-31'
valid_labels = ['joy', 'sadness', 'anger', 'fear', 'disgust', 'surprise', 'neutral']

# Per country: restrict the comments to the study window and the known emotion
# labels, then, for every emotion that has a loaded model, embed the comment
# bodies, fit a fresh BERTopic model on them and plot the documents in 2-D.
for nation, df in dataframes:
    # Done in place on the frame held in `dataframes`, so the parsed timestamps
    # remain on that frame after the loop; unparseable values become NaT.
    # NOTE(review): if 'created_utc' holds epoch seconds, unit='s' is needed — confirm.
    df['created_utc'] = pd.to_datetime(df['created_utc'], errors='coerce')

    # Compare calendar days so the whole of `end_date` is included; comparing
    # the raw timestamp against '2023-12-31' would cut off at midnight.
    created_day = df['created_utc'].dt.normalize()
    in_window = (created_day >= start_date) & (created_day <= end_date)

    # .copy() gives an independent frame, so the column assignments below are
    # not chained assignments on a slice.
    df = df[in_window & df['label'].isin(valid_labels)].copy()
    df['year_month'] = df['created_utc'].dt.to_period('M')
    df['weighted_score'] = df['score']

    if df.empty:
        print(f"No data for {nation} with label {valid_labels}")
        continue

    # Visualize each emotion for which a model was loaded.
    for emotion in valid_labels:
        nation_emotion_model = models.get(f"{nation}_{emotion}")
        if not nation_emotion_model:
            print(f"{nation}_{emotion} model is not loaded.")
            continue

        emotion_df = df[df['label'] == emotion]
        if emotion_df.empty:
            continue

        # Highest-scored comments first; the same ordering is used for the
        # texts, their embeddings and the plot.
        sorted_df = emotion_df.sort_values(by='score', ascending=False)
        text_data = sorted_df['body'].tolist()
        embeddings = sentence_model.encode(text_data, show_progress_bar=False)

        # Sanity check: one embedding per document.
        if len(text_data) != len(embeddings):
            print(f"Error: Text data length {len(text_data)} and embeddings length {len(embeddings)} do not match for {nation} {emotion}")
            continue

        try:
            # 2-D projection of the document embeddings for the scatter plot.
            umap_model = UMAP(n_neighbors=10, n_components=2, min_dist=0.0, metric='cosine')
            reduced_embeddings = umap_model.fit_transform(embeddings)

            # The loaded model only gates this branch: a new BERTopic model is
            # fitted here and replaces it for the plot (`models` is not updated).
            nation_emotion_model = BERTopic(language='multilingual', nr_topics='auto', calculate_probabilities=True)
            nation_emotion_model.fit_transform(text_data, embeddings)
        except Exception as e:
            # Covers failures of both the UMAP projection and the BERTopic fit.
            print(f"UMAP reduction error for {nation} {emotion}: {e}")
            continue

        # Document-level scatter plot, titled after the plot actually drawn.
        try:
            fig = nation_emotion_model.visualize_documents(text_data, reduced_embeddings=reduced_embeddings)
            fig.update_layout(title_text=f"{nation} {emotion} visualize_documents")
            fig.show()
        except IndexError as e:
            print(f"Error visualizing {nation} {emotion}: {e}")

