In [6]:
import os
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity

# Stopwords 다운로드
nltk.download('punkt')
nltk.download('stopwords')

# 텍스트 전처리 함수
def preprocess_text(text):
    stop_words = set(stopwords.words('english'))
    words = []
    if isinstance(text, list):
        for phrase in text:
            words.extend([word.lower() for word in nltk.word_tokenize(phrase) if word.isalnum() and word not in stop_words])
    else:
        words = [word.lower() for word in nltk.word_tokenize(text) if word.isalnum() and word not in stop_words]
    return words

# 검색어와 유사한 단어를 포함하는 게임을 찾는 함수
def search_games(query, model, embeddings_df):
    query_words = preprocess_text(query)
    if not query_words:
        print("검색어가 너무 짧습니다.")
        return []

    similar_words = []
    for word in query_words:
        if word in model.wv:
            similar_words.extend([w for w, _ in model.wv.most_similar(word, topn=10)])
    
    similar_words = set(similar_words)  # 유사한 단어들을 집합으로 만듭니다.

    # 유사한 단어를 포함하는 게임 찾기
    results = []
    for _, row in embeddings_df.iterrows():
        game_words = set(row['embedding_words'])
        common_words = game_words.intersection(similar_words)
        if common_words:
            # 유사도 계산
            game_vector = np.mean([model.wv[word] for word in game_words if word in model.wv], axis=0)
            query_vector = np.mean([model.wv[word] for word in query_words if word in model.wv], axis=0)
            similarity = cosine_similarity([query_vector], [game_vector])[0][0]
            results.append({
                'name': row['name'],
                'genre': row['genre'],
                'recommendation_count': row['recommendation_count'],
                'common_words': common_words,
                'similarity': similarity
            })

    results.sort(key=lambda x: -x['recommendation_count'])

    max_similarity = results[0]['similarity'] if results else 1
    for result in results:
        result['similarity_percentage'] = (result['similarity'] / max_similarity) * 100

    return results[:10]

# 검색어 입력 및 결과 출력
def main_search():
    query = input("검색어를 입력하세요: ")

    # 모델 로드 경로 설정
    model_dir = "models"
    model_path = os.path.join(model_dir, 'word2vec_model.bin')
    embed_path = os.path.join(model_dir, 'game_embeddings.pkl')
    
    model = Word2Vec.load(model_path)
    embeddings_df = pd.read_pickle(embed_path)

    top_games = search_games(query, model, embeddings_df)

    print(f"검색어: {query}")
    for game in top_games:
        print(f"Name: {game['name']}, Genre: {game['genre']}, Recommendation Count: {game['recommendation_count']}, Similarity: {game['similarity_percentage']:.2f}%")
        print(f"Common Words: {', '.join(game['common_words'])}")

# 검색 함수 호출
if __name__ == "__main__":
    main_search()


[nltk_data] Downloading package punkt to /home/downtown/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/downtown/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


검색어: battle
Name: Battlefield™ 2042, Genre: Action, Adventure, Casual, Recommendation Count: 208302, Similarity: 100.00%
Common Words: battlefield
Name: Titanfall® 2, Genre: Action, Recommendation Count: 197493, Similarity: -11.16%
Common Words: blast, campaign
Name: Battlefield™ V, Genre: Action, Recommendation Count: 189366, Similarity: 100.58%
Common Words: battlefield, battles
Name: Battlefield™ 1, Genre: Action, Massively Multiplayer, Recommendation Count: 132762, Similarity: 93.51%
Common Words: battlefield, battles
Name: STAR WARS Jedi: Fallen Order™, Genre: Action, Adventure, Recommendation Count: 126185, Similarity: -5.78%
Common Words: battles
Name: ULTRAKILL, Genre: Action, Indie, Early Access, Recommendation Count: 107448, Similarity: 11.43%
Common Words: campaign, attacks
Name: MORDHAU, Genre: Action, Indie, Recommendation Count: 90396, Similarity: 52.20%
Common Words: brutal, battlefield, attacks
Name: FOR HONOR™, Genre: Action, Recommendation Count: 89739, Similarity: 14