<a href="https://colab.research.google.com/github/unie12/recommendation/blob/main/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Optional
import pandas as pd
from annoy import AnnoyIndex
import numpy as np
import os
import logging
import pickle
from transformers import BertTokenizer, BertModel
from hybrid_recommender import HybridRecommender  # HybridRecommender 클래스 임포트

# Logging setup: emit INFO-and-above records through a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application instance that the startup hook and routes in this file attach to.
app = FastAPI()

# Request body shared by the /recommend/content and /recommend/hybrid endpoints.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class RecommendRequest(BaseModel):
    # Ids of the seed movies, as strings; the content recommender parses them with int().
    tmdb_ids: List[str]
    # Optional id -> rating map; the hybrid endpoint answers 400 when it is missing or empty.
    ratings: Optional[Dict[str, float]] = None

# Response item returned by both recommendation endpoints.
# (Documented with comments rather than a class docstring so the generated
# schema is left untouched.)
class MovieRecommendDTO(BaseModel):
    # Id of the recommended movie, as a string.
    tmdbId: str
    # Title of the recommended movie.
    title: str
    # Poster path; the content recommender always leaves this empty.
    poster_path: str = ""
    # String-encoded score; the content recommender stores the similarity score here.
    popularity: str = "0"
    # 'content' or 'hybrid', depending on which recommender produced the item.
    recommendation_type: str = "hybrid"
    # Title of the seed movie that led to this recommendation, when known.
    recommendedFrom: Optional[str] = None
    # Similarity score; the content recommender computes it as 1 / (1 + annoy distance).
    similarity: Optional[float] = None

# Module-level state; every name below is assigned by startup_event().
top_50k_movies = None  # read from bert_top_movies.pkl; used as a DataFrame with 'id' and 'title' columns
annoy_index = None  # AnnoyIndex ('angular') loaded from bert_movie_similarity.ann
features_df = None  # read from bert_movie_features.pkl; its shape[1] sizes the Annoy index
hybrid_model = None  # result of load_hybrid_model(); stays None when loading fails
tokenizer = None  # BertTokenizer for 'bert-base-uncased'
bert_model = None  # BertModel for 'bert-base-uncased'

@app.on_event("startup")
async def startup_event():
    """Load every model artifact into the module-level globals at startup.

    Steps, in order:
      1. Load the 'bert-base-uncased' tokenizer and model.
      2. Check that the three content-model files exist, then read the two
         pickles and load the Annoy index ('angular' metric, dimension taken
         from ``features_df.shape[1]``).
      3. Load the hybrid model via ``load_hybrid_model``; a ``None`` result is
         only logged as a warning so the service can still serve
         content-based recommendations.

    Raises:
        FileNotFoundError: if one of the content-model files is missing.
        Exception: any other loading error is logged with its traceback and
            re-raised, which aborts application startup.
    """
    global top_50k_movies, annoy_index, features_df, hybrid_model, tokenizer, bert_model
    try:
        # Initialise the BERT tokenizer and model
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        bert_model = BertModel.from_pretrained('bert-base-uncased')

        # Content-based model artifacts; build each path once and reuse it
        base_path = '/home/ubuntu/model_data/contents'
        movies_path = os.path.join(base_path, 'bert_top_movies.pkl')
        features_path = os.path.join(base_path, 'bert_movie_features.pkl')
        annoy_path = os.path.join(base_path, 'bert_movie_similarity.ann')

        # Fail fast with an explicit message before attempting any load
        for file_path in (movies_path, features_path, annoy_path):
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File not found: {file_path}")
            logger.info(f"Found file: {file_path}")

        top_50k_movies = pd.read_pickle(movies_path)
        features_df = pd.read_pickle(features_path)

        # The index dimension must match the number of feature columns
        n_features = features_df.shape[1]
        annoy_index = AnnoyIndex(n_features, 'angular')
        annoy_index.load(annoy_path)

        # Hybrid model (optional: the service degrades to content-based only)
        hybrid_model_path = '/home/ubuntu/model_data/hybrids/hybrid_model_v4.pkl'
        hybrid_model = load_hybrid_model(hybrid_model_path)

        if hybrid_model is None:
            logger.warning("Hybrid model loading failed, will use content-based only")
        else:
            logger.info("Hybrid model loaded successfully")

        logger.info("All models loaded successfully")

    except Exception as e:
        # logger.exception keeps the traceback; bare raise keeps the original error
        logger.exception(f"Error loading models: {str(e)}")
        raise

def get_content_recommendations(movie_ids, n=30):
    """Return up to ``n`` content-based recommendations for the given movies.

    For every input movie found in ``top_50k_movies`` the Annoy index is asked
    for ``n * 3`` nearest neighbours. Each neighbour is scored as
    ``1 / (1 + distance)``; the input movies themselves are excluded. When a
    movie is a neighbour of several inputs it is kept once, with its highest
    score and the corresponding source title.

    Args:
        movie_ids: iterable of movie ids (strings or ints parseable by ``int``).
        n: maximum number of recommendations to return.

    Returns:
        A list of dicts with the ``MovieRecommendDTO`` field names, sorted by
        descending similarity. ``popularity`` holds the similarity as a string.

    Raises:
        HTTPException: 400 if an id is not numeric, 404 if none of the ids is
            known or no recommendation could be produced.
    """
    logger.info(f"Received request for movie IDs: {movie_ids}")
    try:
        numeric_ids = [int(id_) for id_ in movie_ids]
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="tmdb_ids must be numeric") from None

    # Row positions rather than index labels: both .iloc and the Annoy lookups
    # below are positional, so labels would only work on a default RangeIndex.
    # NOTE(review): assumes Annoy item ids equal row positions in top_50k_movies,
    # as the neighbour lookup via .iloc already implies; confirm against the index build.
    id_mask = top_50k_movies['id'].isin(numeric_ids).to_numpy()
    source_positions = np.flatnonzero(id_mask).tolist()
    logger.info(f"Found {len(source_positions)} matching movies")

    if not source_positions:
        raise HTTPException(status_code=404, detail="No movies found")

    # Normalise through int so that e.g. "0123" still excludes movie 123
    excluded_ids = {str(id_) for id_ in numeric_ids}
    best_by_id = {}  # tmdbId -> best recommendation dict seen so far

    for idx in source_positions:
        source_movie = top_50k_movies.iloc[idx]
        try:
            indices, distances = annoy_index.get_nns_by_item(idx, n * 3, include_distances=True)

            for similar_idx, distance in zip(indices, distances):
                similar_movie = top_50k_movies.iloc[similar_idx]
                similar_id = str(similar_movie['id'])

                if similar_id in excluded_ids:
                    continue

                similarity_score = 1 / (1 + distance)
                previous = best_by_id.get(similar_id)
                if previous is not None and previous['similarity'] >= similarity_score:
                    # Already recommended with an equal or better score
                    continue

                best_by_id[similar_id] = {
                    'tmdbId': similar_id,
                    'title': str(similar_movie['title']),
                    'poster_path': "",
                    'popularity': str(similarity_score),
                    'recommendation_type': 'content',
                    'recommendedFrom': str(source_movie['title']),
                    'similarity': similarity_score,
                }

        except Exception as e:
            # Best effort: one failing source movie must not sink the whole request
            logger.error(f"Error processing movie: {e}")
            continue

    if not best_by_id:
        raise HTTPException(status_code=404, detail="No recommendations found")

    # Highest similarity first, truncated to the requested size
    ranked = sorted(best_by_id.values(), key=lambda rec: rec['similarity'], reverse=True)
    return ranked[:n]


@app.post("/recommend/content", response_model=List[MovieRecommendDTO])
async def content_recommend(request: RecommendRequest):
    """Content-based recommendation endpoint.

    Requests ``max(30, 10 * len(tmdb_ids))`` recommendations from
    ``get_content_recommendations`` and wraps them in ``MovieRecommendDTO``.

    Raises:
        HTTPException: 400 when ``tmdb_ids`` is empty; any HTTPException raised
            while computing recommendations (e.g. 404) is passed through
            unchanged; every other failure is reported as 500.
    """
    if not request.tmdb_ids:
        raise HTTPException(status_code=400, detail="tmdb_ids list is required")

    try:
        n = max(30, 10 * len(request.tmdb_ids))
        recommendations = get_content_recommendations(request.tmdb_ids, n=n)

        if not recommendations:
            raise HTTPException(status_code=404, detail="No recommendations found")

        return [MovieRecommendDTO(**movie) for movie in recommendations]

    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the
        # deliberate 404s would be rewritten as 500 by the handler below.
        raise
    except Exception as e:
        logger.exception(f"Content recommendation error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error") from e

@app.post("/recommend/hybrid", response_model=List[MovieRecommendDTO])
async def hybrid_recommend(request: RecommendRequest):
    """Hybrid recommendation endpoint with a content-based fallback.

    Asks ``hybrid_model.recommend_realtime`` for 30 recommendations based on
    the submitted ratings. Falls back to ``content_recommend`` when the hybrid
    model is not loaded, returns nothing, or raises.

    Raises:
        HTTPException: 400 when ``tmdb_ids`` or ``ratings`` is missing or empty;
            errors raised by the content-based fallback are passed through.
    """
    if not request.tmdb_ids or not request.ratings:
        raise HTTPException(status_code=400, detail="Both tmdb_ids and ratings are required")

    try:
        # Convert the rating map into the list-of-dicts shape passed to the model
        user_ratings = [
            {"tmdb_id": tmdb_id, "rating": rating}
            for tmdb_id, rating in request.ratings.items()
        ]

        # Generate hybrid recommendations
        if hybrid_model is None:
            logger.warning("Hybrid model not available, falling back to content-based")
            return await content_recommend(request)

        recommendations = hybrid_model.recommend_realtime(
            user_ratings=user_ratings,
            n_recommendations=30
        )

        if not recommendations:
            logger.warning("No hybrid recommendations found, falling back to content-based")
            return await content_recommend(request)

        # Convert to the response shape. The DTO declares these fields as str,
        # so coerce explicitly instead of relying on the validator to do it.
        result = []
        for rec in recommendations:
            try:
                result.append({
                    'tmdbId': str(rec['tmdbId']),
                    'title': str(rec['title']),
                    'poster_path': rec.get('poster_path') or "",
                    'popularity': str(rec.get('popularity', "0")),
                    'recommendation_type': 'hybrid',
                    'recommendedFrom': rec.get('recommendedFrom', ""),
                    'similarity': float(rec.get('similarity', 0.0))
                })
                logger.info(f"Processed recommendation: {result[-1]}")
            except Exception as e:
                # Skip a malformed item rather than failing the whole response
                logger.error(f"Error processing recommendation: {str(e)}")
                continue

        logger.info(f"Returning {len(result)} hybrid recommendations")
        return result

    except HTTPException:
        # Raised by the content-based fallback above; running the fallback a
        # second time from the generic handler would only repeat the failure.
        raise
    except Exception as e:
        logger.error(f"Hybrid recommendation error: {str(e)}")
        return await content_recommend(request)

def load_hybrid_model(model_path, annoy_path='/home/ubuntu/model_data/contents/bert_movie_similarity.ann'):
    """Load the pickled hybrid-model state and build a ``HybridRecommender``.

    Args:
        model_path: path of the pickle holding the model state (a dict with
            'content_model_data', optionally 'cf_model_data' and
            'content_weight').
        annoy_path: path of the Annoy index file, stored into
            ``content_model_data['annoy_path']`` before the model is built.

    Returns:
        The ``HybridRecommender`` instance, or ``None`` if the file is missing
        or anything fails while loading (the error is logged).
    """
    try:
        logger.info(f"Attempting to load hybrid model from: {model_path}")

        if not os.path.exists(model_path):
            logger.error(f"Hybrid model file not found at: {model_path}")
            return None

        # NOTE(security): pickle.load can execute arbitrary code; only load
        # model files from a trusted location.
        with open(model_path, 'rb') as f:
            model_state = pickle.load(f)

        logger.info("Successfully loaded model state")

        # Inspect the id mappings shipped with the collaborative-filtering data
        cf_model_data = model_state.get('cf_model_data')
        cf_mappings = {}
        if cf_model_data is not None:
            cf_mappings = {
                'tmdb_to_movieid': cf_model_data.get('tmdb_to_movieid', {}),
                'movieid_to_tmdb': cf_model_data.get('movieid_to_tmdb', {}),
            }
            tmdb_to_movieid = cf_mappings['tmdb_to_movieid']

            logger.info(f"Number of TMDB to MovieLens mappings: {len(tmdb_to_movieid)}")
            logger.info(f"Number of MovieLens to TMDB mappings: {len(cf_mappings['movieid_to_tmdb'])}")
            if tmdb_to_movieid:
                logger.info(f"Sample TMDB to MovieLens mappings: {list(tmdb_to_movieid.items())[:5]}")

        # Add annoy_path to content_model_data
        if 'content_model_data' in model_state:
            model_state['content_model_data']['annoy_path'] = annoy_path

        # Create the hybrid model instance
        model = HybridRecommender(
            content_model_data=model_state['content_model_data'],
            cf_model_data=cf_model_data,
            content_weight=model_state.get('content_weight', 0.5)
        )

        # Restore the id mappings. Prefer the top-level entries, fall back to
        # the ones stored with the CF data, and never replace an existing
        # mapping with an empty one.
        # NOTE(review): which of the two locations the pickle actually uses is
        # not visible from this file; confirm against the code that writes it.
        for attr in ('tmdb_to_movieid', 'movieid_to_tmdb'):
            mapping = model_state.get(attr) or cf_mappings.get(attr)
            if mapping:
                setattr(model, attr, mapping)
            elif not hasattr(model, attr):
                setattr(model, attr, {})

        logger.info("Successfully created hybrid model instance")
        return model

    except Exception as e:
        # logger.exception records the message together with the traceback
        logger.exception(f"Error loading hybrid model: {str(e)}")
        return None


if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 8000.
    from uvicorn import run as serve

    serve(app, port=8000, host="0.0.0.0")