# TMDB Movie Recommender (Testing Notebook)

This notebook fetches 5 movie recommendations based on a user-provided movie title using the TMDB API. Once validated, we can wrap this in a simple UI.

Requirements:
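- A TMDB credential stored in the environment variable `TMDB_API_KEY` (or pasted when the main cell below prompts for it). A plain API key is sent as the `api_key` query parameter; a value starting with `eyJ` is sent as a Bearer `Authorization` header instead.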
- Internet access enabled.

Flow:
1. Setup and Configuration
2. Install Dependencies
3. Fetch Movie by Title (TMDB)
4. Retrieve Candidate Movies (TMDB)
5. Build Text Features with TF‑IDF
6. Compute Similarities and Recommend Top 5
7. CLI Function for VS Code Terminal
8. Unit Tests (pytest)
9. Save and Inspect Output in VS Code
import os
import sys
import json
import time
import math
import typing as t
from pathlib import Path

import requests
from IPython.display import display, HTML

# Root of the TMDB REST API (the "/3" path segment) and of the image host.
BASE_URL = "https://api.themoviedb.org/3"
IMAGE_BASE_URL = "https://image.tmdb.org/t/p"
POSTER_SIZE = "w342"  # default size segment placed between IMAGE_BASE_URL and a poster path

# First attempt: read the key straight from the process environment.
# A .env-based fallback is tried further down when this comes back empty.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY")

def load_env_if_present():
    """Best-effort .env loading via python-dotenv.

    Returns the value of ``TMDB_API_KEY`` from the environment after
    ``load_dotenv()`` has run, or ``None`` when python-dotenv is missing or
    loading fails for any reason.
    """
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except Exception:
        # python-dotenv is optional; any failure simply means "no key found".
        return None
    else:
        return os.environ.get("TMDB_API_KEY")

# Second attempt: fall back to a .env file when the environment had no key.
if not TMDB_API_KEY:
    env_key = load_env_if_present()
    if env_key:
        TMDB_API_KEY = env_key

# Still no key: only warn here; nothing is prompted for or raised at this point.
if not TMDB_API_KEY:
    print("TMDB_API_KEY not found in environment. You can paste it below.")

# A key starting with "eyJ" is sent as a Bearer Authorization header
# (presumably a JWT-style access token -- confirm against TMDB's docs).
# For any other key, or no key, HEADERS is None and the key travels as the
# api_key query parameter instead.
HEADERS = {"Authorization": f"Bearer {TMDB_API_KEY}"} if TMDB_API_KEY and TMDB_API_KEY.startswith("eyJ") else None

def make_params(extra: dict = None):
    """Build the query parameters for a TMDB request.

    Starts from ``{"api_key": TMDB_API_KEY}`` when no Authorization header is
    configured (``HEADERS`` is falsy) and from an empty dict otherwise, then
    merges ``extra`` on top when it is given and non-empty.
    """
    if HEADERS:
        query = {}
    else:
        query = {"api_key": TMDB_API_KEY}
    if not extra:
        return query
    query.update(extra)
    return query

# Status output for the setup cell: shows the API root and which of the two
# authentication styles (Bearer header vs. api_key query parameter) is active.
print("Config ready. BASE_URL:", BASE_URL)
print("Using", "Authorization header" if HEADERS else "api_key query param")
# Install dependencies needed for this notebook
# If already installed, pip will skip. Safe to re-run.
import sys

def pip_install(pkgs):
    """Install ``pkgs`` with pip into the running interpreter's environment.

    Uses ``sys.executable -m pip`` so packages land in the same environment
    the kernel/interpreter is using. The command is passed as an argument
    list (no shell), so package specifiers need no quoting and the function
    also works outside IPython.

    Args:
        pkgs: Iterable of pip requirement strings. An empty iterable is a no-op.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Function-scope import keeps this cell self-contained.
    import subprocess

    requirements = list(pkgs)
    if not requirements:
        return
    # Use -q for quieter logs
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", *requirements])

# Packages this notebook asks pip to install (runtime libraries plus the
# test tooling: pytest and requests-mock).
# NOTE(review): `requests` is already imported by the setup code above, so it
# must be present before this install step runs -- confirm the cell order.
pkgs = [
    "requests",
    "scikit-learn",
    "pandas",
    "numpy",
    "python-dotenv",
    "pytest",
    "requests-mock",
]
pip_install(pkgs)

# Printed once pip_install returns; this line does not itself check the outcome.
print("Dependencies installed.")
import os
import requests
from typing import Optional, Dict, Any

# One shared Session so every TMDB request carries the same default headers.
session = requests.Session()
session.headers.update({"Accept": "application/json"})
# When Bearer auth was configured (HEADERS is set), attach it to the session.
if HEADERS:
    session.headers.update(HEADERS)


def tmdb_get(path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """GET ``BASE_URL + path`` through the shared session and return the JSON body.

    Request failures are reported with ``print`` and turned into a dict rather
    than raised, so callers can always treat the result as a mapping.

    Args:
        path: API path starting with ``/`` (for example ``"/search/movie"``).
        params: Optional query parameters. The mapping is copied, never mutated.

    Returns:
        The decoded JSON response on success. On an HTTP error status, the
        decoded error body when it is JSON, otherwise ``{"error": <message>}``.
        On any other request failure, ``{"error": <message>}``.
    """
    url = f"{BASE_URL}{path}"
    # Work on a copy: the API key must not leak into the caller's dict.
    query = dict(params) if params else {}
    if not HEADERS:
        query["api_key"] = TMDB_API_KEY

    def _redact(message: str) -> str:
        # requests puts the full URL, query string included, into its error
        # text; keep the key out of notebook output and returned payloads.
        return message.replace(TMDB_API_KEY, "***") if TMDB_API_KEY else message

    try:
        resp = session.get(url, params=query, timeout=20)
        resp.raise_for_status()
        return resp.json()
    except requests.HTTPError as he:
        message = _redact(str(he))
        print("HTTP error:", message)
        # Use the response attached to the exception; a local response variable
        # would be unbound if the error was raised before one was assigned.
        error_response = he.response
        if error_response is not None:
            try:
                return error_response.json()
            except ValueError:
                # Error body was not JSON; fall through to the generic payload.
                pass
        return {"error": message}
    except Exception as e:
        message = _redact(str(e))
        print("Request error:", message)
        return {"error": message}


def search_movie(title: str) -> Optional[Dict[str, Any]]:
    """Look up a movie by title and return a compact record for the first hit.

    Args:
        title: Free-text movie title; surrounding whitespace is ignored.

    Returns:
        A dict with ``id``, ``title``, ``overview``, ``genres`` (list of names),
        ``release_date``, ``vote_average`` and ``poster_path``, combining the
        search hit with the movie's detail record; ``None`` when the title is
        blank or the search returns nothing.
    """
    query = title.strip() if title else ""
    if not query:
        print("Please provide a non-empty title.")
        return None

    payload = tmdb_get("/search/movie", params={"query": query})
    hits = payload.get("results", []) if isinstance(payload, dict) else []
    if not hits:
        print("No search results found for:", title)
        return None

    # Only the first search hit is considered.
    top_hit = hits[0]
    movie_id = top_hit.get("id")
    details = tmdb_get(f"/movie/{movie_id}") if movie_id else {}
    genre_entries = details.get("genres", []) if isinstance(details, dict) else []
    genre_names = [entry.get("name") for entry in genre_entries if isinstance(entry, dict)]

    record = {"id": movie_id, "title": top_hit.get("title")}
    record["overview"] = details.get("overview") or top_hit.get("overview") or ""
    record["genres"] = genre_names
    record["release_date"] = top_hit.get("release_date") or details.get("release_date")
    record["vote_average"] = details.get("vote_average") or top_hit.get("vote_average")
    record["poster_path"] = top_hit.get("poster_path") or details.get("poster_path")
    return record

# Cell-completion marker, printed after the definitions above have executed.
print("search_movie ready.")
from typing import List


def get_recommendations(movie_id: int, page: int = 1) -> List[dict]:
    """Collect candidate movies for ``movie_id`` and enrich them with details.

    Queries the ``recommendations`` and ``similar`` endpoints (in that order)
    for the given page, merges both result lists de-duplicated by movie id,
    then fetches each movie's detail record to fill in overview and genres.

    Args:
        movie_id: TMDB id of the reference movie.
        page: Result page requested from both endpoints.

    Returns:
        A list of dicts with ``id``, ``title``, ``overview``, ``genres``,
        ``release_date``, ``vote_average`` and ``poster_path``.
    """
    def fetch_results(endpoint: str) -> list:
        payload = tmdb_get(f"/movie/{movie_id}/{endpoint}", params={"page": page})
        return payload.get("results", []) if isinstance(payload, dict) else []

    recommended = fetch_results("recommendations")
    similar = fetch_results("similar")

    # Keyed by id: the first occurrence fixes the position, a later duplicate
    # replaces the stored entry.
    unique = {
        movie["id"]: movie
        for movie in recommended + similar
        if movie and isinstance(movie, dict) and movie.get("id")
    }

    def enrich(movie: dict) -> dict:
        # One detail request per candidate supplies overview and genre names.
        mid = movie.get("id")
        det = tmdb_get(f"/movie/{mid}") if mid else {}
        genre_entries = det.get("genres", []) if isinstance(det, dict) else []
        return {
            "id": mid,
            "title": movie.get("title") or det.get("title"),
            "overview": det.get("overview") or movie.get("overview") or "",
            "genres": [entry.get("name") for entry in genre_entries if isinstance(entry, dict)],
            "release_date": movie.get("release_date") or det.get("release_date"),
            "vote_average": det.get("vote_average") or movie.get("vote_average"),
            "poster_path": movie.get("poster_path") or det.get("poster_path"),
        }

    return [enrich(movie) for movie in unique.values()]

# Cell-completion marker, printed after the definition above has executed.
print("get_recommendations ready.")
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np


def build_corpus(input_movie: dict, candidates: list):
    """Build parallel document/title/id lists for the input movie and candidates.

    Each document is the movie's stripped overview followed by its genre names,
    space-separated. Index 0 of every returned list belongs to ``input_movie``;
    the candidates follow in their given order.

    Returns:
        A ``(docs, titles, ids)`` tuple of equally long lists.
    """
    movies = [input_movie, *candidates]

    docs = []
    for movie in movies:
        overview_text = (movie.get("overview") or "").strip()
        genre_text = " ".join(movie.get("genres", []) or [])
        # The outer strip removes the separator when either part is empty.
        docs.append(f"{overview_text} {genre_text}".strip())

    titles = [movie.get("title") for movie in movies]
    ids = [movie.get("id") for movie in movies]
    return docs, titles, ids


def tfidf_features(docs: list):
    """Fit a TF-IDF vectorizer on ``docs`` and return it with the document matrix.

    The vectorizer drops English stop words and uses unigrams and bigrams.

    Returns:
        A ``(vectorizer, matrix)`` tuple, where ``matrix`` is the result of
        ``fit_transform`` on ``docs``.
    """
    vectorizer = TfidfVectorizer(
        stop_words="english",
        ngram_range=(1, 2),
        min_df=1,
    )
    matrix = vectorizer.fit_transform(docs)
    return vectorizer, matrix


def recommend_top5(input_movie: dict, candidates: list, top_n: int = 5):
    """Rank ``candidates`` by text similarity to ``input_movie``.

    Documents (overview plus genres) are vectorised with TF-IDF and each
    candidate is scored by the dot product of its row with the input movie's
    row. The vectorizer is created without a ``norm`` override, so with
    scikit-learn's default L2 normalisation this score is the cosine similarity.

    Args:
        input_movie: Reference movie dict (``overview``, ``genres``, ...).
        candidates: Candidate movie dicts in the same shape.
        top_n: Maximum number of results to return (defaults to 5).

    Returns:
        Up to ``top_n`` dicts sorted by descending similarity, each holding
        ``id``, ``title``, ``similarity`` (rounded to 4 decimals), ``tmdb_url``,
        ``release_date``, ``vote_average``, ``poster_path``, ``genres`` and
        ``overview``. Ties keep the candidates' original order. An empty list
        is returned when there are no candidates or ``top_n`` is not positive.
    """
    if not candidates or top_n <= 0:
        return []

    docs, _titles, _ids = build_corpus(input_movie, candidates)
    try:
        _vectorizer, matrix = tfidf_features(docs)
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary ends up empty
        # (every document blank or stop words only). Nothing can be compared,
        # so score everything 0 and keep the incoming order.
        sims = np.zeros(len(candidates))
    else:
        # Row 0 is the input movie; the remaining rows are the candidates.
        sims = (matrix[1:] @ matrix[0].T).toarray().ravel()

    # A stable sort makes the order of equal scores deterministic.
    order = np.argsort(-sims, kind="stable")[:top_n]

    top = []
    for i in order:
        m = candidates[i]
        top.append({
            "id": m.get("id"),
            "title": m.get("title"),
            "similarity": round(float(sims[i]), 4),
            "tmdb_url": f"https://www.themoviedb.org/movie/{m.get('id')}",
            "release_date": m.get("release_date"),
            "vote_average": m.get("vote_average"),
            "poster_path": m.get("poster_path"),
            "genres": m.get("genres"),
            "overview": m.get("overview"),
        })
    return top

# Cell-completion marker, printed after the definitions above have executed.
print("TF‑IDF & recommend_top5 ready.")
from IPython.display import HTML


def build_poster_url(path: str, size: str = POSTER_SIZE) -> t.Optional[str]:
    """Return the full image URL for a poster path, or ``None`` for a falsy path.

    The URL is ``IMAGE_BASE_URL``, a slash, the size segment and then ``path``
    appended as-is.
    """
    return f"{IMAGE_BASE_URL}/{size}{path}" if path else None


def _recommendation_card_html(rec: dict) -> str:
    """Render one recommendation dict as an HTML card with all text escaped."""
    from html import escape

    title = escape(str(rec.get("title") or "Untitled"), quote=True)
    year = escape(str(rec.get("release_date") or "")[0:4], quote=True)
    rating = rec.get("vote_average")
    rating_text = escape(str(rating)) if rating is not None else "N/A"

    overview = str(rec.get("overview") or "").strip()
    # Truncate before escaping so an HTML entity is never cut in half.
    overview_text = escape(overview[:300]) + ("..." if len(overview) > 300 else "")

    # Skip missing genre names instead of letting str.join fail on None.
    genres_text = escape(", ".join(str(g) for g in (rec.get("genres") or []) if g))

    poster = build_poster_url(rec.get("poster_path"))
    if poster:
        img_html = (
            f"<img src='{escape(poster, quote=True)}' alt='{title}' "
            "style='width:160px;border-radius:6px;'>"
        )
    else:
        img_html = "<div style='width:160px;height:240px;background:#eee;border-radius:6px;display:flex;align-items:center;justify-content:center;color:#999;'>No Image</div>"

    link = rec.get("tmdb_url")
    if link:
        title_html = f"<a href='{escape(str(link), quote=True)}' target='_blank'>{title}</a>"
    else:
        # No URL available: show the title as plain text rather than a dead link.
        title_html = title
    year_html = f" ({year})" if year else ""

    return f"""
        <div class='card'>
            <div class='poster'>{img_html}</div>
            <div class='meta'>
                <div class='title'>{title_html}{year_html}</div>
                <div class='rating'>Rating: {rating_text}</div>
                <div class='genres'>{genres_text}</div>
                <div class='overview'>{overview_text}</div>
            </div>
        </div>
        """


def display_recommendations(input_movie: dict, recs: list):
    """Show the recommendations as a grid of poster cards in the notebook.

    Every value taken from the recommendation dicts (titles, overviews,
    genres, URLs) is HTML-escaped before interpolation: the text comes from
    an external API and may contain quotes or markup.

    Args:
        input_movie: The reference movie. Currently unused; kept so existing
            callers keep working.
        recs: Recommendation dicts as produced by ``recommend_top5``.
    """
    cards_html = "".join(_recommendation_card_html(rec) for rec in recs)
    page = f"""
    <style>
    .grid {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(360px, 1fr)); gap: 14px; }}
    .card {{ display: grid; grid-template-columns: 160px 1fr; gap: 12px; padding: 12px; border: 1px solid #ddd; border-radius: 8px; background: #fff; }}
    .title {{ font-weight: 600; font-size: 16px; }}
    .genres {{ color: #555; margin-top: 4px; font-size: 13px; }}
    .rating {{ color: #333; margin-top: 4px; font-size: 13px; }}
    .overview {{ margin-top: 6px; font-size: 13px; line-height: 1.35; color: #333; }}
    </style>
    <div class='grid'>
        {cards_html}
    </div>
    """
    display(HTML(page))

# Cell-completion marker, printed after the definitions above have executed.
print("Display helpers ready.")
# Main: ask for a movie title, fetch, compute, and show 5 recommendations
movie_title = input("Enter a movie title: ")

# No key was found earlier: offer an interactive paste as a last resort.
if not TMDB_API_KEY:
    pasted = input("Paste TMDB API KEY (or press Enter to skip): ").strip()
    if pasted:
        # Store the key both in the process environment and in the module-level name.
        os.environ["TMDB_API_KEY"] = pasted
        TMDB_API_KEY = pasted
        if pasted.startswith("eyJ"):
            # "eyJ"-prefixed keys are sent as a Bearer header on the shared session.
            session.headers.update({"Authorization": f"Bearer {TMDB_API_KEY}"})
            HEADERS = {"Authorization": f"Bearer {TMDB_API_KEY}"}
        else:
            # Any other key: HEADERS stays None, so tmdb_get adds the api_key query parameter.
            HEADERS = None

# Abort the cell when there is still no key at all.
if not TMDB_API_KEY:
    raise RuntimeError("TMDB API key required. Set TMDB_API_KEY env var or paste it.")

# Pipeline: resolve the title -> gather candidates -> rank by similarity -> print and render.
inp = search_movie(movie_title)
if not inp:
    print("Could not find the input movie. Try another title.")
else:
    cands = get_recommendations(inp["id"]) if inp.get("id") else []
    if not cands:
        print("No candidate movies found from TMDB recommendations/similar.")
    else:
        recs = recommend_top5(inp, cands)
        print(f"Top {len(recs)} recommendations for '{inp['title']}':")
        for i, r in enumerate(recs, 1):
            print(f"{i}. {r['title']} (score={r['similarity']})")
        display_recommendations(inp, recs)

# CLI helper: main(movie_title) prints 5 recommendations for quick terminal runs

def main(movie_title: str):
    """Print the recommendations for ``movie_title`` and return them.

    Args:
        movie_title: Title to look up on TMDB.

    Returns:
        The list produced by ``recommend_top5``; an empty list when the movie
        cannot be found or TMDB yields no candidates.
    """
    source = search_movie(movie_title)
    if not source:
        print("Input movie not found.")
        return []

    candidates = []
    if source.get("id"):
        candidates = get_recommendations(source["id"])
    if not candidates:
        print("No candidates from TMDB.")
        return []

    picks = recommend_top5(source, candidates)
    for rank, pick in enumerate(picks, start=1):
        print(f"{rank}. {pick['title']} (score={pick['similarity']}) -> {pick['tmdb_url']}")
    return picks

# Quick test run example
if __name__ == "__main__":
    # For notebook cell run, emulate argparse-like behavior
    import sys
    from pathlib import Path

    # Inside a Jupyter kernel, sys.argv holds the kernel launcher's own
    # arguments (e.g. "-f <connection-file>.json"), not a movie title, and
    # __name__ is still "__main__". Treat that case as "no arguments" so the
    # cell does not search TMDB for the connection-file path.
    launcher_name = Path(sys.argv[0]).name if sys.argv else ""
    cli_args = [] if launcher_name.endswith("kernel_launcher.py") else sys.argv[1:]

    if cli_args:
        title = " ".join(cli_args)
        main(title)
    else:
        print("Usage: run the cell with arguments, e.g., %run -i tmdb_recommender_test.ipynb 'Inception'")

# Unit tests with pytest and requests-mock
# Note: In notebooks, we keep tests minimal and illustrative.

import pytest
import requests_mock


def test_search_movie_no_title():
    """An empty title yields no result."""
    result = search_movie("")
    assert result is None


def test_search_movie_http_error():
    """A 500 from the search endpoint comes back from tmdb_get as an error payload."""
    endpoint = f"{BASE_URL}/search/movie"
    with requests_mock.Mocker() as mocker:
        mocker.get(endpoint, status_code=500, json={"status_message":"error"})
        res = tmdb_get("/search/movie")
    # Either the generic error dict or the mocked JSON error body is acceptable.
    assert "error" in res or res.get("status_message") == "error"


def test_recommend_top5_shapes():
    """Five candidates in, five recommendations out."""
    inp = {"id": 1, "title": "A", "overview": "hero saves the world", "genres": ["Action"]}
    rows = [
        (2, "B", "hero fights villains", ["Action"]),
        (3, "C", "romantic comedy story", ["Comedy", "Romance"]),
        (4, "D", "space adventure journey", ["Sci-Fi"]),
        (5, "E", "dramatic life events", ["Drama"]),
        (6, "F", "mystery thriller", ["Thriller"]),
    ]
    cand = [
        {"id": movie_id, "title": title, "overview": overview, "genres": genres}
        for movie_id, title, overview, genres in rows
    ]
    recs = recommend_top5(inp, cand)
    assert len(recs) == 5

# Cell-completion marker, printed after the test functions above are defined.
print("Pytest tests defined.")

# Run tests quietly
# NOTE(review): `!pytest` starts a separate process that collects test files
# from the working directory; the test functions defined in the cells above
# live only in this kernel and are presumably not collected -- confirm.
!pytest -q