In [23]:
import os 
import requests
import pandas as pd

# TMDB API key: taken from the TMDB_API_KEY environment variable, with a literal fallback.
# NOTE(review): the fallback looks like a real credential committed in source —
# consider removing it and requiring the environment variable instead.
API_KEY = os.getenv("TMDB_API_KEY", "981245faa8d227df3246bc1b0ccc25dd")
NOW_LANG = "ko-KR"  # `language` parameter sent with the now_playing request
PAGE = 1  # `page` parameter sent with the now_playing request
GENRE_LANG = "ko"  # `language` argument passed to fetch_genre_map
POSTER_BASE = "https://image.tmdb.org/t/p/w500"  # prefix prepended to each poster_path
BACKDROP_BASE = "https://image.tmdb.org/t/p/w780" # Backdrop image, so the size is fixed at w780.

# Apply the genre lookup table to a movie's genre ids.
def genre_ids_to_names(ids, genre_map: dict) -> str:
    """Convert a list of genre ids into one "|"-separated string of names.

    Example: [28, 53, 80] -> '액션|스릴러|범죄'

    Ids that are missing from ``genre_map`` are rendered as ``str(id)``.
    Any ``ids`` value that is not a ``list`` yields an empty string.
    """
    if isinstance(ids, list):
        return "|".join(genre_map.get(genre_id, str(genre_id)) for genre_id in ids)
    return ""

# Flatten multi-line text onto a single line.
def clean_one_line(text: str) -> str:
    """Return ``text`` on one line with every whitespace run collapsed to a single space.

    Falsy input (``None`` or ``""``) yields ``""``.
    """
    if text:
        words = text.replace("\n", " ").split()
        return " ".join(words)
    return ""

# Fetch the genre lookup table.
def fetch_genre_map(api_key: str, language: str = "ko") -> dict:
    """Request the TMDB movie genre list and return it as an ``{id: name}`` dict.

    The request uses a 20-second timeout, and ``raise_for_status()`` turns an
    HTTP error response into an exception before the body is parsed.
    """
    response = requests.get(
        "https://api.themoviedb.org/3/genre/movie/list",
        params={"api_key": api_key, "language": language},
        timeout=20,
    )
    response.raise_for_status()

    payload = response.json()
    id_to_name = {}
    for genre in payload.get("genres", []):
        id_to_name[genre["id"]] = genre["name"]
    return id_to_name  # e.g. {28: "액션", 12: "모험", ...}

# Look up the genre id -> name table once, before processing any movie.
genre_map = fetch_genre_map(API_KEY, GENRE_LANG)

# Fetch the "now playing" movie list.
url = "https://api.themoviedb.org/3/movie/now_playing"
params = dict(api_key=API_KEY, language=NOW_LANG, page=PAGE)

res = requests.get(url, params=params, timeout=20)
res.raise_for_status()
data = res.json()

# Build one flat record per movie in the response.
rows = []
for m in data.get("results", []):
    # Prefix each image path with its base URL; a missing path becomes "".
    poster_path = m.get("poster_path")
    backdrop_path = m.get("backdrop_path")
    poster_url = ""
    if poster_path:
        poster_url = f"{POSTER_BASE}{poster_path}"
    backdrop_url = ""
    if backdrop_path:
        backdrop_url = f"{BACKDROP_BASE}{backdrop_path}"

    # Genre ids are stored both raw (comma-joined) and resolved to names.
    genre_ids = m.get("genre_ids", [])
    if isinstance(genre_ids, list):
        genre_ids_csv = ",".join(str(g) for g in genre_ids)
    else:
        genre_ids_csv = ""

    row = {field: m.get(field) for field in ("id", "title", "original_title")}
    row["overview"] = clean_one_line(m.get("overview"))
    row["genre_ids"] = genre_ids_csv
    row["genre_names"] = genre_ids_to_names(genre_ids, genre_map)
    for field in ("adult", "original_language", "vote_average", "popularity", "vote_count"):
        row[field] = m.get(field)
    row["poster_url"] = poster_url
    row["backdrop_url"] = backdrop_url
    rows.append(row)

# The explicit column list fixes the column order (and the header when rows is empty).
df = pd.DataFrame(
    rows,
    columns=[
        "id", "title", "original_title", "overview",
        "genre_ids", "genre_names",
        "adult", "original_language",
        "vote_average", "popularity", "vote_count",
        "poster_url", "backdrop_url",
    ],
)

# Written as "utf-8-sig", i.e. UTF-8 with a byte-order mark.
out_path = "tmdb_now_playing.csv"
df.to_csv(out_path, index=False, encoding="utf-8-sig")
print(f"✅ 저장완료 : {out_path} (rows={len(df)})")

df.head()
# """

✅ 저장완료 : tmdb_now_playing.csv (rows=20)


Unnamed: 0,id,title,original_title,overview,genre_ids,genre_names,adult,original_language,vote_average,popularity,vote_count,poster_url,backdrop_url
0,1306368,더 립,The Rip,허름한 은닉처에서 수백만 달러의 현금을 발견한 마이애미 경찰. 신뢰에 금이 가기 시...,285380,액션|스릴러|범죄,False,en,7.033,346.4826,842,https://image.tmdb.org/t/p/w500/o0d6Us9VWOW0nH...,https://image.tmdb.org/t/p/w780/3F2EXWF1thX0Bd...
1,1043197,더스트 바니,Dust Bunny,“어른도 아이도 함께하는 몬스터 사냥 이야기” 열 살 소녀 오로라는 침대 밑에 숨어...,281453,액션|판타지|스릴러,False,en,6.564,288.1896,128,https://image.tmdb.org/t/p/w500/vobigFZFvbYPf6...,https://image.tmdb.org/t/p/w780/AecGG1XVCmkk7f...
2,83533,아바타: 불과 재,Avatar: Fire and Ash,"인간들과의 전쟁으로 첫째 아들 ‘네테이얌’을 잃은 후, ‘제이크’와 ‘네이티리’는 ...",8781214,SF|모험|판타지,False,en,7.308,241.8643,1631,https://image.tmdb.org/t/p/w500/l18o0AK18KS118...,https://image.tmdb.org/t/p/w780/3Dqievkc7krcTt...
3,991494,스폰지밥 무비: 네모바지를 찾아서,The SpongeBob Movie: Search for SquarePants,비키니시티에 사는 스폰지밥은 무시무시한 롤러코스터를 탈 수 있는 용감한 ‘빅 가이’...,1610751351214,애니메이션|가족|코미디|모험|판타지,False,en,6.526,183.5545,136,https://image.tmdb.org/t/p/w500/2QCGC4toQXYuIs...,https://image.tmdb.org/t/p/w780/gbjK8p5S1aLXWC...
4,1368166,하우스메이드,The Housemaid,과거를 숨긴 채 완벽한 저택의 가정부로 들어간 밀리가 이상한 행동을 보이는 아내 니...,964853,미스터리|스릴러,False,en,7.102,157.4883,542,https://image.tmdb.org/t/p/w500/fUDX16A4fJXmAu...,https://image.tmdb.org/t/p/w780/sK3z0Naed3H1Wu...


In [31]:
import pandas as pd 

CSV_PATH = "tmdb_now_playing.csv"

# Load the CSV using the "utf-8-sig" encoding.
df = pd.read_csv(CSV_PATH, encoding="utf-8-sig")

# Columns that must be numeric for the aggregation below.
number_cols = ["vote_average", "popularity", "vote_count"]

for c in number_cols:
    if c not in df.columns:
        continue
    # errors="coerce": values that cannot be parsed become NaN instead of raising.
    df[c] = pd.to_numeric(df[c], errors="coerce")

def split_genres(x: str):
    """Split a genre string into a list of trimmed, non-empty genre names.

    The separator is "|" when the string contains one, otherwise ",".
    Missing values (anything ``pd.isna`` reports as NA, e.g. ``None`` or NaN)
    and blank strings produce an empty list.
    Example: "범죄|액션|스릴러" -> ["범죄", "액션", "스릴러"]
    """
    # pd.isna: True for missing values (NaN / None), i.e. cells with no data.
    if pd.isna(x):
        return []

    text = str(x).strip()
    if not text:
        return []

    separator = "|" if "|" in text else ","
    trimmed = (piece.strip() for piece in text.split(separator))
    return [piece for piece in trimmed if piece]

# One row per (movie, genre) pair: parse genre_names into a list on a copy of df,
# then explode that list column so each genre gets its own row.
df_g = df.assign(genre=df["genre_names"].apply(split_genres)).explode("genre")

# Per-genre summary: number of distinct movies and the mean of each numeric metric.
agg = df_g.groupby("genre", as_index=False).agg(
    movie_count=pd.NamedAgg(column="id", aggfunc="nunique"),
    avg_popularity=pd.NamedAgg(column="popularity", aggfunc="mean"),
    avg_vote_average=pd.NamedAgg(column="vote_average", aggfunc="mean"),
    avg_vote_count=pd.NamedAgg(column="vote_count", aggfunc="mean"),
)

agg
# agg is short for "aggregation" (summarizing grouped data).
# Visualization = charts | diagrams.
# Data => must be quantified (turned into numbers) first.
# e.g. male / female => male = 0, female = 1
# e.g. teens, 20s, 30s, 40s, 50s => teens = 0, 20s = 1

Unnamed: 0,genre,movie_count,avg_popularity,avg_vote_average,avg_vote_count
0,SF,6,111.126967,6.990833,351.166667
1,가족,1,183.5545,6.526,136.0
2,공포,5,88.19096,6.4922,883.6
3,드라마,4,133.71225,6.36025,185.0
4,모험,4,148.66405,6.6835,480.75
5,미스터리,3,117.834467,6.036,212.0
6,범죄,4,192.833475,7.03725,964.25
7,스릴러,14,137.773407,6.616571,652.5
8,애니메이션,2,120.6299,7.513,75.0
9,액션,9,160.758267,6.79,838.222222
