<a href="https://colab.research.google.com/github/JH-KIM-82/Final-Team1/blob/main/API_%EB%A7%A4%EC%B6%9C_%EC%B6%94%EC%B6%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =======================================================
# SteamSpy API: 188k 앱 ID 최적화 수집 (빠른 버전)
# =======================================================

import requests
import pandas as pd
from tqdm.notebook import tqdm
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

# ================================
# SteamSpy API 호출 함수
# ================================
def get_core_app_data(appid, retries=3):
    """Fetch the core SteamSpy statistics (owners, ccu) for a single app.

    Queries the SteamSpy ``appdetails`` endpoint and retries after a short
    pause when a request fails or the response cannot be used.

    Args:
        appid: Steam application ID to look up.
        retries: Maximum number of request attempts. With zero or a negative
            value no request is made and the failure record is returned.

    Returns:
        A dict with the keys ``appid``, ``owners`` and ``ccu``. When every
        attempt fails, ``owners`` and ``ccu`` are ``None`` and ``appid`` is
        the requested ID, so callers can detect the failure and retry it.
    """
    base_url = "https://steamspy.com/api.php"
    params = {"request": "appdetails", "appid": appid}

    for attempt in range(retries):
        try:
            response = requests.get(base_url, params=params, timeout=10)
            response.raise_for_status()
            full_data = response.json()
        except (requests.exceptions.RequestException, ValueError):
            # Network/HTTP error or a body that is not valid JSON.
            full_data = None

        # Only a JSON object is usable; a list/string/null body is treated
        # like a failed attempt instead of crashing on ``.get``.
        if isinstance(full_data, dict):
            returned_id = full_data.get("appid")
            return {
                # Fall back to the requested ID so the record stays
                # attributable even if the payload omits "appid".
                "appid": appid if returned_id is None else returned_id,
                "owners": full_data.get("owners"),
                "ccu": full_data.get("ccu"),
            }

        if attempt < retries - 1:
            time.sleep(0.5)  # brief pause before the next attempt

    # Reached when all attempts failed, or when no attempt was allowed at all.
    return {"appid": appid, "owners": None, "ccu": None}

# ================================
# 원본 데이터 불러오기
# ================================
# Read the source dataset and collect its distinct app IDs; when the CSV is
# not present, fall back to a short list of sample app IDs.
try:
    df = pd.read_csv('/content/weighted_score_above_08_250924.csv')
    unique_appids = df['appid'].unique()
    appids_to_fetch = unique_appids.tolist()
    print(f"총 {len(appids_to_fetch)}개의 고유 앱 ID 확인")
except FileNotFoundError:
    print("CSV 파일 없음. 예시 App ID 사용")
    appids_to_fetch = [570, 730, 440, 240, 252490, 271590]

# ================================
# Chunked + Parallel 수집 함수
# ================================
def fetch_chunk(appid_chunk, max_workers=20):
    """Fetch SteamSpy data for a chunk of app IDs using a thread pool.

    Args:
        appid_chunk: Iterable of app IDs to fetch.
        max_workers: Number of worker threads issuing requests concurrently.

    Returns:
        A list with one result dict (keys ``appid``, ``owners``, ``ccu``) per
        submitted app ID, in completion order rather than input order. A task
        that raised or produced no record is reported as a failure record
        (``owners`` and ``ccu`` set to ``None``) for its app ID.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_appid = {
            executor.submit(get_core_app_data, appid): appid
            for appid in appid_chunk
        }
        for future in as_completed(future_to_appid):
            appid = future_to_appid[future]
            try:
                data = future.result()
            except Exception:
                # ``result()`` re-raises whatever the worker raised. One bad
                # app must not discard the rest of the chunk, so it is
                # recorded as a failure and left to the caller's retry pass.
                data = None
            if data is None:
                data = {"appid": appid, "owners": None, "ccu": None}
            results.append(data)
    return results

# ================================
# CSV 저장 설정
# ================================
# Collection tuning: IDs per chunk, concurrent request threads, and the pause
# (in seconds) between chunks.
chunk_size, max_workers, sleep_time = 3000, 20, 0.5

# App IDs whose fetch failed; filled in during collection.
all_failed_ids = []

# Destination CSV. Any file left over from an earlier run is deleted so the
# new run starts from an empty output.
output_file = "steam_core_data.csv"
if os.path.exists(output_file):
    os.remove(output_file)

# Column order shared by every write, so the CSV layout stays fixed even when
# a batch happens to contain no rows.
csv_columns = ["appid", "owners", "ccu"]


def _append_rows(rows, path, columns):
    """Append result dicts to the CSV at *path* (no header); skip empty batches."""
    if rows:
        pd.DataFrame(rows, columns=columns).to_csv(
            path, mode='a', index=False, header=False, encoding='utf-8-sig'
        )


# Write the header once up front. The output file then exists even when there
# is nothing to fetch or the first chunk yields no successful rows.
pd.DataFrame(columns=csv_columns).to_csv(output_file, index=False, encoding='utf-8-sig')

print("\n===== SteamSpy API 데이터 수집 시작 =====")
for i in tqdm(range(0, len(appids_to_fetch), chunk_size), desc="전체 진행"):
    chunk = appids_to_fetch[i:i+chunk_size]
    chunk_data = fetch_chunk(chunk, max_workers=max_workers)

    # Remember failed app IDs (no "owners" value) for the retry pass.
    all_failed_ids.extend(d['appid'] for d in chunk_data if d['owners'] is None)

    # Persist only successful rows now. Failed apps are written after the
    # retries, so no app ends up in the CSV more than once.
    _append_rows([d for d in chunk_data if d['owners'] is not None], output_file, csv_columns)

    time.sleep(sleep_time)  # keep the load on the server low

# ================================
# Retry failed apps
# ================================
retry_count = 0
max_retry = 3

while all_failed_ids and retry_count < max_retry:
    print(f"\n재시도 {retry_count+1}: {len(all_failed_ids)}개의 앱 재시도")
    retry_count += 1
    failed_chunked_data = fetch_chunk(all_failed_ids, max_workers=max_workers)

    # Whatever still has no "owners" value stays on the failed list.
    all_failed_ids = [d['appid'] for d in failed_chunked_data if d['owners'] is None]

    # Append only the rows recovered in this round.
    _append_rows([d for d in failed_chunked_data if d['owners'] is not None], output_file, csv_columns)

    time.sleep(sleep_time)

# Record each app that never succeeded exactly once, with empty values, so
# every requested app ID is still represented in the output.
_append_rows(
    [{"appid": failed_id, "owners": None, "ccu": None} for failed_id in all_failed_ids],
    output_file,
    csv_columns,
)

if all_failed_ids:
    print(f"\n최종 실패 앱 ID {len(all_failed_ids)}개 존재. 수동 확인 필요")
else:
    print("\n모든 앱 데이터 정상 수집 완료 ✅")

print(f"\n최종 CSV 저장 완료: {output_file}")
