In [50]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

# Headers to avoid blocking
headers = {'User-Agent': 'Mozilla/5.0'}

In [52]:
# Sample: Only Top 1000 games for test
def get_top_games(limit=1000):
    url = "https://steamspy.com/api.php?request=all"
    response = requests.get(url)
    data = response.json()
    sorted_games = sorted(data.items(), key=lambda x: x[1]['owners'], reverse=True)
    return sorted_games[:limit]

In [54]:
def get_game_details(appid):
    store_url = f"https://store.steampowered.com/app/{appid}/"
    response = requests.get(store_url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    def safe_find(selector, attr='text'):
        try:
            element = soup.select_one(selector)
            if attr == 'text':
                return element.text.strip()
            elif attr == 'all_texts':
                return ', '.join(e.text.strip() for e in soup.select(selector))
        except:
            return None
        return None

    game_name = safe_find(".apphub_AppName")
    game_tags = safe_find(".glance_tags.popular_tags", 'all_texts')
    release_date = safe_find(".release_date .date")

    developer = publisher = None
    try:
        rows = soup.find_all("div", class_="dev_row")
        for row in rows:
            subtitle = row.find("div", class_="subtitle")
            summary = row.find("div", class_="summary")
            if subtitle and summary:
                if "Developer" in subtitle.text:
                    developer = summary.text.strip()
                elif "Publisher" in subtitle.text:
                    publisher = summary.text.strip()
    except:
        pass

    # Reviews
    recent_reviews = all_reviews = None
    try:
        review_blocks = soup.find_all("div", class_="user_reviews_summary_row")
        for block in review_blocks:
            label = block.find("div", class_="title")
            summary = block.find("span", class_="game_review_summary")
            if label and summary:
                label_text = label.text.strip()
                if "Recent" in label_text:
                    recent_reviews = summary.text.strip()
                elif "All Reviews" in label_text:
                    all_reviews = summary.text.strip()
    except:
        pass

    # Price handling
    price = None
    try:
        discount_block = soup.find("div", class_="discount_final_price")
        if discount_block:
            price = discount_block.text.strip()
        else:
            price_block = soup.select_one(".game_purchase_price")
            if price_block:
                price = price_block.text.strip()
        if not price:
            price = "Free to Play"
    except:
        price = "Unavailable"

    return {
        "AppID": appid,
        "Game Name": game_name,
        "Game Tags": game_tags,
        "Release Date": release_date,
        "Developer": developer,
        "Publisher": publisher,
        "Recent Reviews": recent_reviews,
        "All Reviews": all_reviews,
        "Price": price
    }

In [56]:
top_games = get_top_games(1000)
game_data = []

for idx, (appid, _) in enumerate(top_games):
    try:
        print(f"[{idx+1}/1000] Fetching AppID {appid}...")
        details = get_game_details(appid)
        game_data.append(details)
        time.sleep(1.5)
    except Exception as e:
        print(f"Failed AppID {appid}: {e}")

[1/1000] Fetching AppID 578080...
[2/1000] Fetching AppID 1623730...
[3/1000] Fetching AppID 1172470...
[4/1000] Fetching AppID 1063730...
[5/1000] Fetching AppID 2358720...
[6/1000] Fetching AppID 1938090...
[7/1000] Fetching AppID 271590...
[8/1000] Fetching AppID 550...
[9/1000] Fetching AppID 553850...
[10/1000] Fetching AppID 1599340...
[11/1000] Fetching AppID 304930...
[12/1000] Fetching AppID 526870...
[13/1000] Fetching AppID 1238810...
[14/1000] Fetching AppID 60...
[15/1000] Fetching AppID 221100...
[16/1000] Fetching AppID 1089350...
[17/1000] Fetching AppID 204360...
[18/1000] Fetching AppID 632360...
[19/1000] Fetching AppID 96000...
[20/1000] Fetching AppID 275850...
[21/1000] Fetching AppID 1811260...
[22/1000] Fetching AppID 322170...
[23/1000] Fetching AppID 1046930...
[24/1000] Fetching AppID 49520...
[25/1000] Fetching AppID 224260...
[26/1000] Fetching AppID 489520...
[27/1000] Fetching AppID 80...
[28/1000] Fetching AppID 394360...
[29/1000] Fetching AppID 466240.

In [58]:
df = pd.DataFrame(game_data)
df.to_csv("top_steam_games_fixed.csv", index=False)
print("Data saved to top_steam_games_fixed.csv")

Data saved to top_steam_games_fixed.csv
