In [None]:
import requests, re
from bs4 import BeautifulSoup, SoupStrainer
import pandas as pd
import time
from tqdm import tqdm   # progress bar (pip install tqdm)
import requests_cache
# Install a process-wide cache for `requests` under the name
# "steamcharts_cache"; cached responses expire after 12 hours
# (12*60*60 seconds), so re-running the scrape cells within that window
# does not re-download pages.
requests_cache.install_cache("steamcharts_cache", expire_after=12*60*60)




100%|██████████| 89618/89618 [4:59:16<00:00,  4.99it/s]    


Finished Logging!


In [None]:
# Refresh the peak concurrent-user (CCU) figures from SteamCharts.
BASE_URL = "https://steamcharts.com/app/"   # the app id is appended to this prefix
UA = {"User-Agent": "Mozilla/5.0"}          # browser-like UA; helps avoid bot blocks

# Case-insensitive matcher for the "all-time peak" label. Between "all" and
# "time" it tolerates optional whitespace and at most one hyphen-like
# character (ASCII "-", NBSP, or U+2010..U+2015); "time" and "peak" must be
# separated by at least one whitespace character.
LABEL_RX = re.compile(
    r"all\s*[-\u00A0\u2010-\u2015]?\s*time\s+peak",
    flags=re.IGNORECASE,
)

def get_peak_ccu(appid: int, timeout: int = 10) -> int:
    """
    Look up an app's all-time peak concurrent players on SteamCharts.

    Parameters
    ----------
    appid : int
        Steam application id; appended to ``BASE_URL`` to form the page URL.
    timeout : int, default 10
        Passed to ``requests.get`` as its ``timeout`` argument.

    Returns
    -------
    int
        The integer found in the ``span.num`` of the first ``div.app-stat``
        block that both matches ``LABEL_RX`` and contains such a span
        (thousands separators removed). ``0`` when the HTTP request fails or
        returns an error status, when no such block exists, or when the text
        cannot be parsed as an integer.
    """
    page_url = f"{BASE_URL}{appid}"
    try:
        page = requests.get(page_url, headers=UA, timeout=timeout)
        page.raise_for_status()
    except requests.RequestException:
        return 0

    # Restrict parsing to the stat blocks instead of building the whole tree.
    stat_filter = SoupStrainer("div", class_="app-stat")
    parsed = BeautifulSoup(page.text, "html.parser", parse_only=stat_filter)

    for block in parsed.find_all("div", class_="app-stat"):
        # Collapse the block to plain text so the label regex can be applied.
        if not LABEL_RX.search(block.get_text(" ", strip=True)):
            continue
        value_tag = block.find("span", class_="num")
        if not value_tag:
            continue
        try:
            return int(value_tag.text.replace(",", ""))
        except ValueError:
            return 0
    return 0


# ── 1.  Progress-bar support and input data ─────────────────────
tqdm.pandas()   # registers Series.progress_apply, used when recomputing the column

steam = pd.read_csv("../data/updated_steam_games.csv")

# ── 2.  Defensive wrapper (one failing row must not stop the loop) ─
def safe_peak(appid):
    """
    Best-effort wrapper around ``get_peak_ccu`` for use with ``Series.apply``.

    Parameters
    ----------
    appid
        Value taken from the ``appid`` column; coerced with ``int()`` before
        the lookup.

    Returns
    -------
    int
        Whatever ``get_peak_ccu`` returns, or ``0`` if the coercion or the
        lookup raises.
    """
    try:
        return get_peak_ccu(int(appid))
    except Exception as exc:
        # Deliberate catch-all so a single bad row cannot abort a long run.
        # Name the offending app and the cause so failures can be told apart
        # from genuine zeros; tqdm.write prints without corrupting the bar.
        tqdm.write(f"could not get ccu for appid {appid!r}: {exc!r}")
        return 0          # fall back gracefully

# ── 3.  Recompute the column (one lookup per appid, with progress bar) ──
refreshed_peaks = steam["appid"].progress_apply(safe_peak)
steam["peak_ccu"] = refreshed_peaks

# ── 4.  Persist: the CSV that was read is overwritten in place ──
steam.to_csv("../data/updated_steam_games.csv", index=False)

print("Finished Logging!")

In [None]:
# Stated goal of this cell: obtain the columns for day-1 release counts.
# NOTE(review): the three constants below repeat the values already defined
# in the peak-CCU cell above.
BASE_URL = "https://steamcharts.com/app/"   # the app id is appended to this prefix
UA = {"User-Agent": "Mozilla/5.0"}          # browser-like UA; helps avoid bot blocks

# Case-insensitive matcher for the "all-time peak" label. Between "all" and
# "time" it tolerates optional whitespace and at most one hyphen-like
# character (ASCII "-", NBSP, or U+2010..U+2015); "time" and "peak" must be
# separated by at least one whitespace character.
LABEL_RX = re.compile(
    r"all\s*[-\u00A0\u2010-\u2015]?\s*time\s+peak",
    flags=re.IGNORECASE,
)

def get_day1_count(appid: int, timeout: int = 10) -> int:
    """
    Fetch a single figure for a Steam app from its SteamCharts page.

    NOTE(review): despite the name, this body performs the same lookup as
    ``get_peak_ccu``: it selects the stat block whose label matches
    ``LABEL_RX`` (the "all-time peak" label), not a day-1 figure. Confirm
    the intended source for day-1 counts before relying on this function.

    Parameters
    ----------
    appid : int
        Steam application id; appended to ``BASE_URL`` to form the page URL.
    timeout : int, default 10
        Passed to ``requests.get`` as its ``timeout`` argument.

    Returns
    -------
    int
        The integer in the ``span.num`` of the first ``div.app-stat`` block
        that matches ``LABEL_RX`` and contains such a span; ``0`` on request
        failure, when nothing matches, or when the text is not an integer.
    """
    target = f"{BASE_URL}{appid}"
    try:
        response = requests.get(target, headers=UA, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        return 0

    # Parse only the stat blocks rather than the full document.
    tree = BeautifulSoup(
        response.text,
        "html.parser",
        parse_only=SoupStrainer("div", class_="app-stat"),
    )

    for div in tree.find_all("div", class_="app-stat"):
        label = div.get_text(" ", strip=True)
        if LABEL_RX.search(label) is None:
            continue
        number = div.find("span", class_="num")
        if number:
            try:
                return int(number.text.replace(",", ""))
            except ValueError:
                return 0
    return 0


# ── 1.  Progress-bar support and input data ─────────────────────
tqdm.pandas()   # registers Series.progress_apply, used when recomputing the column

# Re-read the CSV from disk (the same file the previous cell wrote).
steam = pd.read_csv("../data/updated_steam_games.csv")

# ── 2.  Defensive wrapper (one failing row must not stop the loop) ─
def safe_peak(appid):
    """
    Best-effort wrapper around ``get_peak_ccu`` for use with ``Series.apply``.

    NOTE(review): this re-defines the ``safe_peak`` from the earlier cell and
    still calls ``get_peak_ccu``; the ``get_day1_count`` function defined in
    this cell is never invoked. Confirm which lookup was intended.

    Parameters
    ----------
    appid
        Value taken from the ``appid`` column; coerced with ``int()`` before
        the lookup.

    Returns
    -------
    int
        Whatever ``get_peak_ccu`` returns, or ``0`` if the coercion or the
        lookup raises.
    """
    try:
        return get_peak_ccu(int(appid))
    except Exception as exc:
        # Deliberate catch-all so a single bad row cannot abort a long run.
        # Name the offending app and the cause so failures can be told apart
        # from genuine zeros; tqdm.write prints without corrupting the bar.
        tqdm.write(f"could not get ccu for appid {appid!r}: {exc!r}")
        return 0          # fall back gracefully

# ── 3.  Recompute the column (one lookup per appid, with progress bar) ──
# NOTE(review): this assigns to 'peak_ccu' again; no day-1 column is created
# anywhere in this cell.
looked_up = steam["appid"].progress_apply(safe_peak)
steam["peak_ccu"] = looked_up

# ── 4.  Persist: the CSV that was read is overwritten in place ──
steam.to_csv("../data/updated_steam_games.csv", index=False)

print("Finished Logging!")