In [391]:
import requests
from bs4 import BeautifulSoup
import re
import time

In [365]:
# Fetch the set index page and build `card_sets`, a dict keyed by set code:
#   {set_code: {"name": ..., "release_date": ..., "total_cards": ...}}
# All values are kept as the stripped strings found in the table cells.
url = "https://pocket.limitlesstcg.com/cards"

# A timeout prevents the cell from hanging forever on a stalled connection;
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# AttributeError when the expected table is missing from an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

card_sets = {}

# The first two <tr> rows are skipped -- presumably header rows; confirm
# against the live page if the layout changes.
sets = soup.find("table", class_="data-table").find_all("tr")[2:]

# The loop variable is named `set_row` (not `set`) so the builtin `set` is not
# shadowed for the rest of the kernel session.
for set_row in sets:
    cells = set_row.find_all("td")
    # The set code is the last path segment of the row's first link.
    set_code = set_row.find("a")["href"].split("/")[-1]
    # Only the first line of the first cell's text is used as the set name.
    set_name = cells[0].text.strip().split("\n")[0]
    set_release_date = cells[1].text.strip()
    set_total_cards = cells[2].text.strip()
    card_sets[set_code] = {
        "name": set_name,
        "release_date": set_release_date,
        "total_cards": set_total_cards
    }

card_sets

{'A2b': {'name': 'Shining Revelry',
  'release_date': '27 Mar 25',
  'total_cards': '111'},
 'A2a': {'name': 'Triumphant Light',
  'release_date': '28 Feb 25',
  'total_cards': '96'},
 'A2': {'name': 'Space-Time Smackdown',
  'release_date': '29 Jan 25',
  'total_cards': '207'},
 'A1a': {'name': 'Mythical Island',
  'release_date': '17 Dec 24',
  'total_cards': '86'},
 'A1': {'name': 'Genetic Apex',
  'release_date': '30 Oct 24',
  'total_cards': '286'},
 'P-A': {'name': 'Promo-A', 'release_date': '', 'total_cards': '59'}}

In [438]:
def scrape_card(set_code, card_no):
    """Scrape a single card page from pocket.limitlesstcg.com into a dict.

    Args:
        set_code: Set identifier used in the URL path (e.g. "A1").
        card_no: Card number within the set, used in the URL path.

    Returns:
        A dict that always contains "set_code", "card_number", "name",
        "image_url", "artist" and "rarity".

        For cards whose type is "Pokémon" it also contains "pkmn_type",
        "pkmn_hp", "stage", "attacks" (dict keyed by attack name),
        "weakness", "retreat_cost", and, when present on the page,
        "evolves_from", "ability" and "flavor".

        For cards whose type is "Trainer" it also contains "trainer_type"
        and, when present, "text".

        When the prints table has rows, "other_versions" is a list of
        {"set_code": ..., "card_number": ...} dicts, one per print that is
        not the current one. (Previously a single dict was overwritten on
        every row, so only the last alternate print was retained.)

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
        AttributeError, IndexError: If the page does not have the expected
            structure (an expected element is missing).
    """
    url = f"https://pocket.limitlesstcg.com/cards/{set_code}/{card_no}"

    # A timeout keeps one stalled request from hanging an entire scraping run;
    # raise_for_status() reports HTTP failures directly rather than as an
    # AttributeError on a missing element further down.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    card_dict = {}

    # card number
    card_dict["set_code"] = set_code
    card_dict["card_number"] = card_no

    # The type line is " - "-separated; its first field is the card type.
    _card_type = soup.find("p", class_="card-text-type").text.strip().split(" - ")
    _card_type = [a.strip() for a in _card_type]
    card_type = _card_type[0]
    card_dict["name"] = soup.find("span", class_="card-text-name").text

    image_url = soup.find("div", class_="card-image").find("img")["src"]
    card_dict["image_url"] = image_url

    if card_type == "Pokémon":
        _title_strings = soup.find("p", class_="card-text-title").find_all(string=True, recursive=True)
        _title_text = "".join(s.strip() for s in _title_strings)
        _title_fields = [f.strip() for f in _title_text.split("-")]
        # Type and HP are the last two "-"-separated fields. Indexing from the
        # end keeps them correct when the card name itself contains a hyphen,
        # which would otherwise shift every field to the right.
        card_dict["pkmn_type"] = _title_fields[-2]
        card_dict["pkmn_hp"] = _title_fields[-1]

        # stage
        card_dict["stage"] = _card_type[1]
        if card_dict["stage"] != "Basic":
            # The third type field spans two lines; the second line is used.
            card_dict["evolves_from"] = _card_type[2].split("\n")[1].strip()

        # card ability
        _ability = soup.find("p", class_="card-text-ability-info")
        if _ability:
            ability_name = _ability.string.replace("Ability: ", "").strip()
            # Only direct text children are joined (recursive=False), so text
            # inside nested tags is not included.
            _effect_strings = soup.find("p", class_="card-text-ability-effect").find_all(string=True, recursive=False)
            ability_effect = "".join(s.strip() for s in _effect_strings).strip()
            card_dict["ability"] = {ability_name: ability_effect}

        # card attacks
        card_dict["attacks"] = {}
        for _attack in soup.find_all("div", class_="card-text-attack"):
            _info_tag = _attack.find("p", class_="card-text-attack-info")
            _info = "".join(s.strip() for s in _info_tag.find_all(string=True, recursive=False)).strip()
            # Group 1 is the attack name; optional group 2 is a trailing
            # damage value such as "30", "30+" or "30x".
            _match = re.search(r'(.*?)(\d+[+x]{0,1})?$', _info)
            if _match:
                attack_name = _match.group(1).strip()
                attack = {}
                card_dict["attacks"][attack_name] = attack

                # Only the first ptcg-symbol span inside the info line is read.
                attack["energy_cost"] = _info_tag.find("span", class_="ptcg-symbol").string

                _effect_strings = _attack.find("p", class_="card-text-attack-effect").find_all(string=True, recursive=False)
                attack_effect = "".join(s.strip() for s in _effect_strings).strip()
                if attack_effect:
                    attack["effect"] = attack_effect

                if _match.group(2):
                    attack["damage"] = _match.group(2).strip()

        # weakness and retreat cost
        _wrr = soup.find("p", class_="card-text-wrr").find_all(string=True)
        card_dict["weakness"] = _wrr[0].replace("Weakness: ", "").strip()
        card_dict["retreat_cost"] = _wrr[1].replace("Retreat: ", "").strip()

        # card text flavor
        _flavor = soup.find("div", class_="card-text-section card-text-flavor")
        if _flavor:
            card_dict["flavor"] = _flavor.string.strip()

    elif card_type == "Trainer":
        card_dict["trainer_type"] = _card_type[1].strip()

        # card text: the second card-text-section, when there is one
        _card_text_ls = soup.find_all("div", class_="card-text-section")
        if len(_card_text_ls) > 1:
            card_dict["text"] = _card_text_ls[1].text.strip()

    # artist
    card_dict["artist"] = soup.find("div", class_="card-text-section card-text-artist").select('a')[0].string.strip()

    # rarity: second "·"-separated field of the second span in the details box
    _details_spans = soup.find("div", class_="prints-current-details").find_all("span")
    card_dict["rarity"] = _details_spans[1].text.strip().split("·")[1].strip()

    # other versions: every row after the first, except the one marked "current"
    _version_rows = soup.find("table", class_="card-prints-versions").find_all("tr")[1:]
    if _version_rows:
        card_dict["other_versions"] = []
        for _row in _version_rows:
            _row_classes = _row.get('class')
            if _row_classes and _row_classes[0] == "current":
                continue
            # The link's last two path segments are the set code and card number.
            _href_parts = _row.find("a").get('href').split("/")
            card_dict["other_versions"].append({
                "set_code": _href_parts[-2],
                "card_number": _href_parts[-1],
            })

    return card_dict


In [441]:
# Scrape every card of the first set listed in `card_sets` into `set_cards`.
set_code = next(iter(card_sets))
# total_cards is stored as a string in card_sets, hence the int() conversion.
set_total_cards = int(card_sets[set_code]["total_cards"])
set_cards = []

# perf_counter() is monotonic, so durations cannot be skewed by wall-clock
# adjustments the way time.time() differences can.
run_start_time = time.perf_counter()
for card_no in range(1, set_total_cards+1):
    start_time = time.perf_counter()
    card_dict = scrape_card(set_code, card_no)
    finish_time = time.perf_counter()

    # Measure once so the printed duration and the estimate agree.
    elapsed = finish_time - start_time
    # Extrapolate from the mean time per card so far rather than from the last
    # request alone, which makes the estimate less jumpy.
    mean_seconds_per_card = (finish_time - run_start_time) / card_no
    estimated_time_left = mean_seconds_per_card * (set_total_cards - card_no)

    print(f"scraping card {card_no} of {set_total_cards} took {round(elapsed, 1)} seconds, estimated time left: {round(estimated_time_left/60, 1)} minutes")
    set_cards.append(card_dict)

scraping card 1 of 111 took 1.4 seconds
scraping card 2 of 111 took 1.0 seconds
scraping card 3 of 111 took 1.1 seconds
scraping card 4 of 111 took 1.2 seconds
scraping card 5 of 111 took 1.3 seconds
scraping card 6 of 111 took 1.5 seconds
scraping card 7 of 111 took 1.2 seconds
scraping card 8 of 111 took 1.3 seconds
scraping card 9 of 111 took 1.2 seconds
scraping card 10 of 111 took 1.2 seconds
scraping card 11 of 111 took 1.2 seconds
scraping card 12 of 111 took 1.2 seconds
scraping card 13 of 111 took 1.2 seconds
scraping card 14 of 111 took 1.2 seconds
scraping card 15 of 111 took 1.2 seconds
scraping card 16 of 111 took 1.3 seconds
scraping card 17 of 111 took 1.2 seconds
scraping card 18 of 111 took 1.2 seconds
scraping card 19 of 111 took 1.2 seconds
scraping card 20 of 111 took 1.2 seconds
scraping card 21 of 111 took 1.0 seconds
scraping card 22 of 111 took 1.1 seconds
scraping card 23 of 111 took 1.1 seconds
scraping card 24 of 111 took 1.0 seconds
scraping card 25 of 111 t