In [1]:
import requests
import pandas as pd

from tqdm.notebook import tqdm
from bs4 import BeautifulSoup

In [2]:
url = 'https://afk-arena.fandom.com/wiki/Heroes'

In [3]:
req = requests.get(url)
req.status_code

200

In [4]:
soup = BeautifulSoup(req.text, 'html.parser')

In [10]:
items = [i for c in soup.findAll('table', attrs={'class': 'wikitable'})[1:] for i in c.tbody.findAll('tr')][1:]
len(items)

142

In [14]:
def get_item_information(url):
    req = requests.get(url)
    if req.status_code != 200:
        raise requests.ConnectionError(f'Connection failed [{req.status_code}] to {url}.')
    
    soup = BeautifulSoup(req.text, 'html.parser')
    
    data = {}
    
    data['name'] = soup.find('h2', attrs={'data-source': 'name'}).text.strip()
    title = soup.find('h2', attrs={'data-source': 'title'})
    if title:
        data['title'] = title.text.strip()
        
    stats = soup.findAll('div', attrs={'class': 'pi-item'})
    for s in stats:
        key = s.h3.text.strip()
        if s.find('ul'):
            data[key] = [list(u)[0].text.strip() for u in s.findAll('li')]
        elif s.find('img'):
            data[key] = s.div.text.strip()
        else:
            data[key] = list(s.div.children)[0].text.strip()
    
    data['image'] = soup.find('a', attrs={'class': 'image-thumbnail'})['href']
    
    return data

In [15]:
data = []

In [16]:
for i in tqdm(items):
    if i.find('th'):
        continue
    
    columns = i.findAll('td')
    link = 'https://afk-arena.fandom.com' + columns[2].a['href']
    
    item = get_item_information(link)
    item['link'] = link
    
    data.append(item)

  0%|          | 0/142 [00:00<?, ?it/s]

In [17]:
data[0]

{'name': 'Estrilda',
 'title': 'Knight Of Valor',
 'Faction': 'Lightbearer',
 'Type': 'Strength',
 'Class': 'Warrior',
 'Role': 'Burst Damage',
 'Rarity': 'Ascended',
 'Union': 'Castellans',
 'Monikers': ['The Fledgling of House Rayne',
  'Miss -- Thane',
  'Commander -- Her men'],
 'Gender': 'Female',
 'Height': '170 cm',
 'Age': '23',
 'Relatives': ['Baron Rayne -- Father', 'Baden', 'Amanda', 'Sofia -- Nanny'],
 'Affiliations': ['Thane'],
 'Enemies': ['The Court of Terror', 'The Eldritch Council'],
 'Past Residence': 'Ranhorn',
 'Voice Actor': ['Rachel Roasek [EN]', 'Aoi Yūki [JP]'],
 'image': 'https://static.wikia.nocookie.net/afk-arena/images/0/0b/Estrilda.png/revision/latest?cb=20181215113356',
 'link': 'https://afk-arena.fandom.com/wiki/Estrilda'}

In [18]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,name,title,Faction,Type,Class,Role,Rarity,Union,Monikers,Gender,...,Enemies,Past Residence,Voice Actor,image,link,Race,Current Residence,Added,Primary Role,Secondary Role
0,Estrilda,Knight Of Valor,Lightbearer,Strength,Warrior,Burst Damage,Ascended,Castellans,"[The Fledgling of House Rayne, Miss -- Thane, ...",Female,...,"[The Court of Terror, The Eldritch Council]",Ranhorn,"[Rachel Roasek [EN], Aoi Yūki [JP]]",https://static.wikia.nocookie.net/afk-arena/im...,https://afk-arena.fandom.com/wiki/Estrilda,,,,,
1,Belinda,Beam of Hope,Lightbearer,Intelligence,Mage,AoE,Ascended,The Lightbringers,,Female,...,,,"[Holly Lindin [EN], Yui Horie [JP]]",https://static.wikia.nocookie.net/afk-arena/im...,https://afk-arena.fandom.com/wiki/Belinda,,,,,
2,Raine,Death's Denier,Lightbearer,Agility,Support,Buffer,Ascended,The Bounty Hunters,Young Lady -- Fisherman,Female,...,,The Lightbearers Empire,"[Sandra Osborne [EN], Wakura Yayoi [JP]]",https://static.wikia.nocookie.net/afk-arena/im...,https://afk-arena.fandom.com/wiki/Raine,Human,Rustport,,,
3,Rosaline,The Kind,Lightbearer,Intelligence,Support,Buffer,Ascended,,My Lovely Rosaline --,Female,...,,,"[Madison Brunoehler [EN], Yuko Toba [JP]]",https://static.wikia.nocookie.net/afk-arena/im...,https://afk-arena.fandom.com/wiki/Rosaline,Human,,"26 July, 2019 (",,
4,Lucius,Lightbringer,Lightbearer,Strength,Tank,Regen,Ascended,The Lightbringers,Dumb Beefcake -,Male,...,,,Kouji Katano,https://static.wikia.nocookie.net/afk-arena/im...,https://afk-arena.fandom.com/wiki/Lucius,Human,,"7 March, 2019 (",,


In [19]:
df.shape

(128, 24)

In [21]:
df.to_csv('data.csv', index=False)