-
Notifications
You must be signed in to change notification settings - Fork 0
/
parsex.py
75 lines (41 loc) · 1.81 KB
/
parsex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import collections
import os.path
import requests
from bs4 import BeautifulSoup
# Base URL of the PuzzleDragonX site; this is the page fetched and parsed
# when the module is run as a script.
PADXURL = 'http://puzzledragonx.com/'
class Event(collections.namedtuple('Event', ['title', 'members'])):
    """A titled event together with its members.

    Fields:
        title: text label of the event.
        members: collection of members; combined with ``|`` when two
            events are added (the parsers in this module supply sets of
            integer ids).
    """

    def __add__(self, other):
        """Combine this event with *other*.

        When *other* is an instance of the same class, the result is a new
        Event whose title is both titles joined by ``', '`` and whose
        members are ``self.members | other.members``. Any other operand is
        handed to the inherited tuple ``__add__`` unchanged.
        """
        if not isinstance(other, type(self)):
            # Not an event: defer to plain tuple addition.
            return super().__add__(other)
        merged_title = ', '.join((self.title, other.title))
        merged_members = self.members | other.members
        return Event(merged_title, merged_members)
def urlsoup(url, parser='html.parser', *, timeout=10):
    """Fetch *url* over HTTP and return its body parsed by BeautifulSoup.

    Args:
        url: address to request with ``requests.get``.
        parser: name of the BeautifulSoup parser to use.
        timeout: seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Use ``None`` to wait
            indefinitely, which was the behaviour before this parameter
            existed.

    Returns:
        BeautifulSoup object built from the response text.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            including ``Timeout`` when the server does not answer in time.
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, parser)
def parse_godfest(soup):
    """Search a soup for the current REM godfest.

    Args:
        soup: parsed page expected to contain a ``<td class="godfeslist">``
            cell.

    Returns:
        Event whose title is the ``', '``-joined text of the cell's
        ``a.bold`` links, and whose members are the set of integer ids
        taken from the ``data-original`` attribute of every image with a
        ``title`` attribute in the table that follows the cell's parent
        element.

    Raises:
        AttributeError: if the cell or the following table is not found
            (the lookups return ``None``).
    """
    # Look the cell up once; both the title and the member table hang off it.
    cell = soup.find('td', class_='godfeslist')
    title_links = cell.find_all('a', class_='bold')
    title = ', '.join(link.string for link in title_links)
    table = cell.parent.find_next_sibling('table')
    # find_all, not find: find returns only the first matching tag, and
    # iterating a single tag walks its children instead of the images.
    images = table.find_all('img', title=True)
    members = {id_from_url(img['data-original']) for img in images}
    return Event(title, members)
def parse_gala(soup):
    """Search a soup for the current REM gala.

    The title is the string of the ``h2`` inside the ``div`` that follows
    the ``<a id="rareegg">`` anchor. The members are the integer ids taken
    from the ``data-original`` attribute of every image with a ``title``
    attribute inside the ``<td class="rareegg">`` cell.

    Returns:
        Event holding that title and the set of member ids.
    """
    anchor = soup.find('a', id='rareegg')
    heading = anchor.find_next_sibling('div').h2
    title = heading.string

    cell = soup.find('td', class_='rareegg')
    members = set()
    for img in cell.find_all('img', title=True):
        members.add(id_from_url(img['data-original']))
    return Event(title, members)
def parse_rem(soup):
    """Search a soup for the current REM events.

    Parses the godfest first, then the gala, and returns the two combined
    with ``+``.
    """
    return parse_godfest(soup) + parse_gala(soup)
def id_from_url(url):
    """Return the monster id encoded in a resource path, as an integer.

    The id is the final path component of *url* with its extension
    removed, e.g. ``'img/book/123.png'`` gives ``123``.

    Raises:
        ValueError: if that component is not a valid integer literal.
    """
    filename = os.path.basename(url)
    stem = os.path.splitext(filename)[0]
    return int(stem)
if __name__ == '__main__':
    # Script entry point: download the site's front page, extract the
    # current REM events and print the combined result.
    front_page = urlsoup(PADXURL)
    rem_events = parse_rem(front_page)
    print(rem_events)