/
scraping_organization_user.py
34 lines (25 loc) · 1.15 KB
/
scraping_organization_user.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import requests
from bs4 import BeautifulSoup
# Collect the Qiita user id of every member of the "opst" organization by
# walking the paginated organization member list and following "next" links.
member_id_list = []
url = "https://qiita.com/organizations/opst/members"
url_base = "https://qiita.com"
while True:
    # Fetch the current page of the member list (first page on entry).
    print(url)
    response = requests.get(url)
    # Parse the whole page with BeautifulSoup.
    page_soup = BeautifulSoup(response.text, features="html.parser")
    # Select the member-list section.
    # BUG FIX: the original passed a set ({"class", "..."}) instead of a
    # dict ({"class": "..."}) as the attrs filter here and below — a comma
    # where a colon belongs. BeautifulSoup's attrs argument must be a
    # mapping of attribute name -> value (cf. the correct dict on the
    # pager lookup at the bottom of the loop).
    member_soup = page_soup.find(
        "ul", attrs={"class": "p-organization_memberlist"}
    )
    # Get the list of member cards on this page.
    member_cards = member_soup.find_all(
        "div", attrs={"class": "od-MemberCardContent"}
    )
    for card in member_cards:
        # The user id text looks like "@foo"; keep only the part after "@".
        # ("id" renamed to avoid shadowing the builtin; the unused "name"
        # lookup from the original was dead code and is removed.)
        user_id_text = card.contents[0].find(
            "span", attrs={"class": "od-MemberCardHeaderIdentities_userid"}
        ).contents[0]
        member_id_list.append(user_id_text.split('@')[1])
    # Follow the pager's "next" link; stop when there is no further page.
    next_pg_soup = page_soup.find(
        "a", attrs={"class": "st-Pager_link", "rel": "next"}
    )
    if not next_pg_soup:
        break
    url = url_base + next_pg_soup.get("href")
print(member_id_list)