# BeautifulSoup
---
pip install beautifulsoup4  
pip install lxml  

In [11]:
import requests
from bs4 import BeautifulSoup

In [12]:
# Page to scrape: the Naver webtoon listing
url = 'https://comic.naver.com/webtoon/weekday.nhn'
# Always pass a timeout: without one, requests waits indefinitely on a stalled
# server and this cell would hang the kernel.
res = requests.get(url, timeout=10)
# Raise an HTTPError on a 4xx/5xx reply instead of going on to parse an error page.
res.raise_for_status()

In [13]:
# Sanity check: length (in characters) of the decoded response body
len(res.text)

352589

In [14]:
# Parse the downloaded HTML into a BeautifulSoup object that later cells navigate
soup = BeautifulSoup(res.text, 'lxml') # 'lxml' is the parser that analyses the markup
# soup.title is the whole <title> element, tags included
print(soup.title)

<title>네이버 만화 &gt; 요일별  웹툰 &gt; 전체웹툰</title>


In [15]:
# get_text() returns only the text inside <title>; entities such as &gt; come out decoded
print(soup.title.get_text())

네이버 만화 > 요일별  웹툰 > 전체웹툰


In [16]:
# Return the first <a> element found in the soup object
print(soup.a)

<a href="#menu" onclick="document.getElementById('menu').tabIndex=-1;document.getElementById('menu').focus();return false;"><span>메인 메뉴로 바로가기</span></a>


In [17]:
# Return the attribute information of that <a> element (a dict of name -> value)
print(soup.a.attrs)

{'href': '#menu', 'onclick': "document.getElementById('menu').tabIndex=-1;document.getElementById('menu').focus();return false;"}


In [18]:
# Print the *value* of the <a> element's href attribute
print(soup.a['href'])

#menu


#### 대상 Web을 잘 알지 못하는 경우

In [19]:
# Search for an <a> element whose class is 'Nbtn_upload'
soup.find('a', attrs={'class':'Nbtn_upload'})

<a class="Nbtn_upload" href="/mypage/myActivity.nhn" onclick="nclk_v2(event,'olk.upload');">웹툰 올리기</a>

In [20]:
# Search for any element (no tag name given) whose class is 'Nbtn_upload'
soup.find(attrs = {'class': 'Nbtn_upload'})

<a class="Nbtn_upload" href="/mypage/myActivity.nhn" onclick="nclk_v2(event,'olk.upload');">웹툰 올리기</a>

In [25]:
# Trending ("rapidly rising popularity") ranking: take its top entry,
# the <li> element with class 'rank01'
rank1 = soup.find('li', attrs={'class':'rank01'})
# find() returns None when nothing matches (e.g. the page layout changed);
# fail here with a clear message instead of an AttributeError on rank1.a.
if rank1 is None:
    raise ValueError("no <li class='rank01'> element found in the page")
# The first <a> inside the entry is the link to the ranked episode
print(rank1.a)

<a href="/webtoon/detail.nhn?titleId=703846&amp;no=125" onclick="nclk_v2(event,'rnk*p.cont','703846','1')" title="여신강림-120화">여신강림-120화</a>


In [26]:
# Sibling objects
# Starting point: the link text of the top-ranked entry
print(rank1.a.get_text())

여신강림-120화


In [27]:
rank1.next_sibling # a newline text node sits between the tags, so this does not reach the next item yet

'\n'

In [28]:
rank1.next_sibling.next_sibling # next: the second hop skips the newline node and lands on the following <li>

<li class="rank02">
<a href="/webtoon/detail.nhn?titleId=703852&amp;no=113" onclick="nclk_v2(event,'rnk*p.cont','703852','2')" title="바른연애 길잡이-113">바른연애 길잡이-113</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>

In [32]:
rank1.find_next_sibling('li') # search for the next sibling by tag name (<li>)

<li class="rank02">
<a href="/webtoon/detail.nhn?titleId=703852&amp;no=113" onclick="nclk_v2(event,'rnk*p.cont','703852','2')" title="바른연애 길잡이-113">바른연애 길잡이-113</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>

In [29]:
# Two hops from rank1 (newline node, then <li>) give the second-ranked entry
rank2 = rank1.next_sibling.next_sibling
# The same two hops from rank2 reach the third-ranked entry
rank2.next_sibling.next_sibling

<li class="rank03">
<a href="/webtoon/detail.nhn?titleId=743139&amp;no=14" onclick="nclk_v2(event,'rnk*p.cont','743139','3')" title="한림체육관-14화">한림체육관-14화</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>

In [30]:
rank2.previous_sibling.previous_sibling # previous: two hops back from rank2 return to the rank01 entry

<li class="rank01">
<a href="/webtoon/detail.nhn?titleId=703846&amp;no=125" onclick="nclk_v2(event,'rnk*p.cont','703846','1')" title="여신강림-120화">여신강림-120화</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>

In [34]:
# Search backwards for the previous sibling that is an <li> tag
rank2.find_previous_sibling('li')

<li class="rank01">
<a href="/webtoon/detail.nhn?titleId=703846&amp;no=125" onclick="nclk_v2(event,'rnk*p.cont','703846','1')" title="여신강림-120화">여신강림-120화</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>

In [36]:
# Fetch all of them: every <li> sibling that follows rank1, returned as a list
rank1.find_next_siblings('li')

[<li class="rank02">
 <a href="/webtoon/detail.nhn?titleId=703852&amp;no=113" onclick="nclk_v2(event,'rnk*p.cont','703852','2')" title="바른연애 길잡이-113">바른연애 길잡이-113</a>
 <span class="rankBox">
 <img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
 						
 					
 				</span>
 </li>, <li class="rank03">
 <a href="/webtoon/detail.nhn?titleId=743139&amp;no=14" onclick="nclk_v2(event,'rnk*p.cont','743139','3')" title="한림체육관-14화">한림체육관-14화</a>
 <span class="rankBox">
 <img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
 						
 					
 				</span>
 </li>, <li class="rank04">
 <a href="/webtoon/detail.nhn?titleId=702608&amp;no=145" onclick="nclk_v2(event,'rnk*p.cont','702608','4')" title="랜덤채팅의 그녀!-145. 해일(1)">랜덤채팅의 그녀!-145. 해일(1)</a>
 <span class="rankBox">
 <img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/image

In [31]:
# Parent object: the element that directly contains rank1 (here the <ol> ranking list)
rank1.parent

<ol class="asideBoxRank" id="realTimeRankFavorite">
<li class="rank01">
<a href="/webtoon/detail.nhn?titleId=703846&amp;no=125" onclick="nclk_v2(event,'rnk*p.cont','703846','1')" title="여신강림-120화">여신강림-120화</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>
<li class="rank02">
<a href="/webtoon/detail.nhn?titleId=703852&amp;no=113" onclick="nclk_v2(event,'rnk*p.cont','703852','2')" title="바른연애 길잡이-113">바른연애 길잡이-113</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/comic/images/migration/common/arrow_no.gif" title="변동없음" width="7"/> 0
						
					
				</span>
</li>
<li class="rank03">
<a href="/webtoon/detail.nhn?titleId=743139&amp;no=14" onclick="nclk_v2(event,'rnk*p.cont','743139','3')" title="한림체육관-14화">한림체육관-14화</a>
<span class="rankBox">
<img alt="변동없음" height="10" src="https://ssl.pstatic.net/static/c