### 網頁解析 & 自動化測試套件
* Beautiful Soup
* Selenium

### 1. Beautiful Soup

#### 1.1 安裝套件
* Python 2+ : pip install beautifulsoup4
* Python 3+ : pip3 install beautifulsoup4

#### 1.2 引用 Beautiful Soup 模組

In [2]:
from bs4 import BeautifulSoup

#### 1.3 使用Beautiful Soup 解析網頁

In [5]:
html_doc = """
<html><head><title>Hello World</title>

<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>

</head>
<body><h2>Test Header</h2>
<p>This is a test.</p>
<a id="link1" href="https://www.google.com.tw"> Google網站</a>
<a id="link2" class="large" href="https://www.facebook.com.tw">FaceBook</a>
<p>Hello, <b id="link1" class="boldtext">Bold Text</b></p>
</body></html>
"""

In [7]:
# 以 Beautiful Soup 解析 HTML 程式碼

soup = BeautifulSoup(html_doc, 'html.parser')
#soup = BeautifulSoup(open('data/A.html'), 'html.parser')


In [11]:
#將soup物件美化
print(soup.prettify())

<html>
 <head>
  <title>
   Hello World
  </title>
  <style>
   .large {
      color:blue;
      text-align: center;
    }
  </style>
 </head>
 <body>
  <h2>
   Test Header
  </h2>
  <p>
   This is a test.
  </p>
  <a href="https://www.google.com.tw" id="link1">
   Google網站
  </a>
  <a class="large" href="https://www.facebook.com.tw" id="link2">
   FaceBook
  </a>
  <p>
   Hello,
   <b class="boldtext" id="link1">
    Bold Text
   </b>
  </p>
 </body>
</html>



In [10]:
soup.html

<html><head><title>Hello World</title>
<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>
</head>
<body><h2>Test Header</h2>
<p>This is a test.</p>
<a href="https://www.google.com.tw" id="link1"> Google網站</a>
<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>
<p>Hello, <b class="boldtext" id="link1">Bold Text</b></p>
</body></html>

#### 1.4 解析網頁結構

In [None]:
# Head tag 

In [20]:
soup.head

<head><title>Hello World</title>
<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>
</head>

In [29]:
soup.head.text

'Hello World\n\n    .large {\n      color:blue;\n      text-align: center;\n    }\n\n'

In [None]:
# Title tag 

In [13]:
soup.title

<title>Hello World</title>

In [19]:
soup.title.text
#soup.title.string

'Hello World'

In [32]:
# body tag 
soup.body

<body><h2>Test Header</h2>
<p>This is a test.</p>
<a href="https://www.google.com.tw" id="link1"> Google網站</a>
<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>
<p>Hello, <b class="boldtext" id="link1">Bold Text</b></p>
</body>

In [31]:
soup.body.text

'Test Header\nThis is a test.\n Google網站\nFaceBook\nHello, Bold Text\n'

In [94]:
soup.body.a['href']

'https://www.google.com.tw'

In [38]:
# a tag 
soup.a

<a href="https://www.google.com.tw" id="link1"> Google網站</a>

In [37]:
soup.a.text
#soup.a.string

' Google網站'

In [88]:
soup.a['href']

'https://www.google.com.tw'

In [40]:
# p tag 
soup.p

<p>This is a test.</p>

In [45]:
soup.p.text
#soup.p.string

'This is a test.'

In [43]:
#.contents 屬性可將 tag 的子節點以列表方式輸出
print(soup.head.contents)

print(len(soup.head.contents))

for item in soup.head.contents:
    print(item)

[<title>Hello World</title>, '\n', <style>
    .large {
      color:blue;
      text-align: center;
    }
</style>, '\n']
4
<title>Hello World</title>


<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>




In [46]:
#.children 訪問子節點
for item in soup.head.children:
    print(item)

<title>Hello World</title>


<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>




In [47]:
#.children 訪問子節點
for item in soup.head.children:
    print(item)

<title>Hello World</title>


<style>
    .large {
      color:blue;
      text-align: center;
    }
</style>




In [48]:
print(soup.title.string)

print(soup.title.string.parent)

Hello World
<title>Hello World</title>


In [49]:
#.next_sibling 和 .previous_sibling 屬性可用來訪問同一層的兄弟節點
print(soup.body)
print("-----")
print(soup.body.p)
print("-----")

body = soup.body
print(body.p)
print(body.p.next_sibling)
print(body.p.next_sibling.next_sibling)
print(body.p.next_sibling.next_sibling.previous_sibling.previous_sibling)

<body><h2>Test Header</h2>
<p>This is a test.</p>
<a href="https://www.google.com.tw" id="link1"> Google網站</a>
<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>
<p>Hello, <b class="boldtext" id="link1">Bold Text</b></p>
</body>
-----
<p>This is a test.</p>
-----
<p>This is a test.</p>


<a href="https://www.google.com.tw" id="link1"> Google網站</a>
<p>This is a test.</p>


In [50]:
#取得網頁所有文字內容
print(soup.getText())


Hello World

    .large {
      color:blue;
      text-align: center;
    }


Test Header
This is a test.
 Google網站
FaceBook
Hello, Bold Text




In [53]:
for string in soup.strings:
    print(string)
    #print(repr(string))



Hello World



    .large {
      color:blue;
      text-align: center;
    }





Test Header


This is a test.


 Google網站


FaceBook


Hello, 
Bold Text






In [54]:
# .stripped_strings 可以移除多餘的空白内容:
for string in soup.stripped_strings:
    print(string)
    #print(repr(string))

Hello World
.large {
      color:blue;
      text-align: center;
    }
Test Header
This is a test.
Google網站
FaceBook
Hello,
Bold Text


####  1.4.1 搜尋單一節點

In [59]:
#搜尋節點 by tag
a_tag = soup.find('a')
print(a_tag.text)
print(a_tag['href'])
print(a_tag.get('href')) # .get(attribute's name) 取得tag 屬性內容

 Google網站
https://www.google.com.tw
https://www.google.com.tw


In [62]:
#搜尋節點 by tag ,tag id
a_tag = soup.find(name ='a',attrs={"id":"link2"})
#a_tag = soup.find('a',{'id':"link2"})
print(a_tag.text)

FaceBook


In [63]:
#搜尋節點 by tag ,class name
a_tag = soup.find(name ='a',attrs={"class":"large"})
#a_tag = soup.find('a',{'class':"large"})
#a_tag = soup.find('a','large')

print(a_tag.text)
print(a_tag['href'])
print(a_tag['class'])

FaceBook
https://www.facebook.com.tw
['large']


In [70]:
tags = soup.find(["a", "b"]) #只會找到第一個符合的節點 
print(tags)

<a href="https://www.google.com.tw" id="link1"> Google網站</a>


####  1.4.2 搜尋節點

In [65]:
#搜尋節點
a_tags = soup.find_all('a')
for tag in a_tags:
    print(tag.text)
    print(tag['href'])   

 Google網站
https://www.google.com.tw
FaceBook
https://www.facebook.com.tw


In [66]:
#取出節點屬性
for tag in a_tags:
    print(tag.get('href'))
    print(tag.get('class'))

https://www.google.com.tw
None
https://www.facebook.com.tw
['large']


In [72]:
# 搜尋所有超連結與粗體字
tags = soup.find_all(["a", "b"])
print(tags)

for tag in tags:
    print(tag)
    print(tag.text)
    print(tag.get('href'))

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>, <a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>, <b class="boldtext" id="link1">Bold Text</b>]
<a href="https://www.google.com.tw" id="link1"> Google網站</a>
 Google網站
https://www.google.com.tw
<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>
FaceBook
https://www.facebook.com.tw
<b class="boldtext" id="link1">Bold Text</b>
Bold Text
None


In [73]:
# 限制搜尋結果數量
tags = soup.find_all(["a", "b"], limit=2)
print(tags)

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>, <a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]


####  1.4.3 Select 選擇器

In [75]:
soup.select("title")

[<title>Hello World</title>]

In [76]:
soup.select("body a")

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>,
 <a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]

In [78]:
# Select by css name 
soup.select(".large")

[<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]

In [79]:
soup.select("a.large")

[<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]

In [80]:
# Select by id 
soup.select("#link1")

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>,
 <b class="boldtext" id="link1">Bold Text</b>]

In [81]:
soup.select("a#link1")

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>]

In [82]:
#依據屬性來選擇
soup.select('a[href]')

[<a href="https://www.google.com.tw" id="link1"> Google網站</a>,
 <a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]

In [83]:
soup.select('a[class]')

[<a class="large" href="https://www.facebook.com.tw" id="link2">FaceBook</a>]

In [84]:
soup.select('a[style]')

[]

####  1.4.5 Example-1 抓取蘋果新聞清單

In [1]:
import requests
from bs4 import BeautifulSoup

# Realtime-news listing page to scrape.
domainurl = 'http://www.appledaily.com.tw/realtimenews/section/new/'

res = requests.get(domainurl)
# Name the parser explicitly (same one the earlier cells use); without it
# bs4 guesses from whatever parsers are installed and emits a warning.
soup = BeautifulSoup(res.text, 'html.parser')

# Every '.rtddt' element is treated as one news entry: its first <a> gives
# the link, its first <h2> the category and its first <h1> the headline.
for news in soup.select('.rtddt'):
    link = news.select('a')[0]['href']
    tag = news.select('h2')[0].text
    title = news.select('h1')[0].text
    print(tag, title)
    print(link)

地產 半夜摺垃圾袋　「變態收納術」轟動FB社團(140933)
https://tw.finance.appledaily.com/realtime/20181030/1456953/
娛樂 《抖音》神救援中國民謠　女聲版被讚：屌打...(4236)
https://tw.entertainment.appledaily.com/realtime/20181030/1456888/
生活 台中花博開幕在即　郵輪巴士串連商家搏千萬...
https://tw.news.appledaily.com/life/realtime/20181030/1457289/
國際 【金庸逝世9】「華山論劍」成絕響　金庸8...(4245)
https://tw.news.appledaily.com/international/realtime/20181030/1457221/
微視蘋 【蘋果頭家】愛爾達霸氣女董　迷上精油想當...(26139)
https://tw.news.appledaily.com/micromovie/realtime/20181030/1448130/
3C 【紐約直擊】蘋果發表會登場！爆料新品4大...(7568)
https://tw.lifestyle.appledaily.com/gadget/realtime/20181030/1456644/
生活 金庸辭世　文化部長鄭麗君：一代傳奇
https://tw.news.appledaily.com/life/realtime/20181030/1457331/
國際 【不斷更新】蘋果發表會紐約登場　全新Ma...(284)
https://tw.news.appledaily.com/international/realtime/20181030/1457323/
體育 【台灣大賽】潘彥廷幸運三壘打　8局統一1...(10776)
https://tw.sports.appledaily.com/realtime/20181030/1456988/
社會 詐欺犯出庭欲脫逃　3法警即時制伏
https://tw.news.appledaily.com/local/realtime/20181030/1457329/
國際 【金庸逝世37】「我是他的粉絲」　瓊瑤：...
https://tw.n

####  1.4.5 Example-2 抓取蘋果新聞內文

In [2]:
import requests
from bs4 import BeautifulSoup

# Single article page to scrape.
domainurl = 'https://tw.news.appledaily.com/international/realtime/20181030/1457243/'

res = requests.get(domainurl)
# Name the parser explicitly (same one the earlier cells use); without it
# bs4 guesses from whatever parsers are installed and emits a warning.
soup = BeautifulSoup(res.text, 'html.parser')

# Headline: first <h1> under the article's left column.
title = soup.select('article.ndArticle_leftColumn h1')[0].text
# Body: first <p> inside the article content container.
content = soup.select('article.ndArticle_content div.ndArticle_margin p')[0].text
print(title)
print(content)

【金庸逝世14】挑哪一段向金庸致敬？網友選「蛛兒淚拒無忌表哥」
一代武俠小說泰斗查良鏞（筆名為金庸）今晚驚傳逝世，享年94歲，網友們紛紛哀悼，有位粉絲就問大家：「如果要挑一個段子跟大師致敬，你會挑哪一段？」 這位網友在《PTT》「八卦板」發文說：「話說，金學，陪伴著大家高中那段青澀歲月，練功，武林絕學，飛雪連天射白鹿，笑書神俠倚碧鴛。沒練完功的自己去補齊。如果要挑個段子跟大師致敬，大家印象最深的是那段啊？」這位網友引了這段：她轉過頭來，柔聲道「阿牛哥哥，你一直待我很好，我好生感激。可是我的心，早就許了給那個狠心的、兇惡的小張無忌了。你不是他，不，不是他……」（摘自《倚天屠龍記》）不識張郎是張郎。永別了。大師。 （即時新聞中心／綜合報導）


####  1.4.5 Example-3 抓取蘋果新聞清單列表,內文並儲存至資料庫

In [None]:
import pymongo
import requests
from bs4 import BeautifulSoup


def getContent(link):
    """Download an article page and return the text of its first body paragraph.

    Args:
        link: URL of the article page to fetch.

    Returns:
        The text of the first element matched by the CSS selector
        'article.ndArticle_content div.ndArticle_margin p'.

    Raises:
        IndexError: if the page contains no element matching that selector.
    """
    res = requests.get(link)
    # Explicit parser keeps results independent of which parsers are installed.
    soup = BeautifulSoup(res.text, 'html.parser')
    # Only the body text is returned, so the headline is not looked up here;
    # a page without a headline therefore no longer fails this function.
    return soup.select('article.ndArticle_content div.ndArticle_margin p')[0].text

def insertDB(tag,title,link,content):
    """Insert one article document into the 'AppleNews' collection of the 'Article' database.

    Connects to the MongoDB server at mongodb://localhost:27017/, inserts a
    document with the keys "tag", "title", "link" and "content", and prints
    the id of the inserted document.

    Args:
        tag: value stored under the "tag" key.
        title: value stored under the "title" key.
        link: value stored under the "link" key.
        content: value stored under the "content" key.
    """
    mydict = {"tag": tag, "title": title, "link": link, "content": content}
    # The client is opened per call, so close it when done; otherwise every
    # inserted article leaves a connection pool behind.
    with pymongo.MongoClient("mongodb://localhost:27017/") as myclient:
        mycol = myclient["Article"]['AppleNews']
        x = mycol.insert_one(mydict)
    print(x.inserted_id)

# Realtime-news listing page to crawl.
domainurl = 'http://www.appledaily.com.tw/realtimenews/section/new/'

res = requests.get(domainurl)
# Name the parser explicitly (same one the earlier cells use); without it
# bs4 guesses from whatever parsers are installed and emits a warning.
soup = BeautifulSoup(res.text, 'html.parser')

# For every listed entry: read link/category/headline from the listing,
# fetch the article body, then store the record.
for news in soup.select('.rtddt'):
    link = news.select('a')[0]['href']
    tag = news.select('h2')[0].text
    title = news.select('h1')[0].text
    print(tag, title)
    print(link)
    content = getContent(link)
    insertDB(tag, title, link, content)

####  1.4.5  Example-4 抓取PTT文章內容及回文

In [4]:
import requests
from bs4 import BeautifulSoup

res = requests.get('https://www.ptt.cc/bbs/HatePolitics/M.1539669111.A.B2D.html')
# Name the parser explicitly (same one the earlier cells use); without it
# bs4 guesses from whatever parsers are installed and emits a warning.
soup = BeautifulSoup(res.text, 'html.parser')
# The <div> whose class attribute is "bbs-screen bbs-content" is used as the
# article container; its .text is printed as-is.
mainContent = soup.find(name='div', attrs={"class": "bbs-screen bbs-content"})
content = mainContent.text
print(content)

作者kero2377 (顆顆)看板HatePolitics標題[討論] 高雄選情分析時間Tue Oct 16 13:51:46 2018
等等要出門了 趁著還有一些空檔 就不富奸了


帶來眾所矚目的高雄選情


高雄是由


正義 溫暖  臉書擁有國際粉絲的巨星  陳其邁



與


黑道 色情狂 北部菜蟲王  獎金發太多的散財童子  跟著月亮走的北農ceo韓國瑜



.... 乾  我不會被吉吧   國瑜  我都是看電視學來的 母湯針對



進入正題吧




陳其邁  45.7


韓國瑜  43.5


不知道/沒意見     10.8



猶如各位所見  真的很膠著 .


但韓國瑜在這三個禮拜追的速度非常的 非常的  驚人


在一個月以前他是穩定輸15%的  而在做了館長直播後  更加的追近了


但韓粉們也不能過於樂觀  畢竟現在是韓國瑜的名望高峰期  但還是壓不倒陳其邁


主要是深綠區太穩固了  市區已經轉韓.  而不知道或沒意見的區塊 研判也是陳其邁居多


所以目前陳其邁還是贏在3-5%之間的  所以以我個人的經驗分析


陳其邁應該會以微小票數勝出  除非外地的年輕人非常的carry 把我們韓總推上市長大位


否則綠地的機率 應還是超過六成  就看這個月韓總能帶給我們甚麼樣的內容了


----


北市選情分析  #1RnMSOiN



--
※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 118.232.84.177
※ 文章網址: https://www.ptt.cc/bbs/HatePolitics/M.1539669111.A.B2D.html
推 mark2165: 推 韓加油 10/16 13:52
推 medlife0830: 真的要看高雄年輕朋友了 10/16 13:53
推 liunwaiqoo: 你這預估很合理,但我仍然認為韓能以5萬票左右勝出 10/16 13:53
          韓要更努力 陳要爆得更用力 就有機會
推 zenan321: 你的數字是支持率還是投票結果百分比?
            百分比
推 gerund: 3F到底哪裡來的自信? 10/16 13:54
※ 編輯: kero2377 (118.232.84.177), 10/16/2018 13:55:43
推 npcx

In [5]:
import requests
from bs4 import BeautifulSoup

res = requests.get('https://www.ptt.cc/bbs/HatePolitics/M.1539669111.A.B2D.html')
# Name the parser explicitly (same one the earlier cells use); without it
# bs4 guesses from whatever parsers are installed and emits a warning.
soup = BeautifulSoup(res.text, 'html.parser')
mainContent = soup.find(name='div', attrs={"class": "bbs-screen bbs-content"})
content = mainContent.text
print(content)

# The "article-metaline-right" block is read as one tag/value pair.
board_meta = mainContent.find(name='div', attrs={"class": "article-metaline-right"})
board_tag = board_meta.find(name='span', attrs={"class": "article-meta-tag"}).text
board_value = board_meta.find(name='span', attrs={"class": "article-meta-value"}).text
print(board_tag, board_value)

# Every "article-metaline" block is likewise one tag/value pair.
article_meta = mainContent.find_all(name='div', attrs={"class": "article-metaline"})
for item in article_meta:
    meta_tag = item.find(name='span', attrs={"class": "article-meta-tag"}).text
    meta_value = item.find(name='span', attrs={"class": "article-meta-value"}).text
    print(meta_tag, meta_value)

# Each "push" block is one reply: user id, reply text and the ip/datetime field.
feedbackList = mainContent.find_all(name='div', attrs={"class": "push"})
for feedback in feedbackList:
    push_userId = feedback.find(name='span', attrs={"class": "f3 hl push-userid"}).text
    push_content = feedback.find(name='span', attrs={"class": "f3 push-content"}).text
    push_time = feedback.find(name='span', attrs={"class": "push-ipdatetime"}).text
    print(push_userId, push_content, push_time)

# Remove every <span> and <div> from the container (metadata and replies were
# already read above) so that what is printed is the remaining markup.
for remove in mainContent(['span', 'div']):
    remove.decompose()

print(mainContent)

作者kero2377 (顆顆)看板HatePolitics標題[討論] 高雄選情分析時間Tue Oct 16 13:51:46 2018
等等要出門了 趁著還有一些空檔 就不富奸了


帶來眾所矚目的高雄選情


高雄是由


正義 溫暖  臉書擁有國際粉絲的巨星  陳其邁



與


黑道 色情狂 北部菜蟲王  獎金發太多的散財童子  跟著月亮走的北農ceo韓國瑜



.... 乾  我不會被吉吧   國瑜  我都是看電視學來的 母湯針對



進入正題吧




陳其邁  45.7


韓國瑜  43.5


不知道/沒意見     10.8



猶如各位所見  真的很膠著 .


但韓國瑜在這三個禮拜追的速度非常的 非常的  驚人


在一個月以前他是穩定輸15%的  而在做了館長直播後  更加的追近了


但韓粉們也不能過於樂觀  畢竟現在是韓國瑜的名望高峰期  但還是壓不倒陳其邁


主要是深綠區太穩固了  市區已經轉韓.  而不知道或沒意見的區塊 研判也是陳其邁居多


所以目前陳其邁還是贏在3-5%之間的  所以以我個人的經驗分析


陳其邁應該會以微小票數勝出  除非外地的年輕人非常的carry 把我們韓總推上市長大位


否則綠地的機率 應還是超過六成  就看這個月韓總能帶給我們甚麼樣的內容了


----


北市選情分析  #1RnMSOiN



--
※ 發信站: 批踢踢實業坊(ptt.cc), 來自: 118.232.84.177
※ 文章網址: https://www.ptt.cc/bbs/HatePolitics/M.1539669111.A.B2D.html
推 mark2165: 推 韓加油 10/16 13:52
推 medlife0830: 真的要看高雄年輕朋友了 10/16 13:53
推 liunwaiqoo: 你這預估很合理,但我仍然認為韓能以5萬票左右勝出 10/16 13:53
          韓要更努力 陳要爆得更用力 就有機會
推 zenan321: 你的數字是支持率還是投票結果百分比?
            百分比
推 gerund: 3F到底哪裡來的自信? 10/16 13:54
※ 編輯: kero2377 (118.232.84.177), 10/16/2018 13:55:43
推 npcx

####  1.4.5  Example-5 抓取Pixnet相簿圖片

In [None]:
import requests
from bs4 import BeautifulSoup
import os
import urllib.request


# Only the first line of the list file is used; it is taken as the URL of a
# Pixnet album-list page.
with open('/home/user/Code/PIXNETPhoto_list', "r") as f:
    # readline() keeps the trailing newline; strip it so it does not end up
    # inside the request URL or the folder name.
    url = f.readline().strip()
print(url)

# Folder name = the URL with the scheme and the album-list suffix removed.
folderName = url.replace('.pixnet.net/album/list', '').replace('http://', '')
print(folderName)

folderPath = '/home/user/Code/' + folderName
os.makedirs(folderPath, exist_ok=True)

source = requests.get(url)
soup = BeautifulSoup(source.text, 'html.parser')

# Each "grid-photo" <div> on the list page may link to one album.
for grid in soup.findAll("div", {"class": "grid-photo"}):
    anchor = grid.a
    if anchor is None:
        continue
    href = anchor['href']
    if 'album' not in href:
        continue

    # Separate names for the album page so the list-page objects being
    # iterated above are not rebound mid-loop.
    album_page = requests.get(href)
    album_soup = BeautifulSoup(album_page.text, 'html.parser')
    for photo in album_soup.findAll("a", {"class": "photolink"}):
        # Skip links without a thumbnail instead of failing on them.
        src = photo.img.get('src', '') if photo.img is not None else ''
        if 'pic.pimg.tw' not in src:
            continue

        print(folderPath + '/')
        # The thumbnail URL ends in '_s.jpg'; dropping '_s' gives the URL
        # that is downloaded.
        img = src.replace('_s.jpg', '.jpg')
        print(img)
        # Fifth '/'-separated piece of the URL, up to its first '-', is used
        # as the local file name.
        fileName = img.split('/')[4].split('-')
        savePath = folderPath + '/' + fileName[0]
        urllib.request.urlretrieve(img, savePath)
        # Report completion only after the download has actually finished.
        print(fileName[0] + "-download finish!")

### 2. Selenium

#### 2.1 安裝套件
* Python 2+ : pip install selenium
* Python 3+ : pip3 install selenium
* Install Chrome WebDriver : sudo apt-get install chromium-chromedriver

#### 2.2 Selenium基本操作

##### 2.2.1 引用套件

In [4]:
from selenium import webdriver

##### 2.2.2 啟動瀏覽器

In [5]:
driver = webdriver.Chrome()
#driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver')

In [6]:
#設定視窗最大化
driver.maximize_window()

In [7]:
url = 'http://www.google.com'
driver.get(url)

In [8]:
#回傳目前的網址
driver.current_url 

'https://www.google.com/?gws_rd=ssl'

In [9]:
#回傳目前的網頁標題
driver.title 

'Google'

In [10]:
# 網頁重新整理
driver.refresh()

In [None]:
driver.save_screenshot('pic/screen.png')  # 儲存截圖 save_screenshot()

##### 2.2.3 關閉瀏覽器

In [121]:
driver.close() #關閉目前視窗
#driver.quit() #關閉所有視窗,並退出Driver

#### 2.3 元素定位

#### By ID

In [None]:
# <div id="aaa">...</div>

In [None]:
driver.find_element_by_id("aaa")
driver.find_elements_by_id("aaa")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.ID, "aaa")
driver.find_elements(By.ID, "aaa")

#### By Class Name

In [11]:
#<div class="myclass">...</div>

In [None]:
driver.find_element_by_class_name("myclass")
driver.find_elements_by_class_name("myclass")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.CLASS_NAME, "myclass")
driver.find_elements(By.CLASS_NAME, "myclass")

#### By Tag Name

In [None]:
#<a href="..."></a>

In [None]:
driver.find_element_by_tag_name("a")
driver.find_elements_by_tag_name("a")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.TAG_NAME, "a")
driver.find_elements(By.TAG_NAME, "a")

#### By Name

In [111]:
#<div name="abc">...</div>

In [None]:
driver.find_element_by_name("abc")
driver.find_elements_by_name("abc")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.NAME, "abc")
driver.find_elements(By.NAME, "abc")

#### By CSS

In [None]:
#<div id="area"><span class="location">abc</span></div>

In [None]:
driver.find_element_by_css_selector("#area span.location")
driver.find_elements_by_css_selector("#area span.location")

In [None]:
from selenium.webdriver.common.by import By
# The first argument is the locator strategy and the second the selector;
# passing only the selector would make it the strategy, with no value.
driver.find_element(By.CSS_SELECTOR, "#area span.location")
driver.find_elements(By.CSS_SELECTOR, "#area span.location")

#### By Link Text

In [None]:
#<a href="http://www.google.com/search?q=iphone">iphone</a>

In [None]:
driver.find_element_by_link_text("iphone")
driver.find_elements_by_link_text("iphone")

In [None]:
from selenium.webdriver.common.by import By
# The imported locator class is `By` (case-sensitive).
driver.find_element(By.LINK_TEXT, "iphone")
driver.find_elements(By.LINK_TEXT, "iphone")

#### By Partial Link Text

In [None]:
#<a href="http://www.google.com/search?q=iphone">iphone</a>

In [None]:
driver.find_element_by_partial_link_text("phone")
driver.find_elements_by_partial_link_text("phone")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.PARTIAL_LINK_TEXT, "phone")
driver.find_elements(By.PARTIAL_LINK_TEXT, "phone")

#### By XPATH

In [None]:
#<input type="text" name="abc" />

In [None]:
driver.find_element_by_xpath("//input")
driver.find_elements_by_xpath("//input")

In [None]:
from selenium.webdriver.common.by import By
driver.find_element(By.XPATH, "//input")
driver.find_elements(By.XPATH, "//input")

#### 2.4 常用 Action

* .send_keys('XXX') : 輸入框輸入內容

* .click() : 點擊

* .get_attribute(name) : 得到特定屬性的值

* .clear() : 清除輸入的內容

* .location : 回傳元素位置

* .submit() : 提交內容 (Form)

#### 2.5 Example 591

In [149]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [150]:
driver = webdriver.Chrome()

In [151]:
driver.get('https://www.591.com.tw/')

In [152]:
driver.find_element_by_id('auto-search-input')

<selenium.webdriver.remote.webelement.WebElement (session="4cd87efff055c0f28741c72ae33e476d", element="0.5744800835105888-1")>

In [153]:
a = driver.find_element_by_id('auto-search-input')

In [154]:
a.get_attribute("class")

'auto-search-input ie-color'

In [155]:
a.location

{'x': 184, 'y': 280}

In [156]:
driver.find_element_by_id('auto-search-input').send_keys('竹東')

In [157]:
driver.find_element_by_id('auto-search-input').clear()

In [158]:
driver.find_element_by_id('auto-search-input').send_keys('竹北')

In [159]:
driver.find_element_by_id('auto-search-input').send_keys(Keys.ENTER)

In [160]:
driver.find_element_by_xpath('//*[@id="search-location"]/span[1]/i').click()

In [161]:
driver.find_element_by_link_text('新竹縣').click()

In [162]:
driver.find_element_by_xpath('//*[@id="optionBox"]/li[3]/label/span').click()

In [163]:
driver.find_element_by_xpath('//*[@id="optionBox"]/li[1]/label/span').click()

In [164]:
driver.find_element_by_link_text('獨立套房').click()

In [165]:
driver.find_element_by_xpath('//*[@id="search-price"]/span[3]').click()

In [166]:
driver.find_element_by_xpath('//*[@id="search-plain"]/span[2]').click()

In [167]:
print(driver.current_url)

https://rent.591.com.tw/?kind=2&type=1&region=5&rentprice=2&section=54,55&area=0,10


In [168]:
driver.find_element_by_partial_link_text('三民路').click()

In [170]:
target = driver.find_elements_by_partial_link_text('飯店')

In [172]:
for item in target :
    item.click()