# 載入所需套件

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os
from urllib.request import urlretrieve

# 定義爬取永慶房仲網房屋資訊函數

In [2]:
def yungching_house_crawler(url):
    """Scrape one Yungching listing page into a DataFrame.

    Parameters
    ----------
    url : str
        Address of a listing page on buy.yungching.com.tw.

    Returns
    -------
    pandas.DataFrame
        One row per ``li.m-list-item.vr`` element found on the page, with the
        columns 標題, 地點, 類型, 年數, 樓層, 土地, 主廳, 建物, 房間數, 價格,
        網址 and 照片 (all kept as the raw page strings). A page with no
        matching elements yields an empty frame with the same columns.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or does not answer within the timeout.
    IndexError, KeyError
        If a listing does not have the expected anchors, ``li`` items,
        price block or image attributes.
    """
    columns = ['標題', '地點', '類型', '年數', '樓層', '土地',
               '主廳', '建物', '房間數', '價格', '網址', '照片']

    # Send a browser-like User-Agent with the request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}
    # `headers` has to be passed by keyword: the second positional argument of
    # requests.get is `params`, which would append the dict to the query
    # string and leave the default User-Agent header in place.
    # The timeout keeps an unresponsive server from blocking the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    def _one_line(text):
        # Drop the line breaks embedded in some of the detail items.
        return text.replace('\n', '').replace('\r', '')

    records = []
    for house in soup.select('li.m-list-item.vr'):
        # Query each listing once instead of once per extracted column.
        anchors = house.select('a')
        details = house.select('li')
        # The second anchor's text is "<title>\u3000<location>"
        # (separated by an ideographic space).
        heading = anchors[1].text.split('\u3000')

        records.append({
            '標題': heading[0],
            '地點': heading[1],
            '類型': details[0].text,
            '年數': _one_line(details[1].text),
            '樓層': _one_line(details[2].text),
            # NOTE: str.strip removes any of these *characters* from both
            # ends (it is not a prefix removal); kept as in the original.
            '土地': details[3].text.strip('土地 '),
            '主廳': details[4].text,
            '建物': details[5].text.strip('建物 '),
            '房間數': _one_line(details[6].text),
            '價格': house.select('div.price')[0].text,
            # The first anchor carries a site-relative link.
            '網址': 'https://buy.yungching.com.tw' + anchors[0]['href'],
            # The image source is protocol-relative, so prepend the scheme.
            '照片': 'https:' + house.select('img')[0]['src'],
        })

    # Passing `columns` fixes the column order and keeps the schema when the
    # page contains no listings.
    return pd.DataFrame(records, columns=columns)

# 定義爬取多頁永慶房仲網房屋資訊函數

In [3]:
def page_function(page):
    """Scrape the first ``page`` listing pages and combine them.

    Parameters
    ----------
    page : int
        Number of pages to fetch, starting from ``pg=1``.

    Returns
    -------
    pandas.DataFrame
        The per-page frames returned by ``yungching_house_crawler``
        concatenated with a fresh 0..n-1 index. When ``page`` is zero or
        negative nothing is fetched and an empty DataFrame is returned
        (``pd.concat`` would otherwise raise ``ValueError`` on an empty list).
    """
    frames = [
        yungching_house_crawler(
            'https://buy.yungching.com.tw/region?pg={}'.format(number)
        )
        for number in range(1, page + 1)
    ]

    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

# 抓取指定總頁數永慶房仲網房屋資訊

In [4]:
# Scrape the first five listing pages, then preview the first five rows.
df = page_function(page=5)
df.head(n=5)

Unnamed: 0,標題,地點,類型,年數,樓層,土地,主廳,建物,房間數,價格,網址,照片
0,景勤公園邊間華廈,台北市信義區嘉興街,電梯大樓,42.6年,4 ~ 4 / 7樓,6.38坪,主+陽 22.64坪,27.22坪,2房(室)2廳1衛,"1,770 萬",https://buy.yungching.com.tw/house/4216636,https://fps.hfcdn.com/v1/image/?key=NF58Ko5oaC...
1,芝山捷運方正美寓,台北市士林區雨聲街,公寓,41.4年,4 ~ 4 / 4樓,9.15坪,主+陽 28.87坪,28.87坪,3房(室)2廳2衛,"1,580 萬",https://buy.yungching.com.tw/house/4207578,https://fps.hfcdn.com/v1/image/?key=NF58Ko5oaC...
2,安縵莊園高樓景觀,台北市內湖區內湖路三段,電梯大樓,2.4年,13 ~ 13 / 19樓,10.88坪,主+陽 40.82坪,86.92坪,3房(室)2廳2衛,"6,489 萬",https://buy.yungching.com.tw/house/4079039,https://fps.hfcdn.com/v1/image/?key=NF58Ko5oaC...
3,永建靜巷溫馨美居,台北市文山區和興路,公寓,41.9年,3 ~ 3 / 4樓,9.68坪,主+陽 28.38坪,28.38坪,2房(室)2廳1衛,"1,298 萬",https://buy.yungching.com.tw/house/3932555,https://fps.hfcdn.com/v1/image/?key=NF58Ko5oaC...
4,薇閣捷運優質社區,台北市北投區翠宜路,電梯大樓,25.0年,4 ~ 4 / 7樓,26.85坪,主+陽 36.07坪,54.63坪,3房(室)2廳2衛,"1,750 萬",https://buy.yungching.com.tw/house/4032993,https://fps.hfcdn.com/v1/image/?key=NF58Ko5oaC...


# 抓取永慶房仲網房屋資訊封面照

In [5]:
titles=df['標題'].values.tolist()
imgs=df['照片'].values.tolist()

directory='永慶房仲網房屋照片'
# exist_ok makes the cell safe to re-run without a separate existence check.
os.makedirs(directory,exist_ok=True)

# To keep the run short, only the first 10 listings are downloaded as an example.
for title,img in zip(titles[:10],imgs[:10]):
    print(title)
    # Titles are scraped page text: replace path separators and characters
    # that are not valid in file names so the download cannot fail on them
    # or be written outside `directory`.
    safe_title=''.join('_' if ch in '\\/:*?"<>|' else ch for ch in title)
    urlretrieve(img,os.path.join(directory,'{}.jpg'.format(safe_title)))

景勤公園邊間華廈
芝山捷運方正美寓
安縵莊園高樓景觀
永建靜巷溫馨美居
薇閣捷運優質社區
明亮方正三房美寓
旺族大戶典雅豪邸
捷運雙拼電梯三房
河堤電梯平地別墅
天母國中首購首選
