### 信義房屋待售房屋行情資訊
### Price information for houses on sale (source: Sinyi Realty)

In [1]:
import requests
import re
import json
import pandas as pd
import numpy as np
from datetime import datetime
from bs4 import BeautifulSoup
pd.options.mode.chained_assignment = None


def get_total_pages(district: str, city: str = 'NewTaipei-city',
                    timeout: float = 10.0) -> int:
    """Return the number of result pages the listing site shows for a district.

    Downloads the public listing page and reads the text of the last anchor
    carrying the CSS class ``pageLinkClassName``.

    Args:
        district: Postal code inserted into the listing URL, e.g. ``'110'``.
        city: City slug inserted into the listing URL. The default is the
            value that used to be hard-coded here.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The integer parsed from the last pagination link, or ``1`` when the
        page contains no pagination links at all.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        ValueError: If the last pagination link's text is not an integer.
    """
    url = f'https://www.sinyi.com.tw/buy/list/{city}/{district}-zip'
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'html.parser')
    page_links = soup.find_all('a', {'class': 'pageLinkClassName'})
    if not page_links:
        # NOTE(review): assumes a listing rendered without pagination links
        # fits on a single page -- confirm against the live site.
        return 1

    return int(page_links[-1].getText())


def determine_payload(district, page) -> str:
    """Serialize the JSON request body for one page of a district search.

    Every field is a fixed value except ``page`` and ``filter.retRange``,
    which are filled in from the arguments.

    Args:
        district: Value placed as the only element of ``filter.retRange``.
        page: Value stored under the ``page`` key.

    Returns:
        The request body as a JSON string produced by ``json.dumps``.
    """
    # Key spelling (including "exludeSameTrade") and key order are kept
    # exactly as the request body has always been sent.
    search_filter = {
        'exludeSameTrade': False,
        'objectStatus': 0,
        'retType': 2,
        'retRange': [district],
    }
    request_body = {
        'machineNo': '',
        'ipAddress': '0.0.0.0',
        'osType': 3,
        'model': 'web',
        'deviceVersion': 'Windows 10',
        'appVersion': '91.0.4472.77',
        'deviceType': 3,
        'apType': 3,
        'browser': 1,
        'memberId': '',
        'domain': 'www.sinyi.com.tw',
        'utmSource': '',
        'utmMedium': '',
        'utmCampaign': '',
        'utmCode': '',
        'requestor': 1,
        'utmContent': '',
        'utmTerm': '',
        'sinyiGroup': 1,
        'filter': search_filter,
        'page': page,
        'pageCnt': 20,
        'sort': '0',
        'isReturnTotal': True,
    }
    return json.dumps(request_body)


def get_sat(timeout: float = 10.0) -> str:
    """Fetch the ``sat`` value embedded in a listing page's source.

    Downloads a fixed listing page and extracts the digits from the first
    occurrence of ``"sat":"<digits>"`` in the response text.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The digit string found after ``"sat":``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        IndexError: If the page source contains no ``"sat":"<digits>"`` entry.
    """
    url = 'https://www.sinyi.com.tw/buy/list/NewTaipei-city/220-zip'
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of searching an error page.
    res.raise_for_status()

    # A capture group yields the digits directly, without stripping quotes
    # and splitting on ':' afterwards.
    match = re.search(r'"sat":"([0-9]+)"', res.text)
    if match is None:
        # IndexError is kept as the failure type (the previous lookup was
        # `findall(...)[0]`), but now with an explanatory message.
        raise IndexError(f'no "sat" token found in the page source of {url}')
    return match.group(1)


def get_district_selling_house(district: str, page: int,
                               timeout: float = 10.0) -> list:
    """Fetch one page of search results for a district from the search API.

    Posts the body built by ``determine_payload`` to ``searchObject.php`` and
    returns the ``content.object`` entry of the JSON response.

    The ``sat`` request header is read from the module-level variable ``sat``,
    which must be assigned (e.g. ``sat = get_sat()``) before calling this.

    Args:
        district: Postal code forwarded to ``determine_payload``.
        page: Page number forwarded to ``determine_payload``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The value stored under ``['content']['object']`` in the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the response JSON lacks ``content`` or ``object``.
        NameError: If the module-level ``sat`` has not been assigned yet.
    """
    url = 'https://sinyiwebapi.sinyi.com.tw/searchObject.php'
    payload = determine_payload(district, page)
    header = {
        'code': '0',
        'sat': sat,
        # NOTE(review): fixed value copied from a captured request; its
        # meaning is not visible here -- confirm it does not expire.
        'sid': '20210607170904856',
    }
    res = requests.post(url, headers=header, data=payload, timeout=timeout)
    # Surface HTTP errors directly rather than as a JSON/KeyError failure.
    res.raise_for_status()
    source = res.json()

    return source['content']['object']

In [2]:
# Postal code of the district to query.
district = '110'
# Token read by get_district_selling_house() from module scope.
sat = get_sat()
nums_of_total_pages = get_total_pages(district)

# Collect the listings of every result page into one flat list.
result = []
for i in range(1, nums_of_total_pages + 1):
    # extend() appends in place; `result = result + ...` recopied the whole
    # accumulated list on every page.
    result.extend(get_district_selling_house(district, i))
    print(f'共{nums_of_total_pages}頁, 還剩{nums_of_total_pages -i}頁')

df = pd.DataFrame(result)
# Price divided by building area for every listing.
df['price_per_area'] = df['totalPrice'] / df['areaBuilding']
# Keep only the first element of each listing's houselandtype value.
df['houselandtype'] = [x[0] for x in df['houselandtype']]
# A zero area yields +/-inf above; turn those into NaN so mean() skips them.
# replace() returns a new frame, so no in-place write on a selection of df.
output = df[['price_per_area', 'houselandtype']].replace([np.inf, -np.inf], np.nan)
group_by_type = output.groupby('houselandtype').mean().round(2)

共42頁, 還剩41頁
共42頁, 還剩40頁
共42頁, 還剩39頁
共42頁, 還剩38頁
共42頁, 還剩37頁
共42頁, 還剩36頁
共42頁, 還剩35頁
共42頁, 還剩34頁
共42頁, 還剩33頁
共42頁, 還剩32頁
共42頁, 還剩31頁
共42頁, 還剩30頁
共42頁, 還剩29頁
共42頁, 還剩28頁
共42頁, 還剩27頁
共42頁, 還剩26頁
共42頁, 還剩25頁
共42頁, 還剩24頁
共42頁, 還剩23頁
共42頁, 還剩22頁
共42頁, 還剩21頁
共42頁, 還剩20頁
共42頁, 還剩19頁
共42頁, 還剩18頁
共42頁, 還剩17頁
共42頁, 還剩16頁
共42頁, 還剩15頁
共42頁, 還剩14頁
共42頁, 還剩13頁
共42頁, 還剩12頁
共42頁, 還剩11頁
共42頁, 還剩10頁
共42頁, 還剩9頁
共42頁, 還剩8頁
共42頁, 還剩7頁
共42頁, 還剩6頁
共42頁, 還剩5頁
共42頁, 還剩4頁
共42頁, 還剩3頁
共42頁, 還剩2頁
共42頁, 還剩1頁
共42頁, 還剩0頁


In [3]:
# Postal code 110: Xinyi District
#
# houselandtype codes:
#   A: apartment   L: high-rise building   M: mansion (mid-rise)   C: studio
all_type = output['price_per_area'].mean().round(2)

# Select the column by label, then look each type code up by label.
# `.loc[code][0]` relied on positional integer indexing of a label-indexed
# Series, which pandas has deprecated. `.get` also prints nan for a type
# with no listings instead of aborting the report with a KeyError.
avg_by_type = group_by_type['price_per_area']
large = avg_by_type.get('L', np.nan)
apartment = avg_by_type.get('A', np.nan)
mansion = avg_by_type.get('M', np.nan)

print(f'目前郵遞區號 {district} 區待售行情')
print(f'所有房屋 平均: {all_type} 萬/坪')
print(f'大樓 平均: {large} 萬/坪')
print(f'公寓 平均: {apartment} 萬/坪')
print(f'華廈 平均: {mansion} 萬/坪')

目前郵遞區號 110 區待售行情
所有房屋 平均: 107.11 萬/坪
大樓 平均: 105.38 萬/坪
公寓 平均: 80.32 萬/坪
華廈 平均: 93.82 萬/坪
