In [1]:
from datetime import datetime, timedelta
import time 
import re
import requests
from bs4 import BeautifulSoup
from pytz import timezone,utc
import json
import urllib.request
from urllib.parse import urlparse
import os
from uuid import UUID, uuid4, uuid5

In [2]:
DOMAIN = 'https://ecshweb.pchome.com.tw'

UUID_NAMESPACE = os.environ.get('UUID_NAMESPACE') or '91461c99-f89d-49d2-af96-d8e2e14e9b58'
UUID_NS = UUID(UUID_NAMESPACE)

def uuid(my_str=None):
    uuid_obj=None
    if my_str is None:
        uuid_obj = uuid4()
    else:
        uuid_obj = uuid5(UUID_NS, my_str)
    return str( uuid_obj )

def brand(brand_code):
    menu_url = 'https://ecapi.pchome.com.tw/cdn/ecshop/cateapi/v1.5/store&id={}'.format(brand_code)
    resp = requests.get(menu_url).text
    doc = json.loads(resp)
    for i in doc: name = i['Name'].replace('●','').replace('★','').replace('▼','').replace('◆','').replace('■','').replace('　└►','').replace('➔','').replace('➽','').replace('▌','').replace('☑','').replace('❱','').replace('》','').replace('．','').replace('►','').replace('♀','').replace('♂','').replace('\u3000','')
    return(name)

def category(brand_code):
    code = brand_code[:4]
    cat_url = 'https://ecapi.pchome.com.tw/cdn/ecshop/cateapi/v1.5/region&region={}&_callback=cb_ecshopCategoryRegion'.format(code)
    resp = requests.get(cat_url).text
    resp = resp.split('ecshopCategoryRegion(')[1].split(');}catch(e)')[0]
    doc = json.loads(resp)
    category = doc['Name']
    return(category)

def getPchomekey(keyword):
    data={}
    data['keyword used'] = keyword
    data['Access Type'] = 'keyword search'
    data['Section'] = 'Regular'
    data['Section Order'] = '1'
    data['etailer'] = 'PChome'
    
    parseAllPagination(data, keyword)
    
def parseAllPagination(data, keyword):
    ty_url = DOMAIN + '/search/v3.3/all/results?q={}'.format(keyword)
    resp = requests.get(ty_url).text
    doc = json.loads(resp)  
    numPage = doc['totalPage']
    data['# of SKU'] = doc['totalRows']
    for num in range(1, numPage+1):
        pg_url = ty_url + '&page={}&sort=sale/dc'.format(str(num))
        data['Page'] = num
        parseList(pg_url, data, num)
        
def parseList(pg_url, data, num):
    print(pg_url)
    resp = requests.get(pg_url).text
    doc = json.loads(resp)
    position = 0
    count = 0
    
    for product in doc['prods']:
        data['Description'] = product['name']
        pro_id = product['Id']
        data['PrdCode'] = pro_id
        article_url = 'https://24h.pchome.com.tw/prod/' + product['Id']
        data['url'] = article_url
        
        if num == 1: position = position + 1
        else:
            count = count + 1
            position = ((num - 1) * 20) + count
        data['position_section'] = position
        data['position_total'] = data['position_section']
        
        parseArticle(pro_id, data) 
    
def parseArticle(pro_id, data):
    pg_id = pro_id + '-000'
    pg_url = 'https://24h.pchome.com.tw/ecapi/ecshop/prodapi/v2/prod?id={}&fields=Price,Store,isArrival24h'.format(pg_id)
    resp = requests.get(pg_url).text
    time.sleep(2)
    doc = json.loads(resp)
    
    if not len(doc) == 0:
        data['Selling Price'] = doc[pg_id]['Price']['P']
        data['List Price'] = doc[pg_id]['Price']['M']
        if data['List Price'] == 0: data['List Price'] = doc[pg_id]['Price']['P']
    
        brand_code = doc[pg_id]['Store']
        data['Brand'] = brand(brand_code)
        
        if doc[pg_id]['isArrival24h'] == 1:
            data['route'] = 'PChome>線上購物>24h購物>{}>{}'.format(category(brand_code), brand(brand_code))
        else:
            data['route'] = 'PChome>線上購物>購物中心>{}>{}'.format(category(brand_code), brand(brand_code))
       
        if re.search(r'/DJ', data['url']):
            data['route'] = 'PChome24h書店>{}>{}>{}'.format(category(brand_code), brand(brand_code), data['Description'])
    
        crawler_tm = datetime.now(tz=timezone('Asia/Taipei'))
        data['rtime'] = datetime.strftime(crawler_tm, '%Y-%m-%d %H:%M:%S')
        data['ID'] = uuid(pg_url)
        data['Out of Stock'] = 0
        
        pro_url = 'https://ecapi.pchome.com.tw/cdn/marketing/order/v2/prod/activity?prodid='.format(pg_id)
        resp2 = requests.get(pro_url).text
        doc2 = json.loads(resp2)
        for promotion in doc2:
            data['promotion tag'] = [ i['Name'] for i in promotion['Activity'] ]
    
    else:
        data['Selling Price'] = ''
        data['List Price'] = ''
        data['Description'] = ''
        data['Brand'] = ''
        data['route'] = ''
        data['rtime'] = ''
        data['ID'] = ''
        data['Out of Stock'] = 1
    
    print(data)

In [5]:
keyword = input("請輸入關鍵字:") 
getPchomekey(keyword)

請輸入關鍵字:NBA球衣
https://ecshweb.pchome.com.tw/search/v3.3/all/results?q=NBA球衣&page=1&sort=sale/dc
{'keyword used': 'NBA球衣', 'Access Type': 'keyword search', 'Section': 'Regular', 'Section Order': '1', 'etailer': 'PChome', '# of SKU': 98, 'Page': 1, 'Description': 'NBA NIKE 巫師隊 John Wall 球衣 (AJ4650-010)', 'PrdCode': 'DEBNRR-A9009TOAF', 'url': 'https://24h.pchome.com.tw/prod/DEBNRR-A9009TOAF', 'position_section': 1, 'position_total': 1, 'Selling Price': 1090, 'List Price': 2680, 'Brand': '└NBA OUTLET', 'route': 'PChome>線上購物>24h購物>運動服>└NBA OUTLET', 'rtime': '2020-06-27 16:42:32', 'ID': '5dd7a5dc-5272-5d2f-b5e2-8c60d2c8d005', 'Out of Stock': 0}
{'keyword used': 'NBA球衣', 'Access Type': 'keyword search', 'Section': 'Regular', 'Section Order': '1', 'etailer': 'PChome', '# of SKU': 98, 'Page': 1, 'Description': 'NBA NIKE 拓荒者 Damian Lillard 球衣 (AJ4640-010)', 'PrdCode': 'DEBNRR-A900AH9J5', 'url': 'https://24h.pchome.com.tw/prod/DEBNRR-A900AH9J5', 'position_section': 2, 'position_total': 2, 'Selling

KeyboardInterrupt: 

In [11]:
bb = dict({'keyword used': 'NBA球衣', 'Access Type': 'keyword search', 'Section': 'Regular', 'Section Order': '1', 'etailer': 'PChome', '# of SKU': 98, 'Page': 1, 'Description': 'NBA NIKE 巫師隊 John Wall 球衣 (AJ4650-010)', 'PrdCode': 'DEBNRR-A9009TOAF', 'url': 'https://24h.pchome.com.tw/prod/DEBNRR-A9009TOAF', 'position_section': 1, 'position_total': 1, 'Selling Price': 1090, 'List Price': 2680, 'Brand': '└NBA OUTLET', 'route': 'PChome>線上購物>24h購物>運動服>└NBA OUTLET', 'rtime': '2020-06-27 16:42:32', 'ID': '5dd7a5dc-5272-5d2f-b5e2-8c60d2c8d005', 'Out of Stock': 0})

bb

{'keyword used': 'NBA球衣',
 'Access Type': 'keyword search',
 'Section': 'Regular',
 'Section Order': '1',
 'etailer': 'PChome',
 '# of SKU': 98,
 'Page': 1,
 'Description': 'NBA NIKE 巫師隊 John Wall 球衣 (AJ4650-010)',
 'PrdCode': 'DEBNRR-A9009TOAF',
 'url': 'https://24h.pchome.com.tw/prod/DEBNRR-A9009TOAF',
 'position_section': 1,
 'position_total': 1,
 'Selling Price': 1090,
 'List Price': 2680,
 'Brand': '└NBA OUTLET',
 'route': 'PChome>線上購物>24h購物>運動服>└NBA OUTLET',
 'rtime': '2020-06-27 16:42:32',
 'ID': '5dd7a5dc-5272-5d2f-b5e2-8c60d2c8d005',
 'Out of Stock': 0}