In [None]:
# Check whether selenium is installed and which version (`findstr` is a Windows-only command).
!pip list | findstr selenium

In [None]:
# Show the path of the Python interpreter that backs this kernel.
import sys
sys.executable

In [None]:
import re
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import Select
import time

### Create function to run selenium and retrieve soup from HTML


<font size=3 color=Blue>**參數說明**</font>
> specific_day: 查詢日期（單日 or 開始日期），格式如 "110/06/04"<br>
> period_mode: 查詢特定日期到今日為止的期間模式，預設為False<br>
> fruit_name: 水果名稱，須為 fruit_dict 的鍵（例如「鳳梨」）

In [None]:
# Maps a fruit's display name to the option value selected in the site's product list.
# Pineapple is mostly the Golden Diamond variety (presumably what code "B2" denotes — confirm).
fruit_dict = {"香蕉":"A1", "鳳梨":"B2"}

In [None]:
def _start_chrome(driver_path):
    """Start Chrome with the chromedriver at ``driver_path`` on Selenium 3 or 4."""
    try:
        from selenium.webdriver.chrome.service import Service
        return Chrome(service=Service(driver_path))
    except TypeError:
        # Selenium 3: Chrome() has no ``service`` keyword; the path is positional.
        return Chrome(driver_path)


def All_Market_Prices(specific_day, fruit_name, period_mode=False,
                      driver_path="../../chromedriver"):
    """Query the AMIS fruit daily-transaction page and return the result page as soup.

    Drives a Chrome browser through the query form: fills in the date, optionally
    switches the date scope to "period", picks the market range and the fruit in
    their pop-up iframes, submits the query and parses the resulting HTML.

    Parameters
    ----------
    specific_day : str
        Date typed into the transaction-date input (the single day, or the start
        day in period mode), e.g. "110/06/04".
    fruit_name : str
        Key of the module-level ``fruit_dict``; its value is the option selected
        in the product list.
    period_mode : bool, default False
        When truthy, click the second date-scope radio button (period query)
        before submitting.
    driver_path : str, default "../../chromedriver"
        Location of the chromedriver executable.

    Returns
    -------
    bs4.BeautifulSoup
        The page source captured two seconds after the query button is clicked.

    Raises
    ------
    KeyError
        If ``fruit_name`` is not a key of ``fruit_dict`` (raised before a browser
        is started).
    """
    # Imported locally so this cell stays self-contained. The (By, value) locator
    # form works on Selenium 3 and 4, whereas the find_element_by_* helpers were
    # removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # Resolve the product code first so an unknown name fails before a browser opens.
    product_code = fruit_dict[fruit_name]

    url = "https://amis.afa.gov.tw/fruit/FruitProdDayTransInfo.aspx"
    driver = _start_chrome(driver_path)
    try:
        driver.get(url)

        # Date input: it is read-only, so drop that attribute (through the page's
        # jQuery), then clear the pre-filled value and type the requested date.
        # The element is looked up again after clear(), as the original code did.
        driver.execute_script("$('input[id=ctl00_contentPlaceHolder_txtSTransDate]').removeAttr('readonly')")
        driver.find_element(By.ID, 'ctl00_contentPlaceHolder_txtSTransDate').clear()
        driver.find_element(By.ID, 'ctl00_contentPlaceHolder_txtSTransDate').send_keys(specific_day)

        # Period mode: switch the date-scope radio group to its second option.
        if period_mode:
            driver.find_element(By.XPATH, "//*[@id='ctl00_contentPlaceHolder_ucDateScope_rblDateScope_1']").click()

        # Market selection: open the pop-up, enter its iframe, click the first
        # market-range radio button.
        driver.find_element(By.XPATH, "//*[@id='ctl00_contentPlaceHolder_txtMarket']").click()
        driver.switch_to.frame(driver.find_elements(By.TAG_NAME, "iframe")[0])
        driver.find_element(By.XPATH, "//*[@id='radlMarketRange_0']").click()
        # Leave the iframe: the next element to click lives in the top-level document.
        driver.switch_to.default_content()

        # Fruit selection: open the pop-up, enter its iframe, pick the product
        # code in the list and confirm.
        driver.find_element(By.XPATH, "//*[@id='ctl00_contentPlaceHolder_txtProduct']").click()
        driver.switch_to.frame(driver.find_elements(By.TAG_NAME, "iframe")[0])
        Select(driver.find_element(By.NAME, 'lstProduct')).select_by_value(product_code)
        driver.find_element(By.XPATH, "//*[@id='btnConfirm']").click()
        driver.switch_to.default_content()

        # Submit the query and give the result table a fixed two seconds to render.
        driver.find_element(By.XPATH, "//*[@id='ctl00_contentPlaceHolder_btnQuery']").click()
        time.sleep(2)

        return BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # quit() (not close()) also shuts down the chromedriver process, and the
        # finally clause guarantees it even when a step above raises.
        driver.quit()

In [None]:
# Period-mode query for pineapple starting at "110/06/04"
# (looks like a ROC-calendar date, i.e. 2021-06-04 — confirm).
soup = All_Market_Prices("110/06/04", "鳳梨", period_mode=True)
soup

### raw data processing

In [None]:
# Flatten the first bordered table to text, drop every space character,
# split on newlines and keep only the non-empty tokens.
table_text = soup.select("table[border='1']")[0].text
raw_data = table_text.replace(" ", "").split("\n")
data = [token for token in raw_data if len(token) > 0]
data

In [None]:
# Column names: the first 10 tokens of the flattened table.
cols = data[:10]
cols

In [None]:
# Table body: every token from index 18 onward
# (tokens 10-17 are skipped — presumably extra header/summary cells; confirm).
table = data[18:]
table

In [None]:
# Split the flat token list into consecutive 10-item records,
# one small list per record, all collected in one outer list.
output = [table[start:start + 10] for start in range(0, len(table), 10)]
output

### 轉換成dataframe & data cleaning

In [None]:
# Build the frame in a single constructor call: one inner list per row,
# labelled with the scraped column names. (Assigning df.loc[i] row by row
# re-allocates the frame on every insert.)
df = pd.DataFrame(output, columns=cols)

df

In [None]:
# Strip thousands separators and convert the volume column to numbers.
# The separator removal is skipped when the column is already numeric, so the
# cell can be re-run (the .str accessor raises AttributeError on numeric data).
volume = df["交易量(公斤)"]
if not pd.api.types.is_numeric_dtype(volume):
    volume = volume.str.replace(",", "", regex=False)
df["交易量(公斤)"] = pd.to_numeric(volume)

In [None]:
# Convert the four price columns from text to a numeric dtype.
target_cols = ["上價", "中價", "下價", "平均價(元/公斤)"]
numeric_prices = pd.DataFrame({name: pd.to_numeric(df[name]) for name in target_cols})
df[target_cols] = numeric_prices

In [None]:
# Display the frame after the numeric conversions.
df

In [None]:
# Check each column's dtype.
df.dtypes

In [None]:
# List the column labels.
df.columns

In [None]:
# Aggregation spec for groupby().agg(): average each price column, total the volume.
# Aggregations are given by name because passing np.mean / np.sum to
# groupby().agg() is deprecated since pandas 2.1 and emits a FutureWarning.
fun_dicts = {
    '上價': 'mean',
    '中價': 'mean',
    '下價': 'mean',
    '平均價(元/公斤)': 'mean',
    '交易量(公斤)': 'sum',
}

In [None]:
# Aggregate per market, then show markets ordered by average price, highest first.
# DataFrame.sort_values() requires `by`; the bare call raised TypeError.
# NOTE(review): the intended sort key is not stated — average price is assumed.
df_agg = df.groupby("市場").agg(fun_dicts)
df_agg.sort_values(by="平均價(元/公斤)", ascending=False)