In [None]:
'''
匯入套件
'''
# 操作 browser 的 API
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# For dynamic pages: waits until an element appears; usually paired with expected_conditions
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 取得系統時間的工具
from datetime import datetime

# 強制等待 (執行期間休息一下)
from time import sleep

# 執行 command 的時候用的
import os

# 處理下拉式選單的工具
from selenium.webdriver.support.ui import Select

# 隨機取得 User-Agent
from fake_useragent import UserAgent

# Source of randomly chosen User-Agent strings
ua = UserAgent()

# Chrome launch flags, added in the order listed.
# "--headless" (run without a visible browser window) is not enabled.
my_options = webdriver.ChromeOptions()
for _chrome_flag in (
    "--start-maximized",            # maximize the window
    "--incognito",                  # incognito mode
    "--disable-popup-blocking",     # do not block pop-ups
    "--disable-notifications",      # suppress notification prompts
    f'--user-agent={ua.random}',    # random User-Agent
):
    my_options.add_argument(_chrome_flag)

# ActionChains: simulates mouse moves, clicks, drags, right-click menus, typing and key presses in the WebDriver
from selenium.webdriver.common.action_chains import ActionChains

# 加入鍵盤功能 (例如 Ctrl、Alt 等)
from selenium.webdriver.common.keys import Keys

# Start Chrome; the chromedriver binary is resolved by webdriver_manager.
_chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=_chrome_service, options=my_options)

In [None]:
# Create the download folder (no-op when it already exists)
folderPath = 'data1'
os.makedirs(folderPath, exist_ok=True)

# Absolute path of the download folder, derived from folderPath so the two
# cannot drift apart.
download_path = os.path.join(os.getcwd(), folderPath)

# NOTE(review): `driver` is created from `my_options` earlier in this file,
# before this preference is added. Chrome options are sent when the browser
# session starts, so this setting presumably only affects a driver created
# AFTER this point. TODO: move this ahead of the webdriver.Chrome(...) call.
my_options.add_experimental_option("prefs", {
    "download.default_directory": download_path
})

In [None]:
url = 'https://e-service.cwb.gov.tw/HistoryDataQuery/' # address of the data query page
yy = '2022' # year of the data; used for the start date and the merged file name

In [None]:
# Open the website
def visit():
    """Load ``url`` in the shared ``driver``, then pause for one second."""
    settle_seconds = 1
    driver.get(url)
    sleep(settle_seconds)

In [None]:
# Scrape the data
def _download_daily_csvs(day_count):
    """Click "download CSV" then "next day" up to ``day_count`` times.

    Operates on whichever window ``driver`` is currently switched to. Stops
    early when the download button does not appear within 10 seconds, so a
    missing page cannot stall the run forever.
    """
    csv_button_css = 'a#downloadCSV > input[type="image"]'
    next_day_css = 'a#nexItem > input[type="image"]'
    for _ in range(day_count):
        try:
            csv_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, csv_button_css))
            )
        except TimeoutException:
            print("等待逾時")
            break
        csv_button.click()  # download the current day
        driver.find_element(By.CSS_SELECTOR, next_day_css).click()  # go to the next day


def catch(county_count=22, max_stations=120, day_count=365):
    """Download daily CSV reports for each station of each county.

    For every county index and station index, the start date ``{yy}-01-01``
    is typed into the date picker and the query button is clicked. In the
    window that opens, the CSV of each consecutive day is downloaded; the
    window is then closed and the date field cleared.

    Args:
        county_count: Number of ``select#stationCounty`` entries to visit.
        max_stations: Upper bound on station indexes tried per county.
        day_count: Maximum number of consecutive days downloaded per station.
    """
    # Local import: the file-level imports only bring in TimeoutException.
    from selenium.common.exceptions import NoSuchElementException

    for county_index in range(county_count):
        county_select = Select(
            driver.find_element(By.CSS_SELECTOR, 'select#stationCounty')
        )
        county_select.select_by_index(county_index)  # pick the county

        for station_index in range(max_stations):
            station_select = Select(
                driver.find_element(By.CSS_SELECTOR, 'select#station')
            )
            try:
                station_select.select_by_index(station_index)  # pick the station
            except NoSuchElementException:
                # Option indexes are contiguous, so the first missing index
                # means this county has no further stations.
                break
            sleep(1)

            try:
                date_input = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, 'input#datepicker')
                    )
                )
            except TimeoutException:
                print("等待逾時")
                # No date field to fill in: skip this station.
                continue
            date_input.send_keys(f'{yy}-01-01')  # type the start date
            sleep(1)

            # Remember the query window so we can reliably return to it.
            main_handle = driver.current_window_handle
            query_button = driver.find_element(By.CSS_SELECTOR, 'img#doquery')
            ActionChains(driver).click(query_button).perform()
            sleep(1)

            report_handles = [
                handle for handle in driver.window_handles
                if handle != main_handle
            ]
            if not report_handles:
                # Nothing opened; closing now would close the query window.
                print("未開啟查詢結果分頁")
                date_input.clear()
                continue

            driver.switch_to.window(report_handles[-1])  # switch to the result window
            _download_daily_csvs(day_count)
            sleep(1)

            driver.close()  # downloads finished: close the result window
            driver.switch_to.window(main_handle)  # back to the query window
            date_input.clear()  # empty the date field for the next station
        

In [None]:
# Main program: open the query site, then download the CSV files
if __name__ == '__main__':
    visit()
    catch()

In [None]:
'''
以下為天氣檔案資料清整作業
匯入套件
'''

import numpy as np
import pandas as pd

In [None]:
# Scrape the ID, name, longitude and latitude of every observation station.
# All four values are appended together for each station that could be
# selected, so the resulting columns always line up row by row.
from selenium.common.exceptions import NoSuchElementException

ID = []
names = []
Long = []
Lat = []
df = pd.DataFrame()
for county_index in range(0, 22):
    county_select = Select(driver.find_element(By.CSS_SELECTOR, 'select#stationCounty'))
    county_select.select_by_index(county_index)

    # Snapshot value/text of the options now; the elements are looked up
    # again after each selection below.
    station_options = driver.find_elements(By.CSS_SELECTOR, 'select#station > option')
    option_info = [(opt.get_attribute('value'), opt.text) for opt in station_options]

    for station_index, (station_id, station_name) in enumerate(option_info):
        station_select = Select(driver.find_element(By.CSS_SELECTOR, 'select#station'))
        try:
            station_select.select_by_index(station_index)
        except NoSuchElementException:
            # Skip the whole record so no column gets ahead of the others.
            continue
        ID.append(station_id)
        names.append(station_name)
        Long.append(driver.find_element(By.CSS_SELECTOR, 'label#Longitude').text)
        Lat.append(driver.find_element(By.CSS_SELECTOR, 'label#Latitude').text)

IDD = pd.DataFrame(ID, columns=['ID'])
name = pd.DataFrame(names, columns=['name'])
long = pd.DataFrame(Long, columns=['long'])
lat = pd.DataFrame(Lat, columns=['lat'])
# One row per station: ID, name, long, lat
stationData = pd.concat([IDD, name, long, lat], axis=1)

In [None]:
# Zero-padded day ('01'..'31') and month ('01'..'12') strings
dd = [f'{day_number:02d}' for day_number in range(1, 32)]
mm = [f'{month_number:02d}' for month_number in range(1, 13)]

In [None]:
# Merge every downloaded daily CSV into one table and save it.
daily_frames = []
# Only the first four columns are used: ID, name, longitude, latitude.
for station_id, station_name, station_long, station_lat in (
    stationData.iloc[:, :4].itertuples(index=False, name=None)
):
    for j in mm:
        for i in dd:
            # Every month/day combination is tried; dates with no file
            # (including impossible ones such as 02-31) are skipped.
            try:
                df = pd.read_csv(f'./data1/{station_id}-{yy}-{j}-{i}.csv')
            except (FileNotFoundError, pd.errors.EmptyDataError):
                continue
            df.insert(0, 'date', f'{yy}-{j}-{i}')   # add the date column
            df.insert(1, 'location', station_name)  # add the station name
            df.insert(2, 'longitude', station_long) # add the longitude
            df.insert(3, 'latitude', station_lat)   # add the latitude
            # Row 0 of every file is discarded, as before (presumably a
            # second header line in the download -- TODO confirm).
            daily_frames.append(df.drop(index=0, errors='ignore'))

if daily_frames:
    # One concat at the end avoids re-copying the growing table per file.
    df2 = pd.concat(daily_frames, join='outer', ignore_index=True)
else:
    print("找不到任何可合併的 CSV 檔")
    df2 = pd.DataFrame()

os.makedirs('./test', exist_ok=True)  # to_csv does not create the folder
df2.to_csv(f'./test/weather{yy}.csv') # save the merged data as a new CSV file