In [3]:
# BEAUTIFULSOUP
from bs4 import BeautifulSoup

# SELENIUM
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.select import Select

# MISCELLANEOUS
import csv
import time

In [5]:
# CONFIGURING SELENIUM ---
PATH = 'C:\SeleniumDrivers\chromedriver.exe' # replace this with your selenium driver path
serv_obj = Service(executable_path=PATH) 
driver = webdriver.Chrome(service=serv_obj)

# OPENING A NEW CHROME PROFILE ---
driver.get('http://www.bom.gov.au/climate/data/index.shtml?bookmark=193')
driver.maximize_window()
driver.implicitly_wait(10)

### Option 1: Select Your Area Of Interest

In [6]:
# ENTER YOUR AREA OF INTEREST ---
area = 'armidale' # change this with your area name
driver.find_element(By.XPATH, "//div[@class='line']//input[@id='p_locSearch']").send_keys(area) 
driver.find_element(By.XPATH, "//input[@id='text']").click()
time.sleep(2) # comment out if running one cell at a time

In [7]:
# ALL THE TOWNS IN THAT AREA ---
towns = driver.find_elements(By.XPATH, "//div[@id='Matching']//select[@id='matchList']/option")
ntowns = len(towns)
print(f'There are {ntowns} town(s) is the area {area.capitalize()}:', end='\n\n')

for i in range(1, ntowns+1):
    town = driver.find_element(By.XPATH, f"//div[@id='Matching']//select[@id='matchList']/option[{i}]").text
    print(i-1, town)

There are 2 town(s) is the area Armidale:

0 Armidale, NSW, 30.52°S, 151.67°E
1 West Armidale, NSW, 30.51°S, 151.65°E


In [8]:
# SELECTING A PARTICULAR TOWN ---
select_box = driver.find_element(By.XPATH, "//div[@id='Matching']//select[@id='matchList']")
town_list = Select(select_box)
town_list.select_by_index("0")

# Or Select by name -
# town = 'Armidale, NSW, 30.52°S, 151.67°E' # paste your town here
# town_list.select_by_visible_text(town)

time.sleep(2) # comment out if running one cell at a time

In [9]:
# ALL THE STATIONS IN THAT TOWN ---
stations = driver.find_elements(By.XPATH, "//form[@id='nearestForm']//select[@id='nearest10']/option")
nstations = len(stations)
print(f'There are {nstations} station(s) is the town {town}:', end='\n\n')

for i in range(1, nstations+1):
    station = driver.find_element(By.XPATH, f"//form[@id='nearestForm']//select[@id='nearest10']/option[{i}]").text
    print(i-1, station)

There are 9 station(s) is the town West Armidale, NSW, 30.51°S, 151.65°E:

0 056037 Armidale (Tree Group Nursery) NSW (0.5km away)
1 056238 Armidale Airport AWS NSW (5.2km away)
2 056034 Uralla (Dumaresq St) NSW (21.9km away)
3 056240 Guyra (Box Point) NSW (21.9km away)
4 056065 Uralla (Mihi) NSW (22.2km away)
5 057091 Uralla (Blue Nobby) NSW (27.9km away)
6 056028 Uralla (Salisbury Court) NSW (28.1km away)
7 056016 Guyra Post Office NSW (33.1km away)
8 056229 Guyra Hospital NSW (34.0km away)


In [10]:
# SELECTING A STATION ---
select_box = driver.find_element(By.XPATH, "//form[@id='nearestForm']//select[@id='nearest10']")
station_list = Select(select_box)
station_list.select_by_index('0')

# Or Select by name -
# station = '056037 Armidale (Tree Group Nursery) NSW (0.5km away)' # paste your station here
# station_list.select_by_visible_text(station)

In [11]:
# GETTING THE TABLE ---
time.sleep(2)
driver.find_element(By.XPATH, "//input[@id='getData']").click()

***

### Option 2: Directly Enter Station Number

In [21]:
# ENTER THE STATION NUMBER ---
driver.find_element(By.XPATH, "//input[@id='p_stn_num']").send_keys("056037") # here
driver.find_element(By.XPATH, "//input[@id='getData']").click()

###

***

In [12]:
# SWITCHING TABS ---
# time.sleep(2) # uncomment in case of traceback
winids = driver.window_handles
driver.switch_to.window(winids[1])

# GETTING THE HTML ---
soup = BeautifulSoup(driver.page_source, "lxml")
# print(soup.prettify())

driver.minimize_window()

In [13]:
# CREATING A CSV FILE ---
csv_file = open(f"weather_report_{area.capitalize()}.csv", "w", newline="", encoding="utf-8")
csv_writer = csv.writer(csv_file)

columns = ['2023','Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']
csv_writer.writerow(columns)

56

In [14]:
# DAILY GLOBAL SOLAR EXPOSURE TABLE ---
rows = soup.select('tbody>tr')
for i, row in enumerate(rows):
    if i != 0:
        csv_writer.writerow([
            row.th.text,
            row.select('td')[0].text,
            row.select('td')[1].text,
            row.select('td')[2].text,
            row.select('td')[3].text,
            row.select('td')[4].text,
            row.select('td')[5].text,
            row.select('td')[6].text,
            row.select('td')[7].text,
            row.select('td')[8].text,
            row.select('td')[9].text,
            row.select('td')[10].text,
            row.select('td')[11].text
        ])
csv_file.close()
driver.quit()