# Get station data

In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
from tqdm import tqdm
import os
import csv
import re

## Get states

In [2]:
# Download the FAA ASOS landing page and pull out the <select id="state"> element.
url = "https://www.faa.gov/air_traffic/weather/asos/"
# Use the response as a context manager so the HTTP connection is always released.
with urlopen(url) as response:
    html = response.read()
soup = BeautifulSoup(html, features="html.parser")
state_options = soup.find_all("select", {"id": "state"})
str_state_options = str(state_options)

# Each match has the shape `<option value="XX">...(...)<`, where XX is the
# two-letter abbreviation. The pattern is a raw string because `\(` is a regex
# escape, not a string escape; in a normal string literal it triggers an
# invalid-escape-sequence warning.
list_sate_options = re.findall(r'<option value="[A-Z]{2}">.*\(.*\)<', str_state_options)

# Fixed offsets into a matched option string (they do not depend on the match).
temp_int_start_1 = len('<option value=') + 1  # first character after the opening quote
temp_int_start_2 = len('<option value=') + 5  # first character after `XX">`

# Maps abbreviation -> {"full_name": ...}.
dict_states = {}
for str_state_option in list_sate_options:
    # Drops the last 7 characters of the match; presumably the trailing
    # "(XX)<" plus separator -- TODO confirm against the live page markup.
    temp_int_stop_2 = len(str_state_option) - 7
    str_abbreviation = str_state_option[temp_int_start_1: temp_int_start_1 + 2]
    dict_states[str_abbreviation] = {
        "full_name": str_state_option[temp_int_start_2: temp_int_stop_2],
    }

# Abbreviations in alphabetical order.
list_abbreviations = sorted(dict_states)

# Keys stored for each station. Order matters: the station-parsing loop maps
# the i-th key to the i-th line counted from a line starting with
# 'mInfo.city = '.
list_station_dict_key = [
    'city',
    'county',
    'latitude',
    'longitude',
    'stateabbreviation',
    'weatherstationairportid',
    'weatherstationfrequency',
    'weatherstationphone',
    'weatherstationtype',
    'weatherstationurl',
]

## Get stations for each state

In [4]:
# For every state, download its ASOS page and collect one dict per station
# into dict_states[<abbreviation>]['stations'].

# Greedy match of a double-quoted value on one line; compiled once instead of
# on every line of every page.
re_quoted_value = re.compile(r'".*"')

i_count = 0  # number of states processed so far
for str_state_option in tqdm(list_abbreviations, desc="Loading..."):
    i_count += 1
    str_single_state_url = "https://www.faa.gov/air_traffic/weather/asos/?state={}".format(str_state_option)
    # Context manager guarantees the HTTP response is closed after reading.
    with urlopen(str_single_state_url) as response:
        html_single_state = response.read()
    soup_single_state = BeautifulSoup(html_single_state, features="html.parser")
    list_str_singel_page = str(soup_single_state).split('\n')
    list_station_info = []

    for i_line, temp_str in enumerate(list_str_singel_page):
        # A station record begins on a line starting with 'mInfo.city = '.
        if not temp_str.startswith('mInfo.city = '):
            continue
        temp_dict_station_info = {}
        # Slice instead of indexing i_line + i_key: a record that starts near
        # the end of the page can no longer raise IndexError; the missing
        # lines simply contribute no keys.
        list_record_lines = list_str_singel_page[i_line: i_line + len(list_station_dict_key)]
        for str_key, str_record_line in zip(list_station_dict_key, list_record_lines):
            list_match = re_quoted_value.findall(str_record_line)
            if len(list_match) == 1:
                # Strip the surrounding double quotes from the matched value.
                temp_dict_station_info[str_key] = list_match[0][1:-1]
        list_station_info.append(temp_dict_station_info)

    dict_states[str_state_option]['stations'] = list_station_info

Loading...: 100%|██████████| 56/56 [00:12<00:00,  4.47it/s]


## Get station types

In [5]:
# Count stations per station type. '/' in a type is replaced with '_'; the
# resulting string is later used inside an output file name.
dict_station_type_count = {}
for str_state_key, dict_state in dict_states.items():
    for dict_station in dict_state['stations']:
        str_station_type = dict_station['weatherstationtype'].replace('/', '_')
        dict_station_type_count[str_station_type] = (
            dict_station_type_count.get(str_station_type, 0) + 1
        )

# Dicts preserve insertion order, so this lists the types in first-seen order.
list_station_type = list(dict_station_type_count)

# Export station location data

In [21]:
# Clear existing data
# Create the output directory first so that listing it cannot fail with
# FileNotFoundError on a fresh checkout.
os.makedirs("data", exist_ok=True)
temp_list_files = os.listdir("data")
for str_filename in temp_list_files:
    # Only remove files produced by earlier runs of this export.
    if str_filename.startswith("ASOS_station_"):
        print("Deleting", str_filename)
        os.remove(os.path.join("data", str_filename))

## Export all stations by part

In [22]:
# Column names (and column order) passed to csv.DictWriter by the export cells.
fieldnames = [
    'latitude',
    'longitude',
    'weatherstationtype',
    'weatherstationurl',
    'weatherstationairportid',
    'state',
]

In [26]:
# Write every station to data/ASOS_station_part_<n>.csv, starting a new part
# file once the current one already holds more than 1500 rows.
i_part = 0
i_count = 0  # rows written to the current part file
csvfile = open('data/ASOS_station_part_{}.csv'.format(i_part), 'w', newline='')
try:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for str_state_key in dict_states:
        for dict_station in dict_states[str_state_key]['stations']:
            if i_count > 1500:  # Google map limit (original author's note)
                # Roll over only when another row is actually pending, so the
                # export never ends with a header-only part file.
                i_part += 1
                i_count = 0
                csvfile.close()
                csvfile = open('data/ASOS_station_part_{}.csv'.format(i_part), 'w', newline='')
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                print('open_new_file')
            # '/' is replaced with '_' in the station type before it is written.
            str_station_type = dict_station['weatherstationtype'].replace('/', '_')
            writer.writerow({
                'latitude': dict_station['latitude'],
                'longitude': dict_station['longitude'],
                'weatherstationtype': str_station_type,
                'weatherstationurl': dict_station['weatherstationurl'],
                'weatherstationairportid': dict_station['weatherstationairportid'],
                "state": str_state_key,
            })
            i_count += 1
finally:
    # Close whichever part file is open, even if a station lacks a field or a
    # write fails part-way through.
    csvfile.close()


open_new_file


## Export all stations by station type

In [27]:
# Write one CSV per station type: data/ASOS_station_<type>.csv holds only the
# stations whose type (with '/' replaced by '_') equals <type>.
for str_target_station_type in list_station_type:
    str_csv_path = 'data/ASOS_station_{}.csv'.format(str_target_station_type)
    with open(str_csv_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for str_state_key, dict_state in dict_states.items():
            for dict_station in dict_state['stations']:
                str_station_type = dict_station['weatherstationtype'].replace('/', '_')
                # Skip stations that belong in a different type's file.
                if str_station_type != str_target_station_type:
                    continue
                dict_row = {
                    'latitude': dict_station['latitude'],
                    'longitude': dict_station['longitude'],
                    'weatherstationtype': str_station_type,
                    'weatherstationurl': dict_station['weatherstationurl'],
                    'weatherstationairportid': dict_station['weatherstationairportid'],
                    "state": str_state_key,
                }
                writer.writerow(dict_row)
