In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import time
import csv
import os
import xarray as xr

In [19]:
# Download the IGS14 tenv3 time series of every station listed on the NGL
# station list page whose coordinates fall inside the bounding box below,
# writing one "<station>.csv" file per station. Progress is checkpointed in
# last_index.json so an interrupted run can be resumed.

# Bounding box (decimal degrees) used to select stations
LAT_MIN, LAT_MAX = 29.0, 39.0  # Southern to Northern range
LON_MIN, LON_MAX = -117.0, -104.0  # Western to Eastern range

# Base URLs
station_list_url = 'http://geodesy.unr.edu/NGLStationPages/GlobalStationList'
base_station_url = 'http://geodesy.unr.edu/'
tenv3_base_url = 'http://geodesy.unr.edu/gps_timeseries/tenv3/IGS14/'

# Load last index (if it exists) to continue after a failure. A checkpoint
# that is missing or unreadable (e.g. truncated by an interrupted write)
# simply restarts from the first link.
try:
    with open('last_index.json', 'r') as f:
        last_index = json.load(f).get("index", 0)
    print(f"Resuming from station {last_index}\n")
except (FileNotFoundError, json.JSONDecodeError):
    last_index = 0

# Fetch station list page. Without a timeout the request could block
# forever, and without a status check an HTTP error page would be parsed
# as if it were the station list.
response = requests.get(station_list_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Extract station links (every anchor with an href; anchors that do not lead
# to a page with coordinates are filtered out inside the loop)
station_links = soup.find_all('a', href=True)

print("\n=== Getting Station Data ===\n")

for i, link in enumerate(station_links[last_index:], start=last_index):
    station_name = link.text.strip()
    station_url = base_station_url + link['href']

    # Fetch station page; a network failure only skips this station
    try:
        station_response = requests.get(station_url, timeout=10)
    except requests.RequestException as e:
        print(f"Connection error for {station_name}, skipping. Error: {e}")
        continue  # Skip this station and try the next
    station_soup = BeautifulSoup(station_response.text, 'html.parser')

    # Extract latitude & longitude from the first lines mentioning them
    page_lines = station_soup.get_text().splitlines()
    try:
        lat_line = next(line for line in page_lines if "Latitude:" in line)
        lon_line = next(line for line in page_lines if "Longitude:" in line)
    except StopIteration:
        # No coordinates on this page, so it cannot be range-checked
        continue

    try:
        latitude = float(lat_line.split(":")[1].split()[0])
        longitude = float(lon_line.split(":")[1].split()[0])
    except (IndexError, ValueError):
        # The label is present but no number follows it; skip the station
        # instead of aborting the whole run.
        print(f"Skipping {station_name}: could not parse coordinates.")
        continue

    # Check if coordinates are within range
    if LAT_MIN <= latitude <= LAT_MAX and LON_MIN <= longitude <= LON_MAX:
        tenv3_url = f'{tenv3_base_url}{station_name}.tenv3'

        try:
            tenv3_response = requests.get(tenv3_url, timeout=10)
        except requests.RequestException as e:
            print(f"Error fetching tenv3 data for {station_name}: {e}")
        else:
            if tenv3_response.status_code == 200:
                # Parse tenv3 data: whitespace-separated columns, dropping
                # '#' comment lines and blank lines
                data = [
                    line.split()
                    for line in tenv3_response.text.splitlines()
                    if line.strip() and not line.startswith("#")
                ]

                # The complete series is downloaded on every run, so the
                # file is overwritten; appending would duplicate all rows
                # each time a station is processed again.
                csv_filename = f"{station_name}.csv"
                with open(csv_filename, mode="w", newline="") as csvfile:
                    csv.writer(csvfile).writerows(data)
            else:
                print(f"No IGS14 tenv3 data for {station_name}")

    # Save last processed index
    with open('last_index.json', 'w') as f:
        json.dump({"index": i + 1}, f)  # Save next station index

print("\nData saved to one <station>.csv file per station")


=== Fetching Station Data ===


✅ Data saved to stations_data.json
