In [17]:
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from tenacity import retry, stop_after_attempt, wait_exponential
import os

# File paths
input_file = 'wfigs_az.csv'
intermediate_file = 'wfigs_az_with_weather_partial.csv'
final_output_file = 'wfigs_az_with_weather_single_date.csv'

# Load the CSV file
if os.path.exists(intermediate_file):
    print(f"Resuming from partially saved file: {intermediate_file}")
    data = pd.read_csv(intermediate_file)
else:
    print(f"Starting from the original file: {input_file}")
    data = pd.read_csv(input_file)
    # Ensure proper date formatting
    data['FireDiscoveryDateTime'] = pd.to_datetime(data['FireDiscoveryDateTime'], errors='coerce').dt.strftime('%Y-%m-%d')
    # Filter invalid latitude and longitude
    data = data[(data['InitialLatitude'].between(-90, 90)) & (data['InitialLongitude'].between(-180, 180))]
    # Add new columns for weather data
    data['tmax'] = None
    data['tmin'] = None
    data['prcp'] = None

# Function to fetch weather data from Daymet API for a specific date with retries
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=1, max=10))
def get_daymet_data(lat, lon, date, idx):
    url = "https://daymet.ornl.gov/single-pixel/api/data"
    params = {
        'lat': lat,
        'lon': lon,
        'vars': 'tmax,tmin,prcp',  # Variables: max temp, min temp, and precipitation
        'start': date,  # Start date
        'end': date,    # End date (same as start date for a single day)
        'format': 'json'
    }

    response = requests.get(url, params=params, timeout=10)  # Add timeout for the request

    # If response is successful
    if response.status_code == 200:
        data = response.json()
        # Check if the response contains data
        if 'data' in data and len(data['data']) > 0:
            record = data['data']
            return {
                'idx': idx,
                'tmax': record.get("tmax (deg c)", [None])[0],
                'tmin': record.get("tmin (deg c)", [None])[0],
                'prcp': record.get("prcp (mm/day)", [None])[0]
            }
        else:
            return {'idx': idx, 'tmax': None, 'tmin': None, 'prcp': None}
    else:
        response.raise_for_status()

# Concurrent request processing
def process_rows_concurrently(data, intermediate_file, max_workers=10):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Prepare rows to process (skip already completed rows)
        rows_to_process = data[(data['tmax'].isnull()) | (data['tmin'].isnull()) | (data['prcp'].isnull())]
        futures = {
            executor.submit(get_daymet_data, row['InitialLatitude'], row['InitialLongitude'], row['FireDiscoveryDateTime'], idx): idx
            for idx, row in rows_to_process.iterrows()
        }

        # Process futures as they complete
        for i, future in enumerate(tqdm(as_completed(futures), total=len(futures), desc="Processing rows")):
            try:
                result = future.result()
                if result:
                    idx = result['idx']
                    data.at[idx, 'tmax'] = result['tmax']
                    data.at[idx, 'tmin'] = result['tmin']
                    data.at[idx, 'prcp'] = result['prcp']
            except Exception as e:
                print(f"Exception: {e}")

            # Save intermediate progress every 100 rows
            if i % 100 == 0:
                data.to_csv(intermediate_file, index=False)
                print(f"Intermediate progress saved to {intermediate_file}")

        # Final save after processing all rows
        data.to_csv(intermediate_file, index=False)
        print(f"All intermediate progress saved to {intermediate_file}")

# Run the processing
process_rows_concurrently(data, intermediate_file)

# Final save
data.to_csv(final_output_file, index=False)
print(f"Final results saved to {final_output_file}.")


Resuming from partially saved file: wfigs_az_with_weather_partial.csv


  data = pd.read_csv(intermediate_file)
Processing rows:   0%|          | 0/17 [00:00<?, ?it/s]

Exception: RetryError[<Future at 0x7d196e8d2950 state=finished raised HTTPError>]


Processing rows:   6%|▌         | 1/17 [00:25<06:46, 25.39s/it]

Intermediate progress saved to wfigs_az_with_weather_partial.csv
Exception: RetryError[<Future at 0x7d195ed08e20 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d18a8f2afb0 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d18a8e164a0 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d18a9012530 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d195ed09540 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d195eeee200 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d191cb1f970 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d195ed74910 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d191cb1ff70 state=finished raised HTTPError>]


Processing rows:  82%|████████▏ | 14/17 [00:45<00:07,  2.50s/it]

Exception: RetryError[<Future at 0x7d196e834fa0 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d18a8fadcf0 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d196e837130 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d196e835c60 state=finished raised HTTPError>]


Processing rows: 100%|██████████| 17/17 [00:45<00:00,  2.67s/it]

Exception: RetryError[<Future at 0x7d196e837b50 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d18a8f29930 state=finished raised HTTPError>]
Exception: RetryError[<Future at 0x7d195ed96a40 state=finished raised HTTPError>]





All intermediate progress saved to wfigs_az_with_weather_partial.csv
Final results saved to wfigs_az_with_weather_single_date.csv.
