In [1]:
import pandas as pd
import requests
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

# Where results are written. The directory is created up front so later
# writes cannot fail on a missing folder.
output_dir = './weather_data/'
output_file = os.path.join(output_dir, 'final_open_meteo_hourly_weather_data.xlsx')
progress_file = os.path.join(output_dir, 'progress.txt')  # not referenced elsewhere in this cell
os.makedirs(output_dir, exist_ok=True)

# Open-Meteo endpoint queried for the hourly series
base_url = "https://api.open-meteo.com/v1/forecast"

# Places to fetch, each with its coordinates and the time range wanted.
# NOTE(review): this entry was labelled "Pakke Tiger Reserve, Arunachal Pradesh",
# but 10.40 N / 99.28 E does not look like a location in Arunachal Pradesh -
# confirm whether the label or the coordinates are wrong.
locations = [
    dict(
        latitude=10.401193,
        longitude=99.278365,
        start="2025-01-01T00:00:00Z",
        end="2025-01-02T00:00:00Z",
    ),
]
def _to_open_meteo_hour(value):
    """Format a timestamp as the ``YYYY-MM-DDTHH:MM`` string used for hourly windows.

    Accepts anything ``pd.to_datetime`` can parse. Naive values are treated as
    UTC and timezone-aware values are converted to UTC, matching the
    ``timezone=UTC`` setting sent with the request.
    """
    return pd.to_datetime(value, utc=True).strftime("%Y-%m-%dT%H:%M")


# Function to fetch hourly data from Open-Meteo API with retries
def fetch_open_meteo_hourly_data(lat, lon, start_datetime, end_datetime, retries=3, delay=2):
    """Fetch hourly weather variables for one coordinate from Open-Meteo.

    The request is attempted up to ``retries`` times with ``delay`` seconds
    between attempts. A network/HTTP error and a response without an
    ``hourly`` section both count as a failed attempt, and each failure is
    printed so it is visible in the notebook output.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        start_datetime: Start of the wanted window (any ``pd.to_datetime``-parsable value).
        end_datetime: End of the wanted window (same format as ``start_datetime``).
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        The ``hourly`` dict from the JSON response, or ``None`` if every
        attempt failed.

    NOTE(review): ``base_url`` points at the forecast endpoint; confirm it
    serves the dates being requested, otherwise older ranges may need a
    different Open-Meteo endpoint.
    """
    params = {
        "latitude": lat,
        "longitude": lon,
        # Open-Meteo documents `start_hour`/`end_hour` for hourly windows.
        # The previously sent `start`/`end` keys are not among its documented
        # request parameters, so the requested window was not being applied.
        "start_hour": _to_open_meteo_hour(start_datetime),
        "end_hour": _to_open_meteo_hour(end_datetime),
        "hourly": [
            "temperature_2m", "windspeed_10m", "relative_humidity_2m", "precipitation",
            "dewpoint_2m", "cloudcover", "surface_pressure", "shortwave_radiation",
            "sunshine_duration"
        ],
        "timezone": "UTC"
    }
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json().get("hourly", {})
            if data:
                return data
            failure = "response contained no hourly data"
        except requests.exceptions.RequestException as e:
            failure = str(e)
        print(f"Open-Meteo request for lat {lat}, lon {lon} failed "
              f"(attempt {attempt}/{retries}): {failure}")
        # Back off before the next attempt, but do not stall after the last one.
        if attempt < retries:
            time.sleep(delay)
    return None

# Output column -> Open-Meteo hourly variable it is read from.
_HOURLY_COLUMNS = {
    'temperature': 'temperature_2m',
    'windspeed': 'windspeed_10m',
    'humidity': 'relative_humidity_2m',
    'precipitation': 'precipitation',
    'dewpoint': 'dewpoint_2m',
    'cloud_cover': 'cloudcover',
    'pressure': 'surface_pressure',
    'solar_radiation': 'shortwave_radiation',
    'sunshine_duration': 'sunshine_duration',
}


# Process a single location
def process_location(index, location):
    """Fetch one location's hourly weather and flatten it into row dicts.

    Args:
        index: Zero-based position of the location (used only for the progress message).
        location: Mapping with ``latitude``, ``longitude``, ``start`` and ``end`` keys.

    Returns:
        A list with one dict per returned timestamp, holding the coordinates,
        the parsed timestamp and one value per weather column. A variable
        that is missing from the response, or shorter than the ``time``
        series, yields ``None`` for the affected rows. The list is empty
        when the fetch returned nothing.
    """
    print(f"Processing location {index + 1}...")
    lat, lon = location['latitude'], location['longitude']
    weather_data = fetch_open_meteo_hourly_data(lat, lon, location['start'], location['end'])
    if not weather_data:
        return []

    timestamps = weather_data.get('time') or []
    # Resolve every series once. A `[None]` default only covers index 0 and
    # would raise IndexError from the second hour onwards.
    series_by_column = {
        column: weather_data.get(variable) or []
        for column, variable in _HOURLY_COLUMNS.items()
    }

    results = []
    for hour, time_value in enumerate(timestamps):
        row = {
            'latitude': lat,
            'longitude': lon,
            'timestamp': pd.to_datetime(time_value),
        }
        for column, series in series_by_column.items():
            row[column] = series[hour] if hour < len(series) else None
        results.append(row)
    return results

# Save processed data
def save_data(results):
    """Write the collected rows to ``output_file`` as an Excel workbook.

    Nothing is written (and nothing is printed) when ``results`` is empty.
    """
    if not results:
        return
    pd.DataFrame(results).to_excel(output_file, index=False)
    print(f"Weather data saved to {output_file}.")

# Process all locations
def main():
    """Fetch every entry of ``locations`` concurrently and save the combined rows.

    Each location is handled by ``process_location`` on a thread pool. A
    location whose worker raises is reported and skipped, so the rows already
    fetched for other locations are still saved. Rows are assembled in the
    order of ``locations`` regardless of which request finishes first, which
    keeps the output file reproducible between runs.
    """
    rows_by_index = {}
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(process_location, idx, loc): idx for idx, loc in enumerate(locations)}
        for future in as_completed(futures):
            idx = futures[future]
            try:
                rows = future.result()
            except Exception as e:
                print(f"Error processing location {idx + 1}: {e}")
                continue
            if rows:
                rows_by_index[idx] = rows

    results = [row for idx in sorted(rows_by_index) for row in rows_by_index[idx]]
    save_data(results)

if __name__ == "__main__":
    main()


Processing location 1...
Weather data saved to ./weather_data/final_open_meteo_hourly_weather_data.xlsx.


In [2]:
import pandas as pd
import ee
import concurrent.futures
import time

# Initialize Google Earth Engine.
# Both calls run when the cell executes, before any of the lookups defined below.
# NOTE(review): the Cloud project id is hard-coded here; confirm it is the
# intended project before re-running this notebook in another environment.
ee.Authenticate()
ee.Initialize(project='ee-71762205062')

# Function to get elevation using Google Earth Engine SRTM dataset
def get_elevation_ee(lat, lon):
    """Look up the SRTM ``elevation`` band value at a single coordinate.

    Returns whatever ``getInfo()`` yields for the sampled value, or ``None``
    when the lookup raises; in that case the error is printed rather than
    propagated.
    """
    try:
        location = ee.Geometry.Point(lon, lat)  # note the (lon, lat) argument order
        srtm = ee.Image('USGS/SRTMGL1_003').select('elevation')
        sampled = srtm.reduceRegion(
            reducer=ee.Reducer.first(),
            geometry=location,
            scale=30,  # 30m resolution
        )
        value = sampled.get('elevation')
        if not value:
            return None
        return value.getInfo()
    except Exception as err:
        print(f"EE Elevation Error: lat {lat}, lon {lon}: {err}")
        return None

# Function to get NDVI using updated MODIS dataset
def get_modis_ndvi(lat, lon, start_date, end_date):
    """Return the NDVI band value of the first MOD13Q1 image in a date window.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        start_date: Start of the window, passed to ``filterDate``.
        end_date: End of the window, passed to ``filterDate`` (Earth Engine
            treats this end as exclusive).

    Returns:
        The value ``getInfo()`` yields for the ``NDVI`` band at the point, or
        ``None`` when no image falls in the window or the lookup raises (the
        error is printed, not propagated).

    NOTE(review): the band value is returned exactly as stored. The dataset
    catalog lists a 0.0001 scale factor for MOD13Q1 NDVI - confirm whether
    callers expect the raw or the scaled value.
    """
    try:
        point = ee.Geometry.Point(lon, lat)
        collection = (
            ee.ImageCollection('MODIS/061/MOD13Q1')
            .filterBounds(point)
            .filterDate(start_date, end_date)
        )
        # `collection.first()` only builds a client-side proxy object, so a
        # truthiness test on it can never detect an empty collection. Ask the
        # server how many images matched instead.
        if collection.size().getInfo() == 0:
            return None
        ndvi = collection.first().select('NDVI').clip(point)
        result = ndvi.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=250  # MODIS spatial resolution is 250m
        ).get('NDVI')
        return result.getInfo()
    except Exception as e:
        print(f"NDVI Error: lat {lat}, lon {lon}, {start_date} - {end_date}: {e}")
        return None

# Function to process a single location
def process_location(location, years=(2014,)):
    """Collect elevation and one NDVI value per month for a single location.

    Args:
        location: Row/mapping with ``rounded_lat`` and ``rounded_lon`` keys.
        years: Years to cover; defaults to 2014 only.

    Returns:
        A list of dicts (one per year/month) with ``latitude``, ``longitude``,
        ``year``, ``month``, ``ndvi`` and ``elevation``. Elevation is looked
        up once and repeated on every row.
    """
    lat, lon = location['rounded_lat'], location['rounded_lon']
    elevation = get_elevation_ee(lat, lon)
    location_results = []
    for year in years:
        for month in range(1, 13):
            month_start = pd.Timestamp(year=year, month=month, day=1)
            # The end of an Earth Engine date filter is exclusive, so the window
            # must end on the first day of the NEXT month; ending on the last
            # day of the month would silently drop that day.
            next_month_start = month_start + pd.offsets.MonthBegin(1)
            ndvi = get_modis_ndvi(
                lat,
                lon,
                month_start.strftime('%Y-%m-%d'),
                next_month_start.strftime('%Y-%m-%d'),
            )
            location_results.append({
                'latitude': lat,
                'longitude': lon,
                'year': year,
                'month': month,
                'ndvi': ndvi,
                'elevation': elevation
            })
            print(ndvi,month,year,lat,lon)
    return location_results

# Main function
def main():
    """Build the NDVI/elevation table for every distinct site in the weather file.

    Reads the weather workbook, reduces it to unique coordinates rounded to
    two decimals, runs ``process_location`` for each on a thread pool, and
    writes the combined rows to an Excel file. A location whose worker raises
    is reported with its coordinates and skipped. Rows are sorted before
    saving so the output does not depend on thread completion order.
    """
    # Read Excel file
    df = pd.read_excel('./weather_data/final_open_meteo_hourly_weather_data.xlsx')

    # Round latitude and longitude
    df['rounded_lat'] = df['latitude'].round(2)
    df['rounded_lon'] = df['longitude'].round(2)

    # Get unique lat/lon combinations
    unique_locations = df[['rounded_lat', 'rounded_lon']].drop_duplicates().reset_index(drop=True)

    results = []
    start_time = time.time()

    # Use ThreadPoolExecutor for concurrent processing
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(process_location, location): location for _, location in unique_locations.iterrows()}

        # Collect results as they complete
        for future in concurrent.futures.as_completed(futures):
            location = futures[future]
            try:
                results.extend(future.result())
            except Exception as e:
                print(f"Error processing location "
                      f"(lat {location['rounded_lat']}, lon {location['rounded_lon']}): {e}")

    # Convert results to DataFrame, in a stable order
    results_df = pd.DataFrame(results)
    if not results_df.empty:
        results_df = (
            results_df
            .sort_values(['latitude', 'longitude', 'year', 'month'])
            .reset_index(drop=True)
        )

    # Save results to Excel
    # NOTE(review): the file name mentions 2015/2016 while the rows cover the
    # years process_location is run with - confirm the intended name.
    results_df.to_excel('ndvi_elevation_2015_20161.xlsx', index=False)

    end_time = time.time()
    print(f"Processing complete! Total time: {end_time - start_time:.2f} seconds")

# Run the main function
if __name__ == "__main__":
    main()


7877 1 2014 10.4 99.28
7703 2 2014 10.4 99.28
6705 3 2014 10.4 99.28
3645 4 2014 10.4 99.28
7068 5 2014 10.4 99.28
4096 6 2014 10.4 99.28
5205 7 2014 10.4 99.28
8607 8 2014 10.4 99.28
5679 9 2014 10.4 99.28
7261 10 2014 10.4 99.28
8613 11 2014 10.4 99.28
9095 12 2014 10.4 99.28
Processing complete! Total time: 14.85 seconds


In [None]:
import pandas as pd

# Number of decimals the NDVI/elevation table uses for its coordinates.
COORD_DECIMALS = 2

# Sample df1: hourly weather rows for one site (full-precision coordinates)
df1 = pd.DataFrame({
    'latitude': [26.789, 26.789, 26.789],
    'longitude': [93.2923, 93.2923, 93.2923],
    'timestamp': ['2025-01-01 00:00:00', '2025-01-01 01:00:00', '2025-01-01 02:00:00'],
    'temperature': [14.6, 15.0, 16.2],
    'windspeed': [0.8, 1.5, 1.6],
    'humidity': [96, 94, 92],
    'precipitation': [0, 0, 0],
    'dewpoint': [13.9, 14.0, 14.9],
    'cloud_cover': [37, 26, 66],
    'pressure': [1002.1, 1003.4, 1004.5],
    'solar_radiation': [0, 8, 123],
    'sunshine_duration': [0, 0, 3600]
})

# Sample df2: monthly NDVI/elevation rows for the SAME site, keyed by the
# rounded coordinates. The sample previously used a different site
# (27.1, 92.37), so the left merge matched nothing and every ndvi/elevation
# value came out as NaN.
df2 = pd.DataFrame({
    'latitude': [26.79, 26.79],
    'longitude': [93.29, 93.29],
    'year': [2025, 2025],
    'month': [1, 2],
    'ndvi': [8546, None],
    'elevation': [1912, 1912]
})

# Convert the 'timestamp' in df1 to datetime and extract year and month
df1['timestamp'] = pd.to_datetime(df1['timestamp'])
df1['year'] = df1['timestamp'].dt.year
df1['month'] = df1['timestamp'].dt.month

# Build join keys from rounded coordinates on both sides. Merging on the raw
# float columns only matches when both tables carry bit-identical values,
# which is not the case when one side has been rounded.
weather_keyed = df1.assign(
    lat_key=df1['latitude'].round(COORD_DECIMALS),
    lon_key=df1['longitude'].round(COORD_DECIMALS),
)
ndvi_keyed = (
    df2.assign(
        lat_key=df2['latitude'].round(COORD_DECIMALS),
        lon_key=df2['longitude'].round(COORD_DECIMALS),
    )
    # Keep the weather table's own coordinates in the result.
    .drop(columns=['latitude', 'longitude'])
)

# Left-merge so every weather row is kept; `validate` guards against duplicate
# site/month rows on the NDVI side multiplying the hourly rows.
df_merged = (
    weather_keyed
    .merge(
        ndvi_keyed,
        on=['lat_key', 'lon_key', 'year', 'month'],
        how='left',
        validate='many_to_one',
    )
    .drop(columns=['lat_key', 'lon_key'])
)

# Display the resulting DataFrame
print(df_merged)
