In [2]:
!pip install pandas httpx

Defaulting to user installation because normal site-packages is not writeable


In [1]:
import asyncio
import httpx
import pandas as pd
import requests 
from datetime import datetime 
import numpy as np

In [2]:
GAME_CONFIG = {
    "location": {
        "name": "Tháp Mười, Đồng Tháp",
        "longitude": 105.86227561323018,
        "latitude": 10.535512374246121
    }, 
    "total_turns": 16,
    "stages": {
        "Gieo mạ": {"start_turn": 1, "end_turn": 4},
        "Đẻ nhánh": {"start_turn": 5, "end_turn": 8},
        "Làm đòng": {"start_turn": 9, "end_turn": 12},
        "Vào chắc": {"start_turn": 13, "end_turn": 16}
    },
    "seasons": {
        "dong-xuan": {
            "name": "Vụ Đông-Xuân",
            "start_date": "20231101",
            "end_date": "20240220"
        },
        "he-thu": {
            "name": "Vụ Hè-Thu",
            "start_date": "20240415",
            "end_date": "20240804"
        },
        "thu-dong": {
            "name": "Vụ Thu-Đông",
            "start_date": "20240915",
            "end_date": "20250104"
        }
    }
}

In [3]:
# Parameters for weather data from NASA POWER API
params = "T2M,RH2M,PRECTOTCORR"

NASA_POWER_API = "https://power.larc.nasa.gov/api/temporal/daily/point"

def fetch_daily_power_data(
    start: int,
    end: int,
    longitude: float,
    latitude: float,
    community: str = "ag",
    parameters: str = "RH2M",
    format: str = "json",
    header: str = "true",
    time_standard: str = "lst"
):
    params = {
        "start": start,
        "end": end,
        "longitude": longitude,
        "latitude": latitude,
        "community": community,
        "parameters": parameters,
        "format": format.lower(),
        "header": header.lower(),
        "time-standard": time_standard.lower()
    }
    try:
        response = requests.get(NASA_POWER_API, params=params, timeout=30)
    except requests.RequestException as e:
        print(f"Error fetching data from NASA POWER API: {e}")
        return None
    
    return response.json()

Processes the raw JSON data from NASA POWER, converts it to a pandas DataFrame,
and aggregates it into weekly averages/sums.

Xử lý dữ liệu JSON thô, chuyển thành DataFrame và tính toán trung bình/tổng theo tuần.

In [4]:
print("--- Starting NASA Data Fetch & Process Script ---")
season_data = {}
for key, season_info in GAME_CONFIG["seasons"].items():
        print(f"\nProcessing season: {season_info['name']} ({key})")
        
        # Lấy dữ liệu theo ngày
        daily_data_json = fetch_daily_power_data(season_info["start_date"], season_info["end_date"], GAME_CONFIG['location']['longitude'], GAME_CONFIG['location']['latitude'], parameters=params)
        print(f"Fetched daily data for {season_info['name']}: {daily_data_json}")
        season_data[key] = daily_data_json

--- Starting NASA Data Fetch & Process Script ---

Processing season: Vụ Đông-Xuân (dong-xuan)
Fetched daily data for Vụ Đông-Xuân: {'type': 'Feature', 'geometry': {'type': 'Point', 'coordinates': [105.862, 10.536, 2.46]}, 'properties': {'parameter': {'T2M': {'20231101': 27.18, '20231102': 27.42, '20231103': 27.75, '20231104': 27.28, '20231105': 27.44, '20231106': 27.13, '20231107': 26.93, '20231108': 27.12, '20231109': 27.55, '20231110': 26.61, '20231111': 26.71, '20231112': 27.1, '20231113': 27.84, '20231114': 27.65, '20231115': 27.0, '20231116': 26.56, '20231117': 26.37, '20231118': 24.94, '20231119': 24.17, '20231120': 25.69, '20231121': 26.11, '20231122': 25.85, '20231123': 26.05, '20231124': 25.05, '20231125': 26.0, '20231126': 26.56, '20231127': 26.41, '20231128': 25.8, '20231129': 26.36, '20231130': 27.11, '20231201': 27.13, '20231202': 26.49, '20231203': 25.9, '20231204': 27.07, '20231205': 27.74, '20231206': 27.27, '20231207': 26.84, '20231208': 27.36, '20231209': 27.57, '202

Đông Xuân

In [5]:
dong_xuan_data = season_data["dong-xuan"]

if not dong_xuan_data or 'properties' not in dong_xuan_data or 'parameter' not in dong_xuan_data['properties']:
    print("    - No valid data to process.")

In [6]:
# Chuyển dữ liệu JSON thành một DataFrame dễ xử lý
df = pd.DataFrame(dong_xuan_data['properties']['parameter'])

In [7]:
df

Unnamed: 0,T2M,RH2M,PRECTOTCORR
20231101,27.18,88.89,17.75
20231102,27.42,87.00,5.85
20231103,27.75,86.31,8.65
20231104,27.28,88.54,10.42
20231105,27.44,90.54,15.99
...,...,...,...
20240216,29.47,61.55,0.00
20240217,29.17,58.60,0.00
20240218,29.69,64.31,0.00
20240219,29.75,64.70,0.00


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112 entries, 20231101 to 20240220
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   T2M          112 non-null    float64
 1   RH2M         112 non-null    float64
 2   PRECTOTCORR  112 non-null    float64
dtypes: float64(3)
memory usage: 3.5+ KB


In [9]:
# NASA trả về ngày tháng dưới dạng chuỗi 'YYYYMMDD', ta cần chuyển nó thành kiểu datetime
df['date'] = pd.to_datetime(df.index, format='%Y%m%d')
df = df.set_index('date')

In [10]:
df

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-11-01,27.18,88.89,17.75
2023-11-02,27.42,87.00,5.85
2023-11-03,27.75,86.31,8.65
2023-11-04,27.28,88.54,10.42
2023-11-05,27.44,90.54,15.99
...,...,...,...
2024-02-16,29.47,61.55,0.00
2024-02-17,29.17,58.60,0.00
2024-02-18,29.69,64.31,0.00
2024-02-19,29.75,64.70,0.00


In [11]:
df.replace(-999, pd.NA, inplace=True)

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 112 entries, 2023-11-01 to 2024-02-20
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   T2M          112 non-null    float64
 1   RH2M         112 non-null    float64
 2   PRECTOTCORR  112 non-null    float64
dtypes: float64(3)
memory usage: 3.5 KB


In [13]:
df['turn_number'] = (np.arange(len(df)) // 28) + 1

df

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR,turn_number
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-11-01,27.18,88.89,17.75,1
2023-11-02,27.42,87.00,5.85,1
2023-11-03,27.75,86.31,8.65,1
2023-11-04,27.28,88.54,10.42,1
2023-11-05,27.44,90.54,15.99,1
...,...,...,...,...
2024-02-16,29.47,61.55,0.00,4
2024-02-17,29.17,58.60,0.00,4
2024-02-18,29.69,64.31,0.00,4
2024-02-19,29.75,64.70,0.00,4


In [14]:
weekly_df = df.groupby('turn_number').agg(
        # Lấy ngày cuối cùng của mỗi tuần làm đại diện
        # week_ending_date=('date', 'last'), 
        avg_temp_celsius=('T2M', 'mean'),
        total_rainfall_mm=('PRECTOTCORR', 'sum'),
        avg_humidity=('RH2M', 'mean')
    ).reset_index()

In [15]:
weekly_df.rename(columns={'turn_number': 'Week', 'avg_temp_celsius': 'Avg Temp (°C)', 'total_rainfall_mm': 'Total Rainfall (mm)', 'avg_humidity': 'Avg Humidity (%)'}, inplace=True)

weekly_df.round(2)

weekly_df

Unnamed: 0,Week,Avg Temp (°C),Total Rainfall (mm),Avg Humidity (%)
0,1,26.581071,169.37,88.632143
1,2,26.420357,35.78,85.4725
2,3,26.805357,3.12,79.166786
3,4,28.285357,0.0,67.576429


In [16]:
if not weekly_df.empty:
    output_filename = f"weather_data_dong_xuan.csv"
    weekly_df.to_csv(output_filename, index=False)
    print(f"  - ✅ Successfully saved weekly data to '{output_filename}'")

  - ✅ Successfully saved weekly data to 'weather_data_dong_xuan.csv'


Hè Thu 

In [17]:
he_thu_data = season_data["he-thu"]

if not he_thu_data or 'properties' not in he_thu_data or 'parameter' not in he_thu_data['properties']:
    print("    - No valid data to process.")

In [18]:
# Chuyển dữ liệu JSON thành một DataFrame dễ xử lý
df = pd.DataFrame(he_thu_data['properties']['parameter'])

df 

Unnamed: 0,T2M,RH2M,PRECTOTCORR
20240415,32.67,56.36,0.00
20240416,32.81,55.58,0.00
20240417,32.80,52.51,0.00
20240418,33.27,48.75,0.00
20240419,33.75,48.66,0.00
...,...,...,...
20240731,27.41,89.13,1.26
20240801,27.43,90.07,1.09
20240802,27.79,87.28,1.89
20240803,27.32,90.45,15.19


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112 entries, 20240415 to 20240804
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   T2M          112 non-null    float64
 1   RH2M         112 non-null    float64
 2   PRECTOTCORR  112 non-null    float64
dtypes: float64(3)
memory usage: 3.5+ KB


In [20]:
# NASA trả về ngày tháng dưới dạng chuỗi 'YYYYMMDD', ta cần chuyển nó thành kiểu datetime
df['date'] = pd.to_datetime(df.index, format='%Y%m%d')
df = df.set_index('date')

In [21]:
df.replace(-999, pd.NA, inplace=True)

df 

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-04-15,32.67,56.36,0.00
2024-04-16,32.81,55.58,0.00
2024-04-17,32.80,52.51,0.00
2024-04-18,33.27,48.75,0.00
2024-04-19,33.75,48.66,0.00
...,...,...,...
2024-07-31,27.41,89.13,1.26
2024-08-01,27.43,90.07,1.09
2024-08-02,27.79,87.28,1.89
2024-08-03,27.32,90.45,15.19


In [22]:
df['turn_number'] = (np.arange(len(df)) // 28) + 1

df

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR,turn_number
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-04-15,32.67,56.36,0.00,1
2024-04-16,32.81,55.58,0.00,1
2024-04-17,32.80,52.51,0.00,1
2024-04-18,33.27,48.75,0.00,1
2024-04-19,33.75,48.66,0.00,1
...,...,...,...,...
2024-07-31,27.41,89.13,1.26,4
2024-08-01,27.43,90.07,1.09,4
2024-08-02,27.79,87.28,1.89,4
2024-08-03,27.32,90.45,15.19,4


In [23]:
weekly_df = df.groupby('turn_number').agg(
        # Lấy ngày cuối cùng của mỗi tuần làm đại diện
        # week_ending_date=('date', 'last'), 
        avg_temp_celsius=('T2M', 'mean'),
        total_rainfall_mm=('PRECTOTCORR', 'sum'),
        avg_humidity=('RH2M', 'mean')
    ).reset_index()

In [24]:
weekly_df.rename(columns={'turn_number': 'Week', 'avg_temp_celsius': 'Avg Temp (°C)', 'total_rainfall_mm': 'Total Rainfall (mm)', 'avg_humidity': 'Avg Humidity (%)'}, inplace=True)

weekly_df.round(2)

weekly_df

Unnamed: 0,Week,Avg Temp (°C),Total Rainfall (mm),Avg Humidity (%)
0,1,33.446071,51.37,54.847143
1,2,29.758929,188.81,79.246786
2,3,28.0475,218.75,85.239286
3,4,27.216786,402.63,90.102143


In [25]:
if not weekly_df.empty:
    output_filename = f"weather_data_he_thu.csv"
    weekly_df.to_csv(output_filename, index=False)
    print(f"  - ✅ Successfully saved weekly data to '{output_filename}'")

  - ✅ Successfully saved weekly data to 'weather_data_he_thu.csv'


Thu Đông

In [26]:
thu_dong_data = season_data["thu-dong"]

if not thu_dong_data or 'properties' not in thu_dong_data or 'parameter' not in thu_dong_data['properties']:
    print("    - No valid data to process.")

In [27]:
# Chuyển dữ liệu JSON thành một DataFrame dễ xử lý
df = pd.DataFrame(thu_dong_data['properties']['parameter'])

df 

Unnamed: 0,T2M,RH2M,PRECTOTCORR
20240915,26.86,90.62,16.93
20240916,27.41,88.99,4.69
20240917,26.34,93.31,20.17
20240918,26.46,93.10,15.53
20240919,26.50,93.92,25.82
...,...,...,...
20241231,25.17,84.44,0.00
20250101,24.72,82.51,0.00
20250102,25.18,81.51,0.11
20250103,24.95,83.31,0.00


In [28]:
# NASA trả về ngày tháng dưới dạng chuỗi 'YYYYMMDD', ta cần chuyển nó thành kiểu datetime
df['date'] = pd.to_datetime(df.index, format='%Y%m%d')
df = df.set_index('date')

In [29]:
df 

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-09-15,26.86,90.62,16.93
2024-09-16,27.41,88.99,4.69
2024-09-17,26.34,93.31,20.17
2024-09-18,26.46,93.10,15.53
2024-09-19,26.50,93.92,25.82
...,...,...,...
2024-12-31,25.17,84.44,0.00
2025-01-01,24.72,82.51,0.00
2025-01-02,25.18,81.51,0.11
2025-01-03,24.95,83.31,0.00


In [30]:
df.replace(-999, pd.NA, inplace=True)

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 112 entries, 2024-09-15 to 2025-01-04
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   T2M          112 non-null    float64
 1   RH2M         112 non-null    float64
 2   PRECTOTCORR  112 non-null    float64
dtypes: float64(3)
memory usage: 3.5 KB


In [31]:
df['turn_number'] = (np.arange(len(df)) // 28) + 1

df 

Unnamed: 0_level_0,T2M,RH2M,PRECTOTCORR,turn_number
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-09-15,26.86,90.62,16.93,1
2024-09-16,27.41,88.99,4.69,1
2024-09-17,26.34,93.31,20.17,1
2024-09-18,26.46,93.10,15.53,1
2024-09-19,26.50,93.92,25.82,1
...,...,...,...,...
2024-12-31,25.17,84.44,0.00,4
2025-01-01,24.72,82.51,0.00,4
2025-01-02,25.18,81.51,0.11,4
2025-01-03,24.95,83.31,0.00,4


In [32]:
weekly_df = df.groupby('turn_number').agg(
        # Lấy ngày cuối cùng của mỗi tuần làm đại diện
        # week_ending_date=('date', 'last'), 
        avg_temp_celsius=('T2M', 'mean'),
        total_rainfall_mm=('PRECTOTCORR', 'sum'),
        avg_humidity=('RH2M', 'mean')
    ).reset_index()

In [33]:
weekly_df 

Unnamed: 0,turn_number,avg_temp_celsius,total_rainfall_mm,avg_humidity
0,1,27.185357,253.74,88.978929
1,2,27.050714,294.69,88.800714
2,3,26.905714,59.77,85.965714
3,4,25.177143,53.18,85.048571


In [34]:
weekly_df.rename(columns={'turn_number': 'Week', 'avg_temp_celsius': 'Avg Temp (°C)', 'total_rainfall_mm': 'Total Rainfall (mm)', 'avg_humidity': 'Avg Humidity (%)'}, inplace=True)

weekly_df.round(2)

weekly_df

Unnamed: 0,Week,Avg Temp (°C),Total Rainfall (mm),Avg Humidity (%)
0,1,27.185357,253.74,88.978929
1,2,27.050714,294.69,88.800714
2,3,26.905714,59.77,85.965714
3,4,25.177143,53.18,85.048571


In [35]:
if not weekly_df.empty:
    output_filename = f"weather_data_thu_dong.csv"
    weekly_df.to_csv(output_filename, index=False)
    print(f"  - ✅ Successfully saved weekly data to '{output_filename}'")

  - ✅ Successfully saved weekly data to 'weather_data_thu_dong.csv'
