# In [None]:
import json
import os
import random
import threading
import time
from datetime import datetime, timezone
from typing import Dict, List, Optional, Set, Tuple

import pandas as pd
import requests
from shapely.geometry import shape

# --- Input / output locations -------------------------------------------------
# GeoJSON file whose features are turned into one sample point per municipality.
GEOJSON_PATH: str = 'geojs-35-mun.json'
# Root output folder; per-city data and checkpoints live in sub-folders of it.
OUTPUT_DIR: str = 'MINERACAO NASA'
DATA_DIR: str = os.path.join(OUTPUT_DIR, 'data')
CHECK_DIR: str = os.path.join(OUTPUT_DIR, 'checkpoints')

# --- HTTP request behaviour ---------------------------------------------------
# Value sent as the ``community`` query parameter.
COMMUNITY: str = 'AG'
# Timeout handed to each HTTP GET.
REQUEST_TIMEOUT: int = 90
# Maximum number of attempts per request.
RETRIES: int = 3
# Base of the exponential backoff between attempts (delay ~ base ** attempt).
BACKOFF_BASE: float = 2.0
# When True, an HTTP 422 on the first attempt triggers one extra try with the
# coordinates rounded to the nearest half degree.
ROUND_TO_GRID: bool = True

# Minimum spacing, in seconds, enforced between consecutive HTTP calls.
MIN_SECS_BETWEEN_CALLS: float = 1.0

# --- Period to download (YYYYMMDD, both ends inclusive) -----------------------
START_DATE: str = '20200101'
END_DATE: str = '20241231'

# Parameters requested from the API, one request per parameter.
MET_PARAMS: List[str] = [
    'T2M_MAX',
    'T2M_MIN',
    'T2M',
    'RH2M',
    'QV2M',
    'PS',
    'WS2M',
    'WD2M',
    'PRECTOT',
    'T2MDEW',
    'T2MWET',
    'T2M_RANGE',
    'ALLSKY_SFC_SW_DWN',
    'ALLSKY_SFC_LW_DWN',
    'CLRSKY_SFC_SW_DWN',
    'HDD0',
    'CDD0',
    'HDD10',
    'CDD10',
    'PRECTOTCORR',
    'TS',
    'PW',
    'ALLSKY_SFC_PAR_TOT',
    'WS2M_MAX',
    'WS2M_MIN',
    'WS2M_RANGE',
]

# Report-only aliases: an alias counts as covered when any of its variants
# returned data for the window.
ALIASES: Dict[str, List[str]] = {
    "PRECIP_TOT": ["PRECTOTCORR", "PRECTOT"],
}

class RateLimiter:
    """Enforce a minimum time gap between consecutive :meth:`wait` calls.

    A single lock is held while waiting, so callers sharing one instance are
    serialised and the gap is honoured across threads.
    """

    def __init__(self, min_interval_sec: float):
        """Create a limiter.

        Args:
            min_interval_sec: Minimum number of seconds that must separate two
                consecutive returns of :meth:`wait`.
        """
        self.min_interval = float(min_interval_sec)
        self._lock = threading.Lock()
        # -inf makes the first call pass straight through regardless of where
        # the monotonic clock happens to start.
        self._last = float('-inf')

    def wait(self):
        """Block until ``min_interval`` seconds have elapsed since the last call."""
        with self._lock:
            # Use the monotonic clock: wall-clock time can jump (NTP, DST,
            # manual changes) and would make the computed gap meaningless.
            remaining = self.min_interval - (time.monotonic() - self._last)
            if remaining > 0:
                time.sleep(remaining)
            self._last = time.monotonic()

# Module-wide limiter shared by the HTTP calls in this file so that requests
# stay at least MIN_SECS_BETWEEN_CALLS seconds apart.
_rate = RateLimiter(MIN_SECS_BETWEEN_CALLS)

def jitter_sleep(base: float, attempt: int):
    """Pause for an exponential backoff interval with random jitter.

    The pause lasts ``base ** attempt`` seconds plus a random fraction of a
    second in ``[0, 1)``, and is capped at 30 seconds.

    Args:
        base: Base of the exponential growth.
        attempt: Attempt number used as the exponent.
    """
    backoff = base ** attempt
    jitter = random.random()
    capped = min(backoff + jitter, 30.0)
    time.sleep(capped)

def round_to_half_degree(v: float) -> float:
    """Snap ``v`` to the nearest multiple of 0.5.

    Ties are resolved by Python's built-in ``round`` applied to ``v * 2``.
    """
    half_steps = round(v * 2)
    return half_steps / 2.0

def generate_date_ranges_fixed(
    start: Optional[str] = None,
    end: Optional[str] = None,
    window_days: int = 365,
) -> List[Tuple[str, str]]:
    """Split an inclusive date period into consecutive fixed-length windows.

    Called without arguments it behaves as before: the period is
    ``START_DATE``..``END_DATE`` and each window spans 365 days.

    Args:
        start: First day as ``YYYYMMDD``; defaults to ``START_DATE``.
        end: Last day (inclusive) as ``YYYYMMDD``; defaults to ``END_DATE``.
        window_days: Number of days per window. The last window is shortened
            so it never runs past ``end``.

    Returns:
        ``(first_day, last_day)`` pairs as ``YYYYMMDD`` strings, in
        chronological order. Empty when ``start`` is after ``end``.

    Raises:
        ValueError: If ``window_days`` is smaller than 1 (the loop below would
            otherwise never advance).
    """
    if window_days < 1:
        raise ValueError(f"window_days must be >= 1, got {window_days}")

    first = pd.to_datetime(START_DATE if start is None else start, format='%Y%m%d')
    last = pd.to_datetime(END_DATE if end is None else end, format='%Y%m%d')
    # Both ends of a window are inclusive, hence the "- 1".
    span = pd.Timedelta(days=window_days - 1)
    one_day = pd.Timedelta(days=1)

    ranges: List[Tuple[str, str]] = []
    cur = first
    while cur <= last:
        stop = min(cur + span, last)
        ranges.append((cur.strftime('%Y%m%d'), stop.strftime('%Y%m%d')))
        cur = stop + one_day
    return ranges

def normalize_date_str(dt: str) -> str:
    """Convert a compact ``YYYYMMDD`` string into ``YYYY-MM-DD``.

    Any value that is not an 8-character all-digit string, or that pandas
    cannot parse with the ``%Y%m%d`` format, is returned unchanged.
    """
    looks_compact = isinstance(dt, str) and len(dt) == 8 and dt.isdigit()
    if not looks_compact:
        return dt
    try:
        parsed = pd.to_datetime(dt, format='%Y%m%d')
        return parsed.strftime('%Y-%m-%d')
    except Exception:
        # Eight digits that do not form a real date: hand the input back.
        return dt

def load_municipios_centroides(geojson_path: str) -> List[Dict]:
    """Load one sample point per GeoJSON feature.

    For every feature that has a geometry, the geometry's
    ``representative_point()`` is used as the sample location.

    Args:
        geojson_path: Path of a UTF-8 GeoJSON file with a ``features`` list.

    Returns:
        One dict per usable feature with the keys ``id``, ``name``,
        ``latitude`` and ``longitude`` (``id``/``name`` come from the
        feature's ``properties``).

    Features without a geometry are skipped. Features whose geometry or
    properties cannot be processed are also skipped, but a warning is printed
    so missing municipalities do not go unnoticed.
    """
    with open(geojson_path, 'r', encoding='utf-8') as f:
        geo = json.load(f)

    out = []
    for idx, feat in enumerate(geo.get('features', [])):
        props = feat.get('properties', {})
        geom = feat.get('geometry')
        if not geom:
            continue
        # Only used for the warning; properties may legally be null.
        label = props.get('name') if isinstance(props, dict) else None
        try:
            pt = shape(geom).representative_point()
            entry = {
                'id': props.get('id'),
                'name': props.get('name'),
                'latitude': pt.y,
                'longitude': pt.x,
            }
        except Exception as exc:
            # Best-effort: one bad feature must not abort the whole load, but
            # it must be visible in the output.
            print(f"[AVISO] feature {idx} ({label!r}) ignorada: {type(exc).__name__}: {exc}")
            continue
        out.append(entry)
    return out

def safe_name(name: str) -> str:
    """Turn a name into a file-name stem.

    The value is converted to ``str``, stripped of surrounding whitespace, and
    every space, ``/`` and ``\\`` is replaced by ``_``.
    """
    to_underscore = str.maketrans({' ': '_', '/': '_', '\\': '_'})
    return str(name).strip().translate(to_underscore)

def city_paths(name: str) -> Tuple[str, str]:
    """Return ``(data_path, checkpoint_path)`` for a municipality name.

    Both files share the same ``<safe name>.json`` file name and live in
    ``DATA_DIR`` and ``CHECK_DIR`` respectively.
    """
    filename = safe_name(name) + ".json"
    data_file = os.path.join(DATA_DIR, filename)
    checkpoint_file = os.path.join(CHECK_DIR, filename)
    return data_file, checkpoint_file

def load_city_checkpoint(path: str) -> int:
    """Read the index of the last completed window from a checkpoint file.

    Returns:
        The integer stored under ``last_window``, or ``-1`` when the file does
        not exist, cannot be read or parsed, or holds no usable value.
    """
    if not os.path.exists(path):
        return -1
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            state = json.load(fh)
        return int(state.get('last_window', -1))
    except Exception:
        # Unreadable checkpoint: behave as if nothing was done yet.
        return -1

def save_city_checkpoint(path: str, last_w: int):
    """Persist the index of the last completed window.

    The file holds ``{"last_window": <int>, "updated_at": <UTC ISO time>Z}``.
    It is written to a temporary sibling file first and then moved into place,
    so an interrupted write cannot leave a truncated checkpoint behind.

    Saving is best-effort: failures are reported on stdout and never raised.

    Args:
        path: Destination checkpoint file.
        last_w: Index of the last window that was fully processed.
    """
    # Same "naive UTC + Z" text as before, without the deprecated utcnow().
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump({'last_window': last_w, 'updated_at': stamp}, f, ensure_ascii=False)
        os.replace(tmp_path, path)
    except Exception as exc:
        print(f"[AVISO] falha ao salvar checkpoint em {path}: {exc}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing to clean up (the temp file was never created).
            pass

def load_city_data(path: str) -> Dict[str, Dict[str, float]]:
    """Load previously saved daily records, indexed by date.

    The file is expected to contain a JSON list of objects, each with a
    ``date`` key plus one key per stored value.

    Args:
        path: Data file to read.

    Returns:
        ``{date: {field: value}}`` with the ``date`` key removed from each
        inner dict. Empty when the file is missing, unreadable, or is not a
        JSON list.

    Entries that are not objects, or whose ``date`` is missing, empty or not a
    string, are skipped individually, so one bad record no longer discards the
    rest of the file.
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path, 'r', encoding='utf-8') as f:
            arr = json.load(f)
    except Exception as exc:
        print(f"[AVISO] não foi possível ler {path}: {exc}")
        return {}
    if not isinstance(arr, list):
        print(f"[AVISO] conteúdo inesperado em {path}: esperado lista JSON")
        return {}

    out: Dict[str, Dict[str, float]] = {}
    for rec in arr:
        if not isinstance(rec, dict):
            continue
        d = rec.get('date')
        if not d or not isinstance(d, str):
            continue
        out[d] = {k: v for k, v in rec.items() if k != 'date'}
    return out

def save_city_data(path: str, date_map: Dict[str, Dict[str, float]]):
    """Write the daily records as a JSON list sorted by date.

    Each list item is ``{"date": <key>, **values}``. The list is written to a
    temporary sibling file and then moved into place, so a failed or
    interrupted write leaves the previously saved file untouched.

    Saving is best-effort: failures are reported on stdout and never raised.

    Args:
        path: Destination data file.
        date_map: ``{date: {field: value}}`` mapping to persist.
    """
    arr = [{'date': d, **date_map[d]} for d in sorted(date_map)]
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(arr, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)
    except Exception as exc:
        print(f"[AVISO] falha ao salvar dados em {path}: {exc}")
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing to clean up (the temp file was never created).
            pass

def _decode_payload(resp):
    """Return the response body parsed as JSON, or its raw text if that fails."""
    try:
        return resp.json()
    except Exception:
        return resp.text


def _is_permanent_client_error(status_code: int) -> bool:
    """Tell whether an HTTP status is a 4xx that repeating the request cannot fix."""
    # 408 (request timeout) and 429 (too many requests) are worth retrying.
    return 400 <= status_code < 500 and status_code not in (408, 429)


def _request_power(lat: float, lon: float, param: str, start: str, end: str, community: str):
    """Request one parameter's daily series for one point, with retries.

    Up to ``RETRIES`` attempts are made, each preceded by the shared rate
    limiter. If the first attempt answers HTTP 422 and ``ROUND_TO_GRID`` is
    set, one extra request is made with the coordinates rounded to the nearest
    half degree. Network errors and transient statuses are retried after a
    jittered exponential backoff; permanent client errors (4xx other than
    408/429) are returned immediately because repeating the same request
    cannot change the answer.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        param: Name of the parameter to request.
        start: First day, ``YYYYMMDD``.
        end: Last day, ``YYYYMMDD``.
        community: Value for the ``community`` query parameter.

    Returns:
        ``(status, payload)``: ``(200, dict)`` on success; otherwise the last
        HTTP status with the decoded body (dict or text), or ``(-1, message)``
        when the last attempt raised.
    """
    base_url = 'https://power.larc.nasa.gov/api/temporal/daily/point'
    q = {
        'start': start, 'end': end,
        'latitude': lat, 'longitude': lon,
        'parameters': param,
        'community': community,
        'format': 'JSON'
    }

    # The context manager closes the session (and its pooled connections) on
    # every exit path; the original never closed it.
    with requests.Session() as sess:
        for attempt in range(1, RETRIES + 1):
            _rate.wait()

            try:
                r = sess.get(base_url, params=q, timeout=REQUEST_TIMEOUT)
                payload = _decode_payload(r)

                if r.status_code == 422 and ROUND_TO_GRID and attempt == 1:
                    # One-off fallback: retry on the half-degree grid.
                    q2 = dict(
                        q,
                        latitude=round_to_half_degree(lat),
                        longitude=round_to_half_degree(lon),
                    )
                    _rate.wait()
                    r2 = sess.get(base_url, params=q2, timeout=REQUEST_TIMEOUT)
                    payload2 = _decode_payload(r2)
                    if r2.status_code == 200 and isinstance(payload2, dict):
                        return 200, payload2
            except Exception as e:
                # Best-effort boundary: any failure while talking to the
                # server counts as a failed attempt.
                if attempt < RETRIES:
                    jitter_sleep(BACKOFF_BASE, attempt)
                    continue
                return -1, str(e)

            if r.status_code == 200 and isinstance(payload, dict):
                return 200, payload

            if attempt >= RETRIES or _is_permanent_client_error(r.status_code):
                return r.status_code, payload
            jitter_sleep(BACKOFF_BASE, attempt)

    # Only reachable when RETRIES < 1 (the loop body never ran).
    return -1, "erro_desconhecido"

def fetch_param_series(lat: float, lon: float, param: str, start: str, end: str, community: str) -> Dict[str, float]:
    """Fetch the series of one parameter for one point and period.

    Returns:
        The mapping found at ``properties.parameter.<param>`` in the response,
        or an empty dict when the request did not succeed or that entry is
        missing or empty.
    """
    status, body = _request_power(lat, lon, param, start, end, community)
    if status != 200 or not isinstance(body, dict):
        return {}
    properties = body.get('properties', {})
    parameters = properties.get('parameter', {})
    series = parameters.get(param, {})
    return series or {}

def merge_window_param(date_map: Dict[str, Dict[str, float]], param: str, series: Dict[str, float]):
    """Store one parameter's values into ``date_map``, in place.

    Each key of ``series`` is passed through ``normalize_date_str`` and the
    value is written under ``param`` in that day's record, creating the record
    when it does not exist yet.
    """
    for raw_date, value in series.items():
        day = normalize_date_str(raw_date)
        if day not in date_map:
            date_map[day] = {}
        date_map[day][param] = value

def _derive_fields(vals: Dict[str, float]) -> None:
    """Add the derived ``PRECIP`` and ``T2M_MEAN_CALC`` fields to one day, in place."""
    # NOTE(review): only None is treated as "missing" here. If the API encodes
    # missing values with a numeric sentinel (e.g. -999), those values pass
    # through unchanged — confirm against the API documentation.
    if 'PRECTOT' in vals and vals['PRECTOT'] is None:
        vals['PRECTOT'] = 0
    # Prefer PRECTOTCORR; fall back to PRECTOT when it is absent or None.
    if vals.get('PRECTOTCORR') is not None:
        vals['PRECIP'] = vals['PRECTOTCORR']
    elif 'PRECTOT' in vals:
        vals['PRECIP'] = vals['PRECTOT']
    # Only estimate the mean temperature when T2M itself was not stored.
    if 'T2M' not in vals and vals.get('T2M_MAX') is not None and vals.get('T2M_MIN') is not None:
        vals['T2M_MEAN_CALC'] = (vals['T2M_MAX'] + vals['T2M_MIN']) / 2.0


def process_city_sequential(m: Dict, date_windows: List[Tuple[str, str]]) -> Tuple[str, int, Set[str]]:
    """Download every pending window for one municipality, resuming from its checkpoint.

    For each window after the last checkpointed one, every parameter in
    ``MET_PARAMS`` is requested individually and merged into the city's
    per-date records; derived fields are then refreshed and both the data file
    and the checkpoint are saved.

    If a window yields no data for any parameter (typically an outage), the
    checkpoint is NOT advanced and processing of this city stops, so the
    window is retried on the next run instead of being skipped for good.

    Args:
        m: Municipality dict with the keys ``name``, ``latitude`` and
            ``longitude``.
        date_windows: ``(start, end)`` pairs as ``YYYYMMDD`` strings.

    Returns:
        ``(name, number_of_dates_held, parameters_that_returned_nothing)``;
        the last item collects every parameter that came back empty for at
        least one window.
    """
    name, lat, lon = m['name'], m['latitude'], m['longitude']
    data_path, ck_path = city_paths(name)

    last_w = load_city_checkpoint(ck_path)
    date_map = load_city_data(data_path)

    nwin = len(date_windows)
    print(f"[{name}] retomando da janela {last_w+1}/{nwin}")

    invalid_params: Set[str] = set()
    # Loop-invariant: what the per-window report expects to see. PRECTOT is
    # reported through its alias rather than on its own.
    desired_for_report = (set(MET_PARAMS) - {"PRECTOT"}) | set(ALIASES)

    for wi in range(last_w + 1, nwin):
        st, ed = date_windows[wi]
        print(f"  [{name}] Janela {wi+1}/{nwin}: {st}..{ed}")

        returned_params: Set[str] = set()
        for p in MET_PARAMS:
            series = fetch_param_series(lat, lon, p, st, ed, COMMUNITY)
            if series:
                merge_window_param(date_map, p, series)
                returned_params.add(p)
            else:
                invalid_params.add(p)

        covered = returned_params | {
            alias
            for alias, variants in ALIASES.items()
            if any(v in returned_params for v in variants)
        }
        missing_now = sorted(desired_for_report - covered)
        print(f"    OK {len(returned_params)} params | faltando agora: {missing_now or 'nenhum'}")

        if not returned_params:
            # Nothing was downloaded: do not mark the window as done, and stop
            # here because the checkpoint can only record a contiguous prefix.
            print(f"    [AVISO] nenhum parâmetro retornou dados; janela {wi+1}/{nwin} será repetida na próxima execução")
            break

        for vals in date_map.values():
            _derive_fields(vals)

        save_city_data(data_path, date_map)
        save_city_checkpoint(ck_path, wi)

    total_rows = len(date_map)
    print(f"[{name}] concluído — {total_rows} linhas; possivelmente inválidos: {sorted(invalid_params) or 'nenhum'}")
    return name, total_rows, invalid_params

def main():
    """Download the configured period for every municipality and print a summary.

    Creates the output folders, builds the date windows, loads the
    municipalities from ``GEOJSON_PATH`` and processes them one at a time. A
    failure in one municipality is printed and does not stop the others.

    Raises:
        ValueError: If ``START_DATE``/``END_DATE`` are not the expected
            2020-01-01 .. 2024-12-31 period.
    """
    for directory in (OUTPUT_DIR, DATA_DIR, CHECK_DIR):
        os.makedirs(directory, exist_ok=True)

    # Explicit check instead of ``assert``: asserts are stripped under ``-O``.
    if not (START_DATE == '20200101' and END_DATE == '20241231'):
        raise ValueError("Período deve ser 2020-01-01 .. 2024-12-31")
    date_windows = generate_date_ranges_fixed()

    municipios = load_municipios_centroides(GEOJSON_PATH)
    print(f"Total de municípios: {len(municipios)}")
    print(f"Janelas (365d): {len(date_windows)} — cobrindo {START_DATE}..{END_DATE}")
    print(f"Parâmetros (um a um): {len(MET_PARAMS)}")

    all_invalid: Set[str] = set()
    results = []

    for m in municipios:
        try:
            name, nrows, invalid_local = process_city_sequential(m, date_windows)
        except Exception as e:
            # Top-level boundary: report and move on to the next municipality.
            print(f"[ERRO] {m.get('name')}: {e}")
            continue
        results.append((name, nrows))
        all_invalid |= invalid_local

    if all_invalid:
        print("\n Parâmetros que falharam para pelo menos um município/janela:")
        print(sorted(all_invalid))
    else:
        print("\n Nenhum parâmetro falhou em toda a execução.")

    results.sort(key=lambda x: x[1], reverse=True)
    print("\nTop-5 por linhas gravadas:")
    for nm, n in results[:5]:
        print(f"  {nm}: {n} linhas")

    print("\nProcessamento finalizado.")

# Start the download only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
