In [None]:
import json
import math
import re
from pathlib import Path
from typing import Any, Dict, List, Union

# Folder holding the raw input files. It is built from separate parts because a
# backslash is only a path separator on Windows; on POSIX the raw string
# "SISAWEB\Mineracao" would be a single (non-existent) file name.
PASTA_ENTRADA = Path("SISAWEB") / "Mineracao"
# Glob pattern selecting the per-type input files inside PASTA_ENTRADA.
PADRAO_ARQS = "Mineracao tipo *.json"
# Output folder that receives one JSON file per municipality.
PASTA_SAIDA = Path("TRATADOS")
PASTA_SAIDA.mkdir(parents=True, exist_ok=True)

def to_number(v: Any) -> Union[int, float, Any]:
    """Convert a numeric-looking string into an ``int`` or a ``float``.

    Non-string values are returned unchanged. Surrounding whitespace is
    ignored and a comma is treated as the decimal separator. Strings that do
    not look like a plain decimal number (optional sign, digits, optional
    fractional part) are returned unchanged as well.

    Args:
        v: Any value; only ``str`` instances are inspected.

    Returns:
        An ``int`` when the text denotes a whole number, a ``float`` when it
        has a non-zero fractional part, otherwise ``v`` itself.
    """
    if not isinstance(v, str):
        return v
    s = v.strip().replace(",", ".")
    if not re.fullmatch(r"[+-]?(\d+(\.\d+)?|\.\d+)", s):
        return v

    whole, _, frac = s.partition(".")
    try:
        if not frac.strip("0"):
            # Whole number (no fractional digits other than zeros): parse the
            # integer digits directly. Going through float() would silently
            # round values above 2**53 and turn very long digit strings into inf.
            return int(whole) if whole.strip("+-") else 0
        f = float(s)
    except ValueError:
        return v
    if not math.isfinite(f):
        # inf cannot be represented in JSON; keep the original text instead.
        return v
    return int(f) if f.is_integer() else f

def deep_convert_numbers(x: Any) -> Any:
    """Recursively apply ``to_number`` to every leaf of a nested structure.

    Lists and dicts are rebuilt (as plain ``list`` / ``dict``) with their
    elements / values converted; dict keys are kept as they are. Any other
    value is handed to ``to_number``.
    """
    if isinstance(x, list):
        return list(map(deep_convert_numbers, x))
    if isinstance(x, dict):
        converted = {}
        for key, value in x.items():
            converted[key] = deep_convert_numbers(value)
        return converted
    return to_number(x)

def is_info_error(raw: Any) -> bool:
    """Tell whether an ``Informacao`` value carries no usable data.

    The value counts as an error / empty answer when it is:

    * ``None`` or an empty list;
    * a dict whose ``message`` / ``mensagem`` contains
      "nenhum registro encontrado" (case-insensitive), or that has an
      ``error`` / ``erro`` key;
    * a blank string, a string containing ``erro_http_`` or
      "nenhum registro encontrado", or a string holding JSON that itself
      matches one of the rules above.

    Args:
        raw: The value to inspect (any type).

    Returns:
        ``True`` when the value should be discarded, ``False`` otherwise.
    """
    if raw is None:
        return True

    if isinstance(raw, list):
        return not raw

    if isinstance(raw, dict):
        msg = raw.get("message") or raw.get("mensagem") or ""
        # The message is not guaranteed to be text (it may be a numeric code or
        # a nested object); coerce it so the check cannot raise AttributeError.
        if "nenhum registro encontrado" in str(msg).strip().lower():
            return True
        return any(k in raw for k in ("error", "erro"))

    if isinstance(raw, str):
        s = raw.strip()
        if not s:
            return True
        low = s.lower()
        if "erro_http_" in low or "nenhum registro encontrado" in low:
            return True
        # Only the parsing is guarded: text that is not JSON is simply "not an
        # error marker", while the recursive verdict must not be swallowed.
        try:
            parsed = json.loads(s)
        except (ValueError, RecursionError):
            return False
        return is_info_error(parsed)

    return False

def parse_informacao(raw: Any) -> List[Dict[str, Any]]:
    """Normalise an ``Informacao`` value into a list of records.

    Values flagged by ``is_info_error`` yield an empty list. Strings are parsed
    as JSON; when that fails, backslash escape sequences are decoded once and
    parsing is retried. Dicts found at the top level (or as list items) have
    their numeric-looking strings converted with ``deep_convert_numbers``;
    list items that are not dicts are passed through unchanged.

    Args:
        raw: A list, a dict, a JSON string, or anything else.

    Returns:
        A list of records; empty when the value is an error marker, cannot be
        parsed, or is of an unsupported type.
    """
    if is_info_error(raw):
        return []

    if isinstance(raw, str):
        s = raw.strip()
        try:
            raw = json.loads(s)
        except (ValueError, RecursionError):
            try:
                # "unicode_escape" decodes from Latin-1, so encoding the text as
                # UTF-8 first would turn every accented character into
                # mojibake. Encoding as Latin-1 with "backslashreplace" keeps
                # all characters intact while still resolving the escapes.
                unescaped = s.encode("latin-1", "backslashreplace").decode("unicode_escape")
                raw = json.loads(unescaped)
            except (ValueError, RecursionError):
                return []
        if not isinstance(raw, (list, dict)):
            # A JSON scalar: wrapped in a list after number conversion.
            return [deep_convert_numbers(raw)]

    if isinstance(raw, list):
        return [deep_convert_numbers(o) if isinstance(o, dict) else o for o in raw]
    if isinstance(raw, dict):
        return [deep_convert_numbers(raw)]
    return []

# Aggregated result keyed by each record's "Id". Every value holds the
# municipality name plus a nested mapping:
#   date string -> two-digit type key -> list of parsed "Informacao" records.
por_municipio: Dict[int, Dict[str, Any]] = {}

arquivos = sorted(PASTA_ENTRADA.glob(PADRAO_ARQS))
if not arquivos:
    raise SystemExit(f"Nenhum arquivo encontrado em {PASTA_ENTRADA}/'{PADRAO_ARQS}'")

# The type number is taken from the file name ("... tipo 3.json" -> "03").
tipo_regex = re.compile(r"tipo\s*(\d+)", re.IGNORECASE)

for arq in arquivos:
    m = tipo_regex.search(arq.stem)
    if not m:
        print(f"Ignorando (não consegui extrair tipo): {arq.name}")
        continue
    tipo_val = int(m.group(1))
    tipo_key = f"{tipo_val:02d}"

    with open(arq, "r", encoding="utf-8") as f:
        try:
            texto = f.read()
        except UnicodeDecodeError as e:
            print(f"⚠️  Falha ao ler {arq.name}: {e}")
            continue

    try:
        data = json.loads(texto)
    except ValueError as e:
        # Not a single JSON document: try JSON Lines (one document per line)
        # before giving up. This fallback has to live here because a JSON Lines
        # file never gets past the whole-file parse above.
        try:
            data = [json.loads(ln) for ln in texto.splitlines() if ln.strip()]
        except ValueError:
            print(f"⚠️  Falha ao ler {arq.name}: {e}")
            continue

    if isinstance(data, list):
        registros = data
    elif isinstance(data, dict):
        registros = data.get("data") or data.get("registros") or []
        if not isinstance(registros, list):
            registros = []
    else:
        # A bare scalar (number, string, null...) holds no records.
        print(f"Formato não reconhecido: {arq.name}")
        continue

    for rec in registros:
        if not isinstance(rec, dict):
            # Only dict records expose the fields read below.
            continue

        mun_id = rec.get("Id")
        mun_nome = rec.get("Nome")
        data_str = rec.get("Data")

        if mun_id is None or mun_nome is None or data_str is None:
            continue

        # parse_informacao already returns [] for error markers, so no separate
        # is_info_error() call is needed here.
        info_list = parse_informacao(rec.get("Informacao"))
        if not info_list:
            continue

        muni = por_municipio.setdefault(
            mun_id,
            {"sisaweb_id": mun_id, "municipio_nome": mun_nome, "datas": {}},
        )
        bucket = muni["datas"].setdefault(data_str, {}).setdefault(tipo_key, [])
        # Non-dict items returned by parse_informacao are dropped.
        bucket.extend(info for info in info_list if isinstance(info, dict))

# Rebuild every "datas" mapping with its dates in sorted order and, inside each
# date, its type keys in sorted order (dicts keep insertion order, so this also
# fixes the order in which the data is written out).
for muni in por_municipio.values():
    por_data = muni["datas"]
    muni["datas"] = {
        dia: {tipo: por_data[dia][tipo] for tipo in sorted(por_data[dia])}
        for dia in sorted(por_data)
    }

def sanitize(nome: str) -> str:
    """Turn *nome* into a file-name-safe token.

    Every run of characters other than ASCII letters, digits, ``_`` and ``-``
    becomes a single underscore; leading and trailing underscores are removed.
    """
    unsafe_runs = re.compile(r"[^A-Za-z0-9_-]+")
    replaced = unsafe_runs.sub("_", nome)
    return replaced.strip("_")

# Write one JSON file per municipality. File names come from the sanitized
# municipality name; names already used in this run are tracked so that two
# municipalities can never overwrite each other's file silently.
nomes_usados = set()
for mun_id, payload in por_municipio.items():
    id_seguro = sanitize(str(mun_id))
    # Fall back to the id when the name sanitizes to nothing (would give ".json").
    nome = sanitize(str(payload.get("municipio_nome", mun_id))) or id_seguro or "municipio"
    # casefold(): some file systems treat "Abc.json" and "abc.json" as one file.
    if nome.casefold() in nomes_usados:
        nome = f"{nome}_{id_seguro}"
    nomes_usados.add(nome.casefold())

    saida = PASTA_SAIDA / f"{nome}.json"
    with open(saida, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)

print(f"✅ Municipios gerados: {len(por_municipio)} | Pasta: {PASTA_SAIDA.resolve()}")


In [None]:
import json
import re
import unicodedata
from pathlib import Path
from collections import defaultdict

# Folder with the per-municipality JSON files that this cell updates in place.
PASTA_SAIDA = Path("TRATADOS")
# Folder with the climate JSON files. Joined part by part so the path also
# works on POSIX, where a backslash is not a path separator.
PASTA_TEMPO = Path("NASA") / "MINERACAO NASA" / "data"
# Glob pattern selecting the climate files inside PASTA_TEMPO.
EXT_TEMPO = "*.json"


def strip_accents_and_slug(s: str) -> str:
    """Build a lowercase, accent-free slug from *s*.

    Non-string input is converted with ``str`` (falsy values become an empty
    string). The text is NFKD-decomposed, combining marks are removed, the
    result is lowercased, and every run of characters outside ``a-z0-9`` is
    replaced by one underscore; leading/trailing underscores are stripped.
    """
    text = s if isinstance(s, str) else str(s or "")
    decomposed = unicodedata.normalize("NFKD", text)
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    lowered = "".join(base_chars).lower()
    slug = re.sub(r"[^a-z0-9]+", "_", lowered)
    slug = re.sub(r"_+", "_", slug)
    return slug.strip("_")

# slug -> (file path, parsed payload) for every readable file in PASTA_SAIDA,
# plus slug -> [file paths] so that slug collisions can be reported later.
municipios = {}
slug_to_muni_files = defaultdict(list)

for arq in PASTA_SAIDA.glob("*.json"):
    try:
        payload = json.loads(arq.read_text(encoding="utf-8"))
    except Exception as e:
        print(f"Falha ao ler munic√≠pio {arq.name}: {e}")
        continue
    # The slug comes from the stored name, or from the file name when absent.
    slug = strip_accents_and_slug(payload.get("municipio_nome") or arq.stem)
    slug_to_muni_files[slug].append(arq)
    municipios[slug] = (arq, payload)

# slug -> [climate files]; here the slug comes from the file name.
clima_files = list(PASTA_TEMPO.glob(EXT_TEMPO))
clima_por_slug = defaultdict(list)
for clima_arq in clima_files:
    clima_por_slug[strip_accents_and_slug(clima_arq.stem)].append(clima_arq)

# Pre-flight validation: every slug must map to exactly one municipality file
# and exactly one climate file. Nothing is written unless all checks pass.
dups_muni = {}
for slug, files in slug_to_muni_files.items():
    if len(files) > 1:
        dups_muni[slug] = files

dups_clima = {}
for slug, files in clima_por_slug.items():
    if len(files) > 1:
        dups_clima[slug] = files

erros = bool(dups_muni) or bool(dups_clima)

if dups_muni:
    print("\nDuplicidade de munic√≠pios (mesmo slug) em TRATADOS:")
    for slug, files in dups_muni.items():
        nomes = ", ".join(f.name for f in files)
        print(f"   - {slug}: " + nomes)

if dups_clima:
    print("\nDuplicidade de cidades clim√°ticas (mesmo slug) em NASA:")
    for slug, files in dups_clima.items():
        nomes = ", ".join(f.name for f in files)
        print(f"   - {slug}: " + nomes)

# The one-to-one comparison is only done when there are no duplicates.
if not erros:
    slugs_muni = set(municipios)
    slugs_clima = set(clima_por_slug)

    muni_sem_clima = sorted(slugs_muni - slugs_clima)
    clima_sem_muni = sorted(slugs_clima - slugs_muni)

    if muni_sem_clima:
        erros = True
        print("\nMunic√≠pios SEM arquivo clim√°tico correspondente (por slug):")
        for slug in muni_sem_clima:
            muni_file = municipios[slug][0]
            print(f"   - {slug}  ‚Üê  {muni_file.name}")

    if clima_sem_muni:
        erros = True
        print("\nArquivos clim√°ticos SEM munic√≠pio correspondente (por slug):")
        for slug in clima_sem_muni:
            nomes = ", ".join(f.name for f in clima_por_slug[slug])
            print(f"   - {slug}  ‚Üê  " + nomes)

if erros:
    print("\nPr√©-flight falhou. Corrija os itens acima e rode novamente. Nenhum arquivo foi alterado.")
    raise SystemExit(1)

# Merge step: for every municipality, store each climate entry under
# datas[<date>]["meteorologia"] and rewrite the municipality file.
inseridos_total = 0

for slug, (arq_muni, payload) in municipios.items():
    # The pre-flight checks guarantee exactly one climate file per slug.
    clima_arq = clima_por_slug[slug][0]

    try:
        with open(clima_arq, "r", encoding="utf-8") as f:
            clima_lista = json.load(f)
    except Exception as e:
        print(f"Falha ao ler clima {clima_arq.name}: {e}")
        continue

    if not isinstance(clima_lista, list):
        print(f"Formato climático inesperado (não é lista): {clima_arq.name}")
        continue

    datas = payload.setdefault("datas", {})

    # date -> all other fields of the entry. Entries that are not dicts or have
    # no "date" are ignored; a repeated date keeps the last entry.
    clima_por_data = {
        item["date"]: {k: v for k, v in item.items() if k != "date"}
        for item in clima_lista
        if isinstance(item, dict) and item.get("date")
    }

    for dt, clima_dt in clima_por_data.items():
        # Dates missing from the municipality file are created.
        datas.setdefault(dt, {})["meteorologia"] = clima_dt
    insercoes = len(clima_por_data)

    if insercoes:
        try:
            payload["datas"] = {k: datas[k] for k in sorted(datas)}
            with open(arq_muni, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False, indent=2)
            inseridos_total += insercoes
        except Exception as e:
            print(f"Falha ao salvar {arq_muni.name}: {e}")
    else:
        print(f"{clima_arq.name}: nenhuma data válida para inserir em {arq_muni.name}")

# Final summary, so the cell does not end silently and the running total that
# was being accumulated is actually reported.
print(f"\nResumo: {inseridos_total} registros de meteorologia inseridos em {PASTA_SAIDA.resolve()}")




In [None]:
import json
import re
import unicodedata
from pathlib import Path
from collections import defaultdict, OrderedDict

# === Paths ===
# Folder with the per-municipality JSON files that this cell updates in place.
PASTA_TRATADOS = Path("TRATADOS")
# GeoJSON file with the municipality features. Joined part by part so the path
# also works on POSIX, where a backslash is not a path separator.
ARQUIVO_GEOJS = Path("SUPORTE") / "geojs-35-mun.json"

def slugify_nome(s: str) -> str:
    """Return a lowercase ASCII slug for a municipality name.

    Anything that is not a string is passed through ``str`` first (falsy
    values give an empty string). Accents are removed via NFKD decomposition,
    then each run of characters other than ``a-z`` / ``0-9`` collapses into a
    single underscore and underscores at both ends are trimmed.
    """
    if not isinstance(s, str):
        s = str(s or "")
    sem_acentos = "".join(
        ch for ch in unicodedata.normalize("NFKD", s) if unicodedata.combining(ch) == 0
    )
    slug = re.sub(r"[^a-z0-9]+", "_", sem_acentos.lower())
    return re.sub(r"_+", "_", slug).strip("_")

def extrair_ibge_id(feature: dict) -> str:
    if isinstance(feature.get("id"), (str, int)):
        return str(feature["id"])
    props = feature.get("properties") or {}
    for k in ("cod_muni", "CD_MUN", "CD_MUNIC", "CD_MUNICIP", "code", "id"):
        if k in props and props[k]:
            return str(props[k])
    return None

# slug -> {"file", "payload", "nome"} for every readable file in PASTA_TRATADOS,
# plus slug -> [file names] so that slug collisions can be reported later.
tratados_by_slug = {}
dups_tratados = defaultdict(list)

for arq in sorted(PASTA_TRATADOS.glob("*.json")):
    try:
        payload = json.loads(arq.read_text(encoding="utf-8"))
    except Exception as e:
        print(f"Falha ao ler {arq.name}: {e}")
        continue
    # The slug comes from the stored name, or from the file name when absent.
    nome = payload.get("municipio_nome") or arq.stem
    slug = slugify_nome(nome)
    dups_tratados[slug].append(arq.name)
    tratados_by_slug[slug] = dict(file=arq, payload=payload, nome=nome)

# Load the GeoJSON file and make sure it is a FeatureCollection with a list of
# features before indexing it.
geo = json.loads(ARQUIVO_GEOJS.read_text(encoding="utf-8"))

eh_colecao = isinstance(geo, dict) and geo.get("type") == "FeatureCollection"
if not eh_colecao:
    raise SystemExit("GeoJSON inv√°lido: esperava FeatureCollection.")

features = geo.get("features") or []
if not isinstance(features, list):
    raise SystemExit("GeoJSON inv√°lido: 'features' n√£o √© lista.")

# slug -> [feature indexes] (to detect duplicates) and slug -> details of the
# last feature seen with that slug.
feats_by_slug = defaultdict(list)
feat_info_by_slug = {}

for i, feat in enumerate(features):
    props = feat.get("properties") or {}
    # First non-empty spelling of the name property; empty string when none.
    nome_geo = next(
        (props[chave] for chave in ("name", "NAME", "Name") if props.get(chave)),
        "",
    )
    slug = slugify_nome(nome_geo)
    feats_by_slug[slug].append(i)
    feat_info_by_slug[slug] = dict(
        feature=feat,
        geometry=feat.get("geometry"),
        ibge_id=extrair_ibge_id(feat),
        nome_geo=nome_geo,
    )

# Pre-flight validation: TRATADOS files and GeoJSON features must match one to
# one by slug. All findings are computed first, then reported; the run is
# aborted before any file is touched when anything is off.
dups_t = {s: lst for s, lst in dups_tratados.items() if len(lst) > 1}
dups_g = {s: idxs for s, idxs in feats_by_slug.items() if len(idxs) > 1}

slugs_tratados = set(tratados_by_slug)
slugs_geo = set(feats_by_slug)

faltando_no_geo = sorted(slugs_tratados - slugs_geo)
faltando_nos_tratados = sorted(slugs_geo - slugs_tratados)

erros = bool(dups_t or dups_g or faltando_no_geo or faltando_nos_tratados)

if dups_t:
    print("\nDuplicidade em TRATADOS (mesmo slug):")
    for s, lst in dups_t.items():
        print(f"   - {s}: {', '.join(lst)}")

if dups_g:
    print("\nDuplicidade no GeoJSON (mesmo slug):")
    for s, idxs in dups_g.items():
        print(f"   - {s}: features {idxs}")

if faltando_no_geo:
    print("\nEm TRATADOS mas N√ÉO no GeoJSON:")
    for s in faltando_no_geo:
        print(f"   - {s}  ‚Üê {tratados_by_slug[s]['file'].name}")

if faltando_nos_tratados:
    print("\nNo GeoJSON mas N√ÉO em TRATADOS:")
    for s in faltando_nos_tratados:
        # Show the original feature name next to the slug.
        ex = feat_info_by_slug.get(s, {}).get("nome_geo", s)
        print(f"   - {s} (ex.: '{ex}')")

if erros:
    print("\nPr√©-flight falhou. Nada foi modificado.")
    raise SystemExit(1)

print("Pré-flight OK! Conjunto casa 1–para–1. Inserindo em TRATADOS...\n")

# Insert step: rewrite every municipality file with "ibge_id" near the top and
# "geo_geometry" at the end.
atualizados = 0

for slug in sorted(slugs_tratados):
    arq = tratados_by_slug[slug]["file"]
    payload = tratados_by_slug[slug]["payload"]
    info = feat_info_by_slug[slug]

    ibge_id = info["ibge_id"]
    geo_geom = info["geometry"]

    if geo_geom is None:
        print(f"Sem geometria para {slug} (pulando).")
        continue

    # Drop values left by a previous run so they are rebuilt from scratch.
    for chave in ("geo_properties", "geo_geometry", "ibge_id"):
        payload.pop(chave, None)

    # Key order of the output: "municipio_id" (when present), "ibge_id", every
    # remaining key in its current order, then "geo_geometry".
    novo = OrderedDict()
    if "municipio_id" in payload:
        novo["municipio_id"] = payload["municipio_id"]
    novo["ibge_id"] = ibge_id
    for k, v in payload.items():
        if k != "municipio_id":
            novo[k] = v
    novo["geo_geometry"] = geo_geom

    try:
        with open(arq, "w", encoding="utf-8") as f:
            json.dump(novo, f, ensure_ascii=False, indent=2)
        atualizados += 1
    except Exception as e:
        print(f"Falha ao salvar {arq.name}: {e}")

# The key written above is "ibge_id", so the summary names that key.
print(f"\nResumo: {atualizados} municípios atualizados com 'ibge_id' e 'geo_geometry' (sem geo_properties).")


In [None]:
import csv
import json
import re
import unicodedata
from collections import defaultdict, OrderedDict
from pathlib import Path

# Folder with the per-municipality JSON files that this cell updates in place.
PASTA_TRATADOS = Path("TRATADOS")
# Folder with the SINAN CSV files, and the glob pattern selecting them.
PASTA_SINAN = Path("SINAN")
PADRAO_CSV = "*.csv"

# Both folders must exist before anything else runs.
if PASTA_TRATADOS.exists():
    pass
else:
    raise SystemExit(f"Pasta TRATADOS n√£o encontrada em: {PASTA_TRATADOS.resolve()}")
if PASTA_SINAN.exists():
    pass
else:
    raise SystemExit(f"Pasta SINAN n√£o encontrada em: {PASTA_SINAN.resolve()}")

def agravo_from_filename(name: str) -> str:
    """Map a file name to a disease label based on its prefix.

    The comparison is case-insensitive: names starting with ``CHIK``, ``DENG``
    or ``ZIKA`` give ``"chikungunya"``, ``"dengue"`` or ``"zika"``; any other
    name gives ``"desconhecido"``.
    """
    upper_name = name.upper()
    prefix_table = (
        ("CHIK", "chikungunya"),
        ("DENG", "dengue"),
        ("ZIKA", "zika"),
    )
    for prefix, label in prefix_table:
        if upper_name.startswith(prefix):
            return label
    return "desconhecido"

def norm_ibge_code(v) -> str | None:
    if v is None or v == "":
        return None
    try:
        n = int(str(v).strip())
        return f"{n:07d}"
    except Exception:
        s = re.sub(r"\D", "", str(v))
        return s.zfill(7) if s else None

_dt_compact = re.compile(r"^\s*(\d{4})[/-]?(\d{2})[/-]?(\d{2})\s*$")

def norm_dt(s: str) -> str | None:
    if s is None:
        return None
    m = _dt_compact.match(str(s))
    if not m:
        return None
    y, mth, d = m.groups()
    return f"{y}-{mth}-{d}"

def sniff_delimiter(sample: str) -> str:
    """Guess the CSV delimiter used in *sample*.

    ``csv.Sniffer`` is tried first, restricted to ``;``, ``,`` and tab. If it
    fails, the most frequent of those three characters wins (earlier ones win
    ties); a sample containing none of them yields ``","``.
    """
    try:
        dialect = csv.Sniffer().sniff(sample, delimiters=";,\t")
    except Exception:
        most_frequent = max((";", ",", "\t"), key=sample.count)
        return most_frequent if sample.count(most_frequent) else ","
    return dialect.delimiter

def normalize_header_name(s: str) -> str:
    """Normalise a CSV header for lookup: trimmed, upper case, no accents.

    Runs of whitespace become a single underscore. Accents are removed by
    decomposing the text and dropping the combining marks, which covers every
    accented letter and does not depend on how accented string literals in
    this source file happen to be encoded.

    Args:
        s: The raw header (``None`` is treated as an empty string).

    Returns:
        The normalised header name.
    """
    s = (s or "").strip().upper()
    s = re.sub(r"\s+", "_", s)
    decomposed = unicodedata.normalize("NFD", s)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))

def _open_dict_reader(path: Path, enc: str, delim: str, errors=None):
    """Open *path* for reading and return ``(handle, DictReader, header_map)``.

    The handle is closed again if reading the header row fails, so a failed
    call never leaves the file open.
    """
    f = path.open("r", encoding=enc, errors=errors, newline="")
    try:
        reader = csv.DictReader(f, delimiter=delim)
        headers = reader.fieldnames or []
    except Exception:
        f.close()
        raise
    header_map = {normalize_header_name(h): h for h in headers}
    return f, reader, header_map


def open_csv_detect_robust(path: Path):
    """Open a CSV file, detecting its delimiter and text encoding.

    The delimiter is sniffed from the first 8192 bytes. Each candidate encoding
    is then validated by reading the whole file once; the first one that
    decodes without error is used to open the file for the caller.

    Args:
        path: Location of the CSV file.

    Returns:
        A tuple ``(handle, reader, header_map, delimiter, encoding)`` where
        ``reader`` is a ``csv.DictReader`` positioned on the first data row and
        ``header_map`` maps normalised header names to the original ones. The
        caller is responsible for closing ``handle``.
    """
    with path.open("rb") as fb:
        raw_sample = fb.read(8192)

    enc_candidates = ["utf-8-sig", "utf-8", "cp1252", "latin-1"]
    sample_text = None
    for enc in enc_candidates:
        try:
            sample_text = raw_sample.decode(enc, errors="strict")
            break
        except UnicodeDecodeError:
            continue
    if sample_text is None:
        sample_text = raw_sample.decode("latin-1", errors="replace")

    delim = sniff_delimiter(sample_text)

    for enc in enc_candidates:
        try:
            # Validation pass inside "with": whatever the read raises, the
            # probe handle is released.
            with path.open("r", encoding=enc, newline="") as probe:
                for _ in csv.reader(probe, delimiter=delim):
                    pass
        except UnicodeDecodeError:
            continue
        f, reader, header_map = _open_dict_reader(path, enc, delim)
        return f, reader, header_map, delim, enc

    # Defensive fallback: decode as Latin-1, replacing anything undecodable.
    f, reader, header_map = _open_dict_reader(path, "latin-1", delim, errors="replace")
    return f, reader, header_map, delim, "latin-1(+replace)"

def find_col(header_map: dict, *candidates: str) -> str | None:
    for c in candidates:
        if c in header_map:
            return header_map[c]
    return None

def slim_row(row: dict, srcfile: str, agravo: str, ibge: str, dt: str) -> dict:
    """Copy *row* without its empty cells and tag it with provenance fields.

    Cells equal to ``""`` or ``None`` are left out. The keys ``_FONTE``
    (always ``"SINAN"``), ``_ARQUIVO``, ``_AGRAVO``, ``_IBGE_ID`` and ``_DATA``
    are then set from the arguments. The input dict is not modified.
    """
    d = {}
    for coluna, valor in row.items():
        if valor in ("", None, ""):
            continue
        d[coluna] = valor

    d.update(
        _FONTE="SINAN",
        _ARQUIVO=srcfile,
        _AGRAVO=agravo,
        _IBGE_ID=ibge,
        _DATA=dt,
    )
    return d

# Normalised ibge code -> (file path, parsed payload) for every readable file
# in PASTA_TRATADOS, plus code -> [file names] to detect repeated codes.
tratados_by_ibge: dict[str, tuple[Path, dict]] = {}
dups_trat = defaultdict(list)

for arq in sorted(PASTA_TRATADOS.glob("*.json")):
    try:
        with arq.open("r", encoding="utf-8") as origem:
            payload = json.load(origem)
    except Exception as e:
        print(f"Falha ao ler {arq.name}: {e}")
        continue

    # Files are matched through their "ibge_id" field.
    ibge = norm_ibge_code(payload.get("ibge_id"))
    if not ibge:
        print(f"{arq.name}: sem 'ibge_id' v√°lido ‚Äî ignorando.")
        continue

    dups_trat[ibge].append(arq.name)
    tratados_by_ibge[ibge] = (arq, payload)

# Abort when the same code is claimed by more than one file.
dups = {}
for codigo, nomes in dups_trat.items():
    if len(nomes) > 1:
        dups[codigo] = nomes
if dups:
    print("\nDuplicidade de 'ibge_id' em TRATADOS:")
    for k, lst in dups.items():
        print(f"   - {k}: {', '.join(lst)}")
    raise SystemExit("\nAbortado por duplicidades em TRATADOS.")

arquivos_csv = sorted(PASTA_SINAN.glob(PADRAO_CSV))
if not arquivos_csv:
    raise SystemExit(f"Nenhum CSV encontrado em {PASTA_SINAN}/'{PADRAO_CSV}'")

# Pre-flight: collect every municipality code present in the CSV files and
# make sure each one has a matching JSON file before anything is modified.
ids_sinan = set()
for csv_path in arquivos_csv:
    fh, reader, header_map, delim, enc = open_csv_detect_robust(csv_path)
    # try/finally: the handle is closed even if reading a row raises.
    try:
        id_col = find_col(header_map, "ID_MUNICIP", "ID_MUNICIPIO")
        dt_col = find_col(header_map, "DT_NOTIFIC")
        if not id_col or not dt_col:
            print(f"\n🔎 DEBUG {csv_path.name}:")
            print("  encoding:", enc, "delimiter:", repr(delim))
            print("  colunas:", reader.fieldnames)
            raise SystemExit(f"{csv_path.name}: não encontrei colunas ID_MUNICIP/DT_NOTIFIC (veja DEBUG).")

        for row in reader:
            ibge = norm_ibge_code(row.get(id_col))
            if ibge:
                ids_sinan.add(ibge)
    finally:
        fh.close()

faltantes = sorted(ids_sinan - set(tratados_by_ibge.keys()))
if faltantes:
    print("\nIDs do SINAN sem JSON correspondente (ibge_id) em TRATADOS:")
    # Only the first 30 are listed to keep the output readable.
    for i in faltantes[:30]:
        print(f"   - {i}")
    if len(faltantes) > 30:
        print(f"   ... (+{len(faltantes)-30} outros)")
    raise SystemExit("\nPré-flight falhou. Nada foi modificado.")

print("Pré-flight OK! Todos os ID_MUNICIP têm JSON correspondente via ibge_id.\n")


# Accumulator: municipality code -> notification date -> disease -> rows.
acc: dict[str, dict[str, dict[str, list[dict]]]] = defaultdict(
    lambda: defaultdict(lambda: defaultdict(list))
)

for csv_path in arquivos_csv:
    agravo = agravo_from_filename(csv_path.stem)
    print(f"Lendo {csv_path.name} (agravo: {agravo}) ...")

    fh, reader, header_map, delim, enc = open_csv_detect_robust(csv_path)
    # try/finally: the handle is closed even if reading a row raises.
    try:
        id_col = find_col(header_map, "ID_MUNICIP", "ID_MUNICIPIO")
        dt_col = find_col(header_map, "DT_NOTIFIC")
        if not id_col or not dt_col:
            print(f"\n🔎 DEBUG {csv_path.name}:")
            print("  encoding:", enc, "delimiter:", repr(delim))
            print("  colunas:", reader.fieldnames)
            raise SystemExit(f"{csv_path.name}: não encontrei colunas ID_MUNICIP/DT_NOTIFIC.")

        # Every column whose normalised name starts with "DT" is treated as a
        # date column and rewritten as YYYY-MM-DD when it can be parsed.
        dt_cols_norm = [c for c in header_map.keys() if c.startswith("DT")]
        dt_cols = [header_map[c] for c in dt_cols_norm]

        for row in reader:
            ibge = norm_ibge_code(row.get(id_col))
            if not ibge:
                continue

            for c in dt_cols:
                if c in row and row[c]:
                    # Keep the original text when it cannot be normalised.
                    row[c] = norm_dt(row[c]) or row[c]

            # Rows without a usable notification date are skipped.
            dt = norm_dt(row.get(dt_col))
            if not dt:
                continue

            acc[ibge][dt][agravo].append(
                slim_row(dict(row), csv_path.name, agravo, ibge, dt)
            )
    finally:
        fh.close()

print("Agregação concluída.\n")

def _assinatura(linha) -> str:
    """Return the canonical JSON text of a row as it looks once saved and reloaded.

    The round trip through JSON makes rows built in this run comparable with
    rows read back from a file written by an earlier run.
    """
    recarregada = json.loads(json.dumps(linha, ensure_ascii=False))
    return json.dumps(recarregada, sort_keys=True, ensure_ascii=False)


# Save step: append the aggregated rows under datas[<date>]["casos"][<disease>]
# of each municipality file and rewrite the file.
salvos = 0
for ibge, (src_path, base_payload) in tratados_by_ibge.items():
    # Work on a deep copy so the payload loaded earlier stays untouched.
    payload = json.loads(json.dumps(base_payload, ensure_ascii=False))
    datas = payload.setdefault("datas", {})

    por_dia = acc.get(ibge, {})
    for dt, por_agravo in por_dia.items():
        if dt not in datas:
            datas[dt] = {}
        casos = datas[dt].setdefault("casos", {})

        for agravo, linhas in por_agravo.items():
            atual = casos.get(agravo)
            if isinstance(atual, list):
                lst = atual
            else:
                lst = []
                casos[agravo] = lst
            # Rows already stored by a previous run are skipped, so re-running
            # this cell does not duplicate cases. Rows repeated inside the
            # current batch are all kept.
            ja_salvas = {_assinatura(item) for item in lst}
            lst.extend(linha for linha in linhas if _assinatura(linha) not in ja_salvas)

    # Preferred key order; any other key follows in its current order.
    ordem = ["municipio_id", "sisaweb_id", "geojs_id", "ibge_id", "municipio_nome", "datas", "geo_geometry"]
    novo = OrderedDict()
    for k in ordem:
        if k in payload:
            novo[k] = payload[k]
    for k, v in payload.items():
        if k not in novo:
            novo[k] = v

    with src_path.open("w", encoding="utf-8") as f:
        json.dump(novo, f, ensure_ascii=False, indent=2)
    salvos += 1

print(f"Pronto! {salvos} arquivos atualizados em: {PASTA_TRATADOS.resolve()}")
