In [None]:
"""
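This notebook swaps in new utility rates pulled by the dGen team: it replaces the `tariff_dict` column of the existing agent file with the updated rates (matched on `bldg_id`) and saves the result as a new agent .pkl file. The later cells audit the `tariff_dict` contents of an agent file.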
"""

In [None]:
#Imports
import ast

import pandas as pd

In [None]:
# Inputs: the existing agent file (pickle) and the CSV holding the updated rates.
# NOTE: unpickling executes code embedded in the file, so only load agent files
# from a trusted source.
AGENTS_IN_PATH = "../../input_agents/agent_df_base_res_national_load_adjusted.pkl"
RATES_IN_PATH = "../../../data/residential_agents_updated.csv"

agents = pd.read_pickle(AGENTS_IN_PATH)
rates = pd.read_csv(RATES_IN_PATH)

In [None]:
# Pull agent_id out of the index into a regular column. The result gets its own
# name instead of overwriting `agents`, so re-running this cell does not reset the
# index a second time (which would add a stray "index" column to the saved file).
agents_flat = agents.reset_index(drop=False)

# Join new rates to agent file: drop the old tariff_dict, attach the new one by
# bldg_id (inner join), then restore agent_id as the index.
agents_rates = (
    agents_flat
    .drop("tariff_dict", axis=1)
    .merge(rates[["bldg_id", "tariff_dict"]], on="bldg_id")
    .set_index("agent_id")
)

# An inner join silently drops agents whose bldg_id has no match in `rates` and
# duplicates agents whose bldg_id appears more than once there. Surface both cases
# so they are noticed before the file is saved; the joined frame itself is unchanged.
n_unmatched = int((~agents_flat["bldg_id"].isin(rates["bldg_id"])).sum())
n_dup_rate_ids = int(rates["bldg_id"].duplicated().sum())
if n_unmatched:
    print(f"WARNING: {n_unmatched} agents have no bldg_id match in rates and were dropped by the join")
if n_dup_rate_ids:
    print(f"WARNING: rates has {n_dup_rate_ids} duplicate bldg_id rows; matching agents are duplicated by the join")

In [None]:
# Write the agent frame carrying the new tariff_dict column to a new pickle,
# leaving the original input agent file untouched.
AGENTS_OUT_PATH = "../../input_agents/agent_df_base_res_national_updated_rates.pkl"
agents_rates.to_pickle(AGENTS_OUT_PATH)

In [None]:
# Agent file inspected by the audit cells below.
# NOTE(review): this is the *_updated_wholesale_prices.pkl file, not the
# *_updated_rates.pkl file this notebook writes -- confirm that is intended.
AUDIT_AGENTS_PATH = "../../input_agents/agent_df_base_res_national_updated_wholesale_prices.pkl"
df = pd.read_pickle(AUDIT_AGENTS_PATH)

In [None]:
# Collect the ur_monthly_fixed_charge of every agent's tariff_dict, remember which
# agents lack the key, and print the distinct values found.
charges = []
missing_ids = []

# `idx` rather than `_` as the loop variable: `_` is IPython's "last output" name.
for idx, row in df.iterrows():
    # Fall back to the index label when there is no agent_id column.
    agent_id = row.get("agent_id", idx)
    try:
        td = row["tariff_dict"]
        if isinstance(td, str):
            td = ast.literal_eval(td)  # parse string into dict
        if "ur_monthly_fixed_charge" in td:
            charges.append(td["ur_monthly_fixed_charge"])
        else:
            missing_ids.append(agent_id)
    except Exception as e:
        # Best effort: report the row and keep scanning the remaining agents.
        print(f"Error parsing tariff_dict for agent {agent_id}: {e}")

# Unique values. object dtype avoids coercion so nans are kept as-is; building the
# Series directly from the list removes the need for numpy in this notebook.
unique_vals = pd.Series(charges, dtype=object).drop_duplicates()

print("=== Unique ur_monthly_fixed_charge values ===")
print(unique_vals)

if missing_ids:
    print(f"\nAgents missing ur_monthly_fixed_charge: {len(missing_ids)}")
    print(missing_ids[:20], "..." if len(missing_ids) > 20 else "")

In [None]:
import pandas as pd, json, ast, re


In [None]:
import pandas as pd, json, ast, re

# Load your agents file (adjust as needed)
# df = pd.read_parquet("dgen_os/input_agents/agents.parquet")
# or:
# df = pd.read_csv("dgen_os/input_agents/agents.csv")

def try_parse_tariff(raw):
    """Parse a raw tariff value and report how it was parsed.

    Parameters
    ----------
    raw : object
        The stored tariff value: an already-parsed dict, a JSON string, or a
        Python-literal string (e.g. the ``repr`` of a dict).

    Returns
    -------
    tuple
        ``(parsed, method)`` where ``method`` is one of:

        * ``"dict"``        -- ``raw`` was already a dict and is returned as-is;
        * ``"json"``        -- parsed by ``json.loads`` after bare ``nan``/``None``
          tokens were rewritten to ``null``;
        * ``"literal"``     -- parsed by ``ast.literal_eval``;
        * ``"unparseable"`` -- a string no parser accepted (``parsed`` is ``None``);
        * the type name of ``raw`` -- for any other input type (``parsed`` is ``None``).

        For strings, ``parsed`` is whatever the parser produced, which is not
        guaranteed to be a dict.
    """
    if isinstance(raw, dict):
        return raw, "dict"
    if not isinstance(raw, str):
        return None, type(raw).__name__

    # JSON attempt: bare nan / None tokens are not valid JSON, so map them to null.
    s = re.sub(r'\bnan\b', 'null', raw, flags=re.IGNORECASE)
    s = re.sub(r'\bNone\b', 'null', s)
    try:
        return json.loads(s), "json"
    except Exception:
        pass

    # Python-literal attempt. Try the untouched text first; if that fails, retry
    # with bare nan tokens mapped to None, because literal_eval rejects the name
    # `nan` and would otherwise report e.g. "{'a': nan}" as unparseable.
    candidates = (raw, re.sub(r'\bnan\b', 'None', raw, flags=re.IGNORECASE))
    for candidate in candidates:
        try:
            return ast.literal_eval(candidate), "literal"
        except Exception:
            continue
    return None, "unparseable"

def classify_schema(td):
    """Label a parsed tariff by the prefixes of its top-level keys.

    Returns "NONE" when ``td`` is not a dict. Otherwise returns "UR5" if some key
    starts with ``ur_`` and none starts with ``e_``/``d_``, "LEGACY_ED" for the
    reverse, "MIXED" when both kinds are present, and "UNKNOWN" when neither is.
    """
    if not isinstance(td, dict):
        return "NONE"

    key_set = set(td)
    found_ur = any(name.startswith("ur_") for name in key_set)
    found_ed = any(name.startswith("e_") or name.startswith("d_") for name in key_set)

    # Dispatch on the (has ur_*, has e_*/d_*) pair; (False, False) falls through.
    labels = {
        (True, False): "UR5",
        (False, True): "LEGACY_ED",
        (True, True): "MIXED",
    }
    return labels.get((found_ur, found_ed), "UNKNOWN")

# Top-level keys the audit requires to be present and non-nullish in every
# tariff classified as "UR5" or "MIXED".
REQUIRED_UR_KEYS = {
    # fixed charge
    "ur_monthly_fixed_charge",
    # ur_ec_* entries
    "ur_ec_tou_mat",
    "ur_ec_sched_weekday",
    "ur_ec_sched_weekend",
    # ur_dc_* entries
    "ur_dc_enable",
    "ur_dc_sched_weekday",
    "ur_dc_sched_weekend",
}

def nullish(x):
    """Return True when ``x`` stands for a missing value.

    Missing means: ``None``; a float NaN (which is what a tariff that is already a
    dict, or was parsed from a literal, carries instead of ``None``); or a string
    that, stripped and lower-cased, is one of "", "nan", "none", "null".
    Every other value, including 0, False and empty containers, is not nullish.
    """
    if x is None:
        return True
    # NaN is the only float that is not equal to itself; numpy float64 is a float
    # subclass, so it is covered as well.
    if isinstance(x, float) and x != x:
        return True
    if isinstance(x, str) and x.strip().lower() in ("", "nan", "none", "null"):
        return True
    return False

def shape_12x24(mat):
    """Return True when ``mat`` has 12 entries and each entry has length 24.

    Anything that cannot be measured this way (e.g. ``None`` or entries without
    a length) yields False rather than raising.
    """
    try:
        if len(mat) != 12:
            return False
        for entry in mat:
            if len(entry) != 24:
                return False
        return True
    except Exception:
        return False

# Build one audit record per row of `df`: how its tariff_dict parsed, which schema
# it was classified as, which required ur_* keys are absent or nullish, and which
# schedule entries are not 12x24.
SCHEDULE_KEYS = (
    "ur_ec_sched_weekday",
    "ur_ec_sched_weekend",
    "ur_dc_sched_weekday",
    "ur_dc_sched_weekend",
)

audit_rows = []
for idx, row in df.iterrows():
    td, how = try_parse_tariff(row["tariff_dict"])
    schema = classify_schema(td)
    is_dict = isinstance(td, dict)

    missing = set()
    bad_sched = []
    # Key and schedule checks only apply to tariffs carrying ur_* keys.
    if is_dict and schema in ("UR5", "MIXED"):
        missing = {
            key for key in REQUIRED_UR_KEYS
            if key not in td or nullish(td.get(key))
        }
        # shallow schedule shape check: only schedules that are present are tested
        bad_sched = [
            key for key in SCHEDULE_KEYS
            if key in td and not shape_12x24(td.get(key))
        ]

    record = {
        "idx": idx,
        # use the index label when the row has no agent_id field
        "agent_id": row.get("agent_id", idx),
        "parse_method": how,
        "schema": schema,
        "has_ur_mfc": is_dict and "ur_monthly_fixed_charge" in td,
        "ur_mfc_nullish": is_dict and nullish(td.get("ur_monthly_fixed_charge")),
        "missing_required_count": len(missing),
        "missing_required_keys": sorted(missing)[:6],   # preview first few
        "bad_sched": bad_sched[:4],                     # preview first few
    }
    audit_rows.append(record)

audit = pd.DataFrame(audit_rows)

# ---- High-level overview ----
# Two value-count tables first, then the first 20 rows of each problem category.
parse_counts = audit["parse_method"].value_counts()
schema_counts = audit["schema"].value_counts()

print("=== parse_method counts ===")
print(parse_counts, "\n")

print("=== schema counts ===")
print(schema_counts, "\n")

# Boolean row selectors, one per problem category.
is_unparseable = audit["parse_method"] == "unparseable"
is_not_ur5 = audit["schema"] != "UR5"
is_missing_keys = audit["missing_required_count"] > 0
is_mfc_nullish = audit["ur_mfc_nullish"]
has_bad_sched = audit["bad_sched"].map(bool)  # non-empty list -> True

print("=== rows with unparseable tariff_dict ===")
print(audit.loc[is_unparseable, ["agent_id", "idx"]].head(20), "\n")

print("=== rows not in UR5 schema (LEGACY/MIXED/UNKNOWN) ===")
print(audit.loc[is_not_ur5, ["agent_id", "idx", "schema", "parse_method"]].head(20), "\n")

print("=== rows missing any required UR keys ===")
print(audit.loc[is_missing_keys, ["agent_id", "idx", "missing_required_keys"]].head(20), "\n")

print("=== rows with nullish ur_monthly_fixed_charge ===")
print(audit.loc[is_mfc_nullish, ["agent_id", "idx"]].head(20), "\n")

print("=== schedule shape issues (not 12x24) ===")
print(audit.loc[has_bad_sched, ["agent_id", "idx", "bad_sched"]].head(20), "\n")



In [None]:
# Display the raw tariff_dict of one agent (agent_id 486643) for manual inspection.
# Requires agent_id to be a column of df; raises IndexError if no row matches.
df.loc[df['agent_id'] == 486643, 'tariff_dict'].values[0]