In [None]:
# normalization_agent.py

import csv
import json
from datetime import datetime
from typing import List, Dict, Any, Tuple, Optional

# ---------- Helpers ----------

def parse_datetime(value_date: str, value_time: Optional[str] = None) -> Optional[str]:
    """
    Try multiple datetime formats and return ISO 8601 with Z suffix, or None on failure.
    """
    if value_time:
        raw = f"{value_date} {value_time}"
    else:
        raw = str(value_date)

    candidates = [
        "%Y/%m/%d %H:%M:%S",
        "%Y-%m-%d %H:%M:%S",
        "%d-%m-%Y %H:%M:%S",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%d %H:%M:%S",
        "%Y/%m/%dT%H:%M:%SZ",
    ]

    # Unix timestamp case
    try:
        if isinstance(value_date, int) or (isinstance(value_date, str) and value_date.isdigit()):
            ts = int(value_date)
            dt = datetime.utcfromtimestamp(ts)
            return dt.isoformat() + "Z"
    except Exception:
        pass

    for fmt in candidates:
        try:
            dt = datetime.strptime(raw, fmt)
            return dt.isoformat() + "Z"
        except Exception:
            continue

    # last resort: try datetime.fromisoformat
    try:
        dt = datetime.fromisoformat(raw.replace("Z", ""))
        return dt.isoformat() + "Z"
    except Exception:
        return None


def normalize_side(raw_side: str) -> Optional[str]:
    if not raw_side:
        return None
    s = raw_side.strip().upper()
    if s in ("B", "BUY"):
        return "BUY"
    if s in ("S", "SELL"):
        return "SELL"
    return None  # invalid / unexpected


def to_float(val) -> Optional[float]:
    try:
        if val is None or val == "":
            return None
        return float(val)
    except Exception:
        return None


def to_int(val) -> Optional[int]:
    try:
        if val is None or val == "":
            return None
        return int(float(val))
    except Exception:
        return None


def compute_notional(qty: Optional[int], price: Optional[float]) -> Optional[float]:
    if qty is None or price is None:
        return None
    return round(qty * price, 2)


# ---------- Normalizers for each RAW format ----------

def normalize_row_oms_csv(row: Dict[str, str]) -> Tuple[Optional[Dict[str, Any]], List[str]]:
    """
    Normalize a single row from raw_large_feed_A.csv into canonical trade.
    """
    errors = []

    trade_id = row.get("TradeID") or row.get("id")
    symbol = row.get("Symbol") or row.get("ticker") or row.get("SYM")
    isin = row.get("ISIN") or row.get("ISIN_Code")
    side = normalize_side(row.get("Side") or row.get("SIDE"))
    qty = to_int(row.get("Qty") or row.get("quantity"))
    price = to_float(row.get("TradePrice") or row.get("px") or row.get("price"))
    venue = row.get("Venue") or row.get("Market") or row.get("VENUE")
    cp_lei = (row.get("CP_LEI") or row.get("counterparty_lei") or "").strip() or None

    # date/time variations
    trade_date = row.get("TradeDate") or row.get("date") or row.get("dt")
    trade_time = row.get("TradeTime") or row.get("time") or row.get("tm")
    execution_time = parse_datetime(trade_date, trade_time) if trade_date else None

    if not trade_id:
        errors.append("Missing trade_id")
    if not execution_time:
        errors.append("Could not parse execution_time")
    if not isin:
        errors.append("Missing ISIN")
    if not side:
        errors.append(f"Invalid side: {row.get('Side')}")
    if qty is None or qty <= 0:
        errors.append(f"Invalid quantity: {row.get('Qty')}")
    if price is None or price <= 0:
        errors.append(f"Invalid price: {row.get('TradePrice')}")
    if not venue:
        errors.append("Missing venue")

    if errors:
        return None, errors

    notional = compute_notional(qty, price)

    canonical = {
        "trade_id": f"INT_{trade_id}",
        "execution_time": execution_time,
        "instrument": {
            "isin": isin,
            "symbol": symbol,
            "asset_class": "EQUITY"  # you can refine with lookup later
        },
        "side": side,
        "quantity": qty,
        "price": price,
        "notional": notional,
        "venue_mic": venue,
        "trader_id": "TRDR_UNKNOWN",
        "account_id": "ACC_UNKNOWN",
        "counterparty_lei": cp_lei,
        "currency": "USD",
        "order_type": "LIMIT" if price is not None else "MARKET",
        "trading_capacity": "DEAL",
        "short_sell_flag": "N"
    }

    return canonical, []


def normalize_record_json_batch(rec: Dict[str, Any]) -> Tuple[Optional[Dict[str, Any]], List[str]]:
    """
    Normalize a record from raw_large_feed_B.json (inside trade_feed[]).
    """
    errors = []

    trade_id = rec.get("id")
    # timestamp could be in 'timestamp' or 'ts'
    ts_val = rec.get("timestamp") or rec.get("ts")
    execution_time = None

    if isinstance(ts_val, (int, float)) or (isinstance(ts_val, str) and ts_val.isdigit()):
        execution_time = parse_datetime(int(ts_val))
    else:
        execution_time = parse_datetime(str(ts_val)) if ts_val else None

    instrument = rec.get("instrument") or rec.get("inst") or {}
    isin = instrument.get("isin") or instrument.get("id") or instrument.get("instrumentId")
    symbol = instrument.get("symbol") or instrument.get("ticker")

    # side may be in root or inside order
    side_raw = rec.get("side")
    if not side_raw and rec.get("order"):
        side_raw = rec["order"].get("side")

    side = normalize_side(side_raw) if side_raw else None

    qty = rec.get("qty")
    if qty is None and rec.get("order"):
        qty = rec["order"].get("shares")
    qty = to_int(qty)

    price = rec.get("px")
    if price is None and rec.get("order"):
        price = rec["order"].get("price")
    price = to_float(price)

    venue = rec.get("venue") or rec.get("mic")
    cp = rec.get("cp") or rec.get("counterparty") or {}
    cp_lei = cp.get("lei") if isinstance(cp, dict) else None

    if not trade_id:
        errors.append("Missing trade_id")
    if not execution_time:
        errors.append("Could not parse execution_time")
    if not isin:
        errors.append("Missing ISIN")
    if not side:
        errors.append(f"Invalid side: {side_raw}")
    if qty is None or qty <= 0:
        errors.append(f"Invalid quantity: {qty}")
    if price is None or price <= 0:
        errors.append(f"Invalid price: {price}")
    if not venue:
        errors.append("Missing venue")

    if errors:
        return None, errors

    notional = compute_notional(qty, price)

    canonical = {
        "trade_id": f"INT_{trade_id}",
        "execution_time": execution_time,
        "instrument": {
            "isin": isin,
            "symbol": symbol,
            "asset_class": "EQUITY"
        },
        "side": side,
        "quantity": qty,
        "price": price,
        "notional": notional,
        "venue_mic": venue,
        "trader_id": "TRDR_UNKNOWN",
        "account_id": "ACC_UNKNOWN",
        "counterparty_lei": cp_lei,
        "currency": "USD",
        "order_type": "LIMIT" if price is not None else "MARKET",
        "trading_capacity": "DEAL",
        "short_sell_flag": "N"
    }

    return canonical, []


# ---------- Top-level functions ----------

def normalize_csv_file(path: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Normalize CSV file (using OMS/Broker-style columns).
    Returns (list of canonical trades, list of error records).
    """
    normalized = []
    errors = []
    with open(path, newline="") as f:
        reader = csv.DictReader(f)
        for row in reader:
            canon, err_list = normalize_row_oms_csv(row)
            if canon:
                normalized.append(canon)
            else:
                errors.append({"raw": row, "errors": err_list})
    return normalized, errors


def normalize_json_file(path: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Normalize JSON file in raw_large_feed_B.json structure.
    """
    normalized = []
    errors = []
    with open(path) as f:
        data = json.load(f)

    records = data.get("trade_feed") or data.get("executions") or data
    if isinstance(records, dict):
        # in case it's under another key
        records = records.get("trades", [])

    for rec in records:
        canon, err_list = normalize_record_json_batch(rec)
        if canon:
            normalized.append(canon)
        else:
            errors.append({"raw": rec, "errors": err_list})
    return normalized, errors


if __name__ == "__main__":
    # Example usage:

    csv_trades, csv_errors = normalize_csv_file(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\raw_large_feed_B.csv")
    print(f"CSV normalized trades: {len(csv_trades)}, errors: {len(csv_errors)}")

    json_trades, json_errors = normalize_json_file(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\raw_large_feed_B.json")
    print(f"JSON normalized trades: {len(json_trades)}, errors: {len(json_errors)}")

    # Look at one normalized trade
    print("\nSample normalized CSV trade:")
    from pprint import pprint
    pprint(csv_trades[0])

    print("\nSample normalized JSON trade:")
    pprint(json_trades[0])

    # Look at a couple of error records
    print("\nSample CSV errors:")
    pprint(csv_errors[:2])

    print("\nSample JSON errors:")
    pprint(json_errors[:2])


CSV normalized trades: 25, errors: 5
JSON normalized trades: 5, errors: 2

Sample normalized CSV trade:
{'account_id': 'ACC_UNKNOWN',
 'counterparty_lei': '5493001KJTIIGC8Y1R12',
 'currency': 'USD',
 'execution_time': '2024-01-25T10:32:09Z',
 'instrument': {'asset_class': 'EQUITY',
                'isin': 'US1234567890',
                'symbol': 'STK001'},
 'notional': 63525.0,
 'order_type': 'LIMIT',
 'price': 42.35,
 'quantity': 1500,
 'short_sell_flag': 'N',
 'side': 'BUY',
 'trade_id': 'INT_A1001',
 'trader_id': 'TRDR_UNKNOWN',
 'trading_capacity': 'DEAL',
 'venue_mic': 'XNAS'}

Sample normalized JSON trade:
{'account_id': 'ACC_UNKNOWN',
 'counterparty_lei': 'LEI55544433322211100099',
 'currency': 'USD',
 'execution_time': '2024-01-25T10:32:09Z',
 'instrument': {'asset_class': 'EQUITY',
                'isin': 'US5554443332',
                'symbol': 'STK201'},
 'notional': 110295.0,
 'order_type': 'LIMIT',
 'price': 122.55,
 'quantity': 900,
 'short_sell_flag': 'N',
 'side': 'BU

  dt = datetime.utcfromtimestamp(ts)


In [6]:
    # Combine all normalized trades from CSV + JSON
all_trades = csv_trades + json_trades

    # Save to JSON for analysis
import json
with open("C:\\Users\\Lenovo\\OneDrive\\Desktop\\GenAI\\VoyagersRegionalGenAI\\savedFiles\\canonical_trades_normalized_phase2.json", "w") as f:
    json.dump(all_trades, f, indent=2)

    # Save errors as well
all_errors = csv_errors + json_errors
with open("c:\\Users\\Lenovo\\OneDrive\\Desktop\\GenAI\\VoyagersRegionalGenAI\\savedFiles\\normalization_errors_phase2.json", "w") as f:
    json.dump(all_errors, f, indent=2)

print(f"\nSaved {len(all_trades)} normalized trades to canonical_trades_normalized_phase2.json")
print(f"Saved {len(all_errors)} error records to normalization_errors_phase2.json")



Saved 30 normalized trades to canonical_trades_normalized_phase2.json
Saved 7 error records to normalization_errors_phase2.json


In [3]:
import json
import pandas as pd

with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\canonical_trades_normalized_phase2.json") as f:
    trades = json.load(f)

df = pd.DataFrame(trades)
print(df.head())

print(df["venue_mic"].value_counts())
print(df["side"].value_counts())
print(df[["quantity", "price", "notional"]].describe())


    trade_id        execution_time  \
0  INT_A1001  2024-01-25T10:32:09Z   
1  INT_A1002  2024-01-25T10:35:42Z   
2  INT_A1003  2024-01-25T10:40:12Z   
3  INT_A1004  2024-01-25T10:41:01Z   
4  INT_A1006  2024-01-25T10:50:09Z   

                                          instrument  side  quantity   price  \
0  {'isin': 'US1234567890', 'symbol': 'STK001', '...   BUY      1500   42.35   
1  {'isin': 'US9876543210', 'symbol': 'STK007', '...  SELL       820  118.50   
2  {'isin': 'US4433559922', 'symbol': 'STK009', '...   BUY      3400   55.20   
3  {'isin': 'US0000000000', 'symbol': 'STK011', '...   BUY      1200  220.55   
4  {'isin': 'US5647382910', 'symbol': 'STK015', '...  SELL       300  112.10   

   notional venue_mic     trader_id   account_id      counterparty_lei  \
0   63525.0      XNAS  TRDR_UNKNOWN  ACC_UNKNOWN  5493001KJTIIGC8Y1R12   
1   97170.0      XNYS  TRDR_UNKNOWN  ACC_UNKNOWN  4RRT9021KJTIIGC9PU77   
2  187680.0      BATS  TRDR_UNKNOWN  ACC_UNKNOWN  84HD9K21KJT99H33UQ

In [4]:
#regulation_mapping_agent.py3

import json
from typing import Any, Dict, List, Tuple


# ----------------- Helpers -----------------

def is_valid_isin(isin: str) -> bool:
    """
    Very simple ISIN check for demo:
    - length 12
    - first 2 are letters
    - remaining 10 are digits

    (Real ISIN check is more complex with a checksum,
     but this is enough for hackathon demo.)
    """
    if not isin or len(isin) != 12:
        return False
    if not isin[:2].isalpha():
        return False
    if not isin[2:].isdigit():
        return False
    return True


def is_valid_lei(lei: str) -> bool:
    """
    Simple LEI validation for demo:
    - length 20
    - alphanumeric
    """
    if not lei:
        return False
    if len(lei) != 20:
        return False
    return lei.isalnum()


def get_nested_value(obj: Dict[str, Any], path: str):
    """
    Resolve dot-path like 'instrument.isin' from the canonical trade object.
    """
    parts = path.split(".")
    cur = obj
    for p in parts:
        if isinstance(cur, dict) and p in cur:
            cur = cur[p]
        else:
            return None
    return cur


# ----------------- Core Mapping & Validation -----------------

def map_trade_to_report(
    trade: Dict[str, Any],
    template: Dict[str, Any]
) -> Tuple[Dict[str, Any], List[str]]:
    """
    Map a canonical trade to a regulatory report record
    using the given template. Return (report_record, errors).
    """
    errors: List[str] = []
    report_record: Dict[str, Any] = {}

    # 1) Field mapping
    for field_spec in template["fields_required"]:
        name = field_spec["name"]
        source = field_spec["source"]
        fmt = field_spec.get("format")

        value = get_nested_value(trade, source)

        # Basic presence check
        if value is None or value == "":
            errors.append(f"Missing required field: {name} (source: {source})")
        else:
            # Type / format specific checks
            if fmt == "integer":
                try:
                    value = int(value)
                except Exception:
                    errors.append(f"Invalid integer for {name}: {value}")
            elif fmt == "decimal":
                try:
                    value = float(value)
                except Exception:
                    errors.append(f"Invalid decimal for {name}: {value}")
            # you can add more types if needed

        report_record[name] = value

    # 2) Extra validation rules from template
    # (We implement them explicitly rather than parsing the strings)
    q = report_record.get("Quantity")
    p = report_record.get("Price")
    isin = report_record.get("InstrumentIdentificationCode")
    lei = report_record.get("BuyerLei")

    if q is None or q <= 0:
        errors.append(f"Validation: Quantity must be > 0, got {q}")
    if p is None or p <= 0:
        errors.append(f"Validation: Price must be > 0, got {p}")
    if not isin or not isinstance(isin, str) or not is_valid_isin(isin):
        errors.append(f"Validation: Invalid ISIN: {isin}")
    if not lei or not isinstance(lei, str) or not is_valid_lei(lei):
        errors.append(f"Validation: Invalid LEI: {lei}")

    return report_record, errors


def process_trades_for_regulation(
    trades: List[Dict[str, Any]],
    template: Dict[str, Any]
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    For each canonical trade, produce either:
    - a valid regulatory record, or
    - an error record with reasons.

    Returns (valid_records, error_records).
    """
    valid_records: List[Dict[str, Any]] = []
    error_records: List[Dict[str, Any]] = []

    for trade in trades:
        report_record, errors = map_trade_to_report(trade, template)
        if errors:
            error_records.append({
                "trade_id": trade.get("trade_id"),
                "raw_trade": trade,
                "errors": errors
            })
        else:
            valid_records.append({
                "trade_id": trade.get("trade_id"),
                "report_record": report_record
            })

    return valid_records, error_records


# ----------------- Main -----------------

if __name__ == "__main__":
    # 1) Load canonical trades (output of Phase 2 or Phase 1)
    with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\canonical_trades_phase1_500.json") as f:
        canonical_trades = json.load(f)

    print(f"Loaded {len(canonical_trades)} canonical trades")

    # 2) Load regulation template
    with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulation_template_phase1.json") as f:
        template = json.load(f)

    # 3) Process
    valid_records, invalid_records = process_trades_for_regulation(canonical_trades, template)

    print(f"Valid regulatory records: {len(valid_records)}")
    print(f"Invalid / failed records: {len(invalid_records)}")

    # 4) Save outputs
    with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_report_records.json", "w") as f:
        json.dump(valid_records, f, indent=2)

    with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_validation_errors.json", "w") as f:
        json.dump(invalid_records, f, indent=2)

    print("Saved regulatory_report_records.json and regulatory_validation_errors.json")


Loaded 500 canonical trades
Valid regulatory records: 500
Invalid / failed records: 0
Saved regulatory_report_records.json and regulatory_validation_errors.json


In [None]:
#regulatory_report_records.json
import json
import pandas as pd

# Load valid regulatory records
with open(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_report_records.json") as f:
    data = json.load(f)

# Flatten structure: each entry has { trade_id, report_record }
rows = []
for rec in data:
    row = {"trade_id": rec["trade_id"]}
    row.update(rec["report_record"])
    rows.append(row)

df = pd.DataFrame(rows)

print(df.head())
print("\nCounts by VenueMIC:")
print(df["VenueMIC"].value_counts())

print("\nSide-by-side stats:")
print(df[["Price", "Quantity"]].describe())


     trade_id       TradingDateTime InstrumentIdentificationCode   Price  \
0  INT_100000  2024-01-11T23:15:00Z                 US7469267437   95.00   
1  INT_100001  2024-01-29T11:14:00Z                 US0317315593  484.94   
2  INT_100002  2024-01-02T20:55:00Z                 US2072204969  168.79   
3  INT_100003  2024-01-05T10:14:00Z                 US3625198453  271.97   
4  INT_100004  2024-01-01T14:14:00Z                 US0021495636  187.05   

   Quantity VenueMIC              BuyerLei  
0        92     XNSE  VY01POCGUIRDYNIZBT1G  
1       255     XNYS  J9N4OHNWUPRXZY2OAYGI  
2       206     XNSE  V08A8NCAEZN0IOOZM55D  
3       393     XNYS  6A0MCFH0I6XO55XLBCQB  
4       327     ARCX  VKP6IFP2CT9SZCST5G6R  

Counts by VenueMIC:
VenueMIC
XNSE    107
BATS    106
XNYS    105
ARCX     94
XNAS     88
Name: count, dtype: int64

Side-by-side stats:
            Price     Quantity
count  500.000000   500.000000
mean   246.954120   252.008000
std    146.960486   329.124764
min      5.5

In [7]:
#regulatory_report_records.json

import json
import csv
from typing import List, Dict, Any
from collections import Counter


def load_valid_records(path: str) -> List[Dict[str, Any]]:
    """
    Load valid regulatory records from Phase 3 output.
    Each record looks like:
    {
      "trade_id": "...",
      "report_record": { ...fields... }
    }
    """
    with open(path) as f:
        return json.load(f)


def load_validation_errors(path: str) -> List[Dict[str, Any]]:
    """
    Load invalid records from Phase 3 output.
    Each record looks like:
    {
      "trade_id": "...",
      "raw_trade": { ...canonical... },
      "errors": [ ... ]
    }
    """
    with open(path) as f:
        return json.load(f)


def write_regulator_csv(valid_records: List[Dict[str, Any]], path: str):
    """
    Generate final regulator-facing CSV file.
    """
    # Define output order of columns
    fieldnames = [
        "trade_id",
        "TradingDateTime",
        "InstrumentIdentificationCode",
        "Price",
        "Quantity",
        "VenueMIC",
        "BuyerLei"
    ]

    with open(path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for rec in valid_records:
            tid = rec.get("trade_id")
            rr = rec.get("report_record", {})

            row = {
                "trade_id": tid,
                "TradingDateTime": rr.get("TradingDateTime"),
                "InstrumentIdentificationCode": rr.get("InstrumentIdentificationCode"),
                "Price": rr.get("Price"),
                "Quantity": rr.get("Quantity"),
                "VenueMIC": rr.get("VenueMIC"),
                "BuyerLei": rr.get("BuyerLei")
            }
            writer.writerow(row)


def build_summary(
    valid_records: List[Dict[str, Any]],
    invalid_records: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Build a high-level summary for compliance / reporting.
    """
    total = len(valid_records) + len(invalid_records)
    valid = len(valid_records)
    invalid = len(invalid_records)

    # Count errors by type
    error_counter = Counter()
    for err_rec in invalid_records:
        for e in err_rec.get("errors", []):
            error_counter[e] += 1

    # Basic venue stats
    venue_counter = Counter()
    for rec in valid_records:
        rr = rec.get("report_record", {})
        venue = rr.get("VenueMIC")
        if venue:
            venue_counter[venue] += 1

    summary = {
        "total_trades_processed": total,
        "valid_regulatory_records": valid,
        "invalid_records": invalid,
        "valid_ratio": round(valid / total, 4) if total > 0 else None,
        "invalid_ratio": round(invalid / total, 4) if total > 0 else None,
        "top_error_types": error_counter.most_common(10),
        "venue_distribution": venue_counter.most_common(),
    }

    return summary


def write_summary_files(summary: Dict[str, Any], path_json: str, path_txt: str):
    """
    Save summary in JSON and a human-readable text version.
    """
    # JSON
    with open(path_json, "w") as f:
        json.dump(summary, f, indent=2)

    # Text
    lines = []
    lines.append("Compliance Reporting Summary")
    lines.append("============================")
    lines.append(f"Total trades processed: {summary['total_trades_processed']}")
    lines.append(f"Valid regulatory records: {summary['valid_regulatory_records']}")
    lines.append(f"Invalid records: {summary['invalid_records']}")
    lines.append(f"Valid ratio: {summary['valid_ratio']}")
    lines.append(f"Invalid ratio: {summary['invalid_ratio']}")
    lines.append("")
    lines.append("Top error types:")
    if summary["top_error_types"]:
        for err_msg, count in summary["top_error_types"]:
            lines.append(f"  - {err_msg}  (count={count})")
    else:
        lines.append("  (No errors)")

    lines.append("")
    lines.append("Venue distribution (valid records):")
    if summary["venue_distribution"]:
        for venue, count in summary["venue_distribution"]:
            lines.append(f"  - {venue}: {count}")
    else:
        lines.append("  (No valid records)")

    with open(path_txt, "w") as f:
        f.write("\n".join(lines))


if __name__ == "__main__":
    # 1) Load Phase 3 outputs
    valid_records = load_valid_records(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_report_records.json")
    invalid_records = load_validation_errors(r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_validation_errors.json")

    print(f"Loaded {len(valid_records)} valid regulatory records")
    print(f"Loaded {len(invalid_records)} invalid records")

    # 2) Generate regulator CSV
    write_regulator_csv(valid_records, r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_transaction_report.csv")
    print("Wrote regulatory_transaction_report.csv")

    # 3) Build summary
    summary = build_summary(valid_records, invalid_records)

    # 4) Save summary in JSON + TXT
    write_summary_files(summary, r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_report_summary.json", r"c:\Users\Lenovo\OneDrive\Desktop\GenAI\VoyagersRegionalGenAI\savedFiles\regulatory_report_summary.txt")
    print("Wrote regulatory_report_summary.json and regulatory_report_summary.txt")


Loaded 500 valid regulatory records
Loaded 0 invalid records
Wrote regulatory_transaction_report.csv
Wrote regulatory_report_summary.json and regulatory_report_summary.txt
