<a href="https://colab.research.google.com/github/varmams2023/phd/blob/main/20250104CanadaDataSetGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import csv

def CanadaCheckDataGenerator(COUNTRY_CODE, RECORD_COUNT, OUTPUT_FILE):
    """Generate a synthetic Canadian check dataset and write it to a CSV file.

    Between 15% and 34% of the records (the percentage is drawn at random on
    each call) are deliberately corrupted in exactly one field so that they
    fail validation; the remaining records are valid. Valid records are
    written first, followed by the invalid ones.

    Args:
        COUNTRY_CODE: Value stored verbatim in the "Country" column.
        RECORD_COUNT: Total number of records to generate (non-negative int).
        OUTPUT_FILE: Path of the CSV file to create or overwrite.

    Raises:
        ValueError: If RECORD_COUNT is negative.
    """
    if RECORD_COUNT < 0:
        raise ValueError("RECORD_COUNT must be non-negative")

    # Predefined bank data with fixed transit numbers (Canada equivalent of routing numbers)
    banks = [
        {
            "name": "Royal Bank of Canada (RBC)",
            "swift_prefix": "ROYCCAT2",
            "account_number_length": 12,
            "check_number_length": 8,
            "branches": {
                "Toronto": "000305123",
                "Vancouver": "000100002",
                "Montreal": "000200456",
                "Calgary": "000400789",
                "Ottawa": "000500321"
            }
        },
        {
            "name": "Toronto-Dominion Bank (TD)",
            "swift_prefix": "TDOMCAT2",
            "account_number_length": 11,
            "check_number_length": 7,
            "branches": {
                "Toronto": "004000987",
                "Vancouver": "004500123",
                "Montreal": "003000456",
                "Calgary": "006700321",
                "Ottawa": "005600789"
            }
        },
        {
            "name": "Scotiabank",
            "swift_prefix": "NOSCCAT2",
            "account_number_length": 10,
            "check_number_length": 6,
            "branches": {
                "Toronto": "002000123",
                "Vancouver": "002500789",
                "Montreal": "002300456",
                "Calgary": "002400987",
                "Ottawa": "002600321"
            }
        },
        {
            "name": "Bank of Montreal (BMO)",
            "swift_prefix": "BOFMCAT2",
            "account_number_length": 12,
            "check_number_length": 8,
            "branches": {
                "Toronto": "001000123",
                "Vancouver": "001500789",
                "Montreal": "001300456",
                "Calgary": "001400987",
                "Ottawa": "001600321"
            }
        },
        {
            "name": "Canadian Imperial Bank of Commerce (CIBC)",
            "swift_prefix": "CIBCCAT2",
            "account_number_length": 10,
            "check_number_length": 7,
            "branches": {
                "Toronto": "010000123",
                "Vancouver": "010500789",
                "Montreal": "010300456",
                "Calgary": "010400987",
                "Ottawa": "010600321"
            }
        }
    ]

    # Name pools used to build random payer names
    first_names = ["James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda", "William", "Elizabeth",
                   "David", "Barbara", "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen",
                   "Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony", "Margaret", "Donald", "Sandra",
                   "Mark", "Ashley", "Paul", "Kimberly", "Steven", "Emily", "Andrew", "Donna", "Kenneth", "Michelle",
                   "Joshua", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa", "Edward", "Deborah"]
    last_names = ["Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller", "Wilson", "Moore", "Taylor",
                  "Anderson", "Thomas", "Jackson", "White", "Harris", "Martin", "Thompson", "Garcia", "Martinez", "Robinson"]

    # Column order of the CSV; kept explicit so the header can be written
    # even when no records are generated.
    fieldnames = [
        "Country",
        "Bank Name",
        "Branch Name",
        "Payer Name",
        "Transit Number",
        "SWIFT Code",
        "Account Number",
        "Check Number",
        "Validation Result",
        "Validation Comment",
    ]

    # Derived from the bank table so the validator never drifts out of sync with it.
    valid_swift_prefixes = tuple(bank["swift_prefix"] for bank in banks)

    # The single field that gets corrupted in each invalid record.
    corruption_kinds = ["account", "check", "transit", "swift"]

    def validate_check(account_number, check_number, transit_number, swift_code, account_length, check_length):
        """Return (is_valid, comment) for one record; the first failing check wins."""
        if len(account_number) != account_length:
            return False, "Invalid Account Number"
        if len(check_number) != check_length:
            return False, "Invalid Check Number"
        if len(transit_number) != 9 or not transit_number.isdigit():
            return False, "Invalid Transit Number"
        if not swift_code.startswith(valid_swift_prefixes):
            return False, "Invalid SWIFT Code"
        return True, "Valid"

    def random_digits(length):
        """Return a random number with exactly `length` digits (no leading zero) as a string."""
        return str(random.randint(10 ** (length - 1), 10 ** length - 1))

    def build_record(corrupt=None):
        """Build one record; `corrupt` names the single field to invalidate, if any."""
        bank = random.choice(banks)
        branch_name, transit_number = random.choice(list(bank["branches"].items()))
        payer_name = f"{random.choice(first_names)} {random.choice(last_names)}"
        account_length = bank["account_number_length"]
        check_length = bank["check_number_length"]

        # Every record starts from fully valid values for its own bank, so a
        # corrupted record differs from a valid one in exactly one field.
        swift_code = f"{bank['swift_prefix']}{random.randint(10, 99)}XXX"
        account_number = random_digits(account_length)
        check_number = random_digits(check_length)

        if corrupt == "account":
            # One digit short, so the length check is guaranteed to fail.
            account_number = random_digits(account_length - 1)
        elif corrupt == "check":
            # One digit short, so the length check is guaranteed to fail.
            check_number = random_digits(check_length - 1)
        elif corrupt == "transit":
            # 8 digits instead of the required 9.
            transit_number = random_digits(8)
        elif corrupt == "swift":
            swift_code = f"INVALID{random.randint(10, 99)}XXX"

        valid, comment = validate_check(
            account_number, check_number, transit_number, swift_code,
            account_length, check_length,
        )

        return {
            "Country": COUNTRY_CODE,
            "Bank Name": bank["name"],
            "Branch Name": branch_name,
            "Payer Name": payer_name,
            "Transit Number": transit_number,
            "SWIFT Code": swift_code,
            "Account Number": account_number,
            "Check Number": check_number,
            "Validation Result": valid,
            "Validation Comment": comment
        }

    # Split the requested total into valid and invalid records.
    invalid_percentage = random.randint(15, 34)  # 15% to 34% invalid data
    invalid_count = int(RECORD_COUNT * invalid_percentage / 100)
    valid_count = RECORD_COUNT - invalid_count

    dataset = [build_record() for _ in range(valid_count)]
    dataset.extend(
        build_record(random.choice(corruption_kinds)) for _ in range(invalid_count)
    )

    # Write dataset to CSV (header only when there are no records)
    with open(OUTPUT_FILE, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(dataset)

# Generate the 100,000-record Canadian check dataset and save it as CSV
CanadaCheckDataGenerator(
    COUNTRY_CODE="Canada",
    RECORD_COUNT=100000,
    OUTPUT_FILE="canada_check_dataset.csv",
)
