In [51]:
#Mock Up transactions generator
#Status: to be reviewed
#Ver: 0.0.1
#pending: 
# set timestamps for account opening. Apply rules to accounts opened outside of branch hours COMPLETED
# set timestamp for signature associated to PIM transaction COMPLETED
# get transactions amount from np.random.normal(mean, stddev, trans) COMPLETED
# get abs() from array values to avoid negative numbers COMPLETED
# sort array and get higher transaction values from higher values in array according to account level day distribution COMPLETED

from utils.get_root_path import get_project_root
from dateutil.relativedelta import relativedelta
import datetime
import pandas as pd
import csv
import random
import decimal
import numpy as np

# String form of whatever get_project_root() returns; read_file() uses it as the
# prefix of the input CSV paths.
path = str(get_project_root())

def read_file() -> tuple:
    """
    Load the accounts and branches mock-up tables from their ';'-separated CSV files
    located under the module-level ``path``.
    :return: tuple (accounts, branches) with the two tables as read by pandas
    """
    accounts_csv = path + "/fake_bank/inputs/mockup_accounts_v2.csv"
    branches_csv = path + "/fake_bank/inputs/mockup_branches_v2.csv"
    accounts, branches = (
        pd.read_csv(csv_path, sep=";") for csv_path in (accounts_csv, branches_csv)
    )
    return accounts, branches

def len_count(accounts) -> int:
    """
    Count the entries of the accounts collection.
    :param accounts: any object supporting len() (e.g. the accounts table)
    :return: number of entries
    """
    return len(accounts)

def set_id(accts_count: int) -> int:
    """
    Draw a random account row label used to join account data.

    The upper bound follows the size of the accounts table instead of a fixed
    7499, so smaller (or larger) tables no longer produce labels that do not
    exist. For a 7500-row table the draw is identical to the previous
    ``random.randint(1, 7499)``.
    :param accts_count: number of rows in the accounts table
    :return: label in 1..accts_count-1 (0 when the table has a single row)
    :raises ValueError: if accts_count is smaller than 1
    """
    if accts_count < 1:
        raise ValueError("accts_count must be at least 1")
    if accts_count == 1:
        # Only row 0 exists, so there is nothing to draw.
        return 0
    # NOTE(review): label 0 is never drawn when other rows exist; this keeps the
    # historical 1-based lower bound - confirm whether row 0 should be eligible.
    return random.randint(1, accts_count - 1)

def read_account_features(accounts, account_id: int) -> tuple:
    """
    Look up the account attributes needed to build the transactions table.
    :param accounts: accounts table, indexed so that ``accounts.loc[account_id, column]`` works
    :param account_id: row label of the account
    :return: tuple (account_balance, account_level, elderly_flag, employee_id,
             account_since, account_end, branch_id, signature_in)
    """
    wanted_columns = (
        "Account Open Amount",
        "Level",
        "Elderly Flag",
        "Employee ID",
        "Open DT",  # or maybe use "Account open time"
        "Close DT",
        "Branch ID",
        "Signature Obtained Indicator",
    )
    # One scalar lookup per column keeps each value's own dtype.
    return tuple(accounts.loc[account_id, column] for column in wanted_columns)

def read_branches_features(branches, branch_id: int) -> tuple:
    """
    Look up the opening and closing hour of a branch.
    :param branches: branches table with "Branch_Open_Hour" and "Branch_Close_Hour" columns
    :param branch_id: branch identifier; it is reduced modulo 3000 before the lookup
    :return: tuple (open_hour, close_hour)
    """
    # TEMPORARY PATCH ONLY, REMOVE LATER: fold the id into the 0..2999 range
    wrapped_id = branch_id % 3000
    hours = [
        branches.loc[wrapped_id, column]
        for column in ("Branch_Open_Hour", "Branch_Close_Hour")
    ]
    return hours[0], hours[1]

def days_from_month(month):
    """
    Return the number of days used for a month number.
    February is always counted as 28 days, and so is any value that is not one
    of the 30- or 31-day month numbers.
    :param month: month number (1-12)
    :return: 31, 30 or 28
    """
    if month in (4, 6, 9, 11):
        return 30
    if month in (1, 3, 5, 7, 8, 10, 12):
        return 31
    return 28

def check_date(day,date):
    """
    Wrap an arbitrary day number into a valid day of the month of ``date``.
    :param day: raw day number; may be zero, negative, fractional or past the month end
    :param date: object with a ``month`` attribute
    :return: int in 1..days_from_month(date.month)
    """
    month_length = days_from_month(date.month)
    # Shift to zero-based so the modulo wraps cleanly, then shift back.
    zero_based = (day - 1) % month_length
    return int(zero_based) + 1



def txn_id_time(month, day_lvl_1, days, level, open_hour, close_hour) -> tuple:
    """
    Generate the timestamps (date, start, end and duration) of a transaction.
    :param month: date-like object of the month being generated (must support ``replace(day=...)``)
    :param day_lvl_1: raw day number used for levels 1 to 3
    :param days: number of days of the month, used for the uniform draw
    :param level: account level string ("level 1", "level 2", ...)
    :param open_hour: branch opening hour as an ISO time string
    :param close_hour: branch closing hour as an ISO time string
    :return: tuple (txn_date, txn_start, txn_end, txn_time) where txn_date is the
             start date as a string and txn_time the whole minutes between start and end
    """
    # Monthly behavior: pick a raw day number; check_date() wraps it into the month.
    if level == "level 4":
        # more transactions at end and start of month
        raw_day = int(np.random.normal(0, 10))
    elif level in ("level 1", "level 2", "level 3"):
        # dates are concentrated toward the day chosen by the caller
        raw_day = day_lvl_1
    else:
        # uniform over the month
        raw_day = int(np.random.uniform(0, days))
    txn_day = month.replace(day=check_date(raw_day, month))

    # Branch opening and closing instants on that day.
    branch_opens = datetime.datetime.combine(txn_day, datetime.time.fromisoformat(open_hour))
    branch_closes = datetime.datetime.combine(txn_day, datetime.time.fromisoformat(close_hour))

    # Hourly behavior: the three draws happen in this fixed order.
    open_fraction = random.random()
    margin = datetime.timedelta(hours=random.random() / 2)
    closed_fraction = random.random()

    just_before = branch_opens - margin
    just_after = branch_closes + margin
    candidates = [
        branch_opens + open_fraction * (branch_closes - branch_opens),  # during opening hours
        just_before,                                                    # up to 30 min before opening
        just_after,                                                     # up to 30 min after closing
        just_after + closed_fraction * (just_after - just_before),      # later after closing
    ]
    txn_start = random.choices(candidates, weights=[85, 5, 5, 5])[0]

    # Duration is below 0.15 hours (9 minutes).
    txn_end = txn_start + datetime.timedelta(hours=random.random() * 0.15)
    elapsed = relativedelta(txn_end, txn_start)
    return str(txn_start.date()), txn_start, txn_end, elapsed.minutes

def txn_time_acct_opening(day, open_hour, close_hour, process_following_tag) -> tuple:
    """
    Generate the timestamps (date, start, end and duration) of an account-opening style event.
    :param day: date of the event; when process_following_tag is truthy it is used
                directly as the start instant (so it must offer ``.date()``)
    :param open_hour: branch opening hour as an ISO time string (unused when the tag is truthy)
    :param close_hour: branch closing hour as an ISO time string (unused when the tag is truthy)
    :param process_following_tag: truthy when this event starts right where the previous one ended
    :return: tuple (txn_date, txn_start, txn_end, txn_time) where txn_date is the
             start date as a string and txn_time the whole minutes between start and end
    """
    if process_following_tag:
        txn_start = day
    else:
        # Branch opening and closing instants on that day.
        branch_opens = datetime.datetime.combine(day, datetime.time.fromisoformat(open_hour))
        branch_closes = datetime.datetime.combine(day, datetime.time.fromisoformat(close_hour))

        # Hourly behavior: the three draws happen in this fixed order.
        open_fraction = random.random()
        margin = datetime.timedelta(hours=random.random() / 2)
        closed_fraction = random.random()

        just_before = branch_opens - margin
        just_after = branch_closes + margin
        candidates = [
            branch_opens + open_fraction * (branch_closes - branch_opens),  # during opening hours
            just_before,                                                    # up to 30 min before opening
            just_after,                                                     # up to 30 min after closing
            just_after + closed_fraction * (just_after - just_before),      # later after closing
        ]
        txn_start = random.choices(candidates, weights=[85, 5, 5, 5])[0]

    # Duration is below 0.15 hours (9 minutes).
    txn_end = txn_start + datetime.timedelta(hours=random.random() * 0.15)
    return str(txn_start.date()), txn_start, txn_end, relativedelta(txn_end, txn_start).minutes

def txn_code_logic() -> str:
    """
    Draw the code that defines the transaction operation.
    The pool holds 100 entries: 50 "WIT", 28 "TRA", 20 "DEP" and 2 "PIM".
    :return: one of "WIT", "TRA", "DEP", "PIM"
    """
    shares = (("WIT", 50), ("TRA", 28), ("DEP", 20), ("PIM", 2))
    pool = [code for code, share in shares for _ in range(share)]
    return random.choice(pool)

def txn_amount_logic(amounts, level, date) -> tuple:
    """
    Pick the amount of a transaction from the pool of the month and remove it from the pool.

    Exactly one element is removed per call, even when the pool holds several
    equal values, so the pool shrinks in step with the number of transactions.
    :param amounts: array of candidate amounts; index 0 is taken as the "big"
                    amount, so the caller is expected to pass it sorted largest first
    :param level: account level string ("level 1", "level 2", ...)
    :param date: object with a ``day`` attribute (the transaction start)
    :return: tuple (remaining_amounts, txn_amount)
    :raises IndexError: if the first element is requested from an empty pool
    :raises ValueError: if a random element is requested from an empty pool
    """
    day = date.day
    # Monthly behavior: decide whether this transaction gets the biggest amount left.
    if level in ("level 1", "level 2", "level 3"):
        # bigger transactions at start and end of month
        take_largest = day < 10 or day > 20
    elif level == "level 4":
        # bigger transactions at start of month, plus a 20% chance at end of month
        take_largest = day < 10 or (day > 20 and random.random() < 0.2)
    else:
        # every other level has no change of amount over the month
        take_largest = False

    picked = 0 if take_largest else np.random.randint(len(amounts))
    txn_amount = amounts[picked]
    # Delete by position, not by value, so duplicates of the amount stay in the pool.
    return np.delete(amounts, picked), txn_amount

def txn_operation_logic(txn_code, account_balance, txn_amount) -> tuple:
    """
    Generate the operation name, description and ending balance of a transaction.
    For a personal information modification ("PIM") the amount is forced to 0 and
    the description names the modified piece of information.
    :param txn_code: one of "WIT", "TRA", "DEP", "PIM"
    :param account_balance: balance before the transaction
    :param txn_amount: amount of the transaction
    :return: tuple (txn_code, txn_name, txn_desc, ending_balance, txn_amount)
    :raises ValueError: if txn_code is not one of the supported codes
    """
    ending_balance = account_balance
    if txn_code == 'WIT':
        txn_name = 'Withdraw'
        txn_desc = txn_name
        ending_balance = account_balance - txn_amount
    elif txn_code == 'TRA':
        txn_name = 'Transfer'
        txn_desc = txn_name
        ending_balance = account_balance - txn_amount
    elif txn_code == 'DEP':
        txn_name = 'Deposit'
        txn_desc = txn_name
        ending_balance = account_balance + txn_amount
    elif txn_code == 'PIM':
        # No money moves: the balance is untouched and the amount is zeroed.
        txn_amount = 0
        txn_name = 'Personal Information Modification Process begin'
        desc_list = ['Email', 'Address Line 1', 'Address Line 2', 'Phone Number', 'State', 'City']
        txn_desc = random.choice(desc_list)
    else:
        # Fail with a clear message instead of an unbound-variable error at the return.
        raise ValueError(f"Unknown transaction code: {txn_code!r}")
    return txn_code, txn_name, txn_desc, ending_balance, txn_amount

def date_list(account_since, account_end):
    """
    Build the list of month-end dates for which transactions are generated.

    The period is shortened on purpose: it always starts on 2018-01-01, whatever
    ``account_since`` is, and ends at the account closing date or today,
    whichever comes first.
    :param account_since: account opening date (currently ignored, see above)
    :param account_end: account closing date as an ISO "YYYY-MM-DD" string; a
                        missing value (None, NaN or empty string) is treated as
                        an account that is still open
    :return: pandas DatetimeIndex with one month-end timestamp per month
    :raises ValueError: if account_end is present but not a valid ISO date
    """
    # shortened period of transactions
    period_start = datetime.date(2018, 1, 1)
    today = datetime.date.today()

    period_end = today
    if not pd.isna(account_end) and str(account_end).strip():
        closing = datetime.date.fromisoformat(str(account_end))
        period_end = min(closing, today)

    # The offset object means "month end" on every pandas version, unlike the
    # "M" string alias, which newer releases deprecate.
    return pd.date_range(start=period_start, end=period_end, freq=pd.offsets.MonthEnd())
    
def signature(txn_id, account_id, employee_id,account_balance, txn_start, level,open_hour,close_hour):
    """
    Build the "Signature Obtained" (SIG) event that starts at ``txn_start``.
    txn_id, account_id, employee_id and level are accepted but not used here.
    :return: tuple (txn_date, txn_code, txn_amount, ending_balance, txn_name,
             txn_desc, txn_start, txn_end, txn_time); the amount is 0 and the
             balance is returned unchanged
    """
    event_name = "Signature Obtained"
    # Tag 1: the event begins exactly at the instant it was given.
    txn_date, started, ended, minutes = txn_time_acct_opening(txn_start, open_hour, close_hour, 1)
    return txn_date, "SIG", 0, account_balance, event_name, event_name, started, ended, minutes

def account_open(txn_id, account_id, employee_id,account_balance ,account_since, level,open_hour,close_hour):
    """
    Build the "New Account Opening" (NAO) event on the account opening date.
    txn_id, account_id, employee_id and level are accepted but not used here.
    :param account_since: account opening date as an ISO "YYYY-MM-DD" string
    :return: tuple (txn_date, txn_code, txn_amount, ending_balance, txn_name,
             txn_desc, txn_start, txn_end, txn_time); the amount is 0 and the
             balance is returned unchanged
    """
    opening_day = datetime.date.fromisoformat(account_since)
    event_name = "New Account Opening"
    # Tag 0: the start time is drawn from the branch hours of that day.
    txn_date, started, ended, minutes = txn_time_acct_opening(opening_day, open_hour, close_hour, 0)
    return txn_date, "NAO", 0, account_balance, event_name, event_name, started, ended, minutes

def account_open_process(txn_id, account_id, employee_id,account_balance, txn_start, level,open_hour,close_hour):
    """
    Build the "New Account Opening Process Begin" (NAP) event that starts at ``txn_start``.
    txn_id, account_id, employee_id and level are accepted but not used here.
    :return: tuple (txn_date, txn_code, txn_amount, ending_balance, txn_name,
             txn_desc, txn_start, txn_end, txn_time); the amount is 0 and the
             balance is returned unchanged
    """
    event_name = "New Account Opening Process Begin"
    # Tag 1: the event begins exactly at the instant it was given.
    txn_date, started, ended, minutes = txn_time_acct_opening(txn_start, open_hour, close_hour, 1)
    return txn_date, "NAP", 0, account_balance, event_name, event_name, started, ended, minutes

def write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, ending_balance,
                txn_code, txn_name, txn_desc, txn_start, txn_end, txn_time, account_level):
    """
    Write one transaction row to the CSV file.
    Amount and ending balance are written with a decimal comma; the "Operation"
    column is always left empty. The ending balance is also echoed to stdout.
    :param wr: writer object exposing ``writerow(dict)`` (e.g. csv.DictWriter)
    :return: None
    """
    balance_text = str(ending_balance).replace(".", ",")
    print(ending_balance, balance_text)
    amount_text = str(txn_amount).replace(".", ",")

    row = {
        "Transaction ID ": txn_id,
        "Account ID": account_id,
        "Employee ID": employee_id,
        "Date": txn_date,
        "Operation": "",
        "Amount": amount_text,
        "Ending Balance": balance_text,
        "Code": txn_code,
        "Name": txn_name,
        "Description": txn_desc,
        "Start date": txn_start,
        "End date": txn_end,
        "Transaction time": txn_time,
        "Account Level": account_level,
    }
    wr.writerow(row)


def generate_data(reg, head, accounts, branches):
    """
    Generate the mock-up transactions file "mockup_transactions_v2_test.csv" (';'-delimited).

    For each of ``reg`` randomly drawn accounts this writes the account opening
    rows (NAO, then NAP, then SIG) followed, for every month returned by
    date_list(), by 0 to 10 random transactions. A drawn "PIM" transaction is
    expanded into three chained rows (PIM, PIP, SIG). Every written row consumes
    exactly one transaction id, so ids are consecutive.
    :param reg: number of accounts to draw via set_id()
    :param head: list of CSV column names, used as the DictWriter fieldnames
    :param accounts: accounts table as returned by read_file()
    :param branches: branches table as returned by read_file()
    :return: None
    """
    # Inclusive (low, high) bounds of the per-account mean amount, by account level.
    mean_bounds_by_level = {
        "level 1": (0, 100),
        "level 2": (101, 500),
        "level 3": (501, 1000),
        "level 4": (1001, 4999),
        "level 5": (5000, 10000),
    }
    fallback_mean_bounds = (10000, 50000)

    # newline="" is what the csv module asks for: the writer emits its own line
    # endings, and letting the file object translate them again yields blank
    # rows on Windows.
    with open("mockup_transactions_v2_test.csv", "wt", newline="") as csvFile:
        wr = csv.DictWriter(csvFile, fieldnames=head, delimiter=";")
        list_count = len(accounts)
        wr.writeheader()
        txn_id = 0
        for _ in range(reg):
            account_id = set_id(list_count)
            (account_balance, account_level, elderly_flag, employee_id,
             account_since, account_end, branch_id, signature_in) = read_account_features(accounts, account_id)
            open_hour, close_hour = read_branches_features(branches, branch_id)
            level = account_level
            dates = date_list(account_since, account_end)
            # NOTE(review): tentative - the indicator read from the accounts table
            # is overridden here, so the signature row is always written. The
            # intended rule is: indicator 0 means the signature is missing.
            signature_in = 1

            # Opening sequence: new account opening > process begin > signature.
            # New account opening transaction
            (txn_date, txn_code, txn_amount, account_balance, txn_name, txn_desc,
             txn_start, txn_end, txn_time) = account_open(
                txn_id, account_id, employee_id, account_balance, account_since, level, open_hour, close_hour)
            write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, account_balance,
                        txn_code, txn_name, txn_desc, txn_start, txn_end, txn_time, account_level)
            txn_id += 1

            # New account opening process, starting where the previous row ended
            (txn_date, txn_code, txn_amount, account_balance, txn_name, txn_desc,
             txn_start, txn_end, txn_time) = account_open_process(
                txn_id, account_id, employee_id, account_balance, txn_end, level, open_hour, close_hour)
            write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, account_balance,
                        txn_code, txn_name, txn_desc, txn_start, txn_end, txn_time, account_level)
            txn_id += 1

            # Signature obtained, only when the account has the indicator set to 1
            if signature_in == 1:
                (txn_date, txn_code, txn_amount, account_balance, txn_name, txn_desc,
                 txn_start, txn_end, txn_time) = signature(
                    txn_id, account_id, employee_id, account_balance, txn_end, level, open_hour, close_hour)
                write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, account_balance,
                            txn_code, txn_name, txn_desc, txn_start, txn_end, txn_time, account_level)
                txn_id += 1

            # Amount distribution of the account: the mean depends on the level,
            # the standard deviation is twice the mean.
            low, high = mean_bounds_by_level.get(level, fallback_mean_bounds)
            mean = random.randint(low, high)
            stddev = mean * 2

            for month_timestamp in dates:
                trans = random.randint(0, 10)
                # Absolute values of normal draws, largest first: txn_amount_logic()
                # takes index 0 when it wants a big transaction.
                amounts = np.flip(np.sort(np.abs(np.random.normal(mean, stddev, trans))))
                days = days_from_month(month_timestamp.month)
                for _x in range(trans):
                    foc_lvl_1 = random.randint(0, days)
                    day_lvl_1 = int(np.random.normal(foc_lvl_1, scale=1))
                    txn_date, txn_start, txn_end, txn_time = txn_id_time(
                        month_timestamp, day_lvl_1, days, level, open_hour, close_hour)
                    txn_code = txn_code_logic()
                    amounts, txn_amount = txn_amount_logic(amounts, account_level, txn_start)
                    txn_code, txn_name, txn_desc, account_balance, txn_amount = txn_operation_logic(
                        txn_code, account_balance, txn_amount)

                    if txn_code == 'PIM':
                        txn_amount = 0
                        ending_balance = account_balance
                        # Three chained rows: the first gets a time drawn on the
                        # month timestamp (tag 0), each following one starts where
                        # the previous row ended (tag 1).
                        step_start = month_timestamp
                        for step_code, step_name, follows_previous in (
                            ("PIM", "Personal Information Modification", 0),
                            ("PIP", "Personal Information Modification Process Begin", 1),
                            ("SIG", "Signature Obtained", 1),
                        ):
                            txn_date, txn_start, txn_end, txn_time = txn_time_acct_opening(
                                step_start, open_hour, close_hour, follows_previous)
                            write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, ending_balance,
                                        step_code, step_name, txn_desc, txn_start, txn_end, txn_time, account_level)
                            txn_id += 1
                            step_start = txn_end
                        # pending: add timestamp difference from 15> mins and a
                        # small perc (>15 mins) for alerting
                    else:
                        write_lines(wr, txn_id, account_id, employee_id, txn_date, txn_amount, account_balance,
                                    txn_code, txn_name, txn_desc, txn_start, txn_end, txn_time, account_level)
                        txn_id += 1
            
            
if __name__ == '__main__':
    # Number of accounts to generate transactions for.
    # Tested up to 10k unique names, not tested beyond that.
    reg = 1
    # Column names of the output CSV, in order.
    head = [
        "Transaction ID ",
        "Account ID",
        "Employee ID",
        "Date",
        "Operation",
        "Amount",
        "Ending Balance",
        "Code",
        "Name",
        "Description",
        "Start date",
        "End date",
        "Transaction time",
        "Account Level",
    ]
    accounts, branches = read_file()
    generate_data(reg, head, accounts, branches)
    print("done!")


726.45 726,45
726.45 726,45
726.45 726,45
420.731971263747 420,731971263747
221.51006344925628 221,51006344925628
221.51006344925628 221,51006344925628
221.51006344925628 221,51006344925628
221.51006344925628 221,51006344925628
-175.62592434273353 -175,62592434273353
-304.06942565134466 -304,06942565134466
-315.9489746148703 -315,9489746148703
-75.9592549279437 -75,9592549279437
78.44941329799644 78,44941329799644
95.10195754033857 95,10195754033857
-112.65273543945385 -112,65273543945385
-217.46859608903083 -217,46859608903083
-300.1714883010177 -300,1714883010177
-688.2352614154643 -688,2352614154643
-763.0461224595025 -763,0461224595025
-829.3101057120128 -829,3101057120128
-841.358846036489 -841,358846036489
-875.6247932261535 -875,6247932261535
-908.4100120600999 -908,4100120600999
-946.5177016608928 -946,5177016608928
-1415.2859661688117 -1415,2859661688117
-1510.2380106530227 -1510,2380106530227
-1768.1917591146391 -1768,1917591146391
-1874.530240024228 -1874,530240024228
-1949.