In [108]:
%load_ext autoreload
%autoreload 2

%config InteractiveShell.ast_node_interactivity='all'
%config InteractiveShell.ast_node_interactivity='all'

import sys
sys.path.insert(1, '../')


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Identify unique accounts in list
This script will determine the complete list of unique account names from the lists provided in each Excel sheet.

Assumptions:
* There are no duplicate account names
* The accounts are in the correct order, with no instances where accounts are displayed in a different order.

Potential issues
There can be instances where the determined order is not correct. For example, if the following lists are provided:
* 1:	A	B	D	G		
* 2:	A	B	C	E	F	
* 3:	A	B	C	E	F	G
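Since the first list is taken as the base and D doesn't appear in any other list, D will be placed somewhere after B, but its position relative to C, E and F cannot be determined from the lists — the true order might really be C, then D.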
This is a limitation of the information available.

List update
If you already have the updated list and you want to run a new list into it, it is best to start from scratch again. This is because of the issue described above: the order in the previously determined list may be incorrect. (A new function could be developed to handle this case.)


# Setup
Provide the file to use.

Conditions for the file:
* one sheet per account listing
* the account should be listed in the first column
* there must be no empty rows between the heading and the accounts, or between any two accounts

In [57]:
# Excel workbook containing the account lists (one sheet per list); referenced by later cells.
file_name = "account_lists_single_tst.xlsx"

# Run Complete Script
Runs all checks and produces all outputs in one go.

In [102]:
# Entry point that runs the complete pipeline in a single call.
from account_list_generator import main

# The value returned by main() is displayed as the cell output.
main(file_name = file_name)

2024-02-03 16:16:54,422 - INFO - Running yaml logger
2024-02-03 16:16:54,424 - DEBUG - 


Running: Main
2024-02-03 16:16:54,425 - DEBUG - 


Running: assert_file_extension
2024-02-03 16:16:54,428 - DEBUG - 


Running: read_sheets_as_df
2024-02-03 16:16:54,430 - DEBUG - 


Running: basic_checks_acc_gen
2024-02-03 16:16:54,431 - DEBUG - All checks have passes.
2024-02-03 16:16:54,431 - DEBUG - 


Running: get_list_of_unique_accounts
2024-02-03 16:16:54,432 - DEBUG - account_lists_dict:{'Sheet1': ['A', 'B', 'D', 'G'], 'Sheet2': ['A', 'B', 'C', 'E', 'F'], 'Sheet3': ['A', 'B', 'C', 'E', 'F', 'G']}
2024-02-03 16:16:54,432 - DEBUG - list of all unique accounts:['A', 'B', 'F', 'D', 'G', 'E', 'C']
2024-02-03 16:16:54,432 - DEBUG - 


Running: generate_check_account_order_dict
2024-02-03 16:16:54,433 - DEBUG - Current account listing:Sheet1
2024-02-03 16:16:54,433 - DEBUG - Current account listing:Sheet2
2024-02-03 16:16:54,433 - DEBUG - Current account listing:Sheet3
2024-02-03 16:16:54,434 - D

['A', 'B', 'C', 'E', 'D', 'F', 'G']

# Run Script step by step
This should follow all the required steps from the module, but allows you to execute them one at a time.

In [105]:
import pandas as pd
import logging
import logging.config
from datetime import datetime
from openpyxl import Workbook

# from account_list_generator import check_file_extension
from account_list_generator import setup_logging
from account_list_generator import assert_file_extension
from account_list_generator import read_sheets_as_df
from account_list_generator import basic_checks_acc_gen
from account_list_generator import get_list_of_unique_accounts
from account_list_generator import generate_check_account_order_dict
from account_list_generator import complete_account_order_checker
from account_list_generator import correctly_ordered_list
from account_list_generator import document_account_lists

In [89]:
# Configure logging via the project's setup_logging() helper
# (presumably driven by a YAML config, given the "Running yaml logger" output - confirm).
setup_logging()

# Logger for this notebook; __name__ is "__main__" inside a notebook kernel.
logger = logging.getLogger(__name__)
# Emit one record so the cell output confirms the configuration took effect.
logger.info('Logging initiated.')

Running yaml logger
2024-02-03 15:30:47,499 - INFO - Logging initiated.


In [90]:
# Check the input file has the expected .xlsx extension before it is opened.
# The call returned True in the recorded run; behaviour on a mismatch is not visible here.
assert_file_extension(file_name, expected_extension='.xlsx')

2024-02-03 15:30:48,625 - DEBUG - Running: assert_file_extension


True

In [91]:
# Read the Excel file.
# The workbook is opened in a context manager so the file handle is released
# as soon as every sheet has been loaded into memory.
with pd.ExcelFile(file_name) as xls:
    # Read in sheets as dfs: one DataFrame per sheet, keyed by sheet name
    account_list_dfs_dict = read_sheets_as_df(xls)

# Display the loaded DataFrames for inspection
account_list_dfs_dict

2024-02-03 15:30:50,746 - DEBUG - Running: read_sheets_as_df


{'Sheet1':   list_1
 0      A
 1      B
 2      D
 3      G,
 'Sheet2':   list_2
 0      A
 1      B
 2      C
 3      E
 4      F,
 'Sheet3':   list_3
 0      A
 1      B
 2      C
 3      E
 4      F
 5      G}

In [94]:
# Run the module's basic sanity checks over the per-sheet DataFrames.
# NOTE(review): which conditions are checked is defined in account_list_generator, not here.
basic_checks_acc_gen(account_list_dfs_dict=account_list_dfs_dict)

Running yaml logger
2024-02-03 15:31:59,161 - DEBUG - Running: basic_checks_acc_gen
2024-02-03 15:31:59,162 - DEBUG - All checks have passes.


True

In [97]:
# Get a list of unique accounts from all lists.
# Returns two values: the per-sheet account lists (sheet name -> list of accounts)
# and the combined list of unique account names.
# NOTE(review): the unique list is not in account order in the logged run - do not rely on its ordering.
account_lists_dict, unique_accounts_list=get_list_of_unique_accounts(account_list_dfs_dict)

Running yaml logger
2024-02-03 15:39:32,159 - DEBUG - 
Running: get_list_of_unique_accounts
2024-02-03 15:39:32,159 - DEBUG - account_lists_dict:{'Sheet1': ['A', 'B', 'D', 'G'], 'Sheet2': ['A', 'B', 'C', 'E', 'F'], 'Sheet3': ['A', 'B', 'C', 'E', 'F', 'G']}
2024-02-03 15:39:32,160 - DEBUG - list of all unique accounts:['A', 'B', 'F', 'D', 'G', 'E', 'C']


In [98]:
# For every unique account, collect the accounts observed 'before' and 'after' it
# across the sheets (the logged check_account_order_dict shows the structure).
check_account_order_dict=generate_check_account_order_dict(account_lists_dict, unique_accounts_list)

2024-02-03 15:39:34,083 - DEBUG - 
Running: generate_check_account_order_dict
2024-02-03 15:39:34,083 - DEBUG - Current account listing:Sheet1
2024-02-03 15:39:34,084 - DEBUG - Current account listing:Sheet2
2024-02-03 15:39:34,085 - DEBUG - Current account listing:Sheet3
2024-02-03 15:39:34,085 - DEBUG - check_account_order_dict:{'A': {'before': [], 'after': ['B', 'F', 'D', 'G', 'E', 'C']}, 'B': {'before': ['A'], 'after': ['F', 'D', 'G', 'E', 'C']}, 'F': {'before': ['B', 'E', 'A', 'C'], 'after': ['G']}, 'D': {'before': ['B', 'A'], 'after': ['G']}, 'G': {'before': ['A', 'B', 'F', 'D', 'E', 'C'], 'after': []}, 'E': {'before': ['B', 'A', 'C'], 'after': ['G', 'F']}, 'C': {'before': ['B', 'A'], 'after': ['G', 'E', 'F']}}


In [31]:
# Check all account lists are in the expected order.
# Stops the notebook here with a readable message if the checker reports a problem.
assert complete_account_order_checker(
        check_account_order_dict,
        account_lists_dict,
), 'Some accounts are not in the correct order, please review'

# At this point you have a dict of account lists (one per sheet)
# These account lists are all in the correct order

In [32]:
# Derive one combined ordering of all accounts from the before/after constraints.
ordered_account_list = correctly_ordered_list(check_account_order_dict)
# Display the result
ordered_account_list

['A', 'B', 'C', 'E', 'D', 'F', 'G']

In [111]:
# Generate Excel with details on accounts.
# The log shows the workbook is saved under a timestamped "Ordered_Accounts_..." name.
document_account_lists(ordered_account_list,account_lists_dict,)

2024-02-03 16:24:27,242 - DEBUG - 


Running: document_account_lists
2024-02-03 16:24:27,247 - INFO - Excel saved as: Ordered_Accounts_2024_02_03_at_04_24PM_.xlsx


In [55]:
# NOTE(review): repeats the "Run Complete Script" call; its recorded output predates the other cells
# (execution count 55) - consider removing this cell or re-running the notebook top to bottom.
main(file_name = file_name)

2024-02-03 14:22:02,507 - INFO - Main file running


['A', 'B', 'C', 'E', 'D', 'F', 'G']