In [1]:
!pip install openpyxl fuzzywuzzy python-Levenshtein

Defaulting to user installation because normal site-packages is not writeable
Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-Levenshtein)
  Downloading levenshtein-0.27.1-cp313-cp313-win_amd64.whl.metadata (3.6 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp313-cp313-win_amd64.whl (100 kB)
Installing collected packages: Levenshtein, python-Levenshtein
Successfully installed Levenshtein-0.27.1 python-Levenshtein-0.27.1



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:

import pandas as pd
from datetime import datetime
from fuzzywuzzy import fuzz, process
import matplotlib.pyplot as plt
import seaborn as sns
import io
import ipywidgets as widgets
from IPython.display import display

# 📊 Setup
sns.set_theme(style="whitegrid", palette="viridis")
plt.rcParams['figure.figsize'] = (10, 6)

# 🛠 Helpers
def safe_float(x):
    try:
        return float(str(x).replace(',', '').strip())
    except:
        return None

def parse_date(date_str):
    formats = ['%Y-%m-%d', '%d/%m/%Y', '%d.%m.%Y', '%Y/%m/%d']
    for fmt in formats:
        try:
            return datetime.strptime(date_str.strip(), fmt)
        except:
            continue
    return None

def normalize_bank_key(name):
    return name.strip().lower().replace('.csv', '').replace(' ', '_')


In [3]:

upload_adj = widgets.FileUpload(description="📥 Upload Local Currency Adjustment CSV", accept='.csv')
upload_banks = widgets.FileUpload(description="🏦 Upload Bank Records (CSV)", multiple=True, accept='.csv')
display(upload_adj, upload_banks)


FileUpload(value=(), accept='.csv', description='📥 Upload Local Currency Adjustment CSV')

FileUpload(value=(), accept='.csv', description='🏦 Upload Bank Records (CSV)', multiple=True)

In [4]:

bank_raw_files = {}
bank_dfs = {}

if upload_adj.value:
    adj_key, adj_data = list(upload_adj.value.items())[0]
    adjustment_df = pd.read_csv(io.BytesIO(adj_data['content']))
    print(f"✅ Loaded Adjustment file: {adj_key} ({adjustment_df.shape})")

if upload_banks.value:
    for fname, meta in upload_banks.value.items():
        df = pd.read_csv(io.BytesIO(meta['content']))
        key = normalize_bank_key(fname)
        bank_raw_files[key] = fname
        bank_dfs[key] = df
        print(f"🏦 Loaded: {fname} as {key} ({df.shape})")


In [5]:

# Step 1: Prepare bank names for matching
bank_keys = list(bank_dfs.keys())

# Step 2: Match function
def find_best_bank_match(name):
    match, score = process.extractOne(name, bank_keys, scorer=fuzz.token_sort_ratio)
    return match if score >= 85 else None


In [6]:

matched = []
unmatched = []

for idx, row in adjustment_df.iterrows():
    status = str(row.get('Status', '')).lower().strip()
    if status != 'successful':
        continue

    intermed = str(row.get('Intermediary Account', '')).strip().lower()
    date_raw = str(row.get('Completed At', '')).strip()
    amount = safe_float(row.get('Amount'))
    operation = str(row.get('Operation', '')).lower()

    if operation not in ['credit', 'debit'] or not amount or not date_raw:
        continue

    parsed_date = parse_date(date_raw)
    if not parsed_date:
        continue

    bank_key = find_best_bank_match(intermed)
    if not bank_key or bank_key not in bank_dfs:
        unmatched.append({**row, "Reason": "Bank match not found"})
        continue

    bank_df = bank_dfs[bank_key]
    date_cols = [c for c in bank_df.columns if 'date' in c.lower()]
    amt_cols = [c for c in bank_df.columns if operation in c.lower()]

    found = False
    for dc in date_cols:
        for ac in amt_cols:
            try:
                bank_df[dc] = bank_df[dc].astype(str)
                date_match = bank_df[dc].apply(lambda x: parsed_date.strftime('%d/%m/%Y') in x or parsed_date.strftime('%d.%m.%Y') in x)
                amt_match = bank_df[ac].apply(lambda x: abs(safe_float(x) - amount) < 1)
                match_df = bank_df[date_match & amt_match]
                if not match_df.empty:
                    matched.append({**row, "Matched Table": bank_key, "Matched Column": ac})
                    found = True
                    break
            except:
                continue
        if found:
            break
    if not found:
        unmatched.append({**row, "Reason": "No match found in bank table"})


NameError: name 'adjustment_df' is not defined

In [None]:

matched_df = pd.DataFrame(matched)
unmatched_df = pd.DataFrame(unmatched)

print("✅ Total Matched:", len(matched_df))
print("❌ Total Unmatched:", len(unmatched_df))

sns.barplot(data=pd.DataFrame({
    'Status': ['Matched', 'Unmatched'],
    'Count': [len(matched_df), len(unmatched_df)]
}), x='Status', y='Count')
plt.title("Local Currency Adjustment Matching Summary")
plt.show()
