In [2]:
#Install (Colab usually has these, but this ensures latest Plotly + Kaleido for export)
!pip install -q plotly kaleido


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
#Imports
import pandas as pd
import plotly.graph_objects as go
from IPython.display import HTML


In [4]:
# Build a simple illustrative income & tax dataset
# NOTE: These numbers are fictional and only for demonstration.
# We will visualize how Revenue -> Operating Profit -> Taxable Income -> Tax -> Net position flows.


def format_pct(rate: float) -> str:
    """Render a fractional rate as a percentage string, e.g. 0.21 -> "21%".

    Uses the ``g`` format instead of ``int(rate * 100)``: ``int`` truncates, so
    binary float artefacts (0.29 * 100 == 28.999999999999996) would print "28%"
    and fractional rates such as 0.255 would lose their decimals.
    """
    return f"{rate * 100:g}%"


# Income statement pieces (USD, in millions for readability)
revenue = 1000.0
cogs = 400.0
gross_profit = revenue - cogs                 # 1000 - 400 = 600
op_expenses = 200.0
operating_profit = gross_profit - op_expenses # 600 - 200 = 400
other_income = 50.0
profit_before_tax = operating_profit + other_income  # 400 + 50 = 450

# Tax adjustments
non_deductible_addbacks = 30.0    # items that increase taxable income
tax_depreciation = 50.0           # tax deduction that reduces taxable income

# Taxable income: PBT + addbacks - deductions
taxable_income = profit_before_tax + non_deductible_addbacks - tax_depreciation
# 450 + 30 - 50 = 430

statutory_tax_rate = 0.21         # 21% (change to local statutory rate as needed)
tax_liability = round(taxable_income * statutory_tax_rate, 2)  # 430 * 0.21 = 90.3

# Prepayments / withholding (example)
tax_prepaid = 100.0               # company already paid/withheld during the year

# Net tax position = prepaid - liability (positive = refund to company)
net_tax_refund = round(tax_prepaid - tax_liability, 2) # 100 - 90.3 = 9.7 (refund)

# Build a small DataFrame summary for clarity (one row per statement line, in flow order)
summary = pd.DataFrame(
    [
        ("Revenue", revenue),
        ("COGS", cogs),
        ("Gross profit", gross_profit),
        ("Operating expenses", op_expenses),
        ("Operating profit", operating_profit),
        ("Other (non-op) income", other_income),
        ("Profit before tax", profit_before_tax),
        ("Non-deductible addbacks", non_deductible_addbacks),
        ("Tax depreciation (deduction)", tax_depreciation),
        ("Taxable income", taxable_income),
        # Label is derived from the rate so it stays correct when the rate is changed.
        (f"Tax liability ({format_pct(statutory_tax_rate)})", tax_liability),
        ("Tax prepaid / credits", tax_prepaid),
        ("Net tax (refund positive)", net_tax_refund),
    ],
    columns=["Line", "Value (USD, millions)"],
)

summary

Unnamed: 0,Line,"Value (USD, millions)"
0,Revenue,1000.0
1,COGS,400.0
2,Gross profit,600.0
3,Operating expenses,200.0
4,Operating profit,400.0
5,Other (non-op) income,50.0
6,Profit before tax,450.0
7,Non-deductible addbacks,30.0
8,Tax depreciation (deduction),50.0
9,Taxable income,430.0


In [5]:
# Prepare Sankey nodes & links
# Flows read left -> right:
# Revenue -> Gross profit -> Operating profit -> Profit before tax -> Taxable income -> Tax liability -> Net tax position.

# Derive the rate shown in the node label from the variable actually used in the
# calculation, so the label cannot drift when the rate is changed.
# ":g" avoids float artefacts (0.29 * 100 == 28.999999999999996 would truncate to 28).
tax_rate_label = f"{statutory_tax_rate * 100:g}%"

labels = [
    "Revenue",                               # 0
    "COGS",                                  # 1
    "Gross profit",                          # 2
    "Operating expenses",                    # 3
    "Operating profit (EBIT)",               # 4
    "Other (non-op) income",                 # 5
    "Profit before tax (PBT)",               # 6
    "Addbacks (non-deductible)",             # 7
    "Taxable income",                        # 8
    "Tax depreciation (deduction)",          # 9 (shown as an outflow from taxable income)
    f"Tax liability (at {tax_rate_label})",  # 10
    "Tax prepaid / credits",                 # 11
    "Net tax (Payable / Refund)",            # 12
]

# One row per link: (source node index, target node index, value in USD millions).
# Keeping each link in a single tuple stops source/target/value from getting out of step.
#
# Add-backs enter as their own source node (like other income). They are added ON TOP of
# PBT, so routing them out of the PBT node as well would make PBT send out 450 + 30 = 480
# while only receiving 450.
links = [
    (0, 1, cogs),                      # Revenue -> COGS (400)
    (0, 2, gross_profit),              # Revenue -> Gross profit (600)
    (2, 3, op_expenses),               # Gross profit -> Operating expenses (200)
    (2, 4, operating_profit),          # Gross profit -> Operating profit (400)
    (4, 6, operating_profit),          # Operating profit -> PBT (400)
    (5, 6, other_income),              # Other income -> PBT (50)
    (6, 8, profit_before_tax),         # PBT -> Taxable income (450)
    (7, 8, non_deductible_addbacks),   # Addbacks -> Taxable income (+30)
    (8, 9, tax_depreciation),          # Taxable income -> Tax depreciation (50, deduction outflow)
    # NOTE(review): this link carries the whole taxable base (430), while only
    # tax_liability (90.3) flows onward from node 10 — confirm this is the intended picture.
    (8, 10, taxable_income),           # Taxable income -> Tax liability (taxable base)
    (10, 12, tax_liability),           # Tax liability -> Net tax position (90.3)
    (11, 12, tax_prepaid),             # Tax prepaid -> Net tax position (100)
]
sources, targets, values = (list(column) for column in zip(*links))

# Node colors (one per node) — friendly palette
node_colors = [
    "#4C78A8", "#A0CBE8", "#F58518", "#FFBE7D",
    "#54A24B", "#8CD17D", "#B279A2", "#E45756",
    "#72B7B2", "#9E9AC8", "#6A4A3C", "#C6B38E", "#F2F3F4"
]

# Fail early on an inconsistent definition instead of rendering a silently wrong diagram.
assert len(node_colors) == len(labels), "need exactly one color per node"
assert all(0 <= idx < len(labels) for idx in sources + targets), "link refers to an unknown node"

# Color each link like its source node for clarity
link_colors = [node_colors[src] for src in sources]

# Build Sankey
sankey = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=node_colors
    ),
    link=dict(
        source=sources,
        target=targets,
        value=values,
        color=link_colors,
        hovertemplate='%{source.label} → %{target.label}: %{value:.2f}M<extra></extra>'
    )
)

fig = go.Figure(sankey)
fig.update_layout(
    title_text="Demo: Corporate tax flow (illustrative) — Revenue → Profit → Taxable Income → Tax → Net",
    font_size=12,
    height=600,
    margin=dict(l=50, r=50, t=80, b=50)
)

fig.show()
