In [2]:
# Add the `watermark` package to this Poetry project (one-off setup step).
# NOTE(review): the recorded output of the import cell shows
# ModuleNotFoundError for `watermark`; presumably the kernel has to run inside
# this Poetry environment and be restarted after the install — confirm.
!poetry add watermark

Using version ^2.3.1 for watermark

Updating dependencies
Resolving dependencies...

Writing lock file

Package operations: 13 installs, 7 updates, 0 removals

  • Updating certifi (2022.9.24 -> 2022.12.7)
  • Updating charset-normalizer (2.1.1 -> 3.1.0)
  • Updating urllib3 (1.26.12 -> 1.26.15)
  • Updating filelock (3.8.0 -> 3.10.7)
  • Installing mpmath (1.3.0)
  • Updating requests (2.28.1 -> 2.28.2)
  • Installing pyyaml (6.0)
  • Installing tqdm (4.65.0)
  • Installing future (0.18.3)
  • Installing huggingface-hub (0.13.3)
  • Installing regex (2023.3.23)
  • Installing networkx (3.1)
  • Installing sympy (1.11.1)
  • Installing tokenizers (0.13.3)
  • Installing ffmpeg-python (0.2.0)
  • Updating more-itertools (9.0.0 -> 9.1.0)
  • Installing transformers (4.27.4)
  • Installing torch (2.0.0)
  • Installing watermark (2.3.1)
  • Updating whisper (1.0 9f70a35 -> 1.0 b5851c6)


In [1]:
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Wedge, Patch
from matplotlib.collections import PatchCollection
import matplotlib.font_manager as font_manager

import watermark

%load_ext watermark
%matplotlib inline

ModuleNotFoundError: No module named 'watermark'

In [None]:
# Print provenance information through the watermark extension.
# NOTE(review): the flags presumably mean -n date, -v Python/IPython version,
# -m machine info, -g git hash, -iv versions of imported packages — verify
# against the watermark documentation.
%watermark -n -v -m -g -iv

## Load the dataset


In [None]:
# Read the Nightingale workbook directly from the Viz4Sci GitHub repository
# (needs network access).
data = pd.read_excel(
    "https://github.com/DataForScience/Viz4Sci/raw/master/data/Nightingale.xlsx"
)

In [None]:
# Summary statistics of the raw dataset.
data.describe()

In [None]:
# Column names, dtypes and non-null counts.
data.info()

In [None]:
# Preview the first rows.
data.head()

Combine month and year into a single column



In [None]:
# Build a "<Month> <Year>" label (e.g. "April 1854") for every row.
# Vectorised string concatenation replaces the row-wise apply: same values,
# no Python-level loop over the rows.
data["date"] = data["Month"] + " " + data["Year"].astype(str)

In [None]:
# "Month" has been folded into "date", so drop it.
# errors="ignore" keeps this cell safe to re-run once the column is gone
# (a plain `del` raises KeyError on the second run).
data = data.drop(columns="Month", errors="ignore")

In [None]:
# "Year" has been folded into "date", so drop it.
# errors="ignore" keeps this cell safe to re-run once the column is gone
# (a plain `del` raises KeyError on the second run).
data = data.drop(columns="Year", errors="ignore")

Pivot data into a table with each cause of death as a column. We're keeping only the mortality rate.

In [None]:
# Reshape to one row per date and one column per cause of death; each cell
# holds the summed annual mortality rate for that date and cause.
infectious = data.pivot_table(
    values="Annual Mortality Rate (per 1000 soldiers)",
    index="date",
    columns="Cause of Death",
    aggfunc="sum",
)

In [None]:
# Inspect the pivoted table.
infectious

Put the columns in the right order

In [None]:
# Select the three cause columns in the order in which they will be
# accumulated across columns.
infectious = infectious.loc[
    :, ["Wounds and Injuries", "All Other Causes", "Infectious Disease"]
]

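Normalize values: square the rates, accumulate them across the causes, scale by the overall maximum, and take the square root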

In [None]:
# Square every rate, accumulate the squares left-to-right across the cause
# columns, rescale so the largest entry of the table is 1, then take the
# square root.
infectious = (
    infectious.pow(2)
    .cumsum(axis=1)
    .pipe(lambda squares: squares / squares.max().max())
    .pipe(np.sqrt)
)

Subset the rows to April 1854 – March 1855, put them in chronological order, then reverse them for plotting

In [None]:
# Months to plot, in chronological order.
order = [
    "April 1854", "May 1854", "June 1854",
    "July 1854", "August 1854", "September 1854",
    "October 1854", "November 1854", "December 1854",
    "January 1855", "February 1855", "March 1855",
]

# Keep only those months (in that order), turn the "date" index back into a
# regular column, then flip the rows so the last month comes first.
infectious = infectious.loc[order].reset_index().iloc[::-1]

Color list

In [None]:
# Fill colours, consumed by position when the wedges are drawn.
colors = [
    "#51a7f9",  # index 0: "Infectious Disease" wedges
    "black",  # index 1: "All Other Causes" wedges
    "#f9517b",  # index 2: "Wounds and Injuries" wedges
]

Generate the figure

In [None]:
# Draw the rose diagram: one 30° sector per month, with three overlapping
# wedges per sector (one per cause of death), month labels around the rim and
# a legend.
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.set_aspect(1.0)

# Colour of each cause, listed in drawing order. Wedges and legend swatches
# both look their colour up here, so the two can never disagree.
cause_colors = {
    "Infectious Disease": colors[0],
    "All Other Causes": colors[1],
    "Wounds and Injuries": colors[2],
}

patches = []
color = []
total = 90  # First wedge starts at 90°
angle = 30  # Each wedge spans 30°

# Largest value across the cause columns for every row; used to place labels.
row_peak = infectious[list(cause_colors)].max(axis=1)

for i in range(infectious.shape[0]):
    for cause, cause_color in cause_colors.items():
        value = infectious[cause].iloc[i]
        patches.append(Wedge((0, 0), np.sqrt(value), total, total + angle))
        color.append(cause_color)

    # Place the label just outside this sector's outermost wedge, but never
    # closer than 0.4 to the centre.
    mid_angle = total + angle / 2
    length = np.max([np.sqrt(row_peak.iloc[i]) + 0.02, 0.4])
    x = length * np.cos(mid_angle * np.pi / 180)
    y = length * np.sin(mid_angle * np.pi / 180)

    label = infectious["date"].iloc[i]

    # January and March keep "<month> <year>", April puts the year on a second
    # line, every other month shows the month name only.
    if label[:3] not in ["Mar", "Jan", "Apr"]:
        label = label.split()[0]
    elif label[:3] == "Apr":
        label = "\n".join(label.split())

    ax.text(
        x,
        y,
        label.upper(),
        rotation=(mid_angle - 90),  # keep the text tangent to the circle
        ha="center",
        va="center",
        fontsize=12,
    )

    total += angle

p = PatchCollection(patches, color=color, alpha=0.5)
ax.add_collection(p)
ax.set_xlim(-1.2, 1.2)
ax.set_ylim(-1.2, 1.2)

legend = [
    "deaths from preventable diseases",
    "deaths from wounds",
    "deaths from all other sources",
]
# Cause described by each legend entry, in the same order as `legend`.
# Looking the swatch colour up by cause fixes the previous position-based
# lookup, which labelled the black swatch "wounds" and the red swatch
# "all other sources" -- the opposite of how the wedges are coloured.
legend_causes = ["Infectious Disease", "Wounds and Injuries", "All Other Causes"]

# Use the calligraphy font when the file is available (adjust the path to
# your machine); otherwise fall back to the default italic font so the
# figure still renders.
font_path = Path("Lucida Calligraphy Italic.ttf")
if font_path.is_file():
    font = font_manager.FontProperties(
        fname=str(font_path), weight="normal", style="italic", size=16
    )
else:
    font = font_manager.FontProperties(weight="normal", style="italic", size=16)

# `patches` now holds the legend handles (the wedges are already in `p`).
patches = [
    Patch(color=cause_colors[cause], label=leg, alpha=0.5)
    for leg, cause in zip(legend, legend_causes)
]

ax.legend(handles=patches, loc=(0.5, 0.7), fancybox=False, frameon=False, prop=font)

ax.axis("off")
fig.tight_layout()