# Example notebook for evaluating OCR results

In [1]:
import pandas as pd
import stringalign
from stringalign.evaluation import TranscriptionEvaluator

## Some example data
The reference/predicted pairs below are made up for illustration; they are not real OCR output.

In [2]:
# Made-up transcription pairs used as demo input for the evaluator.
# Each entry is a dict with exactly two keys:
#   "reference" - the text the prediction is compared against
#   "predicted" - the transcription being evaluated
# Many pairs are identical; the rest differ in letter case, punctuation or
# apostrophes, dropped or repeated letters, or extra/missing words.
example_strings = [
    # Case difference only.
    {"reference": "abc", "predicted": "ABc"},
    # One letter of a doubled pair is missing in the prediction.
    {"reference": "hello", "predicted": "helo"},
    # A doubled letter is repeated once more in the prediction.
    {"reference": "hello", "predicted": "helllo"},
    {"reference": "hooked on knitting, one stitch at a time", "predicted": "hooked on knitting, one stitch at a time"},
    {"reference": "A turtle named Bob", "predicted": "a turtle named bob"},
    {"reference": "- 'Pass me the cupcakes, please'", "predicted": "'Pass me the cupcakes, please'"},
    {
        "reference": "Continue knitting in stockinette st for four rounds",
        "predicted": "Continue nitting in stockinete st for four round",
    },
    {"reference": "Purls of wisdom: always count your rows.", "predicted": "PURLS OF WISDOM: ALWAYS COUNT YOUR ROWS"},
    {"reference": "Python: the snake that codes", "predicted": "Python: the snake that codes"},
    {"reference": "Embroidery: painting with thread", "predicted": "Embroidery: painting with needle and thread"},
    {
        "reference": "Cupcakes are muffins that believed in miracles",
        "predicted": "Cupcakes are muffins that believed in miracles!",
    },
    {"reference": "Knit one, purl two, repeat until fabulous", "predicted": "Knit 1, purl 2, repeat until fabulous"},
    {"reference": "Turtles: nature's living tanks", "predicted": "Turtles: natures living tanks"},
    {"reference": "Snakes: no arms, no legs, no problem", "predicted": "Snakes: no arms, no legs, no problem!"},
    {
        "reference": "Yarn: a cat's worst enemy and a knitter's best friend",
        "predicted": "Yarn: a cats worst enemy and a knitters best friend",
    },
    {"reference": "def knit_scarf(yarn, needles):", "predicted": "def knit_scarf(yarn, needles):"},
    {
        "reference": "Embroidery hoops: framing your creativity",
        "predicted": "Embroidery hoops: framing your creativity one stitch at a time",
    },
    {
        "reference": "Cupcake decorating: where sprinkles reign supreme",
        "predicted": "Cupcake decorating: where sprinkles reign supreme!",
    },
    {
        "reference": "Turtle power: slow and steady wins the race",
        "predicted": "Turtle power: slow and steady wins the race",
    },
    {"reference": "Python: indentation matters", "predicted": "Python: indentation maters"},
    {
        "reference": "Knitting needles: the magic wands of fiber artists",
        "predicted": "Knitting needles: the magic wands of fiber artists",
    },
    {
        "reference": "Snakes on a plane? No, snakes in my code!",
        "predicted": "Snakes on a plane? No, snakes in my code!",
    },
    {"reference": "Embroidery floss: tangled rainbows", "predicted": "Embroidery floss: tangled rainbows of joy"},
    {
        "reference": "Cupcakes: because sometimes you need cake NOW",
        "predicted": "Cupcakes: because sometimes you need cake NOW",
    },
    {"reference": "Turtles all the way down", "predicted": "turtles all the way down"},
    {
        "reference": "Python: where indentation is not just pretty, it's required",
        "predicted": "Python: where indentation is not just pretty, its required",
    },
    {"reference": "Knit happens", "predicted": "Knit happens!"},
    {
        "reference": "Embroidery: threading the needle of creativity",
        "predicted": "Embroidery: threading the needle of creativity",
    },
    {"reference": "Cupcake or muffin? That is the question", "predicted": "Cupcake or muffin? That is the question."},
    {"reference": "Turtles: masters of the slow and low", "predicted": "Turtles: masters of the slow and low"},
    {
        "reference": "Python: where whitespace is not just space",
        "predicted": "Python: where whitespace is not just space",
    },
    {
        "reference": "Knitting: creating fabric one loop at a time",
        "predicted": "Knitting: creating fabric one loop at a time",
    },
    {"reference": "Snakes: noodles with attitude", "predicted": "Snakes: noodles with atitude"},
    {"reference": "Embroidery: pixel art with thread", "predicted": "Embroidery: pixel art with thread"},
    {"reference": "Cupcakes: frosting delivery systems", "predicted": "Cupcakes: frosting delivery systems"},
    {"reference": "Turtles: nature's mobile homes", "predicted": "Turtles: natures mobile homes"},
    {
        "reference": "Python: where 'import this' is a zen moment",
        "predicted": "Python: where 'import this' is a zen moment",
    },
    {"reference": "Knit fast, die warm", "predicted": "Knit fast, die warm"},
    {
        "reference": "Embroidery: stabbing fabric repeatedly until it's pretty",
        "predicted": "Embroidery: stabbing fabric repeatedly until its pretty",
    },
    {
        "reference": "Cupcakes: because full-sized cakes are too committed",
        "predicted": "Cupcakes: because full-sized cakes are too commited",
    },
    {
        "reference": "Turtles: experts in social distancing since forever",
        "predicted": "Turtles: experts in social distancing since forever",
    },
    {"reference": "Python: where 'else' can follow 'for'", "predicted": "Python: where else can follow for"},
    {
        "reference": "Knitting: making winter bearable one scarf at a time",
        "predicted": "Knitting: making winter bearable one scarf at a time",
    },
    {"reference": "Snakes: danger noodles or nope ropes?", "predicted": "Snakes: danger noodles or nope ropes?"},
    {
        "reference": "Embroidery: where mistakes become 'design features'",
        "predicted": "Embroidery: where mistakes become design features",
    },
    {
        "reference": "Cupcakes: the original single-serving dessert",
        "predicted": "Cupcakes: the original single-serving desert",
    },
    {
        "reference": "Turtles: proving that slow and steady wins the race",
        "predicted": "Turtles: proving that slow and steady wins the race",
    },
    {
        "reference": "Python: where 'yield' is more than just giving up",
        "predicted": "Python: where yield is more than just giving up",
    },
    {"reference": "Knitting: cheaper than therapy", "predicted": "Knitting: cheaper than therapy"},
    {"reference": "Embroidery: painting with thread", "predicted": "Embroidery: painting with thread"},
    {
        "reference": "Cupcakes: because everyone deserves a little cake",
        "predicted": "Cupcakes: because everyone deserves a little cake",
    },
    {"reference": "Turtles: the original tank tops", "predicted": "Turtles: the original tank tops"},
    {"reference": "Python: where 'self' is not selfish", "predicted": "Python: where self is not selfish"},
    {
        "reference": "Knitting: creating warmth one stitch at a time",
        "predicted": "Knitting: creating warmth one stitch at a time",
    },
    {"reference": "Snakes: the original legless dancers", "predicted": "Snakes: the original legles dancers"},
    {
        "reference": "Embroidery: where every stitch tells a story",
        "predicted": "Embroidery: where every stitch tells a story",
    },
    {
        "reference": "Cupcakes: because sometimes you can have your cake and eat it too",
        "predicted": "Cupcakes: because sometimes you can have your cake and eat it too",
    },
    {"reference": "Turtles: nature's ultimate introverts", "predicted": "Turtles: natures ultimate introverts"},
    {"reference": "Python: where 'pass' is a valid statement", "predicted": "Python: where pass is a valid statement"},
    {"reference": "Knitting: turning string into things", "predicted": "Knitting: turning string into things"},
    {
        "reference": "Embroidery: where every mistake is just a new design opportunity",
        "predicted": "Embroidery: where every mistake is just a new design oportunity",
    },
    {
        "reference": "Cupcakes: because life is short, eat dessert first",
        "predicted": "Cupcakes: because life is short, eat dessert first",
    },
    {"reference": "Turtles: masters of the art of chilling", "predicted": "Turtles: masters of the art of chilling"},
    {
        "reference": "Python: where 'lambda' is more than just a Greek letter",
        "predicted": "Python: where lambda is more than just a Greek letter",
    },
    {"reference": "Knitting: because naked sheep are cold", "predicted": "Knitting: because naked sheep are cold"},
    {"reference": "Snakes: nature's slinkies", "predicted": "Snakes: natures slinkies"},
    {
        "reference": "Embroidery: adding color to life, one stitch at a time",
        "predicted": "Embroidery: adding color to life, one stitch at a time",
    },
    {
        "reference": "Cupcakes: spreading happiness, one bite at a time",
        "predicted": "Cupcakes: spreading hapiness, one bite at a time",
    },
    {
        "reference": "Turtles: where 'shell we go?' is always a valid question",
        "predicted": "Turtles: where shell we go? is always a valid question",
    },
    {
        "reference": "Python: where 'dict' is not short for dictator",
        "predicted": "Python: where dict is not short for dictator",
    },
    {"reference": "Knitting: creating warmth from thin air", "predicted": "Knitting: creating warmth from thin air"},
    {"reference": "Embroidery: where every thread counts", "predicted": "Embroidery: where every thread counts"},
    {
        "reference": "Cupcakes: because you can't be sad when you're holding a cupcake",
        "predicted": "Cupcakes: because you cant be sad when youre holding a cupcake",
    },
    {"reference": "Turtles: nature's ultimate survivors", "predicted": "Turtles: natures ultimate survivors"},
    {"reference": "Python: where 'None' is something", "predicted": "Python: where None is something"},
    {
        "reference": "Knitting: where dropping a stitch is not the end of the world",
        "predicted": "Knitting: where droping a stitch is not the end of the world",
    },
    {
        "reference": "Snakes: the original 'no limbs' challenge winners",
        "predicted": "Snakes: the original no limbs challenge winners",
    },
    {
        "reference": "Embroidery: where patience meets creativity",
        "predicted": "Embroidery: where patience meets creativity",
    },
    {
        "reference": "Cupcakes: because sometimes you need a little sweetness in life",
        "predicted": "Cupcakes: because sometimes you need a little sweetnes in life",
    },
    {
        "reference": "Turtles: proving that slow and steady wins the race since ancient times",
        "predicted": "Turtles: proving that slow and steady wins the race since ancient times",
    },
    {"reference": "Python: where 'True' is not always true", "predicted": "Python: where True is not always true"},
    {
        "reference": "Knitting: where 'frogging' doesn't involve amphibians",
        "predicted": "Knitting: where frogging doesnt involve amphibians",
    },
    {
        "reference": "Embroidery: where every stitch is a pixel",
        "predicted": "Embroidery: where every stitch is a pixel",
    },
    {
        "reference": "Cupcakes: because everyone deserves their own personal cake",
        "predicted": "Cupcakes: because everyone deserves their own personal cake",
    },
    {
        "reference": "Turtles: the original shell game champions",
        "predicted": "Turtles: the original shell game champions",
    },
    {
        "reference": "Python: where 'beautiful is better than ugly'",
        "predicted": "Python: where beautiful is beter than ugly",
    },
    {
        "reference": "Knitting: where 'casting on' doesn't involve fishing",
        "predicted": "Knitting: where casting on doesnt involve fishing",
    },
    {
        "reference": "Snakes: the original 'no hands' challenge winners",
        "predicted": "Snakes: the original no hands challenge winners",
    },
    {
        "reference": "Embroidery: where every knot is a full stop",
        "predicted": "Embroidery: where every knot is a full stop",
    },
    {
        "reference": "Cupcakes: because size doesn't matter when it comes to cake",
        "predicted": "Cupcakes: because size doesnt matter when it comes to cake",
    },
    {
        "reference": "Turtles: where 'shell shock' is just a lifestyle",
        "predicted": "Turtles: where shell shock is just a lifestyle",
    },
    {
        "reference": "Python: where 'explicit is better than implicit'",
        "predicted": "Python: where explicit is better than implicit",
    },
    {"reference": "Knitting: where 'tension' is a good thing", "predicted": "Knitting: where tension is a good thing"},
    {
        "reference": "Embroidery: where every backstitch tells a story",
        "predicted": "Embroidery: where every backstitch tells a story",
    },
    {
        "reference": "Cupcakes: because frosting makes everything better",
        "predicted": "Cupcakes: because frosting makes everything beter",
    },
    {
        "reference": "Turtles: where 'coming out of your shell' is optional",
        "predicted": "Turtles: where coming out of your shell is optional",
    },
    {
        "reference": "Python: where 'flat is better than nested'",
        "predicted": "Python: where flat is better than nested",
    },
    {
        "reference": "Knitting: where 'dropping a stitch' is not about exercising",
        "predicted": "Knitting: where dropping a stitch is not about exercising",
    },
    {
        "reference": "Snakes: where 'shedding' is a fashion statement",
        "predicted": "Snakes: where shedding is a fashion statement",
    },
    {"reference": "Embroidery: where 'knotty' is nice", "predicted": "Embroidery: where knotty is nice"},
    {
        "reference": "Cupcakes: because sometimes you need a little cake in your life",
        "predicted": "Cupcakes: because sometimes you need a little cake in your life",
    },
]

## Create evaluator and dataframe

In [3]:
# Split the example pairs into two parallel lists (same order, same length)
# and hand them to the evaluator: references first, predictions second.
references = [pair["reference"] for pair in example_strings]
predictions = [pair["predicted"] for pair in example_strings]
evaluator = TranscriptionEvaluator.from_strings(references, predictions)

In [4]:
# Turn the evaluator's dumped per-line results into a table and preview
# the first five rows.
line_records = evaluator.dump()
df = pd.DataFrame(line_records)
df.head(5)

Unnamed: 0,reference,predicted,horisontal_segmentation_error,character_duplication_error,removed_duplicate_character_error,case_error
0,abc,ABc,True,False,False,True
1,hello,helo,False,False,True,False
2,hello,helllo,False,True,False,False
3,"hooked on knitting, one stitch at a time","hooked on knitting, one stitch at a time",False,False,False,False
4,A turtle named Bob,a turtle named bob,True,False,False,True


## Inspect the mistakes

### Look at most common mistakes

In [6]:
# Ten most frequent edit operations, each printed with its occurrence count.
top_edits = evaluator.confusion_matrix.edit_counts.most_common(10)
for edit, count in top_edits:
    print(f"{edit}: {count:2d}")

Insert(substring="'"): 62
Delete(substring=' '):  9
Insert(substring='t'):  6
Replace(substring='O', replacement='o'):  5
Delete(substring='e'):  5
Insert(substring='s'):  4
Replace(substring='S', replacement='s'):  4
Delete(substring='!'):  4
Delete(substring='t'):  4
Replace(substring='A', replacement='a'):  3


### Look at most commonly missed letters

In [7]:
# Ten characters with the highest false-negative counts, i.e. the ones most
# often missing from the predictions.
top_missed = evaluator.confusion_matrix.false_negatives.most_common(10)
for char, count in top_missed:
    print(f"{char}: {count:2d}")

': 62
t:  8
s:  8
o:  7
w:  4
a:  3
l:  3
u:  3
r:  3
p:  3


### Look at most hallucinated letters

In [8]:
# Ten characters with the highest false-positive counts, i.e. the ones most
# often predicted without a matching character in the reference.
top_hallucinated = evaluator.confusion_matrix.false_positives.most_common(10)
for char, count in top_hallucinated:
    print(f"{char}: {count:2d}")

 :  9
O:  5
e:  5
t:  5
a:  4
S:  4
!:  4
A:  3
U:  3
R:  3


### Look at most commonly recognised letters

In [9]:
# Ten characters with the highest true-positive counts.
top_correct = evaluator.confusion_matrix.true_positives.most_common(10)
for char, count in top_correct:
    print(f"{char}: {count:2d}")

 : 569
e: 464
t: 319
i: 269
s: 259
n: 253
a: 232
r: 228
o: 195
h: 137


### Letter sensitivity

In [10]:
# Per-character sensitivity, printed as a percentage in the order the
# mapping yields its items.
sensitivity_by_char = evaluator.confusion_matrix.compute_sensitivity()
for char, sensitivity in sensitivity_by_char.items():
    print(f"{char}: {sensitivity:.1%}")

h: 100.0%
g: 100.0%
,: 100.0%
P: 100.0%
C: 100.0%
:: 100.0%
E: 100.0%
v: 100.0%
K: 100.0%
S: 100.0%
Y: 100.0%
_: 100.0%
(: 100.0%
): 100.0%
?: 100.0%
N: 100.0%
!: 100.0%
O: 100.0%
W: 100.0%
j: 100.0%
q: 100.0%
x: 100.0%
z: 100.0%
G: 100.0%
 : 99.8%
e: 99.8%
i: 99.6%
n: 99.2%
c: 99.0%
d: 98.9%
m: 98.8%
a: 98.7%
r: 98.7%
k: 98.2%
b: 98.2%
f: 97.9%
l: 97.7%
t: 97.6%
y: 97.5%
u: 97.1%
s: 97.0%
o: 96.5%
p: 95.7%
w: 94.4%
T: 94.4%
-: 66.7%
': 6.1%
A: 0.0%
B: 0.0%
.: 0.0%


### Letter precision

In [11]:
# Per-character precision, printed as a percentage in the order the
# mapping yields its items.
precision_by_char = evaluator.confusion_matrix.compute_precision()
for char, precision in precision_by_char.items():
    print(f"{char}: {precision:.1%}")

k: 100.0%
g: 100.0%
,: 100.0%
u: 100.0%
r: 100.0%
': 100.0%
P: 100.0%
p: 100.0%
:: 100.0%
E: 100.0%
w: 100.0%
v: 100.0%
K: 100.0%
_: 100.0%
(: 100.0%
): 100.0%
?: 100.0%
q: 100.0%
x: 100.0%
z: 100.0%
-: 100.0%
G: 100.0%
s: 99.6%
h: 99.3%
i: 99.3%
c: 99.0%
e: 98.9%
n: 98.8%
m: 98.8%
y: 98.7%
o: 98.5%
t: 98.5%
 : 98.4%
l: 98.4%
a: 98.3%
b: 98.2%
d: 97.9%
f: 97.9%
C: 94.7%
T: 94.4%
j: 85.7%
N: 75.0%
S: 69.2%
Y: 33.3%
W: 25.0%
!: 20.0%
O: 16.7%


### Letter $F_1$ score

In [12]:
# Per-character F_1 score, printed as a percentage in the order the
# mapping yields its items.
f1_by_char = evaluator.confusion_matrix.compute_f1_score()
for char, score in f1_by_char.items():
    print(f"{char}: {score:.1%}")

,: 100.0%
P: 100.0%
x: 100.0%
v: 100.0%
?: 100.0%
g: 100.0%
E: 100.0%
): 100.0%
q: 100.0%
:: 100.0%
G: 100.0%
z: 100.0%
(: 100.0%
K: 100.0%
_: 100.0%
h: 99.6%
i: 99.4%
e: 99.4%
r: 99.3%
 : 99.1%
k: 99.1%
c: 99.0%
n: 99.0%
m: 98.8%
u: 98.5%
a: 98.5%
d: 98.4%
s: 98.3%
b: 98.2%
y: 98.1%
l: 98.1%
t: 98.0%
f: 97.9%
p: 97.8%
o: 97.5%
C: 97.3%
w: 97.1%
T: 94.4%
j: 92.3%
N: 85.7%
S: 81.8%
-: 80.0%
Y: 50.0%
W: 40.0%
!: 33.3%
O: 28.6%
': 11.4%
.: 0.0%
B: 0.0%
U: 0.0%
I: 0.0%
R: 0.0%
1: 0.0%
2: 0.0%
D: 0.0%
A: 0.0%
L: 0.0%
M: 0.0%
F: 0.0%


### Aggregated $F_1$ score

In [13]:
# Passing a string of characters gives a single F_1 score aggregated over
# those characters, instead of the per-character mapping.
letters = "abc"
aggregated_f1 = evaluator.confusion_matrix.compute_f1_score(letters)
print(f"F_1 score for the letters {letters!r}, aggregated: {aggregated_f1:.1%}")

F_1 score for the letters 'abc', aggregated: 98.6%


## Look at data subsets

### Only lines with potential segmentation errors

In [14]:
# Keep only the rows whose segmentation-error flag is True.
# The column name is spelled "horisontal" in the data and must match exactly.
df[df["horisontal_segmentation_error"]]

Unnamed: 0,reference,predicted,horisontal_segmentation_error,character_duplication_error,removed_duplicate_character_error,case_error
0,abc,ABc,True,False,False,True
4,A turtle named Bob,a turtle named bob,True,False,False,True
5,"- 'Pass me the cupcakes, please'","'Pass me the cupcakes, please'",True,False,False,False
6,Continue knitting in stockinette st for four r...,Continue nitting in stockinete st for four round,True,False,True,False
7,Purls of wisdom: always count your rows.,PURLS OF WISDOM: ALWAYS COUNT YOUR ROWS,True,False,False,True
10,Cupcakes are muffins that believed in miracles,Cupcakes are muffins that believed in miracles!,True,False,False,False
13,"Snakes: no arms, no legs, no problem","Snakes: no arms, no legs, no problem!",True,False,False,False
16,Embroidery hoops: framing your creativity,Embroidery hoops: framing your creativity one ...,True,False,False,False
17,Cupcake decorating: where sprinkles reign supreme,Cupcake decorating: where sprinkles reign supr...,True,False,False,False
22,Embroidery floss: tangled rainbows,Embroidery floss: tangled rainbows of joy,True,False,False,False


### Only lines with potential case-errors

In [15]:
# Keep only the rows whose case-error flag is True.
df[df["case_error"]]

Unnamed: 0,reference,predicted,horisontal_segmentation_error,character_duplication_error,removed_duplicate_character_error,case_error
0,abc,ABc,True,False,False,True
4,A turtle named Bob,a turtle named bob,True,False,False,True
7,Purls of wisdom: always count your rows.,PURLS OF WISDOM: ALWAYS COUNT YOUR ROWS,True,False,False,True
24,Turtles all the way down,turtles all the way down,True,False,False,True


### Only lines with removed duplicated characters

In [16]:
# Keep only the rows flagged as having a removed duplicate character.
df[df["removed_duplicate_character_error"]]

Unnamed: 0,reference,predicted,horisontal_segmentation_error,character_duplication_error,removed_duplicate_character_error,case_error
1,hello,helo,False,False,True,False
6,Continue knitting in stockinette st for four r...,Continue nitting in stockinete st for four round,True,False,True,False
19,Python: indentation matters,Python: indentation maters,False,False,True,False
32,Snakes: noodles with attitude,Snakes: noodles with atitude,False,False,True,False
39,Cupcakes: because full-sized cakes are too com...,Cupcakes: because full-sized cakes are too com...,False,False,True,False
45,Cupcakes: the original single-serving dessert,Cupcakes: the original single-serving desert,False,False,True,False
54,Snakes: the original legless dancers,Snakes: the original legles dancers,False,False,True,False
60,Embroidery: where every mistake is just a new ...,Embroidery: where every mistake is just a new ...,False,False,True,False
67,"Cupcakes: spreading happiness, one bite at a time","Cupcakes: spreading hapiness, one bite at a time",False,False,True,False
75,Knitting: where dropping a stitch is not the e...,Knitting: where droping a stitch is not the en...,False,False,True,False


### Only lines with added duplicated characters

In [17]:
# Keep only the rows flagged as having an added duplicate character.
df[df["character_duplication_error"]]

Unnamed: 0,reference,predicted,horisontal_segmentation_error,character_duplication_error,removed_duplicate_character_error,case_error
2,hello,helllo,False,True,False,False
