# Word Typo Augmentations
The function `augtxt.augmenters.wordtypo` randomly applies different augmentations to a single word.
Calling it repeatedly yields a simulated distribution of possible word augmentations, i.e. how the possible typographical errors are distributed for a specific original word.
The procedure does **not guarantee** that the original word will be augmented.

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to library source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
# Add the parent directory to the module search path; presumably so the
# `augtxt` package of this repository can be imported without installing it.
# NOTE(review): `append` places '..' last on sys.path, so an already installed
# `augtxt` would be found first — confirm that is intended.
sys.path.append('..')

In [3]:
from augtxt.augmenters import wordtypo
import augtxt.keyboard_layouts as kbl
import numpy as np
from collections import Counter

In [4]:
# Augmentation settings handed to `wordtypo`, one entry per typo function.
#   p    -- presumably the probability of applying this augmentation
#           (the five values sum to 0.14) -- TODO confirm against augtxt docs
#   fn   -- dotted name of the typo function to apply
#   args -- keyword arguments for that function; `loc` looks like a position
#           code ('b', 'm', 'e', 'u') -- verify the exact meaning in augtxt
settings = [
    dict(
        p=0.04,
        fn='typo.drop_n_next_twice',
        args=dict(loc=['m', 'e'], keep_case=True),
    ),
    dict(
        p=0.04,
        fn='typo.swap_consecutive',
        args=dict(loc=['m', 'e'], keep_case=True),
    ),
    dict(
        p=0.02,
        fn='typo.pressed_twice',
        args=dict(loc='u', keep_case=True),
    ),
    dict(
        p=0.02,
        fn='typo.drop_char',
        args=dict(loc=['m', 'e'], keep_case=True),
    ),
    # The only entry that needs keyboard data: a key map and a transition
    # table taken from `augtxt.keyboard_layouts`.
    dict(
        p=0.02,
        fn='typo.pressed_shiftalt',
        args=dict(
            loc=['b', 'm'],
            keymap=kbl.macbook_us,
            trans=kbl.keyboard_transprob,
        ),
    ),
]

In [5]:
%%time
np.random.seed(seed=42)
word = "Blume"
newwords = []
for i in range(1000):
    newwords.append( wordtypo(word, settings) )

Counter(newwords)

CPU times: user 416 ms, sys: 29.4 ms, total: 446 ms
Wall time: 423 ms


Counter({'Blume': 858,
         'Blum': 6,
         'Bllume': 3,
         'Bmme': 1,
         'Blumee': 3,
         'Bblum': 1,
         'BlUme': 2,
         'Bluee': 18,
         'Bluem': 14,
         'Buume': 8,
         'Blmme': 13,
         'Blue': 6,
         'Blmue': 4,
         'BLume': 4,
         'blume': 8,
         'Blme': 3,
         'Bume': 5,
         'Bulme': 11,
         'B¬ume': 2,
         'Bmue': 2,
         'Blumme': 1,
         'Buue': 2,
         'Bluume': 5,
         'BlumE': 2,
         'Bl¨me': 3,
         'Llume': 3,
         'Buum': 1,
         'Lume': 3,
         'Blmmee': 1,
         'Blluee': 1,
         'Blum´': 1,
         'Lbume': 1,
         'Bblume': 3,
         'BUme': 1})