### Last updated: 2018-12-20

### Import libraries

In [None]:
import numpy as np
import os
import sys

### Store path to repo

In [None]:
# Root directory that the data paths in this notebook are joined onto.
# NOTE(review): the value looks like a placeholder — set it to the local checkout before running.
path_to_repo = "path/to/repo"

### Read text file with 10,000 English (USA) words (no swears)

In [None]:
# I downloaded this file from https://github.com/first20hours/google-10000-english

# Read the word list inside a context manager so the file handle is closed
# as soon as its contents have been loaded.
with open(os.path.join(path_to_repo, "data", "captchas",
                       "google-10000-english-usa-no-swears-long.txt"), "r") as text_file:
    lines = text_file.read().split('\n')

# Drop empty entries (e.g. the one produced by a trailing newline)
lines = [e for e in lines if e]

# Preview the first ten words
lines[:10]

### Define function to generate captchas

In [None]:
def genCaptchas(lst, N, k):
    """
    Select N distinct random words from lst and generate a captcha for
    each by replacing the letters at k randomly selected indices with '_'.

    lst - list of str, candidate words (every drawn word needs >= k letters)
    N - int, number of words to draw (without replacement)
    k - int, number of letters to mask in each word

    Returns a two-element list [captchas, words]: captchas is a numpy
    array with the masked strings and words is the numpy array with the
    original words; both are aligned index by index.

    Raises ValueError if N is larger than len(lst) or if a drawn word has
    fewer than k letters.
    """
    random_words = np.random.choice(lst, N, replace=False)

    captchas = []
    for word in random_words:
        if len(word) < k:
            # Fail with the offending word instead of numpy's generic
            # "larger sample than population" message.
            raise ValueError(
                "cannot mask %d letters in %r (only %d letters long)"
                % (k, str(word), len(word))
            )

        # k distinct positions inside this word
        masked_ids = np.random.choice(len(word), k, replace=False)

        # Strings are immutable, so edit a list of characters and re-join
        chars = list(word)
        for j in masked_ids:
            chars[j] = '_'
        captchas.append(''.join(chars))

    return [np.asarray(captchas), random_words]

### Generate captchas and write them to text files

##### Select iteration number

In [None]:
# Prefix prepended to the names of the captcha/answer files written by the cells below
iteration_no = "0-"

##### Captchas to be used in the demo run

In [None]:
# Draw 100 words and mask 3 letters in each for the demo run
lst = genCaptchas(lines, 100, 3)

output_dir = os.path.join(path_to_repo, "data", "captchas", "output")

# Write through context managers instead of rebinding sys.stdout: every file
# is flushed and closed, and later notebook output still reaches the console.

# Masked words, one per line
with open(os.path.join(output_dir,
                       iteration_no + "captchas-100-3-demo.txt"), "w") as out_file:
    for captcha in lst[0]:
        print(captcha, file=out_file)

# Original words, in the same order as the captchas
with open(os.path.join(output_dir,
                       iteration_no + "answers-100-3-demo.txt"), "w") as out_file:
    for answer in lst[1]:
        print(answer, file=out_file)

##### Captchas to be used in the real game

In [None]:
# Draw 100 words and mask 3 letters in each for the real game
lst = genCaptchas(lines, 100, 3)

output_dir = os.path.join(path_to_repo, "data", "captchas", "output")

# Write through context managers instead of rebinding sys.stdout: every file
# is flushed and closed, and later notebook output still reaches the console.

# Masked words, one per line
with open(os.path.join(output_dir,
                       iteration_no + "captchas-100-3-real.txt"), "w") as out_file:
    for captcha in lst[0]:
        print(captcha, file=out_file)

# Original words, in the same order as the captchas
with open(os.path.join(output_dir,
                       iteration_no + "answers-100-3-real.txt"), "w") as out_file:
    for answer in lst[1]:
        print(answer, file=out_file)

##### Generate 80 separate sets of captchas

In [None]:
for i in range(1, 81):

    # Iteration number, used as the file-name prefix for this set
    iteration_no = str(i) + "-"

    # One freshly drawn set for the demo run, then one for the real run
    for run_kind in ("demo", "real"):
        lst = genCaptchas(lines, 100, 3)

        # lst[0] holds the masked words, lst[1] the matching original words
        for prefix, entries in (("captchas", lst[0]), ("answers", lst[1])):
            file_name = iteration_no + prefix + "-100-3-" + run_kind + ".txt"

            # A context manager flushes and closes each file; sys.stdout is
            # left untouched so notebook output is not redirected.
            with open(os.path.join(path_to_repo, "data", "captchas", "output",
                                   file_name), "w") as out_file:
                # Distinct loop name: the original reused `i` here and
                # shadowed the outer iteration counter.
                for entry in entries:
                    print(entry, file=out_file)