In [1]:
import pandas as pd
import numpy as np
from scipy import interpolate 
import math
import os
from pathlib import Path


In [2]:
# Number of pixels (out of 28 * 28 = 784) to obfuscate, keyed by obfuscation percentage.
pixels_affected = {
    10: 78,
    15: 118,
    20: 157,
    25: 196,
    30: 235,
    35: 274,
    40: 313,
    45: 352,
    50: 391,
    55: 430,
    60: 470,
    65: 509,
    70: 549,
}

# Integer radius of a disc whose area (pi * r^2) is closest to each pixel count.
# NOTE: because the radius is rounded to an integer, some neighbouring levels
# share a radius (30/35 -> 9, 45/50 -> 11, 55/60 -> 12, 65/70 -> 13).
radius = {}
for percent, pixel_count in pixels_affected.items():
    radius[percent] = int(round(math.sqrt(pixel_count / math.pi)))

In [3]:
# Random contiguous region selection (grown pixel by pixel from a random starting pixel)
def choose_random_region(n):
    """
    Chooses a random contiguous (4-connected) region of 'n' pixels from a 28x28 array.

    The region starts from a uniformly random pixel and grows one pixel at a
    time: a random region pixel that still has a free neighbour is picked, then
    one of its free neighbours is added.

    Parameters
    ----------
    n : int
        Number of pixels in the region, 0 <= n <= 784.

    Returns
    -------
    numpy.ndarray
        28x28 integer array with exactly n entries set to 1 and the rest 0.

    Raises
    ------
    ValueError
        If n is negative or larger than the number of pixels in the array.
    """
    array_size = 28
    if n > array_size * array_size:
        raise ValueError("n cannot be larger than the total number of pixels in the array.")
    if n < 0:
        raise ValueError("n cannot be negative.")

    # Create a 28x28 array of zeros
    array = np.zeros((array_size, array_size), dtype=int)
    if n == 0:
        # An empty region must not contain the starting pixel.
        return array

    # Randomly choose a starting point
    start_x = np.random.randint(0, array_size)
    start_y = np.random.randint(0, array_size)
    array[start_x, start_y] = 1
    region_size = 1

    # Region pixels that may still have a free neighbour. A pixel without free
    # neighbours can never regain one (pixels are only ever added), so it is
    # dropped for good instead of being re-sampled and rejected repeatedly.
    candidates = [(start_x, start_y)]

    while region_size < n:
        idx = np.random.randint(0, len(candidates))
        x, y = candidates[idx]
        neighbors = [
            (x + dx, y + dy)
            for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]
            if 0 <= x + dx < array_size
            and 0 <= y + dy < array_size
            and array[x + dx, y + dy] == 0
        ]

        if not neighbors:
            # Swap-remove the exhausted pixel; order is irrelevant for uniform sampling.
            candidates[idx] = candidates[-1]
            candidates.pop()
            continue

        nx, ny = neighbors[np.random.randint(0, len(neighbors))]
        array[nx, ny] = 1
        candidates.append((nx, ny))
        region_size += 1

    return array

In [4]:
#function that creates a circle of radius r at a random location in a 28x28 array
def create_circle(radius):
    """
    Build a 28x28 mask containing one filled disc of the given radius.

    The centre is drawn at random from the positions where the whole disc fits
    inside the array. Entries whose squared distance to the centre is at most
    radius ** 2 are set to 1; all others are 0.
    """
    array_size = 28

    # Centre coordinates are restricted so the disc never crosses the border.
    center_x = np.random.randint(radius, array_size - radius)
    center_y = np.random.randint(radius, array_size - radius)

    # Evaluate the disc inequality for the whole grid at once.
    row_idx, col_idx = np.ogrid[:array_size, :array_size]
    squared_distance = (row_idx - center_x) ** 2 + (col_idx - center_y) ** 2
    inside_disc = squared_distance <= radius ** 2

    return inside_disc.astype(int)


In [5]:
#function that creates a square, triangle, or pentagon (at random) with an area of n pixels at a random location in a 28x28 array
def create_polygon(n):
    """
    Creates a polygon of approximately a given area at a random location in a 28x28 array.

    One of three shapes is chosen uniformly at random:

    * square   - side int(sqrt(n)), so the area is the largest perfect square <= n
    * triangle - right triangle with legs int(sqrt(2 * n)), area about n
    * pentagon - regular pentagon whose continuous area is n, rasterised by
                 testing each pixel against the five edge half-planes

    The shape is placed so that it lies fully inside the array whenever it
    fits; shapes that are too large are clipped at the array border.

    Parameters
    ----------
    n : int
        Target area in pixels, 0 <= n <= 784.

    Returns
    -------
    numpy.ndarray
        28x28 integer array with the polygon's pixels set to 1 and the rest 0.

    Raises
    ------
    ValueError
        If n is negative or larger than the number of pixels in the array.
    """
    array_size = 28
    if n < 0 or n > array_size * array_size:
        raise ValueError("n must be between 0 and the total number of pixels in the array.")

    array = np.zeros((array_size, array_size), dtype=int)
    if n == 0:
        return array

    def random_start(extent):
        # Random offset that keeps `extent` pixels inside the array. Shapes that
        # do not fit start at 0 and are clipped at the far edge.
        return np.random.randint(0, max(1, array_size - extent + 1))

    # Randomly choose a shape: square, triangle, or pentagon
    shape_type = np.random.choice(['square', 'triangle', 'pentagon'])

    if shape_type == 'square':
        side_length = int(math.sqrt(n))
        start_x = random_start(side_length)
        start_y = random_start(side_length)
        array[start_x:start_x + side_length, start_y:start_y + side_length] = 1

    elif shape_type == 'triangle':
        base_length = int(math.sqrt(n * 2))  # Approximate leg length for triangle area
        start_x = random_start(base_length)
        start_y = random_start(base_length)
        # Row `offset` of the triangle is `base_length - offset` pixels wide;
        # slicing clips any part that would fall outside the array.
        for offset in range(min(base_length, array_size - start_x)):
            array[start_x + offset, start_y:start_y + base_length - offset] = 1

    else:  # 'pentagon'
        # Regular pentagon area = (5 / 2) * R^2 * sin(2 * pi / 5); solve for R.
        circumradius = math.sqrt(2 * n / (5 * math.sin(2 * math.pi / 5)))
        extent = math.ceil(2 * circumradius) + 1  # pixels spanned by the circumscribed circle
        if extent <= array_size:
            center_x = random_start(extent) + circumradius
            center_y = random_start(extent) + circumradius
        else:
            # Too large to fit: centre it so clipping is spread over the borders.
            center_x = center_y = (array_size - 1) / 2

        # A point is inside a regular polygon iff its projection onto every
        # outward edge normal does not exceed the apothem.
        apothem = circumradius * math.cos(math.pi / 5)
        xs, ys = np.indices((array_size, array_size))
        inside = np.ones((array_size, array_size), dtype=bool)
        for k in range(5):
            angle = 2 * math.pi * k / 5
            projection = (xs - center_x) * math.cos(angle) + (ys - center_y) * math.sin(angle)
            inside &= (projection <= apothem + 1e-9)  # tolerance for float round-off
        array[inside] = 1

    return array

In [6]:
#now, we create the masks at different levels and put them in their own folders 
# Generate one random mask per MNIST image for every obfuscation level and save
# each level's masks (and the type of each mask) under ./mask/.
NUM_IMAGES = 60000 + 10000  # MNIST: 60,000 train + 10,000 test images
MASK_TYPES = ['circle', 'region', 'polygon']


def generate_mask(level):
    """
    Draw one non-empty random mask for the given obfuscation level.

    A mask type is chosen uniformly at random; if the generated mask is all
    zeros, a new type is drawn and the mask is regenerated.

    Returns
    -------
    tuple
        (obfuscation_type, mask) where obfuscation_type is one of MASK_TYPES
        and mask is the 28x28 array returned by the matching generator.
    """
    while True:
        obfuscation_type = str(np.random.choice(MASK_TYPES))
        if obfuscation_type == 'circle':
            mask = create_circle(radius[level])
        elif obfuscation_type == 'region':
            mask = choose_random_region(pixels_affected[level])
        else:
            mask = create_polygon(pixels_affected[level])
        # check to make sure the mask isn't empty
        if mask.any():
            return obfuscation_type, mask


# make the folder if it doesn't exist (once, not once per level)
dir_path = Path('./mask/')
dir_path.mkdir(parents=True, exist_ok=True)

# Fixed-width unicode dtype wide enough for the longest type name.
# dtype=str would allocate '<U1' and truncate every label to its first letter.
type_dtype = f'U{max(len(name) for name in MASK_TYPES)}'

for i in range(10, 71, 5):
    # type of obfuscation used for each image
    obfuscation_array = np.empty(NUM_IMAGES, dtype=type_dtype)
    # Preallocate big array
    big_array = np.empty((NUM_IMAGES, 28, 28), dtype=np.uint8)
    # choose a random obfuscation to perform for each mnist image
    for j in range(NUM_IMAGES):
        obfuscation_type, mask = generate_mask(i)
        obfuscation_array[j] = obfuscation_type
        big_array[j] = mask
    # Save the arrays to .npy files
    np.save(dir_path / f'mask_{i}.npy', big_array)
    np.save(dir_path / f'obfuscation_type_{i}.npy', obfuscation_array)
    print(str(i) + "done")

        

10done
15done
20done
25done
30done
35done
40done
45done
50done
55done
60done
65done
70done


In [7]:
# Sanity check: print one randomly generated polygon mask for n = 78 pixels
# (the pixel count listed for the 10% level in pixels_affected).
print(create_polygon(78))

[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 