In [None]:
# Data Cleaning of a 4D Array with Masked Indexing
# You have a 4D array representing a batch of images, with shape (batch_size, height, width, channels). The data contains some noise and invalid values. Your goal is to clean it by clipping outliers and removing negative sensor errors.

# Your Task:

# Create a random 4D NumPy array named image_batch of shape (2, 10, 10, 3). Populate it with integers from -20 to 120.
# Create a boolean mask to identify all values that are "outliers" (less than 0 or greater than 100).
# Use the mask to clip the data: set all values greater than 100 to 100, and all values less than 0 to 0. (Hint: You can do this with two separate masks or one combined operation).
# Finally, create another mask to find all values that are exactly 0 and replace them with -1 to indicate they were corrected values.
# Print the array after clipping and after the final replacement.



import numpy as np

# Step 1: Create a random 4D array (2 images, 10x10 pixels, 3 channels).
# randint's upper bound is exclusive, so 121 yields integers in [-20, 120].
image_batch = np.random.randint(-20, 121, size=(2, 10, 10, 3))
print("Original image_batch:\n", image_batch)

# Step 2: Boolean mask for outliers (<0 or >100); True marks a value to fix.
outliers_mask = (image_batch < 0) | (image_batch > 100)

# Step 3: Clip the outliers through the mask in one combined operation.
# Only the masked entries are rewritten: np.clip maps values >100 to 100
# and values <0 to 0, so in-range values are never touched.
image_batch[outliers_mask] = np.clip(image_batch[outliers_mask], 0, 100)
print("\nAfter clipping:\n", image_batch)

# Step 4: Replace every 0 with -1 to flag corrected values.
# Caveat: after clipping, a value that was 0 in the original data cannot be
# told apart from a negative clipped to 0, so both end up as -1.
corrected_mask = image_batch == 0
image_batch[corrected_mask] = -1
print("\nAfter replacing corrected values (0 -> -1):\n", image_batch)




# np.random.randint(-20, 121, size=(2, 10, 10, 3))
# → Generates values in range [-20, 120].

# Mask:

# (image_batch < 0) | (image_batch > 100)


# finds all invalid values.

# Clipping outliers:

# Values > 100 → 100

# Values < 0 → 0

# Mark corrected values:

# All 0s are replaced with -1. This covers negatives that were clipped to 0 and also any values that were already 0 in the original data.

In [4]:
import numpy as np

# Random 5x5 array of integers between -10 and 10
# (randint's upper bound is exclusive, hence 11).
data = np.random.randint(-10, 11, size=(5, 5))
print("Original Data:\n", data)

# A mask is a Boolean array (True/False) with the same shape as the data,
# created by applying a condition element-wise.

# Example: mask that is True wherever the value is negative
mask = data < 0
print("Mask (True = negative):\n", mask)


# Clean data using the mask
# Option A: Replace negatives with 0.
# Work on a copy so the original `data` stays available for Option B.
cleaned = data.copy()
cleaned[mask] = 0
print("Cleaned Data (negatives set to 0):\n", cleaned)

# Option B: Extract only the valid values.
# Boolean indexing returns a flattened 1-D array. Inverting the mask keeps
# every value >= 0, so zeros are included: the result is the non-negative
# values, not the strictly positive ones. The variable name is kept as-is
# for compatibility with any later cells.
positive_values = data[~mask]
print("Only non-negative values:", positive_values)



Original Data:
 [[  0  -9   6   5  -4]
 [-10  -3   4   4  -6]
 [  2  -1   3  -1   8]
 [ -1   8  10 -10   3]
 [ 10   3   9   4  -8]]
Mask (True = negative):
 [[False  True False False  True]
 [ True  True False False  True]
 [False  True False  True False]
 [ True False False  True False]
 [False False False False  True]]
Cleaned Data (negatives set to 0):
 [[ 0  0  6  5  0]
 [ 0  0  4  4  0]
 [ 2  0  3  0  8]
 [ 0  8 10  0  3]
 [10  3  9  4  0]]
Only positive values: [ 0  6  5  4  4  2  3  8  8 10  3 10  3  9  4]
