In [1]:
import numpy as np

arr = np.arange(1000)
print(arr.shape)  # Output: (1000,)

(1000,)


In [2]:
reshaped = arr.reshape(200, 5)
print(reshaped.shape)  # Output: (200, 5)

(200, 5)


In [3]:
reshaped_to_4x1 = arr[:4].reshape(4, 1)
print(reshaped_to_4x1.shape)  # Output: (4, 1)

(4, 1)


In [5]:
# Step 2: Replace the last column (column index 4) with random 0s and 1s
reshaped[:, 4] = np.random.randint(0, 2, size=200)

# Step 3: Optional - Print and verify
print(reshaped[:5])  # print first 5 rows to check

[[ 0  1  2  3  1]
 [ 5  6  7  8  1]
 [10 11 12 13  1]
 [15 16 17 18  0]
 [20 21 22 23  1]]


In [7]:
# Step 3: Split into X (features) and y (target)
X = reshaped[:, :4]  # all rows, columns 0 to 3
y = reshaped[:, 4]   # all rows, column 4

# Step 4: Print to verify
print("X shape:", X.shape)
print("y shape:", y.shape)
print("X sample:\n", X[:5])
print("y sample:\n", y[:5])

X shape: (200, 4)
y shape: (200,)
X sample:
 [[ 0  1  2  3]
 [ 5  6  7  8]
 [10 11 12 13]
 [15 16 17 18]
 [20 21 22 23]]
y sample:
 [1 1 1 0 1]


In [8]:
# Step 3: Shuffle indices
np.random.seed(42)  # for reproducibility
indices = np.random.permutation(len(X))

# Step 4: Compute split point
split_idx = int(0.8 * len(X))  # 80% for training

# Step 5: Create train/test splits
train_idx, test_idx = indices[:split_idx], indices[split_idx:]

train_X, test_X = X[train_idx], X[test_idx]
train_y, test_y = y[train_idx], y[test_idx]

# Step 6: Verify shapes
print("Train X shape:", train_X.shape)
print("Train y shape:", train_y.shape)
print("Test X shape:", test_X.shape)
print("Test y shape:", test_y.shape)

Train X shape: (160, 4)
Train y shape: (160,)
Test X shape: (40, 4)
Test y shape: (40,)


In [9]:
from collections import Counter

def euclidean_distance(a, b):
    return np.sqrt(np.sum((a - b) ** 2, axis=1))

def knn_predict(train_X, train_y, test_X, k=3):
    predictions = []
    for test_point in test_X:
        # Step 1: Compute distances to all training points
        distances = euclidean_distance(train_X, test_point)

        # Step 2: Get indices of k nearest neighbors
        nn_indices = np.argsort(distances)[:k]

        # Step 3: Get the labels of nearest neighbors
        nn_labels = train_y[nn_indices]

        # Step 4: Majority vote
        most_common = Counter(nn_labels).most_common(1)[0][0]
        predictions.append(most_common)

    return np.array(predictions)

In [27]:
# Make predictions on test set
k = 10 # choose number of neighbors
predictions = knn_predict(train_X, train_y, test_X, k=k)

# Calculate accuracy
accuracy = np.mean(predictions == test_y)
print(f"KNN (k={k}) Accuracy: {accuracy:.2f}")

KNN (k=10) Accuracy: 0.60
