In [2]:
# Download the CIFAR-10 dataset
!wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
# Extract the dataset
!tar -xzf cifar-10-python.tar.gz

--2026-01-09 01:41:29--  https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Resolving www.cs.toronto.edu (www.cs.toronto.edu)... 128.100.3.30
Connecting to www.cs.toronto.edu (www.cs.toronto.edu)|128.100.3.30|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 170498071 (163M) [application/x-gzip]
Saving to: ‘cifar-10-python.tar.gz’


2026-01-09 01:41:31 (85.4 MB/s) - ‘cifar-10-python.tar.gz’ saved [170498071/170498071]



In [5]:
import numpy as np
import pickle
import os

def loading_cifar_batch(filename):
  """Read one pickled CIFAR batch file and return ``(X, y)``.

  Args:
    filename: path to a pickled batch whose dict has ``b'data'`` and
      ``b'labels'`` entries.

  Returns:
    X: the ``b'data'`` entry cast to ``np.float32``.
    y: the ``b'labels'`` entry exactly as stored in the pickle (not converted).
  """
  # NOTE: pickle.load can execute arbitrary code; only point this at batch
  # files from a source you trust.
  with open(filename, 'rb') as batch_file:
    batch = pickle.load(batch_file, encoding='bytes')
  pixels = batch[b'data'].astype(np.float32)
  labels = batch[b'labels']
  return pixels, labels

def load_cifar(root):
  """Load the five training batches and the test batch found under ``root``.

  Args:
    root: directory containing ``data_batch_1`` .. ``data_batch_5`` and
      ``test_batch``.

  Returns:
    ``(X_train, y_train, X_test, y_test)``; the training arrays are the five
    batches concatenated in order, and ``y_test`` is converted to a numpy array.
  """
  train_batches = [
      loading_cifar_batch(os.path.join(root, f'data_batch_{batch_no}'))
      for batch_no in range(1, 6)
  ]
  X_train = np.concatenate([features for features, _ in train_batches])
  y_train = np.concatenate([labels for _, labels in train_batches])

  X_test, test_labels = loading_cifar_batch(os.path.join(root, 'test_batch'))
  # The batch loader hands labels back as stored, so make them an array here.
  y_test = np.array(test_labels)
  return X_train, y_train, X_test, y_test


# Load every split from the extracted archive directory and report the shapes,
# one per line: train features, train labels, test features, test labels.
X_train, y_train, X_test, y_test = load_cifar('cifar-10-batches-py')
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(50000, 3072)
(50000,)
(10000, 3072)
(10000,)


In [25]:
# Normalise the data: subtract the per-feature mean of X_train from both
# X_train and X_test. The subtraction happens in place, and the mean is taken
# over whatever rows X_train holds when this cell runs.
X_mean = X_train.mean(axis=0)
X_train -= X_mean
X_test -= X_mean
print(X_train.shape)

# Now the images are less affected by overall brightness and similar offsets.



(4000, 3072)


In [28]:
num_val_samples = 1000    # Number of samples to use for the validation set

# NOTE: this cell rebinds X_train / y_train to the shortened training split, so
# every re-run without re-loading the data carves another num_val_samples rows
# off the training set. Re-run the loading (and normalising) cells first when a
# fresh split is wanted.

# Features and labels are sliced with the same indices, so they must line up.
if X_train.shape[0] != y_train.shape[0]:
  raise ValueError(
      f"X_train has {X_train.shape[0]} rows but y_train has {y_train.shape[0]} labels"
  )

# Without this guard a too-small X_train gives a zero or negative slice bound
# below, which silently produces an empty or wrong split instead of an error.
if X_train.shape[0] <= num_val_samples:
  raise ValueError(
      f"Cannot hold out {num_val_samples} validation samples from only "
      f"{X_train.shape[0]} training samples; reload the dataset first."
  )

# Everything except the last num_val_samples rows stays in the training set
num_train_samples = X_train.shape[0] - num_val_samples

# Create the new training sets
X_train_new = X_train[:num_train_samples]
y_train_new = y_train[:num_train_samples]

# Create the validation sets from the rows immediately following the training samples
X_val_new = X_train[num_train_samples : num_train_samples + num_val_samples]
y_val_new = y_train[num_train_samples : num_train_samples + num_val_samples]

# Reassign to the original variable names
X_train = X_train_new
y_train = y_train_new
X_val = X_val_new
y_val = y_val_new


print(X_train.shape)
print(y_train.shape)

print(X_val.shape)
print(y_val.shape)

(3000, 3072)
(3000,)
(1000, 3072)
(1000,)


In [27]:
class linearClassifier:
  """Base class for linear classifiers trained with minibatch SGD.

  Subclasses provide ``loss(X_batch, y_batch, reg)`` returning ``(loss, grad)``;
  this class holds the weight matrix ``self.w`` (shape ``(dim, num_classes)``),
  the training loop and the prediction rule.
  """

  def __init__(self):
    # Created lazily by train(), once the input dimensionality is known.
    self.w = None


  def loss(self, X_batch, y_batch, reg):
    """Return ``(loss, grad)`` for one minibatch; must be overridden.

    ``grad`` must have the same shape as ``self.w`` because train() subtracts
    ``lr * grad`` from the weights in place.
    """
    raise NotImplementedError("subclasses of linearClassifier must implement loss()")


  def train(self, X, y, lr= 1e-7,reg=1e-5,num_iters=100,batch_size=200,verbose=False):
    """Run minibatch SGD and return the per-iteration loss values.

    Args:
      X: 2-D array of training samples, one row per sample.
      y: integer label array indexable like ``X``; the number of classes is
        taken to be ``np.max(y) + 1``.
      lr: learning rate applied to the gradient.
      reg: regularisation strength forwarded to ``loss``.
      num_iters: number of SGD steps.
      batch_size: rows drawn per step (sampled with replacement).
      verbose: when true, print the loss every 10 iterations.

    Returns:
      List with the loss of every iteration, in order.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1

    # Weights are only initialised on the first call; later calls keep
    # training from the current self.w.
    if self.w is None:
      self.w = 0.001 * np.random.randn(dim, num_classes)

    loss_history = []

    for i in range(num_iters):
      # np.random.choice samples with replacement by default.
      batch_index = np.random.choice(num_train, batch_size)
      X_batch = X[batch_index]
      y_batch = y[batch_index]

      loss, grad = self.loss(X_batch, y_batch, reg)
      loss_history.append(loss)

      self.w -= lr * grad

      # Progress output is opt-in: honour the verbose flag.
      if verbose and i % 10 == 0:
        print(f"iter {i}: loss {loss}")

    return loss_history


  def predict(self, X):
    """Return, for each row of ``X``, the index of the highest-scoring class."""
    scores = X.dot(self.w)
    return np.argmax(scores, axis=1)

class linearSVM(linearClassifier):
  """Linear classifier trained with a softmax cross-entropy loss.

  NOTE: despite the name, loss() below does not compute a hinge (SVM) loss; it
  applies a softmax to the scores and takes the negative log-likelihood of the
  correct class. The class name is kept so existing cells keep working.
  """

  def loss(self, X_batch, y_batch, reg):
    """Return ``(loss, dW)`` for one minibatch.

    Args:
      X_batch: 2-D array, one sample per row.
      y_batch: integer class index for each row of ``X_batch``.
      reg: L2 regularisation strength; the penalty is ``0.5 * reg * sum(w**2)``.

    Returns:
      loss: mean cross-entropy over the batch plus the L2 penalty.
      dW: gradient of ``loss`` with respect to ``self.w`` (same shape).
    """
    num_train = X_batch.shape[0]
    rows = np.arange(num_train)

    scores = X_batch.dot(self.w)
    # Subtract the maximum score for numerical stability (row max becomes 0)
    scores -= np.max(scores, axis=1, keepdims=True)

    exp_scores = np.exp(scores)
    exp_sums = np.sum(exp_scores, axis=1, keepdims=True)
    probs = exp_scores / exp_sums

    # Cross-entropy written as log(sum_j exp(s_j)) - s_correct rather than
    # -log(p_correct): each row sum is >= 1 after the shift, so the loss stays
    # finite even when the correct-class probability underflows to 0.
    correct_log_probs = np.log(exp_sums[:, 0]) - scores[rows, y_batch]
    data_loss = np.sum(correct_log_probs) / num_train

    # Add regularization
    reg_loss = 0.5 * reg * np.sum(self.w * self.w)
    loss = data_loss + reg_loss

    # Gradient of the mean cross-entropy w.r.t. the scores: probs - one_hot(y)
    dscores = probs
    dscores[rows, y_batch] -= 1
    dscores /= num_train

    dW = X_batch.T.dot(dscores)
    # Add regularization gradient
    dW += reg * self.w

    return loss, dW

In [23]:
# Fit a fresh linearSVM instance on the training split and keep its loss trace.
model = linearSVM()
loss_history = model.train(
    X_train,
    y_train,
    lr=1e-7,
    reg=5e4,
    num_iters=1500,
    batch_size=200,
    verbose=True,
)

iter 0: loss 775.0970989241519
iter 10: loss 701.0219135503994
iter 20: loss 634.1342211255068
iter 30: loss 573.407182493447
iter 40: loss 518.5520741289662
iter 50: loss 469.2123482186666
iter 60: loss 424.31931410960476
iter 70: loss 383.8343595746399
iter 80: loss 347.4248896428685
iter 90: loss 314.5128688490752
iter 100: loss 284.63106131385024
iter 110: loss 257.5595876163201
iter 120: loss 233.1795352312717
iter 130: loss 210.96883351703585
iter 140: loss 190.84680380983087
iter 150: loss 172.96393895186696
iter 160: loss 156.61242688760407
iter 170: loss 141.66264320252787
iter 180: loss 128.4450359287655
iter 190: loss 116.34794899555077
iter 200: loss 105.35167424948031
iter 210: loss 95.43021016603355
iter 220: loss 86.61808654343875
iter 230: loss 78.53848127293315
iter 240: loss 71.14938076836309
iter 250: loss 64.6113516794001
iter 260: loss 58.68777288926101
iter 270: loss 53.14943638904039
iter 280: loss 48.359656580811176
iter 290: loss 43.86459092022828
iter 300: los

In [29]:
# Score the trained model on the validation split: the accuracy is the
# fraction of rows whose predicted class matches the label.
y_val_pred = model.predict(X_val)
print(f"Validation accuracy: {np.mean(y_val_pred == y_val)}")


Validation accuracy: 0.352


In [34]:
class softmaxClassifier(linearClassifier):
  """Softmax classifier whose loss is computed with explicit per-sample loops."""

  def loss(self, X, y, reg):
    """Return ``(loss, dw)`` for one minibatch.

    Args:
      X: 2-D array, one sample per row.
      y: integer class index for each row of ``X``.
      reg: L2 regularisation strength; the penalty is ``reg * sum(w**2)``
        (no 0.5 factor), with matching gradient ``2 * reg * w``.

    Returns:
      loss: mean cross-entropy over the batch plus the L2 penalty.
      dw: gradient with the shape of ``self.w``, accumulated as float32.
    """
    loss = 0.0
    dw = np.zeros_like(self.w).astype(np.float32)

    num_train = X.shape[0]
    num_classes = self.w.shape[1]

    for i in range(num_train):
      scores = X[i].dot(self.w)
      # Shift so the largest score is 0; keeps np.exp from overflowing.
      scores -= np.max(scores)

      exp_scores = np.exp(scores)
      exp_sum = np.sum(exp_scores)
      probability = exp_scores / exp_sum

      # Cross-entropy written as log(sum_j exp(s_j)) - s_correct rather than
      # -log(p_correct): exp_sum is >= 1 after the shift, so the loss stays
      # finite even when the correct-class probability underflows to 0.
      loss += np.log(exp_sum) - scores[y[i]]

      # d(loss_i)/d(w[:, j]) = (p_j - 1[j == y_i]) * x_i
      for j in range(num_classes):
        if j == y[i]:
          dw[:, j] += (probability[j] - 1) * X[i]
        else:
          dw[:, j] += probability[j] * X[i]

    # Average over the batch once, after all samples have been accumulated.
    loss /= num_train
    dw /= num_train

    # Apply the regularisation term once per batch.
    loss += reg * np.sum(self.w * self.w)
    dw += 2 * reg * self.w

    return loss, dw

In [35]:
# Fit a fresh softmaxClassifier on the training split; note that this rebinds
# loss_history to the softmax run's loss trace.
softmax = softmaxClassifier()
loss_history = softmax.train(
    X_train,
    y_train,
    lr=1e-7,
    reg=5e4,
    num_iters=1500,
    batch_size=200,
    verbose=True,
)


iter 0: loss 1575.6257915357094
iter 10: loss 1287.9212252918637
iter 20: loss 1053.3753391028335
iter 30: loss 861.3694242165944
iter 40: loss 704.448719676872
iter 50: loss 576.3508056424245
iter 60: loss 471.66359149767885
iter 70: loss 385.82651621519807
iter 80: loss 315.99015231622604
iter 90: loss 258.73830442328307
iter 100: loss 211.9441983748102
iter 110: loss 173.63978775053457
iter 120: loss 142.40786856751657
iter 130: loss 116.82068294503901
iter 140: loss 95.93801789853057
iter 150: loss 78.85947928358686
iter 160: loss 64.81622401628792
iter 170: loss 53.387334841138696
iter 180: loss 43.95321489019472
iter 190: loss 36.38914532865286
iter 200: loss 30.14376292874821
iter 210: loss 25.001520265581785
iter 220: loss 20.90078260446838
iter 230: loss 17.44998342052756
iter 240: loss 14.596508470401814
iter 250: loss 12.358814951994525
iter 260: loss 10.466382538935859
iter 270: loss 9.004395805924158
iter 280: loss 7.730209356110397
iter 290: loss 6.688411257264583
iter 30

In [37]:
# Score the softmax model on the validation split and report the share of
# matching predictions as a percentage.
y_val_pred = softmax.predict(X_val)
answer = (y_val == y_val_pred).mean()
percentage = answer * 100
print(f"Validation accuracy: {percentage} %")

Validation accuracy: 30.2 %
