In [None]:
import numpy as np
import os
import random

from PIL import Image

In [None]:
import torch 
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import time

from tqdm import tqdm

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Seed every RNG this notebook can draw from, not just torch: scikit-learn
# estimators fall back to NumPy's global RNG when no random_state is given,
# and the stdlib `random` module is used for shuffling.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Importing Dataset

In [None]:
def extractDataFromDir(dir_path):
  """
  Collect (image_path, class_index) pairs from a folder-per-class dataset.

  Every sub-directory of `dir_path` is treated as one class and every regular
  file inside it as one sample of that class. Class folders and their files
  are visited in sorted order, so the integer label given to each class is
  the same on every run and filesystem (`os.listdir` alone returns entries in
  arbitrary order).

  Args:
    dir_path: Path of the dataset root directory.

  Returns:
    A list of `(image_path, class_index)` tuples, where `class_index` is the
    position of the class folder in the sorted list of class names. The list
    is empty when `dir_path` contains no class folders.

  Raises:
    OSError: If `dir_path` (or a class folder) cannot be listed.
  """
  # Only directories count as classes; a stray file in the root would make
  # the per-class os.listdir call below fail.
  class_names = sorted(
      name for name in os.listdir(dir_path)
      if os.path.isdir(os.path.join(dir_path, name))
  )
  print(f"Class names in the dataset = {class_names}")

  dataset = []
  for label, class_name in enumerate(class_names):
    class_dir = os.path.join(dir_path, class_name)
    for file_name in sorted(os.listdir(class_dir)):
      file_path = os.path.join(class_dir, file_name)
      # Nested folders are not samples and could not be opened as images.
      if os.path.isfile(file_path):
        dataset.append((file_path, label))

  print(f"Length of dataset = {len(dataset)}")

  return dataset

In [None]:
def shuffle_dataset(dataset, seed=42):
  """
  Return the image paths and labels of `dataset` in a seeded random order.

  The shuffle runs on a copy with a private `random.Random(seed)`, so the
  caller's list is left untouched and the module-level RNG is not reseeded.
  Calling this twice with the same input and seed therefore yields the same
  result, which keeps the calling cell idempotent. The resulting order is the
  same one `random.seed(seed); random.shuffle(...)` would produce.

  Args:
    dataset: Sequence of `(image_path, label)` pairs.
    seed: Seed for the private random generator.

  Returns:
    A tuple `(images_path, images_class)` of two equally long tuples holding
    the shuffled paths and their labels. Both are empty for an empty dataset.
  """
  shuffled = list(dataset)
  random.Random(seed).shuffle(shuffled)

  # zip(*[]) yields nothing to unpack, so handle the empty case explicitly.
  if not shuffled:
    return ((), ())

  images_path, images_class = zip(*shuffled)
  return (images_path, images_class)

In [None]:
# Root directory of the dataset, handed to extractDataFromDir below.
# NOTE(review): presumably a Kaggle input mount with one sub-folder per class — confirm.
path = "../input/bikevshorses/Assignment2_BikeHorses"
# List of (image_path, class_index) tuples covering every class folder.
dataset = extractDataFromDir(path)

In [None]:
# Shuffle with the default seed (42) and split the pairs into two parallel
# tuples: one of image paths, one of their labels.
images_path, images_label = shuffle_dataset(dataset)

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical from run to run.
(train_images_path, test_images_path,
 trainLabels, testLabels) = train_test_split(
    images_path,
    images_label,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Per-channel statistics handed to Normalize.
# NOTE(review): these look like the usual ImageNet mean/std — confirm.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image, in order:
# resize to 256 -> 224 centre crop -> tensor -> per-channel normalisation.
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Preprocess every training image into a tensor with a leading batch
# dimension of 1, ready to be fed to the network one image at a time.
trainData = []

for img_path in train_images_path:
    # The context manager closes the file handle deterministically.
    # convert("RGB") guarantees three channels, so grayscale, palette, or
    # RGBA files do not break the 3-channel Normalize step or the network.
    with Image.open(img_path) as img:
        transformed_img = transform(img.convert("RGB"))
    trainData.append(torch.unsqueeze(transformed_img, 0))

In [None]:
# Preprocess every test image into a tensor with a leading batch dimension
# of 1, ready to be fed to the network one image at a time.
testData = []

for img_path in test_images_path:
    # The context manager closes the file handle deterministically.
    # convert("RGB") guarantees three channels, so grayscale, palette, or
    # RGBA files do not break the 3-channel Normalize step or the network.
    with Image.open(img_path) as img:
        transformed_img = transform(img.convert("RGB"))
    testData.append(torch.unsqueeze(transformed_img, 0))

# AlexNet Feature extraction

In [None]:
# Load torchvision's AlexNet with its pretrained weights.
# NOTE(review): newer torchvision releases may prefer the `weights=` argument
# over `pretrained=` — confirm against the installed version.
alexnet = models.alexnet(pretrained = True)
# Put the network in evaluation mode before it is used for inference below.
alexnet.eval()

In [None]:
# Run each preprocessed training image through AlexNet with gradient
# tracking disabled, keeping one NumPy array of network outputs per image.
with torch.no_grad():
    trainFeaturesList = [
        alexnet(image_batch).detach().numpy() for image_batch in trainData
    ]

In [None]:
# Run each preprocessed test image through AlexNet with gradient tracking
# disabled, keeping one NumPy array of network outputs per image.
with torch.no_grad():
    testFeaturesList = [
        alexnet(image_batch).detach().numpy() for image_batch in testData
    ]

In [None]:
# Flatten the per-image outputs into a 2-D matrix with one row per training
# image; the row width is the product of the first two dims of one output.
train_sample_shape = trainFeaturesList[0].shape
train_row_width = train_sample_shape[0] * train_sample_shape[1]
trainFeatures = np.array(trainFeaturesList).reshape(
    len(trainFeaturesList), train_row_width
)
print(trainFeatures.shape)

In [None]:
# Flatten the per-image outputs into a 2-D matrix with one row per test
# image; the row width is the product of the first two dims of one output.
test_sample_shape = testFeaturesList[0].shape
test_row_width = test_sample_shape[0] * test_sample_shape[1]
testFeatures = np.array(testFeaturesList).reshape(
    len(testFeaturesList), test_row_width
)
print(testFeatures.shape)

# Models training and prediction

## Logistic Regression

In [None]:
# The saga solver is stochastic; pinning random_state makes the fitted
# coefficients, and therefore the reported accuracy, reproducible.
lrmodel = LogisticRegression(solver = 'saga', max_iter = 5000, random_state = 42)
lrmodel.fit(trainFeatures, trainLabels)

In [None]:
# Evaluate the logistic-regression model on the held-out features.
y_pred = lrmodel.predict(testFeatures)
# accuracy_score takes (y_true, y_pred); accuracy is symmetric, but the
# correct order keeps this safe if another metric is swapped in later.
acc = accuracy_score(testLabels, y_pred)
print(acc)

## Linear SVC

In [None]:
# Pin random_state so the solver's internal randomness does not change the
# fitted model between runs.
clf = LinearSVC(max_iter = 5000, random_state = 42)
clf.fit(trainFeatures, trainLabels)

In [None]:
# Evaluate the linear SVC on the held-out features.
y_pred = clf.predict(testFeatures)
# accuracy_score takes (y_true, y_pred); accuracy is symmetric, but the
# correct order keeps this safe if another metric is swapped in later.
acc = accuracy_score(testLabels, y_pred)
print(acc)

## Gaussian NB

In [None]:
# Fit a Gaussian Naive Bayes classifier with default settings on the
# extracted training features and their labels.
gnb = GaussianNB()
gnb.fit(trainFeatures, trainLabels)

In [None]:
# Evaluate the Gaussian Naive Bayes model on the held-out features.
y_pred = gnb.predict(testFeatures)
# accuracy_score takes (y_true, y_pred); accuracy is symmetric, but the
# correct order keeps this safe if another metric is swapped in later.
acc = accuracy_score(testLabels, y_pred)
print(acc)