## This for just testing the classification between two classes from Caltech100

### Computing Platform Check GPU (CUDA) or CPU

In [None]:
import torch
if torch.cuda.is_available():
    device = "cuda"
else:
    print ('[WARNING] CUDA/GPU is not available! Compute-intensive scripts on this notebook will be run on CPU.')
    device =  "cpu"

## Check Environment

In [None]:
import os

if 'COLAB_GPU' in os.environ:
    print("Environment is colab")
elif 'KAGGLE_URL_BASE' in os.environ:
    print("Environment is kaggle")
else:
    print("Environment is local")

In [None]:
#@title Download Dataset
%%capture
!wget https://data.caltech.edu/records/nyy15-4j048/files/256_ObjectCategories.tar
!tar -xvf /content/256_ObjectCategories.tar

In [2]:
#@title Install OpenCLIP
%%capture
!pip install open_clip_torch

In [3]:
#@title Imports
import torch
import torchvision
import os
import open_clip
from torch.utils.data import DataLoader
import numpy as np
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

In [4]:
#@title Delete Unwanted Folders and Make Only Two Classes
directory = '/content/256_ObjectCategories' # Directory holds all the image's folders
dir_list  =     ['029.cannon' , '026.cake' , '027.calculator' ,
                '025.cactus','028.camel' , '024.butterfly'] # List of all the wnated folders 
# Remove other folders
for folder in os.listdir(directory):
    f = os.path.join(directory, folder)    
    if folder not in dir_list:
      !rm -r $f
      continue


In [5]:
#@title Clip Model
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32',pretrained='openai')
device = "cuda" if torch.cuda.is_available() else "cpu"

100%|███████████████████████████████████████| 354M/354M [00:05<00:00, 64.5MiB/s]


In [17]:
#@title get features function from OpenAI CLIP Github
# https://github.com/openai/CLIP#linear-probe-evaluation
def get_features(dataset):
    all_features = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in DataLoader(dataset, batch_size=100):
            features = model.encode_image(images.to(device))

            all_features.append(features)
            all_labels.append(labels)

    return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy()

In [20]:
#@title Making Dataset Out Of Images' folder
import random
transform = preprocess
dataset = torchvision.datasets.ImageFolder(directory, transform)
n = len(dataset)  # total number of examples
n_test = int(0.1 * n)  # take ~10% for test

test_list = [] 
while len(test_list) < n_test:
  rand = random.randint(0, n)
  if rand not in test_list:
    test_list.append(rand)

train_list = []
for num in range(n):
  if num not in test_list:
    train_list.append(num)

test_set = torch.utils.data.Subset(dataset, test_list,)  # take 10%
train_set = torch.utils.data.Subset(dataset, train_list)

In [21]:
dataset.classes

['024.butterfly',
 '025.cactus',
 '026.cake',
 '027.calculator',
 '028.camel',
 '029.cannon']

In [22]:
#@title  Calculating images' features for train/test sets.
train_features, train_labels = get_features(train_set)

In [23]:
test_features, test_labels = get_features(test_set)

In [24]:
test_labels

array([1, 3, 1, 2, 5, 4, 3, 0, 1, 1, 0, 1, 3, 5, 5, 1, 1, 2, 1, 5, 1, 0,
       3, 0, 2, 5, 4, 1, 5, 3, 2, 5, 5, 3, 2, 3, 1, 3, 3, 1, 2, 4, 4, 4,
       0, 3, 5, 5, 1, 2, 2, 1, 2, 0, 2, 1, 0, 3, 2, 0, 1, 4, 1, 2])

# Using LogisticRegression

In [29]:
# Calculate the image features
train_features, train_labels = get_features(train_set)
test_features, test_labels = get_features(test_set)

# Perform logistic regression
## max_iter reduced to 100 
classifier = LogisticRegression(random_state=0, max_iter=1000, verbose=1)
classifier.fit(train_features, train_labels)

# Evaluate using the logistic regression classifier
predictions = classifier.predict(test_features)
accuracy = np.mean((test_labels == predictions).astype(np.float)) * 100.
print(f"Accuracy = {accuracy:.3f}")

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Accuracy = 100.000


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s finished
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if sys.path[0] == '':


In [30]:
predictions

array([1, 3, 1, 2, 5, 4, 3, 0, 1, 1, 0, 1, 3, 5, 5, 1, 1, 2, 1, 5, 1, 0,
       3, 0, 2, 5, 4, 1, 5, 3, 2, 5, 5, 3, 2, 3, 1, 3, 3, 1, 2, 4, 4, 4,
       0, 3, 5, 5, 1, 2, 2, 1, 2, 0, 2, 1, 0, 3, 2, 0, 1, 4, 1, 2])

In [31]:
test_labels

array([1, 3, 1, 2, 5, 4, 3, 0, 1, 1, 0, 1, 3, 5, 5, 1, 1, 2, 1, 5, 1, 0,
       3, 0, 2, 5, 4, 1, 5, 3, 2, 5, 5, 3, 2, 3, 1, 3, 3, 1, 2, 4, 4, 4,
       0, 3, 5, 5, 1, 2, 2, 1, 2, 0, 2, 1, 0, 3, 2, 0, 1, 4, 1, 2])

In [34]:
#@title Test LogisticRegression using internet images
from PIL import Image
images_dir = '/content/drive/MyDrive/Brandon/butterflies'
print("Classsifying the images in the butterfly directoy")
for file_name in os.listdir(images_dir):
  image_path = os.path.join(images_dir , file_name)
  image = preprocess(Image.open(image_path)).unsqueeze(0)
  image_features =  model.encode_image(image)
  print(dataset.classes[classifier.predict(image_features.detach().numpy()).item()])

Classsifying the images in the butterfly directoy
024.butterfly
024.butterfly
024.butterfly
024.butterfly
024.butterfly
024.butterfly
024.butterfly
024.butterfly
024.butterfly


In [35]:
images_dir = '/content/drive/MyDrive/Brandon/calculators'
print("Classsifying the images in the claculators directoy")
for file_name in os.listdir(images_dir):
  image_path = os.path.join(images_dir , file_name)
  image = preprocess(Image.open(image_path)).unsqueeze(0)
  image_features =  model.encode_image(image)
  print(dataset.classes[classifier.predict(image_features.detach().numpy()).item()])

Classsifying the images in the claculators directoy
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
027.calculator
