In [41]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image 
import os 
from tqdm import tqdm 

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix

In [None]:
# Loading labels 
labels = pd.read_csv("data/labels.csv")

# These will be the targets we're fitting to 
breeds = labels['breed'].unique() 
targets = {b: i for i, b in enumerate(breeds)}

print(targets)

X = []
y = [] 

# build matrix 
for i, row in tqdm(labels.iterrows(), total=len(labels)): 
    path = os.path.join("data/train/", f"{row['id']}.jpg")
    img = Image.open(path).convert('RGB').resize((64, 64))
    X.append(np.array(img).flatten())
    y.append(targets[row['breed']])

X = np.array(X)
y = np.array(y)


{'boston_bull': 0, 'dingo': 1, 'pekinese': 2, 'bluetick': 3, 'golden_retriever': 4, 'bedlington_terrier': 5, 'borzoi': 6, 'basenji': 7, 'scottish_deerhound': 8, 'shetland_sheepdog': 9, 'walker_hound': 10, 'maltese_dog': 11, 'norfolk_terrier': 12, 'african_hunting_dog': 13, 'wire-haired_fox_terrier': 14, 'redbone': 15, 'lakeland_terrier': 16, 'boxer': 17, 'doberman': 18, 'otterhound': 19, 'standard_schnauzer': 20, 'irish_water_spaniel': 21, 'black-and-tan_coonhound': 22, 'cairn': 23, 'affenpinscher': 24, 'labrador_retriever': 25, 'ibizan_hound': 26, 'english_setter': 27, 'weimaraner': 28, 'giant_schnauzer': 29, 'groenendael': 30, 'dhole': 31, 'toy_poodle': 32, 'border_terrier': 33, 'tibetan_terrier': 34, 'norwegian_elkhound': 35, 'shih-tzu': 36, 'irish_terrier': 37, 'kuvasz': 38, 'german_shepherd': 39, 'greater_swiss_mountain_dog': 40, 'basset': 41, 'australian_terrier': 42, 'schipperke': 43, 'rhodesian_ridgeback': 44, 'irish_setter': 45, 'appenzeller': 46, 'bloodhound': 47, 'samoyed': 

  0%|          | 0/10222 [00:00<?, ?it/s]

100%|██████████| 10222/10222 [02:06<00:00, 80.72it/s]


In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components = 500)
X_pca = pca.fit_transform(X_scaled)

In [None]:
X_train, X_val, y_train, y_val = train_test_split(X_pca, y, test_size = 0.2, stratify=y)

In [None]:
model = LogisticRegression(max_iter=1000, solver='lbfgs', verbose=1)
model.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  4.6min finished


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,5000


In [None]:
y_val_pred = model.predict(X_val)
y_val_prob = model.predict_proba(X_val)

print(y_val_pred)
print(y_val)

acc = accuracy_score(y_val, y_val_pred)
loss = log_loss(y_val, y_val_prob)
cm = confusion_matrix(y_val, y_val_pred)

print("Accuracy: ",acc)
print("Loss: ",loss)

[ 68 112  46 ...  48 109  58]
[56 45 18 ... 30 67 75]
Accuracy:  0.02689486552567237
Loss:  17.648645880121354
