In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Show only the first few files of each directory: an image dataset can hold
# tens of thousands of files, and printing every path floods the cell output.
MAX_FILES_SHOWN_PER_DIR = 5
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames[:MAX_FILES_SHOWN_PER_DIR]:
        print(os.path.join(dirname, filename))
    hidden_count = len(filenames) - MAX_FILES_SHOWN_PER_DIR
    if hidden_count > 0:
        print(f"... and {hidden_count} more file(s) in {dirname}")

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Codemonk Machine Learning Intern Assignment**

# Fashion Product Image Classifier

In [None]:
import os
import glob
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms
import torchvision.models as models

from tqdm.notebook import tqdm
import seaborn as sns

In [None]:
# Locations of the metadata CSV and the image folder inside the Kaggle input.
BASE_DIR = Path('/kaggle/input/fashion-product-images-dataset/fashion-dataset')
IMAGE_DIR = BASE_DIR / 'images'
METADATA_FILE = BASE_DIR / 'styles.csv'


def _image_path_for(style_id):
    """Return the path of the JPEG expected for the given style id."""
    return os.path.join(IMAGE_DIR, f"{style_id}.jpg")


# Read the metadata (malformed CSV rows are skipped), drop rows missing any of
# the fields used downstream, attach the image path, and keep only rows whose
# image file actually exists on disk.
metadata_df = (
    pd.read_csv(METADATA_FILE, on_bad_lines='skip')
    .dropna(subset=['id', 'baseColour', 'articleType', 'season', 'gender'])
    .assign(image_path=lambda frame: frame['id'].apply(_image_path_for))
    .loc[lambda frame: frame['image_path'].apply(os.path.exists)]
    .reset_index(drop=True)
)

print("[INFO] Cleaned metadata shape:", metadata_df.shape)
metadata_df.head()

# Exploratory Data Analysis

In [None]:
# Structural overview: dtypes, non-null counts, memory usage (printed by .info()).
metadata_df.info()

# A notebook cell only renders its LAST expression, so the summary statistics
# must be displayed explicitly or they are silently discarded.
# `display` is the built-in provided by the IPython/Jupyter kernel.
display(metadata_df.describe(include='all'))

# Remaining missing values per column (rendered as the cell output).
metadata_df.isnull().sum()

In [None]:
# List every column present in the cleaned metadata.
print("[INFO] Available columns:")
print(metadata_df.columns.tolist())

# Candidate category columns to inspect; any that are absent are reported
# with a warning instead of raising.
category_columns = ['subCategory', 'articleType', 'productType']

for column_name in category_columns:
    if column_name not in metadata_df.columns:
        print(f"\n[WARNING] Column '{column_name}' not found in the DataFrame.")
        continue
    # Ten most frequent values of this column.
    print(f"\n[INFO] Top unique values in '{column_name}':")
    print(metadata_df[column_name].value_counts().head(10))


In [None]:
# Count plots (bars ordered by frequency) for the low-cardinality columns.
# Each spec is: (column, palette, title, x-axis label).
count_plot_specs = [
    ('gender', 'pastel', '🧑‍🤝‍🧑 Distribution of Gender', 'Gender'),
    ('season', 'viridis', '⛅ Distribution of Seasons', 'Season'),
]
for column, palette_name, title, x_label in count_plot_specs:
    plt.figure(figsize=(8, 5))
    sns.countplot(
        data=metadata_df,
        x=column,
        order=metadata_df[column].value_counts().index,
        palette=palette_name,
    )
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel('Count')
    plt.xticks(rotation=30)
    plt.show()

# Top-10 bar plots for the high-cardinality columns.
top_colours = metadata_df['baseColour'].value_counts().nlargest(10)
# NOTE: this counts the most frequent product display names, even though the
# resulting figure is titled "Product Types".
top_products = metadata_df['productDisplayName'].value_counts().nlargest(10)

# Each spec is: (counts, figsize, palette, title, x-axis label, xticks kwargs, tight layout?).
bar_plot_specs = [
    (top_colours, (10, 6), 'plasma', '🎨 Top 10 Base Colours', 'Colour',
     {'rotation': 45}, False),
    (top_products, (12, 6), 'coolwarm', '👕 Top 10 Product Types', 'Product Type',
     {'rotation': 45, 'ha': 'right'}, True),
]
for top_counts, fig_size, palette_name, title, x_label, tick_kwargs, use_tight_layout in bar_plot_specs:
    plt.figure(figsize=fig_size)
    sns.barplot(x=top_counts.index, y=top_counts.values, palette=palette_name)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel('Count')
    plt.xticks(**tick_kwargs)
    if use_tight_layout:
        # Long display names need the extra room for the rotated labels.
        plt.tight_layout()
    plt.show()


# Encode Labels

In [None]:
# Drop rare article types BEFORE fitting the encoders. Fitting first (as was
# done previously) leaves the filtered-out classes inside `classes_`, so the
# model heads sized from `len(le.classes_)` would contain outputs for classes
# that have no rows left in the dataset.
min_samples = 3
article_type_as_str = metadata_df['articleType'].astype(str)
counts = article_type_as_str.value_counts()
valid_classes = counts[counts >= min_samples].index
metadata_df = metadata_df[article_type_as_str.isin(valid_classes)].reset_index(drop=True)

# One encoder per prediction target, keyed by the metadata column it encodes.
label_encoders = {
    'baseColour': LabelEncoder(),
    'articleType': LabelEncoder(),
    'season': LabelEncoder(),
    'gender': LabelEncoder()
}

# Fit on the filtered data and store the integer ids in '<column>_label'.
for col, le in label_encoders.items():
    metadata_df[f'{col}_label'] = le.fit_transform(metadata_df[col].astype(str))

print("[INFO] Final dataset shape:", metadata_df.shape)


# Train/Val/Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: keep only classes with at least 3 samples overall. This is the
# minimum needed for the first stratified split to run; it does NOT by itself
# guarantee that every class reaches both val and test (see step 3).
label_counts = metadata_df['articleType_label'].value_counts()
valid_labels = label_counts.loc[label_counts.ge(3)].index
keep_rows = metadata_df['articleType_label'].isin(valid_labels)
metadata_df = metadata_df[keep_rows]

# Step 2: stratified 80/20 split into train and a held-out pool (val + test).
train_df, temp_df = train_test_split(
    metadata_df,
    stratify=metadata_df['articleType_label'],
    test_size=0.2,
    random_state=42,
)

# Step 3: split the held-out pool 50/50 into val and test.
# A stratified split needs at least 2 samples per class, so held-out rows whose
# class ended up with a single sample in the pool are dropped first.
temp_counts = temp_df['articleType_label'].value_counts()
valid_temp_labels = temp_counts.loc[temp_counts.ge(2)].index
temp_df = temp_df[temp_df['articleType_label'].isin(valid_temp_labels)]

val_df, test_df = train_test_split(
    temp_df,
    stratify=temp_df['articleType_label'],
    test_size=0.5,
    random_state=42,
)

print(f"[INFO] Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")


# Dataset and Transforms

In [None]:
class FashionDataset(Dataset):
    """Dataset yielding ``(image, labels)`` pairs from a metadata DataFrame.

    Each row of ``df`` must provide ``image_path`` and the four encoded label
    columns. ``labels`` is a dict keyed by task name: ``colour``,
    ``product_type``, ``season`` and ``gender``.
    """

    # Task name exposed to the model -> encoded label column in the DataFrame.
    _LABEL_COLUMNS = {
        'colour': 'baseColour_label',
        'product_type': 'articleType_label',
        'season': 'season_label',
        'gender': 'gender_label',
    }

    def __init__(self, df, transform=None):
        """Store the metadata frame and the optional image transform."""
        self.df = df
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index ``idx`` together with its labels."""
        record = self.df.iloc[idx]

        # Force three channels regardless of the mode the file was stored in.
        image = Image.open(record['image_path']).convert('RGB')
        if self.transform:
            image = self.transform(image)

        targets = {
            task: record[column]
            for task, column in self._LABEL_COLUMNS.items()
        }
        return image, targets

# Preprocessing shared by every split: fixed 224x224 resize, tensor conversion,
# then per-channel normalisation with the mean/std below.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 64

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(CHANNEL_MEAN, CHANNEL_STD),
])

# One dataset per split, all using the same transform.
train_ds, val_ds, test_ds = (
    FashionDataset(split_df, transform)
    for split_df in (train_df, val_df, test_df)
)

# Only the training loader is shuffled; val/test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)


# Model - Use EfficientNet

In [None]:
from torchvision.models import efficientnet_b0

class MultiOutputEfficientNet(nn.Module):
    """EfficientNet-B0 feature extractor with four parallel classification heads.

    Args:
        n_colours: number of base-colour classes.
        n_product_types: number of article-type classes.
        n_seasons: number of season classes.
        n_genders: number of gender classes.
    """

    def __init__(self, n_colours, n_product_types, n_seasons, n_genders):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; `weights="IMAGENET1K_V1"`
        # selects the same checkpoint that the legacy flag mapped to.
        self.backbone = efficientnet_b0(weights="IMAGENET1K_V1")

        # Read the feature width from the stock classifier (Dropout, Linear)
        # before discarding it, rather than hard-coding it.
        feature_dim = self.backbone.classifier[1].in_features
        self.backbone.classifier = nn.Identity()

        self.fc_colour = nn.Linear(feature_dim, n_colours)
        self.fc_product_type = nn.Linear(feature_dim, n_product_types)
        self.fc_season = nn.Linear(feature_dim, n_seasons)
        self.fc_gender = nn.Linear(feature_dim, n_genders)

    def forward(self, x):
        """Return a dict of logits, one entry per task, for the image batch ``x``."""
        feat = self.backbone(x)
        return {
            'colour': self.fc_colour(feat),
            'product_type': self.fc_product_type(feat),
            'season': self.fc_season(feat),
            'gender': self.fc_gender(feat)
        }

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Each head is sized from the number of classes its label encoder learned.
head_sizes = {
    'n_colours': len(label_encoders['baseColour'].classes_),
    'n_product_types': len(label_encoders['articleType'].classes_),
    'n_seasons': len(label_encoders['season'].classes_),
    'n_genders': len(label_encoders['gender'].classes_),
}
model = MultiOutputEfficientNet(**head_sizes).to(device)


# Loss, Optimizer, and Single-Epoch Training Function

In [None]:
# A single cross-entropy criterion is reused for every head; Adam updates all
# model parameters (backbone and heads) with one learning rate.
LEARNING_RATE = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: module whose forward pass returns a dict of logits keyed by task.
        loader: iterable of ``(imgs, labels)`` batches, where ``labels`` is a
            dict with the same task keys as the model output.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss applied to each head; the per-head losses are summed.
        device: device the batch is moved to before the forward pass.

    Returns:
        The summed multi-head loss averaged over the batches in ``loader``.
    """
    model.train()
    running_loss = 0
    for imgs, labels in tqdm(loader):
        imgs = imgs.to(device)
        # Labels normally arrive as tensors already (collated by the loader).
        # `torch.as_tensor` moves them without the extra copy and UserWarning
        # that `torch.tensor(tensor)` triggers, and still accepts plain lists.
        targets = {k: torch.as_tensor(v, device=device) for k, v in labels.items()}
        optimizer.zero_grad()
        outputs = model(imgs)
        # Joint objective: unweighted sum of the loss of every output head.
        loss = sum(criterion(outputs[k], targets[k]) for k in outputs)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)


# Evaluation Function

In [None]:
def evaluate(model, loader, criterion, device):
    """Compute the mean multi-head loss over ``loader`` without updating weights.

    Args:
        model: module whose forward pass returns a dict of logits keyed by task.
        loader: iterable of ``(imgs, labels)`` batches, where ``labels`` is a
            dict with the same task keys as the model output.
        criterion: loss applied to each head; the per-head losses are summed.
        device: device the batch is moved to before the forward pass.

    Returns:
        The summed multi-head loss averaged over the batches in ``loader``.
    """
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            # `torch.as_tensor` avoids the copy and UserWarning raised by
            # calling `torch.tensor` on labels that are already tensors.
            targets = {k: torch.as_tensor(v, device=device) for k, v in labels.items()}
            outputs = model(imgs)
            # Unweighted sum of the loss of every output head.
            loss = sum(criterion(outputs[k], targets[k]) for k in outputs)
            total_loss += loss.item()
    return total_loss / len(loader)


# Training Loop

In [None]:
# Train for a fixed number of epochs, checkpointing the weights whenever the
# validation loss reaches a new minimum.
EPOCHS = 4
best_val_loss = float('inf')

for epoch_number in range(1, EPOCHS + 1):
    train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss = evaluate(model, val_loader, criterion, device)
    print(f"[EPOCH {epoch_number}] Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    improved = val_loss < best_val_loss
    if not improved:
        continue

    # New best validation loss: remember it and persist the current weights.
    best_val_loss = val_loss
    torch.save(model.state_dict(), 'best_model.pth')


# Save Encoders & Model

In [None]:
# The training loop writes the lowest-validation-loss weights to
# 'best_model.pth'. Saving the current (last-epoch) weights to the same file
# would silently overwrite that checkpoint, so restore the best weights into
# the model instead. Only if no checkpoint exists are the current weights saved.
if os.path.exists('best_model.pth'):
    model.load_state_dict(torch.load('best_model.pth', map_location=device))
else:
    torch.save(model.state_dict(), 'best_model.pth')

# Persist each fitted label encoder so predictions can be decoded later.
for name, le in label_encoders.items():
    with open(f'le_{name}.pkl', 'wb') as f:
        pickle.dump(le, f)

print("✅ Model and encoders saved.")

# Inference on New Image

In [None]:
def predict_image(img_path, model, device):
    """Predict colour, type, season and gender for a single image file.

    The image is preprocessed with the notebook-level ``transform`` and the
    predicted class ids are decoded with the notebook-level ``label_encoders``.

    Returns:
        dict with keys ``colour``, ``type``, ``season`` and ``gender`` holding
        the decoded class names.
    """
    model.eval()

    # Build a batch of one: RGB image -> transformed tensor -> leading batch axis.
    batch = transform(Image.open(img_path).convert('RGB')).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)

    def _decode(encoder_key, head_name):
        """Map the arg-max class id of one head back to its original label."""
        class_id = logits[head_name].argmax(1).item()
        return label_encoders[encoder_key].inverse_transform([class_id])[0]

    return {
        'colour': _decode('baseColour', 'colour'),
        'type': _decode('articleType', 'product_type'),
        'season': _decode('season', 'season'),
        'gender': _decode('gender', 'gender'),
    }

# Run the model on the first five image files returned by the glob and show
# each image with its predicted attributes as the title.
sample_imgs = glob.glob('/kaggle/input/fashion-product-images-dataset/fashion-dataset/images/*.jpg')[:5]

for sample_path in sample_imgs:
    predicted = predict_image(sample_path, model, device)
    plt.imshow(Image.open(sample_path))
    plt.title(str(predicted))
    plt.axis('off')
    plt.show()


# Conclusion

1. I conducted EDA and preprocessing in Python to prepare the data.
2. I built a multi-output classifier that predicts colour, product type, season, and gender.
3. I applied the model to Amazon images.