In [1]:
import glob
from pathlib import Path

import albumentations as A
import numpy as np
import pandas as pd
import pillow_avif
import torch
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from PIL import Image
from torchvision.models import EfficientNet_B0_Weights, efficientnet_b0
from torchvision.models.efficientnet import _efficientnet_conf
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
from torchvision import transforms as transforms
import torchvision

In [16]:
class EffNet_B0(nn.Module):
    """EfficientNet-B0 feature extractor with a coarse and a fine classifier head.

    The backbone is used as a frozen feature extractor: ``forward`` switches it
    to eval mode and runs it under ``torch.no_grad()``, so only the two heads
    receive gradients. The fine head is fed the pooled backbone features
    concatenated with the 128-d output of the coarse head's first linear layer.

    Attribute names and the layout of the ``nn.Sequential`` heads are part of
    the checkpoint format (state-dict keys) and must not be changed.

    Args:
        coarse_classes_num: Number of outputs of the coarse head.
        fine_classes_num: Number of outputs of the fine head.
        pretrained: If True the backbone is built with
            ``EfficientNet.from_pretrained``; otherwise with
            ``EfficientNet.from_name``.
        dropout: Dropout probability used inside both heads.
    """

    def __init__(
        self, coarse_classes_num, fine_classes_num, pretrained=True, dropout=0
    ):
        super().__init__()

        if pretrained:
            self.model = EfficientNet.from_pretrained("efficientnet-b0")
        else:
            self.model = EfficientNet.from_name("efficientnet-b0")

        # Width of the feature map returned by ``extract_features``.
        # NOTE(review): derived as 4x the last block's output filters; this is
        # assumed to match the backbone's final conv width for B0 - confirm
        # before reusing with another EfficientNet variant.
        last_channels = self.model._blocks_args[-1].output_filters * 4
        hidden_dim = 128

        # Global average pooling: (N, C, H, W) -> (N, C).
        self.pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
        )

        self.coarse_head = nn.Sequential(
            nn.Linear(last_channels, hidden_dim),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, coarse_classes_num),
        )

        # The fine head also consumes the coarse head's hidden activation.
        self.fine_head = nn.Sequential(
            nn.Linear(last_channels + hidden_dim, hidden_dim),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, fine_classes_num),
        )

    def forward(self, x):
        """Return ``(coarse_logits, fine_logits)`` for a batch of images ``x``."""
        # The backbone stays frozen even while the wrapper is in train mode.
        self.model.eval()
        with torch.no_grad():
            features = self.pool(self.model.extract_features(x))

        # Run the first coarse layer once and reuse its output for both the
        # remainder of the coarse head and the fine head's input.
        hidden = self.coarse_head[0](features)
        coarse = self.coarse_head[2](self.coarse_head[1](hidden))

        fine = self.fine_head(torch.cat([features, hidden], dim=-1))
        return coarse, fine

In [17]:
# --- Inference configuration -------------------------------------------------

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Location of the trained weights.
CKPT_DIR = Path("checkpoints")
CKPT_FILE = CKPT_DIR.joinpath("effnet-b0.pth")

# Side length (pixels) every image is resized to before it reaches the model.
IMAGE_SIZE = 256

# Per-image preprocessing: normalise with the library defaults, resize to a
# square, then convert the array to a torch tensor.
transform = A.Compose(
    [A.Normalize(), A.Resize(IMAGE_SIZE, IMAGE_SIZE), ToTensorV2()]
)

# Converts logits to probabilities along dimension 1.
softmax = nn.Softmax(dim=1)

In [18]:
# Sanity check: show which checkpoint path will be loaded.
print(CKPT_FILE)

checkpoints/effnet-b0.pth


In [29]:
def model_prediction(test_dir, other_threshold=0.05):
    """Classify every image under ``test_dir`` and return the predictions.

    Images are discovered three directory levels below ``test_dir``
    (``<test_dir>/*/*/*``). The function relies on the module-level
    ``EffNet_B0``, ``CKPT_FILE``, ``device``, ``transform`` and ``softmax``.

    Args:
        test_dir: Root directory of the image tree (``str`` or ``Path``).
        other_threshold: Minimum top landmark probability required to report a
            landmark name; below it the landmark is reported as ``'other'``.

    Returns:
        pandas.DataFrame with one row per image and the columns ``image``
        (file name), ``category``, ``category_confidence``, ``landmark`` and
        ``landmark_confidence``. Confidences are Python floats.
    """
    # Class names; the position in each list is the model's output index.
    categories = ["Pagodas", "Pyramids", "Modern", "Gothic", "Mughal", "Neoclassical"]

    landmarks = [
        "TianningTemplePagoda",
        "ThienMuPagoda",
        "GiantWildGoosePagoda",
        "ShwedagonPagoda",
        "FogongTemplePagoda",
        "Pyramid of Giza",
        "Pyramid of Djoser",
        "Santa Cecilia Acatitlan Pyramid",
        "El Castillo, Chichen Itza",
        "Louvre Pyramid",
        "Chrysler Building",
        "Cathedral of Brasília",
        "CCTV Headquarters",
        "Hallgrímskirkja",
        "eiffel",
        "St.VitusCathedral",
        "MilanCathedral",
        "ChartresCathedral",
        "CologneCathedral",
        "Notre-DameCathedral",
        "Taj Mahal",
        "Tomb of Akbar",
        "Jama Masjid",
        "Tomb of I_timad-ud-Daulah",
        "Bibi Ka Maqbara",
        "Buckingham Palace",
        "Concertgebouw",
        "Academy of Athens",
        "Panthéon",
        "Ripon Building",
    ]

    model = EffNet_B0(len(categories), len(landmarks), False)
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    model.load_state_dict(torch.load(CKPT_FILE, map_location=device))
    model.to(device)
    model.eval()

    # Building the pattern through Path tolerates a trailing slash in test_dir.
    pattern = str(Path(test_dir) / "*" / "*" / "*")

    results = []
    with torch.no_grad():
        for image_path in tqdm(glob.glob(pattern)):
            file_name = Path(image_path).name
            # Compare the file name, not the whole path, so the filter can match.
            if file_name == ".DS_Store":
                continue

            # Close the file handle as soon as the pixels are in memory.
            with Image.open(image_path) as pil_image:
                pixels = np.array(pil_image.convert("RGB"))
            img = transform(image=pixels)["image"]

            cat_logits, land_logits = model(img.to(device).unsqueeze(0))
            cat_conf, cat_idx = softmax(cat_logits).max(dim=1)
            land_conf, land_idx = softmax(land_logits).max(dim=1)

            # .item() works for CPU and CUDA tensors alike (.numpy() does not)
            # and yields plain Python scalars for the DataFrame.
            cat_conf, cat_idx = cat_conf.item(), cat_idx.item()
            land_conf, land_idx = land_conf.item(), land_idx.item()

            if land_conf >= other_threshold:
                land_result = landmarks[land_idx]
            else:
                land_result = 'other'

            results.append(
                [
                    file_name,
                    categories[cat_idx],
                    cat_conf,
                    land_result,
                    land_conf,
                ]
            )

    prediction = pd.DataFrame(
        results,
        columns=[
            "image",
            "category",
            "category_confidence",
            "landmark",
            "landmark_confidence",
        ],
    )
    return prediction

In [30]:
# Run inference over every image found under the landmarks directory.
prediction = model_prediction("data/Landmarks-v1_0/")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 420/420 [01:32<00:00,  4.54it/s]


In [31]:
# Display the per-image predictions returned by model_prediction.
prediction

Unnamed: 0,image,category,category_confidence,landmark,landmark_confidence
0,TianningTemplePagoda18.jpeg,Pagodas,0.99992096,TianningTemplePagoda,0.9301038
1,TianningTemplePagoda1.jpeg,Pagodas,0.93576235,TianningTemplePagoda,0.5072759
2,TianningTemplePagoda0.jpeg,Pagodas,0.996414,TianningTemplePagoda,0.74065596
3,TianningTemplePagoda19.jpeg,Pagodas,0.9865834,TianningTemplePagoda,0.62819105
4,TianningTemplePagoda15.jpeg,Pagodas,0.8570526,TianningTemplePagoda,0.5322447
...,...,...,...,...,...
415,NotreDameDeParis3.jpeg,Gothic,0.98942214,Notre-DameCathedral,0.54669404
416,NotreDameDeParis8.jpeg,Gothic,0.83121085,Notre-DameCathedral,0.5641628
417,NotreDameDeParis4.jpeg,Gothic,0.9699182,Notre-DameCathedral,0.5688455
418,NotreDameDeParis16.jpeg,Gothic,0.9889015,Notre-DameCathedral,0.68241954


In [32]:
# List the distinct category names that appear among the predictions.
prediction['category'].unique()

array(['Pagodas', 'Neoclassical', 'Pyramids', 'Modern', 'Mughal',
       'Gothic'], dtype=object)

In [33]:
from sklearn.metrics import f1_score

In [34]:
# Load the label table (columns: image_path, style, landmark) used as ground truth.
dataset = pd.read_csv("data/dataset.csv")
dataset

Unnamed: 0,image_path,style,landmark
0,data/Landmarks-v1_0/Pagodas/TianningTemplePago...,Pagodas,TianningTemplePagoda
1,data/Landmarks-v1_0/Pagodas/TianningTemplePago...,Pagodas,TianningTemplePagoda
2,data/Landmarks-v1_0/Pagodas/TianningTemplePago...,Pagodas,TianningTemplePagoda
3,data/Landmarks-v1_0/Pagodas/TianningTemplePago...,Pagodas,TianningTemplePagoda
4,data/Landmarks-v1_0/Pagodas/TianningTemplePago...,Pagodas,TianningTemplePagoda
...,...,...,...
415,data/Landmarks-v1_0/Gothic/Notre-DameCathedral...,Gothic,Notre-DameCathedral
416,data/Landmarks-v1_0/Gothic/Notre-DameCathedral...,Gothic,Notre-DameCathedral
417,data/Landmarks-v1_0/Gothic/Notre-DameCathedral...,Gothic,Notre-DameCathedral
418,data/Landmarks-v1_0/Gothic/Notre-DameCathedral...,Gothic,Notre-DameCathedral


In [35]:
# Reduce each path to its file name so it can be matched against the `image`
# column of the predictions. Taking the last path component (instead of a
# fixed position) keeps this cell safe to re-run: a value that is already a
# bare file name is left unchanged rather than raising IndexError.
dataset['image_path'] = dataset['image_path'].str.rsplit('/', n=1).str[-1]
dataset

Unnamed: 0,image_path,style,landmark
0,TianningTemplePagoda18.jpeg,Pagodas,TianningTemplePagoda
1,TianningTemplePagoda1.jpeg,Pagodas,TianningTemplePagoda
2,TianningTemplePagoda0.jpeg,Pagodas,TianningTemplePagoda
3,TianningTemplePagoda19.jpeg,Pagodas,TianningTemplePagoda
4,TianningTemplePagoda15.jpeg,Pagodas,TianningTemplePagoda
...,...,...,...
415,NotreDameDeParis3.jpeg,Gothic,Notre-DameCathedral
416,NotreDameDeParis8.jpeg,Gothic,Notre-DameCathedral
417,NotreDameDeParis4.jpeg,Gothic,Notre-DameCathedral
418,NotreDameDeParis16.jpeg,Gothic,Notre-DameCathedral


In [36]:
# Pair every labelled image with its prediction by file name. Both frames
# have a `landmark` column, so pandas suffixes them: `landmark_x` is the
# label from `dataset`, `landmark_y` is the model's prediction.
result = dataset.merge(
    prediction,
    how='inner',
    left_on='image_path',
    right_on='image',
)
result

Unnamed: 0,image_path,style,landmark_x,image,category,category_confidence,landmark_y,landmark_confidence
0,TianningTemplePagoda18.jpeg,Pagodas,TianningTemplePagoda,TianningTemplePagoda18.jpeg,Pagodas,0.99992096,TianningTemplePagoda,0.9301038
1,TianningTemplePagoda1.jpeg,Pagodas,TianningTemplePagoda,TianningTemplePagoda1.jpeg,Pagodas,0.93576235,TianningTemplePagoda,0.5072759
2,TianningTemplePagoda0.jpeg,Pagodas,TianningTemplePagoda,TianningTemplePagoda0.jpeg,Pagodas,0.996414,TianningTemplePagoda,0.74065596
3,TianningTemplePagoda19.jpeg,Pagodas,TianningTemplePagoda,TianningTemplePagoda19.jpeg,Pagodas,0.9865834,TianningTemplePagoda,0.62819105
4,TianningTemplePagoda15.jpeg,Pagodas,TianningTemplePagoda,TianningTemplePagoda15.jpeg,Pagodas,0.8570526,TianningTemplePagoda,0.5322447
...,...,...,...,...,...,...,...,...
415,NotreDameDeParis3.jpeg,Gothic,Notre-DameCathedral,NotreDameDeParis3.jpeg,Gothic,0.98942214,Notre-DameCathedral,0.54669404
416,NotreDameDeParis8.jpeg,Gothic,Notre-DameCathedral,NotreDameDeParis8.jpeg,Gothic,0.83121085,Notre-DameCathedral,0.5641628
417,NotreDameDeParis4.jpeg,Gothic,Notre-DameCathedral,NotreDameDeParis4.jpeg,Gothic,0.9699182,Notre-DameCathedral,0.5688455
418,NotreDameDeParis16.jpeg,Gothic,Notre-DameCathedral,NotreDameDeParis16.jpeg,Gothic,0.9889015,Notre-DameCathedral,0.68241954


In [37]:
# Macro-averaged F1 of the predicted category against the labelled style.
f1 = f1_score(
    y_true=result['style'],
    y_pred=result['category'],
    average='macro',
)
f1

0.707045607495988

In [38]:
# Macro-averaged F1 of the predicted landmark (`landmark_y`) against the
# labelled landmark (`landmark_x`).
# NOTE(review): predictions may contain the extra label 'other'; presumably it
# never occurs in the ground truth, in which case it would count as a
# zero-F1 class in the macro average - confirm.
f1 = f1_score(
    y_true=result['landmark_x'],
    y_pred=result['landmark_y'],
    average='macro',
)
f1

0.7986907345528035