In [1]:
# Clean dataset
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torchvision.datasets.folder import default_loader
from tqdm import tqdm


# logger = logging.getLogger(__file__)


def _remove_all_not_found_image(df: pd.DataFrame, path_to_images: Path) -> pd.DataFrame:
    clean_rows = []
    for _, row in df.iterrows():
        image_id = row["image_id"]
        try:
            file_name = f"{path_to_images}/{image_id}.jpg"
            _ = default_loader(file_name)
        except (FileNotFoundError, OSError, UnboundLocalError) as ex:
            print(f"broken image {file_name} : {ex}")
        else:
            clean_rows.append(row)
    df_clean = pd.DataFrame(clean_rows)
    return df_clean


def remove_all_not_found_image(df: pd.DataFrame, path_to_images: Path, num_workers: int) -> pd.DataFrame:
    """Drop rows with unreadable images, checking the files in ``num_workers`` threads.

    The rows of ``df`` are cut into ``num_workers`` contiguous batches, each
    batch is filtered by ``_remove_all_not_found_image`` in a thread pool, and
    the filtered batches are concatenated.

    Args:
        df: Frame with an ``image_id`` column.
        path_to_images: Directory holding the image files.
        num_workers: Number of batches and of pool threads.

    Returns:
        The concatenated surviving rows, in the same relative order as in
        ``df`` (batches are joined in submission order, not completion order).
    """
    # Split row positions instead of the frame itself; calling
    # ``np.array_split`` on a DataFrame emits a FutureWarning on recent pandas.
    position_chunks = np.array_split(np.arange(len(df)), num_workers)
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(_remove_all_not_found_image, df=df.iloc[chunk], path_to_images=path_to_images)
            for chunk in position_chunks
        ]
        # ``as_completed`` is only used to advance the progress bar.
        for _ in tqdm(as_completed(futures), total=len(futures)):
            pass
    # Reading the results in submission order keeps the output deterministic;
    # ``result()`` re-raises any exception from a worker.
    results = [future.result() for future in futures]
    return pd.concat(results)


def read_ava_txt(path_to_ava: Path) -> pd.DataFrame:
    """Load the AVA annotation file and keep the image id plus the score columns.

    The file is parsed as space-separated values without a header row. Its
    first column is discarded; the remaining columns are named ``image_id``,
    ``score2`` .. ``score11`` and ``tag1`` .. ``tag3`` (numbering follows the
    original NIMA file layout), and only ``image_id`` and the score columns
    are returned.
    """
    # NIMA origin file format: column index ranges for scores and tags
    score_range = range(2, 12)
    tag_range = range(1, 4)
    score_names = [f"score{idx}" for idx in score_range]
    tag_names = [f"tag{idx}" for idx in tag_range]

    table = pd.read_csv(path_to_ava, header=None, sep=" ").drop(columns=0)
    table.columns = ["image_id", *score_names, *tag_names]
    # leave only score columns
    return table[["image_id", *score_names]]


def clean_and_split(
    path_to_ava_txt: Path,
    path_to_save_csv: Path,
    path_to_images: Path,
    train_size: float,
    num_workers: int,
    random_state=None,
):
    """Read the AVA annotations, drop unreadable images and write train/val/test CSVs.

    Args:
        path_to_ava_txt: Location of the AVA annotation text file.
        path_to_save_csv: Directory that receives ``train.csv``, ``val.csv``
            and ``test.csv``; it is created if it does not exist.
        path_to_images: Directory holding the ``<image_id>.jpg`` files.
        train_size: Share of the cleaned rows used for training; the
            remainder is split in half into validation and test.
        num_workers: Number of threads used for the image check.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls. ``None`` (the default) keeps the previous unseeded split.
    """
    # Create the output directory up front so a missing folder fails (or is
    # fixed) before the slow image scan rather than after it.
    Path(path_to_save_csv).mkdir(parents=True, exist_ok=True)

    print("read ava txt")
    df = read_ava_txt(path_to_ava_txt)
    print("removing broken images")
    df = remove_all_not_found_image(df, path_to_images, num_workers=num_workers)
    print("train val test split")
    df_train, df_val_test = train_test_split(df, train_size=train_size, random_state=random_state)
    df_val, df_test = train_test_split(df_val_test, train_size=0.5, random_state=random_state)
    train_path = f"{path_to_save_csv}/train.csv"
    val_path = f"{path_to_save_csv}/val.csv"
    test_path = f"{path_to_save_csv}/test.csv"
    print(f"saving to {train_path} {val_path} and {test_path}")
    df_train.to_csv(train_path, index=False)
    df_val.to_csv(val_path, index=False)
    df_test.to_csv(test_path, index=False)


# Inputs for the one-off dataset preparation step below.
# NOTE(review): the ".../" prefixes look like redacted placeholders — point
# them at real locations before running this cell.
path_to_ava_txt = ".../AVA.txt"  # AVA annotation file parsed by read_ava_txt
path_to_save_csv = ".../dataset_csv"  # directory that receives train.csv / val.csv / test.csv
path_to_images = ".../image"  # directory containing the <image_id>.jpg files
train_size = 0.8  # share of rows for training; the rest is halved into val and test
num_workers = 12  # number of threads used to check that every image loads

print(f"Clean and split dataset to train|val|test in {num_workers} threads. It will takes several minutes")
# Reads the annotations, drops rows whose image cannot be loaded and writes the three CSV files.
clean_and_split(
    path_to_ava_txt=path_to_ava_txt,
    path_to_save_csv=path_to_save_csv,
    path_to_images=path_to_images,
    train_size=train_size,
    num_workers=num_workers,
)
print("Done!")

In [1]:
# Per-channel mean and standard deviation handed to transforms.Normalize in
# Transform (three values each — presumably the usual ImageNet RGB statistics).
IMAGE_NET_MEAN = [0.485, 0.456, 0.406]
IMAGE_NET_STD = [0.229, 0.224, 0.225]

from torchvision import transforms

class SquarePad:
    """Callable transform that zero-pads an image so it becomes square.

    The image is expected to expose ``size`` as a ``(width, height)`` pair
    (assumption: a PIL image, as produced by ``default_loader``).
    """

    @staticmethod
    def _padding_for(width, height):
        """Return ``(left, top, right, bottom)`` padding that squares a ``width`` x ``height`` image."""
        side = max(width, height)
        extra_w = side - width
        extra_h = side - height
        left = extra_w // 2
        top = extra_h // 2
        # The remainder goes to the right/bottom edge. Padding both sides with
        # the rounded-down half left odd differences one pixel short of square.
        return (left, top, extra_w - left, extra_h - top)

    def __call__(self, image):
        """Pad ``image`` with constant zeros up to ``max(width, height)`` on both axes."""
        w, h = image.size
        return transforms.functional.pad(image, self._padding_for(w, h), 0, 'constant')
    
class Transform:
    """Holds the image pipelines used for training and for validation/testing.

    Both pipelines finish with ``ToTensor`` followed by normalisation with
    ``IMAGE_NET_MEAN`` / ``IMAGE_NET_STD``.
    """

    def __init__(self):
        imagenet_norm = transforms.Normalize(mean=IMAGE_NET_MEAN, std=IMAGE_NET_STD)

        # Training: resize to 256x256, random horizontal flip, random 224x224 crop.
        # (SquarePad() may be inserted as the first step to pad to a square.)
        train_steps = [
            transforms.Resize((256, 256)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop((224, 224)),
            transforms.ToTensor(),
            imagenet_norm,
        ]
        self._train_transform = transforms.Compose(train_steps)

        # Alternative sizes kept for the inception backbone:
        #   train: Resize((350, 350)) -> RandomHorizontalFlip -> RandomCrop((299, 299))
        #   val:   Resize((299, 299))

        # Validation/test: deterministic resize to 224x224 only.
        # (SquarePad() may be inserted as the first step here as well.)
        eval_steps = [transforms.Resize((224, 224)), transforms.ToTensor(), imagenet_norm]
        self._val_transform = transforms.Compose(eval_steps)

    @property
    def train_transform(self):
        """Augmenting pipeline applied to training images."""
        return self._train_transform

    @property
    def val_transform(self):
        """Deterministic pipeline applied to validation and test images."""
        return self._val_transform


class AverageMeter:
    """Tracks the latest value together with a running weighted average.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted sum), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def set_up_seed(seed=42):
    """Seed the random number generators used in this notebook and pin cuDNN settings.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    then sets ``cudnn.deterministic = True`` and ``cudnn.benchmark = False``.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the surrounding cell does not import ``random``.
    import random

    # Python's own generator is independent of NumPy's and PyTorch's, so it
    # needs its own seed for any library code that draws from it.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [2]:
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision.datasets.folder import default_loader


class AVADataset(Dataset):
    """Dataset over a CSV whose first column is ``image_id`` and whose other columns are score counts.

    Each item is the transformed image ``{images_path}/{image_id}.jpg``
    paired with the row's remaining columns normalised to sum to one.
    """

    def __init__(self, path_to_csv: Path, images_path: Path, transform):
        self.transform = transform
        self.images_path = images_path
        self.df = pd.read_csv(path_to_csv)

    def __len__(self) -> int:
        return len(self.df)

    def __getitem__(self, item: int) -> Tuple[torch.Tensor, np.ndarray]:
        record = self.df.iloc[item]

        # Load and transform the image first, exactly as stored on disk.
        picture = default_loader(f"{self.images_path}/{record['image_id']}.jpg")
        x = self.transform(picture)

        # Everything after the id column is a vote count; turn counts into a distribution.
        votes = record.iloc[1:].values.astype("float32")
        return x, votes / votes.sum()


In [3]:
import torch
import torch.nn as nn


class EDMLoss(nn.Module):
    """Squared earth mover's distance between two batches of discrete distributions.

    For every sample the cumulative sums of target and estimate along
    ``dim=1`` are compared, the root-mean-square of their difference is taken
    over the score bins, and the per-sample distances are averaged over the
    batch.
    """

    def __init__(self):
        super().__init__()

    def forward(self, p_target: torch.Tensor, p_estimate: torch.Tensor):
        """Return the batch-mean EMD as a 0-dim tensor.

        Args:
            p_target: Target distributions, one row per sample.
            p_estimate: Predicted distributions with the same shape.

        Raises:
            AssertionError: If the two shapes differ.
        """
        assert p_target.shape == p_estimate.shape
        # cdf for values [1, 2, ..., 10]
        cdf_target = torch.cumsum(p_target, dim=1)
        cdf_estimate = torch.cumsum(p_estimate, dim=1)
        cdf_diff = cdf_estimate - cdf_target
        # Reduce over the bins only (dim=1). Without ``dim`` the mean collapsed
        # the whole batch into a single RMS value, so the result was not a
        # per-sample distance and the final ``.mean()`` was a no-op.
        # NOTE(review): sqrt has an infinite gradient at 0; a sample whose
        # estimate matches its target exactly would yield NaN gradients.
        samplewise_emd = torch.sqrt(torch.mean(torch.pow(torch.abs(cdf_diff), 2), dim=1))
        return samplewise_emd.mean()

In [4]:
class BinaryLoss(nn.Module):
    """Share of a batch whose target and predicted mean scores fall on the same side of a threshold.

    Despite the name, a larger value means more agreement: a sample counts
    when both mean scores are ``>= threshold`` or both are ``<= threshold``.

    Args:
        threshold: Mean-score boundary between the two classes (default 5).
    """

    def __init__(self, threshold: float = 5):
        super().__init__()
        self.threshold = threshold

    def forward(self, p_target: torch.Tensor, p_estimate: torch.Tensor):
        """Return the agreeing fraction of the batch as a Python float.

        Raises:
            ZeroDivisionError: If the batch is empty.
        """
        target = get_mean_score(p_target)
        estimate = get_mean_score(p_estimate)
        batch_size = len(target)
        # Vectorised form of "min(t, e) >= thr or max(t, e) <= thr"; avoids a
        # Python-level comparison (and device sync) for every sample.
        both_high = (target >= self.threshold) & (estimate >= self.threshold)
        both_low = (target <= self.threshold) & (estimate <= self.threshold)
        agreeing = int((both_high | both_low).sum().item())
        return agreeing / batch_size

In [5]:
class BinaryLoss5(nn.Module):
    """Share of a batch whose target and predicted mean scores fall on the same side of 5.5.

    Despite the name, a larger value means more agreement: a sample counts
    when both mean scores are ``>= threshold`` or both are ``<= threshold``.

    Args:
        threshold: Mean-score boundary between the two classes (default 5.5).
    """

    def __init__(self, threshold: float = 5.5):
        super().__init__()
        self.threshold = threshold

    def forward(self, p_target: torch.Tensor, p_estimate: torch.Tensor):
        """Return the agreeing fraction of the batch as a Python float.

        Raises:
            ZeroDivisionError: If the batch is empty.
        """
        target = get_mean_score(p_target)
        estimate = get_mean_score(p_estimate)
        batch_size = len(target)
        # Vectorised form of "min(t, e) >= thr or max(t, e) <= thr"; avoids a
        # Python-level comparison (and device sync) for every sample.
        both_high = (target >= self.threshold) & (estimate >= self.threshold)
        both_low = (target <= self.threshold) & (estimate <= self.threshold)
        agreeing = int((both_high | both_low).sum().item())
        return agreeing / batch_size

In [6]:
import pretrainedmodels
import torch.nn as nn
import torchvision as tv


# Registry used by create_model: model name -> (constructor, feature count).
# The constructor is called with pretrained=True and the feature count becomes
# the input size of the NIMA head's linear layer.
# NOTE(review): create_model strips the last child and wraps the rest in
# nn.Sequential; confirm that every entry here (in particular "inception-v3"
# and "nasnetmobile") actually works with that treatment.
MODELS = {
    "resnet18": (tv.models.resnet18, 512),
    "resnet34": (tv.models.resnet34, 512),
    "resnet50": (tv.models.resnet50, 2048),
    "resnet101": (tv.models.resnet101, 2048),
    "resnet152": (tv.models.resnet152, 2048),
    "mobilenet-v2": (tv.models.mobilenet_v2, 1280),
    "mobilenet-v3-small": (tv.models.mobilenet_v3_small, 576),
    "inception-v3": (tv.models.inception_v3, 2048),
    "nasnetmobile": (pretrainedmodels.__dict__['nasnetamobile'], 1056)
}


class NIMA(nn.Module):
    """Backbone followed by a 10-way softmax head.

    Args:
        base_model: Feature extractor applied to the input batch.
        input_features: Number of features per sample fed to the head's
            linear layer.
        drop_out: Dropout probability used in the head.
    """

    def __init__(self, base_model: nn.Module, input_features: int, drop_out: float):
        super().__init__()
        self.base_model = base_model
        # Plain attribute (not a parameter/buffer), so state dicts are unchanged.
        self.input_features = input_features

        self.head = nn.Sequential(
            nn.ReLU(inplace=True), nn.Dropout(p=drop_out), nn.Linear(input_features, 10), nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor whose rows sum to one."""
        x = self.base_model(x)
        # Backbones that stop before their global pooling return a 4-D map
        # whose flattened size would not match the head. Pool those to 1x1.
        # Outputs that already fit (2-D features, or maps that flatten to
        # ``input_features``) are left exactly as before.
        if x.dim() == 4 and x[0].numel() != self.input_features:
            x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
        x = self.head(x)
        return x


def create_model(model_type: str, drop_out: float) -> NIMA:
    """Build a ``NIMA`` model on top of the pretrained backbone registered as ``model_type``.

    The backbone is instantiated with ``pretrained=True``, its last child
    module is removed and the remaining children are chained in an
    ``nn.Sequential``.

    Raises:
        KeyError: If ``model_type`` is not a key of ``MODELS``.
    """
    factory, feature_count = MODELS[model_type]
    backbone = factory(pretrained=True)
    # Alternatives tried for other backbones:
    #   factory(pretrained=True, aux_logits=False)
    #   factory(num_classes=1000, pretrained='imagenet'), then replacing
    #   backbone.logits with an identity function
    kept_layers = list(backbone.children())[:-1]
    trunk = nn.Sequential(*kept_layers)
    return NIMA(base_model=trunk, input_features=feature_count, drop_out=drop_out)

In [7]:
import logging
import time
from pathlib import Path
from typing import Tuple

import torch
import torch.optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
# logger = logging.getLogger(__file__)
def get_dataloaders(
    path_to_save_csv: Path, path_to_images: Path, batch_size: int, num_workers: int
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Create the train, validation and test loaders from the CSV files in ``path_to_save_csv``.

    The training set uses the augmenting transform and is shuffled; the
    validation and test sets share the deterministic transform and keep
    their order.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    pipelines = Transform()

    def build_loader(dataset, shuffle):
        return DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=shuffle)

    train_set = AVADataset(f"{path_to_save_csv}/train.csv", path_to_images, pipelines.train_transform)
    val_set = AVADataset(f"{path_to_save_csv}/val.csv", path_to_images, pipelines.val_transform)
    test_set = AVADataset(f"{path_to_save_csv}/test.csv", path_to_images, pipelines.val_transform)

    return (
        build_loader(train_set, True),
        build_loader(val_set, False),
        build_loader(test_set, False),
    )


def validate_and_test(
    path_to_save_csv: Path,
    path_to_images: Path,
    batch_size: int,
    num_workers: int,
    drop_out: float,
    path_to_model_state: Path,
) -> None:
    """Load a saved checkpoint and print its average EMD loss on the validation and test sets.

    Args:
        path_to_save_csv: Directory with ``val.csv`` and ``test.csv``.
        path_to_images: Directory holding the image files.
        batch_size: Batch size of both loaders.
        num_workers: Worker processes per loader.
        drop_out: Dropout probability used when rebuilding the model.
        path_to_model_state: Checkpoint containing ``"model_type"`` and
            ``"state_dict"`` entries.
    """
    _, val_loader, test_loader = get_dataloaders(
        path_to_save_csv=path_to_save_csv, path_to_images=path_to_images, batch_size=batch_size, num_workers=num_workers
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = EDMLoss().to(device)

    # map_location lets a checkpoint written on a GPU machine load on a
    # CPU-only one (same approach as Trainer uses for its warm start).
    best_state = torch.load(path_to_model_state, map_location=device)

    model = create_model(best_state["model_type"], drop_out=drop_out).to(device)
    model.load_state_dict(best_state["state_dict"])
    model.eval()

    def _average_loss(loader) -> float:
        """Sample-weighted mean of the criterion over every batch of ``loader``."""
        meter = AverageMeter()
        with torch.no_grad():
            for (x, y) in tqdm(loader):
                x = x.to(device)
                y = y.to(device)
                y_pred = model(x)
                loss = criterion(p_target=y, p_estimate=y_pred)
                meter.update(loss.item(), x.size(0))
        return meter.avg

    val_loss = _average_loss(val_loader)
    test_loss = _average_loss(test_loader)

    print(f"val loss {val_loss}; test loss {test_loss}")
    # logger.info(f"val loss {validate_losses.avg}; test loss {test_losses.avg}")
    # logger.info(f"val loss {validate_losses.avg}; test loss {test_losses.avg}")


def get_optimizer(optimizer_type: str, model: NIMA, init_lr: float) -> torch.optim.Optimizer:
    """Create the optimizer named by ``optimizer_type`` for ``model``.

    ``"adam"`` uses two parameter groups: the backbone at ``init_lr * 0.1``
    and the head at ``init_lr``. ``"sgd"`` optimises all parameters at
    ``init_lr`` with momentum 0.5.

    Raises:
        ValueError: For any other ``optimizer_type``.
    """
    if optimizer_type == "adam":
        # Single-rate alternative: torch.optim.Adam(model.parameters(), lr=init_lr)
        param_groups = [
            {'params': model.base_model.parameters(), 'lr': init_lr * 0.1},
            {'params': model.head.parameters(), 'lr': init_lr},
        ]
        return torch.optim.Adam(param_groups)

    if optimizer_type == "sgd":
        # NOTE(review): weight_decay=9 is unusually large — confirm it is intended.
        return torch.optim.SGD(model.parameters(), lr=init_lr, momentum=0.5, weight_decay=9)

    raise ValueError(f"not such optimizer {optimizer_type}")


In [8]:


class Trainer:
    """Runs the training loop for a NIMA model and writes checkpoints and TensorBoard logs.

    All constructor arguments are keyword-only.

    Args:
        path_to_save_csv: Directory with ``train.csv``, ``val.csv`` and ``test.csv``.
        path_to_images: Directory holding the image files.
        num_epoch: Number of epochs run by ``train_model``.
        model_type: Key of ``MODELS`` selecting the backbone.
        model_pth: Optional checkpoint to warm-start from; a falsy value
            (e.g. an empty string) skips loading.
        num_workers: Worker processes per data loader.
        batch_size: Batch size of all loaders.
        init_lr: Base learning rate handed to ``get_optimizer``.
        experiment_dir: Directory for checkpoints and the ``logs`` sub-folder.
        drop_out: Dropout probability of the model head.
        optimizer_type: Optimizer name understood by ``get_optimizer``.
    """

    def __init__(
        self,
        *,
        path_to_save_csv: Path,
        path_to_images: Path,
        num_epoch: int,
        model_type: str,
        model_pth: Path,
        num_workers: int,
        batch_size: int,
        init_lr: float,
        experiment_dir: Path,
        drop_out: float,
        optimizer_type: str,
    ):

        train_loader, val_loader, test_loader = get_dataloaders(
            path_to_save_csv=path_to_save_csv,
            path_to_images=path_to_images,
            batch_size=batch_size,
            num_workers=num_workers,
        )
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = create_model(model_type, drop_out=drop_out).to(self.device)

        # load pretrained parameter
        if model_pth:
            state = torch.load(model_pth, map_location=torch.device(self.device))
            model.load_state_dict(state["state_dict"])
            model.eval()

        optimizer = get_optimizer(optimizer_type=optimizer_type, model=model, init_lr=init_lr)

        self.model = model
        self.optimizer = optimizer

        # self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=self.optimizer, mode="min", patience=5)
        self.scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)

        self.criterion = EDMLoss().to(self.device)
        self.binary_loss = BinaryLoss().to(self.device)
        self.binary_loss5 = BinaryLoss5().to(self.device)

        self.model_type = model_type

        # experiment_dir.mkdir(exist_ok=True, parents=True)
        self.experiment_dir = experiment_dir
        self.writer = SummaryWriter(f"{experiment_dir}/logs")
        # Number of training iterations between two progress log lines.
        self.print_freq = 300
        self.num_epoch = num_epoch

    def train_model(self):
        """Train for ``num_epoch`` epochs.

        After every epoch the model is evaluated, the scheduler is stepped,
        losses are written to TensorBoard and a checkpoint is saved; an extra
        "best" checkpoint is written whenever the validation loss improves.
        """
        best_loss = float("inf")
        best_state = None
        for e in range(1, self.num_epoch + 1):

            train_loss = self.train()
            val_loss = self.validate()
            binary_loss = self.binary_classify()
            binary_loss5 = self.binary_classify5()

            # self.scheduler.step(metrics=val_loss)
            self.scheduler.step()

            self.writer.add_scalar("train/loss", train_loss, global_step=e)
            self.writer.add_scalar("val/loss", val_loss, global_step=e)
            # Per-epoch summary, reported whether or not the loss improved.
            epoch_summary = f"epoch {e}: train loss {train_loss}, val loss {val_loss}"
            print(epoch_summary)
            logger.info(epoch_summary)

            current_state = {
                "state_dict": self.model.state_dict(),
                "model_type": self.model_type,
                "epoch": e,
                "val_loss": val_loss,
            }
            torch.save(current_state, f"{self.experiment_dir}/{self.model_type}_epoch{e}_new.pth")

            if best_state is None or val_loss < best_loss:
                # Only announce an update when the best loss really changes.
                print(f"updated loss from {best_loss} to {val_loss}")
                logger.info(f"updated loss from {best_loss} to {val_loss}")

                best_loss = val_loss
                best_state = {
                    "state_dict": self.model.state_dict(),
                    "model_type": self.model_type,
                    "epoch": e,
                    "best_loss": best_loss,
                }
                torch.save(best_state, f"{self.experiment_dir}/{self.model_type}_best_state_epoch{e}.pth")

            logger.info(f"Binary classification accuracy is {binary_loss}")
            logger.info(f"Binary classification accuracy 5.5 is {binary_loss5}")

        # Push any buffered scalars to disk once training has finished.
        self.writer.flush()

    def train(self):
        """Run one optimisation pass over the training loader.

        Returns:
            Sample-weighted average training loss of the epoch.
        """
        self.model.train()
        train_losses = AverageMeter()
        # Actual number of batches, including a final partial one.
        total_iter = len(self.train_loader)
        window_loss = 0.0

        for idx, (x, y) in enumerate(self.train_loader):
            s = time.monotonic()

            x = x.to(self.device)
            y = y.to(self.device)
            y_pred = self.model(x)
            loss = self.criterion(p_target=y, p_estimate=y_pred)
            self.optimizer.zero_grad()

            loss.backward()

            self.optimizer.step()
            # Work with the Python float from here on: summing the loss tensor
            # itself would keep autograd history alive across iterations.
            loss_value = loss.item()
            train_losses.update(loss_value, x.size(0))

            e = time.monotonic()
            window_loss += loss_value
            if not (idx + 1) % self.print_freq:
                iter_time = e - s
                # Estimated duration of one logging window (print_freq iterations).
                log_time = self.print_freq * iter_time
                # Remaining iterations times the per-iteration time, in minutes.
                # The window time was used here before, which overstated the
                # estimate by a factor of print_freq.
                eta = ((total_iter - idx - 1) * iter_time) / 60.0
                logger.info(f"iter #[{idx}/{total_iter}] " f"loss = {window_loss/self.print_freq:.3f} " f"time = {log_time:.2f} " f"eta = {eta:.2f}")
                window_loss = 0.0

        return train_losses.avg

    def validate(self):
        """Return the sample-weighted average criterion loss on the validation loader."""
        self.model.eval()
        validate_losses = AverageMeter()

        with torch.no_grad():
            for idx, (x, y) in enumerate(self.val_loader):
                x = x.to(self.device)
                y = y.to(self.device)
                y_pred = self.model(x)
                loss = self.criterion(p_target=y, p_estimate=y_pred)
                validate_losses.update(loss.item(), x.size(0))

        return validate_losses.avg

    def _mean_test_metric(self, metric):
        """Average ``metric(p_target=..., p_estimate=...)`` over the test loader, weighted by batch size."""
        self.model.eval()
        meter = AverageMeter()

        with torch.no_grad():
            for (x, y) in tqdm(self.test_loader):
                x = x.to(self.device)
                y = y.to(self.device)
                y_pred = self.model(x)
                meter.update(metric(p_target=y, p_estimate=y_pred), x.size(0))
        return meter.avg

    def binary_classify(self):
        """Return the test-set average of ``self.binary_loss``."""
        return self._mean_test_metric(self.binary_loss)

    def binary_classify5(self):
        """Return the test-set average of ``self.binary_loss5``."""
        return self._mean_test_metric(self.binary_loss5)

In [9]:
def train_model(
    path_to_save_csv: Path,
    path_to_images: Path,
    experiment_dir: Path,
    model_type: str,
    model_pth: Path,
    batch_size: int,
    num_workers: int,
    num_epoch: int,
    init_lr: float,
    drop_out: float,
    optimizer_type: str,
    seed: int,
):
    """Log the run configuration, seed the generators and train a model with ``Trainer``.

    Every argument except ``seed`` is forwarded unchanged to ``Trainer``;
    ``seed`` is passed to ``set_up_seed`` before the trainer is created.
    """
    logger.info(f'''
    path_to_save_csv:{path_to_save_csv},
    path_to_images:{path_to_images},
    experiment_dir:{experiment_dir},
    model_type:{model_type},
    model_pth:{model_pth},
    batch_size:{batch_size},
    num_workers:{num_workers},
    num_epoch:{num_epoch},
    init_lr:{init_lr},
    drop_out:{drop_out},
    optimizer_type:{optimizer_type},
    seed:{seed},
    ''')

    logger.info("Train and validate model")
    set_up_seed(seed)

    # Collect the trainer settings in one place before building it.
    trainer_settings = dict(
        path_to_save_csv=path_to_save_csv,
        path_to_images=path_to_images,
        experiment_dir=experiment_dir,
        model_type=model_type,
        model_pth=model_pth,
        batch_size=batch_size,
        num_workers=num_workers,
        num_epoch=num_epoch,
        init_lr=init_lr,
        drop_out=drop_out,
        optimizer_type=optimizer_type,
    )
    Trainer(**trainer_settings).train_model()
    logger.info("Done!")

In [10]:
# Notebook-wide default device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_mean_score(score):
    """Return the expected score of each row of ``score``.

    Each row is treated as weights over the score values 1..10; the result is
    ``sum(k * score[:, k - 1])`` per row.

    Args:
        score: Tensor whose dimension 1 has length 10.

    Returns:
        Tensor with dimension 1 summed out, on the same device as ``score``.
    """
    # Create the bucket values on the input's own device instead of the
    # global default, so CPU tensors still work on a machine that has CUDA.
    buckets = torch.arange(1, 11, device=score.device)
    mu = torch.sum(buckets * score, 1)
    return mu

In [11]:
import logging
import sys

# Root logger: every ``logger.info(...)`` call in the notebook is routed here.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')


# mode='w' truncates the log file each time this cell runs.
file_handler = logging.FileHandler('***.log', mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)

# Re-running this cell must not stack a second handler on the same file,
# otherwise every record would be written twice. Drop and close earlier ones.
for _stale_handler in list(logger.handlers):
    if isinstance(_stale_handler, logging.FileHandler) and _stale_handler.baseFilename == file_handler.baseFilename:
        logger.removeHandler(_stale_handler)
        _stale_handler.close()

logger.addHandler(file_handler)

In [None]:
import warnings
# NOTE(review): this hides every warning for the rest of the kernel session,
# including deprecation warnings from the libraries used above — consider
# narrowing the filter.
warnings.filterwarnings('ignore')
# Start training with the settings below.
# NOTE(review): the "..." path segments look like redacted placeholders — set
# real locations before running. An empty model_pth skips the warm start.
train_model(path_to_save_csv = ".../dataset_csv",
    path_to_images = "/.../image",
    experiment_dir = ".../result/",
    model_type = "mobilenet-v3-small",
    model_pth = "",
    batch_size = 128,
    num_workers = 4,
    num_epoch = 64,
    # init_lr = 0.0001,
    # init_lr = 0.000003,
    init_lr = 0.0001,
    # drop_out = 0.5,
    drop_out = 0.75,
    optimizer_type = "adam",
    seed = 9,)
