In [1]:
import pandas as pd
# Location of the parquet file holding the image table; the same path is
# used again at the end of the notebook when the frame is written back.
df_path = '../df.pqt'

# Read the whole table into memory and show it as the cell's output.
df = pd.read_parquet(path=df_path)
df

Unnamed: 0,fileName,gender,country,rate,voices,img
0,001154acf0a14bff8512297114d35e89_m_unknown_7.0...,m,unknown,7.0,100.0,b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...
1,00aeac4d739a4e5ca814a2136e93e2d4_m_unknown_8.0...,m,unknown,8.0,100.0,b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...
2,0118e6a1260a4a55b0fbab0184b6356b_m_unknown_7.8...,m,unknown,7.8,100.0,b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...
3,020981d71e5e4da8b34f39877298f569_m_unknown_7.6...,m,unknown,7.6,100.0,b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...
4,02532b0120c7434fa3d5ff5a1e178ef9_m_unknown_8.0...,m,unknown,8.0,100.0,b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\...
...,...,...,...,...,...,...
50753,fff6229cd5d2b_m_United Kingdom_6.3_23.png,m,United Kingdom,6.3,23.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
50754,fff66b6910334_w_Canada_6.2_18.png,w,Canada,6.2,18.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
50755,fff97f4533036_m_United Kingdom_5.9_193.png,m,United Kingdom,5.9,193.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
50756,fffd72003c4bc_w_United States_5_1090.png,w,United States,5.0,1090.0,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [2]:
import sys
sys.path.append('../')
import os
import torch
import torchvision.transforms as transforms
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights
from io import BytesIO
from PIL import Image
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import SquarePad, NotFoundPerson, get_person_rect, test_transform

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Runtime configuration for the person-detection pass.

# Cap the CUDA caching allocator's split size.
# NOTE(review): torch is already imported when this runs — confirm the
# allocator still honours the variable when set at this point.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:64'
torch.cuda.empty_cache()

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 4

# NOTE(review): not referenced in the visible cells; presumably a
# detection-score cutoff — confirm where it is consumed.
threshold = 0.66
treshold = threshold  # original (misspelled) name kept as an alias

# Position of the 'person' class in the weights' category list.
# `.index` returns that position; the previous `.count` returned the number
# of occurrences, which matched only by coincidence.
person_index = RetinaNet_ResNet50_FPN_V2_Weights.COCO_V1.meta["categories"].index('person')

In [4]:
# Build RetinaNet with the COCO_V1 weights, then move it to the selected
# device and switch it to evaluation mode.
model = retinanet_resnet50_fpn_v2(weights=RetinaNet_ResNet50_FPN_V2_Weights.COCO_V1)
model = model.to(device).eval()

In [5]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over the encoded images stored in a DataFrame.

    Item ``idx`` is the ``idx``-th entry (by position) of the frame's
    ``'img'`` column, opened with PIL, converted to RGB and, when
    ``transforms`` is given, passed through it.

    The image column is snapshotted at construction time, so indexing is
    positional regardless of the frame's index labels, and later in-place
    changes to the frame (row drops, overwritten cells) do not alter what
    the dataset returns or its length.
    """

    def __init__(self, df: pd.DataFrame, transforms: "transforms.Compose | None" = None):
        """Store the frame and snapshot its image column.

        Args:
            df: frame with an ``'img'`` column holding encoded image bytes.
            transforms: optional callable applied to each decoded PIL image.
        """
        self.transforms = transforms
        self.df = df  # kept for callers that read the attribute
        # List of references to the stored bytes objects (no image data is
        # copied); gives positional access independent of index labels.
        self._img_bytes = df['img'].tolist()

    def _raw_bytes(self, idx):
        """Return the encoded bytes of the image at position ``idx``."""
        return self._img_bytes[idx]

    def __getitem__(self, idx):
        """Decode image ``idx`` to RGB and apply the transforms, if any.

        Raises:
            IndexError: if ``idx`` is out of range.
        """
        img = Image.open(BytesIO(self._raw_bytes(idx))).convert('RGB')
        if self.transforms is not None:
            img = self.transforms(img)
        return img

    def __len__(self):
        """Number of images captured when the dataset was created."""
        return len(self._img_bytes)

In [6]:
# Preprocessing applied to every image before detection:
#   1. SquarePad (project helper; presumably pads the image to a square),
#   2. resize to 256x256,
#   3. the preprocessing preset bundled with the COCO_V1 weights.
person_transform = transforms.Compose(
    [
        SquarePad(),
        transforms.Resize(size=(256, 256)),
        RetinaNet_ResNet50_FPN_V2_Weights.COCO_V1.transforms(),
    ]
)

# Keep the frame's row order (no shuffling) so that the n-th prediction
# produced later lines up with the n-th row of `df`.
dataset = Dataset(df=df, transforms=person_transform)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False)

In [7]:
# Run the detector over every batch and collect one prediction dict per
# image, in dataloader order. Tensors are moved to the CPU right away so
# results do not accumulate on the device.
person_data = []
with torch.no_grad():
    for batch in tqdm(dataloader):
        detections = model(batch.to(device))
        for detection in detections:
            detection.update(
                # Box coordinates are truncated to integers.
                boxes=detection['boxes'].detach().cpu().to(torch.int64),
                scores=detection['scores'].detach().cpu(),
                labels=detection['labels'].detach().cpu(),
            )
        person_data.extend(detections)


100%|██████████| 12690/12690 [2:33:27<00:00,  1.38it/s] 


In [8]:
# Replace every stored image with a PNG of its person crop. Rows for which
# `get_person_rect` raises NotFoundPerson are removed from `df`.
#
# The labels of those rows are collected and dropped in one call after the
# loop: dropping inside the loop rebuilds the frame for every missing row and
# changes the frame the dataset reads from while it is still being iterated.
rows_without_person = []
for i in tqdm(range(len(dataset))):
    try:
        # Same padded/resized image the detector saw, back as a PIL image.
        detector_input = transforms.functional.to_pil_image(dataset[i])
        # Crop rectangle derived from this image's predictions.
        person_rect = get_person_rect(person_data[i], person_index)
        person_crop = transforms.functional.to_pil_image(
            test_transform(detector_input.crop(person_rect))
        )
        # Encode the crop as PNG and store the bytes back into the frame.
        buffer = BytesIO()
        person_crop.save(buffer, format='png')
        df.loc[i, 'img'] = buffer.getvalue()
    except NotFoundPerson:
        rows_without_person.append(i)

# Single in-place drop (a no-op when the list is empty).
df.drop(index=rows_without_person, inplace=True)

100%|██████████| 50758/50758 [1:06:05<00:00, 12.80it/s]


In [9]:
# Renumber the remaining rows 0..n-1 and write the frame back to the file it
# was loaded from.
# NOTE(review): this overwrites the notebook's own input, so re-running the
# notebook processes the already-cropped images — confirm that is intended.
df.reset_index(drop=True).to_parquet(path=df_path)