In [17]:
import torch 
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import torchvision
import torchvision.transforms as transforms

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split

import torch.utils.tensorboard as pttb
from torch.utils.tensorboard import SummaryWriter

from typing import Tuple,List,Any


In [18]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
%load_ext tensorboard

In [19]:
image_data_path = "/content/drive/MyDrive/NoseNet-DataSet/images/"
label_data_path = "/content/drive/MyDrive/NoseNet-DataSet/labels/"

labels_file_name = "nosenet.csv"

In [48]:
df = pd.read_csv(label_data_path + labels_file_name)

In [49]:
df = df.set_axis(['PART','X', 'Y', 'IMG-PATH','WIDTH','HEIGHT'], axis=1, inplace=False)

In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048 entries, 0 to 1047
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   PART      1048 non-null   object
 1   X         1048 non-null   int64 
 2   Y         1048 non-null   int64 
 3   IMG-PATH  1048 non-null   object
 4   WIDTH     1048 non-null   int64 
 5   HEIGHT    1048 non-null   int64 
dtypes: int64(4), object(2)
memory usage: 49.2+ KB


In [51]:
# https://stackoverflow.com/questions/34962104/how-can-i-use-the-apply-function-for-a-single-column
# https://youtu.be/smPLY_5gVv4

def reshape_annotation(x_annotation,y_annotation,width,height):
  
    resized_x = 300
    resized_y = 300

    x_scale = (resized_x / width)
    y_scale = (resized_y / height)

    X = int(np.round(x_annotation * x_scale))
    Y = int(np.round(y_annotation * y_scale))

    return X,Y


def reshape_annotation_x(x_annotation,width):

    resized_x = 300
    
    for w in range(len(width)):
      x_scale = (resized_x / width[w])
      X = int(np.round(x_annotation * x_scale))

      return X

def reshape_annotation_y(y_annotation,height):

    resized_y = 300

    for h in range(len(height)):
      y_scale = (resized_y / height[h])
      Y = int(np.round(y_annotation * y_scale))

      return Y


In [52]:
df.head(5)

Unnamed: 0,PART,X,Y,IMG-PATH,WIDTH,HEIGHT
0,nose,258,432,1 (1).jpg,500,750
1,nose,222,283,1 (1).png,429,527
2,nose,580,487,1 (10).jpg,1066,1332
3,nose,441,577,1 (10).png,1067,1600
4,nose,129,206,1 (100).jpg,236,354


In [53]:
df["X"] = df["X"].apply(reshape_annotation_x,args=[list(df["WIDTH"])])

In [54]:
df["Y"] = df["Y"].apply(reshape_annotation_x,args=[list(df["HEIGHT"])])

In [55]:
df.head(5)

Unnamed: 0,PART,X,Y,IMG-PATH,WIDTH,HEIGHT
0,nose,103,259,1 (1).jpg,500,750
1,nose,89,170,1 (1).png,429,527
2,nose,232,292,1 (10).jpg,1066,1332
3,nose,176,346,1 (10).png,1067,1600
4,nose,52,124,1 (100).jpg,236,354


In [56]:
def draw_annotation(img_path:str,coords:Tuple[int,int],isReshape:bool) -> Any:
    
    image = cv2.imread(img_path)
    if isReshape:
      image = image = cv2.resize(image,(300,300))
    
    image = cv2.circle(image, (coords[0],coords[1]), radius=5, color=(0, 0, 255), thickness=-1)
    cv2.imwrite("/content/img.jpg",image)


In [58]:
index = np.random.randint(0,1040)

img_path = image_data_path + df["IMG-PATH"][index]
coords = (df["X"][index],df["Y"][index])
print(index,coords)

draw_annotation(img_path,coords,True)

489 (133, 244)


In [None]:

# 1.refer these both links
# https://towardsdatascience.com/facial-keypoints-detection-image-and-keypoints-augmentation-6c2ea824a59
# https://www.kaggle.com/dhyeydabhi/facial-keypoints-fine-tuning-on-resnet50

# 2
# https://medium.com/analytics-vidhya/facial-keypoint-detection-with-pytorch-e9f94ab321a2
# https://github.com/nalbert9/Facial-Keypoint-Detection/blob/master/data_load.py

# 3
# https://www.coursera.org/projects/facial-keypoint-detection

# 4
# https://www.kaggle.com/kakash/cnn-pytorch/notebook


In [None]:
X = df["IMG-PATH"].to_numpy().reshape(-1,1)
y = df[['X', 'Y']].to_numpy()

In [None]:
print(X.shape,y.shape)

(1048, 1) (1048, 2)


In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
print(X_train.shape,y_train.shape)

(838, 1) (838, 2)


In [None]:
print(X_test.shape,y_test.shape)

(210, 1) (210, 2)


In [None]:
class NoseNetDataSet(Dataset):

  def __init__(self,X,y,img_path,label_path,transform=None):
    self.x = X
    self.y = y
    self.img_path = img_path
    self.label_path = label_path
    self.transform = transform

  def __len__(self):
    return len(self.x)

  def __getitem__(self,index):
    img = Image.open(self.img_path + self.x[index][0])
    label = self.y[index]

    if self.transform != None:
      img = self.transform(img)
    
    return img,label

In [None]:
transformation = transforms.Compose([
      transforms.ToTensor(),
      transforms.Resize((300,300)),
      transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
      transforms.RandomAdjustSharpness(sharpness_factor=2),
      transforms.RandomAutocontrast()
])

train_dataset = NoseNetDataSet(X_train,y_train,image_data_path,label_data_path,transformation)
test_dataset = NoseNetDataSet(X_test,y_test,image_data_path,label_data_path,transformation)

train_dataloader = DataLoader(train_dataset,batch_size=100)
test_dataloader = DataLoader(test_dataset,batch_size=100)

In [None]:
input_batch,label_batch = next(iter(train_dataloader))

In [None]:
def makegrid(input_batch):
  grid = torchvision.utils.make_grid(input_batch, nrow=40, pad_value=1)
  torchvision.utils.save_image(grid, 'inputs_batch_preview.png')

In [None]:
makegrid(input_batch)