# 01. Setup and Load Data

### 1.1 Dependencies Install and Setup

In [1]:
import tensorflow as tf
import os
import cv2
import imghdr
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
from mtcnn.mtcnn import MTCNN

2024-02-01 18:18:50.746358: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  import imghdr


### 1.2 Target directories and Model 1 load

In [64]:
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
model_folder = os.path.join(parent_directory, "model")
model_path = os.path.join(model_folder, "parfois_product_feature_classification.h5")

In [65]:
downloaded_images_path = "downloaded_images"  
images_to_check = set(os.listdir(downloaded_images_path))
images_to_check = pd.DataFrame(images_to_check)
images_to_check.rename(columns={0: "image_file_name"}, inplace=True)
images_to_check["model_1_images_predictions"] = np.nan

##### Load Parfois Model 1, responsible for the first check 

In [3]:
parfois_model_1 = load_model(model_path)

# 02. Model 1 Run

### 2.1 Model 1 Run in function update_predictions

In [8]:
parent_directory = os.getcwd()

def update_predictions(row):
    img_file_name = row["image_file_name"]
    img_path = os.path.join(parent_directory, "downloaded_images", img_file_name)
    img_to_check = cv2.imread(img_path)

    if img_to_check is not None:
        resize = tf.image.resize(np.expand_dims(img_to_check, 0), (256, 256))[0]
        prediction = parfois_model_1.predict(np.expand_dims(resize / 255, 0))

        if prediction > 0.5:
            return 1
        else:
            return 0
    else:
        print(f"{img_file_name}: Unable to read the image")
        return None 

images_to_check["model_1_images_predictions"] = images_to_check.apply(update_predictions, axis=1)



##### Parfois Model 1 takes bout 14/15min run on 7859 images

In [13]:
images_to_check

Unnamed: 0,image_file_name,model_1_images_predictions
0,212678_GD_1yf.jpg,0
1,216536_GD_1yf.jpg,0
2,214681_KK_1yf.jpg,0
3,214806_GD_1yf.jpg,0
4,215568_EC_1y.jpg,1
...,...,...
7854,215119_BL_7y.jpg,0
7855,214889_BK_5yf.jpg,0
7856,215047_GN_2y.jpg,1
7857,215484_BM_4y.jpg,0


# 03. Model 2 Run

### 3.1 Model 1 Run in function update_predictions

In [19]:
images_to_check["model_2_images_predictions"] = np.nan

In [16]:
parfois_model_2 = MTCNN(min_face_size=30, 
                               steps_threshold=[0.7, 0.8, 0.8])

##### Parfois Model 2 is a pre-trained model for face detection. By targeting the keypoints we can get prediction only for images that have a right eye, right mouth or left eye, left mouth. This are the min features for a full recognition human model by Parfois

In [33]:
def model_predictions_2(row):
    img_file_name = row["image_file_name"]
    img_path = os.path.join(downloaded_images_path, img_file_name)
    img_to_check = cv2.imread(img_path)
    
    if img_to_check is not None:
        detected_faces = parfois_model_2.detect_faces(img_to_check)
        
        for face in detected_faces:
            if "keypoints" in face and "left_eye" in face["keypoints"] and "right_eye" in face["keypoints"] and "mouth_left" in face["keypoints"] and "mouth_right" in face["keypoints"]:
                return 1 
            if "keypoints" in face and "left_eye" in face["keypoints"] and "mouth_left" in face["keypoints"]:
                return 1  
            if "keypoints" in face and "right_eye" in face["keypoints"] and "mouth_right" in face["keypoints"]:
                return 1  
        return 0
    else:
        print(f"{img_file_name}: Unable to read the image")
        return -1

images_to_check["model_2_images_predictions"] = images_to_check.apply(model_predictions_2, axis=1)



In [34]:
images_to_check["model_2_images_predictions"].isna().sum()

0

##### Parfois Model 2 takes about 270min to run on the same 7859 images.

In [35]:
images_to_check

Unnamed: 0,image_file_name,model_1_images_predictions,model_2_images_predictions
0,212678_GD_1yf.jpg,0,0
1,216536_GD_1yf.jpg,0,0
2,214681_KK_1yf.jpg,0,0
3,214806_GD_1yf.jpg,0,0
4,215568_EC_1y.jpg,1,1
...,...,...,...
7854,215119_BL_7y.jpg,0,0
7855,214889_BK_5yf.jpg,0,0
7856,215047_GN_2y.jpg,1,0
7857,215484_BM_4y.jpg,0,0


In [None]:
images_to_check.to_csv("images_to_check", index=False)

# 04. Update the product ref. xlsx file on the product_ref_call

### 4.1 Data cleaning for the output data frame 

In [55]:
images_to_check = pd.read_csv("images_to_check")
images_to_check = images_to_check.sort_values(by="image_file_name").reset_index(drop=True)

In [67]:
images_to_check["image_file_name"] = images_to_check["image_file_name"].apply(lambda x: "_".join(x.split("_")[:2]))

In [57]:
images_to_check.loc[(images_to_check["model_1_images_predictions"] == 0) & (images_to_check["model_2_images_predictions"] == 0), "parfois_app_output"] = "Product"
images_to_check.loc[(images_to_check["model_1_images_predictions"] == 1) & (images_to_check["model_2_images_predictions"] == 0), "parfois_app_output"] = "No_Recognition"
images_to_check.loc[(images_to_check["model_1_images_predictions"] == 1) & (images_to_check["model_2_images_predictions"] == 1), "parfois_app_output"] = "Recognition"
images_to_check.loc[(images_to_check["model_1_images_predictions"] == 0) & (images_to_check["model_2_images_predictions"] == 1), "parfois_app_output"] = "Recognition"

##### Function run to keep only the final answer for every ref., meaning that if there is any picture ref. with human model or partial human model, there was a human model involved.

In [58]:
def process_output(group):
    recognition_row = group[group["parfois_app_output"] == "Recognition"]
    no_recognition_row = group[group["parfois_app_output"] == "No_Recognition"]
    product_row = group[group["parfois_app_output"] == "Product"]

    if not recognition_row.empty:
        return group.loc[[recognition_row.index[0]]]

    elif not no_recognition_row.empty:
        return group.loc[[no_recognition_row.index[0]]]

    elif not product_row.empty:
        return group.loc[[product_row.index[0]]]


##### This is the DF we are going to use to place the results on the product_ref_call xlsx file.

In [62]:
results_df = images_to_check.groupby("image_file_name").apply(process_output).reset_index(drop=True)

In [71]:
results_df

Unnamed: 0,image_file_name,model_1_images_predictions,model_2_images_predictions,parfois_app_output
0,119774_WT,0,0,Product
1,137523_SV,1,0,No_Recognition
2,138911_GD,0,0,Product
3,140486_HM,0,0,Product
4,140486_NV,0,0,Product
...,...,...,...,...
2190,219256_PK,0,0,Product
2191,219257_PK,0,0,Product
2192,219258_PK,0,0,Product
2193,219260_PK,0,0,Product


In [73]:
unique_ref = results_df["parfois_app_output"].value_counts()
unique_ref

Product           1477
No_Recognition     431
Recognition        287
Name: parfois_app_output, dtype: int64

### 4.2 Load product_ref xlsx file and create an upgraded one with the column parfois_app_output

In [74]:
product_ref_path = os.path.join("product_ref_call", "product_ref.xlsx")
product_ref_df = pd.read_excel(product_ref_path)

In [76]:
product_ref_df

Unnamed: 0,season,week,prod_ref,Status,Última Etapa,Category,Family,Theme,Store Date Planned,Store Date Final,...,Data Aprovação modelo s/ reconhecimento,Data Repetição fotografia,TXTS Etapa 9,Data Repetição modelo,TXTS Etapa 10,Data Aprovação repetição,TXTS Etapa 11,Date Published,# Fotos,Number of Units
0,23 Fall/Winter,2023_45,214658_GY,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2023-11-15,6.0,Singular
1,23 Fall/Winter,2023_45,214658_LM,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2023-11-15,2.0,Singular
2,23 Fall/Winter,2023_45,214658_NV,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2023-11-15,2.0,Singular
3,23 Fall/Winter,2023_45,214687_DM,Online: OK,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2023-10-28,2.0,Singular
4,23 Fall/Winter,2023_45,214688_GN,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,KUSAMA,2023-11-01,2023-11-01,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2023-11-15,5.0,Singular
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2920,24 Spring/Summer,2024_4,219371_BK,Online: OK,Published,Party,Hand,CUPID,2024-01-24,2024-01-24,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2024-01-23,11.0,Singular
2921,24 Spring/Summer,2024_5,219130_GD,Online: OK,Published,Silver,Other Jewellery,CHARM COLLECTION,2024-01-31,2024-01-31,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2024-01-23,2.0,Singular
2922,24 Spring/Summer,2024_5,219141_GD,Online: OK,Published,Silver,Other Jewellery,CHARM COLLECTION,2024-01-31,2024-01-31,...,NaT,NaT,0.0,NaT,0.0,NaT,0.0,2024-01-23,2.0,Singular
2923,,,,,,,,,NaT,NaT,...,NaT,NaT,,NaT,,NaT,,NaT,,


In [78]:
product_ref_df_updated = pd.merge(product_ref_df, results_df[["image_file_name", "parfois_app_output"]], 
                     how="left", left_on="prod_ref", 
                     right_on="image_file_name")

In [79]:
product_ref_df_updated

Unnamed: 0,season,week,prod_ref,Status,Última Etapa,Category,Family,Theme,Store Date Planned,Store Date Final,...,TXTS Etapa 9,Data Repetição modelo,TXTS Etapa 10,Data Aprovação repetição,TXTS Etapa 11,Date Published,# Fotos,Number of Units,image_file_name,parfois_app_output
0,23 Fall/Winter,2023_45,214658_GY,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,0.0,NaT,0.0,NaT,0.0,2023-11-15,6.0,Singular,,
1,23 Fall/Winter,2023_45,214658_LM,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,0.0,NaT,0.0,NaT,0.0,2023-11-15,2.0,Singular,214658_LM,Product
2,23 Fall/Winter,2023_45,214658_NV,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,0.0,NaT,0.0,NaT,0.0,2023-11-15,2.0,Singular,,
3,23 Fall/Winter,2023_45,214687_DM,Online: OK,Published,Winter Textiles,Blanket Scarves,STARLIGHT,2023-11-01,2023-11-01,...,0.0,NaT,0.0,NaT,0.0,2023-10-28,2.0,Singular,214687_DM,Product
4,23 Fall/Winter,2023_45,214688_GN,Online: After Store Date,Published,Winter Textiles,Blanket Scarves,KUSAMA,2023-11-01,2023-11-01,...,0.0,NaT,0.0,NaT,0.0,2023-11-15,5.0,Singular,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2920,24 Spring/Summer,2024_4,219371_BK,Online: OK,Published,Party,Hand,CUPID,2024-01-24,2024-01-24,...,0.0,NaT,0.0,NaT,0.0,2024-01-23,11.0,Singular,,
2921,24 Spring/Summer,2024_5,219130_GD,Online: OK,Published,Silver,Other Jewellery,CHARM COLLECTION,2024-01-31,2024-01-31,...,0.0,NaT,0.0,NaT,0.0,2024-01-23,2.0,Singular,,
2922,24 Spring/Summer,2024_5,219141_GD,Online: OK,Published,Silver,Other Jewellery,CHARM COLLECTION,2024-01-31,2024-01-31,...,0.0,NaT,0.0,NaT,0.0,2024-01-23,2.0,Singular,,
2923,,,,,,,,,NaT,NaT,...,,NaT,,NaT,,NaT,,,,


In [80]:
unique_ref = product_ref_df_updated["parfois_app_output"].value_counts()
unique_ref

Product           1437
No_Recognition     430
Recognition        286
Name: parfois_app_output, dtype: int64

### 4.3 Output the df to a xlsx file to use on any BI tool

In [82]:
product_ref_df_updated.to_excel("product_ref_updated.xlsx", index=False)