#### Importing required Libraries

In [3]:
import cv2
import os
from mtcnn import MTCNN
from IPython.display import display
from PIL import Image
from deepface import DeepFace
import numpy as np
import joblib
import pandas as pd
# Single MTCNN face detector, created once here and reused by the functions
# below that call `detector.detect_faces(...)`.
detector = MTCNN()

## Data Preprocessing
Preparing the data for training 

### Videos to images
Given the path of a folder containing one face video recording per candidate, a separate `images_dataset` folder is created next to it. It holds one sub-folder per candidate, containing the frames extracted from that candidate's video.

In [None]:
# Number of frames to pull from each video; presumably meant as the second
# argument of convert_videos_to_images (no call is visible in this notebook).
num_of_frames_to_extract = 100
# NOTE(review): absolute, machine-specific path. Other cells address the same
# tree relatively ('Data_2/...'), and `data_path` is rebound in a later cell —
# consider one relative, configurable data root.
data_path = r'C:\Users\Venkatesh Yeturi\Desktop\auto_attendance_python\Data_2'

In [49]:
def convert_videos_to_images(videos_folder_path, num_of_frames_to_extract):
    """Extract evenly spaced frames from every video file in a folder.

    A folder named ``images_dataset`` is created next to ``videos_folder_path``.
    For each video a sub-folder named after the file (extension removed) is
    created inside it, and the extracted frames are written there as
    ``1.jpg``, ``2.jpg``, ...

    Parameters
    ----------
    videos_folder_path : str
        Folder containing one video file per candidate. Sub-directories and
        files that OpenCV cannot open are skipped.
    num_of_frames_to_extract : int
        Maximum number of frames saved per video. Fewer are saved when the
        video runs out of frames first.

    Raises
    ------
    ValueError
        If ``num_of_frames_to_extract`` is smaller than 1.
    """
    if num_of_frames_to_extract < 1:
        raise ValueError("num_of_frames_to_extract must be a positive integer")

    # Drop any trailing separator so dirname() yields the real parent folder
    # instead of the videos folder itself.
    videos_folder_path = os.path.normpath(videos_folder_path)
    data_folder_path = os.path.dirname(videos_folder_path)
    faces_folder_path = os.path.join(data_folder_path, 'images_dataset')
    os.makedirs(faces_folder_path, exist_ok=True)

    for candidate_video_file in os.listdir(videos_folder_path):
        candidate_video_file_path = os.path.join(videos_folder_path, candidate_video_file)
        if not os.path.isfile(candidate_video_file_path):
            continue

        # splitext copes with extensions of any length (.mp4, .mpeg, .webm, ...).
        candidate_label = os.path.splitext(candidate_video_file)[0]
        candidate_images_folder_path = os.path.join(faces_folder_path, candidate_label)

        cap = cv2.VideoCapture(candidate_video_file_path)
        try:
            if not cap.isOpened():
                print(f"Skipping unreadable video : {candidate_video_file}")
                continue

            os.makedirs(candidate_images_folder_path, exist_ok=True)

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Keep every step_size-th frame; short videos fall back to every frame.
            step_size = max(total_frames // num_of_frames_to_extract, 1)

            frame_count = 1
            while frame_count <= num_of_frames_to_extract:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_filename = os.path.join(candidate_images_folder_path, f"{frame_count}.jpg")
                cv2.imwrite(frame_filename, frame)
                frame_count += 1

                # grab() advances the stream without decoding the skipped frames.
                for _ in range(step_size - 1):
                    if not cap.grab():
                        break
        finally:
            # Always free the capture handle, even if reading or writing fails.
            cap.release()

### Images to cropped faces
Given the path of the images folder, a separate folder is created that holds the cropped faces of each candidate, obtained by running face detection on every image.

In [None]:
# Folder of extracted frames that save_cropped_images is meant to read.
# NOTE(review): absolute, machine-specific path — consider a path relative to
# the notebook, as other cells use ('Data_2/...').
images_directory_path = r'C:\Users\Venkatesh Yeturi\Desktop\auto_attendance_python\Data_2\images_dataset'

# Call left disabled. save_cropped_images is defined in the following cell, so
# it must be run after that definition when re-enabled.
# save_cropped_images(images_directory_path)

In [54]:
def save_cropped_images(dataset_path):
    """Detect one face per image and save the cropped face.

    ``dataset_path`` is expected to contain one sub-folder per candidate. A
    sibling folder ``cropped_faces_dataset`` is created with the same
    sub-folder names; each image in which the detector finds a face is cropped
    to the first detected bounding box and written under the same file name.

    Images that cannot be read, images with no detected face, and boxes that
    fall entirely outside the image are skipped. Entries in ``dataset_path``
    that are not directories are ignored.

    Parameters
    ----------
    dataset_path : str
        Folder with one sub-folder of images per candidate.
    """
    # Drop any trailing separator so dirname() yields the real parent folder.
    dataset_path = os.path.normpath(dataset_path)
    dataset_parent_directory_path = os.path.dirname(dataset_path)
    cropped_dataset_path = os.path.join(dataset_parent_directory_path, 'cropped_faces_dataset')
    os.makedirs(cropped_dataset_path, exist_ok=True)

    for rollnumber in os.listdir(dataset_path):
        rollnumber_path = os.path.join(dataset_path, rollnumber)
        if not os.path.isdir(rollnumber_path):
            continue

        new_rollnumber_path = os.path.join(cropped_dataset_path, rollnumber)
        os.makedirs(new_rollnumber_path, exist_ok=True)
        print(rollnumber)

        for file_name in os.listdir(rollnumber_path):
            each_image = cv2.imread(os.path.join(rollnumber_path, file_name))
            if each_image is None:
                # cv2.imread signals an unreadable / non-image file with None.
                continue

            # NOTE(review): cv2.imread yields BGR channel order, while the MTCNN
            # README converts to RGB before detect_faces — confirm whether a
            # conversion is wanted here.
            faces = detector.detect_faces(each_image)
            if not faces:
                continue

            x, y, w, h = faces[0]['box']
            image_height, image_width = each_image.shape[:2]
            # Clamp the box to the image: a negative start index would otherwise
            # be treated as counting from the end and give an empty/wrong crop.
            x1, y1 = max(x, 0), max(y, 0)
            x2, y2 = min(x + w, image_width), min(y + h, image_height)
            if x2 <= x1 or y2 <= y1:
                continue

            face_image = each_image[y1:y2, x1:x2]
            image_saving_path = os.path.join(new_rollnumber_path, file_name)
            cv2.imwrite(image_saving_path, face_image)


In [57]:
# Summary table: how many cropped face images exist for every candidate.
cropped_faces_folder_path = r"C:\Users\Venkatesh Yeturi\Desktop\auto_attendance_python\Data_2\cropped_faces_dataset"
print("No.of cropped faces available from each candidate : ")

# One directory listing per candidate folder, keyed by folder name.
faces_per_candidate = {
    candidate: len(os.listdir(os.path.join(cropped_faces_folder_path, candidate)))
    for candidate in os.listdir(cropped_faces_folder_path)
}

for candidate, face_count in faces_per_candidate.items():
    print(f"{candidate :<30} : {face_count}")

total = sum(faces_per_candidate.values())
print("=======================================================================")
print(f"{'total' :<30} : {total}")


No.of cropped faces available from each candidate : 
226102108_uma                  : 92
234102301_Amit_Kumar_Das       : 78
234102302_Amit_Nagora          : 82
234102303_AsutoshSahu          : 26
234102304_Dillip               : 97
234102305_Durgesh_Yadav        : 90
234102307_Muralidhar           : 28
234102308_Rajavelu E           : 100
234102309_Yash_Natholia        : 76
234102310_Adrija               : 100
234102311_Bhavesh_Joshi        : 93
234102312_anvesh               : 85
234102314_jithu_j              : 83
234102315_umapathi             : 80
234102316_Pritam_Kumar_Singh   : 72
234102317_Soumya               : 100
234102318_PoojaKumari          : 100
236102004_Himashri_Deka        : 100
236102005_Manash_Gogoi         : 48
236150003_Kartikay             : 92
236302003_PRINCE_KUMAR         : 89
total                          : 1711


### Faces to Embeddings
Given a folder path of cropped faces of each candidate, a dataframe and then CSV file will be created with the embeddings and respective student identities as labels

In [59]:
def generate_embeddings_and_labels(data_path):
    """Collect face embeddings and labels for every candidate folder.

    Each sub-folder of ``data_path`` is passed to ``get_embeddings_of_person``
    and the per-candidate results are stacked in sorted folder-name order, so
    the row order is reproducible across runs and platforms.

    Parameters
    ----------
    data_path : str
        Folder containing one sub-folder of cropped faces per candidate.
        Entries that are not directories are ignored.

    Returns
    -------
    embeddings : numpy.ndarray
        One row per face image.
    labels : numpy.ndarray
        Label for each row of ``embeddings``, in the same order.
    """
    embeddings = []
    labels = []

    for each_roll in sorted(os.listdir(data_path)):
        roll_path = os.path.join(data_path, each_roll)
        if not os.path.isdir(roll_path):
            continue

        person_embeddings, person_labels = get_embeddings_of_person(roll_path)
        # extend() keeps one flat list of rows, so a candidate with no images
        # contributes nothing instead of breaking the final stacking step.
        embeddings.extend(person_embeddings)
        labels.extend(person_labels)
        print("Done with : ",each_roll)

    return np.asarray(embeddings), np.asarray(labels)

    
def get_embeddings_of_person(roll_path):
    """Compute a Facenet embedding for every image in one candidate folder.

    Parameters
    ----------
    roll_path : str
        Folder holding the face images of a single candidate. The folder name
        is used as the label.

    Returns
    -------
    embeddings : list
        The ``'embedding'`` entry of the first result returned by
        ``DeepFace.represent`` for each file, in sorted file-name order.
    labels : list of str
        The folder name repeated once per embedding.
    """
    # normpath drops a trailing separator, which would otherwise make
    # basename() return '' and label every face with an empty string.
    roll_path = os.path.normpath(roll_path)
    roll_number = os.path.basename(roll_path)

    embeddings = []
    for file in sorted(os.listdir(roll_path)):
        file_path = os.path.join(roll_path, file)
        embedding = DeepFace.represent(file_path, model_name='Facenet',enforce_detection=False)
        embeddings.append(embedding[0]['embedding'])

    # Derive the label count from the embeddings actually computed rather than
    # from a second directory listing.
    labels = [roll_number] * len(embeddings)
    return embeddings, labels
    


In [60]:
# Cropped-faces dataset, addressed relative to the notebook's working directory.
# NOTE(review): this rebinds `data_path`, which an earlier cell set to the
# absolute Data_2 folder — a distinct name would avoid the hidden-state hazard.
data_path = r'Data_2/cropped_faces_dataset'
# Runs DeepFace on every cropped face; prints one "Done with" line per candidate.
embeddings, labels = generate_embeddings_and_labels(data_path)   

Done with :  226102108_uma
Done with :  234102301_Amit_Kumar_Das


In [None]:
# Sanity check: the two lengths should match (one label per embedding).
len(embeddings), len(labels)

In [None]:
# Build one row per face: columns e1..eN hold the embedding components and
# 'Label' holds the candidate folder name. N is taken from the data instead of
# being hard-coded, so a model with a different embedding size also works.
embedding_matrix = np.asarray(embeddings)
assert embedding_matrix.ndim == 2, "expected a 2-D (faces x dimensions) embedding array"
assert len(labels) == len(embedding_matrix), "each embedding needs exactly one label"

embedding_columns = [f'e{i + 1}' for i in range(embedding_matrix.shape[1])]
embeddings_df = pd.DataFrame(embedding_matrix, columns=embedding_columns)
embeddings_df['Label'] = labels

# Persist the table so the expensive embedding step can be skipped on later runs.
embeddings_df.to_csv("embeddings_data.csv", index = False)

In [None]:
# Reload the embeddings table from the same relative file that the previous
# cell writes, instead of an absolute user-specific path, so the notebook
# reads back exactly what it saved on any machine.
embeddings_df = pd.read_csv("embeddings_data.csv")
embeddings_df.head()


In [None]:
# (rows, columns) of the embeddings table.
embeddings_df.shape

## Model Training and Testing

### Setting up data for training and testing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
# Features: every column except the last one.
X = embeddings_df.iloc[:,:-1]
# Target: the last column (written as 'Label' when the CSV was built).
y = embeddings_df.iloc[:,-1]
X.shape, y.shape

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle repeatable.
# NOTE(review): the split is not stratified although the per-candidate counts
# differ widely, and frames cut from the same video can end up on both sides
# of the split. Changing the split would also invalidate any cached models
# that were fitted on the current one — confirm before altering.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 42)

# Convert the pandas objects to plain NumPy arrays.
X_train , y_train, X_test , y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

### Classification models and their Performance

#### RandomForest

In [39]:
from sklearn.ensemble import RandomForestClassifier

In [40]:
# Reuse a cached Random Forest when one exists; otherwise train it and store
# it at the same path the cache check looks at. (Previously the model was
# dumped into the working directory, so the cache was never populated.)
pre_trained_model_path = 'pre_trained_models/RandomForest_model.joblib'
if os.path.exists(pre_trained_model_path):
    # NOTE: joblib.load unpickles the file — only load model files you trust.
    model = joblib.load(pre_trained_model_path)
else:
    model = RandomForestClassifier(n_estimators=100, random_state = 42)
    model.fit(X_train, y_train)
    os.makedirs(os.path.dirname(pre_trained_model_path), exist_ok=True)
    joblib.dump(model, pre_trained_model_path)

In [41]:
# Evaluate the current `model` on the held-out test rows.
# NOTE(review): `model` may have been loaded from a cached file rather than
# fitted in this session; the scores are only meaningful if that file was
# trained on the same train/test split — confirm.
y_pred = model.predict(X_test)
accuracy_random_forest = accuracy_score(y_test, y_pred)
print("Accuracy with Random Forest Classifier is : ", accuracy_random_forest)
print("==================================================================================================================")
# Text table of per-class precision, recall, f1-score and support.
report = classification_report(y_test, y_pred)
print("Classification Report with Random Forest Classifier is : \n")
print(report)

Accuracy with Random Forest Classifier is :  0.9504373177842566
Classification Report with Random Forest Classifier is : 

                              precision    recall  f1-score   support

               226102108_uma       1.00      0.94      0.97        18
    234102301_Amit_Kumar_Das       1.00      1.00      1.00        13
       234102302_Amit_Nagora       0.86      0.95      0.90        19
       234102303_AsutoshSahu       1.00      0.75      0.86         8
            234102304_Dillip       0.88      1.00      0.94        22
     234102305_Durgesh_Yadav       1.00      1.00      1.00        17
        234102307_Muralidhar       1.00      0.62      0.77         8
        234102308_Rajavelu E       0.96      1.00      0.98        22
     234102309_Yash_Natholia       0.92      0.92      0.92        13
            234102310_Adrija       1.00      1.00      1.00        22
     234102311_Bhavesh_Joshi       1.00      1.00      1.00        15
            234102312_anvesh       1

#### Logistic Regression

In [42]:
from sklearn.linear_model import LogisticRegression

In [43]:
# Reuse a cached Logistic Regression model when one exists; otherwise train it
# and store it at the same path the cache check looks at. os.path.join avoids
# the invalid '\L' escape of the former literal and works on every OS.
pre_trained_model_path = os.path.join('pre_trained_models', 'LogisticRegression_model')

if os.path.exists(pre_trained_model_path):
    # NOTE: joblib.load unpickles the file — only load model files you trust.
    model = joblib.load(pre_trained_model_path)
else:
    # The deprecated `multi_class` argument is omitted: with the lbfgs solver
    # scikit-learn already fits a multinomial model for multi-class targets.
    model = LogisticRegression(solver='lbfgs',random_state=42)
    model.fit(X_train, y_train)
    os.makedirs(os.path.dirname(pre_trained_model_path), exist_ok=True)
    joblib.dump(model, pre_trained_model_path)

In [44]:
# Evaluate the current `model` on the held-out test rows.
# NOTE(review): `model` may have been loaded from a cached file rather than
# fitted in this session; the scores are only meaningful if that file was
# trained on the same train/test split — confirm.
y_pred = model.predict(X_test)
accuracy_logistic_regression = accuracy_score(y_test, y_pred)
print("Accuracy with Logistic Regression Classifier is : ", accuracy_logistic_regression)
print("==================================================================================================================")
# Text table of per-class precision, recall, f1-score and support.
report = classification_report(y_test, y_pred)
print("Classification Report with Logistic Regression is : \n")
print(report)

Accuracy with Logistic Regression Classifier is :  0.9475218658892128
Classification Report with Logistic Regression is : 

                              precision    recall  f1-score   support

               226102108_uma       1.00      0.94      0.97        18
    234102301_Amit_Kumar_Das       1.00      0.92      0.96        13
       234102302_Amit_Nagora       1.00      0.95      0.97        19
       234102303_AsutoshSahu       1.00      0.88      0.93         8
            234102304_Dillip       0.88      1.00      0.94        22
     234102305_Durgesh_Yadav       1.00      1.00      1.00        17
        234102307_Muralidhar       0.80      0.50      0.62         8
        234102308_Rajavelu E       1.00      1.00      1.00        22
     234102309_Yash_Natholia       0.80      0.92      0.86        13
            234102310_Adrija       1.00      1.00      1.00        22
     234102311_Bhavesh_Joshi       1.00      1.00      1.00        15
            234102312_anvesh       

#### K Nearest Neighbours

In [45]:
from sklearn.neighbors import KNeighborsClassifier

In [46]:
# Reuse a cached KNN model when one exists; otherwise train it and store it at
# the same path the cache check looks at. os.path.join avoids the invalid '\K'
# escape of the former literal and works on every OS.
pre_trained_model_path = os.path.join('pre_trained_models', 'KNN_model.joblib')

if os.path.exists(pre_trained_model_path):
    # NOTE: joblib.load unpickles the file — only load model files you trust.
    model = joblib.load(pre_trained_model_path)
else:
    model = KNeighborsClassifier(n_neighbors = 5)
    model.fit(X_train, y_train)
    os.makedirs(os.path.dirname(pre_trained_model_path), exist_ok=True)
    joblib.dump(model, pre_trained_model_path)

In [47]:
# Evaluate the current `model` on the held-out test rows.
# NOTE(review): `model` may have been loaded from a cached file rather than
# fitted in this session; the scores are only meaningful if that file was
# trained on the same train/test split — confirm.
y_pred = model.predict(X_test)
accuracy_KNN = accuracy_score(y_test, y_pred)
print("Accuracy with KNN classifier is : ", accuracy_KNN)
print("==================================================================================================================")
# Text table of per-class precision, recall, f1-score and support.
report = classification_report(y_test, y_pred)
print("Classification Report with KNN is : \n")
print(report)

Accuracy with KNN classifier is :  0.956268221574344
Classification Report with KNN is : 

                              precision    recall  f1-score   support

               226102108_uma       0.90      1.00      0.95        18
    234102301_Amit_Kumar_Das       1.00      1.00      1.00        13
       234102302_Amit_Nagora       0.90      1.00      0.95        19
       234102303_AsutoshSahu       1.00      0.75      0.86         8
            234102304_Dillip       0.85      1.00      0.92        22
     234102305_Durgesh_Yadav       0.85      1.00      0.92        17
        234102307_Muralidhar       1.00      0.75      0.86         8
        234102308_Rajavelu E       1.00      0.95      0.98        22
     234102309_Yash_Natholia       1.00      0.92      0.96        13
            234102310_Adrija       1.00      1.00      1.00        22
     234102311_Bhavesh_Joshi       1.00      1.00      1.00        15
            234102312_anvesh       1.00      0.95      0.97        2

### Comparison of performance of models

## Prediction functionality for group of faces

### Give the image and get the roll numbers

In [67]:
def get_predicted_roll_numbers(model, test_image_path, min_confidence_ratio=3):
    """Predict a label for every confidently recognised face in a group image.

    Every face found by the detector is cropped and saved as ``face_<i>.jpg``
    in a folder named ``<image file name>_faces`` next to the image. The crop
    is embedded with Facenet via DeepFace and classified by ``model``.

    A face is kept only when its highest class probability is greater than
    ``min_confidence_ratio`` times the mean class probability (for
    probabilities that sum to 1, that mean equals 1 / number of classes).

    Parameters
    ----------
    model : object
        Classifier exposing ``predict`` and ``predict_proba``.
    test_image_path : str
        Path of the group image.
    min_confidence_ratio : float, optional
        Required ratio between the top probability and the mean probability.
        Defaults to 3, the value that was previously hard-coded.

    Returns
    -------
    list
        One ``model.predict`` result (a one-element array-like) per accepted
        face, in detection order.

    Raises
    ------
    ValueError
        If the image cannot be read.
    """
    test_image = cv2.imread(test_image_path)
    if test_image is None:
        # cv2.imread signals a missing / unreadable file with None.
        raise ValueError(f"Could not read image: {test_image_path}")
    image_height, image_width = test_image.shape[:2]

    faces = detector.detect_faces(test_image)

    parent_folder_path = os.path.dirname(test_image_path)
    faces_folder_name = os.path.basename(test_image_path) + "_faces"
    new_faces_folder = os.path.join(parent_folder_path, faces_folder_name)
    os.makedirs(new_faces_folder, exist_ok=True)

    roll_numbers = []
    for i, face in enumerate(faces):
        x, y, w, h = face['box']
        # Clamp the box to the image: a negative start index would otherwise
        # be treated as counting from the end and give an empty/wrong crop.
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + w, image_width), min(y + h, image_height)
        if x2 <= x1 or y2 <= y1:
            continue

        face_image = test_image[y1:y2, x1:x2]
        image_saving_path = os.path.join(new_faces_folder, f'face_{i+1}.jpg')
        cv2.imwrite(image_saving_path, face_image)

        face_embedding = DeepFace.represent(image_saving_path,model_name='Facenet',enforce_detection=False)[0]['embedding']

        probabilities = model.predict_proba([face_embedding])[0]
        if probabilities.max() <= min_confidence_ratio * probabilities.mean():
            # Not confident enough — likely an unknown person or a poor crop.
            continue

        roll_numbers.append(model.predict([face_embedding]))
    return roll_numbers

In [68]:
# Group photo used to try out get_predicted_roll_numbers.
# NOTE(review): both paths below are absolute and machine-specific — consider
# paths relative to the notebook, as other cells use.
test_image_1 = r"C:\Users\Venkatesh Yeturi\Desktop\auto_attendance_python\Data_2\group_images_for_testing\test_image_1.jpg"

# Load the saved Random Forest model; this rebinds `model`, which earlier cells
# also assign. joblib.load unpickles the file, so only load trusted files.
model = joblib.load(r'C:\Users\Venkatesh Yeturi\Desktop\auto_attendance_python\pre_trained_models\RandomForest_model.joblib')

# Each item is the raw model.predict result for one face, which is why every
# printed label appears wrapped in brackets.
for roll in get_predicted_roll_numbers(model, test_image_1):
    print(roll)

['234102317_Soumya']
['234102316_Pritam_Kumar_Singh']
['234102318_PoojaKumari']
['236102005_Manash_Gogoi']
['234102301_Amit_Kumar_Das']
['234102305_Durgesh_Yadav']
['234102304_Dillip']
['234102310_Adrija']
['234102314_jithu_j']
['234102312_anvesh']
['234102311_Bhavesh_Joshi']
['234102307_Muralidhar']
['234102315_umapathi']
['234102315_umapathi']
['234102309_Yash_Natholia']
['234102302_Amit_Nagora']


In [1]:
import os
# Candidate folder names, one per sub-folder of the images dataset.
# os.path.join replaces the literal 'Data_2\images_dataset', whose '\i' is an
# invalid escape sequence and whose separator only works on Windows.
# sorted() keeps the order stable regardless of how the OS lists entries.
list_of_names = sorted(os.listdir(os.path.join('Data_2', 'images_dataset')))
print(list_of_names)

['226102108_uma', '234102301_Amit_Kumar_Das', '234102302_Amit_Nagora', '234102303_AsutoshSahu', '234102304_Dillip', '234102305_Durgesh_Yadav', '234102307_Muralidhar', '234102308_Rajavelu E', '234102309_Yash_Natholia', '234102310_Adrija', '234102311_Bhavesh_Joshi', '234102312_anvesh', '234102314_jithu_j', '234102315_umapathi', '234102316_Pritam_Kumar_Singh', '234102317_Soumya', '234102318_PoojaKumari', '236102004_Himashri_Deka', '236102005_Manash_Gogoi', '236150003_Kartikay', '236302003_PRINCE_KUMAR']


In [6]:
# Folder names look like '<roll number>_<name>'. Split on the first underscore
# instead of fixed character offsets, so roll numbers of any length work and
# underscores inside the name are preserved.
roll_numbers = [entry.partition('_')[0] for entry in list_of_names]
names = [entry.partition('_')[2] for entry in list_of_names]

In [7]:
# Show the extracted roll numbers.
roll_numbers

['226102108',
 '234102301',
 '234102302',
 '234102303',
 '234102304',
 '234102305',
 '234102307',
 '234102308',
 '234102309',
 '234102310',
 '234102311',
 '234102312',
 '234102314',
 '234102315',
 '234102316',
 '234102317',
 '234102318',
 '236102004',
 '236102005',
 '236150003',
 '236302003']

In [8]:
# Show the extracted candidate names.
names

['uma',
 'Amit_Kumar_Das',
 'Amit_Nagora',
 'AsutoshSahu',
 'Dillip',
 'Durgesh_Yadav',
 'Muralidhar',
 'Rajavelu E',
 'Yash_Natholia',
 'Adrija',
 'Bhavesh_Joshi',
 'anvesh',
 'jithu_j',
 'umapathi',
 'Pritam_Kumar_Singh',
 'Soumya',
 'PoojaKumari',
 'Himashri_Deka',
 'Manash_Gogoi',
 'Kartikay',
 'PRINCE_KUMAR']

In [9]:
# Pair every roll number with the name found at the same position.
complete_names = list(zip(roll_numbers, names))
complete_names

[('226102108', 'uma'),
 ('234102301', 'Amit_Kumar_Das'),
 ('234102302', 'Amit_Nagora'),
 ('234102303', 'AsutoshSahu'),
 ('234102304', 'Dillip'),
 ('234102305', 'Durgesh_Yadav'),
 ('234102307', 'Muralidhar'),
 ('234102308', 'Rajavelu E'),
 ('234102309', 'Yash_Natholia'),
 ('234102310', 'Adrija'),
 ('234102311', 'Bhavesh_Joshi'),
 ('234102312', 'anvesh'),
 ('234102314', 'jithu_j'),
 ('234102315', 'umapathi'),
 ('234102316', 'Pritam_Kumar_Singh'),
 ('234102317', 'Soumya'),
 ('234102318', 'PoojaKumari'),
 ('236102004', 'Himashri_Deka'),
 ('236102005', 'Manash_Gogoi'),
 ('236150003', 'Kartikay'),
 ('236302003', 'PRINCE_KUMAR')]