In [1]:
from LoadingBMWDataset import ObjectDetectionDataset
from sklearn.model_selection import train_test_split
import torch
import glob
import numpy as np
import json
import os
from PIL import Image
import shutil
from collections import defaultdict

In [2]:
def convert_annotations_to_yolo_format(img_dir, annotation_dir, output_dir, img_ext='.jpg'):
    """Convert per-image JSON box annotations into YOLO-format TXT label files.

    For every ``<name>.json`` file in ``annotation_dir`` the image
    ``<name><img_ext>`` is opened from ``img_dir`` to read its size, and a
    ``<name>.txt`` file is written to ``output_dir`` with one line per box:
    ``class_id x_center y_center width height``, where the four box values are
    divided by the image width/height.

    Args:
        img_dir: Directory containing the images.
        annotation_dir: Directory containing the JSON annotation files. Each
            file must hold a list of dicts with the keys ``ObjectClassId``,
            ``Left``, ``Right``, ``Top`` and ``Bottom``.
        output_dir: Directory that receives the TXT files (created if missing).
        img_ext: Extension (with leading dot) of the image files.

    Raises:
        FileNotFoundError: If the image matching an annotation file is missing.
    """
    os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists

    # Sorted so that files are processed in a deterministic order
    for annotation_file in sorted(os.listdir(annotation_dir)):
        # splitext only touches the real extension; str.replace would also
        # rewrite a '.json' that happens to appear inside the file name
        stem, ext = os.path.splitext(annotation_file)
        if ext != '.json':
            continue

        with open(os.path.join(annotation_dir, annotation_file)) as f:
            annotations = json.load(f)

        # The image is only opened to read its dimensions
        img_path = os.path.join(img_dir, stem + img_ext)
        with Image.open(img_path) as img:
            img_width, img_height = img.size

        yolo_annotations = []
        for annot in annotations:
            # Assumes class IDs in the JSON start from 1 (YOLO expects 0-based IDs); adjust if necessary
            class_id = annot['ObjectClassId'] - 1
            x_center = ((annot['Right'] + annot['Left']) / 2) / img_width
            y_center = ((annot['Bottom'] + annot['Top']) / 2) / img_height
            width = (annot['Right'] - annot['Left']) / img_width
            height = (annot['Bottom'] - annot['Top']) / img_height
            yolo_annotations.append(f"{class_id} {x_center} {y_center} {width} {height}")

        # An annotation file with no boxes still yields an (empty) TXT file
        txt_path = os.path.join(output_dir, stem + '.txt')
        with open(txt_path, 'w') as f:
            f.write('\n'.join(yolo_annotations))


In [3]:
# Training split: source images, their JSON annotations, and the folder that
# receives the generated YOLO TXT labels.
# NOTE(review): these are absolute, machine-specific paths - adjust before running elsewhere.
img_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/images'
annotation_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/labels/json'
output_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/labels/yolo'

# Writes one .txt label file into output_dir for every .json file in annotation_dir
convert_annotations_to_yolo_format(img_dir, annotation_dir, output_dir)

In [4]:
# train_test_split, glob and os are already imported in the first cell of the notebook.
annotations_txt_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/labels/yolo'
img_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Training/images'

# Listing all .txt annotation files.
# glob returns paths in arbitrary (filesystem-dependent) order, so the list is
# sorted: without a fixed input order, random_state alone does not make the
# split reproducible across machines or runs.
annotation_files = sorted(glob.glob(os.path.join(annotations_txt_dir, '*.txt')))

# Extracting corresponding image file names from annotation file names.
# splitext swaps only the extension, even if '.txt' also appears inside the name.
img_files = [
    os.path.join(img_dir, os.path.splitext(os.path.basename(f))[0] + '.jpg')
    for f in annotation_files
]

# Splitting the dataset into training (80%) and validation (20%); images and
# labels are split together so that each pair stays in the same subset
train_img_files, val_img_files, train_annotation_files, val_annotation_files = train_test_split(
    img_files, annotation_files, test_size=0.2, random_state=42
)


In [6]:
# Sanity check: size of the training subset produced by the split above
print(f"Number of training images: {len(train_img_files)}")


Number of training images: 1995


# <center> Moving to YOLOV7 </center>

In [None]:
def _copy_files_to_dir(files, dest_dir):
    """Copy every path in ``files`` into ``dest_dir`` (created if missing), keeping file names."""
    os.makedirs(dest_dir, exist_ok=True)
    for src_file in files:
        shutil.copy(src_file, os.path.join(dest_dir, os.path.basename(src_file)))
    # One summary line per directory instead of one line per file, which
    # flooded the cell output with thousands of lines
    print(f"Copied {len(files)} files to {dest_dir}")


# Directory layout expected by the YOLOv7 training configuration.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
yolov7_train_images_dir = '/home/wgt/yolov7/train/images'
yolov7_train_labels_dir = '/home/wgt/yolov7/train/labels'
yolov7_val_images_dir = '/home/wgt/yolov7/val/images'
yolov7_val_labels_dir = '/home/wgt/yolov7/val/labels'

# Same order as before: train images, train labels, val images, val labels
for split_files, split_dir in (
    (train_img_files, yolov7_train_images_dir),
    (train_annotation_files, yolov7_train_labels_dir),
    (val_img_files, yolov7_val_images_dir),
    (val_annotation_files, yolov7_val_labels_dir),
):
    _copy_files_to_dir(split_files, split_dir)

## <center> Results of the first Training process </center>

| Class      | Images | Labels   | P       | R         | mAP@.5    | mAP@.5:.95|
|------------|--------|----------|---------|-----------|-----------|-----------|
| all        | 483    | 1995     | 0.96    | 0.954     | 0.976     | 0.919     |
| dolly      | 483    | 847      | 0.934   | 0.913     | 0.957     | 0.855     |
| bin        | 483    | 349      | 0.961   | 0.983     | 0.992     | 0.948     |             
| jack       | 483    | 799      | 0.986   | 0.967     | 0.98      | 0.952     |

<br> We can see that the precision is very high (96% overall), as well as the recall (95.4%). 
<br> The mean Average Precision (mAP) at an IoU (Intersection over Union) threshold of 0.5 is 97.6%, which indicates very high detection accuracy.
<br> The mAP averaged over IoU thresholds from 0.5 to 0.95 (with a step size of 0.05) is 91.9%; it provides a more comprehensive view of the model performance across various strictness levels of object detection.

<br> The model shows high precision and recall across the classes dolly, bin, and jack, with overall great mAP scores, which indicates strong performance in both recognizing the correct objects (high recall) and ensuring that most detections are accurate (high precision)


## <center> Preparing the Testing Dataset </center>

In [19]:
# Testing split: source images, their JSON annotations, and the folder that
# receives the generated YOLO TXT labels.
# NOTE(review): these are absolute, machine-specific paths - adjust before running elsewhere.
test_img_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Testing/images'
test_annotation_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Testing/labels/json'
test_output_dir = '/home/wgt/Desktop/InMind Academy/AI_Track/Amazing_Project/inmind_amazing_project/data/Testing/labels/yolo'

# Same conversion as for the training data, applied to the testing annotations
convert_annotations_to_yolo_format(test_img_dir, test_annotation_dir, test_output_dir)

In [8]:
# Every .jpg in the testing folder, paired with the YOLO label path derived from its name
test_img_files = glob.glob(os.path.join(test_img_dir, '*.jpg'))
test_annotation_files = [
    os.path.join(test_output_dir, os.path.basename(image_path).replace('.jpg', '.txt'))
    for image_path in test_img_files
]

# Destination folders inside the YOLOv7 checkout
yolov7_test_images_dir = '/home/wgt/yolov7/test/images'
yolov7_test_labels_dir = '/home/wgt/yolov7/test/labels'

print(f"Number of testing images: {len(test_img_files)}")
print(f"Number of testing labels: {len(test_annotation_files)}")

for target_dir in (yolov7_test_images_dir, yolov7_test_labels_dir):
    os.makedirs(target_dir, exist_ok=True)

# Images first, then labels; file names are kept unchanged
for img_file in test_img_files:
    dest_file = os.path.join(yolov7_test_images_dir, os.path.split(img_file)[1])
    shutil.copy(img_file, dest_file)

for label_file in test_annotation_files:
    dest_file = os.path.join(yolov7_test_labels_dir, os.path.split(label_file)[1])
    shutil.copy(label_file, dest_file)

print('Copied all testing images and labels to the YOLOv7 testing directory.')

Number of testing images: 958
Number of testing labels: 958
Copied all testing images and labels to the YOLOv7 testing directory.


## <center> Metrics after testing the YOLOV7 model the first time </center>

### _*Hyperparameter Values*_

### <font color="green" size=5>
- lr0: 0.01
- lrf: 0.1
- iou_t: 0.2
- batches of 4
- 50 epochs

</font>

In [4]:
def load_labels(file_path):
    """Load the boxes stored in a whitespace-separated label file.

    The first column of every line (the class id) is dropped; all remaining
    columns are converted to float, so any extra trailing column is kept.
    Blank or whitespace-only lines are skipped instead of producing empty
    boxes, which would later fail when indexed as ``[x, y, w, h]``.

    Args:
        file_path: Path of the TXT label file.

    Returns:
        A list with one list of floats per non-blank line.

    Raises:
        ValueError: If a column after the first cannot be parsed as a float.
    """
    labels = []
    with open(file_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # blank line: no box to load
            labels.append([float(value) for value in fields[1:]])
    return labels

In [5]:
def _box_corners(box):
    """Convert a ``[x_center, y_center, width, height, ...]`` box to ``(x1, y1, x2, y2)`` corners."""
    half_w, half_h = box[2] / 2, box[3] / 2
    return box[0] - half_w, box[1] - half_h, box[0] + half_w, box[1] + half_h


def calculate_iou(box1, box2):
    """Calculate the Intersection over Union (IoU) of two bounding boxes.

    Args:
        box1: Sequence whose first four items are ``x_center, y_center, width,
            height``. Any further items are ignored.
        box2: Same layout as ``box1``.

    Returns:
        The intersection area divided by the union area, or ``0.0`` when the
        union area is zero (e.g. both boxes are degenerate), instead of
        raising ``ZeroDivisionError``.
    """
    b1_x1, b1_y1, b1_x2, b1_y2 = _box_corners(box1)
    b2_x1, b2_y1, b2_x2, b2_y2 = _box_corners(box2)

    # Overlap extent along each axis, clamped at 0 when the boxes do not overlap
    inter_w = max(0, min(b1_x2, b2_x2) - max(b1_x1, b2_x1))
    inter_h = max(0, min(b1_y2, b2_y2) - max(b1_y1, b2_y1))
    inter = inter_w * inter_h

    union = (b1_x2 - b1_x1) * (b1_y2 - b1_y1) + (b2_x2 - b2_x1) * (b2_y2 - b2_y1) - inter
    if union <= 0:
        return 0.0  # zero-area boxes: no meaningful overlap

    return inter / union

In [9]:
def evaluate_model(predictions_dir, ground_truths_dir, iou_threshold=0.6):
    """Print precision, recall and F1 of predicted boxes against ground-truth boxes.

    Label files are paired by file name. Within a file, each predicted box is
    matched to the not-yet-matched ground-truth box with the highest IoU, as
    long as that IoU reaches ``iou_threshold``; a matched prediction is a true
    positive, an unmatched prediction a false positive, and every ground-truth
    box left unmatched a false negative.

    Notes:
        * Matching is class-agnostic: boxes come from ``load_labels``, which
          drops the class id column.
        * Predictions are processed in file order, not by confidence.
        * Prediction files without a ground-truth file count entirely as
          false positives (previously they were silently ignored).

    Args:
        predictions_dir: Directory with the predicted ``*.txt`` label files.
        ground_truths_dir: Directory with the ground-truth ``*.txt`` label files.
        iou_threshold: Minimum IoU for a prediction to count as a true positive.
    """
    # Loading all ground truth and prediction files
    gt_files = glob.glob(os.path.join(ground_truths_dir, '*.txt'))
    pred_files = glob.glob(os.path.join(predictions_dir, '*.txt'))

    gt_labels = {os.path.basename(f): load_labels(f) for f in gt_files}
    pred_labels = {os.path.basename(f): load_labels(f) for f in pred_files}

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    # Union of both name sets, so files present on only one side are still counted
    for file_name in sorted(set(gt_labels) | set(pred_labels)):
        gt_boxes = gt_labels.get(file_name, [])
        pred_boxes = pred_labels.get(file_name, [])

        matched = set()  # indices of ground-truth boxes already claimed by a prediction
        for pred_box in pred_boxes:
            best_index, best_iou = None, 0.0
            for j, gt_box in enumerate(gt_boxes):
                if j in matched:
                    continue
                iou = calculate_iou(pred_box, gt_box)
                # Keep the best-overlapping candidate rather than the first one
                # above the threshold, so a box is not "stolen" from a better match
                if iou >= iou_threshold and (best_index is None or iou > best_iou):
                    best_index, best_iou = j, iou

            if best_index is None:
                false_positives += 1
            else:
                true_positives += 1
                matched.add(best_index)

        false_negatives += len(gt_boxes) - len(matched)

    # Calculating precision, recall and F1; each falls back to 0 when its denominator is 0
    precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1_score:.4f}")

In [11]:
# Placeholder paths
predictions_dir = '/home/wgt/yolov7/runs/detect/exp/labels'
ground_truths_dir = '/home/wgt/yolov7/test/labels'

evaluate_model(predictions_dir, ground_truths_dir)

Precision: 0.5391
Recall: 0.7352
F1 Score: 0.6221


### _**Analysis of the results**_
<br> Precision: _0.5391_. A low precision value such as the one we got may indicate a relatively higher number of false positives.

<br> Recall: _0.7352_. The model performed better in terms of recall. Higher recall means the model is better at detecting the objects of interest but may also be picking up some false positives to increase this score.

<br> F1 score: _0.6221_. This indicates the model has a moderate level of performance. As the F1 score ranges between 0 and 1, a score of _0.6221_ ranks in the mid range. While there is room for improvement, this score does outline a positive, related to the balance of the model between precision and recall.

<br> It is worth noting that there is usually a trade-off between precision and recall: tuning the model to raise one tends to lower the other. However, it is still possible to improve both of the values we got from the testing.

<br> Here are some steps we can take to improve model performance:
<br> 
1. Data Augmentation: Enhance the dataset with more varied data through augmentation techniques (e.g., flipping, scaling, cropping) to improve the model’s ability to generalize. This was already done in the DataAugmentation.py file.

2. Hyperparameter Tuning: Adjusting learning rate, IoU, batch size, or other training parameters may help optimize training.

3. Review of Misclassified Examples: Analyze specific cases where the model failed (both false positives and false negatives) to understand potential patterns or characteristics that led to incorrect predictions.

4. Fine-Tuning: Since we started with pre-trained weights, further fine-tuning on our specific dataset could lead to improvements.

### _**Implementation of a mock mAP function**_
<font color="red" size=5> <br> For correct results, this function needs the per-class precision and recall curves.
<br> In this code, we will be using dummy values for the sake of demonstration.</font>


In [15]:
def calculate_ap(recalls, precisions):
    """Compute the Average Precision (AP) as the area under a precision-recall curve.

    The curve is padded with the points (recall 0, precision 0) and
    (recall 1, precision 0), the precision is replaced by its monotonically
    non-increasing envelope, and the area is summed over the steps where the
    recall changes.

    Args:
        recalls: 1-D sequence of recall values.
        precisions: 1-D sequence of precision values, same length as ``recalls``.

    Returns:
        The AP as a NumPy floating-point scalar.
    """
    padded_recall = np.concatenate(([0.], recalls, [1.]))
    padded_precision = np.concatenate(([0.], precisions, [0.]))

    # Precision envelope: running maximum taken from the right-hand end
    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]

    # Positions where the recall value moves to a new level
    steps = np.flatnonzero(padded_recall[1:] != padded_recall[:-1])
    step_widths = padded_recall[steps + 1] - padded_recall[steps]
    return np.sum(step_widths * envelope[steps + 1])

In [16]:
def calculate_map(predictions, ground_truths, iou_threshold=0.5):
    '''
    Mock of a single-IoU-threshold mAP computation.

    The arguments describe the intended interface but are currently unused:
    the function prints the AP of hard-coded example precision/recall curves.

    predictions: intended as a dict of image_id to predicted boxes [confidence_score, x, y, w, h]
    ground_truths: intended as a dict of image_id to ground truth boxes [x, y, w, h]
    iou_threshold: intended IoU threshold for matching
    '''
    # Hard-coded example curve standing in for real per-class precision/recall data
    mock_recalls = np.array([0.5, 0.65, 0.85])
    mock_precisions = np.array([0.9, 0.75, 0.6])

    ap = calculate_ap(mock_recalls, mock_precisions)
    print(f"AP: {ap:.4f}")

In [17]:
# Mock function calls - Need to be replaced with actual data processing and mAP calculation
predictions_dir = '/home/wgt/yolov7/runs/detect/exp/labels'
ground_truths_dir = '/home/wgt/yolov7/test/labels'
calculate_map(predictions_dir, ground_truths_dir)

AP: 0.6825


## <center> <font color="white"> Metrics after testing the YOLOV7 model the second time</font> </center>

### _*New Hyperparameter Values*_

### <font color="green" size=5>
- lr0: 0.0001
- lrf: 0.01
- iou_t: 0.6
- batches of 4
- 80 epochs

</font>

In [10]:
# Predicted label files of the second detection run, compared against the same
# ground-truth testing labels as the first run.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
predictions_dir = '/home/wgt/yolov7/runs/detect/exp2/exp3/labels'
ground_truths_dir = '/home/wgt/yolov7/test/labels'

# Prints precision, recall and F1 score for the retrained model
evaluate_model(predictions_dir, ground_truths_dir)

Precision: 0.5411
Recall: 0.6790
F1 Score: 0.6022


### _**Analysis of the results**_
<br> Precision: _0.5411_. The precision increased slightly compared to the first test run (_0.5391_), meaning that a marginally smaller share of the model's detections are false positives.

<br> Recall: _0.6790_. While the precision increased slightly compared to the previous test run, the recall decreased much more (from _0.7352_). The increase in precision is desired; however, the larger decrease in recall outweighs it. This is reflected in the F1 score of the model.

<br> F1 Score: _0.6022_. This score is lower than the one we observed in the first test run (_0.6221_). As mentioned, although the precision increased by about 0.2 percentage points, the recall decreased by about 5.6 percentage points, which led to this lower F1 score. The results are not satisfactory; however, by observing TensorBoard we can see that the precision and recall values stagnate after around 40 and 55 epochs, respectively. Therefore, there is no need to train for so many epochs if the metric values are no longer changing.

<br> It is worth noting that there is usually a trade-off between precision and recall: tuning the model to raise one tends to lower the other. However, it is still possible to improve both of the values we got from the testing.