In [1]:
import torch
import os
import cv2
import numpy as np
import math
import time
import shutil
from fastapi import FastAPI,File, UploadFile
import uvicorn
from PIL import Image
import nest_asyncio
import dlib

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#intializing the fast api object
app = FastAPI()

def run(app=app):
    nest_asyncio.apply()
    uvicorn.run(app, port=8000)

#### Below code is the utility functions which will be used as a part of video detection.  

In [1]:
# given a dataframe of all the detected object coordinates in a frame, put them in a list of tuples 
def get_coordinates(df):
    objects = []
    for index, row in df.iterrows():
        objects.append( ( int(row[0]), int(row[1]), int(row[2]), int(row[3]) ) )

    return objects

# Takes frame and the model that got initialized. Then, process that frame and detects the desired objects and returns the coorindates, confidence and class label.
# However, we are only taking the coordinates of the detected objects and returning that as a list of tuples.
def detect(model, img):
    results = model(img)
    df = results.pandas().xyxy[0]
    detect_info = get_coordinates(df)
    return detect_info


# Given the coordinates of the detected objects, return the centroids of those objects as a list of centroids.
def get_centroid(detect_info):
    # object --> (top-left) x1,y1, (bottom-right) x2,y2
    centroids = []
    for object in detect_info:
        centroid_x = (object[0] + object[2]) // 2
        centroid_y = (object[1] + object[3]) // 2
        bboxHeight = object[3] - object[1]
        centroids.append([centroid_x, centroid_y, bboxHeight])
    return centroids


# returns the distance between two centroids 
def get_distance(centroid1, centroid2):
    distance = math.sqrt((centroid1[0] - centroid2[0]) ** 2 + (centroid1[1] - centroid2[1]) ** 2)
    return distance

# Given the centroids of the detected objects, computes  the distance between each centroids and those centroids which have less than MIN_DIST threshold will be marked as a violated people.
def get_violated_distance_people_test(centroids, MIN_DIST=80):
    violated_people = []
    for i in range(len(centroids)):
        for j in range(i + 1, len(centroids)):
            if get_distance(centroids[i], centroids[j]) <= MIN_DIST:
                violated_people.append((i, j))
    return violated_people


# Draws the rectangle around the detected objects with red color for violated people along with arrows and  
def draw_arrows(image, detect_info, centroids, violated_people):
    if len(detect_info) > 0:
        for idx, object in enumerate(detect_info):
            #checks if the current person or index is in violated people list, if yes, then draws a red color rectangale with label as ALert alongs with arrows
            if list(filter(lambda x: x.count(idx) > 0, violated_people)):
                temp = list(filter(lambda x: x.count(idx) > 0, violated_people))  # [(),()] temp - violated people index
                color = (0, 0, 255)
                label = 'Alert'
                for i in range(len(temp)):
                    # image = cv2.circle(image, (centroids[temp[i][0]][0], centroids[temp[i][0]][1]), 10, color, -1)
                    # image = cv2.circle(image, (centroids[temp[i][1]][0], centroids[temp[i][1]][1]), 10, color, -1)
                    image = cv2.arrowedLine(image, (centroids[temp[i][0]][0], centroids[temp[i][0]][1]),
                                     (centroids[temp[i][1]][0], centroids[temp[i][1]][1]), color, 2)
                    image = cv2.arrowedLine(image, (centroids[temp[i][1]][0], centroids[temp[i][1]][1]),(centroids[temp[i][0]][0], centroids[temp[i][0]][1]), color, 2)
            else:
                color = (0, 255, 0)
                label = 'Safe'
            image = cv2.rectangle(image, (object[0], object[1]), (object[2], object[3]), color, 2)
            image = cv2.putText(image, label, (object[0], object[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2,cv2.LINE_AA)

    text = "Social Distancing Violations: {}".format(len(set([item  for tup in violated_people for item in tup])))
    image = cv2.putText(image, text, (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.85, (0, 0, 255), 3)
    return image


# This part of code will download the model weights and skeleton of the Yolov5{s/m/l}.
# cls = 0 represents the person class which will make sure only the person object is detecteda and others are discarded.
# conf_thresh  = Minimum probability of the detected objects
# iou_thresh - Minimum threshold for IOU
def get_model(algo = 'yolov5s',cls =0, conf_thresh=0.30,iou_thresh=0.30):
    dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # model = torch.hub.load('ultralytics/yolov5', 'yolov5s').to(dev) # force_reload=True
    model = torch.hub.load('ultralytics/yolov5', algo,force_reload=True).to(dev)
    model.classes = cls #person
    model.conf = conf_thresh
    model.iou = iou_thresh
    # filename
    return model



#### The main part of this project-- Phydical distance detector is implemented below

In [5]:

### The endpoint should return JSON-formatted information about the input video (physical distancing detections)

# Takes the uploaded file (filename) and computes PDD and saves the processed video with saving_file_name under the folder output_files
async def videodetect(model,filename, saving_file_name, isTracking =True):

    '''
    In order to reduce the time and memory consumption as well as flickering of the deteted object, I used detection once every 50 frames (skip_frames) 
     and in between used object tracking algorithm from dlib library.  

    To use only detection algorithm, then, assign isTracking to False
    
    '''


    filepath = os.path.join(filename)
    cap = cv2.VideoCapture(filepath)
    if cap.isOpened() ==False:
      print("Error in opening up the file")

    if not os.path.exists('output_files'):
        os.mkdir('output_files')
    #stores all the coordinates, centroids and violated people of all the frames in the video    
    detection_objects,all_centroids, all_violates = [],[],[]
    fourcc = cv2.VideoWriter_fourcc('X','V','I','D')
    save_file_path = os.path.join('output_files', saving_file_name)
    writer = cv2.VideoWriter(save_file_path, fourcc, 25,( int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),   int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

    start = time.time()
    num_frames = 0
    skip_frames =50
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # Need to convert the image to RGB format as dlib accepts only RGB format
            RGBframe = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # For every N frames we will use detection algorithm
            if num_frames % skip_frames ==0:
                detect_info = detect(model, RGBframe)
                trackers = []
                # Each detected object needs a tracker object which helps in tracking that object for the following frames 
                for object in detect_info:
                    tracker = dlib.correlation_tracker()
                    rect = dlib.rectangle(object[0],object[1],object[2],object[3])
                    tracker.start_track(RGBframe, rect)
                    trackers.append(tracker)
                    # frame = cv2.rectangle(frame, (object[0],object[1]), (object[2],object[3]),(0, 255, 0), 2)
                
                # computing the distance between the objects and identifying the violated people in a frame
                centroids = get_centroid(detect_info)
                violated_people = get_violated_distance_people_test(centroids)
                frame = draw_arrows(frame, detect_info, centroids, violated_people)

                detection_objects.append(detect_info)
                all_centroids.append(centroids)
                all_violates.append(violated_people)
            else:

              # this part of code only uses object tracking, where each tracker (detected object) postion is updated based KCF tracking algorithm
              detect_info = []
              for tracker in trackers:

                tracker.update(RGBframe)
                pos = tracker.get_position()
                startX = int(pos.left())
                startY = int(pos.top())
                endX = int(pos.right())
                endY = int(pos.bottom())
                detect_info.append((startX, startY, endX, endY))
                # frame = cv2.rectangle(frame, (startX, startY), (endX, endY),(0, 255, 0), 2)

              # computing the distance between the objects and identifying the violated people in a frame
              centroids = get_centroid(detect_info)
              violated_people = get_violated_distance_people_test(centroids)
              frame = draw_arrows(frame, detect_info, centroids, violated_people)

              detection_objects.append(detect_info)
              all_centroids.append(centroids)
              all_violates.append(violated_people)

            # opencv-python/ opencv-python-contrib is not working in a container for the below api call (cv2.imshow) and hence live processing is not possible when running in container            
            try:
              cv2.imshow('Frame', frame)
              if cv2.waitKey(25) & 0xFF == ord('q'):
                break
            except:
              pass
            writer.write(frame)
            num_frames += 1
        else:
            break

    end = time.time()
    seconds = end - start
    print("Time taken : {0} seconds".format(seconds))
    # Calculate frames per second
    fps = num_frames / seconds
    print("Estimated frames per second : {0}".format(fps))
    print(f"Total number of frames processed: {num_frames}")
    cap.release()
    writer.release()
    try:
        cv2.destroyAllWindows()
    except:
        pass


    return (detection_objects,all_centroids,all_violates)

I am storing the uploaded file into the local storage under uploaded_files folder

In [6]:

async def _save_file_to_disk(uploaded_file, path=".", save_as="default"):
    # extension = os.path.splitext(uploaded_file.filename)[-1]
    temp_file = os.path.join(path, save_as)
    with open(temp_file, "wb") as buffer:
        shutil.copyfileobj(uploaded_file.file, buffer)
    return temp_file


### The endpoint should accept a video via the body of a POST request (video can be small to circumvent large video issues)
 
When the file is uploaded, the 'create_upload_file' function will be called first as this is our endpoint. This function will load the model, saves the uploaded file, and calls the videodetect function compute the PDD.

In [7]:
# You can change this to yolov5s,yolov5m,yolov5l which signifies different model configuration 
algo = 'yolov5s'

@app.post("/uploadfile/")
async def create_upload_file(file: UploadFile= File(...)):
    model = get_model(algo)
    folder_name = 'uploaded_files'
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)
    temp_file = await _save_file_to_disk(file, path=folder_name, save_as=file.filename)
    detect_info,centroids,violated_people = await videodetect(model,temp_file,file.filename)
    return {'detection_objects':detect_info, 'centroids':centroids, 'violated_people_pair' : violated_people}
    # return {"filename": file.filename}

In [8]:
# this will start the process and opens ups the API endpoint in the localhost.  Use http://localhost:8000/docs, this will provide web interface given by FastAPI,
#  where you can  try to upload the file under the POST endpoint  'uploadfile'.
run()

INFO:     Started server process [12480]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:59413 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:59415 - "GET /openapi.json HTTP/1.1" 200 OK


Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to C:\Users\deepi/.cache\torch\hub\master.zip
YOLOv5  2022-3-26 torch 1.11.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Time taken : 37.07526397705078 seconds
Estimated frames per second : 2.939965581026474
Total number of frames processed: 109
INFO:     127.0.0.1:59420 - "POST /uploadfile/ HTTP/1.1" 200 OK


Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to C:\Users\deepi/.cache\torch\hub\master.zip
YOLOv5  2022-3-26 torch 1.11.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Time taken : 239.10304307937622 seconds
Estimated frames per second : 2.9527018598656047
Total number of frames processed: 706
INFO:     127.0.0.1:59430 - "POST /uploadfile/ HTTP/1.1" 200 OK


INFO:     Shutting down
Shutting down
INFO:     Waiting for application shutdown.
Waiting for application shutdown.
INFO:     Application shutdown complete.
Application shutdown complete.
INFO:     Finished server process [12480]
Finished server process [12480]


## Additional Questions

<i><b>  a)  What are some of the challenges you faced while designing your PDD? Where might it fail? Please justify and make suggestions for improvement. </i></b>

Ans:

<i><u> Challenges 1) </i></u> I found that due to the pixel wise distance calculation, vertical distance between people in the image is lower compared to the ground truth. However, the horizontal distance between people is seems to be closer to the ground truth distance. Hence, people who are far apart from each other also violate the MINIMUM DISTANCE when measured vertically.


<i><u> Solution 1) </i></u> : To use intrinsic and extrinsic parameters of camera caliberation, However it will be expensive to install and buy cameras.

<i><u> Challenges 2) </i></u> If the model size is big, the processing of the image will be time-consuming.

<i><u> Solution 2) </i></u>:  I have used object tracking algorithm along with object detection, where detection is executed once in every 10 or 20 frames and till the object tracking is executed which drastically improves the speed. Furthermore, using quantization and pruning, we can reduce the size of the detection model, but it will hurt the accuracy.


<i><u> Bug 3) </i></u> Initializing opencv-python/opencv-contrib{headless} in docker with GUI does not work.

<i><u> Solution 3) </i></u> Therefore, when installed in your local machine the live processing of the video will pop up. However, if you are running it through docker container, the video will processed in the background and stores in a folder.


 - https://github.com/opencv/opencv-python/issues/447#issuecomment-779847130
 - https://github.com/opencv/opencv-python/issues/370#issuecomment-671202529

<i><b>  b) How could you improve your PDD so that it does not raise an alert when two people are from the same household (and therefore allowed to be close together)?</b></i>

Ans : Possible solution is to check the minimum distance between two people for N number of frames, if it is less than some threshold than probably they are from same household or they know each other.

Step 1) Is to keep track of each objects' centroid using centroid tracking algorithm, through which we can assign a unique ID to each object and keep track of it for N frames.{object_ID : centroids}

Step 2) Fetch the ID's of people who violated the minimum distance rule and then check the mean of the distance between each of them for N frames, if their mean is less than some threshold then let's assume that they are from same household or they know each other.



## Optional

<i><b>  c) How could you improve this so that the predicted bounding boxes do not flicker between frames? </i></b>

Sol: I used object tracking along with object detection in order to get rid flickering. This resulted in lesser number of flickering than only using detection.

<i><b> d)Write functions to implement a, b and/or c. </i></b>

 I solved the a) (challenge 2) and c) using the  <b>videodetect</b> function. By combining both detection and tracking methodology as explained in the previous cell. 


