# YouTube Trending Videos - Thumbnail Analysis

### Developing/testing procedure for face detection

In [1]:
import cv2
import matplotlib.pyplot as plt
from urllib.request import urlopen
from PIL import Image
from skimage import io

_FACE_CASCADE_FILE = 'haarcascade_frontalface_alt2.xml'
_face_cascade_cache = {}


def _load_face_cascade():
    """Return the frontal-face Haar cascade, reading the XML file only once."""
    cascade = _face_cascade_cache.get(_FACE_CASCADE_FILE)
    if cascade is None:
        cascade = cv2.CascadeClassifier(cv2.data.haarcascades + _FACE_CASCADE_FILE)
        # Fail early with a readable message if the classifier did not load,
        # instead of letting detectMultiScale fail later on an empty cascade.
        if cascade.empty():
            raise RuntimeError(
                'Could not load Haar cascade: '
                + cv2.data.haarcascades + _FACE_CASCADE_FILE
            )
        _face_cascade_cache[_FACE_CASCADE_FILE] = cascade
    return cascade


def get_detected_face(face):
    """Detect frontal faces in an image and outline them on a copy.

    Parameters
    ----------
    face : image object exposing ``.copy()`` that OpenCV can process
        (the notebook passes the array returned by ``skimage.io.imread``).
        The input itself is never drawn on.

    Returns
    -------
    (face_img, result)
        ``face_img`` is the copy with one rectangle drawn per detection.
        ``result`` is ``[False, '']`` when nothing was detected, otherwise
        ``[True, detections]`` where each detection is ``x, y, width, height``.

    Raises
    ------
    RuntimeError
        If the Haar cascade file could not be loaded.
    """
    face_img = face.copy()
    # Reuse the cached classifier: this function runs once per thumbnail, and
    # re-parsing the cascade XML on every call is pure overhead.
    fr = _load_face_cascade().detectMultiScale(face_img)
    for (x, y, width, height) in fr:
        cv2.rectangle(face_img, (x, y), (x + width, y + height), (255, 255, 77), 5)

    if len(fr) == 0:
        result = [False, '']
    else:
        result = [True, fr]  # rows of x, y, width, height

    return face_img, result

def detectFromUrl(yt_id, quality="maxresdefault", show=False):
    """Download a YouTube thumbnail and run face detection on it.

    Parameters
    ----------
    yt_id : str
        YouTube video id; it is spliced into the ``i.ytimg.com`` thumbnail URL.
    quality : str, optional
        Thumbnail file name without extension. Defaults to ``"maxresdefault"``,
        the value this function previously hard-coded.
        NOTE(review): other variants such as ``"hqdefault"`` presumably exist
        for videos lacking a max-resolution thumbnail -- confirm before use.
    show : bool, optional
        When True, display the annotated image with matplotlib. Off by default
        so batch runs stay silent.

    Returns
    -------
    list
        The ``result`` element produced by ``get_detected_face``:
        ``[False, '']`` or ``[True, detections]``.

    Any exception raised while fetching or decoding the image propagates to
    the caller.
    """
    url = "https://i.ytimg.com/vi/" + yt_id + "/" + quality + ".jpg"
    face1 = io.imread(url)
    face_img, result = get_detected_face(face1)
    if show:
        plt.imshow(face_img)
        plt.show()
    return result

In [2]:
# Hand-picked video ids used to try the detector on real thumbnails.
thumbnails = [
    "gPdUslndvVI",
    "3C66w5Z0ixs",
    "p7HGUZWq_8s",
    "uet14uf9NsE",
    "VIUo6yapDbc",
    "CezxKs9YLv0",
    "GYeKX_LPY9U",
    "9AecsACtkB4",
    "p6RatnMhDj8",
]

# One detectFromUrl() result per id, in the same order as `thumbnails`.
results = [detectFromUrl(video_id) for video_id in thumbnails]

results

[[True, array([[256, 239, 204, 204]], dtype=int32)],
 [True, array([[814,  44,  62,  62]], dtype=int32)],
 [True,
  array([[726, 132, 318, 318],
         [162, 145, 307, 307]], dtype=int32)],
 [True,
  array([[219, 151, 201, 201],
         [591, 210, 181, 181]], dtype=int32)],
 [True,
  array([[ 423,  192,   84,   84],
         [ 160,  266,   82,   82],
         [1031,  261,   90,   90],
         [1133,  264,  117,  117],
         [ 906,  374,   84,   84],
         [ 303,  433,   70,   70]], dtype=int32)],
 [True, array([[570,  86, 173, 173]], dtype=int32)],
 [True, array([[ 21, 275,  51,  51]], dtype=int32)],
 [False, ''],
 [True, array([[736, 101, 280, 280]], dtype=int32)]]

## Detecting Faces in the YouTube US Trending Dataset

### Library Imports

In [3]:
import numpy as np
import pandas as pd

# Display settings: never truncate columns, and print floats with two decimals.
pd.options.display.max_columns = None
pd.options.display.float_format = '{:.2f}'.format

from datetime import datetime
import warnings

import matplotlib.pyplot as plt
import matplotlib.style as style

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so pick whichever name this
# installation provides rather than hard-coding one of them.
_whitegrid_style = next(
    (
        name
        for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if name in style.available
    ),
    'default',
)
style.use(_whitegrid_style)
font = {'size' : 12}
figure = {'titlesize' : 18}
plt.rc('font', **font)
plt.rc('figure', **figure)

import seaborn as sns
# sns.set() applies its own default style ("darkgrid") unless told otherwise,
# which silently undid a separate set_style("whitegrid") call. Passing the
# style in the same call keeps the white grid together with the font scaling.
sns.set(style="whitegrid", font_scale=1.2)

from tqdm import tqdm
import missingno as msno

## Import dataset

In [4]:
# Raw US trending export; the path is relative to the notebook's working directory.
RAW_TRENDING_CSV = "US_youtube_trending_data.csv"

df = pd.read_csv(RAW_TRENDING_CSV)
df.head()

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,description
0,3C66w5Z0ixs,I ASKED HER TO BE MY GIRLFRIEND...,2020-08-11T19:20:14Z,UCvtRTOMP2TqYqu51xNrqAzg,Brawadis,22,2020-08-12T00:00:00Z,brawadis|prank|basketball|skits|ghost|funny vi...,1514614,156908,5855,35313,https://i.ytimg.com/vi/3C66w5Z0ixs/default.jpg,False,False,SUBSCRIBE to BRAWADIS ▶ http://bit.ly/Subscrib...
1,M9Pmf9AB4Mo,Apex Legends | Stories from the Outlands – “Th...,2020-08-11T17:00:10Z,UC0ZV6M2THA81QT9hrVWJG3A,Apex Legends,20,2020-08-12T00:00:00Z,Apex Legends|Apex Legends characters|new Apex ...,2381688,146739,2794,16549,https://i.ytimg.com/vi/M9Pmf9AB4Mo/default.jpg,False,False,"While running her own modding shop, Ramya Pare..."
2,J78aPJ3VyNs,I left youtube for a month and THIS is what ha...,2020-08-11T16:34:06Z,UCYzPXprvl5Y-Sf0g4vX-m6g,jacksepticeye,24,2020-08-12T00:00:00Z,jacksepticeye|funny|funny meme|memes|jacksepti...,2038853,353787,2628,40221,https://i.ytimg.com/vi/J78aPJ3VyNs/default.jpg,False,False,I left youtube for a month and this is what ha...
3,kXLn3HkpjaA,XXL 2020 Freshman Class Revealed - Official An...,2020-08-11T16:38:55Z,UCbg_UMjlHJg_19SZckaKajg,XXL,10,2020-08-12T00:00:00Z,xxl freshman|xxl freshmen|2020 xxl freshman|20...,496771,23251,1856,7647,https://i.ytimg.com/vi/kXLn3HkpjaA/default.jpg,False,False,Subscribe to XXL → http://bit.ly/subscribe-xxl...
4,VIUo6yapDbc,Ultimate DIY Home Movie Theater for The LaBran...,2020-08-11T15:10:05Z,UCDVPcEbVLQgLZX0Rt6jo34A,Mr. Kate,26,2020-08-12T00:00:00Z,The LaBrant Family|DIY|Interior Design|Makeove...,1123889,45802,964,2196,https://i.ytimg.com/vi/VIUo6yapDbc/default.jpg,False,False,Transforming The LaBrant Family's empty white ...


In [5]:
ids = df['video_id'].tolist()

# Run face detection for every row of the dataset, producing one entry per row
# in `results` (same order as `df`):
#   [True, detections] / [False, ''] from detectFromUrl, or
#   ['exception', ''] when the thumbnail could not be fetched or processed.
#
# Successful detections are remembered per video id so a repeated id is only
# downloaded once (presumably ids repeat across trending dates -- verify).
# Failures are deliberately not remembered, so a later row with the same id
# gets another attempt, as before.
detections_by_id = {}
results = []
for video_id in tqdm(ids, desc="detecting faces"):
    if video_id not in detections_by_id:
        try:
            detections_by_id[video_id] = detectFromUrl(video_id)
        except Exception:
            # `except Exception` rather than a bare `except:` so that
            # KeyboardInterrupt can still stop this long-running loop.
            results.append(['exception', ''])
            continue
    results.append(detections_by_id[video_id])

# Preview only: the full list has one entry per dataset row.
results[:10]

[[True, array([[814,  44,  62,  62]], dtype=int32)],
 [True,
  array([[776, 134, 235, 235],
         [466, 102, 358, 358]], dtype=int32)],
 [True, array([[390,  14, 549, 549]], dtype=int32)],
 [True,
  array([[1062,  138,   56,   56],
         [ 887,  230,   34,   34],
         [ 990,  218,   44,   44],
         [ 932,  337,   50,   50],
         [ 998,  346,   51,   51]], dtype=int32)],
 [True,
  array([[ 423,  192,   84,   84],
         [ 160,  266,   82,   82],
         [1031,  261,   90,   90],
         [1133,  264,  117,  117],
         [ 906,  374,   84,   84],
         [ 303,  433,   70,   70]], dtype=int32)],
 [True, array([[545,  44,  59,  59]], dtype=int32)],
 [True,
  array([[219, 151, 201, 201],
         [591, 210, 181, 181]], dtype=int32)],
 [False, ''],
 [True,
  array([[ 488,   44,   75,   75],
         [ 767,  112,   62,   62],
         [1129,  245,   63,   63],
         [1042,  538,   66,   66]], dtype=int32)],
 [False, ''],
 [False, ''],
 [True,
  array([[152, 111, 23

In [12]:
# Keep only the status flag (first element) of every detection result.
newArray = [entry[0] for entry in results]

In [13]:
# Share of thumbnails with at least one detected face, counting only the
# entries flagged True or False.
n_with_faces = newArray.count(True)
n_without_faces = newArray.count(False)
n_with_faces / (n_with_faces + n_without_faces)

0.6381357873835345

In [14]:
# Attach the per-row detection flag as a new column. Values are True, False,
# or the string 'exception' (the first element of each entry in `results`).
# Relies on newArray being in the same row order as df.
df['faces_detected'] = newArray

In [15]:
# Write the frame, including the new faces_detected column, to a new CSV
# without the pandas index.
FACES_CSV = 'US_youtube_trending_data_with_faces.csv'
df.to_csv(FACES_CSV, index=False, encoding='utf-8')