# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import JSON
from googleapiclient.discovery import build
import os
import pathlib
import re
notebook_path = os.getcwd()

# Functions for YouTube data API

In [2]:
def get_channel_stats(youtube, channel_ids):
    '''
    Read the info of the requested channels and return it as a dataframe.
    METHOD: .channels().list()

    Parameters
    ----------
    youtube : API client exposing `.channels().list(...).execute()`
    channel_ids : channel ID(s); forwarded unchanged as the `id` argument

    Returns
    -------
    pandas.DataFrame with one row per item in the response and the columns
    ChannelName, Subscribers, TotalViews, Videos, PlaylistID. Values are stored
    exactly as they appear in the response (no type conversion is done here).
    '''
    columns = ['ChannelName', 'Subscribers', 'TotalViews', 'Videos', 'PlaylistID']

    # create request
    request = youtube.channels().list(part="snippet,contentDetails,statistics", id=channel_ids)
    response = request.execute()

    data = []
    # A response without an 'items' key yields an empty table instead of a KeyError.
    for item in response.get('items', []):
        stats = item['statistics']
        data.append({
            'ChannelName': item['snippet']['title'],
            # Counters are read with .get() so an absent one (e.g. a hidden
            # subscriber count) becomes None rather than aborting the whole table.
            'Subscribers': stats.get('subscriberCount'),
            'TotalViews': stats.get('viewCount'),
            'Videos': stats.get('videoCount'),
            'PlaylistID': item['contentDetails']['relatedPlaylists']['uploads'],
        })

    # Passing `columns` keeps the schema stable even when no rows were collected.
    return pd.DataFrame(data, columns=columns)


def get_videoIDs(youtube, playlistID):
    '''
    Create a list of the video IDs of a PlaylistID, following pagination.
    METHOD: .playlistItems().list()

    Parameters
    ----------
    youtube : API client exposing `.playlistItems().list(...).execute()`
    playlistID : ID of the playlist to read

    Returns
    -------
    list of the `contentDetails.videoId` values of every item, in response order.
    '''
    video_IDs = []
    next_page_token = None

    # One loop serves the first page and every follow-up page; the only
    # difference between them is the optional pageToken argument.
    while True:
        params = {'part': "snippet, contentDetails", 'playlistId': playlistID, 'maxResults': 50}
        if next_page_token is not None:
            params['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**params).execute()

        # A page without an 'items' key contributes nothing instead of raising a KeyError.
        for item in response.get('items', []):
            video_IDs.append(item['contentDetails']['videoId'])

        # No token in the response means this was the last page.
        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            return video_IDs


def get_video_details(youtube, VideoList):
    '''
    Read the details of each video in VideoList and return them as a dataframe.
    METHOD: .videos().list()

    Parameters
    ----------
    youtube : API client exposing `.videos().list(...).execute()`
    VideoList : list of video ID strings

    Returns
    -------
    pandas.DataFrame with one row per returned video and the columns VideoID,
    Date/Time, Title, Description, Tags, Duration, Definition, Caption, Views, Likes.
    Videos belonging to a batch whose request failed are missing from the result.
    '''
    batch_size = 50
    data = []

    # Loop through the video list in batches of 50
    for start in range(0, len(VideoList), batch_size):
        batch = VideoList[start:start + batch_size]
        request = youtube.videos().list(part="snippet,contentDetails,statistics", id=','.join(batch))

        try:
            response = request.execute()
        except Exception as e:
            # Best effort: report the failed batch and move on to the next one.
            print(f"An error occurred: {e}")
            continue

        # Loop through the response items
        for item in response.get('items', []):
            snippet = item['snippet']
            details = item['contentDetails']
            stats = item['statistics']
            data.append({
                'VideoID': item['id'],
                'Date/Time': snippet['publishedAt'],
                'Title': snippet['title'],
                'Description': snippet['description'],
                'Tags': snippet.get('tags', []),   # Tags might not always be present
                'Duration': details['duration'],
                'Definition': details['definition'],
                'Caption': details['caption'],
                'Views': stats.get('viewCount', None),  # Handle missing statistics
                'Likes': stats.get('likeCount', None),
            })

        # Reports the end of the batch window, which can exceed len(VideoList) on the last batch.
        print(f"Processed up to video index: {start + batch_size}")

    # Convert the collected data into a DataFrame
    return pd.DataFrame(data)

# Retrieve Youtube data

## Read API key

In [3]:
# Read the key inside a context manager so the file handle is always closed,
# and drop surrounding whitespace (such as a trailing newline) from the key.
with open('API_key.txt') as file:
    api_key = file.read().strip()
#api_key

## Import channel IDs

In [4]:
# Channels to analyse, given as YouTube channel IDs
channel_ids = [
    'UC9HGzFGt7BLmWDqooUbWGBg',  # Matthew
    'UCYqdbnuvx-jJZByotoNW-5Q',  # Apollonia
]

## Build API client

In [5]:
# Service name and version passed to build() together with the API key
api_service_name, api_version = "youtube", "v3"

youtube = build(api_service_name, api_version, developerKey=api_key)

## Retrieve channel info/statistics | .channels() Method

In [6]:
# Channel-level table; the bare name on the last line renders it as the cell output
df = get_channel_stats(youtube=youtube, channel_ids=channel_ids)
df

Unnamed: 0,ChannelName,Subscribers,TotalViews,Videos,PlaylistID
0,Matthew Hussey,3050000,541655958,942,UU9HGzFGt7BLmWDqooUbWGBg
1,Apollonia Ponti,682000,105424181,1044,UUYqdbnuvx-jJZByotoNW-5Q


## Retrieve video lists  |  .playlistItems() Method

In [7]:
# Look up each channel's uploads playlist in the channel table, then list its video IDs.

# Matthew
is_matthew = df['ChannelName'] == 'Matthew Hussey'
playlistID = df.loc[is_matthew, 'PlaylistID'].iloc[0]
VideoList_mat = get_videoIDs(youtube, playlistID)

# Apollonia
is_apollonia = df['ChannelName'] == 'Apollonia Ponti'
playlistID = df.loc[is_apollonia, 'PlaylistID'].iloc[0]
VideoList_apo = get_videoIDs(youtube, playlistID)

## Retrieve video info and statistics  |  .videos() Method

### Build dataframe

In [8]:
# Build one details table per channel (Matthew first, then Apollonia)
video_mat, video_apo = (
    get_video_details(youtube, video_ids)
    for video_ids in (VideoList_mat, VideoList_apo)
)

Processed up to video index: 50
Processed up to video index: 100
Processed up to video index: 150
Processed up to video index: 200
Processed up to video index: 250
Processed up to video index: 300
Processed up to video index: 350
Processed up to video index: 400
Processed up to video index: 450
Processed up to video index: 500
Processed up to video index: 550
Processed up to video index: 600
Processed up to video index: 650
Processed up to video index: 700
Processed up to video index: 750
Processed up to video index: 800
Processed up to video index: 850
Processed up to video index: 900
Processed up to video index: 950
Processed up to video index: 50
Processed up to video index: 100
Processed up to video index: 150
Processed up to video index: 200
Processed up to video index: 250
Processed up to video index: 300
Processed up to video index: 350
Processed up to video index: 400
Processed up to video index: 450
Processed up to video index: 500
Processed up to video index: 550
Processed up

In [9]:
# Show both raw video tables, each under a small text banner
for banner, frame in (('Dataframe Matthew', video_mat), ('Dataframe Apollonia', video_apo)):
    print('\n')  # print() appends its own newline, so this leaves two blank lines
    print(banner)
    print('---------------------------------------------------------------------------------------------------')
    display(frame)



Dataframe Matthew
---------------------------------------------------------------------------------------------------


Unnamed: 0,VideoID,Date/Time,Title,Description,Tags,Duration,Definition,Caption,Views,Likes
0,L5l_X4TXrZ8,2024-05-19T11:45:01Z,"How To AVOID Dating ""Players""","►► Get Vulnerable Stories, Real Insights, and ...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT19M53S,hd,false,19989,788
1,JgKATVJUY7A,2024-05-15T17:34:12Z,Stop These Insecure Dating Behaviors! - w/ Sab...,"►► Get Vulnerable Stories, Real Insights, and ...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT54M46S,hd,false,18017,653
2,ZQ13ZgSJwic,2024-05-15T15:00:36Z,"The Problem With Pretending We're ""Fine""",The toll pretending “we’re fine” takes on us c...,"[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT1M,hd,false,20045,1499
3,nHqyyt66dQk,2024-05-12T12:05:06Z,THIS Is the Most Dangerous Moment in Dating......,"►► Get Vulnerable Stories, Real Insights and P...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT12M50S,hd,false,62398,2527
4,l_Cjd2ya5R0,2024-05-11T15:00:42Z,Attracted to the Wrong People?,Why is it that we're drawn to people who treat...,"[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT59S,hd,false,68703,5024
...,...,...,...,...,...,...,...,...,...,...
938,ndra_xByibg,2010-07-21T09:02:05Z,How To Flirt With A Guy - Surefire Tips For Wo...,►► Confused about what to text him? Just copy ...,"[flirting tips for women, flirting with men, f...",PT2M38S,hd,false,1880224,10227
939,zkqYk2rJogs,2010-07-21T08:10:50Z,Powerful Conversational Flirting Tips For Wome...,►► Confused about what to text him? Just copy ...,"[flirting tips for women, flirting, flirting a...",PT2M13S,hd,false,923860,4294
940,osLuZa7SvDw,2010-07-16T11:51:38Z,The Best Places To Go On A Date - Avoid Gettin...,►► Confused about what to text him? Just copy ...,"[places to go on a date, dating advice, matthe...",PT1M50S,hd,false,171262,1216
941,RJe_C-iF3Bk,2010-07-15T17:43:41Z,How To Tell If A Guy Likes You Instantly - Pro...,►► Confused About Why Men Disappear? My FREE g...,"[how to tell if a guy likes you, how to know i...",PT1M32S,hd,false,2799444,13832




Dataframe Apollonia
---------------------------------------------------------------------------------------------------


Unnamed: 0,VideoID,Date/Time,Title,Description,Tags,Duration,Definition,Caption,Views,Likes
0,5Y0nNvCkSrI,2024-05-20T13:00:08Z,The #1 Trap You Fall Into When Seeking Validat...,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT42S,hd,false,1573,68
1,UzmqwtC4TU8,2024-05-19T13:00:18Z,The Difference Between High Quality and Emotio...,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT39S,hd,false,2142,96
2,ojDaIZLIcz0,2024-05-18T13:00:46Z,Who Pays On The First Date? Watch This!,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT31S,hd,false,784,51
3,ybWnOmrEGN8,2024-05-15T16:00:55Z,Men With Broken Relationship With Their Mother...,"* 7-DAY FREE TRIAL ( APOLLONIA'S MEN""S MASTERY...","[dating advice for men, trauma, mother wound, ...",PT14M27S,hd,false,1850,160
4,ajIQ5KngR50,2024-05-15T13:00:05Z,The Truth On How To Make A Woman Miss You #shorts,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT1M,hd,false,3528,218
...,...,...,...,...,...,...,...,...,...,...
1040,mdza2ANvx4I,2017-12-19T15:15:32Z,Why Do I Dream So Much: Detect Signals Fast!,Why do I dream so much? I asked this question ...,"[why do i dream so much, dreams, what do dream...",PT4M39S,hd,false,52162,703
1041,ioKqqeURDyY,2017-12-14T22:02:26Z,How To Love: Why People Are Addicted to The Wr...,"HOW TO LOVE, is a big topic in this modern wor...","[How to love, addicted to love, how to love yo...",PT8M1S,hd,false,2742,118
1042,fnUmNM-ax2A,2017-12-13T01:04:05Z,How To Communicate in Relationships: 5 Tips To...,How to communicate in relationships is somethi...,"[how to communicate in relationships, communic...",PT7M42S,hd,false,9787,228
1043,gNEB21fANAc,2017-11-21T20:11:42Z,Signs She's Flirting With You! - Dating Advice...,FLIRTING CAN BE CONFUSING BUT NOT ANYMORE! In ...,"[flirting, how to flirt, flirting tips, relati...",PT2M59S,hd,false,9390,174


# Formatting and Structuring Dataframe
- nan values | unique values | type of values
- Format: Duration | Date & Time


## Functions for formatting and structuring

In [10]:

def convert_to_float(df, columns):
    '''
    Convert the listed columns of df to float, in place.

    Values that cannot be parsed as numbers become NaN. Nothing is returned;
    the dataframe passed in is modified directly.
    '''
    for name in columns:
        as_numbers = pd.to_numeric(df[name], errors='coerce')  # unparseable -> NaN
        df[name] = as_numbers.astype(float)

def add_datetime_cols(df, col):
    '''
    Add date and time columns derived from a timestamp column, in place.

    The column `col` is replaced by its pandas datetime version, and two new
    columns are written: 'date' (date part) and 'time' (time part).
    Nothing is returned.
    '''
    stamps = pd.to_datetime(df[col])

    df[col] = stamps
    df['date'] = stamps.dt.date
    df['time'] = stamps.dt.time

def iso8601_to_minutes(duration):
    '''
    Convert an ISO 8601 duration string to total minutes.

    Handles the time-only form (e.g. 'PT19M53S') and the form with a day
    component (e.g. 'P1DT2H3M4S'), which previously fell through to 0.

    Parameters
    ----------
    duration : str, ISO 8601 duration made of days, hours, minutes and seconds

    Returns
    -------
    Total minutes as a number (seconds contribute a fraction), or 0 when the
    string does not start with a recognisable duration.
    '''
    # Every component is optional; the 'T' separator only appears when a time part follows.
    match = re.match(r'P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?', duration)
    if not match:
        return 0  # Default value when the duration string doesn't match the expected pattern

    days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    return days * 1440 + hours * 60 + minutes + seconds / 60



## Create Dataframe copy

In [14]:
# Work on copies so the raw tables stay untouched by the formatting steps
mat_df, apo_df = video_mat.copy(), video_apo.copy()

## Formatting and restructuring

In [15]:
# Add the 'date' and 'time' columns to both working tables
col = 'Date/Time'
for frame in (mat_df, apo_df):
    add_datetime_cols(frame, col)

In [16]:
# Add 'TotalDuration': the 'Duration' string expressed in total minutes
for frame in (mat_df, apo_df):
    frame['TotalDuration'] = frame['Duration'].map(iso8601_to_minutes)

In [18]:
# Numerical columns are stored as floats in both working tables
num_col = ['Views', 'Likes', 'TotalDuration']
for frame in (mat_df, apo_df):
    convert_to_float(frame, num_col)

## Restructured Dataframe

In [22]:
# Show both restructured video tables, each under a small text banner
for banner, frame in (('Dataframe Matthew', mat_df), ('Dataframe Apollonia', apo_df)):
    print('\n')  # print() appends its own newline, so this leaves two blank lines
    print(banner)
    print('---------------------------------------------------------------------------------------------------')
    display(frame)



Dataframe Matthew
---------------------------------------------------------------------------------------------------


Unnamed: 0,VideoID,Date/Time,Title,Description,Tags,Duration,Definition,Caption,Views,Likes,date,time,TotalDuration
0,L5l_X4TXrZ8,2024-05-19 11:45:01+00:00,"How To AVOID Dating ""Players""","►► Get Vulnerable Stories, Real Insights, and ...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT19M53S,hd,false,19989.0,788.0,2024-05-19,11:45:01,19.883333
1,JgKATVJUY7A,2024-05-15 17:34:12+00:00,Stop These Insecure Dating Behaviors! - w/ Sab...,"►► Get Vulnerable Stories, Real Insights, and ...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT54M46S,hd,false,18017.0,653.0,2024-05-15,17:34:12,54.766667
2,ZQ13ZgSJwic,2024-05-15 15:00:36+00:00,"The Problem With Pretending We're ""Fine""",The toll pretending “we’re fine” takes on us c...,"[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT1M,hd,false,20045.0,1499.0,2024-05-15,15:00:36,1.000000
3,nHqyyt66dQk,2024-05-12 12:05:06+00:00,THIS Is the Most Dangerous Moment in Dating......,"►► Get Vulnerable Stories, Real Insights and P...","[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT12M50S,hd,false,62398.0,2527.0,2024-05-12,12:05:06,12.833333
4,l_Cjd2ya5R0,2024-05-11 15:00:42+00:00,Attracted to the Wrong People?,Why is it that we're drawn to people who treat...,"[Matthew Hussey, Matt Hussey, Get The Guy, How...",PT59S,hd,false,68703.0,5024.0,2024-05-11,15:00:42,0.983333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,ndra_xByibg,2010-07-21 09:02:05+00:00,How To Flirt With A Guy - Surefire Tips For Wo...,►► Confused about what to text him? Just copy ...,"[flirting tips for women, flirting with men, f...",PT2M38S,hd,false,1880224.0,10227.0,2010-07-21,09:02:05,2.633333
939,zkqYk2rJogs,2010-07-21 08:10:50+00:00,Powerful Conversational Flirting Tips For Wome...,►► Confused about what to text him? Just copy ...,"[flirting tips for women, flirting, flirting a...",PT2M13S,hd,false,923860.0,4294.0,2010-07-21,08:10:50,2.216667
940,osLuZa7SvDw,2010-07-16 11:51:38+00:00,The Best Places To Go On A Date - Avoid Gettin...,►► Confused about what to text him? Just copy ...,"[places to go on a date, dating advice, matthe...",PT1M50S,hd,false,171262.0,1216.0,2010-07-16,11:51:38,1.833333
941,RJe_C-iF3Bk,2010-07-15 17:43:41+00:00,How To Tell If A Guy Likes You Instantly - Pro...,►► Confused About Why Men Disappear? My FREE g...,"[how to tell if a guy likes you, how to know i...",PT1M32S,hd,false,2799444.0,13832.0,2010-07-15,17:43:41,1.533333




Dataframe Apollonia
---------------------------------------------------------------------------------------------------


Unnamed: 0,VideoID,Date/Time,Title,Description,Tags,Duration,Definition,Caption,Views,Likes,date,time,TotalDuration
0,5Y0nNvCkSrI,2024-05-20 13:00:08+00:00,The #1 Trap You Fall Into When Seeking Validat...,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT42S,hd,false,1573.0,68.0,2024-05-20,13:00:08,0.700000
1,UzmqwtC4TU8,2024-05-19 13:00:18+00:00,The Difference Between High Quality and Emotio...,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT39S,hd,false,2142.0,96.0,2024-05-19,13:00:18,0.650000
2,ojDaIZLIcz0,2024-05-18 13:00:46+00:00,Who Pays On The First Date? Watch This!,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT31S,hd,false,784.0,51.0,2024-05-18,13:00:46,0.516667
3,ybWnOmrEGN8,2024-05-15 16:00:55+00:00,Men With Broken Relationship With Their Mother...,"* 7-DAY FREE TRIAL ( APOLLONIA'S MEN""S MASTERY...","[dating advice for men, trauma, mother wound, ...",PT14M27S,hd,false,1850.0,160.0,2024-05-15,16:00:55,14.450000
4,ajIQ5KngR50,2024-05-15 13:00:05+00:00,The Truth On How To Make A Woman Miss You #shorts,Subscribe Here: https://www.youtube.com/c/Apol...,"[shorts, shorts video, youtube shorts, dating,...",PT1M,hd,false,3528.0,218.0,2024-05-15,13:00:05,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1040,mdza2ANvx4I,2017-12-19 15:15:32+00:00,Why Do I Dream So Much: Detect Signals Fast!,Why do I dream so much? I asked this question ...,"[why do i dream so much, dreams, what do dream...",PT4M39S,hd,false,52162.0,703.0,2017-12-19,15:15:32,4.650000
1041,ioKqqeURDyY,2017-12-14 22:02:26+00:00,How To Love: Why People Are Addicted to The Wr...,"HOW TO LOVE, is a big topic in this modern wor...","[How to love, addicted to love, how to love yo...",PT8M1S,hd,false,2742.0,118.0,2017-12-14,22:02:26,8.016667
1042,fnUmNM-ax2A,2017-12-13 01:04:05+00:00,How To Communicate in Relationships: 5 Tips To...,How to communicate in relationships is somethi...,"[how to communicate in relationships, communic...",PT7M42S,hd,false,9787.0,228.0,2017-12-13,01:04:05,7.700000
1043,gNEB21fANAc,2017-11-21 20:11:42+00:00,Signs She's Flirting With You! - Dating Advice...,FLIRTING CAN BE CONFUSING BUT NOT ANYMORE! In ...,"[flirting, how to flirt, flirting tips, relati...",PT2M59S,hd,false,9390.0,174.0,2017-11-21,20:11:42,2.983333


# Dataframe first-order Statistics

In [21]:
# Column groups for the statistics section.
# NOTE(review): 'TotalDuration' appears in both groups — confirm it belongs in cat_col.
num_col = [
    'Views',
    'Likes',
    'TotalDuration',
]
cat_col = [
    'Title',
    'Description',
    'TotalDuration',
]

In [19]:
# Column dtypes of the raw (unformatted) tables, Matthew first
for raw_frame in (video_mat, video_apo):
    display(raw_frame.dtypes)

VideoID        object
Date/Time      object
Title          object
Description    object
Tags           object
Duration       object
Definition     object
Caption        object
Views          object
Likes          object
dtype: object

VideoID        object
Date/Time      object
Title          object
Description    object
Tags           object
Duration       object
Definition     object
Caption        object
Views          object
Likes          object
dtype: object

In [None]:
# Scratch copy of the raw Matthew table with the counters converted to numbers
mat = video_mat.copy()
numerical_col = ['Views', 'Likes']
# convert_to_int is not defined anywhere in this notebook (NameError);
# convert_to_float is the conversion helper that exists.
convert_to_float(mat, numerical_col)
# A bare `mat.dtypes` in the middle of a cell is evaluated but never shown.
display(mat.dtypes)
mat

In [None]:
# Add the duration in minutes, derived from the 'Duration' column
mat['TotalDuration'] = mat['Duration'].map(iso8601_to_minutes)

mat

In [None]:
# Distinct values found in the 'Caption' column
video_apo.loc[:, 'Caption'].unique()

In [None]:
# Number of missing values per column
video_apo.isnull().sum()

In [None]:
# Per-column summary of the raw Matthew table: distinct values, non-null values, row count.

# 'Tags' holds Python lists, which are unhashable and make nunique() raise a
# TypeError; the lists are turned into tuples on a throwaway copy before counting.
hashable_mat = video_mat.assign(
    Tags=video_mat['Tags'].map(lambda tags: tuple(tags) if isinstance(tags, list) else tags)
)

# Create a dictionary to store the results
agg_results = {
    'nunique': hashable_mat.nunique(),
    'count': video_mat.count(),
    # scalar row count; repeated for every column when the frame is built
    'size': video_mat.shape[0],
}

# Combine the results into a DataFrame
result = pd.DataFrame(agg_results)

display(result)

In [None]:
aggregations = ['nunique', 'count', 'size']

# 'Tags' holds Python lists, which are unhashable and make 'nunique' raise a
# TypeError; the lists are turned into tuples on a throwaway copy before aggregating.
hashable_mat = video_mat.assign(
    Tags=video_mat['Tags'].map(lambda tags: tuple(tags) if isinstance(tags, list) else tags)
)

# Apply the aggregation functions to each column
result = hashable_mat.agg({col: aggregations for col in hashable_mat.columns})

# One row per original column, one column per aggregation
result = result.T

display(result)

# Plot statistics

# Save data

In [None]:
# Export the raw Matthew table as a tab-separated file.
# The previous draft was disabled inside a string and could not have run:
# str(print(...)) evaluates to 'None', to_csv received the file name as a second
# positional argument, index='False' is a truthy string, and sep was a bytes object.
# NOTE(review): the draft built the folder by dropping the last nine characters of the
# working directory and appending 'data'; this assumes a `data` folder next to the
# notebook's folder was intended — confirm before enabling.
SAVE_RAW_DATA = False  # set to True to write the file

if SAVE_RAW_DATA:
    data_dir = pathlib.Path(notebook_path).parent / 'data'
    data_dir.mkdir(parents=True, exist_ok=True)
    csv_file = 'data_mat.csv'
    video_mat.to_csv(data_dir / csv_file, index=False, sep='\t', encoding='utf-8', header=True)