In [35]:
!pip install plotly nbformat




[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ast
import re


# Load Data

In [37]:
file_path = "../../../data/processed/merged_data.csv"
df = pd.read_csv(file_path)

In [38]:
df.head()

Unnamed: 0,CategoryType,author.commentSetting,author.downloadSetting,author.duetSetting,author.ftc,author.id,author.isADVirtual,author.isEmbedBanned,author.nickname,author.openFavorite,...,video.videoQuality,video.volumeInfo.Loudness,video.volumeInfo.Peak,video.width,collectTime,video.claInfo.captionsType,hashtags,num_hashtags,video_transcription,url
0,111,0,0,0,False,7128234498731803674,False,False,1 phút Sài Gòn,False,...,normal,-7.2,1.0,576,1741176061,,"1phutsaigon,saigon,saigondidau,cafesaigon,yenc...",7,cuối cùng mà mình thấy nhau và rồi tháng mấy đ...,https://www.tiktok.com/@7128234498731803674/vi...
1,105,0,0,0,False,7128234498731803674,False,False,1 phút Sài Gòn,False,...,normal,-8.3,1.0,576,1741176061,,"1phutsaigon,saigon,saigondidau,duxuan,tet2025",5,Nghe xuân sang thấy trong lòng mình chứa chan ...,https://www.tiktok.com/@7128234498731803674/vi...
2,111,0,0,0,False,7128234498731803674,False,False,1 phút Sài Gòn,False,...,normal,-7.0,1.0,576,1741176062,,"1phutsaigon,saigon,saigondidau,halacoffee,cafe...",6,bồi hồi liên kết sẽ quay về thăm quê em xuân đ...,https://www.tiktok.com/@7128234498731803674/vi...
3,111,0,0,0,False,7128234498731803674,False,False,1 phút Sài Gòn,False,...,normal,-10.4,1.0,576,1741176062,,"1phutsaigon,saigon,saigondidau,saigondamdang,s...",9,bánh chưng bánh giò bánh chưng tương lai mấy g...,https://www.tiktok.com/@7128234498731803674/vi...
4,105,0,0,0,False,7128234498731803674,False,False,1 phút Sài Gòn,False,...,normal,-14.8,0.51286,576,1741176063,,"1phutsaigon,saigondidau,langthangsaigon,tungng...",6,chờ mãi đến bây giờ thì anh mới nhận ra em mìn...,https://www.tiktok.com/@7128234498731803674/vi...


In [39]:
df.columns

Index(['CategoryType', 'author.commentSetting', 'author.downloadSetting',
       'author.duetSetting', 'author.ftc', 'author.id', 'author.isADVirtual',
       'author.isEmbedBanned', 'author.nickname', 'author.openFavorite',
       'author.privateAccount', 'author.relation', 'author.secUid',
       'author.secret', 'author.signature', 'author.stitchSetting',
       'author.uniqueId', 'author.verified', 'authorStats.diggCount',
       'authorStats.followerCount', 'authorStats.followingCount',
       'authorStats.friendCount', 'authorStats.heart',
       'authorStats.heartCount', 'authorStats.videoCount', 'collected',
       'createTime', 'desc', 'digged', 'diversificationId', 'duetDisplay',
       'duetEnabled', 'forFriend', 'id', 'isAd', 'itemCommentStatus',
       'item_control.can_repost', 'music.authorName', 'music.duration',
       'music.id', 'music.isCopyrighted', 'music.original', 'music.title',
       'officalItem', 'originalItem', 'privateItem', 'secret', 'shareEnabled',
     

# Video Performance (Views, Likes, Share, Repost)

In [40]:
# Check necessary columns
required_columns = ['video.duration', 'statsV2.playCount', 'statsV2.diggCount', 'statsV2.commentCount', 'statsV2.shareCount', 'CategoryType']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(f"Missing columns: {missing_columns}")

In [41]:
# Ensure bins are unique and correctly categorized
max_duration = df['video.duration'].max()

bins = [0, 10, 30, 60, 120, 180, 240, 300, max_duration + 1]
labels = ['<10s', '10-30s', '30-60s', '1-2 mins', '2-3 mins', '3-4 mins', '4-5 mins', '>5 mins']

df['video_length_category'] = pd.cut(df['video.duration'], bins=bins, labels=labels, right=False)


In [42]:
# Insight 1.1: Views by Video Length Category
views_by_length = df.groupby('video_length_category')['statsV2.playCount'].mean().reset_index()
fig1 = px.bar(views_by_length, x='video_length_category', y='statsV2.playCount',
              title="Average Views by Video Length Category",
              labels={'statsV2.playCount': 'Average Views', 'video_length_category': 'Video Length Category'},
              color='video_length_category')
fig1.show()









In [43]:
df['CategoryType']

0        111
1        105
2        111
3        111
4        105
        ... 
23094      0
23095    105
23096    111
23097    111
23098    111
Name: CategoryType, Length: 23099, dtype: int64

In [44]:
# Insight 1.2: Optimal Video Length by Industry
if 'CategoryType' in df.columns:
    industry_length = df.groupby(['CategoryType', 'video_length_category'])[['statsV2.playCount', 'statsV2.diggCount', 'statsV2.commentCount', 'statsV2.shareCount']].mean().reset_index()
    fig2 = px.bar(industry_length, x='CategoryType', y='statsV2.playCount', color='video_length_category',
                  title="Average Views by Industry and Video Length Category",
                  labels={'statsV2.playCount': 'Average Views', 'CategoryType': 'Industry', 'video_length_category': 'Video Length Category'},
                  barmode='group')
    fig2.show()









In [45]:
# Insight 1.3: Engagement Rate vs Video Length
for metric in ['statsV2.diggCount', 'statsV2.commentCount', 'statsV2.shareCount']:
    df[f'{metric}_rate'] = df[metric] / df['statsV2.playCount'] * 100

engagement_by_length = df.groupby('video_length_category')[[f'statsV2.diggCount_rate', f'statsV2.commentCount_rate', f'statsV2.shareCount_rate']].mean().reset_index()
fig3 = go.Figure()
for metric in ['statsV2.diggCount_rate', 'statsV2.commentCount_rate', 'statsV2.shareCount_rate']:
    fig3.add_trace(go.Bar(x=engagement_by_length['video_length_category'], y=engagement_by_length[metric], name=metric.replace('statsV2.', '').replace('_rate', '').capitalize()))
fig3.update_layout(title="Engagement Rate by Video Length Category", xaxis_title="Video Length Category", yaxis_title="Engagement Rate", barmode='group')
fig3.show()





# Music

In [46]:
# Insight 2.4: Do effects increase virality?
viral_threshold = df['statsV2.playCount'].quantile(0.90)
df['is_viral'] = df['statsV2.playCount'] > viral_threshold
effects_impact = df.groupby('stitchEnabled')['is_viral'].mean().reset_index()
fig7 = px.bar(effects_impact, x='stitchEnabled', y='is_viral',
              title="Effect Usage and Virality",
              labels={'stitchEnabled': 'Used Effects', 'is_viral': 'Proportion Viral'})
fig7.show()

In [47]:
# Insight 2.3: Does music tempo affect completion rate?
df['music_tempo'] = df['video.volumeInfo.Loudness'].apply(lambda x: 'Fast' if x > df['video.volumeInfo.Loudness'].median() else 'Slow')
completion_by_tempo = df.groupby('music_tempo')[['statsV2.playCount']].mean().reset_index()
fig6 = px.bar(completion_by_tempo, x='music_tempo', y='statsV2.playCount',
              title="Completion Rate by Music Tempo",
              labels={'music_tempo': 'Music Tempo', 'statsV2.playCount': 'Avg Views'})
fig6.show()

In [48]:
df['video.volumeInfo.Loudness']

0        -7.2
1        -8.3
2        -7.0
3       -10.4
4       -14.8
         ... 
23094   -17.3
23095   -17.9
23096   -17.9
23097   -13.8
23098   -13.5
Name: video.volumeInfo.Loudness, Length: 23099, dtype: float64

# Time Posting

In [49]:
df['createTime'] = pd.to_datetime(df['createTime'])  # If it's already in a recognizable datetime format

In [50]:
df['createTime']

0       1970-01-01 00:00:01.739455727
1       1970-01-01 00:00:01.738066138
2       1970-01-01 00:00:01.737296133
3       1970-01-01 00:00:01.736946020
4       1970-01-01 00:00:01.734006160
                     ...             
23094   1970-01-01 00:00:01.701865220
23095   1970-01-01 00:00:01.701517563
23096   1970-01-01 00:00:01.700912317
23097   1970-01-01 00:00:01.700652252
23098   1970-01-01 00:00:01.741177467
Name: createTime, Length: 23099, dtype: datetime64[ns]

In [51]:
# Insight 3.1: Views by Posting Time

df['hour'] = df['createTime'].dt.hour
time_bins = [(0, 12, 'Morning (0-12h)'), (12, 19, 'Afternoon (12-19h)'), (19, 24, 'Evening (19-24h)')]
df['time_category'] = pd.cut(df['hour'], bins=[0, 12, 19, 24], labels=['Morning', 'Afternoon', 'Evening'], right=False)

views_by_time = df.groupby('time_category', observed=False)['statsV2.playCount'].mean().reset_index()
fig4 = px.bar(views_by_time, x='time_category', y='statsV2.playCount',
              title="Average Views by Posting Time",
              labels={'statsV2.playCount': 'Average Views', 'time_category': 'Time of Day'},
              color='time_category')
fig4.show()







In [52]:
# Insight 3.2: Engagement on Weekends vs. Weekdays
df['day_of_week'] = df['createTime'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].apply(lambda x: 'Weekend' if x >= 4 else 'Weekday')

engagement_by_day = df.groupby('is_weekend', observed=False)[['statsV2.diggCount', 'statsV2.commentCount', 'statsV2.shareCount']].mean().reset_index()
fig5 = px.bar(engagement_by_day.melt(id_vars=['is_weekend'], var_name='Metric', value_name='Average Engagement'),
              x='is_weekend', y='Average Engagement', color='Metric',
              title="Engagement Comparison: Weekday vs. Weekend",
              barmode='group')
fig5.show()





# Hashtags

In [53]:
df['hashtags'] = df['hashtags'].apply(lambda x: x.split(',') if isinstance(x, str) and x.strip() else [])

In [54]:
trending_hashtags = {'fyp', 'viral', 'xyzbca', 'trending'}  # Add other trending hashtags if needed

df['has_trending_hashtag'] = df['hashtags'].apply(lambda tags: any(tag in trending_hashtags for tag in tags))
df['has_trending_hashtag']


0        False
1        False
2        False
3        False
4        False
         ...  
23094    False
23095    False
23096    False
23097    False
23098    False
Name: has_trending_hashtag, Length: 23099, dtype: bool

In [55]:
industry_hashtags = {'skincare', 'fashion', 'tech', 'food', 'education'}

df['has_industry_hashtag'] = df['hashtags'].apply(lambda tags: any(tag in industry_hashtags for tag in tags))
df['has_industry_hashtag'] 


0        False
1        False
2        False
3        False
4        False
         ...  
23094    False
23095    False
23096    False
23097    False
23098    False
Name: has_industry_hashtag, Length: 23099, dtype: bool

In [56]:
# Visualization for Trending Hashtags
trending_stats = df.groupby('has_trending_hashtag')['statsV2.playCount'].mean().reset_index()
fig1 = px.bar(trending_stats, x='has_trending_hashtag', y='statsV2.playCount',
              title="Average Views for Videos with vs. without Trending Hashtags",
              labels={'has_trending_hashtag': 'Has Trending Hashtag?', 'statsV2.playCount': 'Average Views'},
              color='has_trending_hashtag')
fig1.show()






In [57]:
# Insight 4.2: Do 3-5 hashtags perform better than too many or too few?
df['num_hashtags'] = df['hashtags'].apply(len)
df['hashtag_group'] = pd.cut(df['num_hashtags'], bins=[0, 2, 5, 10, float('inf')],
                             labels=['1-2', '3-5', '6-10', '10+'], right=False)

hashtag_stats = df.groupby('hashtag_group')[['statsV2.playCount', 'statsV2.diggCount', 'statsV2.commentCount', 'statsV2.shareCount']].mean().reset_index()

fig2 = px.bar(hashtag_stats, x='hashtag_group', y='statsV2.playCount',
              title="Average Views by Number of Hashtags",
              labels={'statsV2.playCount': 'Average Views', 'hashtag_group': 'Hashtag Count Group'},
              color='hashtag_group')
fig2.show()









In [58]:
# Insight 4.3: Do industry-specific hashtags perform better?
industry_hashtags = {'skincare', 'fashion', 'tech', 'food', 'education'}
df['has_industry_hashtag'] = df['hashtags'].apply(lambda x: any(tag in x for tag in industry_hashtags))

industry_stats = df.groupby('has_industry_hashtag')['statsV2.playCount'].mean().reset_index()
fig3 = px.bar(industry_stats, x='has_industry_hashtag', y='statsV2.playCount',
              title="Average Views for Videos with Industry-Specific Hashtags",
              labels={'has_industry_hashtag': 'Has Industry Hashtag?', 'statsV2.playCount': 'Average Views'},
              color='has_industry_hashtag')
fig3.show()





In [59]:
# Insight 4.4: Do localized hashtags improve reach?
localized_hashtags = {'vietnam', 'hanoi', 'saigon'}
df['has_localized_hashtag'] = df['hashtags'].apply(lambda x: any(tag in x for tag in localized_hashtags))

local_stats = df.groupby('has_localized_hashtag')['statsV2.playCount'].mean().reset_index()
fig4 = px.bar(local_stats, x='has_localized_hashtag', y='statsV2.playCount',
              title="Average Views for Videos with Localized Hashtags",
              labels={'has_localized_hashtag': 'Has Localized Hashtag?', 'statsV2.playCount': 'Average Views'},
              color='has_localized_hashtag')
fig4.show()





# Hashtag2

In [60]:
df['hashtags']

0        [1phutsaigon, saigon, saigondidau, cafesaigon,...
1        [1phutsaigon, saigon, saigondidau, duxuan, tet...
2        [1phutsaigon, saigon, saigondidau, halacoffee,...
3        [1phutsaigon, saigon, saigondidau, saigondamda...
4        [1phutsaigon, saigondidau, langthangsaigon, tu...
                               ...                        
23094      [hotteok, pulmuone, hotteokxucxich, hotteokngo]
23095                                [detox, cleanhealthy]
23096                                [dememoria, bodymist]
23097                                                   []
23098                               [cheesecoffee, diudao]
Name: hashtags, Length: 23099, dtype: object

In [61]:
def analyze_hashtag_engagement(df, top_n=10):

    # Create a dictionary to store hashtag engagement
    hashtag_dict = {}

    # Iterate through rows
    for _, row in df.iterrows():
        for hashtag in row['hashtags']:
            if hashtag not in hashtag_dict:
                hashtag_dict[hashtag] = {'count': 0, 'total_comments': 0, 'total_shares': 0, 'total_likes': 0, 'total_views': 0}

            # Update hashtag statistics
            hashtag_dict[hashtag]['count'] += 1
            hashtag_dict[hashtag]['total_comments'] += row['statsV2.commentCount']
            hashtag_dict[hashtag]['total_shares'] += row['statsV2.shareCount']
            hashtag_dict[hashtag]['total_likes'] += row['statsV2.diggCount']
            hashtag_dict[hashtag]['total_views'] += row['statsV2.playCount']

    # Convert to DataFrame
    hashtag_df = pd.DataFrame.from_dict(hashtag_dict, orient='index').reset_index()
    hashtag_df.rename(columns={'index': 'hashtag'}, inplace=True)

    # Compute average engagement per occurrence
    hashtag_df['avg_comments'] = hashtag_df['total_comments'] / hashtag_df['count']
    hashtag_df['avg_shares'] = hashtag_df['total_shares'] / hashtag_df['count']
    hashtag_df['avg_likes'] = hashtag_df['total_likes'] / hashtag_df['count']
    hashtag_df['avg_views'] = hashtag_df['total_views'] / hashtag_df['count']

    # Get separate top hashtags for each metric
    top_hashtags = {
        "views": hashtag_df.sort_values(by='avg_views', ascending=False).head(top_n),
        "likes": hashtag_df.sort_values(by='avg_likes', ascending=False).head(top_n),
        "shares": hashtag_df.sort_values(by='avg_shares', ascending=False).head(top_n),
        "comments": hashtag_df.sort_values(by='avg_comments', ascending=False).head(top_n)
    }

    return top_hashtags

In [None]:
def plot_combined_top_hashtags(top_hashtags):
    """
    Plots a grouped bar chart comparing different top hashtags for each engagement type.

    Parameters:
    top_hashtags (dict): Dictionary containing top hashtags ranked by different metrics.
    """
    # Prepare data for plotting
    metrics = ["views", "likes", "shares", "comments"]
    data = {metric: top_hashtags[metric].set_index("hashtag") for metric in metrics}

    # Get all unique hashtags that appear in any of the top 10 lists
    all_hashtags = list(set().union(*[df.index.tolist() for df in data.values()]))

    # Create a DataFrame with all hashtags and fill missing values with 0
    combined_df = pd.DataFrame(index=all_hashtags)
    for metric in metrics:
        combined_df[metric] = data[metric]['avg_' + metric].reindex(combined_df.index, fill_value=0)

    # Convert DataFrame index to column
    combined_df = combined_df.reset_index().rename(columns={"index": "hashtag"})

    # Plot grouped bar chart
    fig = go.Figure()
    for metric in metrics:
        fig.add_trace(go.Bar(x=combined_df['hashtag'], y=combined_df[metric], name=metric.capitalize()))

    fig.update_layout(
        title="Top Hashtags Ranked Separately by Views, Likes, Shares, and Comments",
        xaxis_title="Hashtag",
        yaxis_title="Average Engagement",
        barmode="group",
        xaxis=dict(tickangle=-45)
    )

    fig.show()

In [63]:

# Get separate top hashtags for each engagement metric
top_hashtags = analyze_hashtag_engagement(df, 20)

# Plot comparison of different top hashtags
plot_combined_top_hashtags(top_hashtags)

In [64]:
import pandas as pd
import ast
import plotly.express as px

def analyze_hashtag_engagement(df, top_n=5):
    hashtag_dict = {}

    for _, row in df.iterrows():
        for hashtag in row['hashtags']:
            if hashtag not in hashtag_dict:
                hashtag_dict[hashtag] = {'count': 0, 'total_comments': 0, 'total_shares': 0, 'total_likes': 0, 'total_views': 0}

            hashtag_dict[hashtag]['count'] += 1
            hashtag_dict[hashtag]['total_comments'] += row['statsV2.commentCount']
            hashtag_dict[hashtag]['total_shares'] += row['statsV2.shareCount']
            hashtag_dict[hashtag]['total_likes'] += row['statsV2.diggCount']
            hashtag_dict[hashtag]['total_views'] += row['statsV2.playCount']

    hashtag_df = pd.DataFrame.from_dict(hashtag_dict, orient='index').reset_index()
    hashtag_df.rename(columns={'index': 'hashtag'}, inplace=True)

    hashtag_df['avg_comments'] = hashtag_df['total_comments'] / hashtag_df['count']
    hashtag_df['avg_shares'] = hashtag_df['total_shares'] / hashtag_df['count']
    hashtag_df['avg_likes'] = hashtag_df['total_likes'] / hashtag_df['count']
    hashtag_df['avg_views'] = hashtag_df['total_views'] / hashtag_df['count']

    # Extract top hashtags for each metric
    top_hashtags = {
        "Views": hashtag_df.nlargest(top_n, 'avg_views')[['hashtag', 'avg_views']],
        "Likes": hashtag_df.nlargest(top_n, 'avg_likes')[['hashtag', 'avg_likes']],
        "Shares": hashtag_df.nlargest(top_n, 'avg_shares')[['hashtag', 'avg_shares']],
        "Comments": hashtag_df.nlargest(top_n, 'avg_comments')[['hashtag', 'avg_comments']]
    }

    return top_hashtags

def plot_improved_top_hashtags(top_hashtags):
    all_data = []
    for metric, df in top_hashtags.items():
        df = df.copy()
        df.rename(columns={df.columns[1]: 'Engagement'}, inplace=True)
        df['Metric'] = metric  # Add a column to identify engagement type
        all_data.append(df)

    final_df = pd.concat(all_data)

    fig = px.bar(final_df, 
                 x='hashtag', 
                 y='Engagement', 
                 color='Metric', 
                 barmode='group',
                 title="Top Hashtags by Views, Likes, Shares, and Comments",
                 labels={'hashtag': 'Hashtag', 'Engagement': 'Average Engagement'},
                 text_auto=True)

    fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for readability
    fig.show()

top_hashtags = analyze_hashtag_engagement(df, 10)
plot_improved_top_hashtags(top_hashtags)






# Hashtag Trends

In [None]:
import plotly.express as px

def plot_hashtag_trends(df, top_n=5):
    """Plots trending hashtags over time based on views."""
    df["create_time"] = pd.to_datetime(df["create_time"])
    
    # Explode hashtags for proper grouping
    exploded = df.explode("hashtags")
    
    # Aggregate engagement by date and hashtag
    hashtag_trends = exploded.groupby(["create_time", "hashtags"])["statsV2.playCount"].sum().reset_index()
    
    # Get top N hashtags by total views
    top_hashtags = hashtag_trends.groupby("hashtags")["statsV2.playCount"].sum().nlargest(top_n).index
    filtered_data = hashtag_trends[hashtag_trends["hashtags"].isin(top_hashtags)]
    
    # Plot line chart
    fig = px.line(filtered_data, 
                  x="create_time", 
                  y="statsV2.playCount", 
                  color="hashtags",
                  title="📈 Hashtag Popularity Over Time (Views)",
                  labels={"create_time": "Date", "statsV2.playCount": "Total Views"},
                  markers=True)
    
    fig.update_layout(xaxis_tickangle=-45)
    return fig


In [67]:
df['createTime']

0       1970-01-01 00:00:01.739455727
1       1970-01-01 00:00:01.738066138
2       1970-01-01 00:00:01.737296133
3       1970-01-01 00:00:01.736946020
4       1970-01-01 00:00:01.734006160
                     ...             
23094   1970-01-01 00:00:01.701865220
23095   1970-01-01 00:00:01.701517563
23096   1970-01-01 00:00:01.700912317
23097   1970-01-01 00:00:01.700652252
23098   1970-01-01 00:00:01.741177467
Name: createTime, Length: 23099, dtype: datetime64[ns]

In [74]:
video_info_df = pd.read_csv("../../../data/interim/video_info.csv", low_memory=False)
video_info_df['createTime'] = pd.to_datetime(video_info_df['createTime'], unit='s')

In [75]:
set(video_info_df.columns)

{'AIGCDescription',
 'BAInfo',
 'CategoryType',
 'adAuthorization',
 'adLabelVersion',
 'aigcLabelType',
 'audio_to_text',
 'author.commentSetting',
 'author.downloadSetting',
 'author.duetSetting',
 'author.ftc',
 'author.id',
 'author.isADVirtual',
 'author.isEmbedBanned',
 'author.nickname',
 'author.openFavorite',
 'author.privateAccount',
 'author.relation',
 'author.roomId',
 'author.secUid',
 'author.secret',
 'author.signature',
 'author.stitchSetting',
 'author.uniqueId',
 'author.verified',
 'authorStats.diggCount',
 'authorStats.followerCount',
 'authorStats.followingCount',
 'authorStats.friendCount',
 'authorStats.heart',
 'authorStats.heartCount',
 'authorStats.videoCount',
 'backendSourceEventTracking',
 'brandOrganicType',
 'collectTime',
 'collected',
 'createTime',
 'desc',
 'digged',
 'diversificationId',
 'duetDisplay',
 'duetEnabled',
 'duetInfo.duetFromId',
 'forFriend',
 'id',
 'imagePost.cover.imageHeight',
 'imagePost.cover.imageWidth',
 'imagePost.shareCover.i

In [76]:
video_info_df['createTime']

0       2025-03-04 14:08:17
1       2025-03-03 14:07:35
2       2025-03-01 04:55:00
3       2025-02-28 13:15:37
4       2025-02-27 14:16:53
                ...        
32598   2023-11-25 11:38:37
32599   2023-11-22 11:24:12
32600   2023-11-18 11:55:32
32601   2023-11-17 11:57:23
32602   2025-03-05 12:24:27
Name: createTime, Length: 32603, dtype: datetime64[ns]

In [87]:
# Find the row with the given id
target_id = 7363237625462787345
row = df[df['video.id'] == target_id]

# Print the row
row


Unnamed: 0,CategoryType,author.commentSetting,author.downloadSetting,author.duetSetting,author.ftc,author.id,author.isADVirtual,author.isEmbedBanned,author.nickname,author.openFavorite,...,is_viral,music_tempo,hour,time_category,day_of_week,is_weekend,has_trending_hashtag,has_industry_hashtag,hashtag_group,has_localized_hashtag
