#### Develop code to perform basic to advanced operations with both NumPy and pandas on the TikTok video performance dataset

In [2]:
import numpy as np
import pandas as pd
# Read the TikTok dataset CSV (path relative to the working directory) into a DataFrame.
data = pd.read_csv("Datasets/tiktok_dataset.csv")

In [3]:
# Show the first rows of the dataset as plain text.
print(data.head())

# DataFrame.info() writes its summary to stdout on its own and returns None,
# so it is called directly; wrapping it in print() would append a stray "None".
data.info()

   # claim_status    video_id  video_duration_sec  \
0  1        claim  7017666017                  59   
1  2        claim  4014381136                  32   
2  3        claim  9859838091                  31   
3  4        claim  1866847991                  25   
4  5        claim  7105231098                  19   

                            video_transcription_text verified_status  \
0  someone shared with me that drone deliveries a...    not verified   
1  someone shared with me that there are more mic...    not verified   
2  someone shared with me that american industria...    not verified   
3  someone shared with me that the metro of st. p...    not verified   
4  someone shared with me that the number of busi...    not verified   

  author_ban_status  video_view_count  video_like_count  video_share_count  \
0      under review          343296.0           19425.0              241.0   
1            active          140877.0           77355.0            19034.0   
2            a

In [4]:
# BASIC OPERATIONS
# Two single-column reductions: the mean of the duration column and the
# sum of the view-count column.

mean_duration = data["video_duration_sec"].mean()
total_views = data["video_view_count"].sum()

# Report both aggregates, in the same order they were computed.
print(f"Mean video duration: {mean_duration}")
print(f"Total view count: {total_views}")


Mean video duration: 32.42173150345682
Total view count: 4860858134.0


In [5]:
# ADVANCED OPERATIONS
# Dispersion, central tendency and linear association, mixing NumPy
# reductions with pandas Series methods.

# Standard deviation of the view counts via NumPy.
# NOTE: np.std defaults to ddof=0, unlike pandas' own .std() (ddof=1).
std_dev_views = np.std(data["video_view_count"])

# Missing like counts are removed first, then NumPy takes the median.
likes = data["video_like_count"].dropna()
median_likes = np.median(likes)

# Series.corr with its default method, between views and likes.
correlation_views_likes = data["video_view_count"].corr(data["video_like_count"])

print(f"Standard deviation of video view counts: {std_dev_views}")
print(f"Median of video like counts: {median_likes}")
print(f"Correlation between views and likes: {correlation_views_likes}")


Standard deviation of video view counts: 322884.82091253944
Median of video like counts: 3403.5
Correlation between views and likes: 0.8043221351779141


In [6]:
# Average view count within each claim_status group.
grouped_by_claim = data.groupby("claim_status")["video_view_count"].agg("mean")
print(grouped_by_claim)

claim_status
claim      501029.452748
opinion      4956.432250
Name: video_view_count, dtype: float64


In [7]:
# Derive a likes-to-views ratio per video (element-wise division of the two
# columns) and store it on the frame as a new 'likes_per_view' column.
data["likes_per_view"] = data["video_like_count"].div(data["video_view_count"])
print(data.loc[:, ["video_id", "likes_per_view"]])

         video_id  likes_per_view
0      7017666017        0.056584
1      4014381136        0.549096
2      9859838091        0.108282
3      1866847991        0.548459
4      7105231098        0.622910
...           ...             ...
19377  7578226840             NaN
19378  6079236179             NaN
19379  2565539685             NaN
19380  2969178540             NaN
19381  8132759688             NaN

[19382 rows x 2 columns]


In [8]:
# Keep only the rows whose verified_status equals 'verified', then preview them.
verified_videos = data.loc[data["verified_status"].eq("verified")]
print(verified_videos.head())

       # claim_status    video_id  video_duration_sec  \
9     10        claim  4660861094                  45   
76    77        claim  2342638585                  31   
105  106        claim  7687467484                  49   
117  118        claim  2115386652                  48   
140  141        claim  6296788623                  16   

                              video_transcription_text verified_status  \
9    someone shared with me that it would take a 50...        verified   
76   a friend shared with a friend that only one-fo...        verified   
105  a friend mentioned someone discovered that lon...        verified   
117  a friend mentioned someone discovered that bee...        verified   
140  a friend reported someone discovered that shan...        verified   

    author_ban_status  video_view_count  video_like_count  video_share_count  \
9              active          931587.0          171051.0            67739.0   
76             active          677288.0          214