# In [None]:
# YouTube Trending Video Analysis (India)
# Author: Vedant Bhusari
# Tools: Python (Pandas, Matplotlib, Seaborn, TextBlob)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob

# Columns the analysis below actually reads. Cleaning is restricted to these
# so that a missing value in a column this script never touches does not
# discard an otherwise usable row.
ANALYSIS_COLUMNS = ['category_id', 'views', 'likes', 'comment_count', 'title']

# 1️⃣ Load the dataset
df = pd.read_csv("INvideos_sample.csv")
# df.shape is a (rows, columns) tuple, so label it as such.
print("Shape (rows, columns):", df.shape)

# 2️⃣ Clean the data
# Remove exact duplicate rows, then rows missing any value the analysis needs.
df.drop_duplicates(inplace=True)
df.dropna(subset=ANALYSIS_COLUMNS, inplace=True)
if df.empty:
    # Fail early with a clear message instead of an obscure plotting error.
    raise ValueError("No rows left after cleaning; nothing to analyse.")

# 3️⃣ Basic info
# head() returns the first five rows (not columns).
print("First 5 rows:\n", df.head())

# 4️⃣ Top categories
# Count videos per category_id and keep the five most frequent ids.
top_cats = df['category_id'].value_counts().head(5)
# Draw on an explicit Axes so this chart never shares a canvas with a later
# plot, even when plt.show() does not close the figure.
cat_fig, cat_ax = plt.subplots()
top_cats.plot(kind='bar', color='skyblue', title='Top 5 Video Categories', ax=cat_ax)
cat_ax.set_ylabel('Number of Videos')
plt.show()

# 5️⃣ Correlation
# Pairwise correlation matrix of the three engagement columns.
corr = df[['views', 'likes', 'comment_count']].corr()
corr_fig, corr_ax = plt.subplots()
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=corr_ax)
corr_ax.set_title("Correlation between Views, Likes, Comments")
plt.show()

# 6️⃣ Simple sentiment analysis on titles
# TextBlob polarity of each title is stored in a new 'sentiment' column.
df['sentiment'] = df['title'].apply(lambda x: TextBlob(str(x)).sentiment.polarity)
print("Average Sentiment:", round(df['sentiment'].mean(), 2))

# 7️⃣ Insights
# Report the values measured above instead of fixed conclusions, so the
# summary always matches the data that was actually loaded.
# NOTE(review): only category_id values are available here; mapping ids to
# names (e.g. "Entertainment", "Music") presumably needs a separate lookup
# table — confirm before naming categories in the summary.
top_share = top_cats.sum() / len(df)
views_likes_r = corr.loc['views', 'likes']
non_negative_share = (df['sentiment'] >= 0).mean()
print(f"✅ Top category ids {list(top_cats.index)} cover {top_share:.0%} of trending videos.")
print(f"✅ Views–Likes correlation: {views_likes_r:.2f}.")
print(f"✅ {non_negative_share:.0%} of titles have neutral or positive sentiment.")
