<p style="text-align:center">
    <a href="https://tukkalearn.vercel.app" target="_blank">
    <img src="https://raw.githubusercontent.com/itzDM/publicAssets/refs/heads/main/opengraph-image.png" width="250"  alt="Tukka Learn">
    </a>
</p>


In [None]:
import pandas as pd
import numpy as np
from collections import Counter

# Remote CSV with the Netflix titles; `df` is the frame the later cells read from.
url = (
    "https://raw.githubusercontent.com/tukkaLearn/datasets/"
    "refs/heads/main/netflix_titles.csv"
)
df = pd.read_csv(url)

# One-line summary: number of rows and the span of release years in the data.
release_years = df['release_year']
print(
    "Netflix dataset loaded: "
    f"{len(df):,} titles from {release_years.min()} to {release_years.max()}"
)
df.head()

## BEGINNER EXERCISES


In [None]:
# Beginner exercises: basic descriptive statistics over the frame `df`
# loaded in the first code cell.

print("1. First 10 rows:")
print(df.head(10))

print("\n2. Movies vs TV Shows:")
type_count = df['type'].value_counts()
print(type_count)
print(f"→ Movies: {type_count['Movie']:,} | TV Shows: {type_count['TV Show']:,}")
print(f"   Movies are {type_count['Movie']/type_count['TV Show']:.1f}x more → Early Netflix was MOVIE-focused")

print("\n3. Oldest content:")
# idxmin() returns the index label of the smallest release_year; .loc fetches that row.
oldest = df.loc[df['release_year'].idxmin()]
print(f"   {oldest['title']} ({oldest['release_year']}) → Classic cinema preservation")

print("\n4. Top 5 countries producing content:")
# A `country` cell may list several countries separated by commas. Split on the
# bare comma and strip each piece so uneven spacing or a stray leading/trailing
# comma cannot produce tokens such as "" or "Name," that would be counted as
# their own "country". Rows with no country are dropped before splitting.
countries = (
    df['country']
    .dropna()
    .str.split(',')
    .explode()
    .str.strip()
)
countries = countries[countries != '']
top_countries = countries.value_counts().head(5)
print(top_countries)
print("   → USA dominates → Hollywood + licensing power")

print("\n5. Most common rating:")
print(df['rating'].value_counts().head(3))
print("   → TV-MA (Mature) dominates → Targeting adults 18–35")

print("\n6. Content released after 2015:")
# Strictly greater than 2015, i.e. 2016 onwards.
post_2015 = df[df['release_year'] > 2015].shape[0]
print(f"   {post_2015:,} titles ({post_2015/len(df)*100:.1f}% of total)")

print("\n7. Unique genres:")
# One row per (title, genre) pair; nunique() ignores missing values.
all_genres = df['listed_in'].str.split(', ').explode()
print(f"   {all_genres.nunique()} unique genres → Massive variety")

## Beginner Insights


In [None]:
# Static takeaway text for the beginner section: a title, a 70-character rule,
# then one bullet per line, emitted with a single print call.
beginner_insight_lines = (
    "BEGINNER INSIGHTS:",
    "=" * 70,
    "• Movies >> TV Shows → Netflix started as DVD rental → movie library",
    "• USA dominates → Hollywood deals + English-speaking audience",
    "• TV-MA most common → Targeting young adults (core demographic)",
    "• 2020 spike → COVID lockdowns → people stayed home → more production",
    "• Old classics included → Nostalgia + educational value + low licensing cost",
    "• Drama dominates → Universal appeal across cultures",
)
print("\n".join(beginner_insight_lines))

## INTERMEDIATE EXERCISES


In [None]:
# Intermediate exercises: frequency counts and simple derived columns on `df`.

# Year that content age is measured against (kept as a named constant so it is
# easy to find and update).
REFERENCE_YEAR = 2025

# 1. Top director
# NOTE(review): this counts each raw `director` cell as one value. If a cell can
# hold several comma-separated names (as `cast` is treated below), co-directed
# titles are tallied under the combined string, not per person — confirm intent.
directors = df['director'].dropna()
top_director = directors.value_counts().head(1)
print(f"Top director: {top_director.index[0]} with {top_director.values[0]} titles")

# 2. Top actor/actress
# One row per (title, cast member) after splitting the comma-separated list.
actors = df['cast'].dropna().str.split(', ').explode()
top_actor = actors.value_counts().head(1)
print(f"Most frequent actor: {top_actor.index[0]} ({top_actor.values[0]} appearances)")

# 3. Year with most releases
peak_year = df['release_year'].value_counts().head(1)
print(f"Peak release year: {peak_year.index[0]} with {peak_year.values[0]} titles")

# 4. Content age
# Adds a `content_age` column to the shared frame.
df['content_age'] = REFERENCE_YEAR - df['release_year']
print(f"Average content age: {df['content_age'].mean():.1f} years")

# 5. Longest movies
# Work on a copy so the new column does not touch `df`.
movies = df[df['type'] == 'Movie'].copy()
# regex=False: ' min' is a literal suffix; being explicit keeps the result
# independent of the pandas version's default for `regex`.
movies['duration_min'] = movies['duration'].str.replace(' min', '', regex=False).astype(float)
print("Top 10 longest movies:")
print(movies.nlargest(10, 'duration_min')[['title', 'duration', 'release_year']])

# 6. Most common genre combos
# Counts the full `listed_in` string, i.e. the combination as written, not single genres.
print("\nMost common genre combinations:")
print(df['listed_in'].value_counts().head(10))

# 7. Avg release year
avg_year = df.groupby('type')['release_year'].mean()
print(f"\nAverage release year — Movies: {avg_year['Movie']:.0f} | TV Shows: {avg_year['TV Show']:.0f}")
print("   → TV Shows are newer → Netflix pivoted to series")

## Intermediate Insights


In [None]:
# Static takeaway text for the intermediate section, printed line by line:
# a title, a 70-character rule, then the bullets.
intermediate_insight_lines = [
    "INTERMEDIATE INSIGHTS:",
    "=" * 70,
    "• Sharp rise post-2016 → Netflix went global + started originals",
    "• India in many co-productions → Bollywood + rising middle class",
    "• Frequent directors → Long-term partnerships (e.g. Rajiv Chilaka = kids content)",
    "• TV Shows newer → Binge model + series retention > movies",
    "• International Movies growing → Catering to non-English audiences",
]
for insight_line in intermediate_insight_lines:
    print(insight_line)

## ADVANCED EXERCISES


In [None]:

# Advanced exercises: genre and country breakdowns plus US vs. international
# release counts per year, all computed from `df`.

# 2. Genre trends post-2015
# Strictly after 2015 (2016 onwards), matching the "after 2015" label printed below.
recent = df[df['release_year'] > 2015]
recent_genres = recent['listed_in'].str.split(', ').explode()
print("Top genres after 2015:")
print(recent_genres.value_counts().head(10))

# 3. Top countries for TV Shows
# Split on the bare comma and strip each piece so uneven spacing or a stray
# leading/trailing comma cannot create bogus tokens ("" or "Name,").
tv_countries = (
    df.loc[df['type'] == 'TV Show', 'country']
    .dropna()
    .str.split(',')
    .explode()
    .str.strip()
)
tv_countries = tv_countries[tv_countries != '']
print("\nTop TV Show producing countries:")
print(tv_countries.value_counts().head(8))

# 4. International content growth
# A title is "international" only when a country is recorded AND the United
# States is not among the listed countries. Rows with a missing country are
# excluded from both trends instead of being counted as international.
has_country = df['country'].notna()
is_us = df['country'].str.contains('United States', na=False, regex=False)
df['is_international'] = has_country & ~is_us
# Titles per release year for each group (computed here; not displayed in this cell).
intl_trend = df[df['is_international']].groupby('release_year').size()
us_trend = df[is_us].groupby('release_year').size()


## Advanced Strategic & Cultural Insights


In [None]:
# Static closing text for the advanced section. Each entry is printed on its own
# line; entries that begin with "\n" leave a blank line before them.
banner_rule = "=" * 80
advanced_insight_lines = (
    "\n" + banner_rule,
    "ADVANCED BUSINESS & CULTURAL INSIGHTS",
    banner_rule,
    "1. USA dominates movies → Hollywood licensing deals",
    "   But TV Shows: Japan, South Korea, India rising → Anime, K-Drama, Bollywood",
    "\n2. TV Shows growing faster post-2018 → Binge-watching culture",
    "   → Series keep subscribers longer than movies",
    "\n3. International content surge → Netflix’s global expansion strategy",
    "   → 190+ countries → must cater to local tastes",
    "\n4. India, Japan, Korea in top 10 → Rising soft power + huge populations",
    "\n5. Older content still added → Low-cost filler + nostalgia marketing",
    "\n6. Stand-Up & Documentaries rising → Cheap to produce + high engagement",
    "\n7. Frequent Indian actors in kids content → Huge child audience in India",
    "\nFINAL CONCLUSION:",
    "   Netflix evolved from:",
    "   • American movie rental service (2007)",
    "   → Global streaming giant producing local content in 50+ languages",
    "   → Now: More TV Shows, more international, more originals",
    "\n   You just analyzed the biggest shift in entertainment history!",
)
for insight_line in advanced_insight_lines:
    print(insight_line)

# Netflix Strategy Timeline (From Your Data)

| Year      | Focus                            | Strategy                             |
| --------- | -------------------------------- | ------------------------------------ |
| Pre-2010  | Old Hollywood movies             | DVD legacy + cheap licenses          |
| 2013–2016 | First originals (House of Cards) | Prove streaming works                |
| 2016–2019 | Global expansion                 | International Movies & local content |
| 2019–2021 | TV Shows dominate                | Binge model + subscriber retention   |
| 2021+     | India, Korea, Japan, Spain       | Localization = future growth         |


<hr>
<div style="text-align:center">
  <h3 style="color:orange">|| राम नाम सत्य है ||</h3>
  <h4>Author : सीता राम जी </h4>
   <h5 style="color:skyblue"><i>© All Rights Reserved</i></h5>
</div>
