# Netflix Case Study: 
## The Effect of TV Shows and Movies on the Stock Price.
<img src="https://images.ctfassets.net/y2ske730sjqp/5QQ9SVIdc1tmkqrtFnG9U1/de758bba0f65dcc1c6bc1f31f161003d/BrandAssets_Logos_02-NSymbol.jpg?w=940" alt="Netflix Logo" width="300" height="200">

### Table of Contents
[1. Import Libraries and Data](#import)

### Import the Libraries and Data <a id="import"></a>

In [10]:
import pandas as pd
from ast import literal_eval # for list evaluation
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [123]:
# Bar colours for the per-year charts below; the loops over 2016-2021
# pick one entry each via colors[i-2016].
colors = [
    '#aa00ff',
    '#2962ff',
    '#00c853',
    '#ffd600',
    '#3e2723',
    '#263238',
]

In [124]:
# Load the two input tables from the local data/ directory.
titles = pd.read_csv(filepath_or_buffer='data/titles.csv')
stocks = pd.read_csv(filepath_or_buffer='data/stocks.csv')

In [125]:
def _to_list(value):
    """Turn one CSV cell holding a stringified list into a real list.

    Missing values become an empty list. A value that is already a list is
    returned unchanged, so this cell can be re-run without raising.
    """
    if isinstance(value, list):
        return value
    return literal_eval(value) if pd.notna(value) else []


# convert genres, actors, and directors to list type
for list_col in ('genres', 'actors', 'directors'):
    titles[list_col] = titles[list_col].apply(_to_list)

# Years are compared against strings such as f'{i}' further down.
titles['release_year'] = titles['release_year'].astype(str)
titles['year_added'] = titles['year_added'].astype(str)

In [126]:
# Preview the first five rows of the titles table.
titles.head(n=5)

Unnamed: 0,title,country,release_year,rating,duration,description,year_added,month_added,year_month_added,imdb_score,genres,actors,directors
0,Dick Johnson Is Dead,United States,2020,PG-13,90 min,"As her father nears the end of his life, filmm...",2021,September,"September, 2021",7.4,"[documentation, drama]","[Richard Johnson, Kirsten Johnson, Isla Sierck...",[Kirsten Johnson]
1,Blood & Water,South Africa,2021,TV-MA,2 Seasons,"After crossing paths at a party, a Cape Town t...",2021,September,"September, 2021",,[],[],[]
2,Ganglands,,2021,TV-MA,1 Season,To protect his family from a powerful drug lor...,2021,September,"September, 2021",7.0,"[action, crime, drama, thriller]","[Sami Bouajila, Tracy Gotoas, Sofia Lesaffre, ...",[]
3,Jailbirds New Orleans,,2021,TV-MA,1 Season,"Feuds, flirtations and toilet talk go down amo...",2021,September,"September, 2021",6.6,"[documentation, reality]",[],[]
4,Kota Factory,India,2021,TV-MA,2 Seasons,In a city of coaching centers known to train I...,2021,September,"September, 2021",9.1,"[drama, comedy]",[Mayur More],[]


In [127]:
# Preview the first five rows of the stock price table.
stocks.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Year,Month,Year-Month
0,2016-01-04,109.0,110.0,105.209999,109.959999,109.959999,20794800,2016,January,"January, 2016"
1,2016-01-05,110.449997,110.580002,105.849998,107.660004,107.660004,17664600,2016,January,"January, 2016"
2,2016-01-06,105.290001,117.910004,104.959999,117.68,117.68,33045700,2016,January,"January, 2016"
3,2016-01-07,116.360001,122.18,112.290001,114.559998,114.559998,33636700,2016,January,"January, 2016"
4,2016-01-08,116.330002,117.720001,111.099998,111.389999,111.389999,18067100,2016,January,"January, 2016"


### Questions To Be Answered
- How does the genre of titles released affect the trading volume of stock?
- How does the parental rating of titles released affect the trading volume of stock?
- Which actors are related to the most trading volume of stock?
- Which directors are related to the most trading volume of stock?
- How does the IMDb rating of a title affect the trading volume of stock?
- How does the release year of a title affect the trading volume of stock?

#### TL;DR
Which factors of an added title can lead to increased liquidity for Netflix stock?

#### How does the total volume change from year to year and from month to month?

In [128]:
# Total traded volume per year, as a two-column frame (Year, Volume).
stocks_agg = stocks.groupby('Year', as_index=False)['Volume'].sum()

line_color = '#E50914'
yearly_trace = go.Scatter(
    x=stocks_agg['Year'],
    y=stocks_agg['Volume'],
    mode='lines+markers',
    name='Total Volume',
    line={'color': line_color},
    marker={'color': line_color},
)

fig = go.Figure(data=[yearly_trace])
fig.update_layout(
    title='Total Volume Changes Over Years',
    xaxis_title='Year',
    yaxis_title='Total Volume',
)

fig.show()

In [129]:
# Parse the "Month, Year" labels with an explicit format (the same one used
# for titles['year_month_added']) instead of relying on format inference.
# assign() returns a new frame, so `stocks` itself is left untouched.
temp_stocks = stocks.assign(
    **{'Year-Month': pd.to_datetime(stocks['Year-Month'], format='%B, %Y')}
)

# groupby sorts its keys by default, so the result is already chronological.
stocks_agg = temp_stocks.groupby('Year-Month', as_index=False)['Volume'].sum()

fig = go.Figure()
fig.add_trace(go.Scatter(x=stocks_agg['Year-Month'],
                         y=stocks_agg['Volume'],
                         mode='lines+markers',
                         name='Total Volume',
                         line=dict(color='#E50914'),
                         marker=dict(color='#E50914')))

fig.update_layout(title='Total Volume Changes Over Months',
                  xaxis_title='Year',
                  yaxis_title='Total Volume')

fig.show()

#### What genres were most added each year?

In [140]:
# One row per (title, genre); computed once because it does not depend on the year.
titles_by_genre = titles.explode('genres')

for i in range(2016, 2022):
    temp_titles = titles_by_genre[titles_by_genre['year_added'] == f'{i}']

    # Take labels and heights from the SAME value_counts() result.
    # Pairing .unique() (order of appearance) with .value_counts() (sorted by
    # count) would attach the counts to the wrong genres.
    genre_counts = temp_titles['genres'].value_counts()

    fig = px.bar(x=genre_counts.index.to_numpy(),
                 y=genre_counts.to_numpy(),
                 color_discrete_sequence=[colors[i-2016]])

    fig.update_layout(title=f'Number of Titles of Various Genres Added To Netflix In {i}',
                      xaxis_title='Genre',
                      yaxis_title='Count')

    fig.show()

In [148]:
# Distinct "Month, Year" labels from the titles table, in chronological order.
unique_added_dates = pd.to_datetime(titles['year_month_added'].unique(), format='%B, %Y')
sorted_year_months = [
    added_date.strftime('%B, %Y')
    for added_date in sorted(unique_added_dates)
]

#### What actors were most added each year?

In [156]:
# One row per (title, actor); the explode does not depend on the year.
titles_by_actor = titles.explode('actors')

for i in range(2016, 2022):
    temp_titles = titles_by_actor[titles_by_actor.year_added == f'{i}']
    # Five most frequent actors among the titles added in year i.
    top_actors = temp_titles['actors'].value_counts().head(5)
    print(f"Year {i}:\n{top_actors}\n")

Year 2016:
Donnie Yen           4
Jim Gaffigan         4
Lynn Hung            3
Mamoru Miyano        3
Katsuyuki Konishi    2
Name: actors, dtype: int64

Year 2017:
Adam Scott          5
Gulshan Grover      5
Andy Richter        4
Janeane Garofalo    4
Sharat Saxena       3
Name: actors, dtype: int64

Year 2018:
Kareena Kapoor Khan    10
Paresh Rawal            8
Om Puri                 8
Anupam Kher             8
Patrick Roach           8
Name: actors, dtype: int64

Year 2019:
Boman Irani      7
Vincent Tong     6
Niña Dolino      6
Jeff Dunham      6
Farhan Akhtar    6
Name: actors, dtype: int64

Year 2020:
Sharat Saxena          9
Kareena Kapoor Khan    9
Nawazuddin Siddiqui    8
Rani Mukerji           7
Fred Armisen           7
Name: actors, dtype: int64

Year 2021:
Tusshar Kapoor       7
Ramsey Nouah         5
Fred Tatasciore      5
Jide Kosoko          4
Leonardo DiCaprio    4
Name: actors, dtype: int64



#### What directors were most added each year?

In [157]:
# One row per (title, director); the explode does not depend on the year.
titles_by_director = titles.explode('directors')

for i in range(2016, 2022):
    added_in_year = titles_by_director['year_added'].eq(f'{i}')
    temp_titles = titles_by_director.loc[added_in_year]
    # Five most frequent directors among the titles added in year i.
    top_directors = temp_titles['directors'].value_counts().iloc[:5]
    print(f"Year {i}:\n{top_directors}\n")

Year 2016:
Wilson Yip           3
Jay Karas            3
Qaushiq Mukherjee    2
Troy Miller          2
Alexandre Lehmann    2
Name: directors, dtype: int64

Year 2017:
Jay Chapman     7
Marcus Raboy    6
Umesh Mehra     4
Raúl Campos     3
Jan Suter       3
Name: directors, dtype: int64

Year 2018:
Jan Suter             8
Raúl Campos           8
Vishal Bhardwaj       4
Aziz Mirza            3
Ashutosh Gowariker    3
Name: directors, dtype: int64

Year 2019:
Cathy Garcia-Molina    6
Farhan Akhtar          4
Rocky Soraya           4
Kunle Afolayan         4
Wenn V. Deramas        3
Name: directors, dtype: int64

Year 2020:
Youssef Chahine         7
Cathy Garcia-Molina     6
Shaun Paul Piccinino    4
Karan Johar             4
Philippe Aractingi      4
Name: directors, dtype: int64

Year 2021:
Milan Luthria       3
Todd Phillips       3
Dani de la Orden    3
Kayode Kasum        3
Yoshiyuki Tomino    3
Name: directors, dtype: int64



#### What parental ratings were most added each year

In [175]:
# Values found in the rating column that are not ratings.
# NOTE(review): they look like durations that slipped into the wrong column - confirm.
invalid_ratings = ['84 min', '66 min', '74 min']

for i in range(2016, 2022):
    # 'rating' holds scalars, so no explode is needed; filter by year and
    # discard the invalid rating values in one step.
    added_in_year = titles['year_added'] == f'{i}'
    has_invalid_rating = titles['rating'].isin(invalid_ratings)
    temp_titles = titles[added_in_year & ~has_invalid_rating]

    # Take labels and heights from the SAME value_counts() result.
    # Pairing .unique() (order of appearance) with .value_counts() (sorted by
    # count) would attach the counts to the wrong ratings.
    rating_counts = temp_titles['rating'].value_counts()

    fig = px.bar(x=rating_counts.index.to_numpy(),
                 y=rating_counts.to_numpy(),
                 color_discrete_sequence=[colors[i-2016]])

    fig.update_layout(title=f'Number of Titles of Various Parental Ratings Added To Netflix In {i}',
                      xaxis_title='Rating',
                      yaxis_title='Count')

    fig.show()

#### What parental ratings and genre combinations were most added each year

In [178]:
# Values found in the rating column that are not ratings.
# NOTE(review): they look like durations that slipped into the wrong column - confirm.
invalid_ratings = ['84 min', '66 min', '74 min']

# One row per (title, genre); computed once because it does not depend on the year.
titles_by_genre = titles.explode('genres')

for i in range(2016, 2022):
    added_in_year = titles_by_genre['year_added'] == f'{i}'
    has_invalid_rating = titles_by_genre['rating'].isin(invalid_ratings)
    temp_titles = titles_by_genre[added_in_year & ~has_invalid_rating]

    # Five most frequent (rating, genre) pairs among the titles added in year i.
    grouped_data = (
        temp_titles
        .groupby(['rating', 'genres'])
        .size()
        .reset_index(name='count')
        .sort_values(by='count', ascending=False)
        .head(5)
    )

    fig = px.bar(grouped_data,
                 x=grouped_data.apply(lambda row: f"{row['genres']} ({row['rating']})", axis=1),
                 y='count',
                 color_discrete_sequence=[colors[i-2016]])

    # The x axis shows "genre (rating)" labels, so title it accordingly.
    fig.update_layout(title=f'Top 5 Parental Rating and Genre of Titles Added To Netflix In {i}',
                      xaxis_title='Genre (Rating)',
                      yaxis_title='Count')

    fig.show()

#### What IMDb rating was assigned to the titles each year

In [188]:
for i in range(2016, 2022):
    # 'imdb_score' holds scalars, so no explode is needed.
    temp_titles = titles[titles['year_added'] == f'{i}']

    # Take scores and heights from the SAME value_counts() result, ordered by
    # score. Pairing .unique() (order of appearance) with .value_counts()
    # (sorted by count) would attach the counts to the wrong scores.
    score_counts = temp_titles['imdb_score'].value_counts().sort_index()

    fig = px.bar(x=score_counts.index.to_numpy(),
                 y=score_counts.to_numpy(),
                 color_discrete_sequence=[colors[i-2016]])

    fig.update_layout(title=f'Number of Titles of Various IMDb Ratings Added To Netflix In {i}',
                      xaxis_title='Rating',
                      yaxis_title='Count')

    fig.show()

#### What release year was most added each year

In [192]:
for i in range(2016, 2022):
    # 'release_year' holds scalars, so no explode is needed. sort_values
    # returns a new frame instead of sorting a filtered slice in place.
    temp_titles = titles[titles['year_added'] == f'{i}'].sort_values(by='release_year')

    # Take years and heights from the SAME value_counts() result, ordered by
    # release year. Pairing the sorted .unique() years with .value_counts()
    # (sorted by count) would attach the counts to the wrong years.
    release_year_counts = temp_titles['release_year'].value_counts().sort_index()

    fig = px.bar(x=release_year_counts.index.to_numpy(),
                 y=release_year_counts.to_numpy(),
                 color_discrete_sequence=[colors[i-2016]])

    fig.update_layout(title=f'Number of Titles of Various Release Years Added To Netflix In {i}',
                      xaxis_title='Release Year',
                      yaxis_title='Count')

    fig.show()