# Transform/Clean Data

#### In this notebook we will be cleaning the csv with the complete data we collected from the OMDB API. We decided to leave most of the columns untouched as we will perform further data preprocessing depending on selected models going forward.

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [33]:
# Visualize more columns
pd.options.display.max_columns = None

In [34]:
# Import downloaded CSV which contains a list 6820 movies
file_path = '../resources/cleaned_data/movies_complete.csv'
movies_df = pd.read_csv(file_path)
movies_df.head()

Unnamed: 0,budget,production,country_kaggle,director,genre_kaggle,gross,name,rating,released,runtime,score_imdb,star_kaggle,votes_imdb,writer_kaggle,year,genres_omdb,writers_omdb,actors_omdb,plot,language_omdb,country_omdb,awards,poster,score_metacritic,type
0,8000000.0,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414.0,Stand by Me,R,1986-08-22,89,8.1,Wil Wheaton,299174,Stephen King,1986,"Adventure, Drama","Stephen King (novel), Raynold Gideon (screenpl...","Wil Wheaton, River Phoenix, Corey Feldman, Jer...","It's the summer of 1959 in Castlerock, Oregon ...",English,USA,Nominated for 1 Oscar. Another 5 wins & 10 nom...,https://m.media-amazon.com/images/M/MV5BODJmY2...,75.0,movie
1,6000000.0,Paramount Pictures,USA,John Hughes,Comedy,70136369.0,Ferris Bueller's Day Off,PG-13,1986-06-11,103,7.8,Matthew Broderick,264740,John Hughes,1986,Comedy,John Hughes,"Matthew Broderick, Alan Ruck, Mia Sara, Jeffre...",High school student Ferris Bueller wants a day...,"English, German",USA,Nominated for 1 Golden Globe. Another 2 wins.,https://m.media-amazon.com/images/M/MV5BMDA0Nj...,61.0,movie
2,15000000.0,Paramount Pictures,USA,Tony Scott,Action,179800601.0,Top Gun,PG,1986-05-16,110,6.9,Tom Cruise,236909,Jim Cash,1986,"Action, Drama","Jim Cash, Jack Epps Jr., Ehud Yonay (magazine ...","Tom Cruise, Kelly McGillis, Val Kilmer, Anthon...","Lieutenant Pete ""Maverick"" Mitchell is an expe...",English,USA,Won 1 Oscar. Another 10 wins & 5 nominations.,https://m.media-amazon.com/images/M/MV5BZjQxYT...,50.0,movie
3,18500000.0,Twentieth Century Fox Film Corporation,USA,James Cameron,Action,85160248.0,Aliens,R,1986-07-18,137,8.4,Sigourney Weaver,540152,James Cameron,1986,"Action, Adventure, Sci-Fi, Thriller","James Cameron (story by), David Giler (story b...","Sigourney Weaver, Carrie Henn, Michael Biehn, ...",Fifty seven years after Ellen Ripley survived ...,English,"UK, USA",Won 2 Oscars. Another 18 wins & 23 nominations.,https://m.media-amazon.com/images/M/MV5BZGU2OG...,84.0,movie
4,9000000.0,Walt Disney Pictures,USA,Randal Kleiser,Adventure,18564613.0,Flight of the Navigator,PG,1986-08-01,90,6.9,Joey Cramer,36636,Mark H. Baker,1986,"Adventure, Comedy, Family, Sci-Fi","Mark H. Baker (story), Michael Burton (screenp...","Joey Cramer, Paul Reubens, Cliff De Young, Ver...","A 12-year-old boy goes missing in 1978, only t...",English,USA,4 nominations.,https://m.media-amazon.com/images/M/MV5BMjUwNm...,64.0,movie


In [23]:
# movies_df['type'].value_counts()
movies_df.isna().sum()
# movies_df[movies_df['writers_omdb'].isnull()]

budget                 0
production             0
country_kaggle         0
director               0
genre_kaggle           0
gross                  0
name                   0
rating                 0
released               0
runtime                0
score_imdb             0
star_kaggle            0
votes_imdb             0
writer_kaggle          0
year                   0
genres_omdb          349
writers_omdb         414
actors_omdb          365
plot                 399
language_omdb        359
country_omdb         348
awards              1435
poster               390
score_metacritic    1824
type                 345
dtype: int64

In [35]:
movies_df.head(2)

Unnamed: 0,budget,production,country_kaggle,director,genre_kaggle,gross,name,rating,released,runtime,score_imdb,star_kaggle,votes_imdb,writer_kaggle,year,genres_omdb,writers_omdb,actors_omdb,plot,language_omdb,country_omdb,awards,poster,score_metacritic,type
0,8000000.0,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414.0,Stand by Me,R,1986-08-22,89,8.1,Wil Wheaton,299174,Stephen King,1986,"Adventure, Drama","Stephen King (novel), Raynold Gideon (screenpl...","Wil Wheaton, River Phoenix, Corey Feldman, Jer...","It's the summer of 1959 in Castlerock, Oregon ...",English,USA,Nominated for 1 Oscar. Another 5 wins & 10 nom...,https://m.media-amazon.com/images/M/MV5BODJmY2...,75.0,movie
1,6000000.0,Paramount Pictures,USA,John Hughes,Comedy,70136369.0,Ferris Bueller's Day Off,PG-13,1986-06-11,103,7.8,Matthew Broderick,264740,John Hughes,1986,Comedy,John Hughes,"Matthew Broderick, Alan Ruck, Mia Sara, Jeffre...",High school student Ferris Bueller wants a day...,"English, German",USA,Nominated for 1 Golden Globe. Another 2 wins.,https://m.media-amazon.com/images/M/MV5BMDA0Nj...,61.0,movie


In [36]:
# Check data types
movies_df.dtypes

budget              float64
production           object
country_kaggle       object
director             object
genre_kaggle         object
gross               float64
name                 object
rating               object
released             object
runtime               int64
score_imdb          float64
star_kaggle          object
votes_imdb            int64
writer_kaggle        object
year                  int64
genres_omdb          object
writers_omdb         object
actors_omdb          object
plot                 object
language_omdb        object
country_omdb         object
awards               object
poster               object
score_metacritic    float64
type                 object
dtype: object

In [44]:
# Cast butget and gross columns into integer data types
int_df = movies_df.copy()
int_df[['budget', 'gross']] = int_df[['budget', 'gross']].astype(int)
int_df[['budget', 'gross']].dtypes

budget    int64
gross     int64
dtype: object

In [46]:
# Cast released column into datetime data type
time_df = int_df.copy()
time_df['released'] = pd.to_datetime(time_df['released'])
time_df['released'].dtype

dtype('<M8[ns]')

In [53]:
# Add a 'month' column
month_df = time_df.copy()
month_df['month'] = time_df['released'].dt.month
month_df.head(2)

Unnamed: 0,budget,production,country_kaggle,director,genre_kaggle,gross,name,rating,released,runtime,score_imdb,star_kaggle,votes_imdb,writer_kaggle,year,genres_omdb,writers_omdb,actors_omdb,plot,language_omdb,country_omdb,awards,poster,score_metacritic,type,month
0,8000000,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414,Stand by Me,R,1986-08-22,89,8.1,Wil Wheaton,299174,Stephen King,1986,"Adventure, Drama","Stephen King (novel), Raynold Gideon (screenpl...","Wil Wheaton, River Phoenix, Corey Feldman, Jer...","It's the summer of 1959 in Castlerock, Oregon ...",English,USA,Nominated for 1 Oscar. Another 5 wins & 10 nom...,https://m.media-amazon.com/images/M/MV5BODJmY2...,75.0,movie,8
1,6000000,Paramount Pictures,USA,John Hughes,Comedy,70136369,Ferris Bueller's Day Off,PG-13,1986-06-11,103,7.8,Matthew Broderick,264740,John Hughes,1986,Comedy,John Hughes,"Matthew Broderick, Alan Ruck, Mia Sara, Jeffre...",High school student Ferris Bueller wants a day...,"English, German",USA,Nominated for 1 Golden Globe. Another 2 wins.,https://m.media-amazon.com/images/M/MV5BMDA0Nj...,61.0,movie,6


In [55]:
# Check dtypes again
month_df.dtypes

budget                       int64
production                  object
country_kaggle              object
director                    object
genre_kaggle                object
gross                        int64
name                        object
rating                      object
released            datetime64[ns]
runtime                      int64
score_imdb                 float64
star_kaggle                 object
votes_imdb                   int64
writer_kaggle               object
year                         int64
genres_omdb                 object
writers_omdb                object
actors_omdb                 object
plot                        object
language_omdb               object
country_omdb                object
awards                      object
poster                      object
score_metacritic           float64
type                        object
month                        int64
dtype: object

**We will also rearrange the orders of all the columns**

In [63]:
print(f'Column Names: {month_df.columns}')
print(f'Columns Count: {len(month_df.columns)}')

Column Names: Index(['budget', 'production', 'country_kaggle', 'director', 'genre_kaggle',
       'gross', 'name', 'rating', 'released', 'runtime', 'score_imdb',
       'star_kaggle', 'votes_imdb', 'writer_kaggle', 'year', 'genres_omdb',
       'writers_omdb', 'actors_omdb', 'plot', 'language_omdb', 'country_omdb',
       'awards', 'poster', 'score_metacritic', 'type', 'month'],
      dtype='object')
Columns Count: 26


In [60]:
ordered_df.head(1)

Unnamed: 0,budget,production,country_kaggle,director,genre_kaggle,gross,name,rating,released,runtime,score_imdb,star_kaggle,votes_imdb,writer_kaggle,year,genres_omdb,writers_omdb,actors_omdb,plot,language_omdb,country_omdb,awards,poster,score_metacritic,type,month
0,8000000,Columbia Pictures Corporation,USA,Rob Reiner,Adventure,52287414,Stand by Me,R,1986-08-22,89,8.1,Wil Wheaton,299174,Stephen King,1986,"Adventure, Drama","Stephen King (novel), Raynold Gideon (screenpl...","Wil Wheaton, River Phoenix, Corey Feldman, Jer...","It's the summer of 1959 in Castlerock, Oregon ...",English,USA,Nominated for 1 Oscar. Another 5 wins & 10 nom...,https://m.media-amazon.com/images/M/MV5BODJmY2...,75.0,movie,8


In [65]:
# We will reorder all columns and also remove "poster" and "type" columns later
# The "poster" column could be a resource to pull poster pictures of movies
# The "type" column will be used to identify "non-movies" (series) to be removed
ordered_df = month_df[['name', 'production', 'director', 'runtime',
                       'released', 'year', 'month',
                       'country_kaggle', 'country_omdb',
                       'star_kaggle', 'actors_omdb',
                       'writer_kaggle', 'writers_omdb',
                       'language_omdb', 'plot', 'awards',
                       'score_imdb', 'votes_imdb', 'score_metacritic',
                       'budget', 'genre_kaggle',
                       'gross', 'genres_omdb', 'rating',
                       'poster', 'type'
                      ]]
len(ordered_df.columns)

26

In [66]:
ordered_df.head(2)

Unnamed: 0,name,production,director,runtime,released,year,month,country_kaggle,country_omdb,star_kaggle,actors_omdb,writer_kaggle,writers_omdb,language_omdb,plot,awards,score_imdb,votes_imdb,score_metacritic,budget,genre_kaggle,gross,genres_omdb,rating,poster,type
0,Stand by Me,Columbia Pictures Corporation,Rob Reiner,89,1986-08-22,1986,8,USA,USA,Wil Wheaton,"Wil Wheaton, River Phoenix, Corey Feldman, Jer...",Stephen King,"Stephen King (novel), Raynold Gideon (screenpl...",English,"It's the summer of 1959 in Castlerock, Oregon ...",Nominated for 1 Oscar. Another 5 wins & 10 nom...,8.1,299174,75.0,8000000,Adventure,52287414,"Adventure, Drama",R,https://m.media-amazon.com/images/M/MV5BODJmY2...,movie
1,Ferris Bueller's Day Off,Paramount Pictures,John Hughes,103,1986-06-11,1986,6,USA,USA,Matthew Broderick,"Matthew Broderick, Alan Ruck, Mia Sara, Jeffre...",John Hughes,John Hughes,"English, German",High school student Ferris Bueller wants a day...,Nominated for 1 Golden Globe. Another 2 wins.,7.8,264740,61.0,6000000,Comedy,70136369,Comedy,PG-13,https://m.media-amazon.com/images/M/MV5BMDA0Nj...,movie


In [69]:
ordered_df['type'].value_counts()

movie     6386
series      89
Name: type, dtype: int64

In [74]:
# Remove "series" types from df
type_filter = ordered_df['type'] != 'series'
ordered_df = ordered_df.loc[type_filter]

In [78]:
# Now we remove "poster" and "type" columns as we no longer need them
cleaned_df = ordered_df.iloc[:, :-2]
cleaned_df.head(2)

Unnamed: 0,name,production,director,runtime,released,year,month,country_kaggle,country_omdb,star_kaggle,actors_omdb,writer_kaggle,writers_omdb,language_omdb,plot,awards,score_imdb,votes_imdb,score_metacritic,budget,genre_kaggle,gross,genres_omdb,rating
0,Stand by Me,Columbia Pictures Corporation,Rob Reiner,89,1986-08-22,1986,8,USA,USA,Wil Wheaton,"Wil Wheaton, River Phoenix, Corey Feldman, Jer...",Stephen King,"Stephen King (novel), Raynold Gideon (screenpl...",English,"It's the summer of 1959 in Castlerock, Oregon ...",Nominated for 1 Oscar. Another 5 wins & 10 nom...,8.1,299174,75.0,8000000,Adventure,52287414,"Adventure, Drama",R
1,Ferris Bueller's Day Off,Paramount Pictures,John Hughes,103,1986-06-11,1986,6,USA,USA,Matthew Broderick,"Matthew Broderick, Alan Ruck, Mia Sara, Jeffre...",John Hughes,John Hughes,"English, German",High school student Ferris Bueller wants a day...,Nominated for 1 Golden Globe. Another 2 wins.,7.8,264740,61.0,6000000,Comedy,70136369,Comedy,PG-13


In [85]:
# We will also reverse the order of the df by sorting it descending by year 
sorted_df = cleaned_df.copy()
sorted_df.sort_values(by=['name'], ascending=True, inplace=True)
sorted_df.sort_values(by=['year'], ascending=False, inplace=True)
sorted_df = sorted_df.reset_index(drop=True)
sorted_df.head()

Unnamed: 0,name,production,director,runtime,released,year,month,country_kaggle,country_omdb,star_kaggle,actors_omdb,writer_kaggle,writers_omdb,language_omdb,plot,awards,score_imdb,votes_imdb,score_metacritic,budget,genre_kaggle,gross,genres_omdb,rating
0,Gold,Black Bear Pictures,Stephen Gaghan,120,2017-01-27,2016,1,USA,USA,Matthew McConaughey,"Matthew McConaughey, Edgar Ramírez, Bryce Dall...",Patrick Massett,"Patrick Massett, John Zinman","English, Indonesian","With the sudden death of his father, fourth-ge...",1 win & 5 nominations.,6.7,32147,49.0,20000000,Adventure,7222964,"Crime, Drama",R
1,The Choice,Nicholas Sparks Productions,Ross Katz,111,2016-02-05,2016,2,USA,United States,Benjamin Walker,"Benjamin Walker, Teresa Palmer, Alexandra Dadd...",Bryan Sipe,"Bryan Sipe, Nicholas Sparks",English,"In a small coastal town, the veterinarian Trav...",3 nominations,6.6,22972,26.0,0,Drama,18709066,"Drama, Romance",PG-13
2,Middle School: The Worst Years of My Life,CBS Films,Steve Carr,92,2016-10-07,2016,10,USA,"USA, Cambodia",Griffin Gluck,"Griffin Gluck, Lauren Graham, Alexa Nisenson, ...",Chris Bowman,"Chris Bowman (screenplay by), Hubbel Palmer (s...","English, Khmer",Imaginative quiet teenager Rafe Katchadorian i...,5 nominations.,6.1,4556,51.0,8500000,Animation,19985196,"Animation, Comedy, Family",PG
3,Midnight Special,Warner Bros.,Jeff Nichols,112,2016-04-21,2016,4,USA,"USA, Greece",Michael Shannon,"Michael Shannon, Joel Edgerton, Kirsten Dunst,...",Jeff Nichols,Jeff Nichols,English,Alton Meyer is a boy unlike any other in the w...,3 wins & 14 nominations.,6.7,58549,76.0,18000000,Drama,3707794,"Drama, Mystery, Sci-Fi, Thriller",PG-13
4,A Monster Calls,Apaches Entertainment,J.A. Bayona,108,2017-01-06,2016,1,UK,"UK, Spain, USA",Lewis MacDougall,"Lewis MacDougall, Sigourney Weaver, Felicity J...",Patrick Ness,"Patrick Ness (screenplay by), Patrick Ness (ba...",English,The monster does not come walking often. This ...,39 wins & 56 nominations.,7.5,49969,76.0,43000000,Drama,3730982,"Adventure, Drama, Family, Fantasy",PG-13


In [89]:
# Finally, we will also double check if there's any duplicated movie names
print(f"Duplicate values? {sorted_df['name'].duplicated().any()}")
print(f"How many? {sorted_df['name'].duplicated().sum()}")

Duplicate values? True
How many? 89


In [127]:
# Visualize duplicated names to make decision on drop method
sorted_df[sorted_df['name'].duplicated(keep=False)].sort_values(by=['name']).head(10)

Unnamed: 0,name,production,director,runtime,released,year,month,country_kaggle,country_omdb,star_kaggle,actors_omdb,writer_kaggle,writers_omdb,language_omdb,plot,awards,score_imdb,votes_imdb,score_metacritic,budget,genre_kaggle,gross,genres_omdb,rating
4330,Anna Karenina,Icon Entertainment International,Bernard Rose,108,1997-04-04,1997,4,USA,United Kingdom,Sophie Marceau,"Keira Knightley, Jude Law, Aaron Taylor-Johnson",Leo Tolstoy,"Tom Stoppard, Lev Tolstoy",English,"In 1874, in the Imperial Russia, the aristocra...",Won 1 Oscar. 33 wins & 53 nominations total,6.4,4851,63.0,35000000,Drama,791830,"Drama, Romance",PG-13
968,Anna Karenina,Universal Pictures,Joe Wright,129,2013-03-01,2012,3,UK,United Kingdom,Keira Knightley,"Keira Knightley, Jude Law, Aaron Taylor-Johnson",Tom Stoppard,"Tom Stoppard, Lev Tolstoy",English,"In 1874, in the Imperial Russia, the aristocra...",Won 1 Oscar. 33 wins & 53 nominations total,6.6,77083,63.0,0,Drama,12816367,"Drama, Romance",R
3119,Bad Company,Touchstone Pictures,Joel Schumacher,116,2002-06-07,2002,6,USA,"USA, Czech Republic",Anthony Hopkins,"Anthony Hopkins, Chris Rock, Peter Stormare, G...",Gary M. Goodman,"Gary M. Goodman (story), David Himmelstein (st...",English,When a CIA agent is killed during a nuclear ar...,,5.6,41284,37.0,70000000,Action,30160161,"Action, Comedy, Thriller",PG-13
4758,Bad Company,Touchstone Pictures,Damian Harris,108,1995-01-20,1995,1,USA,"USA, Czech Republic",Ellen Barkin,"Anthony Hopkins, Chris Rock, Peter Stormare, G...",Ross Thomas,"Gary M. Goodman (story), David Himmelstein (st...",English,When a CIA agent is killed during a nuclear ar...,,5.4,2411,37.0,0,Crime,3700000,"Action, Comedy, Thriller",R
855,Beautiful Creatures,Alcon Entertainment,Richard LaGravenese,124,2013-02-14,2013,2,USA,USA,Alice Englert,"Alden Ehrenreich, Alice Englert, Jeremy Irons,...",Richard LaGravenese,"Richard LaGravenese (screenplay), Kami Garcia ...",English,Teenager Ethan Wate is obsessed with his urge ...,9 nominations.,6.2,73570,52.0,60000000,Drama,19452138,"Drama, Fantasy, Romance",PG-13
3658,Beautiful Creatures,Arts Council of England,Bill Eagles,86,2001-01-19,2000,1,UK,USA,Rachel Weisz,"Alden Ehrenreich, Alice Englert, Jeremy Irons,...",Simon Donald,"Richard LaGravenese (screenplay), Kami Garcia ...",English,Teenager Ethan Wate is obsessed with his urge ...,9 nominations.,5.9,3150,52.0,0,Comedy,55381,"Drama, Fantasy, Romance",R
6609,Behind Enemy Lines,Not specified,Gideon Amir,90,1986-04-04,1986,4,USA,USA,David Carradine,"Owen Wilson, Gene Hackman, Gabriel Macht, Char...",Malcolm Barbour,"Jim Thomas (story), John Thomas (story), David...","English, Serbo-Croatian, Spanish, Czech",Fighter navigator Chris Burnett wants out: he ...,1 win & 2 nominations.,4.9,368,49.0,0,Action,2497233,"Action, Drama, Thriller, War",R
3345,Behind Enemy Lines,Twentieth Century Fox Film Corporation,John Moore,106,2001-11-30,2001,11,USA,USA,Gene Hackman,"Owen Wilson, Gene Hackman, Gabriel Macht, Char...",Jim Thomas,"Jim Thomas (story), John Thomas (story), David...","English, Serbo-Croatian, Spanish, Czech",Fighter navigator Chris Burnett wants out: he ...,1 win & 2 nominations.,6.4,90731,49.0,40000000,Action,59068786,"Action, Drama, Thriller, War",PG-13
2340,Black Sheep,New Zealand Film Commission,Jonathan King,87,2007-03-29,2006,3,New Zealand,"New Zealand, South Korea",Oliver Driver,"Oliver Driver, Nathan Meister, Tammy Davis",Jonathan King,Jonathan King,English,An experiment in genetic engineering turns har...,5 wins & 5 nominations,5.8,36120,62.0,0,Comedy,82987,"Comedy, Horror, Sci-Fi",NOT RATED
4405,Black Sheep,Broadway Pictures,Penelope Spheeris,87,1996-02-02,1996,2,USA,"New Zealand, South Korea",Chris Farley,"Oliver Driver, Nathan Meister, Tammy Davis",Fred Wolf,Jonathan King,English,An experiment in genetic engineering turns har...,5 wins & 5 nominations,6.2,31582,62.0,0,Comedy,32377000,"Comedy, Horror, Sci-Fi",PG-13


In [140]:
# Remove all rows with duplicated movie names
final_df = sorted_df.copy()
final_df = final_df.drop_duplicates(subset=['name'], keep=False, ignore_index=True)
final_df.head()

Unnamed: 0,name,production,director,runtime,released,year,month,country_kaggle,country_omdb,star_kaggle,actors_omdb,writer_kaggle,writers_omdb,language_omdb,plot,awards,score_imdb,votes_imdb,score_metacritic,budget,genre_kaggle,gross,genres_omdb,rating
0,Gold,Black Bear Pictures,Stephen Gaghan,120,2017-01-27,2016,1,USA,USA,Matthew McConaughey,"Matthew McConaughey, Edgar Ramírez, Bryce Dall...",Patrick Massett,"Patrick Massett, John Zinman","English, Indonesian","With the sudden death of his father, fourth-ge...",1 win & 5 nominations.,6.7,32147,49.0,20000000,Adventure,7222964,"Crime, Drama",R
1,The Choice,Nicholas Sparks Productions,Ross Katz,111,2016-02-05,2016,2,USA,United States,Benjamin Walker,"Benjamin Walker, Teresa Palmer, Alexandra Dadd...",Bryan Sipe,"Bryan Sipe, Nicholas Sparks",English,"In a small coastal town, the veterinarian Trav...",3 nominations,6.6,22972,26.0,0,Drama,18709066,"Drama, Romance",PG-13
2,Middle School: The Worst Years of My Life,CBS Films,Steve Carr,92,2016-10-07,2016,10,USA,"USA, Cambodia",Griffin Gluck,"Griffin Gluck, Lauren Graham, Alexa Nisenson, ...",Chris Bowman,"Chris Bowman (screenplay by), Hubbel Palmer (s...","English, Khmer",Imaginative quiet teenager Rafe Katchadorian i...,5 nominations.,6.1,4556,51.0,8500000,Animation,19985196,"Animation, Comedy, Family",PG
3,Midnight Special,Warner Bros.,Jeff Nichols,112,2016-04-21,2016,4,USA,"USA, Greece",Michael Shannon,"Michael Shannon, Joel Edgerton, Kirsten Dunst,...",Jeff Nichols,Jeff Nichols,English,Alton Meyer is a boy unlike any other in the w...,3 wins & 14 nominations.,6.7,58549,76.0,18000000,Drama,3707794,"Drama, Mystery, Sci-Fi, Thriller",PG-13
4,A Monster Calls,Apaches Entertainment,J.A. Bayona,108,2017-01-06,2016,1,UK,"UK, Spain, USA",Lewis MacDougall,"Lewis MacDougall, Sigourney Weaver, Felicity J...",Patrick Ness,"Patrick Ness (screenplay by), Patrick Ness (ba...",English,The monster does not come walking often. This ...,39 wins & 56 nominations.,7.5,49969,76.0,43000000,Drama,3730982,"Adventure, Drama, Family, Fantasy",PG-13


In [142]:
len(final_df)

6555

In [143]:
# Export final_df to CSV
final_df.to_csv('../resources/cleaned_data/movies_complete_cleaned.csv', index=False)