# Collect Movie Metadata

In [2]:
from time import time
from multiprocessing import Pool, cpu_count
import tqdm  # show multi-processing progress bar

import pandas as pd

import mojo_api

# for auto reload a module after editing.
%load_ext autoreload
%autoreload 2

### Step 1: Collect list of movies

In [2]:
# Collect movie list for each year from https://www.boxofficemojo.com/.
df = pd.DataFrame()
year_start = 2000
year_end = 2019
for year in range(year_start - 1, year_end + 1):
    year_df = mojo_api.get_movie_list(year)
    year_df.to_csv('./data/movie_list/_movie_list_{}.csv'.format(year), index=False)
    print("Finished collecting movie list of {}, movie number: {}".format(year, year_df.shape[0]))
    df = pd.concat([df, year_df])

Finished collecting movie list of 1999, movie number: 440
Finished collecting movie list of 2000, movie number: 429
Finished collecting movie list of 2001, movie number: 405
Finished collecting movie list of 2002, movie number: 553
Finished collecting movie list of 2003, movie number: 636
Finished collecting movie list of 2004, movie number: 654
Finished collecting movie list of 2005, movie number: 628
Finished collecting movie list of 2006, movie number: 712
Finished collecting movie list of 2007, movie number: 750
Finished collecting movie list of 2008, movie number: 708
Finished collecting movie list of 2009, movie number: 625
Finished collecting movie list of 2010, movie number: 615
Finished collecting movie list of 2011, movie number: 696
Finished collecting movie list of 2012, movie number: 766
Finished collecting movie list of 2013, movie number: 802
Finished collecting movie list of 2014, movie number: 808
Finished collecting movie list of 2015, movie number: 803
Finished colle

In [5]:
# A movie title can appear across two years. Remove duplicate records across years.
df_remove_dup = df.groupby('Title').agg({
    'Year': 'first',
    'Rank': 'first',
    'Title': 'first'
})

# Only keep the high rank movies.
df_remove_dup = df_remove_dup.sort_values(['Year', 'Rank'])
df_remove_dup_p1 = df_remove_dup[df_remove_dup.Year < 2014]
df_remove_dup_p1 = df_remove_dup_p1[df_remove_dup_p1.Rank <= 80]
df_remove_dup_p2 = df_remove_dup[df_remove_dup.Year >= 2014]
df_remove_dup_p2 = df_remove_dup_p2[df_remove_dup_p.Rank <= 150]

df_remove_dup = pd.concat([df_remove_dup_p1, df_remove_dup_p2])
df_remove_dup = df_remove_dup.sort_values(['Year', 'Rank'])
df_remove_dup = df_remove_dup.reset_index(drop=True)
df_remove_dup.to_csv('./data/movie_list/_movie_list_2000-2019.csv', index=False)

### Step 2: Collect metadata of each movie

In [31]:
# First process movies after 2014
df = pd.read_csv('./data/movie_list/_movie_list_2000-2019.csv')
df = df[df.Year >= 2014]
df['tt_id'] = None
df['rl_id'] = None
df['release_date'] = None
df['company'] = None
df['mpaa'] = None
df['genres'] = None
df['runtime'] = None
df['director'] = None
df['actors'] = None
df['budget'] = None
df['bo_opening'] = None
df['bo_gross'] = None
df['imdb_score'] = None

def add_info_chunk(df):
    for i in range(df.shape[0]):
        title = df['Title'].iloc[i]
        year = df['Year'].iloc[i]
        movie = mojo_api.Movie()
        movie.get_train_movie_info(title, year)
        df.loc[i, 'tt_id'] = movie.tt_id
        df.loc[i, 'rl_id'] = movie.rl_id
        df.loc[i, 'release_date'] = movie.release_date
        df.loc[i, 'company'] = movie.company
        df.loc[i, 'mpaa'] = movie.mpaa
        df.loc[i, 'genres'] = movie.genres
        df.loc[i, 'runtime'] = movie.movie_length
        df.loc[i, 'director'] = movie.director
        df.loc[i, 'actors'] = movie.actors
        df.loc[i, 'budget'] = movie.budget
        df.loc[i, 'bo_opening'] = movie.bo_opening
        df.loc[i, 'bo_gross'] = movie.bo_gross
        df.loc[i, 'imdb_score'] = movie.imdb_score
#     print(i, movie.title)
    return df

time_start = time()
top = 150
with Pool(cpu_count()) as p:
    total = top #df.shape[1]
    chunks = list(tqdm.tqdm(
                p.imap(add_info_chunk, [df[df['Rank'] == i].reset_index(drop=True) for i in range(1, top+1)]), total=total))
time_end = time()
time_run = (time_end - time_start) / 60
print("Finished. Time: {0:.1f} min.".format(time_run))

df_filled = pd.DataFrame()
for chunk in chunks:
    df_filled = pd.concat([df_filled, chunk])
df_filled = df_filled.sort_values(['Year', 'Rank'])
df_filled = df_filled.reset_index(drop=True)
df_filled = df_filled[~df_filled['tt_id'].isnull()]
df_filled = df_filled[~df_filled['rl_id'].isnull()]
df_filled = df_filled[~df_filled['budget'].isnull()]
df_filled = df_filled[~df_filled['release_date'].isnull()]
df_filled = df_filled[~df_filled['bo_opening'].isnull()]
df_filled = df_filled[~df_filled['bo_gross'].isnull()]
df_filled.to_csv('./data/movie_list/metadata_2014-2019.csv', index=False)





  0%|          | 0/150 [00:00<?, ?it/s][A[A[A[A



  1%|          | 1/150 [00:27<1:08:23, 27.54s/it][A[A[A[A



  2%|▏         | 3/150 [00:27<47:19, 19.32s/it]  [A[A[A[A

imdb_id not found. title: Star Wars: Episode IX - The Rise of Skywalker, year: 2019.






  3%|▎         | 5/150 [00:49<40:40, 16.83s/it][A[A[A[A



  5%|▍         | 7/150 [00:51<28:33, 11.98s/it][A[A[A[A



  6%|▌         | 9/150 [00:59<22:47,  9.70s/it][A[A[A[A



  7%|▋         | 10/150 [01:10<23:04,  9.89s/it][A[A[A[A



  8%|▊         | 12/150 [01:12<16:41,  7.26s/it][A[A[A[A



  9%|▊         | 13/150 [01:22<18:19,  8.03s/it][A[A[A[A



  9%|▉         | 14/150 [01:24<14:08,  6.24s/it][A[A[A[A



 10%|█         | 15/150 [01:29<13:07,  5.83s/it][A[A[A[A



 11%|█         | 16/150 [01:29<09:21,  4.19s/it][A[A[A[A



 11%|█▏        | 17/150 [01:46<17:32,  7.91s/it][A[A[A[A



 13%|█▎        | 19/150 [01:52<14:12,  6.51s/it][A[A[A[A



 14%|█▍        | 21/150 [02:02<13:05,  6.09s/it][A[A[A[A



 15%|█▍        | 22/150 [02:03<09:25,  4.42s/it][A[A[A[A



 15%|█▌        | 23/150 [02:09<10:05,  4.77s/it][A[A[A[A

imdb_id not found. title: The Angry Birds Movie, year: 2016.
imdb_id not found. title: The Upside, year: 2019.






 17%|█▋        | 25/150 [02:21<10:48,  5.19s/it][A[A[A[A



 19%|█▊        | 28/150 [02:23<07:53,  3.88s/it][A[A[A[A



 19%|█▉        | 29/150 [02:39<14:54,  7.39s/it][A[A[A[A



 21%|██▏       | 32/150 [02:40<10:22,  5.28s/it][A[A[A[A



 22%|██▏       | 33/150 [02:54<15:18,  7.85s/it][A[A[A[A



 23%|██▎       | 34/150 [03:00<14:01,  7.25s/it][A[A[A[A

imdb_id not found. title: A Madea Family Funeral, year: 2019.






 25%|██▍       | 37/150 [03:11<11:45,  6.24s/it][A[A[A[A



 27%|██▋       | 41/150 [03:17<08:41,  4.79s/it][A[A[A[A



 28%|██▊       | 42/150 [03:20<07:56,  4.42s/it][A[A[A[A



 29%|██▊       | 43/150 [03:27<08:50,  4.95s/it][A[A[A[A



 29%|██▉       | 44/150 [03:30<07:47,  4.41s/it][A[A[A[A



 30%|███       | 45/150 [03:37<09:11,  5.25s/it][A[A[A[A



 31%|███       | 46/150 [03:38<07:07,  4.12s/it][A[A[A[A



 31%|███▏      | 47/150 [03:44<07:59,  4.66s/it][A[A[A[A



 33%|███▎      | 49/150 [03:52<07:24,  4.40s/it][A[A[A[A



 33%|███▎      | 50/150 [03:59<08:51,  5.32s/it][A[A[A[A



 35%|███▌      | 53/150 [04:09<07:34,  4.68s/it][A[A[A[A



 36%|███▌      | 54/150 [04:10<05:34,  3.48s/it][A[A[A[A



 37%|███▋      | 56/150 [04:14<04:45,  3.04s/it][A[A[A[A



 38%|███▊      | 57/150 [04:22<07:04,  4.57s/it][A[A[A[A



 39%|███▊      | 58/150 [04:22<04:58,  3.25s/it][A[A[A[A



 39%|███▉      | 59/150 [04:25<04:47

imdb_id not found. title: Little, year: 2019.






 44%|████▍     | 66/150 [04:57<06:50,  4.89s/it][A[A[A[A



 45%|████▌     | 68/150 [05:10<07:20,  5.38s/it][A[A[A[A



 47%|████▋     | 70/150 [05:17<06:26,  4.83s/it][A[A[A[A



 47%|████▋     | 71/150 [05:20<05:29,  4.17s/it][A[A[A[A



 48%|████▊     | 72/150 [05:21<04:12,  3.24s/it][A[A[A[A



 49%|████▊     | 73/150 [05:23<03:46,  2.94s/it][A[A[A[A



 49%|████▉     | 74/150 [05:42<09:53,  7.81s/it][A[A[A[A



 52%|█████▏    | 78/150 [06:03<08:22,  6.98s/it][A[A[A[A



 54%|█████▍    | 81/150 [06:04<05:49,  5.06s/it][A[A[A[A



 55%|█████▍    | 82/150 [06:08<05:08,  4.53s/it][A[A[A[A



 55%|█████▌    | 83/150 [06:17<06:37,  5.93s/it][A[A[A[A



 56%|█████▌    | 84/150 [06:25<07:19,  6.65s/it][A[A[A[A



 58%|█████▊    | 87/150 [06:26<05:01,  4.78s/it][A[A[A[A

Isle of Dogs : budget still not found on IMDB.






 59%|█████▉    | 89/150 [06:41<05:38,  5.55s/it][A[A[A[A



 60%|██████    | 90/150 [06:42<04:11,  4.20s/it][A[A[A[A



 61%|██████    | 91/150 [06:47<04:18,  4.37s/it][A[A[A[A

Judy : budget still not found on IMDB.






 61%|██████▏   | 92/150 [06:47<03:01,  3.13s/it][A[A[A[A



 62%|██████▏   | 93/150 [06:49<02:45,  2.90s/it][A[A[A[A



 63%|██████▎   | 94/150 [07:00<04:47,  5.13s/it][A[A[A[A



 63%|██████▎   | 95/150 [07:02<03:54,  4.27s/it][A[A[A[A



 65%|██████▍   | 97/150 [07:03<02:46,  3.14s/it][A[A[A[A

imdb_id not found. title: The Witch, year: 2016.






 65%|██████▌   | 98/150 [07:19<06:02,  6.96s/it][A[A[A[A



 68%|██████▊   | 102/150 [07:28<04:26,  5.55s/it][A[A[A[A



 69%|██████▊   | 103/150 [07:31<03:50,  4.90s/it][A[A[A[A



 69%|██████▉   | 104/150 [07:35<03:27,  4.50s/it][A[A[A[A

Won't You Be My Neighbor? : budget still not found on IMDB.






 71%|███████   | 106/150 [07:44<03:15,  4.44s/it][A[A[A[A



 71%|███████▏  | 107/150 [07:46<02:42,  3.78s/it][A[A[A[A



 73%|███████▎  | 109/150 [07:47<01:57,  2.86s/it][A[A[A[A

The Farewell : budget still not found on IMDB.






 73%|███████▎  | 110/150 [08:00<03:57,  5.94s/it][A[A[A[A



 75%|███████▍  | 112/150 [08:03<02:52,  4.54s/it][A[A[A[A



 76%|███████▌  | 114/150 [08:12<02:42,  4.51s/it][A[A[A[A

The Choice : budget still not found on IMDB.






 77%|███████▋  | 115/150 [08:18<02:53,  4.95s/it][A[A[A[A

Monkey Kingdom : budget still not found on IMDB.






 78%|███████▊  | 117/150 [08:23<02:19,  4.22s/it][A[A[A[A



 79%|███████▊  | 118/150 [08:27<02:14,  4.19s/it][A[A[A[A

imdb_id not found. title: Bears, year: 2014.






 79%|███████▉  | 119/150 [08:30<02:01,  3.91s/it][A[A[A[A



 80%|████████  | 120/150 [08:34<01:59,  3.98s/it][A[A[A[A

Megan Leavey : budget still not found on IMDB.






 81%|████████▏ | 122/150 [08:45<02:03,  4.42s/it][A[A[A[A



 82%|████████▏ | 123/150 [08:50<02:02,  4.54s/it][A[A[A[A



 83%|████████▎ | 125/150 [08:50<01:20,  3.21s/it][A[A[A[A



 84%|████████▍ | 126/150 [08:59<01:58,  4.94s/it][A[A[A[A



 85%|████████▍ | 127/150 [09:00<01:24,  3.68s/it][A[A[A[A



 85%|████████▌ | 128/150 [09:06<01:35,  4.32s/it][A[A[A[A

RBG : budget still not found on IMDB.
Strange Magic : budget still not found on IMDB.
The Lighthouse : budget still not found on IMDB.






 86%|████████▌ | 129/150 [09:10<01:32,  4.41s/it][A[A[A[A

Greta : budget still not found on IMDB.
Dark Waters : budget still not found on IMDB.






 87%|████████▋ | 130/150 [09:19<01:56,  5.81s/it][A[A[A[A



 89%|████████▊ | 133/150 [09:24<01:16,  4.49s/it][A[A[A[A

Padmaavat : budget still not found on IMDB.
Three Identical Strangers : budget still not found on IMDB.
Hotel Mumbai : budget still not found on IMDB.






 89%|████████▉ | 134/150 [09:30<01:20,  5.04s/it][A[A[A[A

No manches Frida : budget still not found on IMDB.
Apollo 11 : budget still not found on IMDB.
Free Solo : budget still not found on IMDB.
The Old Man & the Gun : budget still not found on IMDB.
No Manches Frida 2 : budget still not found on IMDB.






 91%|█████████▏| 137/150 [09:43<01:02,  4.82s/it][A[A[A[A



 92%|█████████▏| 138/150 [09:45<00:47,  3.95s/it][A[A[A[A

The Miracle Season : budget still not found on IMDB.
Amy : budget still not found on IMDB.
Brittany Runs a Marathon : budget still not found on IMDB.
PK : budget still not found on IMDB.
Bajrangi Bhaijaan : budget still not found on IMDB.
The Dead Don't Die : budget still not found on IMDB.
Penguins : budget still not found on IMDB.






 95%|█████████▍| 142/150 [10:09<00:36,  4.59s/it][A[A[A[A

I Am Not Your Negro : budget still not found on IMDB.






 97%|█████████▋| 146/150 [10:12<00:13,  3.46s/it][A[A[A[A

Run the Race : budget still not found on IMDB.






 98%|█████████▊| 147/150 [10:16<00:10,  3.58s/it][A[A[A[A

Island of Lemurs: Madagascar : budget still not found on IMDB.
I'll See You in My Dreams : budget still not found on IMDB.
The Wife : budget still not found on IMDB.
Beatriz at Dinner : budget still not found on IMDB.






 99%|█████████▊| 148/150 [10:29<00:12,  6.39s/it][A[A[A[A



100%|██████████| 150/150 [10:30<00:00,  4.20s/it][A[A[A[A


Finished. Time: 10.5 min.


In [32]:
# Then process movies before 2014
year_start = 2010
year_end = 2014
df = pd.read_csv('./data/movie_list/_movie_list_2000-2019.csv')
df = df[df.Year >= year_start]
df = df[df.Year < year_end]
df['tt_id'] = None
df['rl_id'] = None
df['release_date'] = None
df['company'] = None
df['mpaa'] = None
df['genres'] = None
df['runtime'] = None
df['director'] = None
df['actors'] = None
df['budget'] = None
df['bo_opening'] = None
df['bo_gross'] = None
df['imdb_score'] = None

def add_info_chunk(df):
    for i in range(df.shape[0]):
        title = df['Title'].iloc[i]
        year = df['Year'].iloc[i]
        movie = mojo_api.Movie()
        movie.get_train_movie_info(title, year)
        df.loc[i, 'tt_id'] = movie.tt_id
        df.loc[i, 'rl_id'] = movie.rl_id
        df.loc[i, 'release_date'] = movie.release_date
        df.loc[i, 'company'] = movie.company
        df.loc[i, 'mpaa'] = movie.mpaa
        df.loc[i, 'genres'] = movie.genres
        df.loc[i, 'runtime'] = movie.movie_length
        df.loc[i, 'director'] = movie.director
        df.loc[i, 'actors'] = movie.actors
        df.loc[i, 'budget'] = movie.budget
        df.loc[i, 'bo_opening'] = movie.bo_opening
        df.loc[i, 'bo_gross'] = movie.bo_gross
        df.loc[i, 'imdb_score'] = movie.imdb_score
#     print(i, movie.title)
    return df

time_start = time()
top = 80
with Pool(cpu_count()) as p:
    total = top #df.shape[1]
    chunks = list(tqdm.tqdm(
                p.imap(add_info_chunk, [df[df['Rank'] == i].reset_index(drop=True) for i in range(1, top+1)]), total=total))
time_end = time()
time_run = (time_end - time_start) / 60
print("Finished. Time: {0:.1f} min.".format(time_run))

df_filled = pd.DataFrame()
for chunk in chunks:
    df_filled = pd.concat([df_filled, chunk])
df_filled = df_filled.sort_values(['Year', 'Rank'])
df_filled = df_filled.reset_index(drop=True)
df_filled = df_filled[~df_filled['tt_id'].isnull()]
df_filled = df_filled[~df_filled['rl_id'].isnull()]
df_filled = df_filled[~df_filled['budget'].isnull()]
df_filled = df_filled[~df_filled['release_date'].isnull()]
df_filled = df_filled[~df_filled['bo_opening'].isnull()]
df_filled = df_filled[~df_filled['bo_gross'].isnull()]
df_filled.to_csv('./data/movie_list/metadata_{}-{}.csv'.format(year_start, year_end-1), index=False)





  0%|          | 0/80 [00:00<?, ?it/s][A[A[A[A

imdb_id not found. title: Iron Man 3, year: 2013.






  1%|▏         | 1/80 [00:11<15:13, 11.56s/it][A[A[A[A



  2%|▎         | 2/80 [00:17<12:56,  9.96s/it][A[A[A[A



  4%|▍         | 3/80 [00:17<09:00,  7.02s/it][A[A[A[A



  6%|▋         | 5/80 [00:27<07:53,  6.31s/it][A[A[A[A



  8%|▊         | 6/80 [00:29<06:14,  5.06s/it][A[A[A[A

imdb_id not found. title: Fast & Furious 6, year: 2013.






  9%|▉         | 7/80 [00:33<05:49,  4.78s/it][A[A[A[A



 11%|█▏        | 9/80 [00:43<05:41,  4.82s/it][A[A[A[A



 12%|█▎        | 10/80 [00:47<05:15,  4.51s/it][A[A[A[A



 15%|█▌        | 12/80 [00:49<04:03,  3.58s/it][A[A[A[A

imdb_id not found. title: X-Men: First Class, year: 2011.






 16%|█▋        | 13/80 [01:00<06:10,  5.52s/it][A[A[A[A



 18%|█▊        | 14/80 [01:01<04:38,  4.22s/it][A[A[A[A



 19%|█▉        | 15/80 [01:06<04:50,  4.47s/it][A[A[A[A



 20%|██        | 16/80 [01:07<03:38,  3.41s/it][A[A[A[A



 21%|██▏       | 17/80 [01:13<04:31,  4.32s/it][A[A[A[A



 22%|██▎       | 18/80 [01:15<03:44,  3.62s/it][A[A[A[A



 24%|██▍       | 19/80 [01:23<04:52,  4.80s/it][A[A[A[A



 26%|██▋       | 21/80 [01:27<03:53,  3.96s/it][A[A[A[A



 29%|██▉       | 23/80 [01:32<03:25,  3.60s/it][A[A[A[A



 30%|███       | 24/80 [01:39<04:10,  4.48s/it][A[A[A[A

imdb_id not found. title: Lee Daniels' The Butler, year: 2013.






 31%|███▏      | 25/80 [01:40<03:15,  3.55s/it][A[A[A[A



 34%|███▍      | 27/80 [01:45<02:47,  3.16s/it][A[A[A[A



 35%|███▌      | 28/80 [01:48<02:49,  3.26s/it][A[A[A[A



 36%|███▋      | 29/80 [01:50<02:21,  2.77s/it][A[A[A[A



 38%|███▊      | 30/80 [01:52<02:15,  2.70s/it][A[A[A[A

imdb_id not found. title: The Lion King  2011 3D Release, year: 2011.






 39%|███▉      | 31/80 [02:00<03:33,  4.35s/it][A[A[A[A



 40%|████      | 32/80 [02:01<02:37,  3.28s/it][A[A[A[A



 42%|████▎     | 34/80 [02:07<02:22,  3.11s/it][A[A[A[A



 44%|████▍     | 35/80 [02:12<02:47,  3.73s/it][A[A[A[A



 46%|████▋     | 37/80 [02:15<02:14,  3.12s/it][A[A[A[A



 48%|████▊     | 38/80 [02:21<02:46,  3.96s/it][A[A[A[A



 49%|████▉     | 39/80 [02:24<02:33,  3.74s/it][A[A[A[A



 50%|█████     | 40/80 [02:26<01:58,  2.96s/it][A[A[A[A



 51%|█████▏    | 41/80 [02:29<01:59,  3.06s/it][A[A[A[A



 52%|█████▎    | 42/80 [02:33<02:10,  3.43s/it][A[A[A[A



 54%|█████▍    | 43/80 [02:38<02:27,  4.00s/it][A[A[A[A



 56%|█████▋    | 45/80 [02:39<01:43,  2.95s/it][A[A[A[A



 57%|█████▊    | 46/80 [02:41<01:25,  2.50s/it][A[A[A[A



 59%|█████▉    | 47/80 [02:49<02:19,  4.22s/it][A[A[A[A



 61%|██████▏   | 49/80 [02:52<01:43,  3.35s/it][A[A[A[A



 64%|██████▍   | 51/80 [02:56<01:24,  2.91s/it][A

imdb_id not found. title: Titanic  2012 3D Release, year: 2012.






 65%|██████▌   | 52/80 [03:00<01:35,  3.41s/it][A[A[A[A



 68%|██████▊   | 54/80 [03:06<01:23,  3.21s/it][A[A[A[A



 70%|███████   | 56/80 [03:11<01:12,  3.02s/it][A[A[A[A



 72%|███████▎  | 58/80 [03:17<01:08,  3.11s/it][A[A[A[A



 74%|███████▍  | 59/80 [03:19<00:59,  2.81s/it][A[A[A[A



 75%|███████▌  | 60/80 [03:23<01:00,  3.02s/it][A[A[A[A



 78%|███████▊  | 62/80 [03:26<00:45,  2.53s/it][A[A[A[A



 79%|███████▉  | 63/80 [03:29<00:44,  2.59s/it][A[A[A[A



 80%|████████  | 64/80 [03:32<00:48,  3.00s/it][A[A[A[A



 81%|████████▏ | 65/80 [03:34<00:40,  2.68s/it][A[A[A[A

Temptation: Confessions of a Marriage Counselor : budget still not found on IMDB.






 82%|████████▎ | 66/80 [03:37<00:37,  2.69s/it][A[A[A[A

imdb_id not found. title: Beauty and the Beast  2012 3D Release, year: 2012.






 85%|████████▌ | 68/80 [03:46<00:39,  3.28s/it][A[A[A[A



 89%|████████▉ | 71/80 [03:51<00:24,  2.76s/it][A[A[A[A



 90%|█████████ | 72/80 [03:52<00:17,  2.13s/it][A[A[A[A



 91%|█████████▏| 73/80 [03:57<00:21,  3.06s/it][A[A[A[A



 94%|█████████▍| 75/80 [04:00<00:12,  2.56s/it][A[A[A[A



 95%|█████████▌| 76/80 [04:01<00:08,  2.08s/it][A[A[A[A

imdb_id not found. title: Star Wars: Episode I - The Phantom Menace  2012 3D Release, year: 2012.






 98%|█████████▊| 78/80 [04:06<00:04,  2.32s/it][A[A[A[A



100%|██████████| 80/80 [04:09<00:00,  3.12s/it][A[A[A[A

Finished. Time: 4.2 min.





In [3]:
df_p1 = pd.read_csv('./data/movie_list/metadata_2000-2004.csv')
df_p2 = pd.read_csv('./data/movie_list/metadata_2005-2009.csv')
df_p3 = pd.read_csv('./data/movie_list/metadata_2010-2013.csv')
df_p4 = pd.read_csv('./data/movie_list/metadata_2014-2019.csv')
df = pd.concat([df_p1, df_p2, df_p3, df_p4])
df = df.sort_values(['Year', 'Rank'])
df = df.reset_index(drop=True)
df.to_csv('./data/movie_list/metadata_2000-2019.csv', index=False)