# Sample Response Format from the OMDb API

In [None]:
# Example of the JSON payload returned for a single title, kept as a reference
# for the available fields. The fetching code further down reads only "Poster".
# Note that fields without a value hold the string "N/A" (see "DVD",
# "Production" and "Website" below) rather than being absent.
response ={
  "Title": "Guardians of the Galaxy Vol. 2",
  "Year": "2017",
  "Rated": "PG-13",
  "Released": "05 May 2017",
  "Runtime": "136 min",
  "Genre": "Action, Adventure, Comedy",
  "Director": "James Gunn",
  "Writer": "James Gunn, Dan Abnett, Andy Lanning",
  "Actors": "Chris Pratt, Zoe Saldaña, Dave Bautista",
  "Plot": "The Guardians struggle to keep together as a team while dealing with their personal family issues, notably Star-Lord's encounter with his father, the ambitious celestial being Ego.",
  "Language": "English",
  "Country": "United States",
  "Awards": "Nominated for 1 Oscar. 15 wins & 61 nominations total",
  "Poster": "https://m.media-amazon.com/images/M/MV5BNWE5MGI3MDctMmU5Ni00YzI2LWEzMTQtZGIyZDA5MzQzNDBhXkEyXkFqcGc@._V1_SX300.jpg",
  "Ratings": [
    {
      "Source": "Internet Movie Database",
      "Value": "7.6/10"
    },
    {
      "Source": "Rotten Tomatoes",
      "Value": "85%"
    },
    {
      "Source": "Metacritic",
      "Value": "67/100"
    }
  ],
  "Metascore": "67",
  "imdbRating": "7.6",
  "imdbVotes": "788,570",
  "imdbID": "tt3896198",
  "Type": "movie",
  "DVD": "N/A",
  "BoxOffice": "$389,813,101",
  "Production": "N/A",
  "Website": "N/A",
  "Response": "True"
}

# Get Poster URLs from the OMDb API (https://www.omdbapi.com)

In [None]:
import pandas as pd

# Load the link and movie tables from the ml-latest-small dataset directory.
# tmdbId / imdbId are read as strings instead of numbers
# (presumably so leading zeros in the IDs are kept — TODO confirm).
links = pd.read_csv('./dataset/ml-latest-small/links.csv', dtype={'tmdbId': str, 'imdbId': str})
movies = pd.read_csv('./dataset/ml-latest-small/movies.csv')

# Every imdbId as a plain list, in file order; a later cell slices this list
# to pick which IDs to request.
imdbID_list = links['imdbId'].tolist()

In [7]:
def get_movie_id_by_imdb_id(imdb_id, links_df=None):
  """Return the movieId of the first row whose imdbId equals `imdb_id`.

  Args:
    imdb_id: Value compared against the 'imdbId' column. It must have the same
      type as the column values (the notebook loads that column as strings).
    links_df: Optional DataFrame with 'imdbId' and 'movieId' columns. When
      omitted, the notebook-level `links` table is used.

  Returns:
    The 'movieId' value of the first matching row.

  Raises:
    IndexError: If no row matches `imdb_id`.
  """
  if links_df is None:
    links_df = links
  # A single .loc call selects rows and column together, avoiding chained indexing.
  matches = links_df.loc[links_df['imdbId'] == imdb_id, 'movieId']
  if matches.empty:
    # Same exception type as the old `.values[0]` lookup, but with a message
    # that names the ID that could not be found.
    raise IndexError(f'imdbId {imdb_id!r} not found in links table')
  return matches.iloc[0]

In [None]:
import asyncio
import os

import aiohttp

# OMDb API key. It is read from the OMDB_API_KEY environment variable so the key
# does not have to be saved inside the notebook; falls back to '' when unset.
api_key = os.environ.get('OMDB_API_KEY', '')

async def fetch_poster(session, imdb_id):
  """Fetch the poster URL of one title from the OMDb API.

  Args:
    session: An open aiohttp client session (anything exposing a compatible
      `get(url, params=...)` async context manager works).
    imdb_id: IMDb ID without the leading 'tt'; the prefix is added here.

  Returns:
    A tuple `(imdb_id, poster_url)`. `poster_url` is whatever the response
    holds under 'Poster', or None when that key is missing, the body is not a
    JSON object, or the request itself failed.
  """
  # Passing the query as `params` lets the HTTP client do the URL encoding.
  params = {'i': f'tt{imdb_id}', 'apikey': api_key}
  try:
    async with session.get('https://www.omdbapi.com/', params=params) as response:
      # content_type=None: parse the body as JSON even if the server labels it
      # with a different Content-Type.
      data = await response.json(content_type=None)
  except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
    # A single failed or malformed response must not abort the whole batch;
    # report it as "no poster" so the caller can count and retry it.
    return imdb_id, None
  if not isinstance(data, dict):
    return imdb_id, None
  return imdb_id, data.get('Poster')

async def get_poster_urls_async(imdb_ids):
  """Fetch poster URLs for many IMDb IDs concurrently.

  Args:
    imdb_ids: Iterable of IMDb IDs, in the form `fetch_poster` expects.

  Returns:
    A dict mapping movieId (looked up with `get_movie_id_by_imdb_id`) to the
    poster URL. IDs whose request raised are mapped to None instead of
    aborting the whole batch.
  """
  imdb_ids = list(imdb_ids)  # materialise once: iterated for tasks and for results
  async with aiohttp.ClientSession() as session:
    tasks = [fetch_poster(session, imdb_id) for imdb_id in imdb_ids]
    # return_exceptions=True: without it the first failing request would
    # propagate and every already-fetched result would be thrown away.
    results = await asyncio.gather(*tasks, return_exceptions=True)

  poster_dict = {}
  # gather() keeps input order, so results line up with imdb_ids.
  for imdb_id, result in zip(imdb_ids, results):
    if isinstance(result, BaseException):
      poster_url = None
    else:
      _, poster_url = result
    movie_id = get_movie_id_by_imdb_id(imdb_id)
    poster_dict[movie_id] = poster_url
  return poster_dict

In [None]:
import nest_asyncio
import asyncio

# Allow nested event loops inside Jupyter Notebook
nest_asyncio.apply()

# Call the async function; each API key may only be used for 1000 calls per day.
# Only the IDs from index 8900 onward are requested here
# (presumably one batch of a multi-day run — adjust the slice per batch).
poster_dict = await get_poster_urls_async(imdbID_list[8900:])

In [88]:
# Show the fetched {movieId: poster URL} mapping
poster_dict

{np.int64(134853): 'https://m.media-amazon.com/images/M/MV5BOTgxMDQwMDk0OF5BMl5BanBnXkFtZTgwNjU5OTg2NDE@._V1_SX300.jpg',
 np.int64(134859): 'https://m.media-amazon.com/images/M/MV5BMzg2NjE5OTQwNl5BMl5BanBnXkFtZTgwNjYwNDk2NTE@._V1_SX300.jpg',
 np.int64(134861): 'https://m.media-amazon.com/images/M/MV5BMjMzNDU4NDk4OF5BMl5BanBnXkFtZTgwMzkxODA2MDE@._V1_SX300.jpg',
 np.int64(134881): 'https://m.media-amazon.com/images/M/MV5BMTk1MTkwMzU4Nl5BMl5BanBnXkFtZTgwNjY0MDE1NTE@._V1_SX300.jpg',
 np.int64(135133): 'https://m.media-amazon.com/images/M/MV5BNjQzNDI2NTU1Ml5BMl5BanBnXkFtZTgwNTAyMDQ5NjE@._V1_SX300.jpg',
 np.int64(135137): 'https://m.media-amazon.com/images/M/MV5BMzIyNTc1NmUtOTBlNS00YzEwLTlkZTMtZjJkMGM2YzNkYmY3XkEyXkFqcGc@._V1_SX300.jpg',
 np.int64(135143): 'https://m.media-amazon.com/images/M/MV5BMjMxOTM1OTI4MV5BMl5BanBnXkFtZTgwODE5OTYxMDI@._V1_SX300.jpg',
 np.int64(135198): 'https://m.media-amazon.com/images/M/MV5BMTI4NjIwNDY2Nl5BMl5BanBnXkFtZTcwNTkwMTM2Mw@@._V1_SX300.jpg',
 np.int64(135216

In [None]:
# How many movies ended up with no poster URL (value is None).
# NOTE(review): a literal "N/A" string is not counted here — confirm whether
# the API can return that for the Poster field.
null_count = sum(url is None for url in poster_dict.values())
null_count

0

In [None]:
# Create poster_urls.csv file if it doesn't exist
import os
import pandas as pd
# Location of the CSV that stores the collected poster URLs.
poster_urls_path = './dataset/ml-latest-small/poster_urls.csv'
# First run only: write a file containing just the header row, so the next
# cell can read it with pd.read_csv. An existing file is left untouched.
if not os.path.exists(poster_urls_path):
  poster_urls_df = pd.DataFrame(columns=['movieId', 'poster_url'])
  poster_urls_df.to_csv(poster_urls_path, index=False)

In [None]:
# Load the poster URLs saved by earlier runs
poster_urls = pd.read_csv('./dataset/ml-latest-small/poster_urls.csv')
# Turn this run's {movieId: poster_url} mapping into rows
new_urls = pd.DataFrame(poster_dict.items(), columns=['movieId', 'poster_url'])
# Append the new rows, then keep only the newest row per movieId. Without the
# de-duplication, re-running this cell (or re-fetching a batch) and saving
# would write the same movies to the CSV again.
poster_urls = (
  pd.concat([poster_urls, new_urls], ignore_index=True)
  .drop_duplicates(subset='movieId', keep='last', ignore_index=True)
)
poster_urls

Unnamed: 0,movieId,poster_url
0,1,https://m.media-amazon.com/images/M/MV5BZTA3OW...
1,2,https://m.media-amazon.com/images/M/MV5BYTFkMj...
2,3,https://m.media-amazon.com/images/M/MV5BMjc0OW...
3,4,https://m.media-amazon.com/images/M/MV5BZWU4Nz...
4,5,https://m.media-amazon.com/images/M/MV5BOTMwNz...
...,...,...
9737,2518,https://m.media-amazon.com/images/M/MV5BNzNlNT...
9738,2520,https://m.media-amazon.com/images/M/MV5BOTg4NW...
9739,2521,https://m.media-amazon.com/images/M/MV5BODIwZT...
9740,2522,https://m.media-amazon.com/images/M/MV5BOTZmNm...


In [None]:
# Write the combined table back to the same CSV it was loaded from;
# index=False keeps the DataFrame row index out of the file.
poster_urls.to_csv('./dataset/ml-latest-small/poster_urls.csv', index=False)