### Import Required Libraries and Set Up Environment Variables

In [33]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [34]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [35]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
#query_url = f"{url}api-key={api_key}&q={filter_query}&sort={sort}&fl={field_list}&begin_date={begin_date}&end_date={end_date}"
query_url = f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"+ f'&fq={filter_query}&sort={sort}&fl={field_list}'

In [45]:
# Create an empty list to store the reviews
reviews_list = []

# Define your parameters
url = "your_url_here"
filter_query = "your_filter_query_here"
sort = "your_sort_option_here"
begin_date = "your_begin_date_here"
end_date = "your_end_date_here"
nyt_api_key = "your_api_key_here"
field_list = "your_field_list_here"

# Loop through pages 0-19
for page in range(20):

    query_url = f"{url}q={filter_query}&sort={sort}&begin_date={begin_date}&end_date={end_date}&api-key={nyt_api_key}&fl={field_list}&page={page}"
    response = requests.get(query_url)
    reviews = response.json()
    time.sleep(12)  # Pause to stay within API limits

    try:
        for review in reviews["response"]["docs"]:
            reviews_list.append(review)
        print(f"Page {page} processed")
    except KeyError:
        print(f"No results on page {page}")
        break

# Create query with a page number
page = 1  # Adjust as necessary
query_url = f"{url}&page={page}"
print(f"Checked page number {page}")  # Print the query page number

try:
    # Make a "GET" request and retrieve the JSON
    articles = requests.get(query_url).json()
    
    # Add a twelve-second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Check if the response is successful
    if articles.get("status") == "OK":
        
        # Try and save the reviews to the reviews_list
        if "response" in articles and "docs" in articles["response"]:
            for doc in articles["response"]["docs"]:
                reviews_list.append(doc)
                #print("Checked page", page_number)
        else:
            print(f"No results found for page {page}")
    else:
        print(f"Failed to retrieve data for page {page}")
except Exception as e:
    print(f"An error occurred while processing page {page}: {e}")

# Print the retrieved reviews
print(f"Retrieved reviews for all pages successfully.")
print(f"Number of reviews in the list: {len(reviews_list)}")

# Print a sample of the reviews
print(json.dumps(reviews_list[:10], indent=4))
    



MissingSchema: Invalid URL 'your_url_hereq=your_filter_query_here&sort=your_sort_option_here&begin_date=your_begin_date_here&end_date=your_end_date_here&api-key=your_api_key_here&fl=your_field_list_here&page=0': No scheme supplied. Perhaps you meant https://your_url_hereq=your_filter_query_here&sort=your_sort_option_here&begin_date=your_begin_date_here&end_date=your_end_date_here&api-key=your_api_key_here&fl=your_field_list_here&page=0?

In [37]:
# Preview the first 5 results in JSON format
from pandas import json_normalize
print(f"Number of reviews in the list: {len(reviews_list)}")
# Use json.dumps with argument indent=4 to format data
import json
num_elements = min(5, len(reviews_list))
for article in reviews_list[:5]:
    print(json.dumps(article, indent=4))

Number of reviews in the list: 0


In [38]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
nyt_df = json_normalize(reviews_list)
reviews_df = pd.json_normalize(reviews_list)
reviews_df

In [42]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
#nyt_df['title'] = nyt_df['headline.main'].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])
extract_title = lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")]
reviews_df['title'] = reviews_df['headline.main'].apply(extract_title)
reviews_df

KeyError: 'headline.main'

In [43]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string


In [44]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = nyt_df['title'].to_list()

KeyError: 'title'

### Access The Movie Database API

In [24]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

TypeError: can only concatenate str (not "NoneType") to str

In [31]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple of 50 requests
request_counter = 1

# Loop through the titles
for title in titles:
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0:
        time.sleep(1)
        print("Sleeping for 1 second...")

    # Add 1 to the request counter
    request_counter += 1

    # Perform a "GET" request for The Movie Database
    #CODE ASSISTANCE FROM JAMIE MCGRANER - FOLLOWING 3 LINES
    search_url = url + title + tmdb_key_string
    search_response = requests.get(search_url)
    search_results = search_response.json()
 
    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        # Get movie id
        #movie_id = movie_data['results'][0]['id']
         if 'results' in search_results and search_results['results']:
            movie_id = search_results['results'][0]['id']


        # Make a request for the full movie details
        #response = requests.get(url + str(movie_id) + tmdb_key_string)
        #CODE ASSISTANCE FROM JAMIE MCGRANER - FOLLOWING 3 LINES
            movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?{tmdb_key_string}"
            movie_response = requests.get(movie_url)
            movie_details = movie_response.json()
        
        #movie_details = response.json()

        # Execute "GET" request with url

        # Extract the genre names into a list
            genres = [genre['name'] for genre in movie_details['genres']]

        # Extract the spoken_languages' English name into a list
            spoken_languages = [lang['english_name'] for lang in movie_details['spoken_languages']]

        # Extract the production_countries' name into a list
            production_countries = [country['name'] for country in movie_details['production_countries']]

        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
            movie_dict = {
                'title': movie_details['title'],
                'original_title': movie_details['original_title'],
                'budget': movie_details['budget'],
                'original_language': movie_details['original_language'],
                'homepage': movie_details['homepage'],
                'overview': movie_details['overview'],
                'popularity': movie_details['popularity'],
                'runtime': movie_details['runtime'],
                'revenue': movie_details['revenue'],
                'release_date': movie_details['release_date'],
                'vote_average': movie_details['vote_average'],
                'vote_count': movie_details['vote_count'],
                'genres': genres,
                'spoken_languages': spoken_languages,
                'production_countries': production_countries
            }
            tmdb_movies_list.append(movie_dict)

        # Print out the title that was found
            print(f"Title found: {movie_details['title']}")

    except:
        print(f"Movie not found for title: {title}")



NameError: name 'titles' is not defined

In [32]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(tmdb_movies_list[:5], indent=4))

[]


In [13]:
# Convert the results to a DataFrame
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)
tmdb_df

Unnamed: 0,title,original_title,budget,genre,language,spoken_languages,homepage,overview,popularity,runtime,revenue,release_date,vote_average,vote_count,production_countries
0,The Attachment Diaries,El apego,0,"[Drama, Mystery, Thriller, Horror]",es,[Spanish],,"Argentina, 1970s. A desperate young woman goes...",0.708,102,0,2021-10-07,4.000,2,[Argentina]
1,You Can Live Forever,You Can Live Forever,0,"[Drama, Romance]",en,"[French, English]",https://gooddeedentertainment.com/you-can-live...,"When Jaime, a gay teenager, is sent to live in...",12.027,96,0,2023-03-24,7.643,21,[Canada]
2,A Tourist's Guide to Love,A Tourist's Guide to Love,0,"[Romance, Comedy, Adventure]",en,[English],https://www.netflix.com/title/81424906,"After an unexpected break up, a travel executi...",25.369,96,0,2023-04-21,6.389,122,[United States of America]
3,Other People's Children,Les Enfants des autres,0,[Drama],fr,[French],https://www.wildbunch.biz/movie/other-peoples-...,"Rachel loves her life, her students, her frien...",16.246,104,0,2022-09-21,6.900,137,[France]
4,One True Loves,One True Loves,0,"[Romance, Drama, Comedy]",en,[English],,Emma and Jesse are living the perfect life tog...,28.872,100,37820,2023-04-07,6.681,36,"[Germany, United States of America]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,The Last Face,The Last Face,0,[Drama],en,"[English, Portuguese]",,"Miguel, a heroic Spanish doctor, puts himself ...",9.081,130,0,2017-01-11,5.617,184,[United States of America]
151,Lost in Paris,Paris pieds nus,0,[Comedy],fr,[French],,Fiona visits Paris for the first time to assis...,6.798,83,0,2017-01-14,6.051,108,"[Belgium, France]"
152,The Other Half,The Other Half,0,"[Drama, Comedy, Romance]",en,[English],,When English Mark surprises his American bride...,1.058,94,0,2006-05-15,6.708,12,[United Kingdom]
153,The Ottoman Lieutenant,The Ottoman Lieutenant,40000000,"[Romance, Drama, War]",en,"[English, Turkish]",https://www.uphe.com/movies/the-ottoman-lieute...,"Lillie, a determined American woman, ventures ...",12.097,111,413844,2017-03-10,6.150,236,"[Turkey, United States of America]"


### Merge and Clean the Data for Export

In [14]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(reviews_df, tmdb_df, on='title', how='inner')
merged_df

Unnamed: 0,title,original_title,budget,genre,language,spoken_languages,homepage,overview,popularity,runtime,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,The Attachment Diaries,El apego,0,"[Drama, Mystery, Thriller, Horror]",es,[Spanish],,"Argentina, 1970s. A desperate young woman goes...",0.708,102,...,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,You Can Live Forever,You Can Live Forever,0,"[Drama, Romance]",en,"[French, English]",https://gooddeedentertainment.com/you-can-live...,"When Jaime, a gay teenager, is sent to live in...",12.027,96,...,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
2,One True Loves,One True Loves,0,"[Romance, Drama, Comedy]",en,[English],,Emma and Jesse are living the perfect life tog...,28.872,100,...,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,"[{'firstname': 'Brandon', 'middlename': None, ...",
3,The Lost Weekend: A Love Story,The Lost Weekend: A Love Story,0,[Documentary],en,[],,May Pang lovingly recounts her life in rock & ...,2.368,97,...,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
4,A Thousand and One,A Thousand and One,0,[Drama],en,[English],https://www.focusfeatures.com/a-thousand-and-one/,Struggling but unapologetically living on her ...,11.704,116,...,‘A Thousand and One’ Review: A New York Love S...,Critic’s Pick,,An Unbending Will Meets a Shifting City,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,In Search of Fellini,In Search of Fellini,0,"[Drama, Adventure]",en,[English],,"Lucy, a small-town girl from Ohio, discovers t...",4.103,93,...,Review: Love of Il Maestro Drives ‘In Search o...,,,In Search of Fellini,,,,By Monica Castillo,"[{'firstname': 'Monica', 'middlename': None, '...",
121,The Last Face,The Last Face,0,[Drama],en,"[English, Portuguese]",,"Miguel, a heroic Spanish doctor, puts himself ...",9.081,130,...,Review: Aid Workers in Love and War in Sean Pe...,,,The Last Face,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
122,Lost in Paris,Paris pieds nus,0,[Comedy],fr,[French],,Fiona visits Paris for the first time to assis...,6.798,83,...,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",
123,The Other Half,The Other Half,0,"[Drama, Comedy, Romance]",en,[English],,When English Mark surprises his American bride...,1.058,94,...,Review: A Combustible Pair Find Love in ‘The O...,,,Review: A Combustible Pair Find Love in ‘The O...,,,,By Andy Webster,"[{'firstname': 'Andy', 'middlename': None, 'la...",


In [15]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries']

# Create a list of characters to remove
characters_to_remove = ['[', ']', "'"]

# Loop through the list of columns to fix
for col in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[col] = merged_df[col].astype(str)

    # Loop through characters to remove
    for char in characters_to_remove:
        merged_df[col] = merged_df[col].str.replace(char, '')


# Display the fixed DataFrame
merged_df.head()

Unnamed: 0,title,original_title,budget,genre,language,spoken_languages,homepage,overview,popularity,runtime,...,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,The Attachment Diaries,El apego,0,"Drama, Mystery, Thriller, Horror",es,Spanish,,"Argentina, 1970s. A desperate young woman goes...",0.708,102,...,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,You Can Live Forever,You Can Live Forever,0,"Drama, Romance",en,"French, English",https://gooddeedentertainment.com/you-can-live...,"When Jaime, a gay teenager, is sent to live in...",12.027,96,...,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
2,One True Loves,One True Loves,0,"Romance, Drama, Comedy",en,English,,Emma and Jesse are living the perfect life tog...,28.872,100,...,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,"[{'firstname': 'Brandon', 'middlename': None, ...",
3,The Lost Weekend: A Love Story,The Lost Weekend: A Love Story,0,Documentary,en,,,May Pang lovingly recounts her life in rock & ...,2.368,97,...,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny,"[{'firstname': 'Glenn', 'middlename': None, 'l...",
4,A Thousand and One,A Thousand and One,0,Drama,en,English,https://www.focusfeatures.com/a-thousand-and-one/,Struggling but unapologetically living on her ...,11.704,116,...,‘A Thousand and One’ Review: A New York Love S...,Critic’s Pick,,An Unbending Will Meets a Shifting City,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",


In [16]:
# Drop "byline.person" column
merged_df = merged_df.drop("byline.person", axis=1)
merged_df


In [17]:
# Delete duplicate rows and reset index
merged_df = merged_df.drop_duplicates()
merged_df = merged_df.reset_index(drop=True)
merged_df

Unnamed: 0,title,original_title,budget,genre,language,spoken_languages,homepage,overview,popularity,runtime,...,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.organization
0,The Attachment Diaries,El apego,0,"Drama, Mystery, Thriller, Horror",es,Spanish,,"Argentina, 1970s. A desperate young woman goes...",0.708,102,...,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,
1,You Can Live Forever,You Can Live Forever,0,"Drama, Romance",en,"French, English",https://gooddeedentertainment.com/you-can-live...,"When Jaime, a gay teenager, is sent to live in...",12.027,96,...,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,
2,One True Loves,One True Loves,0,"Romance, Drama, Comedy",en,English,,Emma and Jesse are living the perfect life tog...,28.872,100,...,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,,,,By Brandon Yu,
3,The Lost Weekend: A Love Story,The Lost Weekend: A Love Story,0,Documentary,en,,,May Pang lovingly recounts her life in rock & ...,2.368,97,...,327,‘The Lost Weekend: A Love Story’ Review: When ...,,,The Lost Weekend: A Love Story,,,,By Glenn Kenny,
4,A Thousand and One,A Thousand and One,0,Drama,en,English,https://www.focusfeatures.com/a-thousand-and-one/,Struggling but unapologetically living on her ...,11.704,116,...,971,‘A Thousand and One’ Review: A New York Love S...,Critic’s Pick,,An Unbending Will Meets a Shifting City,,,,By Manohla Dargis,


In [18]:
# Export data to CSV without the index
merged_df.to_csv('merged_data.csv', index=False)