### Import Required Libraries and Set Up Environment Variables

In [52]:
# Dependencies
import re
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
from pandas import json_normalize

In [53]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

In [54]:
nyt_api_key

'GQ6L5nzGGZkpch4dTwYHk8WKM3XAON9Y'

In [55]:
tmdb_api_key

'31a4563557426ea03e824038071bb9f4'

### Access the New York Times API

In [56]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL


In [57]:
res = requests.get(f"{url}q=noodles&api-key={nyt_api_key}").json()
res

{'status': 'OK',
 'copyright': 'Copyright (c) 2024 The New York Times Company. All Rights Reserved.',
 'response': {'docs': [{'abstract': 'Naengmyeon, a filling but light meal, is a necessity for Koreans all over the world.',
    'web_url': 'https://www.nytimes.com/2023/11/22/magazine/naengmyeon-recipe.html',
    'snippet': 'Naengmyeon, a filling but light meal, is a necessity for Koreans all over the world.',
    'lead_paragraph': 'At Olle, the bustling Korean restaurant on East 30th Street in Manhattan, the morning starts with noodle dough. From a mass run through with buckwheat and sweet-potato starch, the noodles are extruded and boiled to order, which is just one of many reasons the naengmyeon at Olle is the best in the city. In less than a second, the doughy tangle drops into a stainless-steel tub of whirling, boiling water. Suddenly animated, the strands swim away like an octopus in flight.',
    'print_section': 'MM',
    'print_page': '18',
    'source': 'The New York Times',


In [58]:
# Create an empty list to store the reviews


reviews_list = []

# Loop through pages 0-19
for page_number in range(20):
    # Create a query with a page number
    query_params = {
        'page': page_number,
        "api-key": nyt_api_key,
    }

    # Make a "GET" request and retrieve the JSON
    response = requests.get('https://api.nytimes.com/svc/search/v2/articlesearch.json', params=query_params)

    # Add a twelve-second interval between queries to stay within API query limits
    time.sleep(12)

    if response.status_code == 200:
        reviews = response.json()

        # Try and save the reviews to the reviews_list
        try:
            # Loop through the reviews["response"]["docs"] and append each review to the list
            for review in reviews["response"]["docs"]:
                reviews_list.append(review)
        except KeyError:
            # Print the page that was just retrieved
            print(f"Page {page_number} retrieved successfully.")
    else:
        print(f"Error fetching page {page_number}. Status code: {response.status_code}")
        break 

print(reviews_list)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [59]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
preview_results = json.dumps(reviews_list[:5], indent=4)
print(preview_results)

[
    {
        "abstract": "Danvers wrestles with her demons. Navarro does, too, but hers appear to be of a different sort.",
        "web_url": "https://www.nytimes.com/2024/02/04/arts/television/true-detective-recap-episode-4.html",
        "snippet": "Danvers wrestles with her demons. Navarro does, too, but hers appear to be of a different sort.",
        "lead_paragraph": "There\u2019s a classic bit on \u201cThe Simpsons\u201d where a panel of children are seated as a focus group for \u201cThe Itchy & Scratchy Show\u201d and asked what they want to see from the long-running cartoon, which has started to flag in the ratings. After an exasperating series of responses, the moderator sums up his findings: \u201cSo you want a realistic, down-to-earth show that\u2019s completely off the wall and swarming with magic robots?\u201d",
        "source": "The New York Times",
        "multimedia": [
            {
                "rank": 0,
                "subtype": "xlarge",
                

In [60]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
nyt_df = json_normalize(reviews_list)
nyt_df

Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,print_section,print_page
0,Danvers wrestles with her demons. Navarro does...,https://www.nytimes.com/2024/02/04/arts/televi...,Danvers wrestles with her demons. Navarro does...,There’s a classic bit on “The Simpsons” where ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Television', 'r...",2024-02-05T03:01:08+0000,article,Culture,...,,,,,,By Scott Tobias,"[{'firstname': 'Scott', 'middlename': None, 'l...",,,
1,At the end of her tribute to the outspoken art...,https://www.nytimes.com/2024/02/04/arts/music/...,At the end of her tribute to the outspoken art...,In an emotional ode to Sinead O’Connor at the ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Pop and Rock Mu...",2024-02-05T02:54:38+0000,article,Culture,...,,,,,,By Julia Jacobs,"[{'firstname': 'Julia', 'middlename': None, 'l...",,,
2,"During her win for best pop vocal album, the s...",https://www.nytimes.com/2024/02/04/arts/music/...,"During her win for best pop vocal album, the s...",As she accepted the Grammy for best pop vocal ...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Pop and Rock Mu...",2024-02-05T02:49:32+0000,article,Culture,...,,,,,,By Christopher Kuo,"[{'firstname': 'Christopher', 'middlename': No...",,,
3,Her duet with Luke Combs was one of her few pu...,https://www.nytimes.com/2024/02/04/arts/music/...,Her duet with Luke Combs was one of her few pu...,"In a major coup for the Grammys, an influentia...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Music', 'rank':...",2024-02-05T01:30:47+0000,article,Culture,...,,,,,,By Marc Hogan,"[{'firstname': 'Marc', 'middlename': None, 'la...",,,
4,The police responded to a call from a U.S. Air...,https://www.nytimes.com/2024/02/04/us/seattle-...,The police responded to a call from a U.S. Air...,Garages are often cluttered with dusty boxes o...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Nuclear Weapons...",2024-02-05T01:10:15+0000,article,Express,...,,,,,,By Gaya Gupta,"[{'firstname': 'Gaya', 'middlename': None, 'la...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"The strikes, in response to a drone attack tha...",https://www.nytimes.com/2024/02/02/us/politics...,"The strikes, in response to a drone attack tha...",The United States on Friday carried out a seri...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'glocations', 'value': 'Iraq', 'rank...",2024-02-02T21:12:02+0000,article,Washington,...,,U.S. Strikes Back At Iranian Forces In Syria A...,,,,"By Helene Cooper, Eric Schmitt and Julian E. B...","[{'firstname': 'Helene', 'middlename': None, '...",,A,1
196,"Fani T. Willis, the Fulton County, Ga., distri...",https://www.nytimes.com/interactive/2024/02/02...,"Fani T. Willis, the Fulton County, Ga., distri...","Fani T. Willis, the Fulton County, Ga., distri...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",[],2024-02-02T21:12:02+0000,multimedia,U.S.,...,,,,,,,[],,,
197,"Originally a painter, she worked low-level off...",https://www.nytimes.com/2024/02/02/arts/alice-...,"Originally a painter, she worked low-level off...","Alice Mackler, who toiled in obscurity as a pa...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Deaths (Obituar...",2024-02-02T21:02:53+0000,article,Obits,...,,,,,,By Will Heinrich,"[{'firstname': 'Will', 'middlename': None, 'la...",,,
198,"For the first time, a Sinn Fein politician wil...",https://www.nytimes.com/2024/02/02/world/europ...,"For the first time, a Sinn Fein politician wil...","After two years of political gridlock, Norther...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Good Friday Agr...",2024-02-02T20:41:45+0000,article,Foreign,...,,"After Two-Year Hiatus, Power-Sharing Making Co...",,,,By Stephen Castle,"[{'firstname': 'Stephen', 'middlename': None, ...",,,


In [61]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
def extract_title(headline):
    match = re.search(r'\u2018(.*?)\u2019', headline)
    if match:
        return match.group(1).strip() + " Review"
    else:
        return None


nyt_df['title'] = nyt_df['headline.main'].apply(extract_title)


print(nyt_df[['headline.main', 'title']])

                                         headline.main                  title
0    ‘True Detective’ Season 4, Episode 4 Recap: Th...  True Detective Review
1    Annie Lennox Honors Sinead O’Connor With Power...                   None
2    Taylor Swift Announces New Album During Grammy...                   None
3    Tracy Chapman Returns to the Grammy Stage for ...        Fast Car Review
4    Remnants of a Nuclear Missile Are Found in a G...                   None
..                                                 ...                    ...
195  U.S. Conducts Retaliatory Strikes Against Iran...                   None
196                   Read the Filing From Fani Willis                   None
197  Alice Mackler, Sculptor Discovered in Her 80s,...                   None
198  What to Know About the Return of Power-Sharing...                   None
199            Remembering Chita Rivera’s Unique Voice                   None

[200 rows x 2 columns]


In [62]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']}; "
        # Append the keyword item to the extracted_keywords string
        extracted_keywords += keyword
    return extracted_keywords.strip()

# Apply the function to create a new "keywords_str" column
nyt_df['keywords_str'] = nyt_df['keywords'].apply(extract_keywords)

# Print the DataFrame to check the results
print(nyt_df[['keywords', 'keywords_str']])

                                              keywords  \
0    [{'name': 'subject', 'value': 'Television', 'r...   
1    [{'name': 'subject', 'value': 'Pop and Rock Mu...   
2    [{'name': 'subject', 'value': 'Pop and Rock Mu...   
3    [{'name': 'subject', 'value': 'Music', 'rank':...   
4    [{'name': 'subject', 'value': 'Nuclear Weapons...   
..                                                 ...   
195  [{'name': 'glocations', 'value': 'Iraq', 'rank...   
196                                                 []   
197  [{'name': 'subject', 'value': 'Deaths (Obituar...   
198  [{'name': 'subject', 'value': 'Good Friday Agr...   
199  [{'name': 'subject', 'value': 'Culture (Arts)'...   

                                          keywords_str  
0    subject: Television; creative_works: True Dete...  
1    subject: Pop and Rock Music; subject: Grammy A...  
2    subject: Pop and Rock Music; persons: Swift, T...  
3    subject: Music; subject: Country Music; person...  
4    subject: Nucl

In [63]:
# Create a list from the "title" column
titles_list = nyt_df['title'].to_list()

# Print the list to check the results
print(titles_list)

['True Detective Review', None, None, 'Fast Car Review', None, None, None, 'Barbie Review', None, 'Good Review', None, None, 'Some Like It Hot Review', None, None, None, None, None, None, None, None, None, None, None, 'Bark of Millions Review', None, 'We Review', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'They Come in Waves Review', None, None, 'I Take a Long, Deep, Wild Draught on Your Lips Review', None, None, 'Rock Review', None, None, None, None, None, None, None, 'It Turned Out That We Lived About 10 Blocks From Each Other Review', None, None, None, None, None, None, None, None, 'Dalifornia, Review', None, None, None, None, None, None, None, None, None, None, None, 'Invasion Review', None, None, None, None, None, None, None, None, None, None, None, None, None, 'If Destruction Be Our Lot, We Must Ourselves Be Its Author and Finisher Review', None, None, None, None, None, None, None, None, None, None, None, None

### Access The Movie Database API

In [64]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [65]:
# Loop through the titles
for title in titles_list:
    # Check if title is not None
    if title:
        # Check if we need to sleep before making a request
        if request_counter % 50 == 0 and request_counter != 0:
            time.sleep(10)  # Sleep for 10 seconds after every 50 requests

        # Add 1 to the request counter
        request_counter += 1

        # Perform a "GET" request for The Movie Database
        response_tmdb = requests.get(url + title.replace(" ", "%20") + tmdb_key_string)

        # Include a try clause to search for the full movie details.
        # Use the except clause to print out a statement if a movie
        # is not found.
        try:
            # Get movie id
            movie_id = response_tmdb.json()["results"][0]["id"]

            # Make a request for the full movie details
            response_full_movie = requests.get(f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}")

            # Execute "GET" request with url
            if response_full_movie.status_code == 200:
                full_movie_data = response_full_movie.json()

                # Extract the genre names into a list
                genres_list = [genre["name"] for genre in full_movie_data.get("genres", [])]

                # Extract the spoken_languages' English name into a list
                spoken_languages_list = [lang["english_name"] for lang in full_movie_data.get("spoken_languages", [])]

                # Extract the production_countries' name into a list
                production_countries_list = [country["name"] for country in full_movie_data.get("production_countries", [])]

                # Add the relevant data to a dictionary and
                # append it to the tmdb_movies_list list
                movie_info = {
                    "title": title,
                    "genres": genres_list,
                    "spoken_languages": spoken_languages_list,
                    "production_countries": production_countries_list
                }
                tmdb_movies_list.append(movie_info)

                # Print out the title that was found
                logging.info(f"Movie details found for: {title}")

            else:
                logging.error(f"Error fetching full movie details for {title}. Status code: {response_full_movie.status_code}")

        except (IndexError, KeyError) as e:
            logging.error(f"Error processing details for: {title}. Error: {e}")

# Print the list of movie details
print(tmdb_movies_list)

ERROR:root:Error processing details for: True Detective Review. Error: list index out of range
ERROR:root:Error processing details for: Fast Car Review. Error: list index out of range
ERROR:root:Error processing details for: Barbie Review. Error: list index out of range
INFO:root:Movie details found for: Good Review
ERROR:root:Error processing details for: Some Like It Hot Review. Error: list index out of range
ERROR:root:Error processing details for: Bark of Millions Review. Error: list index out of range
INFO:root:Movie details found for: We Review
ERROR:root:Error processing details for: They Come in Waves Review. Error: list index out of range
ERROR:root:Error processing details for: I Take a Long, Deep, Wild Draught on Your Lips Review. Error: list index out of range
INFO:root:Movie details found for: Rock Review
ERROR:root:Error processing details for: It Turned Out That We Lived About 10 Blocks From Each Other Review. Error: list index out of range
ERROR:root:Error processing de

[{'title': 'Good Review', 'genres': [], 'spoken_languages': ['English'], 'production_countries': []}, {'title': 'We Review', 'genres': [], 'spoken_languages': [], 'production_countries': []}, {'title': 'Rock Review', 'genres': [], 'spoken_languages': [], 'production_countries': []}, {'title': 'Good Review', 'genres': [], 'spoken_languages': ['English'], 'production_countries': []}, {'title': 'We Review', 'genres': [], 'spoken_languages': [], 'production_countries': []}, {'title': 'Rock Review', 'genres': [], 'spoken_languages': [], 'production_countries': []}]


In [66]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
preview_results = json.dumps(tmdb_movies_list[:5], indent=4)
print(preview_results)

[
    {
        "title": "Good Review",
        "genres": [],
        "spoken_languages": [
            "English"
        ],
        "production_countries": []
    },
    {
        "title": "We Review",
        "genres": [],
        "spoken_languages": [],
        "production_countries": []
    },
    {
        "title": "Rock Review",
        "genres": [],
        "spoken_languages": [],
        "production_countries": []
    },
    {
        "title": "Good Review",
        "genres": [],
        "spoken_languages": [
            "English"
        ],
        "production_countries": []
    },
    {
        "title": "We Review",
        "genres": [],
        "spoken_languages": [],
        "production_countries": []
    }
]


In [67]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)

# Display the DataFrame
tmdb_df.head()

Unnamed: 0,title,genres,spoken_languages,production_countries
0,Good Review,[],[English],[]
1,We Review,[],[],[]
2,Rock Review,[],[],[]
3,Good Review,[],[English],[]
4,We Review,[],[],[]


### Merge and Clean the Data for Export

In [133]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(nyt_df, tmdb_df, on='title', how='inner')

# Display the merged DataFrame
print(merged_df)

                                            abstract  \
0  Federal Reserve officials are debating when to...   
1  Federal Reserve officials are debating when to...   
4  Deployed to Djibouti, Maj. Michael Haley has m...   
5  Deployed to Djibouti, Maj. Michael Haley has m...   

                                             web_url  \
0  https://www.nytimes.com/2024/02/04/business/ec...   
1  https://www.nytimes.com/2024/02/04/business/ec...   
2  https://www.nytimes.com/2024/02/04/world/ameri...   
3  https://www.nytimes.com/2024/02/04/world/ameri...   
4  https://www.nytimes.com/2024/02/04/us/politics...   
5  https://www.nytimes.com/2024/02/04/us/politics...   

                                             snippet  \
0  Federal Reserve officials are debating when to...   
1  Federal Reserve officials are debating when to...   
4  Deployed to Djibouti, Maj. Michael Haley has m...   
5  Deployed to Djibouti, Maj. Michael Haley has m...   

                                      lead_pa

In [134]:
# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries']

# Create a list of characters to remove
characters_to_remove = ["[", "]", "'", '"']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)

    # Loop through characters to remove
    for char in characters_to_remove:
        merged_df[column] = merged_df[column].str.replace(char, '')

# Display the fixed DataFrame
merged_df.head()

Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,byline.original,byline.person,byline.organization,print_section,print_page,title,keywords_str,genres,spoken_languages,production_countries
0,Federal Reserve officials are debating when to...,https://www.nytimes.com/2024/02/04/business/ec...,Federal Reserve officials are debating when to...,"Jerome H. Powell, the chair of the Federal Res...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Powell, Jerome ...",2024-02-05T00:01:04+0000,article,Business,...,By Jeanna Smialek,"[{'firstname': 'Jeanna', 'middlename': None, '...",,B,5.0,Good Review,"persons: Powell, Jerome H; subject: Interest R...",,English,
1,Federal Reserve officials are debating when to...,https://www.nytimes.com/2024/02/04/business/ec...,Federal Reserve officials are debating when to...,"Jerome H. Powell, the chair of the Federal Res...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Powell, Jerome ...",2024-02-05T00:01:04+0000,article,Business,...,By Jeanna Smialek,"[{'firstname': 'Jeanna', 'middlename': None, '...",,B,5.0,Good Review,"persons: Powell, Jerome H; subject: Interest R...",,English,
2,Officials are warning of major destruction and...,https://www.nytimes.com/2024/02/04/world/ameri...,Officials are warning of major destruction and...,Days after devastating wildfires ripped throug...,The New York Times,[],"[{'name': 'subject', 'value': 'Fires and Firef...",2024-02-04T17:17:20+0000,article,Foreign,...,By Annie Correal and John Bartlett,"[{'firstname': 'Annie', 'middlename': None, 'l...",,A,9.0,We Review,subject: Fires and Firefighters; subject: Wild...,,,
3,Officials are warning of major destruction and...,https://www.nytimes.com/2024/02/04/world/ameri...,Officials are warning of major destruction and...,Days after devastating wildfires ripped throug...,The New York Times,[],"[{'name': 'subject', 'value': 'Fires and Firef...",2024-02-04T17:17:20+0000,article,Foreign,...,By Annie Correal and John Bartlett,"[{'firstname': 'Annie', 'middlename': None, 'l...",,A,9.0,We Review,subject: Fires and Firefighters; subject: Wild...,,,
4,"Deployed to Djibouti, Maj. Michael Haley has m...",https://www.nytimes.com/2024/02/04/us/politics...,"Deployed to Djibouti, Maj. Michael Haley has m...",When Nikki Haley conceded her deflating third-...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Presidential El...",2024-02-04T10:01:02+0000,article,Politics,...,By Ken Bensinger and Matthew Rosenberg,"[{'firstname': 'Ken', 'middlename': None, 'las...",,,,Rock Review,subject: Presidential Election of 2024; subjec...,,,


In [135]:
# Check the columns of the DataFrame
print(merged_df.columns)

# Drop "byline.person" column using the correct indexing approach
merged_df = merged_df.drop(columns=['byline.person'])

# Display the updated DataFrame
merged_df.head()

Index(['abstract', 'web_url', 'snippet', 'lead_paragraph', 'source',
       'multimedia', 'keywords', 'pub_date', 'document_type', 'news_desk',
       'section_name', 'subsection_name', 'type_of_material', '_id',
       'word_count', 'uri', 'headline.main', 'headline.kicker',
       'headline.content_kicker', 'headline.print_headline', 'headline.name',
       'headline.seo', 'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'print_section', 'print_page', 'title',
       'keywords_str', 'genres', 'spoken_languages', 'production_countries'],
      dtype='object')


Unnamed: 0,abstract,web_url,snippet,lead_paragraph,source,multimedia,keywords,pub_date,document_type,news_desk,...,headline.sub,byline.original,byline.organization,print_section,print_page,title,keywords_str,genres,spoken_languages,production_countries
0,Federal Reserve officials are debating when to...,https://www.nytimes.com/2024/02/04/business/ec...,Federal Reserve officials are debating when to...,"Jerome H. Powell, the chair of the Federal Res...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Powell, Jerome ...",2024-02-05T00:01:04+0000,article,Business,...,,By Jeanna Smialek,,B,5.0,Good Review,"persons: Powell, Jerome H; subject: Interest R...",,English,
1,Federal Reserve officials are debating when to...,https://www.nytimes.com/2024/02/04/business/ec...,Federal Reserve officials are debating when to...,"Jerome H. Powell, the chair of the Federal Res...",The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'persons', 'value': 'Powell, Jerome ...",2024-02-05T00:01:04+0000,article,Business,...,,By Jeanna Smialek,,B,5.0,Good Review,"persons: Powell, Jerome H; subject: Interest R...",,English,
2,Officials are warning of major destruction and...,https://www.nytimes.com/2024/02/04/world/ameri...,Officials are warning of major destruction and...,Days after devastating wildfires ripped throug...,The New York Times,[],"[{'name': 'subject', 'value': 'Fires and Firef...",2024-02-04T17:17:20+0000,article,Foreign,...,,By Annie Correal and John Bartlett,,A,9.0,We Review,subject: Fires and Firefighters; subject: Wild...,,,
3,Officials are warning of major destruction and...,https://www.nytimes.com/2024/02/04/world/ameri...,Officials are warning of major destruction and...,Days after devastating wildfires ripped throug...,The New York Times,[],"[{'name': 'subject', 'value': 'Fires and Firef...",2024-02-04T17:17:20+0000,article,Foreign,...,,By Annie Correal and John Bartlett,,A,9.0,We Review,subject: Fires and Firefighters; subject: Wild...,,,
4,"Deployed to Djibouti, Maj. Michael Haley has m...",https://www.nytimes.com/2024/02/04/us/politics...,"Deployed to Djibouti, Maj. Michael Haley has m...",When Nikki Haley conceded her deflating third-...,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","[{'name': 'subject', 'value': 'Presidential El...",2024-02-04T10:01:02+0000,article,Politics,...,,By Ken Bensinger and Matthew Rosenberg,,,,Rock Review,subject: Presidential Election of 2024; subjec...,,,


In [138]:
# Delete duplicate rows
merged_df = merged_df.apply(str)
merged_df = merged_df.drop_duplicates()
updated_merged_df = pd.DataFrame(merged_df)
# Reset index
updated_merged_df = updated_merged_df.reset_index(drop=True)

# Display the updated DataFrame
updated_merged_df.head

<bound method NDFrame.head of                                                     0
0   0    Federal Reserve officials are debating wh...
1   0    https://www.nytimes.com/2024/02/04/busine...
2   0    Federal Reserve officials are debating wh...
3   0    Jerome H. Powell, the chair of the Federa...
4   0    The New York Times\n1    The New York Tim...
5   0    [{'rank': 0, 'subtype': 'xlarge', 'captio...
6   0    [{'name': 'persons', 'value': 'Powell, Je...
7   0    2024-02-05T00:01:04+0000\n1    2024-02-05...
8   0    article\n1    article\n2    article\n3   ...
9   0    Business\n1    Business\n2     Foreign\n3...
10  0    Business Day\n1    Business Day\n2       ...
11  0     Economy\n1     Economy\n2    Americas\n3...
12  0    News\n1    News\n2    News\n3    News\n4 ...
13  0    nyt://article/81ee1331-3bbb-5b14-a491-91c...
14  0     604\n1     604\n2    1222\n3    1222\n4 ...
15  0    nyt://article/81ee1331-3bbb-5b14-a491-91c...
16  0    Fed Chair Powell Says Officials Need More..

In [140]:
# Export data to CSV without the index
updated_merged_df.to_csv('your_file.csv', index=False)