In [3]:
import requests
import pandas as pd
import time

def fetch_gdelt_context(query, max_records=200, start=0):
    base_url = "https://api.gdeltproject.org/api/v2/context/context"
    
    params = {
        'query': query,
        'mode': 'artlist',
        'format': 'json',
        'maxrecords': max_records,
        'start': start
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse JSON response
        data = response.json()
        articles = data.get('articles', [])

        return articles

    except requests.exceptions.RequestException as e:
        print(f"Error fetching articles: {e}")
        return []

# List of topics to query
topics = [
    'climate change', 'artificial intelligence', 'global warming', 'covid-19', 'mental health',
    'electric vehicles', 'blockchain', 'cybersecurity', 'space exploration', 'quantum computing',
    'renewable energy', 'cryptocurrency', 'big data', 'internet of things', '5G technology',
    'nanotechnology', 'biotechnology', 'genomics', 'robotics', 'smart cities'
]

# Initialize DataFrame to store all articles
all_articles_df = pd.DataFrame()

# Fetch data for each topic
for topic in topics:
    start = 0
    while True:
        articles = fetch_gdelt_context(query=topic, max_records=200, start=start)
        if not articles:
            break  # Exit loop if no more articles are returned

        # Convert list of dictionaries to DataFrame
        articles_df = pd.DataFrame(articles)

        # Append to the main DataFrame
        all_articles_df = pd.concat([all_articles_df, articles_df], ignore_index=True)

        # Break if we reach the required number of articles
        if len(all_articles_df) >= 9000:
            break

        # Update start for next page
        start += 200

        print("Fetched total data : ",len(all_articles_df))

        # Pause to respect API rate limits
        time.sleep(1)

    # Check if we have reached the required number of articles
    if len(all_articles_df) >= 9000:
        break

print(f"Fetched {len(all_articles_df)} articles.")

# Save DataFrame to CSV file
all_articles_df.to_csv('gdelt_news_articles.csv', index=False)

# Optionally, display or further process the DataFrame
print(all_articles_df.head())


Fetched total data :  109
Fetched total data :  218
Fetched total data :  327
Fetched total data :  436
Fetched total data :  545
Fetched total data :  654
Fetched total data :  763
Fetched total data :  872
Fetched total data :  981
Fetched total data :  1090
Fetched total data :  1199
Fetched total data :  1308
Fetched total data :  1417
Fetched total data :  1526
Fetched total data :  1635
Fetched total data :  1744
Fetched total data :  1853
Fetched total data :  1962
Fetched total data :  2071
Fetched total data :  2180
Fetched total data :  2289
Fetched total data :  2398
Fetched total data :  2507
Fetched total data :  2616
Fetched total data :  2725
Fetched total data :  2834
Fetched total data :  2943
Fetched total data :  3052
Fetched total data :  3161
Fetched total data :  3270
Fetched total data :  3379
Fetched total data :  3488
Fetched total data :  3597
Fetched total data :  3706
Fetched total data :  3815
Fetched total data :  3924
Fetched total data :  4033
Fetched to