# Fetching News Articles from the Guardian with API

This notebook fetches Guardian articles using a developer API key. The same code is used for both the Syrian and the Ukrainian refugee queries; the only parameter that changes is the search keyword.

In [None]:
import csv
import os

import requests

# Download every Guardian article matching the search keyword and write one
# CSV row per article (id, date taken from the id, section, title, body).

# The key is read from the environment so it never has to be pasted into the
# notebook; the previous `{api_key}` placeholder was not runnable Python.
api_key = os.environ.get("GUARDIAN_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GUARDIAN_API_KEY environment variable to your Guardian "
        "developer API key before running this cell."
    )

# HTTPS keeps the API key out of clear-text traffic.
endpoint = "https://content.guardianapis.com/search"

params = {
    "q": "Syrian refugee",  # The search keyword; the only value changed per topic
    "api-key": api_key,
    "page-size": 100,  # The number of articles per page
    "page": 1,  # The starting page
    "show-fields": "body",  # Include the article body in the API response
}

filename = '/Users/emiliatrulsson/Desktop/syria_guardian.csv'

# Explicit UTF-8 so non-ASCII article text is written the same way on every
# platform instead of depending on the locale's default encoding.
with open(filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['id', 'publication_date', 'sectionName', 'webTitle', 'body'])

    while True:
        # A timeout stops the cell from hanging forever on a stalled connection.
        response = requests.get(endpoint, params=params, timeout=30)
        data = response.json()

        # A reply without the 'response' envelope cannot be paginated; report
        # what the server said instead of failing with a bare KeyError.
        payload = data.get('response')
        if payload is None:
            raise RuntimeError(
                f"Guardian API request failed (HTTP {response.status_code}): "
                f"{data.get('message', data)}"
            )

        # Missing or empty results both mean there is nothing more to write.
        articles = payload.get('results')
        if not articles:
            break

        for article in articles:
            id_parts = article['id'].split('/')
            # The date is taken from the id path; presumably five-part ids
            # look like section/yyyy/mon/dd/slug -- verify against real ids.
            if len(id_parts) == 5:
                date_parts = id_parts[1:4]
            else:
                date_parts = id_parts[1:5]
            writer.writerow([
                article['id'],
                '/'.join(date_parts),
                article['sectionName'],
                article['webTitle'],
                article.get('fields', {}).get('body', ''),
            ])

        # Stop after the last page the API reports, rather than requesting a
        # page past the end just to receive an error reply.
        total_pages = payload.get('pages')
        if total_pages is not None and params['page'] >= total_pages:
            break

        params['page'] += 1


In [None]:
# The extracted date column was in an odd format, so we add another column, 'publication_date_fixed', that holds the date in a standard format (YYYY-MM-DD).
import datetime
import sys 

# Copy the scraped CSV to a new file, appending a 'publication_date_fixed'
# column (YYYY-MM-DD) parsed from the date components of each article id.

# Article bodies can exceed the csv module's default field size limit, so
# raise it as far as the platform allows. sys.maxsize does not fit in a C long
# everywhere (e.g. Windows), where field_size_limit raises OverflowError, so
# back off until a value is accepted.
field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(field_limit)
        break
    except OverflowError:
        field_limit //= 10

filename = 'syria_guardian.csv'
new_filename = 'syria_guardian_final.csv'

# The three id components before the final one are parsed as
# year / abbreviated month name / day.
date_format = "%Y %b %d"

# newline='' on the input keeps line breaks inside quoted article bodies
# intact; explicit UTF-8 avoids depending on the locale's default encoding.
with open(filename, 'r', newline='', encoding='utf-8') as file, \
        open(new_filename, 'w', newline='', encoding='utf-8') as new_file:
    reader = csv.reader(file)
    writer = csv.writer(new_file)

    headers = next(reader)
    headers.append('publication_date_fixed')
    writer.writerow(headers)

    for row in reader:
        article_id = row[0]
        id_parts = article_id.split('/')

        # Default to an empty cell when no date can be extracted.
        publication_date_fixed = ''
        if len(id_parts) >= 4:
            date_string = ' '.join(id_parts[-4:-1])
            try:
                publication_date_fixed = datetime.datetime.strptime(
                    date_string, date_format
                ).strftime("%Y-%m-%d")
            except ValueError:
                # The id has enough parts but they are not a date; leave the
                # cell blank instead of aborting the whole conversion.
                pass

        row.append(publication_date_fixed)
        writer.writerow(row)
