In [1]:
import requests
from bs4 import BeautifulSoup
import time
import csv
import os

In [None]:
def get_text(article_url, timeout=30):
    """Download an article page and return the text of its main content block.

    Parameters
    ----------
    article_url : str
        Absolute URL of the article page.
    timeout : float or tuple, optional
        Passed to ``requests.get``. Without a timeout a stalled connection
        would block the whole scrape indefinitely.

    Returns
    -------
    str
        Stripped text of the first ``<div class="field-item">`` on the page,
        ``"No content found"`` when the page has no such element, or
        ``"Error fetching article"`` when the request fails (connection
        error, timeout, or non-2xx status).
    """
    headers = {'User-Agent': 'Mozilla/5.0'}

    try:
        response = requests.get(article_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scraper: report the failure and hand back a sentinel
        # string instead of raising, so a caller's loop keeps going.
        print(f"Error fetching article: {article_url}, {e}")
        return "Error fetching article"

    soup = BeautifulSoup(response.text, 'html.parser')
    content = soup.find('div', class_='field-item')

    return content.text.strip() if content else "No content found"

In [None]:
def _next_article_id(csv_path):
    """Return the id to give the next row appended to ``csv_path``.

    The result is one more than the integer in the first column of the last
    data row, or 1 when the file is missing, empty, or holds only a header.
    """
    if not os.path.isfile(csv_path):
        return 1

    last_id = 0
    with open(csv_path, 'r', newline='', encoding='utf-8') as existing:
        for row in csv.reader(existing):
            if not row:
                continue  # blank line
            try:
                last_id = int(row[0])
            except ValueError:
                # Header row, or any row whose first cell is not an integer.
                continue
    return last_id + 1


def get_articles(base_url, max_pages=115, csv_n="isw_articles.csv", timeout=30, delay=2):
    """Scrape article listings page by page and append them to a CSV file.

    Each listing page is fetched from ``f"{base_url}&page={page}"`` for
    ``page`` in ``1..max_pages``; ``base_url`` must therefore already contain
    a query string. For every ``div.views-row`` that has an ``<h2><a>`` link,
    one row with ``id``, ``title``, ``date`` and ``summary`` is appended and
    flushed immediately, so an interrupted run keeps what it already wrote.
    Ids continue from the last row already present in the file.

    NOTE(review): page numbering starts at 1; if the site's pager is
    zero-based the first listing page is never requested -- confirm.

    Parameters
    ----------
    base_url : str
        Listing URL, including its query string, without the ``page`` parameter.
    max_pages : int, optional
        Number of listing pages to request.
    csv_n : str, optional
        Path of the CSV file to append to. Created if missing.
    timeout : float or tuple, optional
        Timeout passed to ``requests.get`` for each listing page.
    delay : float, optional
        Seconds to sleep after each written row.

    Returns
    -------
    int
        The next unused article id (last id written, or already present, plus one).
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    fieldnames = ['id', 'title', 'date', 'summary']

    # Work out the resume point before opening the file for append, so the
    # same file is never open for reading and writing at once.
    next_id = _next_article_id(csv_n)
    # A file that exists but is empty still needs its header row.
    needs_header = not os.path.isfile(csv_n) or os.path.getsize(csv_n) == 0

    with open(csv_n, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        if needs_header:
            writer.writeheader()
            csvfile.flush()

        for page in range(1, max_pages + 1):
            url = f"{base_url}&page={page}"
            try:
                response = requests.get(url, headers=headers, timeout=timeout)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                # Skip the page that failed and carry on with the next one.
                print(f"Error fetching {page}: {e}")
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            articles_list = soup.find_all('div', class_='views-row')
            print(f"Found {len(articles_list)} articles on page {page}.")

            for article in articles_list:
                title_tag = article.find('h2')
                link_tag = title_tag.find('a') if title_tag else None
                if not link_tag:
                    continue  # row without a titled link is not an article

                date_tag = article.find('span', class_='datespan')
                summary_tag = article.find('p')

                title = link_tag.text.strip()
                # The URL is only used for the progress log, so a missing
                # href must not abort the run.
                article_url = 'https://www.understandingwar.org' + link_tag.get('href', '')
                date = date_tag.text.strip() if date_tag else "Unknown"
                summary = summary_tag.text.strip() if summary_tag else "No summary"

                print(f"Fetching article {next_id}: {title} ({article_url})")

                writer.writerow({
                    'id': next_id,
                    'title': title,
                    'date': date,
                    'summary': summary,
                })
                # Flush after every row so a crash loses at most one record.
                csvfile.flush()

                next_id += 1
                time.sleep(delay)

            print(f"Completed page {page} of {max_pages}")

    return next_id

In [4]:
if __name__ == "__main__":
    # Listing of ISW publications (backgrounders, maps, other work, reports),
    # newest first; get_articles appends the page number to this URL.
    base_url = "https://www.understandingwar.org/publications?type%5B0%5D=backgrounder&type%5B1%5D=map&type%5B2%5D=other_work&type%5B3%5D=report&tid%5B0%5D=300&field_lastname_value=&sort_by=created&sort_order=DESC"
    csv_n = "data/ISW_csv/isw_articles.csv"

    # Opening the CSV fails if its directory is missing, so create it first.
    # get_articles opens the file in append mode, which creates the file on
    # demand, so no empty placeholder file is needed.
    csv_dir = os.path.dirname(csv_n)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)

    num_articles = get_articles(base_url, max_pages=114, csv_n=csv_n)
    # get_articles returns the next unused id, i.e. one more than the id of
    # the last row in the file.
    print(f"Saved {num_articles - 1} to {csv_n}")

Found 10 articles on page 1.
Fetching article 1: Putin is Still Stealing Ukrainian Children (https://www.understandingwar.org/backgrounder/putin-still-stealing-ukrainian-children)
Fetching article 2: Russian Offensive Campaign Assessment, March 23, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-23-2025)
Fetching article 3: Russian Offensive Campaign Assessment, March 22, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-22-2025)
Fetching article 4: Russian Offensive Campaign Assessment, March 21, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-21-2025)
Fetching article 5: Russian Offensive Campaign Assessment, March 20, 2025 (https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-march-20-2025)
Fetching article 6: Russian Offensive Campaign Assessment, March 19, 2025 (https://www.understandingwar.org/backgrounder/rus