# In [5]:
#################################
# Get all privacy patterns      #
# Return dictionary of patterns #
# mapped to parts               #
#################################
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Listing page that links to every individual pattern page.
url = 'https://privacypatterns.org/patterns/'

# Seconds to wait on any single HTTP request. Without a timeout, requests
# blocks indefinitely on a stalled connection and the run has to be
# interrupted by hand.
REQUEST_TIMEOUT = 30

# ids of the <h2> headings whose first following paragraph is extracted.
# These double as the keys of each pattern's entry in the output.
SECTION_IDS = ('summary', 'context', 'problem', 'solution', 'examples')

# Destination of the scraped data, relative to the working directory.
OUTPUT_PATH = '../../data/privacy_patterns.json'


def _first_paragraph_text(heading):
    """Return the stripped text of the first <p> after *heading*.

    Returns '' when *heading* is None (section absent from the page) or when
    no <p> follows it in the document.

    NOTE(review): find_next('p') searches the rest of the document, so a
    section without its own paragraph yields the next section's paragraph.
    This mirrors the original behaviour -- confirm it is acceptable.
    """
    if heading is None:
        return ''
    paragraph = heading.find_next('p')
    if paragraph is None:
        return ''
    return paragraph.get_text(strip=True)


def _extract_sections(pattern_soup):
    """Map each id in SECTION_IDS to the text found for it in *pattern_soup*."""
    return {
        section_id: _first_paragraph_text(
            pattern_soup.find('h2', {'id': section_id})
        )
        for section_id in SECTION_IDS
    }


# Maps each pattern's href (as it appears in the listing) to its sections.
response_data = {}

# A single session reuses the underlying connection across all the requests
# made to the same host.
with requests.Session() as session:
    # Fetch and parse the listing page; a failure here is fatal because
    # nothing can be scraped without it.
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the ul element with id="patterns_listing"
    ul_element = soup.find('ul', id='patterns_listing')
    if ul_element is None:
        raise RuntimeError(
            f'No <ul id="patterns_listing"> element found at {url}'
        )

    for li in ul_element.find_all('li'):
        # Skip list items that carry no usable link instead of crashing.
        link = li.a
        href = link.get('href') if link is not None else None
        if not href:
            continue

        # urljoin resolves root-relative, relative and absolute hrefs alike.
        pattern_url = urljoin(url, href)

        try:
            pattern_response = session.get(pattern_url, timeout=REQUEST_TIMEOUT)
            pattern_response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable page should not discard everything scraped so
            # far; report it and carry on with the remaining patterns.
            print(f'Skipping {pattern_url}: {exc}')
            continue

        pattern_soup = BeautifulSoup(pattern_response.content, 'html.parser')
        response_data[href] = _extract_sections(pattern_soup)

# Save the extracted data to a JSON file
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(response_data, f, indent=4)

print("Successful")


# Cell output: KeyboardInterrupt