In [2]:
import requests
from bs4 import BeautifulSoup

def get_html_content(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching content from {url}: {e}")
        return None

def extract_article_title(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    title = soup.find('title').text
    return title

def extract_article_text(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    article_text = {}
    
    for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        paragraph = heading.find_next('p')
        if paragraph:
            article_text[heading.text] = paragraph.text
    
    return article_text

def collect_redirect_links(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    redirect_links = []
    
    for link in soup.find_all('a', {'class': 'mw-redirect'}):
        redirect_links.append(link.get('href'))
    
    return redirect_links

def process_wikipedia_page(url):
    html_content = get_html_content(url)
    
    if html_content:
        title = extract_article_title(html_content)
        article_text = extract_article_text(html_content)
        redirect_links = collect_redirect_links(html_content)
        
        result = {
            'title': title,
            'article_text': article_text,
            'redirect_links': redirect_links
        }
        
        return result
    else:
        return None

# Example usage:
if __name__ == '__main__':
    wikipedia_url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
    result = process_wikipedia_page(wikipedia_url)
    
    if result:
        print(f"Title: {result['title']}")
        print("\nArticle Text:")
        for heading, paragraph in result['article_text'].items():
            print(f"\n{heading}\n{paragraph}\n")
        print("\nRedirect Links:")
        for link in result['redirect_links']:
            print(link)









Title: Python (programming language) - Wikipedia

Article Text:

Contents




Python (programming language)




History
Python was conceived in the late 1980s[43] by Guido van Rossum at Centrum Wiskunde & Informatica (CWI) in the Netherlands as a successor to the ABC programming language, which was inspired by SETL,[44] capable of exception handling and interfacing with the Amoeba operating system.[13] Its implementation began in December 1989.[45] Van Rossum shouldered sole responsibility for the project, as the lead developer, until 12 July 2018, when he announced his "permanent vacation" from his responsibilities as Python's "benevolent dictator for life", a title the Python community bestowed upon him to reflect his long-term commitment as the project's chief decision-maker.[46] In January 2019, active Python core developers elected a five-member Steering Council to lead the project.[47][48]



Design philosophy and features
Python is a multi-paradigm programming language. Object-o