Dependent on Number of Permalinks.

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def check_url(url):
    try:
        response = requests.get(url)
        if response.status_code == 404:
            print(f"404 Error: {url}")
        elif response.status_code == 500:
            print(f"500 Error: {url}")
        else:
            print(f"{response.status_code}: {url}")
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {url}: {e}")

def get_all_links(base_url):
    try:
        response = requests.get(base_url)
        soup = BeautifulSoup(response.text, 'html.parser')
        links = [a['href'] for a in soup.find_all('a', href=True)]
        return links
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {base_url}: {e}")
        return []

def is_valid_url(url):
    parsed = urlparse(url)
    return bool(parsed.scheme) and bool(parsed.netloc)

base_url = 'https://wikipedia.org'
links = get_all_links(base_url)

for link in links:
    if link.startswith('#'):
        # Skip fragment identifiers
        continue
    if not is_valid_url(link):
        # Convert relative URLs to absolute URLs
        link = urljoin(base_url, link)
    if link.startswith('http'):
        check_url(link)
    else:
        print(f"Skipping non-HTTP URL: {link}")


200: https://en.wikipedia.org/
200: https://ja.wikipedia.org/
200: https://ru.wikipedia.org/
200: https://es.wikipedia.org/
200: https://de.wikipedia.org/
200: https://fr.wikipedia.org/
200: https://it.wikipedia.org/
200: https://zh.wikipedia.org/
200: https://pt.wikipedia.org/
200: https://fa.wikipedia.org/
200: https://ar.wikipedia.org/
200: https://de.wikipedia.org/
200: https://en.wikipedia.org/
200: https://es.wikipedia.org/
200: https://fa.wikipedia.org/
200: https://fr.wikipedia.org/
200: https://it.wikipedia.org/
200: https://arz.wikipedia.org/
200: https://nl.wikipedia.org/
200: https://ja.wikipedia.org/
200: https://pl.wikipedia.org/
200: https://pt.wikipedia.org/
200: https://ceb.wikipedia.org/
200: https://sv.wikipedia.org/
200: https://uk.wikipedia.org/
200: https://vi.wikipedia.org/
200: https://war.wikipedia.org/
200: https://zh.wikipedia.org/
200: https://ru.wikipedia.org/
200: https://af.wikipedia.org/
200: https://ast.wikipedia.org/
200: https://az.wikipedia.org/
200:

In [2]:
import nest_asyncio
import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

nest_asyncio.apply()

async def fetch(session, url):
    try:
        async with session.get(url) as response:
            status = response.status
            if status == 404:
                print(f"404 Error: {url}")
            elif status == 500:
                print(f"500 Error: {url}")
            else:
                print(f"{status}: {url}")
    except Exception as e:
        print(f"Error accessing {url}: {e}")

async def main(base_url):
    async with aiohttp.ClientSession() as session:
        response = await session.get(base_url)
        soup = BeautifulSoup(await response.text(), 'html.parser')
        links = [a['href'] for a in soup.find_all('a', href=True)]
        tasks = []
        for link in links:
            if link.startswith('#'):
                continue
            if not urlparse(link).scheme:
                link = urljoin(base_url, link)
            if link.startswith('http'):
                tasks.append(fetch(session, link))
            else:
                print(f"Skipping non-HTTP URL: {link}")
        await asyncio.gather(*tasks)

base_url = 'https://bekushal.com'
asyncio.run(main(base_url))


200: https://bootstrapmade.com/
200: https://www.youtube.com/c/bekushal
200: https://www.bekushal.com
200: https://www.bekushal.com/mbmentor
200: https://www.bekushal.com/shweta
200: https://www.bekushal.com/kushal
200: https://sitare.org/univ
404 Error: https://bekushal.com/index.html
200: https://chat.whatsapp.com/EnUniRPl4iQA6skAmKBhlT
200: https://chat.whatsapp.com/EnUniRPl4iQA6skAmKBhlT
200: https://www.bekushal.com/pyp
200: https://github.com/atmabodha/Vedanta_Datasets
200: https://simplescribblings.blogspot.com/
