In [2]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def fetch_webpage(url, timeout=10):
    """Download a page and return its HTML text, or None on failure.

    Args:
        url: Address of the page. If it does not start with ``http://`` or
            ``https://``, ``https://`` is prepended.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request may block indefinitely.

    Returns:
        The response body as text when the server answers 200. ``None`` for
        any other status code, or when the request itself fails (for example
        a connection error, a timeout or a malformed URL).
    """
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # None is already this function's "could not fetch" signal, so a
        # network-level failure is reported the same way instead of raising.
        print(f"Request failed: {exc}")
        return None
    # Diagnostic output: the response object and its numeric status code.
    print(response)
    print(response.status_code)
    if response.status_code == 200:
        return response.text
    return None

def parse_html(html_content):
    """Build and return a BeautifulSoup tree from ``html_content`` using the 'html.parser' backend."""
    return BeautifulSoup(html_content, 'html.parser')

def extract_css_js_files(soup, base_url):
    """Collect the stylesheets and scripts that a parsed page links to or embeds.

    Args:
        soup: Parsed document exposing ``find_all`` (a BeautifulSoup tree).
        base_url: URL against which relative ``href``/``src`` values are resolved.

    Returns:
        A 4-tuple ``(css_files, js_files, internal_css, internal_js)``:
        absolute URLs of linked stylesheets, absolute URLs of external
        scripts, text of inline ``<style>`` elements, and text of inline
        ``<script>`` elements.
    """
    # A <link rel="stylesheet"> with no href would make urljoin() hand back
    # base_url itself, so the HTML page would be counted as a stylesheet.
    css_files = [
        urljoin(base_url, link.get('href'))
        for link in soup.find_all('link', rel='stylesheet')
        if link.get('href')
    ]
    js_files = [
        urljoin(base_url, script.get('src'))
        for script in soup.find_all('script')
        if script.get('src')
    ]

    # Extract internal CSS and JS (elements whose .string is empty/None are skipped)
    internal_css = [style.string for style in soup.find_all('style') if style.string]
    internal_js = [script.string for script in soup.find_all('script') if script.string]

    return css_files, js_files, internal_css, internal_js

def get_file_size(url, timeout=10):
    """Return the size in bytes of the resource at ``url``, or 0 if it cannot be measured.

    A HEAD request is tried first and its ``Content-Length`` header is used
    when present. Otherwise the resource is downloaded with GET and the
    length of the received body is returned.

    Args:
        url: Absolute URL of the resource.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The size as an int. 0 when the resource does not answer 200 or the
        request fails at the network level.
    """
    try:
        # HEAD requests do not follow redirects unless asked to, so a
        # redirected asset would otherwise be reported as 0 bytes.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
        if response.status_code == 200:
            content_length = response.headers.get('Content-Length')
            # Guard against a malformed header instead of letting int() raise.
            if content_length and content_length.strip().isdigit():
                return int(content_length)
        # Fallback to GET when Content-Length is missing or unusable, or when
        # the server did not answer the HEAD request with 200.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # One unreachable asset should not abort the whole measurement.
        return 0
    if response.status_code == 200:
        return len(response.content)
    return 0

def evaluate_file_sizes(css_files, js_files, internal_css, internal_js):
    """Add up the byte sizes of a page's CSS and JavaScript.

    Args:
        css_files: URLs of external stylesheets; each is measured with ``get_file_size``.
        js_files: URLs of external scripts; each is measured with ``get_file_size``.
        internal_css: Inline stylesheet texts.
        internal_js: Inline script texts.

    Returns:
        ``(total_css_size, total_js_size)`` in bytes.
    """
    def inline_bytes(snippets):
        # len() on a str counts characters; encode first so inline code is
        # measured in bytes, the same unit used for the downloaded files.
        # UTF-8 is assumed because the page's original encoding is not
        # available here.
        return sum(len(snippet.encode('utf-8')) for snippet in snippets)

    total_css_size = sum(get_file_size(url) for url in css_files) + inline_bytes(internal_css)
    total_js_size = sum(get_file_size(url) for url in js_files) + inline_bytes(internal_js)
    return total_css_size, total_js_size

def main(url):
    """Fetch a page and print the combined size of its CSS and of its JavaScript in KB.

    Args:
        url: Page address; ``https://`` is assumed when no scheme is given.

    Prints "Failed to fetch the webpage." when no HTML could be retrieved.
    """
    # Settle the scheme here so the address that is fetched and the base used
    # to resolve relative asset links are the same. A scheme-less base would
    # make urljoin() produce asset URLs that cannot be requested.
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    html_content = fetch_webpage(url)
    if not html_content:
        print("Failed to fetch the webpage.")
        return

    soup = parse_html(html_content)
    css_files, js_files, internal_css, internal_js = extract_css_js_files(soup, url)
    total_css_size, total_js_size = evaluate_file_sizes(css_files, js_files, internal_css, internal_js)
    # Sizes are accumulated in bytes; report them in KB.
    print(f"Total CSS Size: {total_css_size / 1024:.2f} KB")
    print(f"Total JavaScript Size: {total_js_size / 1024:.2f} KB")

if __name__ == "__main__":
    # Pasted addresses often carry stray spaces; strip them so the request is
    # not sent to a malformed URL, and do not start a run on empty input.
    url = input("Enter the URL: ").strip()
    if url:
        main(url)
    else:
        print("No URL entered.")


Enter the URL: https://bekushal.com
<Response [200]>
200
Total CSS Size: 6.73 KB
Total JavaScript Size: 0.00 KB


In [3]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import time

def fetch_webpage(url, retries=3, delay=5, timeout=10):
    """Download a page with a browser-like User-Agent, retrying on HTTP 503.

    Args:
        url: Address of the page. If it does not start with ``http://`` or
            ``https://``, ``https://`` is prepended.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts after a 503 response.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        The response body as text on a 200 answer. ``None`` when every
        attempt returned 503, when any other status code is received, or
        when the request fails at the network level.
    """
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    for attempt in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # None is this function's failure signal; report and use it
            # rather than letting the exception escape.
            print(f"Request failed: {exc}")
            return None
        if response.status_code == 200:
            return response.text
        if response.status_code != 503:
            return None
        # Only wait when another attempt will actually follow.
        if attempt < retries - 1:
            print(f"503 Service Unavailable. Retrying in {delay} seconds...")
            time.sleep(delay)
    return None

def parse_html(html_content):
    """Return ``html_content`` parsed into a BeautifulSoup tree via the 'html.parser' backend."""
    return BeautifulSoup(html_content, 'html.parser')

def extract_css_js_files(soup, base_url):
    """Gather the external and inline CSS/JavaScript found in a parsed page.

    Args:
        soup: Parsed document exposing ``find_all`` (a BeautifulSoup tree).
        base_url: URL used to turn relative ``href``/``src`` values into absolute ones.

    Returns:
        ``(css_files, js_files, internal_css, internal_js)`` where the first
        two are lists of absolute URLs and the last two are lists of the
        text found inside ``<style>`` and ``<script>`` elements.
    """
    css_files = []
    for link in soup.find_all('link', rel='stylesheet'):
        href = link.get('href')
        # Without an href, urljoin() would return base_url unchanged and the
        # page itself would be measured as a stylesheet.
        if href:
            css_files.append(urljoin(base_url, href))

    js_files = []
    for script in soup.find_all('script'):
        src = script.get('src')
        if src:
            js_files.append(urljoin(base_url, src))

    # Extract internal CSS and JS (elements whose .string is empty/None are skipped)
    internal_css = [style.string for style in soup.find_all('style') if style.string]
    internal_js = [script.string for script in soup.find_all('script') if script.string]

    return css_files, js_files, internal_css, internal_js

def get_file_size(url, timeout=10):
    """Return the size in bytes of the resource at ``url``, or 0 if it cannot be measured.

    Requests carry a browser-like User-Agent header. A HEAD request is tried
    first and its ``Content-Length`` header is used when present. Otherwise
    the resource is downloaded with GET and the body length is returned.

    Args:
        url: Absolute URL of the resource.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The size as an int. 0 when the resource does not answer 200 or the
        request fails at the network level.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        # HEAD requests do not follow redirects unless asked to, so a
        # redirected asset would otherwise be reported as 0 bytes.
        response = requests.head(url, headers=headers, allow_redirects=True, timeout=timeout)
        if response.status_code == 200:
            content_length = response.headers.get('Content-Length')
            # Guard against a malformed header instead of letting int() raise.
            if content_length and content_length.strip().isdigit():
                return int(content_length)
        # Fallback to GET when Content-Length is missing or unusable, or when
        # the server did not answer the HEAD request with 200.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # One unreachable asset should not abort the whole measurement.
        return 0
    if response.status_code == 200:
        return len(response.content)
    return 0

def evaluate_file_sizes(css_files, js_files, internal_css, internal_js):
    """Compute the total CSS and total JavaScript size of a page in bytes.

    Args:
        css_files: URLs of external stylesheets, sized via ``get_file_size``.
        js_files: URLs of external scripts, sized via ``get_file_size``.
        internal_css: Texts of inline stylesheets.
        internal_js: Texts of inline scripts.

    Returns:
        ``(total_css_size, total_js_size)`` in bytes.
    """
    # External files are measured in bytes, so inline text is encoded before
    # measuring; len() on a str would count characters instead. UTF-8 is
    # assumed because the page's original encoding is not available here.
    external_css = sum(get_file_size(url) for url in css_files)
    inline_css = sum(len(css.encode('utf-8')) for css in internal_css)
    external_js = sum(get_file_size(url) for url in js_files)
    inline_js = sum(len(js.encode('utf-8')) for js in internal_js)
    return external_css + inline_css, external_js + inline_js

def main(url):
    """Fetch a page and print the total size of its CSS and of its JavaScript in KB.

    Args:
        url: Page address; ``https://`` is assumed when no scheme is given.

    Prints "Failed to fetch the webpage." when no HTML could be retrieved.
    """
    # Relative asset links are resolved against this URL, so it needs a
    # scheme. A scheme-less base would make urljoin() produce asset
    # addresses that cannot be requested.
    has_scheme = url.startswith(('http://', 'https://'))
    page_url = url if has_scheme else 'https://' + url

    html_content = fetch_webpage(page_url)
    if not html_content:
        print("Failed to fetch the webpage.")
        return

    soup = parse_html(html_content)
    css_files, js_files, internal_css, internal_js = extract_css_js_files(soup, page_url)
    total_css_size, total_js_size = evaluate_file_sizes(css_files, js_files, internal_css, internal_js)
    # Sizes are accumulated in bytes; report them in KB.
    print(f"Total CSS Size: {total_css_size / 1024:.2f} KB")
    print(f"Total JavaScript Size: {total_js_size / 1024:.2f} KB")

if __name__ == "__main__":
    # Remove stray whitespace from the typed or pasted address and refuse
    # empty input, which would otherwise be requested as-is.
    url = input("Enter the URL: ").strip()
    if url:
        main(url)
    else:
        print("No URL entered.")


Enter the URL: https://www.bekushal.com/
Total CSS Size: 518.65 KB
Total JavaScript Size: 343.45 KB
