In [2]:
! pip install cssutils

Collecting cssutils
  Downloading cssutils-2.11.1-py3-none-any.whl.metadata (8.7 kB)
Downloading cssutils-2.11.1-py3-none-any.whl (385 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m385.7/385.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cssutils
Successfully installed cssutils-2.11.1


In [None]:
import requests
from bs4 import BeautifulSoup
import cssutils

def extract_typography_properties(element, css_rules):
    style = element.get('style', '')
    properties = {
        'font-family': None,
        'font-size': None,
        'font-style': None,
        'font-weight': None
    }

    # Extract inline styles
    for prop in properties.keys():
        if prop in style:
            start = style.find(prop) + len(prop) + 1
            end = style.find(';', start)
            properties[prop] = style[start:end].strip()

    # Extract styles from CSS rules
    for rule in css_rules:
        if element.name in rule.selectorText:
            for prop in properties.keys():
                if properties[prop] is None and rule.style.getPropertyValue(prop):
                    properties[prop] = rule.style.getPropertyValue(prop)

    return properties

def get_css_rules(url):
    response = requests.get(url)
    css = response.text
    sheet = cssutils.parseString(css)
    return sheet.cssRules

def scrape_elements(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Get CSS rules from external stylesheets
    css_rules = []
    for link in soup.find_all('link', rel='stylesheet'):
        css_url = link['href']
        if not css_url.startswith('http'):
            css_url = url + css_url
        css_rules.extend(get_css_rules(css_url))

    elements = {
        'headers': soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']),
        'paragraphs': soup.find_all('p'),
        'images': soup.find_all('img'),
        'containers': soup.find_all(['div', 'section', 'article']),
        'lists': soup.find_all(['ul', 'ol', 'li']),
        'navigation': soup.find_all(['nav', 'ul', 'li'])
    }

    typography = {}
    for key, elems in elements.items():
        typography[key] = [extract_typography_properties(elem, css_rules) for elem in elems]

    return elements, typography

url = 'https://bekushal.com'
elements, typography = scrape_elements(url)
print(typography)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160312: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property

AttributeError: 'CSSFontFaceRule' object has no attribute 'selectorText'

In [None]:
import urllib.parse
from typing import Optional

import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> tuple[list[str], list[str], list[dict]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: list[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: list[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: list[dict] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

In [None]:
url = 'https://bekushal.com'

# Call the function
css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url, verbose=True)

# Print the results
print("External Stylesheets:")
for css in css_from_external_stylesheets:
    print(css)  # Print the first 500 characters for brevity

print("\nStyle Tags:")
for css in css_from_style_tags:
    print(css)  # Print the first 500 characters for brevity

print("\nInline CSS:")
for css in inline_css:
    print(f"Tag: {css['tag']}")
    print(f"CSS: {css['css']}\n")

downloading external CSS stylesheet https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400;1,600;1,700&family=Montserrat:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400;1,500;1,600;1,700&family=Raleway:ital,wght@0,300;0,400;0,500;0,600;0,700;1,300;1,400;1,500;1,600;1,700&display=swap
downloading external CSS stylesheet https://bekushal.com/data/assets/vendor/bootstrap/css/bootstrap.min.css
downloading external CSS stylesheet https://bekushal.com/data/assets/vendor/bootstrap-icons/bootstrap-icons.css
downloading external CSS stylesheet https://bekushal.com/data/assets/vendor/aos/aos.css
downloading external CSS stylesheet https://bekushal.com/data/assets/vendor/glightbox/css/glightbox.min.css
downloading external CSS stylesheet https://bekushal.com/data/assets/vendor/swiper/swiper-bundle.min.css
downloading external CSS stylesheet https://bekushal.com/data/assets/css/main.css
Extracted the following CSS from [https://bekushal.com]:
   

In [None]:
import urllib.parse
from typing import Optional
import requests
from bs4 import BeautifulSoup
import cssutils

def extract_css_from_webpage(url: str, request_kwargs: Optional[dict] = None, verbose: bool = False):
    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def extract_typography_properties(element, css_rules):
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if not hasattr(rule, 'selectorText'):
            continue
        if element.name in rule.selectorText:
            for prop in properties.keys():
                if properties[prop] is None and rule.style.getPropertyValue(prop):
                    properties[prop] = rule.style.getPropertyValue(prop)
    return properties

def find_properties(url: str):
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_parser = cssutils.CSSParser()
    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        sheet = css_parser.parseString(css)
        css_rules.extend(sheet.cssRules)

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160312: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property

Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: 0.875rem
  font-weight: 600
  margin: calc(-0.5 * var(--bs-modal-header-padding-y)) calc(-0.5 * var(--bs-modal-header-padding-x)) calc(-0.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: ce

In [None]:
import urllib.parse
from typing import Optional
import requests
from bs4 import BeautifulSoup
import cssutils
from concurrent.futures import ThreadPoolExecutor

def extract_css_from_webpage(url: str, request_kwargs: Optional[dict] = None, verbose: bool = False):
    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    session = requests.Session()
    session.headers.update(request_kwargs["headers"])
    url_response = session.get(url, timeout=request_kwargs["timeout"])
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "lxml")

    css_urls = [urllib.parse.urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet")]
    if verbose:
        for css_url in css_urls:
            print(f"downloading external CSS stylesheet {css_url}")

    with ThreadPoolExecutor() as executor:
        css_from_external_stylesheets = list(executor.map(lambda css_url: session.get(css_url).text, css_urls))

    css_from_style_tags = [style_tag.string for style_tag in soup.find_all("style")]

    inline_css = [{"tag": str(tag), "css": tag["style"]} for tag in soup.find_all(style=True)]

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def extract_typography_properties(element, css_rules):
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if not hasattr(rule, 'selectorText'):
            continue
        if element.name in rule.selectorText:
            for prop in properties.keys():
                if properties[prop] is None and rule.style.getPropertyValue(prop):
                    properties[prop] = rule.style.getPropertyValue(prop)
    return properties

def find_properties(url: str):
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_parser = cssutils.CSSParser()
    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        sheet = css_parser.parseString(css)
        css_rules.extend(sheet.cssRules)

    soup = BeautifulSoup(requests.get(url).content, "lxml")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160136: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160160: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160210: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: sticky [5:160234: position]
ERROR	Property: Invalid value for "CSS Level 2.1" property: -webkit-sticky [5:160312: position]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property

Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: 0.875rem
  font-weight: 600
  margin: calc(-0.5 * var(--bs-modal-header-padding-y)) calc(-0.5 * var(--bs-modal-header-padding-x)) calc(-0.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: ce

In [None]:
import logging
import urllib.parse
from typing import Optional
import requests
from bs4 import BeautifulSoup
import cssutils
from concurrent.futures import ThreadPoolExecutor

# Set up logging to suppress cssutils warnings and errors
logging.getLogger('cssutils').setLevel(logging.CRITICAL)

def extract_css_from_webpage(url: str, request_kwargs: Optional[dict] = None, verbose: bool = False):
    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    session = requests.Session()
    session.headers.update(request_kwargs["headers"])
    url_response = session.get(url, timeout=request_kwargs["timeout"])
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "lxml")

    css_urls = [urllib.parse.urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet")]
    if verbose:
        for css_url in css_urls:
            print(f"downloading external CSS stylesheet {css_url}")

    with ThreadPoolExecutor() as executor:
        css_from_external_stylesheets = list(executor.map(lambda css_url: session.get(css_url).text, css_urls))

    css_from_style_tags = [style_tag.string for style_tag in soup.find_all("style")]

    inline_css = [{"tag": str(tag), "css": tag["style"]} for tag in soup.find_all(style=True)]

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def filter_css(css):
    filtered_css = []
    for line in css.splitlines():
        if not any(prop in line for prop in ['-ms-flex-order', 'order', '97vh', '-webkit-zoom-in', 'zoom-in']):
            filtered_css.append(line)
    return '\n'.join(filtered_css)

def extract_typography_properties(element, css_rules):
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if not hasattr(rule, 'selectorText'):
            continue
        if element.name in rule.selectorText:
            for prop in properties.keys():
                if properties[prop] is None and rule.style.getPropertyValue(prop):
                    properties[prop] = rule.style.getPropertyValue(prop)
    return properties

def find_properties(url: str):
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_parser = cssutils.CSSParser()
    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        filtered_css = filter_css(css)
        sheet = css_parser.parseString(filtered_css)
        css_rules.extend(sheet.cssRules)

    soup = BeautifulSoup(requests.get(url).content, "lxml")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties
        # Debugging information
        print(f"Element: {element.name}, Properties: {properties}")

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


ERROR	Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-default) [36:3: font-family]
ERROR:CSSUTILS:Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-default) [36:3: font-family]
ERROR	Property: Invalid value for "CSS Level 2.1" property: var(--color-default) [37:3: color]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: var(--color-default) [37:3: color]
ERROR	Property: Invalid value for "CSS Level 2.1" property: var(--color-primary) [41:3: color]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: var(--color-primary) [41:3: color]
ERROR	Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-primary) [56:3: font-family]
ERROR:CSSUTILS:Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-primary) [56:3: font-fami

Element: html, Properties: {'background-color': None, 'color': None, 'font-family': None, 'font-size': None, 'font-weight': None, 'margin': None, 'padding': None, 'text-align': None, 'justify-content': None, 'align-items': None}
Element: head, Properties: {'background-color': 'var(--color-primary)', 'color': '#6f6f6f', 'font-family': 'var(--font-secondary)', 'font-size': '32px', 'font-weight': '600', 'margin': 'auto', 'padding': '60px 0 60px 0', 'text-align': 'center', 'justify-content': None, 'align-items': None}
Element: meta, Properties: {'background-color': None, 'color': '#6c757d', 'font-family': None, 'font-size': '16px', 'font-weight': None, 'margin': '0', 'padding': '0', 'text-align': None, 'justify-content': None, 'align-items': 'center'}
Element: meta, Properties: {'background-color': None, 'color': '#6c757d', 'font-family': None, 'font-size': '16px', 'font-weight': None, 'margin': '0', 'padding': '0', 'text-align': None, 'justify-content': None, 'align-items': 'center'}
Elem

In [None]:
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup
import cssutils
from concurrent.futures import ThreadPoolExecutor

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def filter_css(css: str) -> str:
    filtered_css = []
    for line in css.splitlines():
        if not any(prop in line for prop in ['-ms-flex-order', 'order', '97vh', '-webkit-zoom-in', 'zoom-in']):
            filtered_css.append(line)
    return '\n'.join(filtered_css)

def extract_typography_properties(element, css_rules) -> Dict[str, Optional[str]]:
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if not hasattr(rule, 'selectorText'):
            continue
        if element.name in rule.selectorText:
            for prop in properties.keys():
                if properties[prop] is None and rule.style.getPropertyValue(prop):
                    properties[prop] = rule.style.getPropertyValue(prop)
    return properties

def find_properties(url: str) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_parser = cssutils.CSSParser()
    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        filtered_css = filter_css(css)
        sheet = css_parser.parseString(filtered_css)
        css_rules.extend(sheet.cssRules)

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


ERROR	Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-default) [36:3: font-family]
ERROR:CSSUTILS:Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-default) [36:3: font-family]
ERROR	Property: Invalid value for "CSS Level 2.1" property: var(--color-default) [37:3: color]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: var(--color-default) [37:3: color]
ERROR	Property: Invalid value for "CSS Level 2.1" property: var(--color-primary) [41:3: color]
ERROR:CSSUTILS:Property: Invalid value for "CSS Level 2.1" property: var(--color-primary) [41:3: color]
ERROR	Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-primary) [56:3: font-family]
ERROR:CSSUTILS:Property: Invalid value for "CSS Fonts Module Level 3 @font-face properties/CSS Level 2.1" property: var(--font-primary) [56:3: font-fami

Element: html
Element: head
  background-color: var(--color-primary)
  color: #6f6f6f
  font-family: var(--font-secondary)
  font-size: 32px
  font-weight: 600
  margin: auto
  padding: 60px 0 60px 0
  text-align: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--color-default)
  font-size: 22px
  font-weight: 700
  margin: 0 0 20px 0
  padding: 0
Element: link
  color: rgba(255, 255, 255, 0.7)
  font-size: 16px
  margin: 0 10px 10px 0
  padding: 0
  justify-content: center
  align-items: center
Element: body
  color: var(--color-default)
  font-family: var(--font-default)
  padding: 0 40px 30px 45px
Element: header
  background-color: var(--color-primary)
  color: #6f6f6f
  font-family: var(--font-secondary)
  font-size: 32px
  font-weight: 600
  margin: auto
  padding: 60px 0 60px 0
  text-align: center
Element: div
Element: a
  background-color: var(--color-primary)
  color: var(--color-primary)
  fon

In [None]:
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    for line in css.split('}'):
        if '{' in line:
            selector, properties = line.split('{')
            properties = properties.strip().split(';')
            rule = {'selector': selector.strip(), 'properties': {}}
            for prop in properties:
                if ':' in prop:
                    name, value = prop.split(':')
                    rule['properties'][name.strip()] = value.strip()
            rules.append(rule)
    return rules

def extract_typography_properties(element, css_rules) -> Dict[str, Optional[str]]:
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    return properties

def find_properties(url: str) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


ValueError: too many values to unpack (expected 2)

In [None]:
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    for line in css.split('}'):
        if '{' in line:
            selector, properties = line.split('{')
            properties = properties.strip().split(';')
            rule = {'selector': selector.strip(), 'properties': {}}
            for prop in properties:
                if ':' in prop:
                    name, value = prop.split(':', 1)  # Split only on the first colon
                    rule['properties'][name.strip()] = value.strip()
            rules.append(rule)
    return rules

def extract_typography_properties(element, css_rules) -> Dict[str, Optional[str]]:
    properties = {
        'background-color': None,
        'color': None,
        'font-family': None,
        'font-size': None,
        'font-weight': None,
        'margin': None,
        'padding': None,
        'text-align': None,
        'justify-content': None,
        'align-items': None
    }
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    return properties

def find_properties(url: str) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties = find_properties(url)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


ValueError: too many values to unpack (expected 2)

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def extract_typography_properties(element, css_rules, properties_to_extract: List[str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)
    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract)
        elements_properties[element.name] = properties
    return elements_properties

url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties = rule['properties']
            print(root_properties)
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


{'--swiper-theme-color': '#007aff'}
Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties = rule['properties']
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def replace_vars_with_values(properties: Dict[str, Optional[str]], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    for prop, value in properties.items():
        while value and value.startswith('var('):
            var_name = value[4:-1].strip()
            if var_name in root_properties:
                value = root_properties[var_name]
            else:
                break
        properties[prop] = value
    return properties

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    # Replace var() with actual values from :root
    properties = replace_vars_with_values(properties, root_properties)
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties = rule['properties']
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def replace_vars_with_values(properties: Dict[str, Optional[str]], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    for prop, value in properties.items():
        while value and value.startswith('var('):
            var_name = value[4:-1].strip()
            if var_name in root_properties:
                value = root_properties[var_name]
            else:
                break
        properties[prop] = value
    return properties

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    # Replace var() with actual values from :root
    properties = replace_vars_with_values(properties, root_properties)
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties.update(rule['properties'])

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def replace_vars_with_values(properties: Dict[str, Optional[str]], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    for prop, value in properties.items():
        while value and value.startswith('var('):
            var_name = value[4:-1].strip()
            if var_name in root_properties:
                value = root_properties[var_name]
            else:
                break
        properties[prop] = value
    return properties

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    # Replace var() with actual values from :root
    properties = replace_vars_with_values(properties, root_properties)
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties.update(rule['properties'])

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def resolve_nested_vars(value: str, root_properties: Dict[str, str]) -> str:
    while value.startswith('var('):
        var_name = value[4:-1].strip()
        if var_name in root_properties:
            value = root_properties[var_name]
        else:
            break
    return value

def replace_vars_with_values(properties: Dict[str, Optional[str]], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    for prop, value in properties.items():
        if value:
            properties[prop] = resolve_nested_vars(value, root_properties)
    return properties

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    # Replace var() with actual values from :root
    properties = replace_vars_with_values(properties, root_properties)
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties.update(rule['properties'])

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [None]:
import re
import urllib.parse
from typing import Optional, List, Dict, Tuple
import requests
from bs4 import BeautifulSoup

def extract_css_from_webpage(
    url: str, request_kwargs: Optional[dict] = None, verbose: bool = False
) -> Tuple[List[str], List[str], List[Dict[str, str]]]:
    """Extracts CSS from webpage

    Args:
        url (str): Webpage URL
        request_kwargs (dict): These arguments are passed to requests.get() (when
                                fetching webpage HTML and external stylesheets)
        verbose (bool): Print diagnostic information

    Returns:
        tuple[ list[str], list[str], list[dict] ]: css_from_external_stylesheets, css_from_style_tags, inline_css
    """

    if not request_kwargs:
        request_kwargs = {
            "timeout": 10,
            "headers": {"User-Agent": "Definitely not an Automated Script"},
        }
    url_response = requests.get(url, **request_kwargs)
    if url_response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f"received response [{url_response.status_code}] from [{url}]"
        )

    soup = BeautifulSoup(url_response.content, "html.parser")

    css_from_external_stylesheets: List[str] = []
    for link in soup.find_all("link", rel="stylesheet"):
        css_url = urllib.parse.urljoin(url, link["href"])
        if verbose:
            print(f"downloading external CSS stylesheet {css_url}")
        css_content: str = requests.get(css_url, **request_kwargs).text
        css_from_external_stylesheets.append(css_content)

    css_from_style_tags: List[str] = []
    for style_tag in soup.find_all("style"):
        css_from_style_tags.append(style_tag.string)

    inline_css: List[Dict[str, str]] = []
    for tag in soup.find_all(style=True):
        inline_css.append({"tag": str(tag), "css": tag["style"]})

    if verbose:
        print(
            f"""Extracted the following CSS from [{url}]:
    1. {len(css_from_external_stylesheets):,} external stylesheets (total {len("".join(css_from_external_stylesheets)):,} characters of text)
    2. {len(css_from_style_tags):,} style tags (total {len("".join(css_from_style_tags)):,} characters of text)
    3. {len(inline_css):,} tags with inline CSS (total {len("".join( (x["css"] for x in inline_css) )):,} characters of text)

"""
        )

    return css_from_external_stylesheets, css_from_style_tags, inline_css

def parse_css_rules(css: str) -> List[Dict[str, str]]:
    rules = []
    pattern = re.compile(r'([^{]+)\{([^}]+)\}')
    for match in pattern.finditer(css):
        selector = match.group(1).strip()
        properties = match.group(2).strip().split(';')
        rule = {'selector': selector, 'properties': {}}
        for prop in properties:
            if ':' in prop:
                name, value = prop.split(':', 1)  # Split only on the first colon
                rule['properties'][name.strip()] = value.strip()
        rules.append(rule)
    return rules

def resolve_nested_vars(value: str, root_properties: Dict[str, str]) -> str:
    while value.startswith('var('):
        var_name = value[4:-1].strip()
        if var_name in root_properties:
            value = root_properties[var_name]
        else:
            break
    return value

def replace_vars_with_values(properties: Dict[str, Optional[str]], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    for prop, value in properties.items():
        if value:
            properties[prop] = resolve_nested_vars(value, root_properties)
    return properties

def extract_typography_properties(element, css_rules, properties_to_extract: List[str], root_properties: Dict[str, str]) -> Dict[str, Optional[str]]:
    properties = {prop: None for prop in properties_to_extract}
    for rule in css_rules:
        if element.name in rule['selector']:
            for prop in properties.keys():
                if properties[prop] is None and prop in rule['properties']:
                    properties[prop] = rule['properties'][prop]
    # Apply :root properties if not already set
    for prop in properties.keys():
        if properties[prop] is None and prop in root_properties:
            properties[prop] = root_properties[prop]
    # Replace var() with actual values from :root
    properties = replace_vars_with_values(properties, root_properties)
    return properties

def find_properties(url: str, properties_to_extract: List[str]) -> Dict[str, Dict[str, Optional[str]]]:
    css_from_external_stylesheets, css_from_style_tags, inline_css = extract_css_from_webpage(url)

    css_rules = []
    for css in css_from_external_stylesheets + css_from_style_tags:
        css_rules.extend(parse_css_rules(css))

    # Extract :root properties
    root_properties = {}
    for rule in css_rules:
        if rule['selector'] == ':root':
            root_properties.update(rule['properties'])

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    elements_properties = {}
    for element in soup.find_all(True):
        properties = extract_typography_properties(element, css_rules, properties_to_extract, root_properties)
        elements_properties[element.name] = properties

    return elements_properties

# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: var(--bs-card-cap-bg)
  color: var(--bs-dropdown-header-color)
  font-family: var(--font-secondary)
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  color: var(--bs-card-title-color)
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: var(--bs-nav-tabs-link-active-bg)
  color: var(--bs-btn-color)
  font-size: var(--bs-nav-link-font-size)
  font-weight: var(--bs-nav-link-font-weight)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center

In [1]:


# Example usage
url = 'https://bekushal.com'
properties_to_extract = [
    'background-color', 'color', 'font-family', 'font-size', 'font-weight',
    'margin', 'padding', 'text-align', 'justify-content', 'align-items'
]
properties = find_properties(url, properties_to_extract)
for element, props in properties.items():
    print(f"Element: {element}")
    for prop, value in props.items():
        if value:
            print(f"  {prop}: {value}")


Element: html
Element: head
  background-color: rgba(var(--bs-body-color-rgb), 0.03)
  color: #adb5bd
  font-family: "Poppins", sans-serif
  font-size: .875rem
  font-weight: 600
  margin: calc(-.5 * var(--bs-modal-header-padding-y)) calc(-.5 * var(--bs-modal-header-padding-x)) calc(-.5 * var(--bs-modal-header-padding-y)) auto
  padding: var(--bs-dropdown-header-padding-y) var(--bs-dropdown-header-padding-x)
  text-align: center
  align-items: center
Element: meta
  color: #6c757d
  font-size: 16px
  margin: 0
  padding: 0
  align-items: center
Element: title
  font-family: arial
  font-size: 1em
  font-weight: 400
  margin: 0 0 20px 0
  padding: 0
Element: link
  background-color: #212529
  color: #6ea8fe
  font-size: var(--bs-nav-link-font-size)
  margin: 0 10px 10px 0
  padding: var(--bs-nav-link-padding-y) var(--bs-nav-link-padding-x)
  text-align: center
  justify-content: center
  align-items: center
Element: body
  background-color: #212529
  color: #dee2e6
  font-family: system