In [1]:
import requests
from bs4 import BeautifulSoup

def scrape_elements(url, timeout=10):
    """Download ``url`` and collect its tags into named groups.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout the request
            can block indefinitely.

    Returns:
        dict mapping a group name ('headers', 'paragraphs', 'images',
        'containers', 'lists', 'navigation') to the result of
        ``soup.find_all`` for that group's tag names. 'lists' and
        'navigation' both match ``ul``/``li``, so those tags appear in
        both groups.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of analysing the error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Tag names (or a single name) looked up for each report group.
    tag_groups = {
        'headers': ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
        'paragraphs': 'p',
        'images': 'img',
        'containers': ['div', 'section', 'article'],
        'lists': ['ul', 'ol', 'li'],
        'navigation': ['nav', 'ul', 'li'],
    }

    return {group: soup.find_all(tags) for group, tags in tag_groups.items()}

# Page to analyse; `elements` (group name -> scraped tags) is read by later cells.
url = 'https://bekushal.com'
elements = scrape_elements(url)

In [3]:
def save_elements_to_html(elements, filename='scraped_content.html'):
    """Dump grouped elements into one minimal HTML document.

    Args:
        elements: Mapping of group name to an iterable of elements. Each
            element is serialised with ``str()``.
        filename: Path of the UTF-8 file to create or overwrite.

    The document is ``<html><body>`` followed, for every group, by an
    ``<h2>`` holding the capitalised group name and then the group's
    elements back to back, closed by ``</body></html>``.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        out.write('<html><body>')
        for group_name, group_items in elements.items():
            heading = '<h2>' + group_name.capitalize() + '</h2>'
            out.write(heading)
            # Elements are emitted in order with no separator between them.
            out.writelines(map(str, group_items))
        out.write('</body></html>')

# Write the scraped groups to the default file, scraped_content.html.
save_elements_to_html(elements)





In [4]:
from google.colab.output import eval_js
from IPython.display import display, HTML

# Load the saved page and render it in this cell's output.
with open('scraped_content.html', 'r', encoding='utf-8') as file:
    html_content = file.read()
display(HTML(html_content))

# JavaScript that tallies the computed `text-align` of each element group.
# `checkAlignmentCounts` buckets every element matched by a selector into
# left / center / right ('start' counts as left, 'end' as right); any other
# computed value falls back to the parent's alignment and finally to left.
# `checkConsistency` runs that tally for six selector groups and returns the
# combined report. 'lists' and 'navigation' both include `ul, li`, so those
# elements are counted in both groups.
# NOTE(review): presumably the script is evaluated in the same output frame as
# the HTML displayed above, so `document.querySelectorAll` sees the scraped
# markup - confirm against the Colab `eval_js` documentation.
js_code = """
function checkAlignmentCounts(selector) {
  const elements = document.querySelectorAll(selector);
  const alignmentCounts = { total: elements.length, left: 0, center: 0, right: 0 };

  elements.forEach(element => {
    const styles = window.getComputedStyle(element);
    const textAlign = styles.textAlign;

    if (textAlign === 'left' || textAlign === 'start') {
      alignmentCounts.left += 1;
    } else if (textAlign === 'center') {
      alignmentCounts.center += 1;
    } else if (textAlign === 'right' || textAlign === 'end') {
      alignmentCounts.right += 1;
    } else {
      // Handle cases where textAlign is not explicitly set
      const parentStyles = window.getComputedStyle(element.parentElement);
      const parentTextAlign = parentStyles.textAlign;
      if (parentTextAlign === 'left' || parentTextAlign === 'start') {
        alignmentCounts.left += 1;
      } else if (parentTextAlign === 'center') {
        alignmentCounts.center += 1;
      } else if (parentTextAlign === 'right' || parentTextAlign === 'end') {
        alignmentCounts.right += 1;
      } else {
        alignmentCounts.left += 1; // Default to left if no alignment is found
      }
    }
  });

  return alignmentCounts;
}

function checkConsistency() {
  const selectors = {
    headers: 'h1, h2, h3, h4, h5, h6',
    paragraphs: 'p',
    images: 'img',
    containers: 'div, section, article',
    lists: 'ul, ol, li',
    navigation: 'nav, ul, li'
  };

  const consistencyReport = {};

  for (const [key, selector] of Object.entries(selectors)) {
    consistencyReport[key] = checkAlignmentCounts(selector);
  }

  console.log(consistencyReport);
  return consistencyReport;
}

checkConsistency();

"""

# Evaluate the script; the value of its final expression, `checkConsistency()`,
# is what `eval_js` hands back to Python, and it is printed as the report.
consistency_report = eval_js(js_code)
print(consistency_report)


{'headers': {'total': 79, 'left': 79, 'center': 0, 'right': 0}, 'paragraphs': {'total': 30, 'left': 30, 'center': 0, 'right': 0}, 'images': {'total': 10, 'left': 10, 'center': 0, 'right': 0}, 'containers': {'total': 250, 'left': 250, 'center': 0, 'right': 0}, 'lists': {'total': 41, 'left': 41, 'center': 0, 'right': 0}, 'navigation': {'total': 43, 'left': 43, 'center': 0, 'right': 0}}


In [None]:
from google.colab.output import eval_js
from IPython.display import display, HTML

# Load the saved page and render it in this cell's output.
with open('scraped_content.html', 'r', encoding='utf-8') as file:
    html_content = file.read()
display(HTML(html_content))

# JavaScript that reports alignment, flexbox and spacing consistency for six
# selector groups ('lists' and 'navigation' both include `ul, li`).
# - `checkAlignmentCounts`: buckets elements by computed `text-align`
#   ('start' counts as left, 'end' as right); other values fall back to the
#   parent's alignment and finally to left.
# - `checkFlexboxConsistency`: gathers the distinct justify-content/align-items
#   pairs of `display: flex` elements. With exactly one distinct pair it
#   reports every matched element as matching; otherwise it counts the elements
#   (flex or not) whose pair equals that of the first matched element.
#   NOTE(review): non-flex elements are counted as matching in both branches,
#   which may over-report consistency - confirm the intended meaning.
# - `checkSpacingConsistency`: compares the four margins plus four paddings;
#   if they are not all identical, `matching` is the number of elements equal
#   to the first matched element.
# NOTE(review): presumably the script is evaluated in the same output frame as
# the HTML displayed above - confirm against the Colab `eval_js` documentation.
js_code = """
function checkAlignmentCounts(selector) {
  const elements = document.querySelectorAll(selector);
  const alignmentCounts = { total: elements.length, left: 0, center: 0, right: 0 };

  elements.forEach(element => {
    const styles = window.getComputedStyle(element);
    const textAlign = styles.textAlign;

    if (textAlign === 'left' || textAlign === 'start') {
      alignmentCounts.left += 1;
    } else if (textAlign === 'center') {
      alignmentCounts.center += 1;
    } else if (textAlign === 'right' || textAlign === 'end') {
      alignmentCounts.right += 1;
    } else {
      // Handle cases where textAlign is not explicitly set
      const parentStyles = window.getComputedStyle(element.parentElement);
      const parentTextAlign = parentStyles.textAlign;
      if (parentTextAlign === 'left' || parentTextAlign === 'start') {
        alignmentCounts.left += 1;
      } else if (parentTextAlign === 'center') {
        alignmentCounts.center += 1;
      } else if (parentTextAlign === 'right' || parentTextAlign === 'end') {
        alignmentCounts.right += 1;
      } else {
        alignmentCounts.left += 1; // Default to left if no alignment is found
      }
    }
  });

  return alignmentCounts;
}

function checkFlexboxConsistency(selector) {
  const elements = document.querySelectorAll(selector);
  const flexboxCounts = { total: elements.length, matching: 0 };
  const flexboxValues = new Set();

  elements.forEach(element => {
    const styles = window.getComputedStyle(element);
    const display = styles.display;
    const justifyContent = styles.justifyContent;
    const alignItems = styles.alignItems;

    if (display === 'flex') {
      flexboxValues.add(`${justifyContent}-${alignItems}`);
    }
  });

  if (flexboxValues.size === 1) {
    flexboxCounts.matching = elements.length;
  } else {
    const firstFlexbox = elements.length > 0 ? `${window.getComputedStyle(elements[0]).justifyContent}-${window.getComputedStyle(elements[0]).alignItems}` : null;
    elements.forEach(element => {
      const styles = window.getComputedStyle(element);
      if (`${styles.justifyContent}-${styles.alignItems}` === firstFlexbox) {
        flexboxCounts.matching += 1;
      }
    });
  }

  return flexboxCounts;
}

function checkSpacingConsistency(selector) {
  const elements = document.querySelectorAll(selector);
  const spacingCounts = { total: elements.length, matching: 0 };
  const spacingValues = new Set();

  elements.forEach(element => {
    const styles = window.getComputedStyle(element);
    const margin = `${styles.marginTop}-${styles.marginRight}-${styles.marginBottom}-${styles.marginLeft}`;
    const padding = `${styles.paddingTop}-${styles.paddingRight}-${styles.paddingBottom}-${styles.paddingLeft}`;

    spacingValues.add(`${margin}-${padding}`);
  });

  if (spacingValues.size === 1) {
    spacingCounts.matching = elements.length;
  } else {
    const firstSpacing = elements.length > 0 ? `${window.getComputedStyle(elements[0]).marginTop}-${window.getComputedStyle(elements[0]).marginRight}-${window.getComputedStyle(elements[0]).marginBottom}-${window.getComputedStyle(elements[0]).marginLeft}-${window.getComputedStyle(elements[0]).paddingTop}-${window.getComputedStyle(elements[0]).paddingRight}-${window.getComputedStyle(elements[0]).paddingBottom}-${window.getComputedStyle(elements[0]).paddingLeft}` : null;
    elements.forEach(element => {
      const styles = window.getComputedStyle(element);
      const currentSpacing = `${styles.marginTop}-${styles.marginRight}-${styles.marginBottom}-${styles.marginLeft}-${styles.paddingTop}-${styles.paddingRight}-${styles.paddingBottom}-${styles.paddingLeft}`;
      if (currentSpacing === firstSpacing) {
        spacingCounts.matching += 1;
      }
    });
  }

  return spacingCounts;
}

function checkConsistency() {
  const selectors = {
    headers: 'h1, h2, h3, h4, h5, h6',
    paragraphs: 'p',
    images: 'img',
    containers: 'div, section, article',
    lists: 'ul, ol, li',
    navigation: 'nav, ul, li'
  };

  const consistencyReport = {};

  for (const [key, selector] of Object.entries(selectors)) {
    consistencyReport[key] = {
      alignment: checkAlignmentCounts(selector),
      flexbox: checkFlexboxConsistency(selector),
      spacing: checkSpacingConsistency(selector)
    };
  }

  console.log(consistencyReport);
  return consistencyReport;
}

checkConsistency();



"""

# Evaluate the script; the value of its final expression, `checkConsistency()`,
# is what `eval_js` hands back to Python, and it is printed as the report.
consistency_report = eval_js(js_code)
print(consistency_report)


{'headers': {'alignment': {'total': 79, 'left': 79, 'center': 0, 'right': 0}, 'flexbox': {'total': 79, 'matching': 79}, 'spacing': {'total': 79, 'matching': 15}}, 'paragraphs': {'alignment': {'total': 30, 'left': 30, 'center': 0, 'right': 0}, 'flexbox': {'total': 30, 'matching': 30}, 'spacing': {'total': 30, 'matching': 30}}, 'images': {'alignment': {'total': 10, 'left': 10, 'center': 0, 'right': 0}, 'flexbox': {'total': 10, 'matching': 10}, 'spacing': {'total': 10, 'matching': 10}}, 'containers': {'alignment': {'total': 250, 'left': 250, 'center': 0, 'right': 0}, 'flexbox': {'total': 250, 'matching': 250}, 'spacing': {'total': 250, 'matching': 250}}, 'lists': {'alignment': {'total': 41, 'left': 41, 'center': 0, 'right': 0}, 'flexbox': {'total': 41, 'matching': 41}, 'spacing': {'total': 41, 'matching': 9}}, 'navigation': {'alignment': {'total': 43, 'left': 43, 'center': 0, 'right': 0}, 'flexbox': {'total': 43, 'matching': 43}, 'spacing': {'total': 43, 'matching': 34}}}


In [None]:
import requests
from bs4 import BeautifulSoup

def scrape_elements(url, timeout=10):
    """Download ``url`` and collect its tags into named groups.

    NOTE(review): an earlier cell of this notebook defines a function with
    the same name; this version additionally returns a 'sections' group.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout the request
            can block indefinitely.

    Returns:
        dict mapping a group name ('headers', 'paragraphs', 'images',
        'containers', 'lists', 'navigation', 'sections') to the result of
        ``soup.find_all`` for that group's tag names. 'lists' and
        'navigation' both match ``ul``/``li``, and ``section`` tags appear
        under both 'containers' and 'sections'.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or a timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of analysing the error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Tag names (or a single name) looked up for each report group.
    tag_groups = {
        'headers': ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
        'paragraphs': 'p',
        'images': 'img',
        'containers': ['div', 'section', 'article'],
        'lists': ['ul', 'ol', 'li'],
        'navigation': ['nav', 'ul', 'li'],
        # Sections identify the different parts of the page.
        'sections': 'section',
    }

    return {group: soup.find_all(tags) for group, tags in tag_groups.items()}

# Re-scrape the page with the definition above, which also returns 'sections'.
url = 'https://bekushal.com'
elements = scrape_elements(url)
# NOTE(review): this prints every scraped tag in full; consider printing
# per-group counts instead to keep the output readable.
print(elements)


{'headers': [<h1>BEkushal<span>.</span></h1>, <h2>Mind Body <span>Mentor</span></h2>, <h4 class="title"><a class="stretched-link" href="https://www.youtube.com/c/bekushal">Videos on <br/>Practical Spirituality</a></h4>, <h4 class="title"><a class="stretched-link" href="https://simplescribblings.blogspot.com/">Blog on <br/>Indian Philosophy</a></h4>, <h4 class="title"><a class="stretched-link" href="https://github.com/atmabodha/Vedanta_Datasets">IKS Texts <br/>GitHub Repo</a></h4>, <h4 class="title"><a class="stretched-link" href="https://www.bekushal.com/pyp">Personalised <br/>Yoga Protocol</a></h4>, <h2>About Us</h2>, <h3>Frequently Asked <strong>Questions</strong></h3>, <h3 class="accordion-header">
<button class="accordion-button collapsed" data-bs-target="#faq-content-1" data-bs-toggle="collapse" type="button">
<span class="num">1.</span>
		    Does Yoga truly help in improving the quality of life?
                  </button>
</h3>, <h3 class="accordion-header">
<button class="acco

In [None]:
def apply_float_consistency(elements, float_direction='right'):
    """Give every image inside a scraped section the same CSS ``float``.

    Args:
        elements: Mapping of group name to scraped tags. Only the
            'sections' group is used; a missing 'sections' key is treated
            as "no sections" rather than an error.
        float_direction: Value written to the ``float`` property.

    Returns:
        The same ``elements`` object. The image tags are modified in place.

    Any other declarations already present in an image's inline ``style``
    are kept; only an existing ``float`` declaration is replaced, so calling
    this repeatedly does not pile up duplicate rules.
    """
    float_rule = f'float: {float_direction};'
    for section in elements.get('sections', ()):
        for img in section.find_all('img'):
            existing = img.get('style') or ''
            # Drop empty fragments and any previous float declaration, then
            # rejoin with ';' so the untouched declarations keep their text.
            kept = [
                piece for piece in existing.split(';')
                if piece.strip()
                and piece.split(':', 1)[0].strip().lower() != 'float'
            ]
            preserved = ';'.join(kept).strip()
            img['style'] = f'{preserved}; {float_rule}' if preserved else float_rule
    return elements

# Float every image inside a scraped section using the default direction
# ('right'). The tags are modified in place and the same dict is returned.
elements = apply_float_consistency(elements)


In [None]:
def save_elements_to_html(elements, filename='scraped_content.html'):
    """Serialise grouped elements into a single bare-bones HTML file.

    NOTE(review): an identical definition appears in an earlier cell of
    this notebook.

    Args:
        elements: Mapping of group name to an iterable of elements; every
            element is converted with ``str()`` before being written.
        filename: Destination path. The file is created or overwritten and
            encoded as UTF-8.

    Output layout: ``<html><body>``, then for each group an ``<h2>`` with
    the capitalised group name followed directly by the group's elements,
    and finally ``</body></html>``.
    """
    with open(filename, mode='w', encoding='utf-8') as out:
        out.write('<html><body>')
        for group_name, group_items in elements.items():
            heading = '<h2>' + group_name.capitalize() + '</h2>'
            out.write(heading)
            # Elements are emitted in order with no separator between them.
            out.writelines(map(str, group_items))
        out.write('</body></html>')

# Overwrite the default file, scraped_content.html, with the current `elements`.
save_elements_to_html(elements)


In [None]:
from google.colab.output import eval_js
from IPython.display import display, HTML

# Load the saved page and render it in this cell's output.
with open('scraped_content.html', 'r', encoding='utf-8') as file:
    html_content = file.read()
display(HTML(html_content))

# NOTE(review): redundant - `eval_js` is already imported at the top of this cell.
from google.colab.output import eval_js

# JavaScript that checks whether the elements of each selector group share one
# computed `float` value. `checkUniformFloat` collects the distinct values; if
# there is exactly one, every matched element counts as matching, otherwise
# `matching` is the number of elements whose float equals that of the first
# matched element. `checkFloatConsistency` runs this for six selector groups
# ('lists' and 'navigation' both include `ul, li`) and returns the report.
# NOTE(review): presumably the script is evaluated in the same output frame as
# the HTML displayed above - confirm against the Colab `eval_js` documentation.
js_code = """
function checkUniformFloat(selector) {
  const elements = document.querySelectorAll(selector);
  const floatCounts = { total: elements.length, matching: 0 };
  const floatValues = new Set();

  elements.forEach(element => {
    const styles = window.getComputedStyle(element);
    floatValues.add(styles.float);
  });

  if (floatValues.size === 1) {
    floatCounts.matching = elements.length;
  } else {
    const firstFloat = elements.length > 0 ? window.getComputedStyle(elements[0]).float : null;
    elements.forEach(element => {
      const styles = window.getComputedStyle(element);
      if (styles.float === firstFloat) {
        floatCounts.matching += 1;
      }
    });
  }

  return floatCounts;
}

function checkFloatConsistency() {
  const selectors = {
    headers: 'h1, h2, h3, h4, h5, h6',
    paragraphs: 'p',
    images: 'img',
    containers: 'div, section, article',
    lists: 'ul, ol, li',
    navigation: 'nav, ul, li'
  };

  const consistencyReport = {};

  for (const [key, selector] of Object.entries(selectors)) {
    consistencyReport[key] = checkUniformFloat(selector);
  }

  console.log(consistencyReport);
  return consistencyReport;
}

checkFloatConsistency();
"""

# Evaluate the script; the value of its final expression,
# `checkFloatConsistency()`, is what `eval_js` hands back to Python.
consistency_report = eval_js(js_code)
print(consistency_report)



{'headers': {'total': 91, 'matching': 91}, 'paragraphs': {'total': 35, 'matching': 35}, 'images': {'total': 12, 'matching': 12}, 'containers': {'total': 297, 'matching': 297}, 'lists': {'total': 41, 'matching': 41}, 'navigation': {'total': 43, 'matching': 43}}
