In [1]:
import requests
from bs4 import BeautifulSoup
import csv

# Page to scrape; every row written below comes from this single document.
url = "https://www.baraasallout.com/test.html"

# Bound the wait and fail loudly on 4xx/5xx responses so that an error page
# is never parsed and then reported as successfully extracted data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Collect the elements of each category (each list is in document order).
headings = soup.find_all(["h1", "h2"])
paragraphs = soup.find_all("p")
list_items = soup.find_all("li")

# Build [Type, Content] rows: headings first, then paragraphs, then list items.
data = []
for label, elements in (
    ("Heading", headings),
    ("Paragraph", paragraphs),
    ("List Item", list_items),
):
    data.extend([label, element.text.strip()] for element in elements)

# newline="" lets the csv module control line endings itself.
csv_file = "Extract_Text_Data.csv"
with open(csv_file, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["Type", "Content"])
    writer.writerows(data)

print(f"Data successfully saved to {csv_file}")



Data successfully saved to Extract_Text_Data.csv


In [3]:
import requests
from bs4 import BeautifulSoup
import csv


url = "https://www.baraasallout.com/test.html"

try:
    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first <table> on the page is exported.
    table = soup.find('table')
    if table is None:
        # Reported by the generic handler below with a readable message
        # instead of an opaque "'NoneType' object has no attribute ..." error.
        raise ValueError("no <table> element found on the page")

    # Split the table into a single header row and the remaining data rows.
    # The first row made up solely of <th> cells is the header; every other
    # row (including rows that mix <th> and <td>) is data. Keeping the header
    # out of `rows` prevents it from being written to the CSV twice.
    headers = []
    rows = []
    for row in table.find_all('tr'):
        cells = row.find_all(['td', 'th'])
        values = [cell.text.strip() for cell in cells]
        is_header_row = bool(cells) and all(cell.name == 'th' for cell in cells)
        if is_header_row and not headers:
            headers = values
        else:
            rows.append(values)

    # Save to a CSV file
    filename = "Extract_Table_Data.csv"
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if headers:  # Write headers if available
            writer.writerow(headers)
        writer.writerows(rows)

    print(f"Table data has been extracted and saved to {filename}")

except requests.exceptions.RequestException as e:
    print(f"An error occurred while fetching the webpage: {e}")
except Exception as e:
    print(f"An error occurred: {e}")


Table data has been extracted and saved to Extract_Table_Data.csv


In [43]:
import requests
from bs4 import BeautifulSoup
import json

def extract_product_information(url, filename):
  """
  Extracts product information from book cards on a webpage and saves it to a JSON file.

  Every ``div.book-card`` element produces one dictionary. A key is only
  present when the corresponding element was found inside the card:
  ``'Product'`` (``h3.book-title``), ``'Price'`` (``span.book-price``),
  ``'In Stock '`` (a text node reading "in stock" / "out of stock") and
  ``'Button Text'`` (``button.add-to-basket``).

  Request failures are caught and printed; nothing is written in that case.

  Args:
      url (str): The URL of the webpage containing the book cards.
      filename (str): The name of the JSON file to save the extracted data.
  """
  # Compared against the stripped, lower-cased text node, so both labels
  # must be written in lower case here.
  stock_labels = ('in stock', 'out of stock')

  try:
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    book_cards = soup.find_all('div', class_='book-card')

    products = []
    for card in book_cards:
      product = {}

      title_element = card.find('h3', class_='book-title')
      if title_element:
        product['Product'] = title_element.text.strip()

      price_element = card.find('span', class_='book-price')
      if price_element:
        product['Price'] = price_element.text.strip()

      # Match the availability label case-insensitively and ignoring the
      # surrounding whitespace of the text node.
      stock_text = card.find(
        string=lambda text: text is not None and text.strip().lower() in stock_labels
      )
      if stock_text:
        # NOTE(review): the key keeps its trailing space so the JSON schema
        # stays identical to previously generated files.
        product['In Stock '] = stock_text.strip()

      button_element = card.find('button', class_='add-to-basket')
      if button_element:
        product['Button Text'] = button_element.text.strip()

      products.append(product)

    # Save data to JSON file
    with open(filename, 'w') as outfile:
      json.dump(products, outfile, indent=4)
    print(f"Successfully extracted product information and saved to {filename}")

  except requests.exceptions.RequestException as e:
    print(f"Error occurred while fetching data: {e}")


# Output file and source page for the book-card extraction.
filename = "Product_Information.JSON"
url = "https://www.baraasallout.com/test.html"

extract_product_information(url=url, filename=filename)

Successfully extracted product information and saved to Product_Information.JSON


In [29]:
import requests
from bs4 import BeautifulSoup
import json

def extract_form_details(url, filename):
  """
  Extracts form input fields and saves them to a JSON file.

  Only ``<input>`` elements of the first ``<form>`` on the page are read.
  Each one becomes a dictionary with the keys ``name``, ``type`` and
  ``default_value`` (taken from the ``value`` attribute); an attribute that
  is absent is stored as ``None``.

  Request failures and a page without any ``<form>`` are reported with a
  printed message; no file is written in either case.

  Args:
      url (str): The URL of the webpage containing the form.
      filename (str): The name of the JSON file to save the extracted data.
  """

  try:
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    form = soup.find('form')
    if form is None:
      # Without this guard form.find_all() raises an AttributeError that the
      # handler below does not catch.
      print(f"No form found at {url}; nothing was saved")
      return

    form_fields = [
      {
        'name': input_field.get('name'),
        'type': input_field.get('type'),
        'default_value': input_field.get('value'),
      }
      for input_field in form.find_all('input')
    ]

    # Save data to JSON file
    with open(filename, 'w') as outfile:
      json.dump(form_fields, outfile, indent=4)

    print(f"Successfully extracted form details and saved to {filename}")

  except requests.exceptions.RequestException as e:
    print(f"Error occurred while fetching data: {e}")


# Output file and source page for the form-field extraction.
filename = "Form_Details.JSON"
url = "https://www.baraasallout.com/test.html"

extract_form_details(url=url, filename=filename)

Successfully extracted form details and saved to Form_Details.JSON


In [9]:
import requests
from bs4 import BeautifulSoup
import json

def extract_links_and_multimedia(url, filename):
  """
  Collects the hyperlinks and embedded video iframes of a webpage into a JSON file.

  The file holds one object with two lists: ``links`` (``href`` and stripped
  text of every ``<a>`` that has an ``href``) and ``multimedia`` (the ``src``
  of every ``<iframe>`` pointing at youtube.com or vimeo.com, tagged as
  ``'video'``). Request failures are caught and printed.

  Args:
      url (str): The URL of the webpage to extract links from.
      filename (str): The name of the JSON file to save the extracted data.
  """

  try:
    page = requests.get(url)
    page.raise_for_status()
    document = BeautifulSoup(page.content, 'html.parser')

    # Anchors without an href attribute are skipped by the href=True filter.
    anchors = [
      {'href': anchor['href'], 'text': anchor.text.strip()}
      for anchor in document.find_all('a', href=True)
    ]

    # Keep only iframes whose source is hosted on a known video site.
    iframe_sources = (frame.get('src') for frame in document.find_all('iframe'))
    videos = [
      {'type': 'video', 'url': source}
      for source in iframe_sources
      if source and ('youtube.com' in source or 'vimeo.com' in source)
    ]

    # Write both collections into a single JSON document.
    with open(filename, 'w') as outfile:
      json.dump({'links': anchors, 'multimedia': videos}, outfile, indent=4)

    print(f"Successfully extracted links and multimedia and saved to {filename}")

  except requests.exceptions.RequestException as e:
    print(f"Error occurred while fetching data: {e}")

# Output file and source page for the link / multimedia extraction.
filename = "Links_and_Multimedia.JSON"
url = "https://www.baraasallout.com/test.html"

extract_links_and_multimedia(url=url, filename=filename)

Successfully extracted links and multimedia and saved to Links_and_Multimedia.JSON


In [37]:
import requests
from bs4 import BeautifulSoup
import json

def extract_product_information(url, filename):
  """
  Extracts product information from product cards and saves it to a JSON file.

  Every ``div.product-card`` element produces one dictionary with the keys
  ``id`` (the ``data-id`` attribute), ``name``, ``price``, ``colors`` and
  ``featured``. A value that cannot be found is stored as ``None``;
  ``featured`` is always a boolean.

  Request failures are caught and printed; nothing is written in that case.

  NOTE(review): another cell of this notebook defines a function with the
  same name (for book cards); whichever cell ran last is the one in effect.

  Args:
      url (str): The URL of the webpage containing the product cards.
      filename (str): The name of the JSON file to save the extracted data.
  """

  try:
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    products = []

    # Find all product cards
    product_cards = soup.find_all('div', class_='product-card')

    for card in product_cards:
      product_data = {}
      product_data['id'] = card.get('data-id')

      # Extract title from name element
      name_element = card.find('p', class_='name')
      product_data['name'] = name_element.text.strip() if name_element else None

      # Extract price from hidden price element. The style attribute must
      # equal 'display: none;' exactly for the element to be found.
      price_element = card.find('p', class_='price', style='display: none;')
      product_data['price'] = price_element.text.strip() if price_element else None

      # The colors paragraph is expected to look like "<label>: <values>".
      # When the separator is missing (e.g. an empty list whose trailing
      # space was stripped) store None instead of raising an IndexError.
      colors_element = card.find('p', class_='colors')
      colors = None
      if colors_element:
        parts = colors_element.text.strip().split(': ')
        if len(parts) > 1:
          colors = parts[1]
      product_data['colors'] = colors

      # A card is featured when 'featured' is one of its CSS classes.
      product_data['featured'] = 'featured' in card.get('class', [])

      products.append(product_data)

    # Save data to JSON file
    with open(filename, 'w') as outfile:
      json.dump(products, outfile, indent=4)

    print(f"Successfully extracted product information and saved to {filename}")

  except requests.exceptions.RequestException as e:
    print(f"Error occurred while fetching data: {e}")

# Source page and output file for the product-card extraction.
url = "https://www.baraasallout.com/test.html"
# The file name must not start with a space: a leading blank makes the file
# awkward to reference from shells and scripts.
filename = "Featured Products.JSON"

extract_product_information(url, filename)

Successfully extracted product information and saved to  Featured Products.JSON
