1. Write a python program which searches all the product under a particular product from www.amazon.in. The product to be searched will be taken as input from user. For e.g. If user input is ‘guitar’. Then search for guitars.
2. In the above question, now scrape the following details of each product listed in first 3 pages of your search results and save it in a data frame and csv. In case if any product has less than 3 pages in search results then scrape all the products available under that product name. Details to be scraped are: "Brand
Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and
“Product URL”. In case, if any of the details are missing for any of the product then replace it by “-“.

In [49]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_amazon_search_results(search_query):
    base_url = "https://www.amazon.in"
    search_url = base_url + "/s?k=" + search_query.replace(" ", "+")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
    }
    response = requests.get(search_url, headers=headers)
    if response.status_code == 200:
        return response.text
    else:
        print("Failed to retrieve search results.")
        return None

def parse_search_results(html):
    soup = BeautifulSoup(html, "html.parser")
    products = []
    results = soup.find_all("div", {"data-component-type": "s-search-result"})
    for result in results:
        product = {}
        try:
            product["Brand Name"] = result.find("span", {"class": "a-size-base-plus"}).text.strip()
        except:
            product["Brand Name"] = "-"
        try:
            product["Name of the Product"] = result.find("span", {"class": "a-text-normal"}).text.strip()
        except:
            product["Name of the Product"] = "-"
        try:
            product["Price"] = result.find("span", {"class": "a-offscreen"}).text.strip()
        except:
            product["Price"] = "-"
        try:
            product["Return/Exchange"] = result.find("div", {"class": "a-row a-size-base a-color-secondary"}).text.strip()
        except:
            product["Return/Exchange"] = "-"
        try:
            product["Expected Delivery"] = result.find("span", {"class": "a-text-bold"}).text.strip()
        except:
            product["Expected Delivery"] = "-"
        try:
            product["Availability"] = result.find("span", {"class": "a-size-base"}).text.strip()
        except:
            product["Availability"] = "-"
        try:
            product["Product URL"] = result.find("a", {"class": "a-link-normal"}).get("href")
        except:
            product["Product URL"] = "-"
        products.append(product)
    return products

def scrape_amazon_products(search_query, pages=3):
    all_products = []
    for page in range(1, pages+1):
        print(f"Scraping page {page}...")
        html = get_amazon_search_results(search_query + "&page=" + str(page))
        if html:
            products = parse_search_results(html)
            all_products.extend(products)
    return all_products

def save_to_csv(products, filename):
    df = pd.DataFrame(products)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    search_query = input("Enter the product to search: ")
    products = scrape_amazon_products(search_query)
    save_to_csv(products, "amazon_products.csv")


Enter the product to search: guitar
Scraping page 1...
Failed to retrieve search results.
Scraping page 2...
Failed to retrieve search results.
Scraping page 3...
Failed to retrieve search results.
Data saved to amazon_products.csv


In [3]:
!pip install selenium


Collecting selenium
  Obtaining dependency information for selenium from https://files.pythonhosted.org/packages/e0/7a/08f0ea19a0c835e88aad011083d9dda69a9dfa4585c3453b3bd842eb7bed/selenium-4.21.0-py3-none-any.whl.metadata
  Downloading selenium-4.21.0-py3-none-any.whl.metadata (6.9 kB)
Collecting trio~=0.17 (from selenium)
  Obtaining dependency information for trio~=0.17 from https://files.pythonhosted.org/packages/76/51/12d78ec8abcbda51d8f115d98ebd3ee3da9d9d9af00ac69d3097c5b8d51a/trio-0.25.1-py3-none-any.whl.metadata
  Downloading trio-0.25.1-py3-none-any.whl.metadata (8.7 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Obtaining dependency information for trio-websocket~=0.9 from https://files.pythonhosted.org/packages/48/be/a9ae5f50cad5b6f85bd2574c2c923730098530096e170c1ce7452394d7aa/trio_websocket-0.11.1-py3-none-any.whl.metadata
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting typing_extensions>=4.9.0 (from selenium)
  Obtaining dependency in

3. Write a python program to access the search bar and search button on images.google.com and scrape 10 images each for keywords ‘fruits’, ‘cars’ and ‘Machine Learning’, ‘Guitar’, ‘Cakes’.

In [48]:

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests
import os
from selenium.webdriver.common.by import By


def scrape_images(keyword, num_images):
    try:
        driver = webdriver.Chrome() 
        driver.get("https://images.google.com/")
        time.sleep(2)
        
        search_bar = driver.find_element(By.NAME,"q")
        search_bar.clear()
        search_bar.send_keys(keyword)
        search_bar.send_keys(Keys.RETURN)
        time.sleep(2)
        
       
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
        
        # Extract image URLs
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        img_tags = soup.find_all("img", class_="rg_i")
        image_urls = [img['src'] for img in img_tags if img.get('src')]
        image_urls = image_urls[:num_images]
        
        return image_urls
    except Exception as e:
        print(f"Error scraping images for {keyword}: {e}")
    finally:
        driver.quit()
        
keywords = ['fruits', 'cars', 'Guitar', 'Cakes']
num_images = 10
    
for keyword in keywords:
     image_urls = scrape_images(keyword, num_images)
     if image_urls:
        download_images(keyword, image_urls, "images")


4. Write a python program to search for a smartphone(e.g.: Oneplus Nord, pixel 4A, etc.) on www.flipkart.com and scrape following details for all the search results displayed on 1st page. Details to be scraped: “Brand Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”,
“Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. Incase if any of the details is missing then replace it by “- “. Save your results in a dataframe and CSV.

In [31]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

search_query = input("Enter smartphone name to search on Flipkart: ")
smartphones = scrape_flipkart_smartphones(search_query) 
if smartphones:
  df = pd.DataFrame(smartphones)
  df.to_csv(f"{search_query}_flipkart_smartphones.csv", index=False)
  print("Scraping completed. Results saved to CSV file.")
else:
  print("No smartphones found for the given search query.")

def scrape_flipkart_smartphones(search_query):
    url = f"https://www.flipkart.com/search?q={search_query}%20&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    print(response)
    soup = BeautifulSoup(response.content, "html.parser")
    
    smartphones = []

    for product in soup.find_all("div", class_="_1AtVbE"):
        brand_name = product.find("div", class_="_4rR01T").text if product.find("div", class_="_4rR01T") else "-"
        smartphone_name = product.find("a", class_="IRpwTa").text if product.find("a", class_="IRpwTa") else "-"
        color = product.find("a", class_="IRpwTa").get("title").split(')')[0].split('(')[-1] if product.find("a", class_="IRpwTa") else "-"
        specifications = product.find_all("li", class_="rgWa7D")
        ram = next((spec.find("li") for spec in specifications if "RAM" in spec.text), "-")
        rom = next((spec.find("li") for spec in specifications if "ROM" in spec.text), "-")
        primary_camera = next((spec.find("li") for spec in specifications if "MP Rear" in spec.text), "-")
        secondary_camera = next((spec.find("li") for spec in specifications if "MP Front" in spec.text), "-")
        display_size = next((spec.find("li") for spec in specifications if "cm (" in spec.text), "-")
        battery_capacity = next((spec.find("li") for spec in specifications if "mAh" in spec.text), "-")
        price = product.find("div", class_="_30jeq3 _1_WHN1").text if product.find("div", class_="_30jeq3 _1_WHN1") else "-"
        product_url = "https://www.flipkart.com" + product.find("a", class_="IRpwTa")["href"] if product.find("a", class_="IRpwTa") else "-"

        smartphone = {
            "Brand Name": brand_name,
            "Smartphone Name": smartphone_name,
            "Colour": color,
            "RAM": ram.text if ram != "-" else "-",
            "Storage(ROM)": rom.text if rom != "-" else "-",
            "Primary Camera": primary_camera.text if primary_camera != "-" else "-",
            "Secondary Camera": secondary_camera.text if secondary_camera != "-" else "-",
            "Display Size": display_size.text if display_size != "-" else "-",
            "Battery Capacity": battery_capacity.text if battery_capacity != "-" else "-",
            "Price": price,
            "Product URL": product_url
        }
        smartphones.append(smartphone)
    
    return smartphones


Enter smartphone name to search on Flipkart:  moto


No smartphones found for the given search query.


5. Write a program to scrap geospatial coordinates (latitude, longitude) of a city searched on google maps.

In [33]:

   
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests
import os   
    
driver.get("https://www.google.co.in/maps")
time.sleep(3)

city = input('Enter City Name : ')                                         
search = driver.find_element(By.ID,"searchboxinput")                       
search.clear()                                                             
time.sleep(2)
search.send_keys(city)                                                     
button = driver.find_element(By.ID,"searchbox-searchbutton")               
button.click()                                                            
time.sleep(3)

try:
    url_string = driver.current_url
    print("URL Extracted: ", url_string)
    lat_lng = re.findall(r'@(.*)data',url_string)
    if len(lat_lng):
        lat_lng_list = lat_lng[0].split(",")
        if len(lat_lng_list)>=2:
            lat = lat_lng_list[0]
            lng = lat_lng_list[1]
        print("Latitude = {}, Longitude = {}".format(lat, lng))

except Exception as e:
        print("Error: ", str(e))


Enter City Name : sangli
URL Extracted:  https://www.google.co.in/maps/place/Sangli,+Maharashtra/@19.1004672,72.8891392,14z/data=!4m6!3m5!1s0x3bc10c8187f060eb:0x37911f53cdc1ddb3!8m2!3d16.8523973!4d74.5814773!16zL20vMDJucG1k?entry=ttu
Latitude = 19.1004672, Longitude = 72.8891392


6. Write a program to scrap all the available details of best gaming laptops from digit.in.

In [47]:
from selenium import webdriver
import time


driver = webdriver.Chrome()


driver.get('https://www.digit.in/')


search_bar = driver.find_element(By.CLASS_NAME,'re-ajax-search')
search_bar.send_keys('gaming laptops')
search_bar.submit()


time.sleep(2)


laptop_elements = driver.find_elements(By.CLASS_NAME,'wcapf-before-products')
laptop_details = []

for laptop in laptop_elements:
  name = laptop.find_element(By.CLASS_NAME,' cat_for_grid lineheight15').text
  price = laptop.find_element(By.CLASS_NAME,'searchPrice').text
  specifications = laptop.find_element(By.CLASS_NAME,'searchSpec').text
  
  laptop_details.append({
  'Name': name,
  'Price': price,
  'Specifications': specifications
  })


for laptop in laptop_details:
  print(laptop)

driver.quit()


JavascriptException: Message: javascript error: {"status":32,"value":"An invalid or illegal selector was specified"}
  (Session info: chrome=125.0.6422.141)
Stacktrace:
	GetHandleVerifier [0x00007FF6C0A31F52+60322]
	(No symbol) [0x00007FF6C09ACEC9]
	(No symbol) [0x00007FF6C0867EBA]
	(No symbol) [0x00007FF6C086DCEE]
	(No symbol) [0x00007FF6C0870641]
	(No symbol) [0x00007FF6C08706E0]
	(No symbol) [0x00007FF6C08B733B]
	(No symbol) [0x00007FF6C08B773C]
	(No symbol) [0x00007FF6C08AAEEC]
	(No symbol) [0x00007FF6C08DC25F]
	(No symbol) [0x00007FF6C08AADB6]
	(No symbol) [0x00007FF6C08DC430]
	(No symbol) [0x00007FF6C08FBC80]
	(No symbol) [0x00007FF6C08DBFC3]
	(No symbol) [0x00007FF6C08A9617]
	(No symbol) [0x00007FF6C08AA211]
	GetHandleVerifier [0x00007FF6C0D494AD+3301629]
	GetHandleVerifier [0x00007FF6C0D936D3+3605283]
	GetHandleVerifier [0x00007FF6C0D89450+3563680]
	GetHandleVerifier [0x00007FF6C0AE4326+790390]
	(No symbol) [0x00007FF6C09B750F]
	(No symbol) [0x00007FF6C09B3404]
	(No symbol) [0x00007FF6C09B3592]
	(No symbol) [0x00007FF6C09A2F9F]
	BaseThreadInitThunk [0x00007FFE7ABA7614+20]
	RtlUserThreadStart [0x00007FFE7BD426A1+33]
