In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import random

# Initialize the WebDriver
chromedriver_path = 'chromedriver.exe'  # Path to your chromedriver
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service)

# URL of the page to scrape
url = 'https://www.hondacengkareng.com/motor/'
driver.get(url)
driver.implicitly_wait(10)

# List to store product names, categories, product codes, prices, and features
nama_produk = []
kategori = []
kode_produk = []
harga_produk = []
fitur_produk = []

def generate_random_code(existing_codes, length=8):
    while True:
        code = ''.join(random.choices('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=length))
        if code not in existing_codes:
            return code

def extract_features():
    features = []
    try:
        # Wait for the <h2 id="info">Info Produk</h2> element
        info_produk_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "info"))
        )

        # Find the <p> element containing the word 'fitur'
        p_elements = driver.find_elements(By.TAG_NAME, 'p')
        fitur_p_element = None
        for p in p_elements:
            if 'fitur' in p.text.lower():
                fitur_p_element = p
                break

        # If the fitur <p> element is found, scrape all <li> elements within it
        if fitur_p_element:
            ul_element = fitur_p_element.find_element(By.XPATH, "./following-sibling::ul[1]")
            li_elements = ul_element.find_elements(By.TAG_NAME, 'li')
            
            for i, li in enumerate(li_elements):
                features.append(li.text.strip())
                if i < len(li_elements) - 1:
                    features.append(', ')
    except Exception as e:
        print(f"Error occurred while extracting features: {e}")

    return features

def click_all_thumbnail_links_on_page():
    try:
        # Wait for thumbnail elements to be present
        elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'thumbnail'))
        )
    except Exception as e:
        print(f"Error occurred while finding thumbnail elements: {e}")
        return

    for i in range(len(elements)):
        try:
            # Retrieve elements again to ensure a fresh DOM
            elements = driver.find_elements(By.CLASS_NAME, 'thumbnail')
            if i >= len(elements):
                print(f"Index {i} not found in the list of elements.")
                continue

            element = elements[i]

            # Click the element directly
            element.click()

            # Wait for the product name element to be visible
            try:
                product_name_element = WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.CLASS_NAME, 'entry-title'))
                )
                product_name = product_name_element.text
                nama_produk.append(product_name)
            except Exception as e:
                print(f"Error occurred while retrieving product name: {e}")
                nama_produk.append('')

            # Extract the price from the table
            try:
                # Wait for the price table to be visible
                price_table_element = WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.XPATH, "//table[@class='table table-orange table-bordered table-hover']/tbody/tr/td[2]"))
                )
                price = price_table_element.text
                harga_produk.append(price)
            except Exception as e:
                print(f"Error occurred while retrieving product price: {e}")
                harga_produk.append('')

            # Append category and generate a unique product code
            kategori.append('motor')
            kode_produk.append(generate_random_code(kode_produk))

            # Extract features
            features = extract_features()
            fitur_produk.append(''.join(features))

            # Print product details for verification
            print("Nama produk:", nama_produk[-1])
            print("Harga produk:", harga_produk[-1])
            print("Kategori:", kategori[-1])
            print("Kode produk:", kode_produk[-1])
            print("Fitur produk:", fitur_produk[-1])

            # Go back to the previous page
            driver.back()
            
            # Wait until the button with class 'button button-large' is present
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'button button-large'))
            )

        except Exception as e:
            print(f"Error occurred while clicking or retrieving information: {e}")

# Call the function to execute the script
click_all_thumbnail_links_on_page()

# Print all collected product names, categories, product codes, prices, and features at the end
print("All collected product names:", nama_produk)
print("All collected categories:", kategori)
print("All collected product codes:", kode_produk)
print("All collected prices:", harga_produk)
print("All collected features:", fitur_produk)



Nama produk: Honda Revo
Harga produk: 16,717,000
Kategori: motor
Kode produk: Q6KZ53B7
Fitur produk: Mesin injeksi tangguh & irit teknologi PGM-FI, membuat New Honda Revo FI lebih bertenaga, mudah dirawat., Bagasi serba guna berkapasitas 7 liter., Front disk brake yang membantu pengereman., Secure key shutter – pengaman kunci kontak bermagnet (magnetic key shutter) yang efektif mengurangi resiko pencurian motor.
Error occurred while clicking or retrieving information: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF752C83E52+31618]
	(No symbol) [0x00007FF752BFB0B9]
	(No symbol) [0x00007FF752AB888A]
	(No symbol) [0x00007FF752B08524]
	(No symbol) [0x00007FF752B0862C]
	(No symbol) [0x00007FF752B4F787]
	(No symbol) [0x00007FF752B2D14F]
	(No symbol) [0x00007FF752B4CA80]
	(No symbol) [0x00007FF752B2CEB3]
	(No symbol) [0x00007FF752AFA46B]
	(No symbol) [0x00007FF752AFB001]
	GetHandleVerifier [0x00007FF752F8A02D+3202397]
	GetHandleVerifier [0x00007FF752FD6A4D+3516285]
	GetHandleVerifier [0x0

In [3]:
import pandas as pd
df_nama_produk = pd.DataFrame(nama_produk, columns=["Nama Produk"])
df_harga = pd.DataFrame(harga_produk, columns=["Harga"])
df_kode_produk = pd.DataFrame(kode_produk, columns=["Kode Produk"])
df_kategori = pd.DataFrame(kategori, columns=["Kategori"])
df_fitur = pd.DataFrame(fitur_produk, columns=["Fitur"])



# Combine all DataFrames into a single DataFrame
final_df = pd.concat([
    df_nama_produk,
    df_harga,
    df_kode_produk,
    df_kategori,
    df_fitur
], axis=1)

print(final_df)


# Save the DataFrame to a CSV file
final_df.to_csv('oli.csv', index=False, sep=';', encoding='utf-8')

print("DataFrame has been saved to 'motor.csv'")



                   Nama Produk       Harga Kode Produk Kategori  \
0                   Honda Revo  16,717,000    Q6KZ53B7    motor   
1                Honda Supra X  20,082,000    W6V9VKB7    motor   
2          Honda Supra GTR 150  26,097,000    USDH47JT    motor   
3              Honda Super Cub  76,877,000    W445GZJA    motor   
4                  Honda EM1 e                1NJVNSS8    motor   
5                   Honda BeAT  18,612,000    EBS5G7G6    motor   
6                  Honda Genio  19,757,000    3BGBUUXW    motor   
7                 Honda Scoopy  23,157,000    0L3GJEES    motor   
8          Honda Vario 125 eSP  24,021,000    UQ5S5I7A    motor   
9              Honda Vario 160  27,578,000    PBY5BIF1    motor   
10             Honda Stylo 160  28,227,000    Y3IKXQFX    motor   
11                   Honda PCX  33,628,000    BPPKJNK8    motor   
12               Honda ADV 160  36,747,000    1AUAKCTW    motor   
13                 Honda Forza  90,501,000    IK3M1135    moto

In [4]:
import csv
from IPython.display import FileLink, display  # Import FileLink and display
# Path to save the CSV file
file_path = 'motor.csv'

# Data headers
headers = ['Nama Produk', 'Harga', 'Kode Produk', 'Kategori', 'Fitur']

# Data rows
rows = zip(nama_produk, harga_produk, kode_produk, kategori, fitur_produk)

# Write the CSV file with UTF-8 BOM encoding
with open(file_path, mode='w', encoding='utf-8-sig', newline='') as file:
    writer = csv.writer(file, delimiter=';')
    
    # Write the headers
    writer.writerow(headers)
    
    # Write the rows
    writer.writerows(rows)

print(f"Data successfully written to {file_path}")

# Generate a download link
display(FileLink(file_path))

Data successfully written to motor.csv
