In [None]:
!pip install selenium beautifulsoup4

In [None]:
!pip install webdriver_manager

In [None]:
!pip install folium

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
import time
import pandas as pd
import traceback
import logging
import re
import random

# Root logger setup: records at INFO and above are written both to
# 'scraping_debug.log' (opened with mode='w', so it is truncated on each
# run) and to a default StreamHandler.
logging.basicConfig(
    handlers=[
        logging.FileHandler('scraping_debug.log', mode='w'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)

def extract_merchant_coordinates(driver, merchant):
    """Click a merchant's "Rute" button and read coordinates from the Maps URL.

    The button is looked up *inside* ``merchant`` so that each card yields its
    own route link (a document-wide search would always hit the first card).
    The click is expected to open Google Maps in a new window; the coordinates
    are taken from the ``destination=<lat>,<lng>`` part of that window's URL,
    after which the Maps window is closed and focus returns to the list.

    Args:
        driver: Selenium WebDriver showing the merchant list.
        merchant: WebElement of the merchant card to process.

    Returns:
        A ``"<lat>, <lng>"`` string, or ``None`` when the URL carries no
        ``destination=`` pair or any step fails (the failure is logged).
    """
    route_xpath = ".//button[.//span[contains(text(), 'Rute')] or contains(text(), 'Rute')]"
    original_window = None

    def find_route_button(_driver):
        # Returning False makes WebDriverWait keep polling; a missing element
        # raises NoSuchElementException, which WebDriverWait ignores by default.
        button = merchant.find_element(By.XPATH, route_xpath)
        return button if button.is_displayed() and button.is_enabled() else False

    try:
        # Remember the list window first so the error path can always get back.
        original_window = driver.current_window_handle

        route_button = WebDriverWait(driver, 10).until(find_route_button)

        # Bring the button into view before clicking.
        driver.execute_script(
            "arguments[0].scrollIntoView({block: 'center'});",
            route_button
        )
        time.sleep(1)

        # JavaScript click; the site opens the route in a new window/tab.
        driver.execute_script("arguments[0].click();", route_button)
        time.sleep(2)

        WebDriverWait(driver, 10).until(lambda d: len(d.window_handles) > 1)

        # Switch to the newly opened (Google Maps) window.
        maps_window = [window for window in driver.window_handles if window != original_window][0]
        driver.switch_to.window(maps_window)

        WebDriverWait(driver, 10).until(
            lambda d: "google.com/maps" in d.current_url
        )

        maps_url = driver.current_url
        logging.info(f"URL Maps: {maps_url}")

        coord_match = re.search(r'destination=([-\d.]+),([-\d.]+)', maps_url)
        koordinat = None

        if coord_match:
            koordinat = f"{coord_match.group(1)}, {coord_match.group(2)}"
            logging.info(f"Koordinat ditemukan: {koordinat}")

        # Close the Maps window and return to the merchant list.
        driver.close()
        driver.switch_to.window(original_window)
        time.sleep(1)

        return koordinat

    except Exception as e:
        logging.error(f"Error mendapatkan koordinat: {e}")
        logging.error(traceback.format_exc())

        # Best-effort cleanup: close only windows that are NOT the list window,
        # then focus the list window again. Never close the original window.
        if original_window is not None:
            try:
                for handle in driver.window_handles:
                    if handle != original_window:
                        driver.switch_to.window(handle)
                        driver.close()
                driver.switch_to.window(original_window)
            except Exception as cleanup_error:
                logging.warning(f"Gagal membersihkan window tambahan: {cleanup_error}")

        return None

def scroll_and_count_merchants(driver, wait):
    """Scroll the merchant list until it stops growing and return the cards.

    Args:
        driver: Selenium WebDriver showing the merchant list.
        wait: WebDriverWait used to locate the scrollable container.

    Returns:
        The list of elements matched by the merchant-card XPath on the last
        scroll round, or ``[]`` when no container is found or an error occurs.
    """
    try:
        # Script that scrolls the list container to its bottom.
        scroll_container_js = """
                arguments[0].scrollTo({
                    top: arguments[0].scrollHeight,
                    behavior: 'smooth'
                });
                """
        # Several class names are accepted for a merchant card.
        merchant_card_xpath = """
                //*[contains(@class, 'merchant-card') or 
                   contains(@class, 'myptm-Card-root') or 
                   contains(@class, 'm_4081bf90')]
                """
        # Container candidates, tried in this order.
        candidate_xpaths = (
            "//div[contains(@class, 'myptm-Stack-root')]//div[contains(@class, 'merchant-list')]",
            "//div[contains(@class, 'merchant-container')]",
            "//div[contains(@class, 'myptm-Stack-root')]",
        )

        container = None
        for xpath in candidate_xpaths:
            try:
                container = wait.until(
                    EC.presence_of_element_located((By.XPATH, xpath))
                )
            except TimeoutException:
                continue
            else:
                break

        if not container:
            logging.error("Tidak dapat menemukan container merchant")
            return []

        logging.info("Container merchant ditemukan")

        previous_total = 0
        stable_rounds = 0
        required_stable_rounds = 3  # stop after 3 rounds with an unchanged count
        pause_seconds = 2

        while stable_rounds < required_stable_rounds:
            # Scroll both the container and the window.
            driver.execute_script(scroll_container_js, container)
            driver.execute_script(
                "window.scrollTo({top: document.body.scrollHeight, behavior: 'smooth'});"
            )

            # Give newly requested content time to load.
            time.sleep(pause_seconds)

            cards = driver.find_elements(By.XPATH, merchant_card_xpath)
            total = len(cards)
            logging.info(f"Jumlah merchant terdeteksi: {total}")

            stable_rounds = stable_rounds + 1 if total == previous_total else 0
            previous_total = total

            # Scrolling the last card into view is meant to trigger lazy loading.
            if cards:
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                    cards[-1]
                )
                time.sleep(1)

        return cards

    except Exception as err:
        logging.error(f"Error saat scroll dan hitung merchant: {err!s}")
        return []

def scrape_lpg_locations():
    """Scrape LPG 3 kg merchants for a hard-coded list of locations.

    Starts Chrome, opens the MyPertamina LPG info page, opens the
    "Lokasi Pangkalan LPG 3 Kg" panel, selects Provinsi / Kab/Kota /
    Kecamatan / Kelurahan for each entry of ``locations``, scrolls the
    merchant list (``scroll_and_count_merchants``), reads ID, owner name and
    address from every card and asks ``extract_merchant_coordinates`` for the
    coordinates. Collected rows are written to ``lpg_merchants_<timestamp>.csv``
    and a short summary is printed. Errors are logged, not raised, and the
    browser is always closed.

    Returns:
        None.
    """
    options = webdriver.ChromeOptions()
    # Headless mode is deliberately not enabled so the browser stays visible
    # while debugging; add '--headless' here for unattended runs.
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--disable-extensions')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1920,1080')

    # Extra rendering-related options.
    options.add_argument('--disable-gpu')
    options.add_argument('--remote-debugging-port=9222')

    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 30)

    # Merchant IDs already stored, used to skip duplicates.
    unique_merchant_ids = set()

    try:
        driver.get("https://subsiditepatlpg.mypertamina.id/infolpg3kg")
        logging.info("Halaman berhasil dimuat")

        def xpath_literal(text):
            """Return ``text`` as a safely quoted XPath string literal."""
            text = str(text)
            if "'" not in text:
                return f"'{text}'"
            if '"' not in text:
                return f'"{text}"'
            # XPath 1.0 has no escape character: glue apostrophe-free pieces
            # together with concat(), inserting "'" between them.
            pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
            return f"concat({pieces})"

        def select_dropdown_with_text(driver, label, value):
            """Open the dropdown for ``label``, search ``value`` and pick it.

            Returns True on success, False (after logging) on any failure.
            """
            try:
                logging.info(f"Memilih {label}: {value}")

                # Wait for the loading overlay to disappear; carry on anyway
                # if it is still there after the timeout.
                try:
                    wait.until(EC.invisibility_of_element_located(
                        (By.XPATH, "//div[contains(@class, 'loading-overlay')]")
                    ))
                except TimeoutException:
                    logging.debug("Loading overlay masih terlihat, lanjut.")

                # Quote label/value so names containing apostrophes do not
                # produce an invalid XPath expression.
                label_literal = xpath_literal(label)
                value_literal = xpath_literal(value)

                # Dropdown locators, tried in order.
                dropdown_strategies = [
                    f"//div[@aria-label={label_literal}]",
                    f"//div[contains(@class, 'legion-select') and contains(text(), {label_literal})]",
                    "//div[contains(@class, 'legion-select__searchbar')]"
                ]

                dropdown = None
                for strategy in dropdown_strategies:
                    try:
                        dropdown = wait.until(
                            EC.element_to_be_clickable((By.XPATH, strategy))
                        )
                        break
                    except TimeoutException:
                        continue

                if not dropdown:
                    logging.error(f"Tidak dapat menemukan dropdown {label}")
                    return False

                # Scroll to the dropdown and open it.
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                    dropdown
                )
                time.sleep(1)

                driver.execute_script("arguments[0].click();", dropdown)
                time.sleep(2)

                # Type the value into the dropdown's search box.
                search_input = wait.until(
                    EC.presence_of_element_located((By.XPATH, "//input[contains(@placeholder, 'Cari')]"))
                )
                search_input.clear()
                search_input.send_keys(value)
                time.sleep(1.5)

                # Click the matching option.
                option = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((
                        By.XPATH,
                        f"//span[contains(@class, 'legion-select__item') and contains(text(), {value_literal})]"
                    ))
                )
                driver.execute_script("arguments[0].click();", option)
                time.sleep(2)

                return True

            except Exception as e:
                logging.error(f"Error memilih {label} - {value}: {e}")
                logging.error(traceback.format_exc())
                return False

        # Open the "Lokasi Pangkalan LPG 3 Kg" panel via its button.
        div_lokasi = wait.until(EC.presence_of_element_located(
            (By.XPATH, "//h4[contains(text(), 'Lokasi Pangkalan LPG 3 Kg')]/ancestor::div[contains(@class, 'myptm-Paper-root')]")
        ))
        tombol_detail = div_lokasi.find_element(By.TAG_NAME, "button")
        driver.execute_script(
            "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
            tombol_detail
        )
        time.sleep(1)
        tombol_detail.click()
        logging.info("Tombol 'Lokasi Pangkalan LPG 3 Kg' diklik.")
        time.sleep(2)

        # Locations to scrape.
        locations = [
            {"Provinsi": "JAWA BARAT", "Kota": "KOTA BANDUNG", "Kecamatan": "COBLONG", "Kelurahan": "DAGO"}
        ]

        all_merchants = []
        for loc in locations:
            # The dropdowns are filled in this order.
            dropdowns = [
                ("Provinsi", loc["Provinsi"]),
                ("Kab/Kota", loc["Kota"]),
                ("Kecamatan", loc["Kecamatan"]),
                ("Kelurahan", loc["Kelurahan"])
            ]

            success = True
            for label, value in dropdowns:
                if not select_dropdown_with_text(driver, label, value):
                    success = False
                    break
                time.sleep(2)

            if not success:
                logging.warning("Gagal memilih lokasi!")
                continue

            time.sleep(3)

            # Load the whole merchant list by scrolling.
            merchant_elements = scroll_and_count_merchants(driver, wait)

            logging.info(f"Jumlah merchant ditemukan: {len(merchant_elements)}")

            for merchant in merchant_elements:
                try:
                    paragraphs = merchant.find_elements(By.CLASS_NAME, "m_b6d8b162")
                    # A card is only used when it has ID, owner and address.
                    if len(paragraphs) < 3:
                        continue

                    id_merchant = paragraphs[0].text.strip()

                    if id_merchant in unique_merchant_ids:
                        logging.info(f"Skip merchant duplikat: {id_merchant}")
                        continue

                    nama_pemilik = paragraphs[1].text.strip()
                    alamat = paragraphs[2].text.strip()

                    # Random delay before opening the route link.
                    time.sleep(random.uniform(1, 3))

                    koordinat = extract_merchant_coordinates(driver, merchant)

                    merchant_data = {
                        "Provinsi": loc["Provinsi"],
                        "Kota": loc["Kota"],
                        "Kecamatan": loc["Kecamatan"],
                        "Kelurahan": loc["Kelurahan"],
                        "ID Merchant": id_merchant,
                        "Nama Pemilik": nama_pemilik,
                        "Alamat": alamat,
                        "Koordinat": koordinat
                    }

                    unique_merchant_ids.add(id_merchant)
                    logging.info(f"Berhasil scrape merchant: {id_merchant} - {nama_pemilik} - Koordinat: {koordinat}")
                    all_merchants.append(merchant_data)

                except Exception as e:
                    logging.error(f"Error ekstraksi data merchant: {e}")
                    logging.error(traceback.format_exc())
                    continue

        # Save to a timestamped CSV.
        if all_merchants:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"lpg_merchants_{timestamp}.csv"

            df = pd.DataFrame(all_merchants)
            df.to_csv(filename, index=False, encoding='utf-8-sig')

            logging.info(f"Berhasil menyimpan {len(all_merchants)} merchant ke {filename}")

            print("\n=== Summary Hasil Scraping ===")
            print(f"Total merchant: {len(all_merchants)}")
            print(f"File output: {filename}")
            print("\nSample data:")
            print(df.head().to_string())

        else:
            logging.warning("Tidak ada merchant yang ditemukan.")

    except Exception:
        logging.error("Error utama:")
        logging.error(traceback.format_exc())

    finally:
        driver.quit()

# Start the scraper only when this module is the entry point (not on import).
if __name__ == "__main__":
    scrape_lpg_locations()

In [None]:
import os
import pandas as pd
import logging
import traceback
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException, WebDriverException, NoSuchElementException
import time
import random
import re

# Logging setup: records at INFO and above are written both to
# 'scraping_debug.log' (opened with mode='w', so it is truncated when the
# handler is created) and to a default StreamHandler.
logging.basicConfig(
    handlers=[
        logging.FileHandler('scraping_debug.log', mode='w'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s: %(message)s',
    level=logging.INFO,
)

def parse_locations_from_csv(csv_path):
    """Read the locations to scrape from a semicolon-separated CSV file.

    The file must contain the columns ``Provinsi``, ``Kab/Kota``,
    ``Kecamatan`` and ``Kelurahan`` (surrounding whitespace in the header is
    ignored, ``Unnamed`` filler columns are dropped). Every cell is read as
    text, so place names and codes are never converted to numbers or NaN.
    Rows in which any of the four fields is empty are skipped and counted in
    a warning instead of being returned as the string ``"nan"``.

    Args:
        csv_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        A list of dicts with the keys ``Provinsi``, ``Kota``, ``Kecamatan``
        and ``Kelurahan``; ``[]`` when a required column is missing or the
        file cannot be read (the problem is logged).
    """
    required_columns = ['Provinsi', 'Kab/Kota', 'Kecamatan', 'Kelurahan']

    try:
        # dtype=str + keep_default_na=False: keep cells exactly as written
        # (no "NA"/"null" -> NaN conversion, no numeric inference).
        df = pd.read_csv(csv_path, sep=';', dtype=str, keep_default_na=False)
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
        df.columns = df.columns.str.strip()

        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            logging.error(f"Kolom yang tidak ditemukan: {missing_columns}")
            return []

        locations = []
        skipped_rows = 0
        for values in df[required_columns].itertuples(index=False, name=None):
            # Rows shorter than the header can still yield NaN; treat as empty.
            provinsi, kota, kecamatan, kelurahan = (
                '' if pd.isna(value) else str(value).strip() for value in values
            )
            if not all((provinsi, kota, kecamatan, kelurahan)):
                skipped_rows += 1
                continue
            locations.append({
                "Provinsi": provinsi,
                "Kota": kota,
                "Kecamatan": kecamatan,
                "Kelurahan": kelurahan
            })

        if skipped_rows:
            logging.warning(f"Melewati {skipped_rows} baris dengan kolom lokasi kosong")

        logging.info(f"Berhasil membaca {len(locations)} lokasi dari {csv_path}")
        return locations

    except Exception as e:
        logging.error(f"Error membaca file CSV: {e}")
        logging.error(traceback.format_exc())
        return []

def _xpath_literal(text):
    """Return ``text`` as a safely quoted XPath 1.0 string literal."""
    text = str(text)
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape character: glue apostrophe-free pieces together
    # with concat(), inserting "'" between them.
    pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
    return f"concat({pieces})"


def select_dropdown_with_text(driver, label, value):
    """Open the dropdown identified by ``label`` and choose ``value``.

    The dropdown is located with several XPath strategies, opened, the value
    is typed into its search box and the matching option is clicked. ``label``
    and ``value`` are quoted as XPath literals, so names containing
    apostrophes do not break the selectors.

    Args:
        driver: Selenium WebDriver showing the location form.
        label: Dropdown label, e.g. ``"Provinsi"``.
        value: Option text to search for and select.

    Returns:
        True when the option was clicked, False (after logging) otherwise.
    """
    try:
        logging.info(f"Memilih {label}: {value}")
        wait = WebDriverWait(driver, 10)

        # Wait for the loading overlay to disappear; carry on anyway if it is
        # still visible after the timeout.
        try:
            wait.until(EC.invisibility_of_element_located(
                (By.XPATH, "//div[contains(@class, 'loading-overlay')]")
            ))
        except TimeoutException:
            logging.debug("Loading overlay masih terlihat, lanjut.")

        label_literal = _xpath_literal(label)
        value_literal = _xpath_literal(value)

        # Dropdown locators, tried in order.
        dropdown_strategies = [
            f"//div[@aria-label={label_literal}]",
            f"//div[contains(@class, 'legion-select') and contains(text(), {label_literal})]",
            "//div[contains(@class, 'legion-select__searchbar')]"
        ]

        dropdown = None
        for strategy in dropdown_strategies:
            try:
                dropdown = wait.until(
                    EC.element_to_be_clickable((By.XPATH, strategy))
                )
                break
            except TimeoutException:
                continue

        if not dropdown:
            logging.error(f"Tidak dapat menemukan dropdown {label}")
            return False

        # Scroll to the dropdown and open it with a JavaScript click.
        driver.execute_script(
            "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
            dropdown
        )
        time.sleep(1)

        driver.execute_script("arguments[0].click();", dropdown)
        time.sleep(2)

        # Type the value into the dropdown's search box.
        search_input = wait.until(
            EC.presence_of_element_located((By.XPATH, "//input[contains(@placeholder, 'Cari')]"))
        )
        search_input.clear()
        search_input.send_keys(value)
        time.sleep(1.5)

        # Click the option whose text contains the value.
        option = wait.until(
            EC.element_to_be_clickable((
                By.XPATH,
                f"//span[contains(@class, 'legion-select__item') and contains(text(), {value_literal})]"
            ))
        )
        driver.execute_script("arguments[0].click();", option)
        time.sleep(1)

        return True

    except Exception as e:
        logging.error(f"Error memilih {label} - {value}: {e}")
        logging.error(traceback.format_exc())
        return False

def scroll_and_count_merchants(driver, wait):
    """Scroll the merchant list until its size is stable and return the cards.

    Args:
        driver: Selenium WebDriver showing the merchant list.
        wait: WebDriverWait used to locate the scrollable container.

    Returns:
        The elements matched by the merchant-card XPath on the final scroll
        round, or ``[]`` when no container is found or an error occurs.
    """
    try:
        # Several class names are accepted for a merchant card.
        merchant_card_xpath = """
                //*[contains(@class, 'merchant-card') or 
                   contains(@class, 'myptm-Card-root') or 
                   contains(@class, 'm_4081bf90')]
                """
        # Container candidates, tried in this order.
        candidate_xpaths = (
            "//div[contains(@class, 'myptm-Stack-root')]//div[contains(@class, 'merchant-list')]",
            "//div[contains(@class, 'merchant-container')]",
            "//div[contains(@class, 'myptm-Stack-root')]",
        )

        container = None
        for xpath in candidate_xpaths:
            try:
                container = wait.until(
                    EC.presence_of_element_located((By.XPATH, xpath))
                )
            except TimeoutException:
                continue
            else:
                break

        if not container:
            logging.error("Tidak dapat menemukan container merchant")
            return []

        previous_total = 0
        stable_rounds = 0
        required_stable_rounds = 3  # stop after 3 rounds with an unchanged count
        pause_seconds = 2

        while stable_rounds < required_stable_rounds:
            # Scroll the container, then the window, to the bottom.
            driver.execute_script(
                "arguments[0].scrollTo({top: arguments[0].scrollHeight, behavior: 'smooth'});",
                container
            )
            driver.execute_script(
                "window.scrollTo({top: document.body.scrollHeight, behavior: 'smooth'});"
            )

            time.sleep(pause_seconds)

            cards = driver.find_elements(By.XPATH, merchant_card_xpath)
            total = len(cards)
            logging.info(f"Jumlah merchant terdeteksi: {total}")

            stable_rounds = stable_rounds + 1 if total == previous_total else 0
            previous_total = total

            # Bring the last card into view before the next round.
            if cards:
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                    cards[-1]
                )
                time.sleep(1)

        return cards

    except Exception as err:
        logging.error(f"Error saat scroll dan hitung merchant: {err!s}")
        return []

# URL patterns tried in order. Patterns that name an explicit destination or
# place come first; 'll=' and '@lat,lng' are tried last because they
# presumably describe the map viewport rather than the destination itself
# (NOTE(review): confirm against real redirect URLs).
_MAPS_COORD_PATTERNS = (
    r'destination=([-\d.]+),([-\d.]+)',  # destination=lat,lng
    r'daddr=([-\d.]+),([-\d.]+)',  # daddr=lat,lng
    r'dir/([-\d.]+),([-\d.]+)',  # dir/lat,lng
    r'place/([-\d.]+),([-\d.]+)',  # place/lat,lng
    r'q=([-\d.]+),([-\d.]+)',  # q=lat,lng
    r'data=!3d([-\d.]+)!4d([-\d.]+)',  # data=!3dlat!4dlng
    r'll=([-\d.]+),([-\d.]+)',  # ll=lat,lng
    r'@([-\d.]+),([-\d.]+)',  # @lat,lng
)

_MAPS_URL_MARKERS = ('google.com/maps', 'goo.gl/maps', 'maps.google')


def _format_indonesia_coordinates(lat, lng):
    """Return "lat, lng" (6 decimals) if inside the Indonesia bounds, else None."""
    if -11 <= lat <= 6 and 95 <= lng <= 141:
        return f"{lat:.6f}, {lng:.6f}"
    return None


def _coordinates_from_maps_url(maps_url):
    """Return the first valid coordinate pair found in ``maps_url``, else None."""
    for pattern in _MAPS_COORD_PATTERNS:
        match = re.search(pattern, maps_url)
        if not match:
            continue
        try:
            lat, lng = float(match.group(1)), float(match.group(2))
        except ValueError:
            continue
        koordinat = _format_indonesia_coordinates(lat, lng)
        if koordinat:
            return koordinat
    return None


def _coordinates_from_maps_page(driver):
    """Return coordinates read from "<lat>°N/S, <lng>°E/W" page text, else None."""
    time.sleep(1)
    location_elements = driver.find_elements(
        By.XPATH,
        "//*[contains(@aria-label, '°') or contains(@data-item-id, 'location')]"
    )
    for element in location_elements:
        location_text = element.get_attribute('aria-label') or element.text
        coord_match = re.search(r'([-\d.]+)°([NS]),\s*([-\d.]+)°([EW])', location_text)
        if not coord_match:
            continue
        try:
            lat, lng = float(coord_match.group(1)), float(coord_match.group(3))
        except ValueError:
            continue
        # Apply the hemisphere letters: south and west are negative.
        if coord_match.group(2) == 'S':
            lat = -abs(lat)
        if coord_match.group(4) == 'W':
            lng = -abs(lng)
        koordinat = _format_indonesia_coordinates(lat, lng)
        if koordinat:
            return koordinat
    return None


def _close_extra_windows(driver, original_window):
    """Best effort: close every window except ``original_window`` and focus it."""
    if original_window is None:
        return
    try:
        for handle in driver.window_handles:
            if handle != original_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(original_window)
    except Exception as cleanup_error:
        logging.warning(f"Gagal membersihkan window tambahan: {cleanup_error}")


def extract_merchant_coordinates(driver, merchant):
    """Open a merchant's "Rute" link and extract its coordinates.

    Steps: read the 15-digit merchant ID from the card, locate the "Rute"
    button inside the card, click it (up to 3 attempts), switch to the window
    that opens, and parse coordinates from the Maps URL; if the URL yields
    nothing, fall back to coordinate text on the page. Only coordinates with
    -11 <= lat <= 6 and 95 <= lng <= 141 are accepted. Extra windows are
    closed and focus returns to the original window on every path.

    Args:
        driver: Selenium WebDriver showing the merchant list.
        merchant: WebElement of the merchant card.

    Returns:
        ``"<lat>, <lng>"`` with six decimals, or ``None`` when no ID, button
        or valid coordinate is found or an error occurs (logged).
    """
    original_window = None
    try:
        original_window = driver.current_window_handle
        merchant_id = None

        # 1. The merchant ID is the card text made of exactly 15 digits.
        for element in merchant.find_elements(By.CLASS_NAME, "m_b6d8b162"):
            text = element.text.strip()
            if re.match(r'^\d{15}$', text):
                merchant_id = text
                break

        if not merchant_id:
            logging.warning("ID merchant tidak ditemukan")
            return None

        # 2. Locate the route button inside the card, trying several XPaths.
        route_button = None
        button_strategies = [
            # Button inside the merchant container with a "Rute" span
            ".//button[.//span[contains(text(), 'Rute')] and ./ancestor::div[contains(@class, 'm_4081bf90')]]",
            # Button with the myptm button class containing "Rute"
            ".//button[contains(@class, 'myptm-Button-root') and .//span[contains(text(), 'Rute')]]",
            # Span with text "Rute" inside a button
            ".//span[contains(text(), 'Rute')]/ancestor::button",
            # Div with text "Rute" inside a button
            ".//div[contains(text(), 'Rute')]/ancestor::button"
        ]

        for strategy in button_strategies:
            try:
                route_button = merchant.find_element(By.XPATH, strategy)
                if route_button and route_button.is_displayed():
                    break
            except NoSuchElementException:
                continue

        if not route_button:
            logging.warning(f"Tombol rute tidak ditemukan untuk merchant {merchant_id}")
            return None

        # 3. Click and read the Maps window, retrying on failure.
        max_attempts = 3
        for attempt in range(max_attempts):
            is_last_attempt = attempt == max_attempts - 1
            try:
                driver.execute_script(
                    "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                    route_button
                )
                time.sleep(2)

                # JavaScript click first, native click as a fallback.
                try:
                    driver.execute_script("arguments[0].click();", route_button)
                except WebDriverException:
                    route_button.click()

                # Wait for the new window to appear.
                try:
                    WebDriverWait(driver, 10).until(lambda d: len(d.window_handles) > 1)
                except TimeoutException:
                    if is_last_attempt:
                        logging.warning(f"Timeout final menunggu Maps untuk merchant {merchant_id}")
                        return None
                    continue

                maps_window = [w for w in driver.window_handles if w != original_window][0]
                driver.switch_to.window(maps_window)

                # Wait until the new window shows a Google Maps URL.
                try:
                    WebDriverWait(driver, 10).until(
                        lambda d: any(
                            marker in d.current_url.lower() for marker in _MAPS_URL_MARKERS
                        )
                    )
                except TimeoutException:
                    _close_extra_windows(driver, original_window)
                    if is_last_attempt:
                        logging.warning(f"Timeout final menunggu URL Maps untuk {merchant_id}")
                        return None
                    continue

                maps_url = driver.current_url
                logging.info(f"URL Maps: {maps_url}")

                # 4. URL first, page text as a backup.
                koordinat = _coordinates_from_maps_url(maps_url)
                if not koordinat:
                    try:
                        koordinat = _coordinates_from_maps_page(driver)
                    except Exception as e:
                        logging.warning(f"Gagal ekstrak koordinat dari element: {e}")

                # Close the Maps window and go back to the list.
                driver.close()
                driver.switch_to.window(original_window)
                time.sleep(1)

                if koordinat:
                    logging.info(f"Koordinat ditemukan untuk merchant {merchant_id}: {koordinat}")
                    return koordinat

                if is_last_attempt:
                    logging.warning(f"Tidak dapat menemukan koordinat valid untuk {merchant_id}")
                    return None

            except Exception as e:
                logging.error(f"Error pada attempt {attempt + 1}: {e}")
                if is_last_attempt:
                    logging.error(traceback.format_exc())
                # Never close the list window itself; only leftover windows.
                _close_extra_windows(driver, original_window)

    except Exception as e:
        logging.error(f"Error utama ekstraksi koordinat: {e}")
        logging.error(traceback.format_exc())
        _close_extra_windows(driver, original_window)

    return None

def save_progress(merchants, batch_num):
    """Write the merchants collected so far to a timestamped batch CSV.

    Nothing is written when ``merchants`` is empty. The file is named
    ``lpg_merchants_batch_<batch_num>_<YYYYmmdd_HHMMSS>.csv``.
    """
    if not merchants:
        return

    stamp = time.strftime("%Y%m%d_%H%M%S")
    batch_path = f"lpg_merchants_batch_{batch_num}_{stamp}.csv"
    pd.DataFrame(merchants).to_csv(batch_path, index=False, encoding='utf-8-sig')
    logging.info(f"Progress tersimpan: {batch_path}")

def save_final_results(merchants, failed_locations):
    """Write the final merchant rows and the failed locations to CSV files.

    Both files share one timestamp: ``lpg_merchants_final_<ts>.csv`` and
    ``failed_locations_<ts>.csv``. An empty input produces no file.
    """
    stamp = time.strftime("%Y%m%d_%H%M%S")

    def write_csv(rows, path):
        # utf-8-sig adds a BOM to the output file.
        pd.DataFrame(rows).to_csv(path, index=False, encoding='utf-8-sig')

    if merchants:
        merchants_path = f"lpg_merchants_final_{stamp}.csv"
        write_csv(merchants, merchants_path)
        logging.info(f"\nBerhasil menyimpan {len(merchants)} merchant ke {merchants_path}")

    if failed_locations:
        failed_path = f"failed_locations_{stamp}.csv"
        write_csv(failed_locations, failed_path)
        logging.info(f"Lokasi yang gagal tersimpan di: {failed_path}")

def scrape_lpg_locations(locations):
    """Scrape LPG 3 kg merchant data for every entry in ``locations``.

    Starts a headless Chrome session, opens the MyPertamina LPG info page and
    clicks the button inside the 'Lokasi Pangkalan LPG 3 Kg' card. For each
    location the four dropdowns (Provinsi, Kab/Kota, Kecamatan, Kelurahan)
    are set in order, the merchant elements returned by
    ``scroll_and_count_merchants`` are read, and every merchant whose ID has
    not been seen before is recorded together with its coordinates.

    Side effects: a checkpoint CSV is written through ``save_progress`` after
    each successfully processed location whose 1-based index is a multiple of
    5, the final CSV files are written through ``save_final_results``, and a
    summary is printed. The browser is closed in all cases once it has been
    started.

    Args:
        locations: Sized sequence of mappings with the keys ``'Provinsi'``,
            ``'Kota'``, ``'Kecamatan'`` and ``'Kelurahan'``.

    Returns:
        None. Exceptions (subclasses of ``Exception``) raised after the
        browser has started are logged instead of being propagated.
    """
    chrome_flags = (
        '--headless',
        '--disable-blink-features=AutomationControlled',
        '--disable-extensions',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--window-size=1920,1080',
        '--disable-gpu',
        '--remote-debugging-port=9222',
    )
    options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)

    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 10)

    unique_merchant_ids = set()
    all_merchants = []
    failed_locations = []

    def _select_location(choices):
        # Set each dropdown in order, pausing after every success; give up
        # at the first dropdown that cannot be set.
        for label, value in choices:
            if not select_dropdown_with_text(driver, label, value):
                return False
            time.sleep(1)
        return True

    try:
        driver.get("https://subsiditepatlpg.mypertamina.id/infolpg3kg")
        logging.info("Halaman berhasil dimuat")

        # Locate the card by its heading, then press the button inside it.
        card_xpath = "//h4[contains(text(), 'Lokasi Pangkalan LPG 3 Kg')]/ancestor::div[contains(@class, 'myptm-Paper-root')]"
        div_lokasi = wait.until(
            EC.presence_of_element_located((By.XPATH, card_xpath))
        )
        tombol_detail = div_lokasi.find_element(By.TAG_NAME, "button")
        driver.execute_script(
            "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
            tombol_detail
        )
        time.sleep(1)
        tombol_detail.click()
        logging.info("Tombol 'Lokasi Pangkalan LPG 3 Kg' diklik.")
        time.sleep(1)

        total_locations = len(locations)
        for idx, loc in enumerate(locations, 1):
            try:
                logging.info(f"\nMemproses lokasi {idx}/{total_locations}:")
                logging.info(f"Provinsi: {loc['Provinsi']}")
                logging.info(f"Kota: {loc['Kota']}")
                logging.info(f"Kecamatan: {loc['Kecamatan']}")
                logging.info(f"Kelurahan: {loc['Kelurahan']}")

                # (dropdown label on the page, value to choose)
                dropdowns = (
                    ("Provinsi", loc["Provinsi"]),
                    ("Kab/Kota", loc["Kota"]),
                    ("Kecamatan", loc["Kecamatan"]),
                    ("Kelurahan", loc["Kelurahan"]),
                )

                if not _select_location(dropdowns):
                    logging.warning(f"Gagal memilih lokasi: {loc}")
                    failed_locations.append(loc)
                    continue

                time.sleep(1)

                merchant_elements = scroll_and_count_merchants(driver, wait)
                logging.info(f"Jumlah merchant ditemukan: {len(merchant_elements)}")

                for merchant in merchant_elements:
                    try:
                        paragraphs = merchant.find_elements(By.CLASS_NAME, "m_b6d8b162")
                        # A merchant card needs at least ID, owner and address.
                        if len(paragraphs) < 3:
                            continue

                        id_merchant = paragraphs[0].text.strip()
                        if id_merchant in unique_merchant_ids:
                            logging.info(f"Skip merchant duplikat: {id_merchant}")
                            continue

                        # Read the remaining texts before the coordinate
                        # lookup, which is given the driver and may navigate.
                        nama_pemilik = paragraphs[1].text.strip()
                        alamat = paragraphs[2].text.strip()

                        # Random pause between merchants.
                        time.sleep(random.uniform(1, 3))

                        koordinat = extract_merchant_coordinates(driver, merchant)

                        merchant_data = {
                            field: loc[field]
                            for field in ("Provinsi", "Kota", "Kecamatan", "Kelurahan")
                        }
                        merchant_data.update({
                            "ID Merchant": id_merchant,
                            "Nama Pemilik": nama_pemilik,
                            "Alamat": alamat,
                            "Koordinat": koordinat,
                        })

                        unique_merchant_ids.add(id_merchant)
                        logging.info(f"Berhasil scrape merchant: {id_merchant}")
                        all_merchants.append(merchant_data)

                    except Exception as e:
                        logging.error(f"Error ekstraksi data merchant: {e}")

                # Checkpoint after every fifth location index.
                batch_num, remainder = divmod(idx, 5)
                if remainder == 0:
                    save_progress(all_merchants, batch_num)

            except Exception as e:
                logging.error(f"Error memproses lokasi {loc}: {e}")
                logging.error(traceback.format_exc())
                failed_locations.append(loc)

        # Persist the final results.
        save_final_results(all_merchants, failed_locations)

        # Print the run summary.
        print("\n=== Summary Hasil Scraping ===")
        print(f"Total lokasi diproses: {total_locations}")
        print(f"Total lokasi gagal: {len(failed_locations)}")
        print(f"Total merchant berhasil: {len(all_merchants)}")
        print(f"Total merchant unik: {len(unique_merchant_ids)}")

        if all_merchants:
            print("\nSample data:")
            print(pd.DataFrame(all_merchants).head().to_string())

    except Exception:
        logging.error("Error utama:")
        logging.error(traceback.format_exc())

    finally:
        driver.quit()

if __name__ == "__main__":
    # Script entry point: check the input CSV, show a preview of the parsed
    # locations, ask for confirmation, then run the scraper and log how long
    # it took.

    # Imported locally so this entry point does not depend on an ``import os``
    # elsewhere in the file (none is visible among the top-of-file imports).
    import os

    # Verify that the input file exists.
    csv_path = 'loc_lpg_tangsel.csv'
    if not os.path.exists(csv_path):
        print(f"File tidak ditemukan: {csv_path}")
        # ``exit()`` is a helper the ``site`` module adds for interactive
        # shells and is not meant for programs; raise SystemExit directly.
        raise SystemExit(1)

    # Log a start banner.
    logging.info("="*50)
    logging.info("STARTING LPG LOCATION SCRAPER")
    logging.info("="*50)

    # Read the locations to scrape.
    locations = parse_locations_from_csv(csv_path)

    if locations:
        print(f"\nDitemukan {len(locations)} lokasi untuk di-scrape")
        print("\nContoh 3 lokasi pertama:")
        for i, loc in enumerate(locations[:3], 1):
            print(f"\nLokasi {i}:")
            print(f"Provinsi: {loc['Provinsi']}")
            print(f"Kota: {loc['Kota']}")
            print(f"Kecamatan: {loc['Kecamatan']}")
            print(f"Kelurahan: {loc['Kelurahan']}")

        proceed = input("\nLanjutkan dengan scraping? (y/n): ")
        # Strip surrounding whitespace so an answer such as "y " still counts.
        if proceed.strip().lower() == 'y':
            start_time = time.time()
            scrape_lpg_locations(locations)
            end_time = time.time()
            duration = end_time - start_time
            logging.info(f"\nTotal waktu eksekusi: {duration/60:.2f} menit")
        else:
            logging.info("Scraping dibatalkan oleh user")
    else:
        logging.error("\nGagal membaca lokasi dari CSV. Pastikan format CSV sesuai:")
        logging.error("No;Provinsi;Kab/Kota;Kecamatan;Kelurahan")