# In [1]:
import functools
import os
import random
import re
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# In [3]:
# Selenium setup
def setup_browser():
    """Create a Chrome WebDriver bound to the debugger at localhost:9999.

    The ``debuggerAddress`` experimental option points ChromeDriver at a
    Chrome instance that is expected to be listening on that address
    already, rather than launching a fresh browser.

    Returns:
        The configured ``webdriver.Chrome`` instance, with a 10-second
        implicit wait applied.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("debuggerAddress", "localhost:9999")
    # Further browser options can be added here as needed.
    browser = webdriver.Chrome(options=chrome_options)
    # NOTE: explicit waits are recommended over a global implicit wait.
    browser.implicitly_wait(10)
    return browser

# Helper functions
def retry_if_no_return(func):
    """Decorate ``func`` so it is retried until it returns a non-``None`` value.

    The wrapped function is attempted up to 5 times. An attempt fails when it
    returns ``None`` or raises an exception; a 2-second pause separates
    consecutive attempts.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name and docstring as ``func``. Calling it
        returns the first non-``None`` result, or ``None`` if every attempt
        returned ``None``.

    Raises:
        Exception: Whatever ``func`` raised on the final attempt; exceptions
            from earlier attempts are printed and swallowed.
    """
    max_attempts = 5
    delay_seconds = 2

    @functools.wraps(func)  # keep func's __name__/__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        for attempt in range(1, max_attempts + 1):
            is_last_attempt = attempt == max_attempts
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                print(f"Error in {func.__name__}: {e}")
                if is_last_attempt:
                    raise
            else:
                # Falsy values such as 0 or "" still count as success.
                if result is not None:
                    return result
            # Back off before the next attempt, whether this one raised or
            # came back empty; there is nothing to wait for after the last.
            if not is_last_attempt:
                time.sleep(delay_seconds)
        print(f"Function {func.__name__} exceeded retry limit without success.")
        return None

    return wrapper

def wait_for_element(driver, locator, timeout=10):
    """Wait for the element identified by ``locator`` to be present.

    Args:
        driver: The WebDriver to poll.
        locator: A ``(By.<strategy>, value)`` tuple handed to
            ``EC.presence_of_element_located``.
        timeout: Maximum number of seconds to wait.

    Returns:
        The located element, or ``None`` if waiting raised any exception
        (a timeout included); the error is printed in that case.
    """
    try:
        waiter = WebDriverWait(driver, timeout)
        return waiter.until(EC.presence_of_element_located(locator))
    except Exception as err:
        print(f"Waiting for element timed out: {err}")
    return None

def click_element(driver, locator, timeout=10):
    """Wait for an element and click it.

    Args:
        driver: The WebDriver to operate on.
        locator: A ``(By.<strategy>, value)`` tuple identifying the element.
        timeout: Maximum number of seconds to wait for the element.

    Returns:
        ``True`` if the element was found and clicked; ``False`` if it was
        not found or if locating/clicking raised (the error is printed).
    """
    try:
        target = wait_for_element(driver, locator, timeout)
        if not target:
            return False
        target.click()
    except Exception as err:
        print(f"Clicking on element failed: {err}")
        return False
    return True


def get_detail_page_urls(driver):
    """Collect the detail-page URL behind every row of the current list page.

    For each ``tr.ant-table-row`` the row's detail button is clicked, the
    browser's current URL is recorded, and the browser navigates back.

    Args:
        driver: The WebDriver currently showing the list page.

    Returns:
        A list of URLs, one per row visited, in table order.
    """
    row_selector = "tr.ant-table-row"
    button_selector = "button.ant-btn.ant-btn-link.ant-btn-lg"

    row_count = len(driver.find_elements(By.CSS_SELECTOR, row_selector))
    detail_urls = []
    for index in range(row_count):
        # Look the rows up again on every pass: element references obtained
        # before click()/back() may be stale once the list is re-rendered.
        rows = driver.find_elements(By.CSS_SELECTOR, row_selector)
        if index >= len(rows):
            # The table shrank after navigating back; nothing left to visit.
            break
        rows[index].find_element(By.CSS_SELECTOR, button_selector).click()
        # Assumes the click navigates the current tab to the detail page;
        # adjust how the URL is obtained if the site behaves differently
        # (e.g. opens a new window or exposes the URL in an attribute).
        detail_urls.append(driver.current_url)
        driver.back()  # return to the list page
    return detail_urls

def process_detail_page(driver, detail_url):
    """Open a detail page and print its sample code.

    Args:
        driver: The WebDriver used to load the page.
        detail_url: URL of the detail page to open.
    """
    driver.get(detail_url)
    # Detail-page handling (data extraction, image download, ...) goes here.
    # Example: read the sample code from the page.
    code_element = driver.find_element(By.CSS_SELECTOR, ".sample-code-selector")
    print(f"处理样本编码: {code_element.text}")

# Image download and the remaining page-specific logic are defined below.
    

def download_image(image_url, save_path, timeout=30):
    """Download an image and save it to the given path.

    Args:
        image_url: URL of the image to fetch.
        save_path: Destination file path; the file is written in binary mode.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        None. Request failures (connection errors, timeouts, HTTP error
        statuses) are printed rather than raised.
    """
    try:
        # The context manager releases the streamed connection when done.
        with requests.get(image_url, stream=True, timeout=timeout) as response:
            # Fail before opening the file so an HTTP error page is never
            # saved under an image file name.
            response.raise_for_status()
            with open(save_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except requests.RequestException as e:
        print(f"Error downloading image {image_url}: {e}")
        return
    print(f"Image downloaded: {save_path}")

def extract_and_download_images(driver, base_path):
    """Download every categorised image found on the current detail page.

    For each image type, all ``<img>`` elements whose ``data-type`` attribute
    equals that type are looked up and their ``src`` is downloaded to
    ``<base_path>/<image_type>_<n>.jpg``, with ``n`` counting from 1.

    Args:
        driver: The WebDriver currently showing the detail page.
        base_path: Directory the image files are written into.
    """
    # Assumes each image type is marked on the page via a data-type attribute.
    for image_type in ('景观照片', '混样点照片', '技术领队现场工作照片'):
        selector = f"img[data-type='{image_type}']"
        matches = driver.find_elements(By.CSS_SELECTOR, selector)
        for number, image in enumerate(matches, start=1):
            file_name = f"{image_type}_{number}.jpg"
            download_image(
                image.get_attribute('src'),
                os.path.join(base_path, file_name),
            )

def process_pages(driver, num_pages, save_root="path/to/save/images"):
    """Scrape images from the detail pages of up to ``num_pages`` list pages.

    For each list page, every detail page is opened, a folder named after the
    page's sample code is created under ``save_root``, and the page's images
    are downloaded into it. The pagination "next" control is then clicked to
    move to the following list page.

    Args:
        driver: The WebDriver currently showing the first list page.
        num_pages: Maximum number of list pages to process.
        save_root: Directory under which one sub-folder per sample code is
            created.
    """
    for page_index in range(num_pages):
        for detail_url in get_detail_page_urls(driver):
            driver.get(detail_url)
            # Read this page's sample code and create its folder.
            sample_code = driver.find_element(By.CSS_SELECTOR, '.sample-code-selector').text
            base_path = os.path.join(save_root, sample_code)
            os.makedirs(base_path, exist_ok=True)
            # Extract and download all relevant images.
            extract_and_download_images(driver, base_path)
            # Return to the list page.
            driver.back()

        # No need to advance once the last requested page has been handled.
        if page_index == num_pages - 1:
            break
        # If pagination fails, stop instead of scraping the same page again.
        if not click_element(driver, (By.CSS_SELECTOR, "li.ant-pagination-next"), 10):
            print("Could not advance to the next list page; stopping early.")
            break



# In [None]:
def main():
    """Set up the browser, scrape the configured number of pages, then quit."""
    driver = setup_browser()
    num_pages = 5  # number of list pages to process (assumed)
    try:
        # process_pages takes (driver, num_pages); the earlier call also
        # passed an undefined ``base_url`` and failed before doing any work.
        process_pages(driver, num_pages)
    finally:
        # End the WebDriver session even if scraping raised.
        driver.quit()


if __name__ == "__main__":
    main()
