In [None]:
import os
import re
import time
import random
import requests
import json
import datetime
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from bs4 import BeautifulSoup


In [None]:
### Helper functions
# Decorator: call the wrapped function again while it returns a falsy value.
def retry_if_no_return(func):
    """Retry ``func`` until it returns a truthy value.

    The wrapped function is called up to six times (one initial call plus
    five retries) with a two-second pause between attempts.

    Args:
        func: Callable whose falsy results should trigger another attempt.

    Returns:
        A wrapper that returns the first truthy result of ``func``, or
        ``None`` when every attempt produced a falsy value.
    """
    import functools  # local import keeps this helper self-contained

    max_attempts = 6
    pause_seconds = 2

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        for attempt in range(max_attempts):
            result = func(*args, **kwargs)
            if result:
                return result
            # Pausing after the last attempt would only delay the caller.
            if attempt < max_attempts - 1:
                time.sleep(pause_seconds)
        return None

    return wrapper
# Collect the listing-table rows of the current page.
@retry_if_no_return
def get_detail_page_url(driver):
    """Return the ``tr.ant-table-row`` elements of the current page.

    Despite the name, the result is a list of row elements, not URLs; the
    caller looks up the detail button inside each row.
    """
    row_selector = "tr.ant-table-row"
    return driver.find_elements(By.CSS_SELECTOR, row_selector)
# Go to the next page of the listing.
def step_next_page(driver):
    """Click the pagination "next page" control."""
    # Other locators that were tried for the same control:
    #   XPath:      //li[@class='ant-pagination-next' and @title='下一页']
    #   class name: ant-pagination-next
    next_selector = "li.ant-pagination-next[title='下一页']"
    driver.find_element(By.CSS_SELECTOR, next_selector).click()

# Leave the detail page and go back to the listing page.
def step_back_to_home(driver):
    """Click the button whose <span> reads "返回" (back)."""
    back_locator = (By.XPATH, "//button[span='返回']")
    driver.find_element(*back_locator).click()

# # 下拉选择对应元素图片
# def select_element_image(driver, element_name):
#     body = driver.find_element(By.TAG_NAME, 'body')
#     body.click()
#     body.click()
#     # 定位
#     span_element = driver.find_element(By.XPATH, "//div[@class='ant-select select-width ant-select-single ant-select-allow-clear ant-select-show-arrow']")
#     span_element.click()
#     # 选择对应标签
#     option = driver.find_element(By.XPATH,f'//div[@title="{element_name}"]')
#     option.click()
#     # 点击清空
#     body = driver.find_element(By.TAG_NAME, 'body')
#     body.click()
#     body.click()

# Choose an entry of the picture-type drop-down on the detail page.
def select_element_image(driver, element_name):
    """Open the picture-type drop-down and pick the option titled ``element_name``.

    Each step waits up to 10 seconds for its element; afterwards <body> is
    clicked twice so that the drop-down closes.
    """
    wait = WebDriverWait(driver, 10)

    # <body> is fetched first and clicked at the very end.
    wait.until(EC.presence_of_element_located((By.TAG_NAME, 'body')))
    page_body = driver.find_element(By.TAG_NAME, 'body')

    # Open the drop-down once it is clickable.
    dropdown_locator = (By.XPATH, "//div[@class='ant-select select-width ant-select-single ant-select-allow-clear ant-select-show-arrow']")
    wait.until(EC.element_to_be_clickable(dropdown_locator))
    driver.find_element(*dropdown_locator).click()

    # Pick the requested option once it is clickable.
    choice_locator = (By.XPATH, f'//div[@title="{element_name}"]')
    wait.until(EC.element_to_be_clickable(choice_locator))
    driver.find_element(*choice_locator).click()

    # Short pause so the UI can settle before the menu is dismissed.
    time.sleep(1)

    for _ in range(2):
        page_body.click()

# Extract the picture file name from a link.
def get_image_name(url):
    """Return the first ``a-b-c-d-e.jpg`` style file name found in ``url``.

    Raises:
        IndexError: if ``url`` contains no such file name.
    """
    name_regex = re.compile(r'(\w+-\w+-\w+-\w+-\w+\.jpg)')
    found = name_regex.findall(url)
    return found[0]
# Download one picture to disk.
def download_image(url, save_path_name, timeout=30):
    """Download ``url`` and write the response body to ``save_path_name``.

    Args:
        url: Address of the picture.
        save_path_name: Destination file path.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the whole run.

    Returns:
        True when the server answered with status 200 and the file was
        written, False otherwise (other status, network error, write error).
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            return False
        with open(save_path_name, 'wb') as f:
            f.write(r.content)
        return True
    except (requests.RequestException, OSError):
        return False
    finally:
        # Random pause (< 1 s) after every attempt to throttle the requests.
        time.sleep(random.random())
# Collect the landscape-picture links of the detail page.
@retry_if_no_return
def get_view_picture_url(driver):
    """Return ``(src, "<caption>_<n>")`` tuples parsed from the page source.

    A picture is an ``ant-image`` <div> holding an <img>, followed by an
    ``img-text`` <div> holding the caption; ``n`` counts the matches from 1.
    """
    view_regex = re.compile(r'<div class="ant-image"[^>]*><img[^>]+src="([^"]+)"[^>]*>.*?<div class="img-text"[^>]*>([^<]+)</div>', re.DOTALL)
    labelled = []
    for position, (src, caption) in enumerate(view_regex.findall(driver.page_source), start=1):
        labelled.append((src, f"{caption}_{position}"))
    return labelled
# Collect every picture link of the page through JavaScript.
@retry_if_no_return
def get_all_picture_url(driver):
    """Return one ``{'url': ..., 'label': ...}`` dict per picture, gathered in the browser.

    The script selects ``div.card-box div.ant-image img`` elements; the label
    is the inner text of the image's next sibling element, or '' without one.
    """
    collect_script = """
    var images = [];
    var imageElements = document.querySelectorAll('div.card-box div.ant-image img');
    imageElements.forEach(function(img) {
        var labelElement = img.nextElementSibling;
        var label = labelElement ? labelElement.innerText : '';
        images.push({'url': img.src, 'label': label});
    });
    return images;
"""
    return driver.execute_script(collect_script)
# Collect every picture link of the page through element lookups.
@retry_if_no_return
def get_page_all_img_src(driver):
    """Return the ``src`` attribute of every <img> element on the page."""
    sources = []
    for img in driver.find_elements(By.TAG_NAME, "img"):
        sources.append(img.get_attribute('src'))
    return sources

# Work out which pictures belong to the mixed-sample points.
def get_hy_picture_url(all_list, jg_list):
    """Return the pictures of ``all_list`` that are not landscape pictures.

    Args:
        all_list: One dict with a ``'url'`` key per picture on the page, as
            returned by ``get_all_picture_url``.
        jg_list: Landscape pictures as returned by ``get_view_picture_url``;
            only the first item (the URL) of each entry is used.

    Returns:
        A list of ``(url, index)`` tuples with ``index`` counting from 1.
        The list is empty when every picture is a landscape picture.
    """
    # Deliberately not wrapped in retry_if_no_return: this is a pure
    # computation on its arguments, so repeating it cannot change the result.
    # Retrying would only add delay and turn an empty result into None,
    # which callers cannot iterate.
    jg_names = {get_image_name(jg_path[0]) for jg_path in jg_list}
    remaining = [
        picture['url']
        for picture in all_list
        if get_image_name(picture['url']) not in jg_names
    ]
    return [(url, index) for index, url in enumerate(remaining, start=1)]
# Collect the picture links shown in the scrolling window.
@retry_if_no_return
def get_scroll_picture_url(driver,view_url_list,mixed_url_list,element_name='技术领队现场工作照片'):
    """Return the pictures shown after switching the drop-down to ``element_name``.

    Args:
        driver: Browser driver.
        view_url_list: Landscape pictures; the first item of each entry is a URL.
        mixed_url_list: Mixed-sample pictures; the first item of each entry is a URL.
        element_name: Title of the drop-down option to switch to.

    Returns:
        ``(url, index)`` tuples (``index`` counting from 1) for every picture
        on the page whose file name is in neither of the two lists.
    """
    # File names that are already accounted for.
    known_names = [get_image_name(view_path[0]) for view_path in view_url_list]
    known_names += [get_image_name(mixed_path[0]) for mixed_path in mixed_url_list]
    # Switch to the requested picture type and give the page a moment.
    select_element_image(driver,element_name)
    time.sleep(1)
    scroll_list = []
    for picture in get_all_picture_url(driver):
        picture_url = picture['url']
        if get_image_name(picture_url) in known_names:
            continue
        scroll_list.append((picture_url, len(scroll_list) + 1))
    return scroll_list

# Read the sample-point code of the current detail page.
@retry_if_no_return
def get_code_number(driver):
    """Return the text of the ``.address-logo-text`` element, or None when it is empty."""
    code_text = driver.find_element(By.CSS_SELECTOR,'.address-logo-text').text
    return code_text or None
    
# Normalise a picture link.
def get_image_url(url):
    """Return the part of ``url`` from ``https://sanpu.iarrp`` up to and including the first ``&``.

    When the pattern does not match, ``url`` is returned unchanged.
    """
    found = re.search(r"https://sanpu.iarrp.+?\&", url)
    return found.group() if found else url
    
def create_save_path(folder_path):
    """Create ``folder_path`` (including parents); print a notice if it already exists."""
    if os.path.exists(folder_path):
        print("目录已存在")
        return
    os.makedirs(folder_path)

In [None]:
# Attach Selenium to an already running Chrome instance.
# Chrome has to be started with remote debugging enabled first, for example:
# chrome.exe --remote-debugging-port=9999 --user-data-dir="D:\Selenium\AutomationProfile"
# chrome.exe --remote-debugging-port=9999 --user-data-dir="D:\Program Files\ChromeDir"
# NOTE(review): the example commands use port 9999 while the code below
# attaches to port 9998 — confirm which one is intended.
options = webdriver.ChromeOptions()
options.add_experimental_option("debuggerAddress", "localhost:9998")
driver = webdriver.Chrome(options=options)

# Implicit wait of 60 seconds for this driver session.
driver.implicitly_wait(60)

In [None]:
# Root folder in which the downloaded pictures are saved.
img_save_path = r"F:\collection_spb_img\李相楹"

In [None]:
# Upper bound on attempts per table row, so that a row which keeps failing is
# skipped instead of being retried forever.
max_row_attempts = 3

# Walk through every page of the listing.
for one_page in range(43):
    # Rows of the current page; each row holds a detail button.
    # get_detail_page_url gives None when no row was found after its retries.
    rows = get_detail_page_url(driver)
    for row in rows or []:
        # Reset per row so the failure message never shows a stale code.
        current_code_number = None
        for attempt in range(max_row_attempts):
            try:
                # Open the detail page of this row.
                detail_button = row.find_element(By.CSS_SELECTOR, "button.ant-btn.ant-btn-link.ant-btn-lg")
                detail_button.click()
                time.sleep(5)
                # Sample-point code of the detail page.
                current_code_number = get_code_number(driver)
                print(current_code_number)
                # The getters give None when they found nothing; treat that
                # as "no pictures" rather than failing on iteration.
                all_picture_url = get_all_picture_url(driver) or []
                view_picture_url = get_view_picture_url(driver) or []
                mixed_picture_url = get_hy_picture_url(all_picture_url,view_picture_url) or []
                soil_picture_url = get_scroll_picture_url(driver,view_picture_url,mixed_picture_url,'土壤混合样品采集照片') or []
                # The '技术领队现场工作照片' and '土壤容重样品采集照片' categories are
                # currently not collected; fetch them with get_scroll_picture_url
                # and add them to picture_dict to enable them.
                picture_dict = {"景观照片":view_picture_url,"混样点照片":mixed_picture_url,
                                "土壤混合采集照片":soil_picture_url
                                }

                # One folder per sample-point code.
                root_path = os.path.join(img_save_path,current_code_number)
                create_save_path(root_path)
                for one_type in picture_dict:
                    for one_item in picture_dict[one_type]:
                        url = get_image_url(one_item[0])
                        index = one_type+'_'+str(one_item[1])
                        img_path = os.path.join(root_path,f"{index}.jpg")
                        if os.path.exists(img_path):
                            print(f"已存在{current_code_number}_{index}.jpg")
                        elif download_image(url,img_path):
                            print(f"已下载{current_code_number}_{index}.jpg")
                        else:
                            # Report failures instead of claiming success.
                            print(f"下载失败{current_code_number}_{index}.jpg")
                # Back to the listing page; this row is done.
                step_back_to_home(driver)
                break
            except Exception as exc:
                # ``Exception`` (not a bare except) so Ctrl-C still stops the run.
                print(f"当前页面{current_code_number}没有详情按钮")
                print(repr(exc))
                # Try to get back to the listing; the back button may be
                # missing if the detail page never opened.
                try:
                    step_back_to_home(driver)
                except Exception as back_exc:
                    print(repr(back_exc))
    step_next_page(driver)
    time.sleep(15)

In [None]:
def click_info(driver,info_name):
    """Click the <div> whose text is exactly ``info_name``."""
    target_xpath = f"//div[text()='{info_name}']"
    driver.find_element(By.XPATH, target_xpath).click()


In [None]:
# Click the <div> labelled '立地条件(新)'.
click_info(driver,'立地条件(新)')

In [None]:

# Patch XMLHttpRequest.prototype.send inside the page so that every finished
# request is recorded in window.xhrData as {url, response}.
# NOTE: each execution wraps the current send() once more, so run this cell
# once per page load to avoid duplicate records.
driver.execute_script("""
(function() {
  var originalSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.send = function(body) {
    this.addEventListener('load', function() {
      if (!window.xhrData) {
        window.xhrData = [];  // 初始化全局变量来存储捕获的数据
      }
      window.xhrData.push({
        url: this.responseURL,  // 请求的URL
        response: this.responseText  // 响应文本
      });
    });
    originalSend.apply(this, arguments);
  };
})();
""")

# Trigger the requests by clicking the two sections; the sleeps give the
# requests time to finish.
click_info(driver,'立地条件(新)')
time.sleep(2)
# NOTE(review): implicitly_wait changes the driver's implicit-wait setting
# (here to 10 s); it does not pause — confirm the change is intended.
driver.implicitly_wait(10)
click_info(driver,'调查采样')
time.sleep(2)
driver.implicitly_wait(10)
# window.xhrData does not exist until a request has finished, in which case
# execute_script gives None; normalise that to an empty list.
captured_data = driver.execute_script("return window.xhrData;") or []

# Print what was captured and collect the distinct results and URLs.
result_set = set()
result_url = set()
for data in captured_data:
    print("URL:", data['url'])
    print("Response:", data['response'])
    result_url.add(data['url'])
    try:
        result_set.add(str(json.loads(data['response'])['result']))
    except (ValueError, KeyError, TypeError):
        # Not a JSON object with a 'result' field (HTML, empty body, ...):
        # keep the URL and skip the payload instead of aborting the loop.
        continue
driver.execute_script("window.xhrData = [];")
print(len(captured_data))

In [None]:
# Reference list of API endpoints; the strings are bare expressions and are
# not assigned to anything.
# Fertilisation
'https://sanpu.iarrp.cn/api/ssp/dccy/ldtj/sfyl/5227220101000001?'
# Site conditions
'https://sanpu.iarrp.cn/api/ssp/dccy/ldtj/5227220101000001?'
# Survey sample-point information
'https://sanpu.iarrp.cn/api/ssp/dcyd/5227220101000001?'
# Media information
'https://sanpu.iarrp.cn/api/ssp/fjwj3/list'
# Soil sampling bags
'https://sanpu.iarrp.cn/api/ssp/dccy/ctd/ydbh/5227220101000001?'

In [None]:
import requests

# URL to request.
# url = "https://sanpu.iarrp.cn/api/ssp/dcyd/5227220103000086"
url = "https://sanpu.iarrp.cn/api/ssp/dccy/ldtj/5227220103000086?isFullDisplay=1"

# Request headers.
# NOTE(review): the Authorization token and Cookie are hard-coded session
# credentials; they should not be committed and will stop working once the
# session ends.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Authorization": "eyJhbGciOiJIUzUxMiJ9.eyJ1c2VyX3Rva2VuOiI6InVzZXJfdG9rZW46MGNhYWQ1YzktYzUyZi00NDYwLWFiZDctZjY4ZGQ0NjM1NjEzIn0.ZT__0cxwjzosa-b_WF-nBvC9bof_iRG6-XpzU7bVANYcZVTiLqYb8lZ_N90yibZQUewvrUcFdy9taqdMH0GRyg",
    "Connection": "keep-alive",
    "Cookie": "HWWAFSESID=51dcc73c594961e824; HWWAFSESTIME=1711330630059",
    "Host": "sanpu.iarrp.cn",
    "Referer": "https://sanpu.iarrp.cn/",
    "Sec-Ch-Ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# Send the GET request. The timeout keeps a stalled connection from blocking
# the notebook forever (requests waits indefinitely without one).
response = requests.get(url, headers=headers, timeout=30)

# Check the status code.
if response.status_code == 200:
    # Parse and show the JSON body.
    data = response.json()
    print(data)
else:
    print("请求失败，状态码：", response.status_code)


In [None]:
# Try to read the Authorization header through JavaScript.
# NOTE(review): `document.headers` does not look like a standard DOM property,
# so this presumably evaluates to None — confirm where the page keeps its token.
authorization_script = """
return document.headers;
"""
authorization_header = driver.execute_script(authorization_script)

# Read the cookie string exposed to page scripts through JavaScript.
cookie_script = """
return document.cookie;
"""
cookie_value = driver.execute_script(cookie_script)

# Print both values.
print("Authorization:", authorization_header)
print("Cookie:", cookie_value)

In [None]:
import requests
import json

# URL to request.
url = "https://sanpu.iarrp.cn/api/ssp/fjwj3/list"

# Request headers.
# NOTE(review): the Authorization token and Cookie are hard-coded session
# credentials; they should not be committed and will stop working once the
# session ends.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Authorization": "eyJhbGciOiJIUzUxMiJ9.eyJ1c2VyX3Rva2VuOiI6InVzZXJfdG9rZW46MGNhYWQ1YzktYzUyZi00NDYwLWFiZDctZjY4ZGQ0NjM1NjEzIn0.ZT__0cxwjzosa-b_WF-nBvC9bof_iRG6-XpzU7bVANYcZVTiLqYb8lZ_N90yibZQUewvrUcFdy9taqdMH0GRyg",
    "Connection": "keep-alive",
    "Content-Type": "application/json;charset=UTF-8",
    "Cookie": "HWWAFSESID=51dcc73c594961e824; HWWAFSESTIME=1711330630059",
    "Host": "sanpu.iarrp.cn",
    "Origin": "https://sanpu.iarrp.cn",
    "Referer": "https://sanpu.iarrp.cn/",
    "Sec-Ch-Ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
}

# JSON payload of the request; replace the value as needed.
data = {
    "glbh": "5227220101000002"
}

# Serialise the payload to a JSON string.
payload = json.dumps(data)

# Send the POST request. The timeout keeps a stalled connection from blocking
# the notebook forever (requests waits indefinitely without one).
response = requests.post(url, headers=headers, data=payload, timeout=30)

# Check the status code.
if response.status_code == 200:
    # Parse and show the JSON body.
    data = response.json()
    print(data)
else:
    print("请求失败，状态码：", response.status_code)


In [None]:
# Print the current time as a Unix timestamp (seconds).
print(time.time())

In [None]:
# Print every distinct result collected so far.
for i in result_set:
    print(i)

In [None]:
# Print every distinct request URL collected so far.
for i in result_url:
    print(i)

In [None]:
# Define window.captureXHRData() in the page. Calling it empties
# window.xhrData and, on the first call only, patches
# XMLHttpRequest.prototype.send so that every finished request is appended to
# window.xhrData as {url, response}.
# Patching on every call would wrap the already-wrapped send() again and
# record each request once per wrapper.
driver.execute_script("""
window.captureXHRData = function() {
  window.xhrData = [];  // start every capture with an empty list
  if (window.xhrCaptureInstalled) {
    return;  // send() is already patched by an earlier call
  }
  window.xhrCaptureInstalled = true;
  var originalSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.send = function(body) {
    this.addEventListener('load', function() {
      window.xhrData.push({
        url: this.responseURL,  // request URL
        response: this.responseText  // response text
      });
    });
    originalSend.apply(this, arguments);
  };
};
""")

# Click the two sections and return the XHR data captured meanwhile.
def click_button_and_get_xhr(driver):
    """Reset the in-page capture, click both sections and return window.xhrData.

    Requires ``window.captureXHRData`` to have been defined in the page.
    Raises the WebDriverWait timeout error when nothing was captured within
    10 seconds.
    """
    # Empty (or create) the in-page XHR data store.
    driver.execute_script("window.captureXHRData();")
    
    # First action that may trigger XHR requests.
    click_info(driver,'立地条件(新)')
    # NOTE(review): implicitly_wait changes the driver's implicit-wait setting;
    # it does not pause here — confirm this is intended.
    driver.implicitly_wait(10)
    # Second action that may trigger XHR requests.
    click_info(driver,'调查采样')
    # NOTE(review): same as above — a setting change, not a pause.
    driver.implicitly_wait(10)
    
    # Wait (up to 10 s) until at least one request has been recorded.
    # The condition may need tuning for the actual page.
    WebDriverWait(driver, 10).until(lambda d: d.execute_script("return window.xhrData.length > 0;"))
    
    # Return the captured XHR data.
    return driver.execute_script("return window.xhrData;")

# Click the sections and fetch the data captured during the clicks.
xhr_data_after_click = click_button_and_get_xhr(driver)
# Print the captured data and collect the distinct responses and URLs.
# Iterate the data just captured, not ``captured_data`` left over from an
# earlier cell.
result_set = set()
result_url = set()
for data in xhr_data_after_click or []:
    print("URL:", data['url'])
    print("Response:", data['response'])
    result_set.add(data['response'])
    result_url.add(data['url'])



In [None]:
# Print every distinct response collected so far.
for i in result_set:
    print(i)

In [None]:
# Print every distinct request URL collected so far.
for i in result_url:
    print(i)

In [None]:


# Re-inject the XHR capture script and clear the previously captured data.
def setup_xhr_interception():
    """Install the in-page XHR capture using the module-level ``driver``.

    The script keeps the first ``send`` it sees in ``window.originalSend``
    (restoring it on later calls), empties ``window.xhrData`` and then wraps
    ``XMLHttpRequest.prototype.send`` so every loaded request is appended to
    ``window.xhrData`` as ``{url, response}``.
    """
    driver.execute_script("""
    if (window.originalSend) {
        XMLHttpRequest.prototype.send = window.originalSend;  // 恢复原始的send方法
    } else {
        window.originalSend = XMLHttpRequest.prototype.send;  // 保存原始的send方法
    }

    window.xhrData = [];  // 初始化或清空XHR数据存储

    XMLHttpRequest.prototype.send = function(body) {
        this.addEventListener('load', function() {
            window.xhrData.push({
                url: this.responseURL,  // 请求的URL
                response: this.responseText  // 响应文本
            });
        });
        window.originalSend.apply(this, arguments);
    };
    """)

# Click one button and return the XHR data captured afterwards.
def click_button_and_get_xhr(button_selector):
    """Click the element at XPath ``button_selector`` and return window.xhrData.

    Uses the module-level ``driver``. The capture is reset first; the call
    then waits up to 10 seconds until more than 10 requests were recorded.
    """
    setup_xhr_interception()

    driver.find_element(By.XPATH, button_selector).click()

    def enough_requests(d):
        return d.execute_script("return window.xhrData.length > 10;")

    WebDriverWait(driver, 10).until(enough_requests)

    return driver.execute_script("return window.xhrData;")


# Click each section in turn and print the responses captured for it.
for button_xpath in ("//div[text()='立地条件(新)']", "//div[text()='调查采样']"):
    xhr_data_after_click = click_button_and_get_xhr(button_xpath)
    for i in xhr_data_after_click:
        print(i['response'])
    print(len(xhr_data_after_click))

In [None]:
# Display the most recently captured XHR data.
xhr_data_after_click

In [None]:

# Reset the XHR interception and clear the previously captured data.
def reset_xhr_interception():
    """Replace ``window.XMLHttpRequest`` with a recording wrapper (module-level ``driver``).

    The first constructor seen is kept in ``window.originalXMLHttpRequest``
    and restored on later calls before wrapping again. Every request that
    loads with status 200 is appended to ``window.xhrData`` as
    ``{url, response}``.
    """
    # NOTE(review): the replacement constructor is a plain function that
    # returns a native XHR object; members defined on the native constructor
    # itself are not copied over — confirm the page does not rely on them.
    driver.execute_script("""
        // 重置XMLHttpRequest到其原始状态
        if (window.originalXMLHttpRequest) {
            XMLHttpRequest = window.originalXMLHttpRequest;
        } else {
            window.originalXMLHttpRequest = XMLHttpRequest;
        }

        // 创建一个新的XMLHttpRequest代理以拦截数据
        window.xhrData = [];  // 清空之前的数据
        var XHR = window.XMLHttpRequest;

        window.XMLHttpRequest = function() {
            var xhr = new XHR();
            xhr.addEventListener('load', function() {
                if (this.readyState == 4 && this.status == 200) {
                    window.xhrData.push({
                        url: this.responseURL,
                        response: this.response
                    });
                }
            });
            return xhr;
        };
    """)

# Click one button and return the XHR data captured afterwards.
def click_button_and_get_xhr(button_selector):
    """Click the element at XPath ``button_selector`` and return window.xhrData.

    Uses the module-level ``driver``. The interception is reset first. After
    the click the function waits up to 10 seconds for the first recorded
    request; requests that finish later are not included.

    Returns:
        The list stored in ``window.xhrData`` (possibly empty when nothing
        was recorded in time).
    """
    from selenium.common.exceptions import TimeoutException

    # Reset and install the XHR interception.
    reset_xhr_interception()

    # Perform the click.
    button = driver.find_element(By.XPATH, button_selector)
    button.click()

    # Kept for compatibility: later element lookups keep the 10 s implicit
    # timeout. This call does not pause; the explicit wait below does.
    driver.implicitly_wait(10)

    try:
        WebDriverWait(driver, 10).until(
            lambda d: d.execute_script("return window.xhrData.length > 0;")
        )
    except TimeoutException:
        # Nothing was recorded in time; return whatever is there.
        pass

    # Fetch the intercepted XHR data.
    captured_data = driver.execute_script("return window.xhrData;")
    return captured_data

# Click the section and print the 'result' field of every captured response.
button_selector = "//div[text()='立地条件(新)']"  # replace with the actual selector
captured_data = click_button_and_get_xhr(button_selector) or []
for i in captured_data:
    response = i['response']
    # The interceptor may hand the body back as JSON text; decode it before
    # indexing, since indexing a string with 'result' raises TypeError.
    if isinstance(response, str):
        try:
            response = json.loads(response)
        except ValueError:
            # Not JSON: show the raw body instead of failing.
            print(response)
            continue
    print(response['result'])

print(len(captured_data))


In [None]:


def setup_xhr_interception(driver):
    """Install the in-page XHR capture through ``driver``.

    The script keeps the first ``send`` it sees in ``window.originalSend``
    (restoring it on later calls), empties ``window.xhrData`` and wraps
    ``XMLHttpRequest.prototype.send`` so every loaded request is appended to
    ``window.xhrData`` as ``{url, response}``.
    """
    # Inject the JavaScript that captures XHR traffic.
    driver.execute_script("""
        (function() {
            if (window.originalSend) {
                XMLHttpRequest.prototype.send = window.originalSend;  // 恢复原始的send方法，如果已存在
            } else {
                window.originalSend = XMLHttpRequest.prototype.send;  // 保存原始的send方法
            }
            window.xhrData = [];  // 初始化或清空XHR数据存储
            XMLHttpRequest.prototype.send = function(body) {
                this.addEventListener('load', function() {
                    window.xhrData.push({
                        url: this.responseURL,
                        response: this.responseText
                    });
                });
                window.originalSend.apply(this, arguments);
            };
        })();
    """)

def click_and_capture_xhr(driver, button_selector):
    """Click the element at XPath ``button_selector`` and return window.xhrData.

    Waits up to 10 seconds for the element to be present, resets the capture,
    clicks, then waits up to 10 seconds until more than 5 requests were
    recorded.
    """
    button_locator = (By.XPATH, button_selector)

    # Make sure the button exists before touching the page.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located(button_locator))

    # Clear earlier data and install the capture.
    setup_xhr_interception(driver)

    driver.find_element(*button_locator).click()

    def enough_requests(d):
        return d.execute_script("return window.xhrData.length > 5;")

    # The threshold may need tuning for the actual page.
    WebDriverWait(driver, 10).until(enough_requests)
    return driver.execute_script("return window.xhrData;")


# Example: click the button and capture the XHR traffic.
button_selector = "//div[text()='立地条件(新)']"  # replace with the actual button selector
captured_xhr_data = click_and_capture_xhr(driver, button_selector)
for i in captured_xhr_data or []:
    print(i)

# Report the size of the data captured here, not of ``captured_data`` left
# over from an earlier cell.
print(len(captured_xhr_data or []))



In [None]:


def inject_xhr_interception_script(driver):
    """Define ``window.setupXHRInterception`` in the page (once) and call it.

    Each call restores ``window.originalSend`` if it exists, stores the
    current ``send`` there, empties ``window.xhrData`` and wraps ``send`` so
    that loaded requests are appended to ``window.xhrData`` as
    ``{url, response}``. A ``hasListener`` flag on each XHR object keeps the
    load listener from being added to the same object twice.
    """
    # Inject the JavaScript that sets up the XHR interception.
    driver.execute_script("""
    if (!window.setupXHRInterception) {
        window.setupXHRInterception = function() {
            if (window.originalSend) {
                XMLHttpRequest.prototype.send = window.originalSend;  // 恢复原始的send方法
            }
            window.originalSend = XMLHttpRequest.prototype.send;  // 保存原始的send方法
    
            window.xhrData = [];  // 初始化或清空XHR数据存储
            XMLHttpRequest.prototype.send = function(body) {
                if (!this.hasListener) {  // 检查是否已经为此XHR对象添加了监听器
                    this.addEventListener('load', function() {
                        window.xhrData.push({
                            url: this.responseURL,
                            response: this.responseText
                        });
                    });
                    this.hasListener = true;  // 标记此XHR对象已添加监听器
                }
                window.originalSend.apply(this, arguments);
            };
        };
    }
    window.setupXHRInterception();  // 设置XHR拦截
    """)

def click_and_capture_xhr(driver, button_selector):
    """Click the element at XPath ``button_selector`` and return window.xhrData.

    Waits up to 10 seconds for the element to be present, installs the
    capture, clicks, then waits up to 100 seconds until more than 5 requests
    were recorded.
    """
    button_locator = (By.XPATH, button_selector)

    # Make sure the button exists before touching the page.
    WebDriverWait(driver, 10).until(EC.presence_of_element_located(button_locator))

    # Install the XHR interception script.
    inject_xhr_interception_script(driver)

    driver.find_element(*button_locator).click()

    def enough_requests(d):
        return d.execute_script("return window.xhrData.length > 5;")

    # The threshold may need tuning for the actual page.
    WebDriverWait(driver, 100).until(enough_requests)

    return driver.execute_script("return window.xhrData;")


# Example: click the button, capture the XHR traffic and print every record.
button_selector = "//div[text()='立地条件(新)']"  # replace with the actual button selector
captured_xhr_data = click_and_capture_xhr(driver, button_selector)
for i in captured_xhr_data:
    print(i)



In [None]:
# Distinct response bodies of the captured requests.
result_set = {entry['response'] for entry in captured_xhr_data}

In [None]:
# Print every distinct response body.
for i in result_set:
    print(i)