In [1]:
import os
import re
import time
import random
import math
import requests
import json
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

In [2]:
# Load the settings this notebook needs from the .env file into the environment.
load_dotenv()

# Endpoint URLs / URL fragments; each one is None when its variable is not set.
SF_URL = os.getenv('SF_URL')        # fertilization records   (get_sf_info)
LDTJ_URL = os.getenv('LDTJ_URL')    # site conditions         (get_ldtj_info)
JBXX_URL = os.getenv('JBXX_URL')    # base info of a point    (get_base_info)
CTD_URL = os.getenv('CTD_URL')      # soil sample bags        (get_ctd_info)
IMG_URL = os.getenv('IMG_URL')      # image / video links     (get_img_info)
PM_URL = os.getenv('PM_URL')        # soil profile            (get_pm_info)
PM_FC_URL = os.getenv('PM_FC_URL')  # profile horizons        (get_pmfc_info)
DSH_URL = os.getenv('DSH_URL')      # pending-review count    (get_wait_num)
YSH_URL = os.getenv('YSH_URL')      # audited count           (get_audit_num)
DSH_PAGE = os.getenv('DSH_PAGE')    # pending-review page URL prefix
YSH_PAGE = os.getenv('YSH_PAGE')    # audited page URL prefix
PAGE_TAIL = os.getenv('PAGE_TAIL')  # suffix appended after the page number
BASE_NAME = os.getenv('BASE_NAME')  # prefix prepended to media paths

# Headers for the media (POST) endpoint: header name -> value of its env variable.
img_headers = {
    header_name: os.getenv(env_name)
    for header_name, env_name in (
        ("Accept", "ACCEPT"),
        ("Accept-Encoding", "ACCEPT_ENCODING"),
        ("Accept-Language", "ACCEPT_LANGUAGE"),
        ("Authorization", "AUTHORIZATION"),
        ("Connection", "CONNECTION"),
        ("Content-Length", "CONTENT_LENGTH"),
        ("Content-Type", "CONTENT_TYPE"),
        ("Cookie", "COOKIE"),
        ("Host", "HOST"),
        ("origin", "ORIGIN"),
        ("Referer", "REFERER"),
        ("Sec-Ch-Ua", "SEC_CH_UA"),
        ("Sec-Ch-Ua-Mobile", "SEC_CH_UA_MOBILE"),
        ("Sec-Ch-Ua-Platform", "SEC_CH_UA_PLATFORM"),
        ("Sec-Fetch-Dest", "SEC_FETCH_DEST"),
        ("Sec-Fetch-Mode", "SEC_FETCH_MODE"),
        ("Sec-Fetch-Site", "SEC_FETCH_SITE"),
        ("User-Agent", "USER_AGENT"),
    )
}

# Headers for the GET endpoints: the same entries, in the same order, minus the
# three that only apply to the POST request.
headers = {
    header_name: header_value
    for header_name, header_value in img_headers.items()
    if header_name not in ("Content-Length", "Content-Type", "origin")
}


In [3]:
### 辅助函数
# Decorator: call the function again while it keeps returning a falsy value.
def retry_if_no_return(func):
    """Wrap ``func`` so that a falsy result triggers a bounded number of retries.

    The wrapped function is called once and, while its result is falsy, up to
    5 more times (6 attempts in total) with a 2 second pause between attempts.

    Returns:
        The first truthy result of ``func``, or ``None`` when every attempt
        returned a falsy value.  Exceptions raised by ``func`` are not caught.
    """
    # Local import keeps this cell self-contained (functools is not imported at the top).
    import functools

    # Number of retries after the first attempt.
    retry_times = 5

    @functools.wraps(func)  # keep the wrapped function's __name__ / __doc__
    def wrapper(*args, **kwargs):
        for attempt in range(retry_times + 1):
            result = func(*args, **kwargs)
            if result:
                return result
            # Pause only when another attempt follows; sleeping after the last
            # failure would just delay the caller.
            if attempt < retry_times:
                time.sleep(2)
        return None
    return wrapper

# Download one file (image or video) and save it to disk.
@retry_if_no_return
def download_image(url, save_path_name, headers, timeout=30):
    """Download ``url`` and write the response body to ``save_path_name``.

    Args:
        url: address of the file to download.
        save_path_name: destination file path (opened in binary write mode).
        headers: HTTP headers sent with the request.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot block forever (backward-compatible addition).

    Returns:
        ``True`` when the server answered 200 and the file was written,
        ``False`` otherwise.  The ``retry_if_no_return`` decorator calls this
        function again while it returns a falsy value.
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code == 200:
            with open(save_path_name, 'wb') as f:
                f.write(r.content)
            return True
    except (requests.RequestException, OSError):
        # Network failures and file-system errors count as a failed attempt.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False
    # Non-200 answer: wait a random fraction of a second before reporting failure.
    time.sleep(random.random())
    return False

In [4]:
# Fetch the fertilization records of one sampling point.
def get_sf_info(point_id, headers):
    """Return the decoded JSON answer of the fertilization endpoint for one point.

    Sends ``GET {SF_URL}{point_id}`` with ``headers``.  On HTTP 200 the parsed
    JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{SF_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Fetch the media (image / video) records of one sampling point.
def get_img_info(point_id, headers):
    """Return the decoded JSON answer listing the image and video links of a point.

    POSTs a JSON body ``{"glbh": "<point id as integer text>"}`` to ``IMG_URL``.
    On HTTP 200 the parsed JSON body is returned; for any other status the code
    is printed and ``False`` is returned.
    """
    # int() normalises the id (and raises for non-numeric input) before it is
    # serialised as a string.
    payload = json.dumps({"glbh": str(int(point_id))})
    response = requests.post(IMG_URL, headers=headers, data=payload)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Fetch the site-condition record of one sampling point.
def get_ldtj_info(point_id, headers):
    """Return the decoded JSON answer of the site-condition endpoint for one point.

    Sends ``GET {LDTJ_URL}{point_id}`` with ``headers``.  On HTTP 200 the parsed
    JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{LDTJ_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Fetch the base information of one sampling point.
def get_base_info(point_id, headers):
    """Return the decoded JSON answer of the base-info endpoint for one point.

    Sends ``GET {JBXX_URL}{point_id}`` with ``headers``.  On HTTP 200 the parsed
    JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{JBXX_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Fetch the soil-sample-bag records of one sampling point.
def get_ctd_info(point_id, headers):
    """Return the decoded JSON answer of the sample-bag endpoint for one point.

    Sends ``GET {CTD_URL}{point_id}`` with ``headers``.  On HTTP 200 the parsed
    JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{CTD_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
    
# Fetch the soil-profile record of one sampling point.
def get_pm_info(point_id, headers):
    """Return the decoded JSON answer of the profile endpoint for one point.

    Sends ``GET {PM_URL}{point_id}`` with ``headers``.  On HTTP 200 the parsed
    JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{PM_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Fetch the profile-horizon records of one sampling point.
def get_pmfc_info(point_id, headers):
    """Return the decoded JSON answer of the profile-horizon endpoint for one point.

    Sends ``GET {PM_FC_URL}{point_id}`` with ``headers``.  On HTTP 200 the
    parsed JSON body is returned; for any other status the code is printed and
    ``False`` is returned.
    """
    response = requests.get(f"{PM_FC_URL}{point_id}", headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()
# Read the cookie string of the page currently open in the attached browser.
def get_cookie():
    """Return ``document.cookie`` of the current page.

    Runs a small JavaScript snippet through the module-level Selenium
    ``driver`` and returns whatever the script returns.
    """
    return driver.execute_script("\n    return document.cookie;\n    ")
# Refresh the Cookie entry of a headers dict from the browser session.
def update_cookie(headers):
    """Set ``headers["Cookie"]`` to the browser's current cookie string.

    The dict is modified in place and the same object is returned, so callers
    can use either the return value or the original reference.
    """
    headers["Cookie"] = get_cookie()
    return headers
# Fetch the number of audited points.
def get_audit_num(headers):
    """Return the ``result`` field of the audited-count endpoint.

    Sends ``GET YSH_URL`` with ``headers``.  On HTTP 200 the ``'result'`` entry
    of the parsed JSON body is returned; for any other status the code is
    printed and ``False`` is returned.
    """
    response = requests.get(YSH_URL, headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()['result']
# Fetch the number of points still waiting for review.
def get_wait_num(headers):
    """Return the ``result`` field of the pending-review-count endpoint.

    Sends ``GET DSH_URL`` with ``headers``.  On HTTP 200 the ``'result'`` entry
    of the parsed JSON body is returned; for any other status the code is
    printed and ``False`` is returned.
    """
    response = requests.get(DSH_URL, headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    return response.json()['result']
# Fetch the point codes listed on one result page.
def get_set_page_num(url, headers):
    """Return the points of one listing page as one-item dicts.

    Sends ``GET url`` with ``headers``.  On HTTP 200, each entry of
    ``result.records`` in the JSON body becomes a ``{ydbh: ydlb}`` dict and the
    list of those dicts is returned; for any other status the code is printed
    and ``False`` is returned.
    """
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("请求失败，状态码：", response.status_code)
        return False
    records = response.json()['result']['records']
    return [{record['ydbh']: record['ydlb']} for record in records]

# Collect the codes of all audited points, page by page.
def get_page_number(total, headers):
    """Return the ``{code: category}`` dicts of every audited point.

    Args:
        total: number of audited points; pages are assumed to hold 50 entries,
            so ``ceil(total / 50)`` pages are requested.  A falsy total (0, or
            the ``False`` that ``get_audit_num`` returns on failure) results in
            zero pages and an empty list.
        headers: HTTP headers forwarded to ``get_set_page_num``.

    Returns:
        The concatenated lists returned by ``get_set_page_num`` for each page.

    Raises:
        RuntimeError: when a page request fails (``get_set_page_num`` returned
            ``False``).  Previously this surfaced as an opaque
            ``TypeError: 'bool' object is not iterable``.
    """
    page_num = math.ceil(total / 50)
    result_list = []
    for i in range(1, page_num + 1):
        url = f"{YSH_PAGE}{i}{PAGE_TAIL}"
        page_records = get_set_page_num(url, headers)
        if page_records is False:
            raise RuntimeError(f"failed to fetch audited page {i} of {page_num}: {url}")
        result_list += page_records
        # Random sub-second pause between page requests.
        time.sleep(random.random())
    return result_list

# Collect the codes of all points waiting for review, page by page.
def get_wait_page_number(total, headers):
    """Return the ``{code: category}`` dicts of every pending-review point.

    Args:
        total: number of pending points; pages are assumed to hold 50 entries,
            so ``ceil(total / 50)`` pages are requested.  A falsy total (0, or
            the ``False`` that ``get_wait_num`` returns on failure) results in
            zero pages and an empty list.
        headers: HTTP headers forwarded to ``get_set_page_num``.

    Returns:
        The concatenated lists returned by ``get_set_page_num`` for each page.

    Raises:
        RuntimeError: when a page request fails (``get_set_page_num`` returned
            ``False``).  Previously this surfaced as an opaque
            ``TypeError: 'bool' object is not iterable``.
    """
    page_num = math.ceil(total / 50)
    temp_list = []
    for i in range(1, page_num + 1):
        url = f"{DSH_PAGE}{i}{PAGE_TAIL}"
        page_records = get_set_page_num(url, headers)
        if page_records is False:
            raise RuntimeError(f"failed to fetch pending page {i} of {page_num}: {url}")
        temp_list += page_records
        # Random sub-second pause between page requests.
        time.sleep(random.random())
    return temp_list
    
# Pause for a random fraction of a second.
def random_sleep():
    """Sleep for ``random.random()`` seconds (a value in [0, 1)) and return None."""
    delay = random.random()
    time.sleep(delay)

In [5]:
# Attach Selenium to a Chrome instance that is already running with remote
# debugging enabled, started for example with:
# chrome.exe --remote-debugging-port=9999 --user-data-dir="D:\Selenium\AutomationProfile"
# chrome.exe --remote-debugging-port=9999 --user-data-dir="D:\Program Files\ChromeDir"
# NOTE(review): the sample commands use port 9999 while the code connects to
# 9998 — confirm which port the browser is really started with.
options = Options()
options.add_experimental_option("debuggerAddress", "localhost:9998")
driver = webdriver.Chrome(options=options)

# Element lookups wait up to 60 seconds before giving up.
driver.implicitly_wait(60)

In [6]:
# Output directory for every file this notebook writes.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
save_path = r"F:\collection_spb_info\王维东"
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, without a separate check-then-create step.
os.makedirs(save_path, exist_ok=True)

# 检查headers并更新

In [7]:

# Replace the Cookie header with the cookie of the page open in the attached browser.
headers = update_cookie(headers)

In [8]:
# Display the refreshed headers with the credential values masked, so the
# bearer token and session cookie are not stored in the saved notebook output.
{name: ('<redacted>' if value and name in ('Authorization', 'Cookie') else value)
 for name, value in headers.items()}

{'Accept': 'application/json, text/plain, */*',
 'Accept-Encoding': 'gzip, deflate, br, zstd',
 'Accept-Language': 'zh-CN,zh;q=0.9',
 'Authorization': '<redacted>',
 'Connection': 'keep-alive',
 'Cookie': '<redacted>',
 'Host': 'sanpu.iarrp.cn',
 'Referer': 'https://sanpu.iarrp.cn/',
 'Sec-Ch-Ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
 'Sec-Ch-Ua-Mobile': '?0',
 'Sec-Ch-Ua-Platform': '"Windows"',
 'Sec-Fetch-Dest': 'empty',
 'Sec-Fetch-Mode': 'cors',
 'Sec-Fetch-Site': 'same-origin',
 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'}

In [9]:
# Collect the codes of every audited and pending point of the current account.
# Refresh the Cookie header from the browser session before querying.
headers = update_cookie(headers)
audit_point_list = get_page_number(get_audit_num(headers), headers)
wait_point_list = get_wait_page_number(get_wait_num(headers), headers)
all_point = [*audit_point_list, *wait_point_list]
# Every entry is a one-item {code: category} dict; keep only the code.
loop_all_point = [list(item)[0] for item in all_point]

In [10]:
# Persist the full point list, then read it back from disk.
json_save_path = os.path.join(save_path, f'poind_id_{len(all_point)}.json')
with open(json_save_path, 'w') as json_file:
    json_file.write(json.dumps(all_point, indent=4))
with open(json_save_path, 'r') as json_file:
    data_dict = json.load(json_file)

# Keep only the points whose category value is '1' (the profile samples,
# according to the original comment); every other value is ignored.
data_with_1 = {}
for item_dict in data_dict:
    for key, value in item_dict.items():
        if value == '1':
            data_with_1[key] = value

# Store that subset in its own JSON file.
with open(os.path.join(save_path, f'data_with_1_{len(data_with_1)}.json'), 'w') as json_file:
    json_file.write(json.dumps(data_with_1, indent=4))


In [12]:
# Load the existing base-info table from save_path; its 'ydbh' column holds the
# codes of the points that were already collected.
point_check_info = pd.read_excel(os.path.join(save_path,'base_info.xlsx'))
point_check_list = list(point_check_info['ydbh'])

In [19]:
# Work out which points still have to be fetched.
# A set makes each membership test O(1) instead of scanning the whole list.
known_point_ids = set(point_check_list)
loop_all_point_update = []   # codes not present in the existing base-info table
loop_all_point_pmy = []      # the subset of those whose category is '1' (profile samples)
for one_check_point in all_point:
    # Every entry is a one-item {code: category} dict.
    check_id, check_type = next(iter(one_check_point.items()))
    if int(check_id) in known_point_ids:
        continue
    loop_all_point_update.append(check_id)
    # The category arrives as a string ('0' / '1'); the former comparison with
    # the integer 1 could never match, so this list always stayed empty.
    if str(check_type) == '1':
        loop_all_point_pmy.append(check_id)

In [20]:
# Sizes of: all points, points still to fetch, points already collected, profile points to fetch.
tuple(map(len, (loop_all_point, loop_all_point_update, point_check_list, loop_all_point_pmy)))

(2250, 893, 1357, 0)

In [21]:
# Sizes of: all points, pending-review points, audited points.
tuple(map(len, (all_point, wait_point_list, audit_point_list)))

(2250, 0, 2250)

In [26]:
# Rebind loop_all_point to the not-yet-collected points, so the fetch loops
# below iterate over loop_all_point_update only.
loop_all_point = loop_all_point_update

In [27]:
# Smoke test: fetch the base info of the first point and display the raw response.
base_info = get_base_info(loop_all_point[0],headers)
base_info

{'success': True,
 'message': None,
 'code': 200,
 'result': {'ctime': '2023-09-13 17:54:11',
  'utime': '2024-04-16 14:38:16',
  'id': '909fcfb3-a1be-4ea6-a235-693d09a86abb',
  'bsm': None,
  'ysdm': None,
  'ydbh': '5227010101000178',
  'ydlb': '0',
  'zldwdm': '522701107210',
  'zldwmc': '甲壤村',
  'xzdm': '522701107',
  'xzmc': '平浪镇',
  'bsjd': 107.22447760821846,
  'bswd': 26.133782260766573,
  'hbgd': 1052.0,
  'cyjd': 107.22447760821846,
  'cywd': 26.133782260766573,
  'dwjd': 107.22494493,
  'dwwd': 26.1339884,
  'dwgc': 1017.162109375,
  'sjdm': '52',
  'shjdm': '5227',
  'xjdm': '522701',
  'zjdm': '522701107',
  'sfxz': '0',
  'xzjl': None,
  'xzyy': None,
  'xzyyms': None,
  'zt': '6',
  'ztfx': '1',
  'dczt': '40',
  'tdlylx': '0101',
  'ddx': None,
  'pd': '7',
  'tl': '水稻土',
  'yl': '潴育水稻土',
  'ts': '灰泥田',
  'tz': '大眼黄泥田',
  'sfsty': '0',
  'sfstypxy': '0',
  'sfyjd': '0',
  'sfyswygyd': '0',
  'sfjxzcyd': '0',
  'sfmqyd': '0',
  'sfzdbb': '0',
  'sftzy': '0',
  'sfgjswy':

In [28]:
# Fetch the base info of every point in loop_all_point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
base_info_list = []
base_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(loop_all_point):
    # get_base_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_base_info(one_point, headers)
    one_point_base_info = one_point_response.get('result') if one_point_response else None
    if one_point_base_info is None:
        base_info_failed.append(one_point)
        continue
    base_info_list.append(one_point_base_info)
if base_info_failed:
    print(f"base info: {len(base_info_failed)} point(s) failed and were skipped: {base_info_failed}")


100%|██████████| 893/893 [02:40<00:00,  5.55it/s]


In [29]:
# One row per fetched point; show the first row as a sanity check.
df_base_info = pd.DataFrame(base_info_list)
df_base_info.head(1)

Unnamed: 0,ctime,utime,id,bsm,ysdm,ydbh,ydlb,zldwdm,zldwmc,xzdm,...,gjjshsj,bz,sfspsdx,sfqmpk,gjlycjCy,zdbbzt,appVersion,sfwryxcyd,sfwrxcyd2,sfgd
0,2023-09-13 17:54:11,2024-04-16 14:38:16,909fcfb3-a1be-4ea6-a235-693d09a86abb,,,5227010101000178,0,522701107210,甲壤村,522701107,...,,,0,1,,,1.47.2,0,0,0


In [30]:
# Save the base info of this batch; the file name carries the batch size.
df_base_info.to_excel(os.path.join(save_path,f'base_info_update_{len(loop_all_point)}.xlsx'),index=False)

In [31]:
# Smoke test: fetch the media records of the first point and display the raw response.
img_info = get_img_info(loop_all_point[0],img_headers)
img_info

{'success': True,
 'message': None,
 'code': 200,
 'result': [{'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-28 18:26:31',
   'id': '1773295599996735489',
   'glbh': '5227010101000178',
   'wjbh': None,
   'wjmc': '1697531173927.mp4',
   'wjlx': None,
   'wjfl': '1400',
   'wjlj': 'ssp-dccy/2024-03-28/522701/7eeff7c7-1857-4a92-a48d-2055cf2ffbea.mp4',
   'jd': 107.224951,
   'wd': 26.133876,
   'jdu': None,
   'cjsj': '2023-10-17 16:26:37',
   'bz': None,
   'url': None,
   'rawUrl': None},
  {'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-28 18:26:31',
   'id': '1773295599996735490',
   'glbh': '5227010101000178',
   'wjbh': None,
   'wjmc': '1697531266970.mp4',
   'wjlx': None,
   'wjfl': '1400',
   'wjlj': 'ssp-dccy/2024-03-28/522701/bc78da5a-c349-432a-b3b6-36f3b17df9b7.mp4',
   'jd': 107.22496,
   'wd': 26.133884,
   'jdu': None,
   'cjsj': '2023-10-17 16:28:08',
   'bz': None,
   'url': None,
   'rawUrl': None},
  {'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-2

In [32]:
# Fetch the media (image / video) records of every point in loop_all_point.
# Refresh the Cookie header from the browser session first.
img_headers = update_cookie(img_headers)
img_info_list = []
img_info_failed = []  # codes whose request failed or carried no 'result'
# Prefix prepended to every relative file path ('wjlj').
prefix = BASE_NAME
for one_point in tqdm(loop_all_point):
    # get_img_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_img_info(one_point, img_headers)
    one_point_img_info = one_point_response.get('result') if one_point_response else None
    if one_point_img_info is None:
        img_info_failed.append(one_point)
        continue
    updated_list = [{**item, 'wjlj': f"{prefix}{item['wjlj']}"} for item in one_point_img_info]
    img_info_list += updated_list
if img_info_failed:
    print(f"media info: {len(img_info_failed)} point(s) failed and were skipped: {img_info_failed}")

100%|██████████| 893/893 [02:56<00:00,  5.06it/s] 


In [33]:
# One row per media file; show the first row as a sanity check.
df_img_info = pd.DataFrame(img_info_list)
df_img_info.head(1)

Unnamed: 0,ctime,utime,id,glbh,wjbh,wjmc,wjlx,wjfl,wjlj,jd,wd,jdu,cjsj,bz,url,rawUrl
0,2024-03-28 18:26:31,2024-03-28 18:26:31,1773295599996735489,5227010101000178,,1697531173927.mp4,,1400,https://sanpu.iarrp.cn/ssp-dccy/2024-03-28/522...,107.224951,26.133876,,2023-10-17 16:26:37,,,


In [34]:
# Save the media records of this batch; the file name carries the batch size.
df_img_info.to_excel(os.path.join(save_path, f'img_info_update_{len(loop_all_point)}.xlsx'), index=False)

In [35]:
# Smoke test: fetch the site-condition record of the first point and display the raw response.
ldtj_info = get_ldtj_info(loop_all_point[0],headers)
ldtj_info

{'success': True,
 'message': None,
 'code': 200,
 'result': {'ctime': '2024-03-28 18:26:31',
  'utime': '2024-03-28 18:28:08',
  'id': '1773295599971569665',
  'ydbh': '5227010101000178',
  'zldwmc': '甲壤村',
  'cyjd': 107.22447760821846,
  'cywd': 26.133782260766573,
  'hbgd': 1052.0,
  'cysj': '2023-10-17 15:50:07',
  'tqqk': '01',
  'dcz': '陈秋雷',
  'dcdw': '上海数喆数据科技有限公司',
  'qslx': 'W',
  'qscd': 'S',
  'jyclfd': 'N',
  'jycljj': None,
  'dblsfd': 'N',
  'dblsdx': None,
  'dbybfd': 'N',
  'dbybhd': None,
  'dblxkd': None,
  'dblxcd': None,
  'dblxfd': 'N',
  'dblxjj': None,
  'dblxsl': None,
  'trsh': '0',
  'ddx': 'MO',
  'zdx': 'MM',
  'xdx': 'SL',
  'dxbw': 'LS',
  'dxbwqt': None,
  'pd': 'III',
  'px': 'N',
  'pxn': '02',
  'my': '19',
  'myqt': None,
  'mz': 'LG',
  'mzqt': None,
  'zblx': '11',
  'zbzwysz': '农作物',
  'zbfgd': None,
  'zbqmfgd': None,
  'zbgmfgd': None,
  'zbcbfgd': None,
  'tdlylx': '0101',
  'tdlylxqt': None,
  'tdlylxbg': '[]',
  'sftsncp': '0',
  'tsncpzwlx':

In [36]:
# Fetch the site-condition record of every point in loop_all_point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
ldtj_info_list = []
ldtj_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(loop_all_point):
    # get_ldtj_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_ldtj_info(one_point, headers)
    one_point_ldtj_info = one_point_response.get('result') if one_point_response else None
    if one_point_ldtj_info is None:
        ldtj_info_failed.append(one_point)
        continue
    ldtj_info_list.append(one_point_ldtj_info)
if ldtj_info_failed:
    print(f"site conditions: {len(ldtj_info_failed)} point(s) failed and were skipped: {ldtj_info_failed}")

100%|██████████| 893/893 [02:27<00:00,  6.03it/s]


In [37]:
# One row per fetched point; show the first row as a sanity check.
df_ldtj_info = pd.DataFrame(ldtj_info_list)
df_ldtj_info.head(1)

Unnamed: 0,ctime,utime,id,ydbh,zldwmc,cyjd,cywd,hbgd,cysj,tqqk,...,glswylszl,rzlstjzb,hlsbctrhhfd,hlsbctrhhzl,cyhydgs,cbfmc,cbfzjhm,cbflxfs,hydgs,bz
0,2024-03-28 18:26:31,2024-03-28 18:28:08,1773295599971569665,5227010101000178,甲壤村,107.224478,26.133782,1052.0,2023-10-17 15:50:07,1,...,,,0,0,,刘洋,,13864578966,10,甲壤村信号差，造成混样点定位漂移


In [38]:
# Save the site-condition records of this batch; the file name carries the batch size.
# NOTE(review): 'upate' in the file name looks like a typo of 'update'. The merge
# cell only looks at the text before the first '_', so grouping still works.
df_ldtj_info.to_excel(os.path.join(save_path,f'ldtj_info_upate_{len(loop_all_point)}.xlsx'),index=False)

In [39]:
# Smoke test: fetch the sample-bag records of the first point and display the raw response.
ctd_info = get_ctd_info(loop_all_point[0],headers)
ctd_info

{'success': True,
 'message': None,
 'code': 200,
 'result': [{'ctime': '2023-10-17 17:04:53',
   'utime': '2024-03-28 18:26:31',
   'id': '1714205833875787778',
   'ctdbh': '522701010100017820',
   'ydbh': '5227010101000178',
   'ydlb': '0',
   'cylx': '1',
   'yplx': '4',
   'fscxh': None,
   'ypzl': None,
   'jszt': '1',
   'jshzt': None},
  {'ctime': '2023-10-17 17:04:53',
   'utime': '2024-03-28 18:26:31',
   'id': '1714205833875787777',
   'ctdbh': '522701010100017810',
   'ydbh': '5227010101000178',
   'ydlb': '0',
   'cylx': '1',
   'yplx': '1',
   'fscxh': None,
   'ypzl': 4670.0,
   'jszt': '2',
   'jshzt': '2'}],
 'timestamp': 1715241795982,
 'elapse': 0}

In [40]:
# Fetch the sample-bag records of every point in loop_all_point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
ctd_info_list = []
ctd_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(loop_all_point):
    # get_ctd_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_ctd_info(one_point, headers)
    one_point_ctd_info = one_point_response.get('result') if one_point_response else None
    if one_point_ctd_info is None:
        ctd_info_failed.append(one_point)
        continue
    ctd_info_list += one_point_ctd_info
if ctd_info_failed:
    print(f"sample bags: {len(ctd_info_failed)} point(s) failed and were skipped: {ctd_info_failed}")

100%|██████████| 893/893 [02:55<00:00,  5.10it/s]


In [41]:
# One row per sample bag; show the first row as a sanity check.
df_ctd_info = pd.DataFrame(ctd_info_list)
df_ctd_info.head(1)

Unnamed: 0,ctime,utime,id,ctdbh,ydbh,ydlb,cylx,yplx,fscxh,ypzl,jszt,jshzt
0,2023-10-17 17:04:53,2024-03-28 18:26:31,1714205833875787778,522701010100017820,5227010101000178,0,1,4,,,1,


In [42]:
# Save the sample-bag records of this batch; the file name carries the batch size.
df_ctd_info.to_excel(os.path.join(save_path, f'ctd_info_update_{len(loop_all_point)}.xlsx'), index=False)

In [43]:
# Smoke test: fetch the fertilization records of the first point and display the raw response.
sf_info = get_sf_info(loop_all_point[0],headers)
sf_info

{'success': True,
 'message': None,
 'code': 200,
 'result': [{'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-28 18:26:31',
   'id': '1773295599975763970',
   'ydbh': '5227010101000178',
   'jd': '第二季',
   'fllx': '化学氮肥',
   'flmc': '三元复合（混）肥',
   'swyl': 40.0,
   'hlzb': 13,
   'yshl': 5.2,
   'zwlx': '01',
   'zwlxqt': '',
   'jfzb': 70,
   'zfzb': 30},
  {'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-28 18:26:31',
   'id': '1773295599975763971',
   'ydbh': '5227010101000178',
   'jd': '第二季',
   'fllx': '化学氮肥',
   'flmc': '尿素',
   'swyl': 5.0,
   'hlzb': 46,
   'yshl': 2.3,
   'zwlx': '01',
   'zwlxqt': '',
   'jfzb': 0,
   'zfzb': 100},
  {'ctime': '2024-03-28 18:26:31',
   'utime': '2024-03-28 18:26:31',
   'id': '1773295599975763972',
   'ydbh': '5227010101000178',
   'jd': '第二季',
   'fllx': '磷肥',
   'flmc': '三元复合（混）肥',
   'swyl': 40.0,
   'hlzb': 5,
   'yshl': 2.0,
   'zwlx': '01',
   'zwlxqt': '',
   'jfzb': 70,
   'zfzb': 30},
  {'ctime': '2024-03-28 18:26:31',
   

In [44]:
# Fetch the fertilization records of every point in loop_all_point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
sf_info_list = []
sf_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(loop_all_point):
    # get_sf_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_sf_info(one_point, headers)
    one_point_sf_info = one_point_response.get('result') if one_point_response else None
    if one_point_sf_info is None:
        sf_info_failed.append(one_point)
        continue
    sf_info_list += one_point_sf_info
if sf_info_failed:
    print(f"fertilization: {len(sf_info_failed)} point(s) failed and were skipped: {sf_info_failed}")

100%|██████████| 893/893 [02:38<00:00,  5.62it/s]


In [45]:
# One row per fertilization record; show the first row as a sanity check.
df_sf_info = pd.DataFrame(sf_info_list)
df_sf_info.head(1)

Unnamed: 0,ctime,utime,id,ydbh,jd,fllx,flmc,swyl,hlzb,yshl,zwlx,zwlxqt,jfzb,zfzb
0,2024-03-28 18:26:31,2024-03-28 18:26:31,1773295599975763970,5227010101000178,第二季,化学氮肥,三元复合（混）肥,40.0,13.0,5.2,1,,70.0,30.0


In [46]:
# Save the fertilization records of this batch; the file name carries the batch size.
df_sf_info.to_excel(os.path.join(save_path, f'sf_info_update_{len(loop_all_point)}.xlsx'), index=False)

# 剖面信息

In [None]:
# Profile points: reload the {code: '1'} mapping written earlier and keep its keys.
pm_json_name = f'data_with_1_{len(data_with_1)}.json'
with open(os.path.join(save_path, pm_json_name), 'r') as json_file:
    pm_dict = json.load(json_file)
pm_point_id = list(pm_dict)


In [None]:
# Smoke test: fetch the profile record of the first profile point and display the raw response.
pm_info = get_pm_info(pm_point_id[0],headers)
pm_info

In [None]:
# Fetch the profile record of every profile point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
pm_info_list = []
pm_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(pm_point_id):
    # get_pm_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_pm_info(one_point, headers)
    one_point_pm_info = one_point_response.get('result') if one_point_response else None
    if one_point_pm_info is None:
        pm_info_failed.append(one_point)
    else:
        pm_info_list.append(one_point_pm_info)
    # Random sub-second pause between requests.
    time.sleep(random.random())
if pm_info_failed:
    print(f"profile info: {len(pm_info_failed)} point(s) failed and were skipped: {pm_info_failed}")

In [None]:
# One row per profile point; show the first row as a sanity check.
df_pm_info = pd.DataFrame(pm_info_list)
df_pm_info.head(1)

In [None]:
# Save the profile records to pm_info.xlsx in save_path.
df_pm_info.to_excel(os.path.join(save_path, 'pm_info.xlsx'), index=False)

In [None]:
# Smoke test: fetch the horizon records of the first profile point and display the raw response.
pm_fc_info = get_pmfc_info(pm_point_id[0],headers)
pm_fc_info

In [None]:
# Fetch the horizon records of every profile point.
# Refresh the Cookie header from the browser session first.
headers = update_cookie(headers)
pm_fc_info_list = []
pm_fc_info_failed = []  # codes whose request failed or carried no 'result'
for one_point in tqdm(pm_point_id):
    # get_pmfc_info returns False on a non-200 answer; indexing that directly
    # would abort the whole loop with a TypeError.
    one_point_response = get_pmfc_info(one_point, headers)
    one_point_pm_fc_info = one_point_response.get('result') if one_point_response else None
    if one_point_pm_fc_info is None:
        pm_fc_info_failed.append(one_point)
    else:
        pm_fc_info_list += one_point_pm_fc_info
    # Random sub-second pause between requests.
    time.sleep(random.random())
if pm_fc_info_failed:
    print(f"profile horizons: {len(pm_fc_info_failed)} point(s) failed and were skipped: {pm_fc_info_failed}")

In [None]:
# One row per horizon record; show the first row as a sanity check.
df_pm_fc_info = pd.DataFrame(pm_fc_info_list)
df_pm_fc_info.head(1)

In [None]:
# Save the horizon records to pm_fc_info.xlsx in save_path.
df_pm_fc_info.to_excel(os.path.join(save_path, 'pm_fc_info.xlsx'), index=False)

# TODO

In [49]:
# Table names to merge: the text before the first '_' of every .xlsx file in
# save_path (so 'pm_info.xlsx' and 'pm_fc_info.xlsx' both yield 'pm').
excel_name_list = {
    file_name.split('_')[0]
    for file_name in os.listdir(save_path)
    if file_name.endswith('.xlsx')
}

In [54]:

# Merge all Excel files of each table (the accumulated <table>_info.xlsx plus
# every <table>_info_*.xlsx batch file) back into <table>_info.xlsx.
# List the folder once; skip the '~$' lock files Excel creates for open workbooks.
excel_files = [
    file_name for file_name in os.listdir(save_path)
    if file_name.endswith('.xlsx') and not file_name.startswith('~$')
]
for one_table in tqdm(excel_name_list):
    # Group by the text before '_info' rather than before the first '_', so
    # 'pm_fc_info.xlsx' (table 'pm_fc') is no longer merged into 'pm_info.xlsx'.
    temp_df_list = [
        pd.read_excel(os.path.join(save_path, one_excel))
        for one_excel in excel_files
        if one_excel.split('_info')[0] == one_table
    ]
    if not temp_df_list:
        # pd.concat raises on an empty list; nothing to merge for this name.
        continue
    # Convert every value to text, as before.
    result_df = pd.concat(temp_df_list, ignore_index=True).astype(str)
    # Batch files are never removed from the folder, so a later run reads rows
    # that <table>_info.xlsx already contains; drop those exact duplicates.
    result_df = result_df.drop_duplicates()
    result_df.to_excel(os.path.join(save_path, one_table + '_info.xlsx'),index=False)

100%|██████████| 5/5 [00:33<00:00,  6.66s/it]
