In [1]:
import requests
import json
import numpy as np
import pandas as pd
import geopandas as gpd
import geopy.distance
import folium
from shapely.geometry import Polygon
import requests
from shapely.ops import unary_union
import matplotlib.pyplot as plt

In [2]:
coords = np.load('coords.npy',allow_pickle=True)

In [3]:
import numpy as np
import geopy.distance

def get_grids(boundary, distance, bias):
    """
    在边界区域内生成指定大小的网格
    
    Parameters:
    boundary: 2D numpy数组，顺时针排列的边界坐标 [[lon, lat], [lon, lat], ...]
    distance: 网格边长（米）
    bias: 边界扩展偏置（度）
    
    Returns:
    grids: 列表，包含每个网格的边界和中心坐标 [((lon1, lat1, lon2, lat2), (lon_c, lat_c)), ...]
    """
    
    # 找到边界的最小和最大经纬度，并扩展边界
    min_lon = min(boundary[:, 0]) - bias
    max_lon = max(boundary[:, 0]) + bias
    min_lat = min(boundary[:, 1]) - bias
    max_lat = max(boundary[:, 1]) + bias

    print(f"边界范围: 经度[{min_lon:.6f}, {max_lon:.6f}], 纬度[{min_lat:.6f}, {max_lat:.6f}]")

    # 计算在平均纬度处1毫度(0.001度)对应的距离（米）
    avg_lat = (min_lat + max_lat) / 2
    lon_dist = geopy.distance.distance((avg_lat, min_lon), (avg_lat, min_lon + 0.001)).m
    lat_dist = geopy.distance.distance((min_lat, min_lon), (min_lat + 0.001, min_lon)).m

    print(f"1毫度经度距离: {lon_dist:.2f}米, 1毫度纬度距离: {lat_dist:.2f}米")

    # 计算经纬度方向上的网格数量
    n_lon = int((max_lon - min_lon) / 0.001 * lon_dist / distance)
    n_lat = int((max_lat - min_lat) / 0.001 * lat_dist / distance)

    print(f"网格划分: 经度方向{n_lon}个网格, 纬度方向{n_lat}个网格, 总共{n_lon * n_lat}个候选网格")

    # 初始化网格列表
    grids = []

    # 遍历所有可能的网格
    for i in range(n_lon):
        for j in range(n_lat):
            # 计算网格左下角坐标
            lon1 = min_lon + i * distance / lon_dist * 0.001
            lat1 = min_lat + j * distance / lat_dist * 0.001

            # 计算网格右上角坐标
            lon2 = lon1 + distance / lon_dist * 0.001
            lat2 = lat1 + distance / lat_dist * 0.001

            # 计算网格中心坐标
            lon_c = (lon1 + lon2) / 2
            lat_c = (lat1 + lat2) / 2

            # 使用射线投射算法判断中心点是否在边界内
            inside = is_point_in_polygon(lon_c, lat_c, boundary)

            # 如果中心点在边界内，将网格添加到结果列表
            if inside:
                grids.append(((lon1, lat1, lon2, lat2), (lon_c, lat_c)))

    print(f"生成有效网格数量: {len(grids)}")
    return grids

def is_point_in_polygon(lon, lat, polygon):
    """
    使用射线投射算法判断点是否在多边形内
    
    Parameters:
    lon: 点的经度
    lat: 点的纬度  
    polygon: 多边形边界点数组
    
    Returns:
    bool: 点是否在多边形内
    """
    inside = False
    n = len(polygon)
    
    for i in range(n):
        lon1, lat1 = polygon[i]
        lon2, lat2 = polygon[(i + 1) % n]
        
        # 检查点是否在边的纬度范围内
        if (lat1 > lat) != (lat2 > lat):
            # 计算射线与边的交点经度
            if lat2 - lat1 != 0:  # 避免除零
                x_intersect = (lon2 - lon1) * (lat - lat1) / (lat2 - lat1) + lon1
                if lon < x_intersect:
                    inside = not inside
                    
    return inside

In [8]:
grids = get_grids(coords, distance=2000, bias=0.001)
print(f"逆时针边界生成网格数量: {len(grids)}")

边界范围: 经度[113.751263, 114.622060], 纬度[22.447028, 22.864685]
1毫度经度距离: 102.78米, 1毫度纬度距离: 110.74米
网格划分: 经度方向44个网格, 纬度方向23个网格, 总共1012个候选网格
生成有效网格数量: 485
逆时针边界生成网格数量: 485


In [9]:
formatted_grids = []
for bbox, _ in grids:
    lon_min, lat_min, lon_max, lat_max = bbox

    # 顺时针四个角点
    coord = [
        (lon_min, lat_min),
        (lon_max, lat_max),
    ]
    # 首尾闭合
    #coord.append(coord[0])

    # 格式化输出
    coord_str = "|".join([f"{lon:.6f},{lat:.6f}" for lon, lat in coord])
    formatted_grids.append(coord_str)

print(formatted_grids[0])

113.751263,22.681820|113.770722,22.699880


In [10]:
def geo(polygon, page):#https://lbs.amap.com/api/webservice/guide/api-advanced/search
    name = []
    location = []
    id = []

    #url参数设置
    key = '28654ef6afcb76a92855c18a153ce5bc'
    types = '150700' #190305高速公路入口，190304高速公路出口，150500地铁站，150700公交站
    #city = '440300' #440303-440311,440300深圳市, 440000广东省
    offset = 25
    url = 'https://restapi.amap.com/v3/place/polygon?key={}&polygon={}&types={}&offset={}&page={}'.format(key,polygon,types,offset,page)
    response = requests.get(url)
    answer = response.json()
    if answer['info']=='OK':
        for i in answer['pois']:
            name.append(i['name'])
            location.append(i['location'])
            id.append(i['id'])
        return np.array([name, location, id]).T
    else:
        return print(answer['info'])

In [337]:
tag = 0
init = 485 #255 268
#起始栅格位置，可以直接设置为结果目录下最后一个文件的编号
for i in range(init,len(formatted_grids)):              # i为每个循环查询的栅格编号
    page = 1                                            #此次查询的页码
    length = 1                                          #POI返回结果长度，为0说明此栅格查询完毕
    array = np.array([['','','']])                      #定义数据存储格式，方便后续拼接结果
    while(length>0):                                    #若POI返回结果长度不为0，执行循环
        a = geo(formatted_grids[i], page)               #请求POI返回结果
        if type(a)==np.ndarray:                         #判断返回结果类型，若为nparray，说明未达上限，若为str说明达到API上限
            length = len(a)                             #单次请求返回结果长度 
            array = np.vstack([array,a])                #拼接结果
            page += 1#页码+1
        else:
            print('搜索停止')
            tag = 1
            break
    if tag == 1:
        break
    else:
        print('第{}个栅格查询完毕，共请求{}次,获取{}个POI'.format(i, page, len(array)-1))
        pd.DataFrame(array).to_csv('./results/Bus/{}.csv'.format(i),header = None,index = None)   #存储结果，首先要在根目录建立results文件夹

In [338]:
def split_rectangle(coordinates):
    corners = coordinates.split('|')
    bottom_left = list(map(float, corners[0].split(',')))
    top_right = list(map(float, corners[1].split(',')))
    
    x_min, y_min = bottom_left
    x_max, y_max = top_right
    
    # 计算中间点
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    
    # 划分四个矩形
    rectangles = [
        # 左下部分
        f"{x_min},{y_min}|{x_mid},{y_mid}",
        # 右下部分
        f"{x_mid},{y_min}|{x_max},{y_mid}",
        # 左上部分
        f"{x_min},{y_mid}|{x_mid},{y_max}",
        # 右上部分
        f"{x_mid},{y_mid}|{x_max},{y_max}"
    ]
    print(rectangles)

In [344]:
split_rectangle(formatted_grids[268]) #255 268

['114.120983,22.591515|114.13071249999999,22.600545500000003', '114.13071249999999,22.591515|114.140442,22.600545500000003', '114.120983,22.600545500000003|114.13071249999999,22.609576', '114.13071249999999,22.600545500000003|114.140442,22.609576']


In [348]:
i = 268
j = 4

polygon = '114.1307125,22.6005455|114.140442,22.609576'

tag = 0
page = 1                                            #此次查询的页码
length = 1                                          #POI返回结果长度，为0说明此栅格查询完毕
array = np.array([['','','']])                      #定义数据存储格式，方便后续拼接结果
while(length>0):                                    #若POI返回结果长度不为0，执行循环
    a = geo(polygon, page)               #请求POI返回结果
    if type(a)==np.ndarray:                         #判断返回结果类型，若为nparray，说明未达上限，若为str说明达到API上限
        length = len(a)                             #单次请求返回结果长度 
        array = np.vstack([array,a])                #拼接结果
        page += 1#页码+1
    else:
        print('到达API上限')
        tag = 1
        break
print('共请求{}次,获取{}个POI'.format(page, len(array)-1))
pd.DataFrame(array).to_csv('./results/Bus/{}_{}.csv'.format(i, j),header = None,index = None)   #存储结果，首先要在根目录建立results文件夹

共请求3次,获取7个POI


In [349]:
import glob
import transbigdata as tbd

In [350]:
csv_files = glob.glob(f"{'results/Bus'}/*.csv")
df_list = [pd.read_csv(file) for file in csv_files]
data = pd.concat(df_list, ignore_index=True)

In [351]:
data.columns = ['name','location','id']
print(data.shape)
data = data.dropna()
print(data.shape)

data[['longitude', 'latitude']] = data['location'].str.split(',', expand=True)
data['longitude'] = data['longitude'].astype(float)
data['latitude'] = data['latitude'].astype(float)

data.head(2)

(6593, 3)
(6593, 3)


Unnamed: 0,name,location,id,longitude,latitude
0,冰雪世界南公交首末站(公交站),"113.769627,22.709732",BV09447897,113.769627,22.709732
1,前海冰雪世界(公交站),"113.770256,22.716250",BV09446847,113.770256,22.71625


In [352]:
bus = data.drop_duplicates(subset=['id'])
print(bus.shape)

(6589, 5)


In [353]:
bus.to_csv('bus.csv',index=None)