### GetTecentStreetViewMetadata

In [1]:
from pathlib import Path
import os
from joblib import Parallel, delayed, parallel_backend
import pandas as pd
from tqdm.notebook import tqdm
from CoordinateTransctionTools import wgs2gcj
import requests as req
import urllib.parse
import PIL.Image as Image
import multiprocessing




Root_DIR = Path('./Kowloon/')
Metrics_DIR = Path(Root_DIR, 'Metrics')
OUTPUT_DIR = Path(Root_DIR, 'Output', )
Tile_DIR = Path(OUTPUT_DIR, "Tiles")
Image_DIR = Path(OUTPUT_DIR, "Images")


key = 'K76BZ-W3O2Q-RFL5S-GXOPR-3ARIT-6KFE5'
output_format = 'json'
query_base_url = 'https://sv.map.qq.com/xf?lat={lat}&lng={lng}&r=200&key={key}&output={output_format}&pf=jsapi&ref=jsapi&cb=qq.maps._svcb2.cbjxjooc8y5'
level = 0
tile_base_url = 'https://sv0.map.qq.com/tile?svid={svid}&x={tile_x}&y={tile_y}&from=web&level={level}&v=2'

In [3]:
# 坐标集合
points_path = Path(Metrics_DIR, 'KowloonSamplingPoints.csv')
points_df = pd.read_csv(points_path, header=0)

# points_df['uuid'] = points_df['id'].apply(int)

# 坐标转换
func = lambda row: wgs2gcj(row['wgs_lat'], row['wgs_lon'])
points_df['gcj_lat'], points_df['gcj_lon'] = zip(*points_df.apply(func, axis=1))

In [11]:
'''
来源于-街景拾取器
通过坐标查询svid等相关信息
注： 此处坐标为：腾讯地图坐标系？
    key值姑且不变
    output可选项json

demo_url = https://sv.map.qq.com/xf?lat=30.611174806403625&lng=114.42840103787603&r=200&key=K76BZ-W3O2Q-RFL5S-GXOPR-3ARIT-6KFE5&output=jsonp&pf=jsapi&ref=jsapi&cb=qq.maps._svcb2.cbjxjooc8y5

'''


def iter_func(row):
    metadata_url = query_base_url.format(
        lat=str(row['gcj_lat']),
        lng=str(row['gcj_lon']),
        key=key,
        output_format=output_format)
    try:
        response = req.get(url=metadata_url)

        response_json = None
        response_json = response.json()
    except:
        with open(Path(Metrics_DIR, 'meta_request_error.txt'), 'a') as fp:
            std_str = '-'.join([str(row['Id']), str(row['gcj_lat']), str(row['gcj_lon'])])
            fp.write(std_str)
            fp.write('\n')
        return

    '''
    response_json sample:
    {'detail': {'get_way': 1, 'heading': 85, 'pitch': 0, 'road_name': 'NA', 'src': '1', 'svid': '10141019121231110908700', 
    'x': 12735087.78, 'y': 3570390.62, 'zoom': 1}, 'info': {'errno': 0, 'type': 1}}
    '''
    detail = response_json.get('detail')
    svid = detail.get('svid')
    road_name = detail.get('road_name', '')
    if svid == None:
        return 
    else:
        # 构建图片下载url
        tile_list = [(x, y) for y in range(1, 3) for x in range(0, 8)]
        # tile url sample: https://sv5.map.qq.com/tile?svid=10141019130228120933700&x=5&y=2&from=web&level=0&v=2
        image_dict_list = []
        for (tile_x, tile_y) in tile_list:
            image_dict = {}
            tile_url = tile_base_url.format(
                svid=str(svid),
                tile_x=str(tile_x),
                tile_y=str(tile_y),
                level=str(level))
            image_dict['tile_x'] = tile_x
            image_dict['tile_y'] = tile_y
            image_dict['tile_url'] = tile_url
            image_dict['svid'] = svid
            image_dict['road_name'] = road_name
            image_dict['wgs_lat'] = row['wgs_lat']
            image_dict['wgs_lon'] = row['wgs_lon']
            image_dict_list.append(image_dict)
        return image_dict_list



# tile_url_list = []

# ptqdm = tqdm(total=points_df.shape[0])
# for _, row in points_df.iterrows():
#     image_dict_list = iter_func(row)
#     if image_dict_list != None:
#         tile_url_list.extend(image_dict_list)
#     ptqdm.update(1)
# ptqdm.close()

# rows = [row for _, row in points_df.iterrows()]


res = []
with parallel_backend('threading', n_jobs=20):
    res = Parallel(verbose=1)(delayed(iter_func)(row) for _, row in points_df.iterrows())

tile_url_list = []
for _ in res:
    if _ != None:
        tile_url_list.extend(_)

tile_url_df = pd.DataFrame(tile_url_list)
tile_url_df.to_csv(Path(Metrics_DIR, "tile_url.csv"), header=True, index=False)

[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.2s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    1.5s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:    3.6s
[Parallel(n_jobs=20)]: Done 760 tasks      | elapsed:    6.7s
[Parallel(n_jobs=20)]: Done 1210 tasks      | elapsed:   10.5s
[Parallel(n_jobs=20)]: Done 1760 tasks      | elapsed:   15.6s
[Parallel(n_jobs=20)]: Done 2410 tasks      | elapsed:   21.7s
[Parallel(n_jobs=20)]: Done 3160 tasks      | elapsed:   28.9s
[Parallel(n_jobs=20)]: Done 4010 tasks      | elapsed:   36.9s
[Parallel(n_jobs=20)]: Done 4960 tasks      | elapsed:   46.2s
[Parallel(n_jobs=20)]: Done 6010 tasks      | elapsed:   55.9s
[Parallel(n_jobs=20)]: Done 7160 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 8410 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done 8799 out of 8799 | elapsed:  1.7min finished


In [12]:
tile_url_df = pd.read_csv(Path(Metrics_DIR, "tile_url.csv"), header=0)
tile_url_df.shape

(140640, 7)

### GetTileFromMeta 

In [13]:



def url_response(row):
    tile_x, tile_y, tile_url, svid, _, _, _ = row
    try:
        r = req.get(tile_url)
        r.raise_for_status()
        
        svid_path = Path(Tile_DIR, svid)
        if not svid_path.exists():
            svid_path.mkdir()
        tile_fp = Path(svid_path, '{}-{}.jpg'.format(int(tile_x), int(tile_y)))
        with open(tile_fp, 'wb') as fp:
            fp.write(r.content)
    except:
#         error_url_list.append(tile_url) 
        return tile_url
    return 
        
        
# ptqdm = tqdm(total=tile_url_df[:1000].shape[0])
# for _, row in tile_url_df[:1000].iterrows():
#     url_response(row)
#     ptqdm.update(1)
# ptqdm.close()


        
res = []
with parallel_backend('threading', n_jobs=20):
    res = Parallel(verbose=1)(delayed(url_response)(row) for _, row in tile_url_df.iterrows())

[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.3s
[Parallel(n_jobs=20)]: Done 160 tasks      | elapsed:    2.7s
[Parallel(n_jobs=20)]: Done 410 tasks      | elapsed:    7.3s
[Parallel(n_jobs=20)]: Done 760 tasks      | elapsed:   13.7s
[Parallel(n_jobs=20)]: Done 1210 tasks      | elapsed:   22.4s
[Parallel(n_jobs=20)]: Done 1760 tasks      | elapsed:   33.1s
[Parallel(n_jobs=20)]: Done 2410 tasks      | elapsed:   45.8s
[Parallel(n_jobs=20)]: Done 3160 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done 4010 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done 4960 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done 6010 tasks      | elapsed:  2.0min
[Parallel(n_jobs=20)]: Done 7160 tasks      | elapsed:  2.3min
[Parallel(n_jobs=20)]: Done 8410 tasks      | elapsed:  2.7min
[Parallel(n_jobs=20)]: Done 9760 tasks      | elapsed:  3.2min
[Parallel(n_jobs=20)]: Done 11210 tasks 

In [14]:
# 针对error list 重请求 有些是由于网络原因未下载
error_url_list_r = []
error_url_list = []

for _ in res:
    if _ != None:
        error_url_list.extend(_)


for url in tqdm(error_url_list):
    param_dict = urllib.parse.parse_qs(urllib.parse.urlsplit(url).query)
    svid = param_dict.get('svid')
    x = param_dict.get('x')
    y = param_dict.get('y')
    
    try:
        r = req.get(url)
        r.raise_for_status()

        svid_path = Path(Tile_DIR, svid)
        if not svid_path.exists():
            svid_path.mkdir()
        tile_fp = Path(svid_path, '{}-{}.jpg'.format(int(tile_x), int(tile_y)))
        with open(tile_fp, 'wb') as fp:
            fp.write(r.content)
    except:
        error_url_list_r.append(url)
print(len(error_url_list_r))

  0%|          | 0/93670 [00:00<?, ?it/s]

93670


### Tile2Image

In [15]:


def image_compose(images_root_path, image_names, images_size=512, images_row=2, images_column=8,
                  images_save_path='./final.jpg'):
    """
    定义图像拼接函数
    :param images_root_path: # 图片集根地址
    :param image_names: 获取图片集地址下的所有图片名称
    :param images_size: # 每张小图片的大小
    :param images_row: 图片间隔，也就是合并成一张图后，一共有几行
    :param images_column: 图片间隔，也就是合并成一张图后，一共有几列
    :param images_save_path: 图片转换后的地址
    :return:
    """

    # 简单的对于参数的设定和实际图片集的大小进行数量判断
    if len(image_names) != images_row * images_column:
        raise ValueError("合成图片的参数和要求的数量不能匹配！")

    to_image = Image.new('RGB', (images_column * images_size, images_row * images_size))  # 创建一个新图
    # 循环遍历，把每张图片按顺序粘贴到对应位置上
    for y in range(1, images_row + 1):
        for x in range(1, images_column + 1):
            from_image = Image.open(Path(images_root_path, image_names[images_column * (y - 1) + x - 1])).resize(
                (images_size, images_size), Image.ANTIALIAS)
            to_image.paste(from_image, ((x - 1) * images_size, (y - 1) * images_size))
    return to_image.save(images_save_path)  # 保存新图


def worker(dir):
    if dir.is_dir():
        image_root_path = str(dir)
        dir_name = dir.stem

        destination_path = Path(Image_DIR, dir_name)
        if not destination_path.exists():
            destination_path.mkdir(exist_ok=True)
        
        try:
            # # 全景图
            # tile_list = [(x, y) for y in range(1,3) for x in range(0, 8)]
            # image_names = ['{}-{}.jpg'.format(str(tile_x), str(tile_y)) for (tile_x, tile_y) in tile_list]
            # image_compose(images_root_path=image_root_path, image_names=image_names,
            #               images_size=512, images_row=2, images_column=8,
            #               images_save_path=Path(destination_path, '{}-all.jpg'.format(str(dir_name))))

            # 正前方 3 + 4
            tile_list = [(x, y) for y in range(1, 3) for x in range(3, 5)]
            image_names = ['{}-{}.jpg'.format(str(tile_x), str(tile_y)) for (tile_x, tile_y) in tile_list]

            work_tag = True  # 设定 判断标记，如果True，则合成该方向街景
            for image_name in image_names:
                p = Path(image_root_path, image_name)
                if not p.exists():
                    work_tag = False
                    break

            if work_tag:
                image_compose(images_root_path=image_root_path, image_names=image_names,
                              images_size=512, images_row=2, images_column=2,
                              images_save_path=Path(destination_path, '{}-front.jpg'.format(str(dir_name))))  # 调用函数

            # 正后方 0 + 7
            tile_list = [(x, y) for y in range(1, 3) for x in [7, 0]]
            image_names = ['{}-{}.jpg'.format(str(tile_x), str(tile_y)) for (tile_x, tile_y) in tile_list]

            work_tag = True  # 设定 判断标记，如果True，则合成该方向街景
            for image_name in image_names:
                p = Path(image_root_path, image_name)
                if not p.exists():
                    work_tag = False
                    break
            if work_tag:
                image_compose(images_root_path=image_root_path, image_names=image_names,
                              images_size=512, images_row=2, images_column=2,
                              images_save_path=Path(destination_path, '{}-back.jpg'.format(str(dir_name))))

            # 左边 1 + 2
            tile_list = [(x, y) for y in range(1, 3) for x in [1, 2]]
            image_names = ['{}-{}.jpg'.format(str(tile_x), str(tile_y)) for (tile_x, tile_y) in tile_list]
            work_tag = True  # 设定 判断标记，如果True，则合成该方向街景
            for image_name in image_names:
                p = Path(image_root_path, image_name)
                if not p.exists():
                    work_tag = False
                    break
            if work_tag:
                image_compose(images_root_path=image_root_path, image_names=image_names,
                              images_size=512, images_row=2, images_column=2,
                              images_save_path=Path(destination_path, '{}-left.jpg'.format(str(dir_name))))

            # 右边
            tile_list = [(x, y) for y in range(1, 3) for x in [5, 6]]
            image_names = ['{}-{}.jpg'.format(str(tile_x), str(tile_y)) for (tile_x, tile_y) in tile_list]
            work_tag = True  # 设定 判断标记，如果True，则合成该方向街景
            for image_name in image_names:
                p = Path(image_root_path, image_name)
                if not p.exists():
                    work_tag = False
                    break
            if work_tag:
                image_compose(images_root_path=image_root_path, image_names=image_names,
                              images_size=512, images_row=2, images_column=2,
                              images_save_path=Path(destination_path, '{}-right.jpg'.format(str(dir_name))))

        except:
            print(dir_name)
            
            
         
cores_num = multiprocessing.cpu_count()

with parallel_backend('multiprocessing', n_jobs=cores_num):
    res = Parallel(verbose=1)(delayed(worker)(d) for d in Tile_DIR.iterdir())

[Parallel(n_jobs=12)]: Using backend MultiprocessingBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done  26 tasks      | elapsed:    1.9s
[Parallel(n_jobs=12)]: Done 176 tasks      | elapsed:    8.6s
[Parallel(n_jobs=12)]: Done 426 tasks      | elapsed:   19.7s
[Parallel(n_jobs=12)]: Done 776 tasks      | elapsed:   34.9s
[Parallel(n_jobs=12)]: Done 1226 tasks      | elapsed:   55.0s
[Parallel(n_jobs=12)]: Done 1776 tasks      | elapsed:  1.3min
[Parallel(n_jobs=12)]: Done 2426 tasks      | elapsed:  1.8min
[Parallel(n_jobs=12)]: Done 3176 tasks      | elapsed:  2.4min
[Parallel(n_jobs=12)]: Done 4026 tasks      | elapsed:  3.0min
[Parallel(n_jobs=12)]: Done 4976 tasks      | elapsed:  3.8min
[Parallel(n_jobs=12)]: Done 6026 tasks      | elapsed:  4.6min
[Parallel(n_jobs=12)]: Done 7176 tasks      | elapsed:  5.4min
[Parallel(n_jobs=12)]: Done 8244 out of 8244 | elapsed:  6.2min finished


### DownloadedStatistic

In [16]:
# 统计已下载的影像图片数量
image_count_dict = {}
for svid_path in Image_DIR.iterdir():
    svid = svid_path.stem
    count = len([_ for _ in svid_path.iterdir()])
    image_count_dict[svid]= count


In [17]:
from collections import Counter
print(Counter([v for k, v in image_count_dict.items()]))

Counter({4: 7482, 3: 727, 2: 32, 1: 3})


In [18]:
tile_url_df = pd.read_csv(Path(Metrics_DIR, "tile_url.csv"))
tile_url_df.head()

Unnamed: 0,tile_x,tile_y,tile_url,svid,road_name,wgs_lat,wgs_lon
0,0,1,https://sv0.map.qq.com/tile?svid=3701100714032...,37011007140322135810100,１號幹線,22.292711,114.181733
1,1,1,https://sv0.map.qq.com/tile?svid=3701100714032...,37011007140322135810100,１號幹線,22.292711,114.181733
2,2,1,https://sv0.map.qq.com/tile?svid=3701100714032...,37011007140322135810100,１號幹線,22.292711,114.181733
3,3,1,https://sv0.map.qq.com/tile?svid=3701100714032...,37011007140322135810100,１號幹線,22.292711,114.181733
4,4,1,https://sv0.map.qq.com/tile?svid=3701100714032...,37011007140322135810100,１號幹線,22.292711,114.181733


In [19]:
svid_df = tile_url_df[['svid', 'road_name', 'wgs_lat', 'wgs_lon']].drop_duplicates(
    subset=['svid'], keep='first',
    inplace=False)
svid_df['downloaded_image_count'] = svid_df.svid.map(image_count_dict)
svid_df.head()

Unnamed: 0,svid,road_name,wgs_lat,wgs_lon,downloaded_image_count
0,37011007140322135810100,１號幹線,22.292711,114.181733,4.0
16,37011007140322135802300,１號幹線,22.293698,114.181569,4.0
32,37011007140322135754500,１號幹線,22.294684,114.181405,4.0
48,37011007140322135746700,１號幹線,22.295671,114.181241,4.0
64,37011007140322135737400,１號幹線,22.296657,114.181078,3.0


In [20]:
svid_df_path = Path(Metrics_DIR, "svid.csv")
svid_df.query('downloaded_image_count == 4').to_csv(svid_df_path, header=True, index=False)