<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/place_photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 場所の写真
- [場所を探す](https://developers.google.com/maps/documentation/places/web-service/search-find-place)
- [場所の写真](https://developers.google.com/maps/documentation/places/web-service/photos)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import glob

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [3]:
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

### 人口マスタ

In [4]:
df_pop_master = pd.read_csv(os.path.join(DIR_NAME, 'population_master.csv'))[['town_id', '市区町村名', '町名', '町丁目']]
print(df_pop_master.shape)

(3145, 4)


In [5]:
df_pop_master.head()

Unnamed: 0,town_id,市区町村名,町名,町丁目
0,101003,千代田区,丸の内,丸の内１丁目
1,101004,千代田区,丸の内,丸の内２丁目
2,101005,千代田区,丸の内,丸の内３丁目
3,101007,千代田区,大手町,大手町１丁目
4,101008,千代田区,大手町,大手町２丁目


In [None]:
# file_list = glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg'))
# print(len(file_list))

In [None]:
# # 既に存在している写真のリスト
# df_exist_file = pd.DataFrame([int(os.path.splitext(os.path.basename(file))[0]) for file in file_list], columns=['file'])
# print(len(df_exist_file))

# df_pop_merge = pd.merge(df_pop_master, df_exist_file, left_on='地域ID', right_on='file', how='left')
# print(len(df_pop_merge))

# # 写真がない地域IDを抽出
# df_not_exist = df_pop_merge[df_pop_merge['file'].isna()].reset_index(drop=True)
# print(len(df_not_exist))

In [None]:
# df_not_exist.head()

In [87]:
YOUR_API_KEY = ''

In [88]:
# %%time
# tmp_place_name = '港区元赤坂１丁目にある名所'
# place = quote(tmp_place_name)
# url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place}&inputtype=textquery&fields=name%2Cphoto&language=ja&key={YOUR_API_KEY}"
# response = requests.get(url)
# if response.status_code == requests.codes.ok:
#     tmp_photo_reference = response.json()['candidates'][0]['photos'][0]['photo_reference']
#     print(tmp_photo_reference)

# response.json()

In [64]:
def get_place_info(place_name, unique_no):
    place = quote(place_name + 'にある名所')
    url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place}&inputtype=textquery&fields=name%2Cphoto&language=ja&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        try:
            _df = pd.DataFrame(response.json()['candidates'])
            _df['no'] = unique_no
            _df['place_name'] = place_name
            return _df
        except Exception as e:
            print(unique_no, place_name, e)
            pass
    else:
        print(unique_no, place_name, response.status_code)

    return pd.DataFrame([{'no': unique_no, 'place_name': place_name}])

In [65]:
df_place = pd.DataFrame()
for index, row in df_pop_master.iterrows():
    _df = get_place_info(row['市区町村名']+row['町丁目'], row['town_id'])
    df_place = pd.concat([df_place, _df])
df_place = df_place.reset_index(drop=True)
print(df_place.shape)

(3193, 4)


In [66]:
df_place.head()

Unnamed: 0,name,photos,no,place_name
0,東京駅丸の内駅前広場,"[{'height': 1080, 'html_attributions': ['<a hr...",101003,千代田区丸の内１丁目
1,丸の内ラグビー神社,"[{'height': 3000, 'html_attributions': ['<a hr...",101004,千代田区丸の内２丁目
2,浅草寺・雷門モニュメント,,101005,千代田区丸の内３丁目
3,大手濠,"[{'height': 1944, 'html_attributions': ['<a hr...",101007,千代田区大手町１丁目
4,巨大赤べこ,"[{'height': 3000, 'html_attributions': ['<a hr...",101008,千代田区大手町２丁目


### 重複している行

In [67]:
duplicate_no_list = list(df_place[df_place.duplicated(subset=['no'])]['no'].unique())
len(duplicate_no_list)

20

In [69]:
df_dup = pd.DataFrame()
for no in duplicate_no_list:
    _tmp = df_place[df_place['no'] == no]
    _tmp_2 = _tmp[(_tmp['name'].notna())&(_tmp['photos'].notna())]
    if len(_tmp_2) > 0:
        df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
    else:
        print(f'no data: {no}')
        df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)

print(df_dup.shape)

no data: 103092
(20, 4)


  df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
  df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)


In [70]:
df_place_all = pd.concat([
    df_place[~df_place['no'].isin(duplicate_no_list)],
    df_dup
]).sort_values('no').reset_index(drop=True)
print(df_place_all.shape)

(3145, 4)


In [71]:
df_place_all['no'] = df_place_all['no'].astype(int)

In [72]:
# def get_geometry_lat(x):
#     try:
#         return x['location']['lat']
#     except:
#         return np.NaN

# def get_geometry_lng(x):
#     try:
#         return x['location']['lng']
#     except:
#         return np.NaN

def get_photos_height(x):
    try:
        return x[0]['height']
    except:
        return np.NaN

def get_photos_width(x):
    try:
        return x[0]['width']
    except:
        return np.NaN

def get_photos_ref(x):
    try:
        return x[0]['photo_reference']
    except:
        return np.NaN

In [73]:
# df_place_all['lat'] = df_place_all['geometry'].apply(get_geometry_lat)
# df_place_all['lng'] = df_place_all['geometry'].apply(get_geometry_lng)
df_place_all['height'] = df_place_all['photos'].apply(get_photos_height)
df_place_all['width'] = df_place_all['photos'].apply(get_photos_width)
df_place_all['photo_reference'] = df_place_all['photos'].apply(get_photos_ref)

In [74]:
df_place_all = df_place_all.drop(columns=['photos'])

In [75]:
df_place_all.head()

Unnamed: 0,name,no,place_name,height,width,photo_reference
0,東京駅丸の内駅前広場,101003,千代田区丸の内１丁目,1080.0,1080.0,AWU5eFgPasDCEmZ_Jfx7lw_DOiYJEwQO5C2sf_oYlTjQ-Z...
1,丸の内ラグビー神社,101004,千代田区丸の内２丁目,3000.0,4000.0,AWU5eFhq6XsspnuhJLOH9kJZmlZjy_nSzBQG9HT4YznIFg...
2,浅草寺・雷門モニュメント,101005,千代田区丸の内３丁目,,,
3,大手濠,101007,千代田区大手町１丁目,1944.0,2592.0,AWU5eFgr5mkU2W2MvOvRP453JPg5uVHpnua87f89NLvfvz...
4,巨大赤べこ,101008,千代田区大手町２丁目,3000.0,4000.0,AWU5eFhx7PSuwEfB9W40MGO7TESt5VKVS0RRpuvBoUrgOB...


In [76]:
df_place_all.to_csv(os.path.join(DIR_NAME, 'place_photo_20231122_2.csv'), index=False)

In [12]:
def save_photo_image(photo_ref, width, id):
    url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth={width}&photo_reference={photo_ref}&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        image = response.content
        filename = os.path.join(DIR_NAME, f'photo/{id}.jpg')
        with open(filename, "wb") as f:
            f.write(image)

In [77]:
# save_photo_image(tmp_photo_reference, 1000, tmp_place_name)

In [78]:
# save_photo_count = 0
# no_photo_count = 0
# for index, row in df_place_all.iterrows():
#     photo_ref = row['photo_reference']
#     if not np.isnan(photo_ref):
#         width = int(row['width']) if row['width'] < 1000 else 1000
#         save_photo_image(photo_ref, width, row['no'])
#         save_photo_count += 1
#     else:
#         no_photo_count += 1

# print(f'save photo: {save_photo_count}, no photo: {no_photo_count}')

In [79]:
# photo_list = list(glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg')))
# print(len(photo_list))

In [80]:
get_photo = df_place_all[df_place_all['photo_reference'].notna()]
print(len(get_photo))

3058


In [81]:
# 写真を取得できなかった場所のリスト
df_place_all[df_place_all['photo_reference'].isna()]

Unnamed: 0,name,no,place_name,height,width,photo_reference
2,浅草寺・雷門モニュメント,101005,千代田区丸の内３丁目,,,
92,神田富山町,101122,千代田区神田富山町,,,
112,神田佐久間町河岸,101145,千代田区神田佐久間河岸,,,
119,温泉の活用、温泉関連商品の研究開発,102008,中央区銀座１丁目,,,
178,日本橋馬喰町,102081,中央区東日本橋１丁目,,,
180,日本橋馬喰町,102083,中央区東日本橋３丁目,,,
235,太閤千代しだれ,103029,港区三田３丁目,,,
238,増上寺碑,103033,港区浜松町１丁目,,,
239,増上寺碑,103034,港区浜松町２丁目,,,
250,晚上の東京鐵塔,103048,港区虎ノ門５丁目,,,


### GCSにアップロード

In [None]:
# from google.cloud import storage, exceptions
# from google.oauth2 import service_account

In [None]:
# key_path = '/content/drive/MyDrive/key/z-gard-aff003f55fbb.json'
# credential = service_account.Credentials.from_service_account_file(key_path)

# project_id = 'z-gard'
# bucket_name = 'z-gard_town_images'
# client = storage.Client(project_id, credentials=credential)
# bucket = client.get_bucket(bucket_name)

In [None]:
# get_file_list = [f'/content/drive/MyDrive/z-gard/data/photo/{no}.jpg' for no in list(get_photo['no'])]

In [85]:
# for file_path in get_file_list:
#     file_name = os.path.basename(file_path)
#     blob = bucket.blob(file_name)
#     blob.upload_from_filename(file_path)
#     print(file_name)

In [86]:
# len([b for b in bucket.list_blobs()])