<a href="https://colab.research.google.com/github/z-gard/analysis/blob/main/notebooks/place_photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 場所の写真
- [場所を探す](https://developers.google.com/maps/documentation/places/web-service/search-find-place)
- [場所の写真](https://developers.google.com/maps/documentation/places/web-service/photos)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [3]:
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

### 人口マスタ

In [6]:
df_pop_master = pd.read_csv(os.path.join(DIR_NAME, 'population_master.csv'))[['地域ID', '市区町村名', '町名', '町丁目', '緯度', '経度']]
print(df_pop_master.shape)

(2921, 6)


In [96]:
df_pop_master.head()

Unnamed: 0,地域ID,市区町村名,町名,町丁目,緯度,経度
0,101003,千代田区,丸の内,丸の内１丁目,35.68161,139.767511
1,101004,千代田区,丸の内,丸の内２丁目,35.680071,139.763944
2,101005,千代田区,丸の内,丸の内３丁目,35.67689,139.764039
3,101007,千代田区,大手町,大手町１丁目,35.688067,139.764353
4,101008,千代田区,大手町,大手町２丁目,35.686416,139.76853


In [5]:
YOUR_API_KEY = 'xxxxxx'

In [85]:
def get_place_info(place_name, unique_no):
    place = quote(place_name)
    url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place}&inputtype=textquery&fields=name%2Cphoto%2Cgeometry&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        try:
            _df = pd.DataFrame(response.json()['candidates'])
            _df['no'] = unique_no
            _df['place_name'] = place_name
            return _df
        except Exception as e:
            print(unique_no, place_name, e)
            pass
    else:
        print(unique_no, place_name, response.status_code)
    
    return pd.DataFrame([{'no': unique_no, 'place_name': place_name}])

In [92]:
df_place = pd.DataFrame()
for index, row in df_pop_master.iterrows():
    _df = get_place_info(row['市区町村名']+row['町丁目'], row['地域ID'])
    df_place = pd.concat([df_place, _df])
df_place = df_place.reset_index(drop=True)
print(df_place.shape)

(2970, 5)


In [118]:
df_place.head()

Unnamed: 0,geometry,name,photos,no,place_name
0,"{'location': {'lat': 35.6818718, 'lng': 139.76...",1 Chome Marunouchi,"[{'height': 4147, 'html_attributions': ['<a hr...",101003,千代田区丸の内１丁目
1,"{'location': {'lat': 35.6796995, 'lng': 139.76...",2 Chome Marunouchi,"[{'height': 3000, 'html_attributions': ['<a hr...",101004,千代田区丸の内２丁目
2,"{'location': {'lat': 35.6767681, 'lng': 139.76...",3 Chome Marunouchi,"[{'height': 4032, 'html_attributions': ['<a hr...",101005,千代田区丸の内３丁目
3,"{'location': {'lat': 35.6877339, 'lng': 139.76...",1 Chome Ōtemachi,"[{'height': 786, 'html_attributions': ['<a hre...",101007,千代田区大手町１丁目
4,"{'location': {'lat': 35.6863398, 'lng': 139.76...",2 Chome Ōtemachi,"[{'height': 2625, 'html_attributions': ['<a hr...",101008,千代田区大手町２丁目


### 重複している行

In [105]:
duplicate_no_list = list(df_place[df_place.duplicated(subset=['no'])]['no'].unique())
len(duplicate_no_list)

130

In [113]:
df_dup = pd.DataFrame()
for no in duplicate_no_list:
    _tmp = df_place[df_place['no'] == no]
    _tmp_2 = _tmp[(_tmp['geometry'].notna())&(_tmp['photos'].notna())]
    if len(_tmp_2) > 0:
        df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
    else:
        print(f'no data: {no}')
        df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)

print(df_dup.shape)

no data: 111169
no data: 115022
no data: 119050
(130, 5)


In [122]:
df_place_all = pd.concat([
    df_place[~df_place['no'].isin(duplicate_no_list)],
    df_dup
]).sort_values('no').reset_index(drop=True)
print(df_place_all.shape)

(2809, 5)


In [125]:
df_place_all['no'] = df_place_all['no'].astype(int)

In [142]:
def get_photos_height(x):
    try:
        return x[0]['height']
    except:
        return np.NaN

def get_photos_width(x):
    try:
        return x[0]['width']
    except:
        return np.NaN

def get_photos_ref(x):
    try:
        return x[0]['photo_reference']
    except:
        return np.NaN

In [127]:
df_place_all['lat'] = df_place_all['geometry'].apply(lambda x: x['location']['lat'])
df_place_all['lng'] = df_place_all['geometry'].apply(lambda x: x['location']['lng'])
df_place_all['height'] = df_place_all['photos'].apply(get_photos_height)
df_place_all['width'] = df_place_all['photos'].apply(get_photos_width)
df_place_all['photo_reference'] = df_place_all['photos'].apply(get_photos_ref)

In [145]:
df_place_all = df_place_all.drop(columns=['geometry', 'photos', 'name'])

In [147]:
df_place_all.head()

Unnamed: 0,no,place_name,lat,lng,height,width,photo_reference
0,101003,千代田区丸の内１丁目,35.681872,139.765847,4147.0,5184.0,AcYSjRgYnEcFz-b85pT5mCBWHGmFPbxoEQJFsyJtYwvOTQ...
1,101004,千代田区丸の内２丁目,35.679699,139.763291,3000.0,5333.0,AcYSjRgdmiI4JCe0VreKniz4EE58uLjCVBiVhWCxmRY3DI...
2,101005,千代田区丸の内３丁目,35.676768,139.764569,4032.0,2268.0,AcYSjRiLx33aKxB8F3_NTqpImlbrNicnXWobQgqjt1Vd8O...
3,101007,千代田区大手町１丁目,35.687734,139.763291,786.0,664.0,AcYSjRiPDsSddQWQzGv5z9gQxNBCyHNYp_sqtKxeKwSTb8...
4,101008,千代田区大手町２丁目,35.68634,139.768403,2625.0,1836.0,AcYSjRiUOku7EXEuStodTjEO9ce8HkIH4WxDD_j_c6qnm2...


In [148]:
df_place_all.to_csv(os.path.join(DIR_NAME, 'place_photo.csv'), index=False)

In [32]:
width = df_place.iloc[0]['width']
photo_ref = df_place.iloc[0]['photo_reference']
id = df_place.iloc[0]['no']

In [152]:
def save_photo_image(photo_ref, width, id):
    url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth={width}&photo_reference={photo_ref}&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        image = response.content
        filename = os.path.join(DIR_NAME, f'photo/{id}.jpg')
        with open(filename, "wb") as f:
            f.write(image)

In [154]:
save_photo_count = 0
no_photo_count = 0
for index, row in df_place_all.iterrows():
    photo_ref = row['photo_reference']
    if photo_ref is not None:
        width = int(row['width']) if row['width'] < 1000 else 1000
        save_photo_image(photo_ref, width, row['no'])
        save_photo_count += 1
    else:
        no_photo_count += 1

print(f'save photo: {save_photo_count}, no photo: {no_photo_count}')

save photo: 2809, no photo: 0
