<a href="https://colab.research.google.com/github/z-gard/analysis/blob/main/notebooks/place_photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 場所の写真
- [場所を探す](https://developers.google.com/maps/documentation/places/web-service/search-find-place)
- [場所の写真](https://developers.google.com/maps/documentation/places/web-service/photos)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import glob

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [3]:
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

### 人口マスタ

In [4]:
df_pop_master = pd.read_csv(os.path.join(DIR_NAME, 'population_master.csv'))[['地域ID', '市区町村名', '町名', '町丁目', '緯度', '経度']]
print(df_pop_master.shape)

(3145, 6)


In [5]:
df_pop_master.head()

Unnamed: 0,地域ID,市区町村名,町名,町丁目,緯度,経度
0,101003,千代田区,丸の内,丸の内１丁目,35.68161,139.767511
1,101004,千代田区,丸の内,丸の内２丁目,35.680071,139.763944
2,101005,千代田区,丸の内,丸の内３丁目,35.67689,139.764039
3,101007,千代田区,大手町,大手町１丁目,35.688067,139.764353
4,101008,千代田区,大手町,大手町２丁目,35.686416,139.76853


In [8]:
file_list = glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg'))
print(len(file_list))

2744


In [21]:
# 既に存在している写真のリスト
df_exist_file = pd.DataFrame([int(os.path.splitext(os.path.basename(file))[0]) for file in file_list], columns=['file'])
print(len(df_exist_file))

df_pop_merge = pd.merge(df_pop_master, df_exist_file, left_on='地域ID', right_on='file', how='left')
print(len(df_pop_merge))

# 写真がない地域IDを抽出
df_not_exist = df_pop_merge[df_pop_merge['file'].isna()].reset_index(drop=True)
print(len(df_not_exist))

2744
3145
401


In [22]:
df_not_exist.head()

Unnamed: 0,地域ID,市区町村名,町名,町丁目,緯度,経度,file
0,101014,千代田区,有楽町,有楽町２丁目,35.673923,139.763245,
1,101022,千代田区,隼町,隼町,35.681638,139.743347,
2,101025,千代田区,平河町,平河町２丁目,35.68027,139.740528,
3,101033,千代田区,紀尾井町,紀尾井町,35.680809,139.73568,
4,101034,千代田区,一番町,一番町,35.687525,139.741582,


In [70]:
YOUR_API_KEY = 'xxxxxx'

In [24]:
def get_place_info(place_name, unique_no):
    place = quote(place_name)
    url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place}&inputtype=textquery&fields=name%2Cphoto%2Cgeometry&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        try:
            _df = pd.DataFrame(response.json()['candidates'])
            _df['no'] = unique_no
            _df['place_name'] = place_name
            return _df
        except Exception as e:
            print(unique_no, place_name, e)
            pass
    else:
        print(unique_no, place_name, response.status_code)
    
    return pd.DataFrame([{'no': unique_no, 'place_name': place_name}])

In [25]:
df_place = pd.DataFrame()
for index, row in df_not_exist.iterrows():
    _df = get_place_info(row['市区町村名']+row['町丁目'], row['地域ID'])
    df_place = pd.concat([df_place, _df])
df_place = df_place.reset_index(drop=True)
print(df_place.shape)

(402, 5)


In [26]:
df_place.head()

Unnamed: 0,geometry,name,photos,no,place_name
0,"{'location': {'lat': 35.6739601, 'lng': 139.76...",2 Chome Yurakucho,"[{'height': 3000, 'html_attributions': ['<a hr...",101014,千代田区有楽町２丁目
1,"{'location': {'lat': 35.6818309, 'lng': 139.74...",Hayabusacho,"[{'height': 1824, 'html_attributions': ['<a hr...",101022,千代田区隼町
2,"{'location': {'lat': 35.6808059, 'lng': 139.74...",2 Chome Hirakawachō,"[{'height': 2976, 'html_attributions': ['<a hr...",101025,千代田区平河町２丁目
3,"{'location': {'lat': 35.6810509, 'lng': 139.73...",Kioicho,"[{'height': 3492, 'html_attributions': ['<a hr...",101033,千代田区紀尾井町
4,"{'location': {'lat': 35.6870561, 'lng': 139.74...",Ichibancho,"[{'height': 3024, 'html_attributions': ['<a hr...",101034,千代田区一番町


### 重複している行

In [27]:
duplicate_no_list = list(df_place[df_place.duplicated(subset=['no'])]['no'].unique())
len(duplicate_no_list)

1

In [29]:
df_dup = pd.DataFrame()
for no in duplicate_no_list:
    _tmp = df_place[df_place['no'] == no]
    _tmp_2 = _tmp[(_tmp['geometry'].notna())&(_tmp['photos'].notna())]
    if len(_tmp_2) > 0:
        df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
    else:
        print(f'no data: {no}')
        df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)

print(df_dup.shape)

(1, 5)


In [32]:
df_place_all = pd.concat([
    df_place[~df_place['no'].isin(duplicate_no_list)],
    df_dup
]).sort_values('no').reset_index(drop=True)
print(df_place_all.shape)

(401, 5)


In [33]:
df_place_all['no'] = df_place_all['no'].astype(int)

In [35]:
def get_photos_height(x):
    try:
        return x[0]['height']
    except:
        return np.NaN

def get_photos_width(x):
    try:
        return x[0]['width']
    except:
        return np.NaN

def get_photos_ref(x):
    try:
        return x[0]['photo_reference']
    except:
        return np.NaN

In [36]:
df_place_all['lat'] = df_place_all['geometry'].apply(lambda x: x['location']['lat'])
df_place_all['lng'] = df_place_all['geometry'].apply(lambda x: x['location']['lng'])
df_place_all['height'] = df_place_all['photos'].apply(get_photos_height)
df_place_all['width'] = df_place_all['photos'].apply(get_photos_width)
df_place_all['photo_reference'] = df_place_all['photos'].apply(get_photos_ref)

In [37]:
df_place_all = df_place_all.drop(columns=['geometry', 'photos', 'name'])

In [38]:
df_place_all.head()

Unnamed: 0,no,place_name,lat,lng,height,width,photo_reference
0,101014,千代田区有楽町２丁目,35.67396,139.763291,3000.0,4000.0,AcYSjRi57AVocX9SyWu9a6V4jwWRTvH5EQg0ulonflacf1...
1,101022,千代田区隼町,35.681831,139.742849,1824.0,1824.0,AcYSjRhMMopj3osIRqQlC-oDopT_f6sMQuHtADxlRh--MR...
2,101025,千代田区平河町２丁目,35.680806,139.740295,2976.0,3968.0,AcYSjRipw-A886FMjXkazoCM27MRlUsBniY2jP3bO_CggU...
3,101033,千代田区紀尾井町,35.681051,139.735186,3492.0,4656.0,AcYSjRgplY5VcuIVkpP5TBCl3kZHn0jmBvFQo3MECu9nA-...
4,101034,千代田区一番町,35.687056,139.741572,3024.0,4032.0,AcYSjRjzTJPDUMuOUcPYbhMjRJ1Sygcc4GGJ6xdFP43wvf...


In [39]:
df_place_all.to_csv(os.path.join(DIR_NAME, 'place_photo_2.csv'), index=False)

In [41]:
def save_photo_image(photo_ref, width, id):
    url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth={width}&photo_reference={photo_ref}&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        image = response.content
        filename = os.path.join(DIR_NAME, f'photo/{id}.jpg')
        with open(filename, "wb") as f:
            f.write(image)

In [43]:
save_photo_count = 0
no_photo_count = 0
for index, row in df_place_all.iterrows():
    photo_ref = row['photo_reference']
    if not np.isnan(photo_ref):
        width = int(row['width']) if row['width'] < 1000 else 1000
        save_photo_image(photo_ref, width, row['no'])
        save_photo_count += 1
    else:
        no_photo_count += 1

print(f'save photo: {save_photo_count}, no photo: {no_photo_count}')

save photo: 401, no photo: 0


In [44]:
photo_list = list(glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg')))
print(len(photo_list))

3087


In [52]:
get_photo = df_place_all[df_place_all['photo_reference'].notna()]
print(len(get_photo))

343


In [66]:
# 写真を取得できなかった場所のリスト
df_place_all[df_place_all['photo_reference'].isna()]

Unnamed: 0,no,place_name,lat,lng,height,width,photo_reference
14,101045,千代田区九段南１丁目,35.694267,139.752151,,,
15,101050,千代田区九段北１丁目,35.69663,139.751001,,,
34,102017,中央区新富１丁目,35.673105,139.774187,,,
35,102020,中央区入船１丁目,35.672987,139.776172,,,
39,102044,中央区新川２丁目,35.675004,139.781596,,,
56,102119,中央区晴海２丁目,35.656149,139.783068,,,
57,102123,中央区水面,35.669816,139.744166,,,
60,103028,港区三田２丁目,35.647959,139.744571,,,
63,103055,港区南麻布１丁目,35.650184,139.736773,,,
64,103063,港区元麻布３丁目,35.656481,139.729638,,,


### GCSにアップロード

In [50]:
from google.cloud import storage, exceptions
from google.oauth2 import service_account

In [67]:
key_path = '/content/drive/MyDrive/key/z-gard-aff003f55fbb.json'
credential = service_account.Credentials.from_service_account_file(key_path)

project_id = 'z-gard'
bucket_name = 'z-gard_town_images'
client = storage.Client(project_id, credentials=credential)
bucket = client.get_bucket(bucket_name)

In [68]:
get_file_list = [f'/content/drive/MyDrive/z-gard/data/photo/{no}.jpg' for no in list(get_photo['no'])]

In [69]:
for file_path in get_file_list:
    file_name = os.path.basename(file_path)
    blob = bucket.blob(file_name)
    blob.upload_from_filename(file_path)
    print(file_name)

101014.jpg
101022.jpg
101025.jpg
101033.jpg
101034.jpg
101035.jpg
101036.jpg
101037.jpg
101038.jpg
101039.jpg
101040.jpg
101041.jpg
101042.jpg
101043.jpg
101093.jpg
101120.jpg
101121.jpg
101122.jpg
101123.jpg
101128.jpg
101129.jpg
101130.jpg
101131.jpg
101136.jpg
101142.jpg
101143.jpg
101144.jpg
101145.jpg
101146.jpg
101147.jpg
102002.jpg
102013.jpg
102022.jpg
102027.jpg
102036.jpg
102056.jpg
102057.jpg
102060.jpg
102061.jpg
102062.jpg
102066.jpg
102071.jpg
102075.jpg
102079.jpg
102084.jpg
102089.jpg
102090.jpg
102099.jpg
102105.jpg
102116.jpg
102118.jpg
103003.jpg
103023.jpg
103052.jpg
103053.jpg
103089.jpg
104007.jpg
104008.jpg
104009.jpg
104014.jpg
104015.jpg
104016.jpg
104017.jpg
104018.jpg
104019.jpg
104020.jpg
104021.jpg
104022.jpg
104023.jpg
104024.jpg
104027.jpg
104040.jpg
104045.jpg
104049.jpg
104050.jpg
104051.jpg
104052.jpg
104053.jpg
104054.jpg
104062.jpg
104063.jpg
104064.jpg
104065.jpg
104066.jpg
104067.jpg
104068.jpg
104069.jpg
104070.jpg
104071.jpg
104072.jpg
104073.jpg

In [71]:
len([b for b in bucket.list_blobs()])

3087