<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/place_photo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 場所の写真
- [場所を探す](https://developers.google.com/maps/documentation/places/web-service/search-find-place)
- [場所の写真](https://developers.google.com/maps/documentation/places/web-service/photos)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import requests
import json
import pandas as pd
from urllib.parse import urlparse, quote
import io
import numpy as np
import glob

pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [3]:
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

### 人口マスタ

In [8]:
df_pop_master = pd.read_csv(os.path.join(DIR_NAME, 'population_master.csv'))[['town_id', '市区町村名', '町名', '町丁目']]
print(df_pop_master.shape)

(3145, 4)


In [9]:
df_pop_master.head()

Unnamed: 0,town_id,市区町村名,町名,町丁目
0,101003,千代田区,丸の内,丸の内１丁目
1,101004,千代田区,丸の内,丸の内２丁目
2,101005,千代田区,丸の内,丸の内３丁目
3,101007,千代田区,大手町,大手町１丁目
4,101008,千代田区,大手町,大手町２丁目


In [13]:
# file_list = glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg'))
# print(len(file_list))

In [12]:
# # 既に存在している写真のリスト
# df_exist_file = pd.DataFrame([int(os.path.splitext(os.path.basename(file))[0]) for file in file_list], columns=['file'])
# print(len(df_exist_file))

# df_pop_merge = pd.merge(df_pop_master, df_exist_file, left_on='地域ID', right_on='file', how='left')
# print(len(df_pop_merge))

# # 写真がない地域IDを抽出
# df_not_exist = df_pop_merge[df_pop_merge['file'].isna()].reset_index(drop=True)
# print(len(df_not_exist))

In [11]:
# df_not_exist.head()

In [14]:
YOUR_API_KEY = 'AIzaSyDbyTP873QCjlAnLtsz9vw42_Fc9iNaz9Q'

In [15]:
def get_place_info(place_name, unique_no):
    place = quote(place_name)
    url = f"https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={place}&inputtype=textquery&fields=name%2Cphoto%2Cgeometry&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        try:
            _df = pd.DataFrame(response.json()['candidates'])
            _df['no'] = unique_no
            _df['place_name'] = place_name
            return _df
        except Exception as e:
            print(unique_no, place_name, e)
            pass
    else:
        print(unique_no, place_name, response.status_code)

    return pd.DataFrame([{'no': unique_no, 'place_name': place_name}])

In [19]:
df_place = pd.DataFrame()
for index, row in df_pop_master.iterrows():
    _df = get_place_info(row['市区町村名']+row['町丁目'], row['town_id'])
    df_place = pd.concat([df_place, _df])
df_place = df_place.reset_index(drop=True)
print(df_place.shape)

121331 足立区六町３丁目 500
(3153, 5)


In [20]:
df_place.head()

Unnamed: 0,geometry,name,photos,no,place_name
0,"{'location': {'lat': 35.6818718, 'lng': 139.76...",1 Chome Marunouchi,"[{'height': 4147, 'html_attributions': ['<a hr...",101003,千代田区丸の内１丁目
1,"{'location': {'lat': 35.6796995, 'lng': 139.76...",2 Chome Marunouchi,"[{'height': 3000, 'html_attributions': ['<a hr...",101004,千代田区丸の内２丁目
2,"{'location': {'lat': 35.6767681, 'lng': 139.76...",3 Chome Marunouchi,"[{'height': 3264, 'html_attributions': ['<a hr...",101005,千代田区丸の内３丁目
3,"{'location': {'lat': 35.6877339, 'lng': 139.76...",1 Chome Ōtemachi,"[{'height': 786, 'html_attributions': ['<a hre...",101007,千代田区大手町１丁目
4,"{'location': {'lat': 35.6863398, 'lng': 139.76...",2 Chome Ōtemachi,,101008,千代田区大手町２丁目


### 重複している行

In [21]:
duplicate_no_list = list(df_place[df_place.duplicated(subset=['no'])]['no'].unique())
len(duplicate_no_list)

9

In [22]:
df_dup = pd.DataFrame()
for no in duplicate_no_list:
    _tmp = df_place[df_place['no'] == no]
    _tmp_2 = _tmp[(_tmp['geometry'].notna())&(_tmp['photos'].notna())]
    if len(_tmp_2) > 0:
        df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
    else:
        print(f'no data: {no}')
        df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)

print(df_dup.shape)

no data: 101066
no data: 101099
no data: 101101
(9, 5)


  df_dup = df_dup.append(_tmp_2.iloc[0], ignore_index=True)
  df_dup = df_dup.append(_tmp.iloc[0], ignore_index=True)


In [23]:
df_place_all = pd.concat([
    df_place[~df_place['no'].isin(duplicate_no_list)],
    df_dup
]).sort_values('no').reset_index(drop=True)
print(df_place_all.shape)

(3144, 5)


In [24]:
df_place_all['no'] = df_place_all['no'].astype(int)

In [29]:
def get_geometry_lat(x):
    try:
        return x['location']['lat']
    except:
        return np.NaN

def get_geometry_lng(x):
    try:
        return x['location']['lng']
    except:
        return np.NaN

def get_photos_height(x):
    try:
        return x[0]['height']
    except:
        return np.NaN

def get_photos_width(x):
    try:
        return x[0]['width']
    except:
        return np.NaN

def get_photos_ref(x):
    try:
        return x[0]['photo_reference']
    except:
        return np.NaN

In [30]:
df_place_all['lat'] = df_place_all['geometry'].apply(get_geometry_lat)
df_place_all['lng'] = df_place_all['geometry'].apply(get_geometry_lng)
df_place_all['height'] = df_place_all['photos'].apply(get_photos_height)
df_place_all['width'] = df_place_all['photos'].apply(get_photos_width)
df_place_all['photo_reference'] = df_place_all['photos'].apply(get_photos_ref)

In [31]:
df_place_all = df_place_all.drop(columns=['geometry', 'photos', 'name'])

In [32]:
df_place_all.head()

Unnamed: 0,no,place_name,lat,lng,height,width,photo_reference
0,101003,千代田区丸の内１丁目,35.681872,139.765847,4147.0,5184.0,AWU5eFgpegrHL-hi5DhA9ZfsKZlNilcGtMKbk6coU9ir6z...
1,101004,千代田区丸の内２丁目,35.679699,139.763291,3000.0,5333.0,AWU5eFg91w8fI7kBiqMdn08yhPtVezrdniuHL76dW1IR0T...
2,101005,千代田区丸の内３丁目,35.676768,139.764569,3264.0,2448.0,AWU5eFh890yCHQBOZdYYG9DRVg-G3Yf-k1rNYi9NrEqXx8...
3,101007,千代田区大手町１丁目,35.687734,139.763291,786.0,664.0,AWU5eFiec6gbMlqHVa_IITkrSpe327YqZfSZgcxpeg1aa-...
4,101008,千代田区大手町２丁目,35.68634,139.768403,,,


In [33]:
df_place_all.to_csv(os.path.join(DIR_NAME, 'place_photo_20231122.csv'), index=False)

In [None]:
def save_photo_image(photo_ref, width, id):
    url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth={width}&photo_reference={photo_ref}&key={YOUR_API_KEY}"
    response = requests.get(url)
    if response.status_code == requests.codes.ok:
        image = response.content
        filename = os.path.join(DIR_NAME, f'photo/{id}.jpg')
        with open(filename, "wb") as f:
            f.write(image)

In [None]:
# save_photo_count = 0
# no_photo_count = 0
# for index, row in df_place_all.iterrows():
#     photo_ref = row['photo_reference']
#     if not np.isnan(photo_ref):
#         width = int(row['width']) if row['width'] < 1000 else 1000
#         save_photo_image(photo_ref, width, row['no'])
#         save_photo_count += 1
#     else:
#         no_photo_count += 1

# print(f'save photo: {save_photo_count}, no photo: {no_photo_count}')

save photo: 401, no photo: 0


In [None]:
# photo_list = list(glob.glob(os.path.join(DIR_NAME, 'photo/*.jpg')))
# print(len(photo_list))

3087


In [34]:
get_photo = df_place_all[df_place_all['photo_reference'].notna()]
print(len(get_photo))

728


In [35]:
# 写真を取得できなかった場所のリスト
df_place_all[df_place_all['photo_reference'].isna()]

Unnamed: 0,no,place_name,lat,lng,height,width,photo_reference
4,101008,千代田区大手町２丁目,35.686340,139.768403,,,
6,101011,千代田区内幸町２丁目,35.670851,139.753389,,,
9,101016,千代田区霞が関１丁目,35.674453,139.753069,,,
11,101018,千代田区霞が関３丁目,35.671891,139.746682,,,
13,101021,千代田区永田町２丁目,35.675580,139.741572,,,
...,...,...,...,...,...,...,...
3139,123235,江戸川区臨海町２丁目,35.650634,139.858566,,,
3140,123236,江戸川区臨海町３丁目,35.648273,139.865606,,,
3141,123237,江戸川区臨海町４丁目,35.646102,139.863046,,,
3142,123238,江戸川区臨海町５丁目,35.645049,139.869766,,,


### GCSにアップロード

In [None]:
# from google.cloud import storage, exceptions
# from google.oauth2 import service_account

In [None]:
# key_path = '/content/drive/MyDrive/key/z-gard-aff003f55fbb.json'
# credential = service_account.Credentials.from_service_account_file(key_path)

# project_id = 'z-gard'
# bucket_name = 'z-gard_town_images'
# client = storage.Client(project_id, credentials=credential)
# bucket = client.get_bucket(bucket_name)

In [None]:
# get_file_list = [f'/content/drive/MyDrive/z-gard/data/photo/{no}.jpg' for no in list(get_photo['no'])]

In [None]:
# for file_path in get_file_list:
#     file_name = os.path.basename(file_path)
#     blob = bucket.blob(file_name)
#     blob.upload_from_filename(file_path)
#     print(file_name)

101014.jpg
101022.jpg
101025.jpg
101033.jpg
101034.jpg
101035.jpg
101036.jpg
101037.jpg
101038.jpg
101039.jpg
101040.jpg
101041.jpg
101042.jpg
101043.jpg
101093.jpg
101120.jpg
101121.jpg
101122.jpg
101123.jpg
101128.jpg
101129.jpg
101130.jpg
101131.jpg
101136.jpg
101142.jpg
101143.jpg
101144.jpg
101145.jpg
101146.jpg
101147.jpg
102002.jpg
102013.jpg
102022.jpg
102027.jpg
102036.jpg
102056.jpg
102057.jpg
102060.jpg
102061.jpg
102062.jpg
102066.jpg
102071.jpg
102075.jpg
102079.jpg
102084.jpg
102089.jpg
102090.jpg
102099.jpg
102105.jpg
102116.jpg
102118.jpg
103003.jpg
103023.jpg
103052.jpg
103053.jpg
103089.jpg
104007.jpg
104008.jpg
104009.jpg
104014.jpg
104015.jpg
104016.jpg
104017.jpg
104018.jpg
104019.jpg
104020.jpg
104021.jpg
104022.jpg
104023.jpg
104024.jpg
104027.jpg
104040.jpg
104045.jpg
104049.jpg
104050.jpg
104051.jpg
104052.jpg
104053.jpg
104054.jpg
104062.jpg
104063.jpg
104064.jpg
104065.jpg
104066.jpg
104067.jpg
104068.jpg
104069.jpg
104070.jpg
104071.jpg
104072.jpg
104073.jpg

In [None]:
# len([b for b in bucket.list_blobs()])

3087