<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/famous_place_master.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 名所・旧跡

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# notebook can read from and write to files stored on Drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import pandas as pd
import numpy as np

# Widen pandas' display limits so wide/long frames are not truncated in output.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100

In [3]:
# Directory on the mounted Google Drive that holds the input CSV and receives
# the generated master CSV.
# NOTE(review): the folder is spelled 'z-gard' while the GitHub repository is
# 'z-guard' — confirm this matches the actual Drive folder name.
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

In [4]:
# Load the raw place list and report its size as a quick sanity check.
tourist_csv_path = os.path.join(DIR_NAME, 'tourist_worship_nearby.csv')
df_tourist_org = pd.read_csv(tourist_csv_path)
print(df_tourist_org.shape)

(14983, 7)


In [5]:
# Translation table that deletes the quote, bracket and space characters from
# the raw `types` text, leaving only comma-separated type names.
_TYPES_SYNTAX_CHARS = str.maketrans('', '', '"[]\' ')


def _split_types(raw_types):
    """Strip quotes/brackets/spaces from the raw text and split it on commas.

    Presumably `raw_types` is a stringified list of place types — verify
    against the CSV.
    """
    return raw_types.translate(_TYPES_SYNTAX_CHARS).split(',')


df_tourist_org['types_list'] = df_tourist_org['types'].apply(_split_types)
# Flatten to one entry per (row, type) so individual types can be counted.
types_list = df_tourist_org['types_list'].explode()

In [6]:
# Show the 25 most frequent place types.
top_type_counts = types_list.value_counts().iloc[:25]
top_type_counts

point_of_interest     14983
establishment         14983
tourist_attraction     4880
park                   2753
store                   840
food                    672
place_of_worship        625
health                  584
parking                 464
restaurant              365
museum                  304
general_contractor      222
transit_station         208
school                  170
finance                 168
hair_care               152
atm                     119
real_estate_agency      118
storage                 113
home_goods_store        109
cafe                     77
laundry                  68
doctor                   64
beauty_salon             64
clothing_store           63
Name: types_list, dtype: int64

In [7]:
# Start every row without a facility category; the following cells fill this
# column in step by step, and most of them only touch rows that are still null.
df_tourist_org['施設分類'] = None

In [8]:
# Label a row as a park ('公園') when its types include 'park' or when its
# name ends with one of the park-like suffixes below.
is_park = df_tourist_org['types_list'].apply(lambda types: 'park' in types)
for place in ['公園', '広場', '遊園', '遊園地', '緑地']:
    # na=False: a missing name simply does not match. Without it the suffix
    # test yields NaN for that row and .loc rejects the mask with
    # "Cannot mask with non-boolean array containing NA / NaN values".
    is_park |= df_tourist_org['name'].str.endswith(place, na=False)
# Every condition maps to the same label, so one assignment covers them all.
df_tourist_org.loc[is_park, '施設分類'] = '公園'

In [9]:
# Maps a keyword found in a place name to its worship-place category
# (temple '寺', shrine '神社' or church '教会'). Built from per-category
# keyword groups so the grouping is visible at a glance.
conv_worship_place = {
    keyword: category
    for category, keywords in (
        ('寺', ('寺', '院', '地蔵', '観音', '不動尊', '仏', '天', '堂')),
        ('神社', ('神社', '社', '宮', '稲荷')),
        ('教会', ('教会',)),
    )
    for keyword in keywords
}

In [10]:
# Classify places of worship by keywords in their name. The keyword order is a
# priority order: each pass only touches rows that are still unclassified, so
# the first keyword that matches a name decides its category.
# The type test does not depend on the keyword, so it is computed once here
# instead of once per keyword.
is_worship_type = df_tourist_org['types_list'].apply(lambda types: 'place_of_worship' in types)
for worship_place in ['寺', '神社', '院', '社', '宮', '教会', '地蔵', '観音', '稲荷', '不動尊', '仏', '天', '堂']:
    # Keywords are plain text, so match them literally (regex=False);
    # na=False keeps the mask strictly boolean if a name is missing.
    name_has_keyword = df_tourist_org['name'].str.contains(worship_place, regex=False, na=False)
    # Re-evaluated every pass because earlier passes fill the column in.
    still_unclassified = df_tourist_org['施設分類'].isna()
    df_tourist_org.loc[
        still_unclassified & is_worship_type & name_has_keyword, '施設分類'
    ] = conv_worship_place[worship_place]

In [11]:
# Label still-unclassified rows as a cemetery ('墓地') when their types include
# 'cemetery' or their name ends with a grave-related suffix.
cemetery_names = df_tourist_org['name']
is_cemetery = (
    df_tourist_org['types_list'].apply(lambda types: 'cemetery' in types)
    | cemetery_names.str.endswith('墓')
    | cemetery_names.str.endswith('墓地')
    | cemetery_names.str.endswith('墓所')
)
cemetery_targets = df_tourist_org['施設分類'].isna() & is_cemetery
df_tourist_org.loc[cemetery_targets, '施設分類'] = '墓地'

In [12]:
# Label still-unclassified rows whose types include 'museum' as
# '美術館・博物館'.
is_museum_type = df_tourist_org['types_list'].apply(lambda types: 'museum' in types)
museum_targets = df_tourist_org['施設分類'].isna() & is_museum_type
df_tourist_org.loc[museum_targets, '施設分類'] = '美術館・博物館'

In [13]:
# Label still-unclassified tourist attractions as a historic site ('旧跡')
# when the name matches one of the prefix/suffix/substring patterns below.
historic_names = df_tourist_org['name']
name_looks_historic = (
    historic_names.str.endswith('跡')
    | historic_names.str.startswith('旧')
    | historic_names.str.endswith('の地')
    | historic_names.str.endswith('跡地')
    | historic_names.str.endswith('屋敷')
    | historic_names.str.contains('史跡')
    | historic_names.str.contains('旧跡')
)
is_attraction_for_historic = df_tourist_org['types_list'].apply(
    lambda types: 'tourist_attraction' in types
)
historic_targets = (
    df_tourist_org['施設分類'].isna()
    & is_attraction_for_historic
    & name_looks_historic
)
df_tourist_org.loc[historic_targets, '施設分類'] = '旧跡'

In [14]:
# Label still-unclassified rows as a street ('通り') when the name ends with a
# street-like suffix. Unlike the neighbouring cells, no type check is applied.
street_names = df_tourist_org['name']
name_looks_like_street = (
    street_names.str.endswith('街')
    | street_names.str.endswith('横丁')
    | street_names.str.endswith('通り')
    | street_names.str.endswith('新道')
)
street_targets = df_tourist_org['施設分類'].isna() & name_looks_like_street
df_tourist_org.loc[street_targets, '施設分類'] = '通り'

In [15]:
# Label still-unclassified tourist attractions whose name ends with one of the
# suffixes below as a tree-lined road ('並木道').
# The type test is the same for every suffix, so it is computed once here
# instead of once per suffix.
is_attraction = df_tourist_org['types_list'].apply(lambda types: 'tourist_attraction' in types)
for place in ['並木', '道', '桜']:
    # na=False keeps the mask strictly boolean if a name is missing.
    ends_with_place = df_tourist_org['name'].str.endswith(place, na=False)
    still_unclassified = df_tourist_org['施設分類'].isna()
    df_tourist_org.loc[
        still_unclassified & is_attraction & ends_with_place, '施設分類'
    ] = '並木道'

In [16]:
# Label still-unclassified tourist attractions by the landmark word their name
# ends with; the suffix itself is used as the category label.
# The type test is the same for every suffix, so it is computed once here
# instead of once per suffix.
is_attraction = df_tourist_org['types_list'].apply(lambda types: 'tourist_attraction' in types)
for place in ['坂', '橋', '古墳', '碑', '像', '湯', '池', '川']:
    # na=False keeps the mask strictly boolean if a name is missing.
    ends_with_place = df_tourist_org['name'].str.endswith(place, na=False)
    still_unclassified = df_tourist_org['施設分類'].isna()
    df_tourist_org.loc[
        still_unclassified & is_attraction & ends_with_place, '施設分類'
    ] = place

In [17]:
# Number of rows assigned to each category (unclassified rows are not counted).
category_counts = df_tourist_org['施設分類'].value_counts()
category_counts

公園         2866
寺           365
美術館・博物館     303
旧跡          253
神社          194
坂           161
橋           156
碑           112
通り           58
墓地           57
並木道          45
像            41
教会           24
湯            18
池            18
古墳            7
川             5
Name: 施設分類, dtype: int64

In [18]:
# Build the master table: keep only the selected categories, keep the columns
# of interest, and give the source columns their Japanese output names.
is_master_category = df_tourist_org['施設分類'].isin(
    ['寺', '神社', '旧跡', '並木道', '橋', '墓地']
)
df_place_master = (
    df_tourist_org
    .loc[is_master_category, ['name', 'vicinity', 'lat', 'lng', '施設分類', 'rating']]
    .rename(columns={
        'name': '名称',
        'vicinity': '住所',
        'lat': '緯度',
        'lng': '経度',
    })
    .reset_index(drop=True)
)
print(df_place_master.shape)

(1070, 6)


In [19]:
# Write the master table next to the input data, without the index column.
master_csv_path = os.path.join(DIR_NAME, 'famous_place_master.csv')
df_place_master.to_csv(master_csv_path, encoding='utf-8_sig', index=False)