<a href="https://colab.research.google.com/github/z-guard/analysis/blob/main/notebooks/various_shops.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 各種店舗
- '本DVD', 'ビューティーサロン', '衣料品', '100円ショップ'

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive; the data
# directory used by the later cells lives underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import pandas as pd
import numpy as np

# Display limits for DataFrame output in this notebook: up to 50 columns
# and 100 rows are shown before pandas truncates the rendering.
for option_name, limit in (('display.max_columns', 50), ('display.max_rows', 100)):
    pd.set_option(option_name, limit)

In [3]:
# Directory on the mounted Drive that holds the input CSV files and receives
# the generated master CSV.
# NOTE(review): the folder is spelled 'z-gard' while the repository in the
# Colab badge is 'z-guard' — confirm this matches the actual Drive folder name.
DIR_NAME = '/content/drive/MyDrive/z-gard/data'

In [4]:
# Input files, in concatenation order. The order matters: drop_duplicates keeps
# the first row seen for each place_id, so a shop that appears in several
# files is taken from the earliest file in this list.
shop_csv_names = [
    'book_movie_rental_nearby.csv',
    'clothing_store_nearby.csv',
    'home_goods_store_nearby.csv',
    'beauty_salon_nearby.csv',
    'hair_care_nearby.csv',
]
shop_frames = [
    pd.read_csv(os.path.join(DIR_NAME, csv_name))
    for csv_name in shop_csv_names
]

# Stack all files, keep one row per place_id and renumber the index from 0.
df_shop_master = (
    pd.concat(shop_frames)
    .drop_duplicates(subset=['place_id'])
    .reset_index(drop=True)
)
print(df_shop_master.shape)

(29707, 7)


In [5]:
# Translation table that deletes double quotes, square brackets, single quotes
# and spaces in one pass.
_TYPES_PUNCTUATION = str.maketrans('', '', '"[]\' ')


def _parse_types(raw_types):
    """Strip quote/bracket/space characters from `raw_types` and split it on commas."""
    return raw_types.translate(_TYPES_PUNCTUATION).split(',')


# One list of type names per shop, plus a flattened Series with one type per row.
df_shop_master['types_list'] = df_shop_master['types'].apply(_parse_types)
types_list = df_shop_master['types_list'].explode()

In [6]:
# Show the 15 most frequent place types across all shops (a shop with several
# types contributes one count to each of them).
types_list.value_counts().head(15)

point_of_interest     29707
establishment         29707
store                 15255
hair_care              9490
home_goods_store       7818
beauty_salon           6444
clothing_store         6208
health                 3182
electronics_store      2393
book_store             1476
furniture_store        1223
food                    647
school                  467
spa                     340
general_contractor      306
Name: types_list, dtype: int64

In [7]:
# Category flag columns; each starts at 0 and is switched to 1 by later cells.
store_types = ['本DVD', 'ビューティーサロン', '衣料品', '100円ショップ']
df_shop_master = df_shop_master.assign(
    **{flag_column: 0 for flag_column in store_types}
)

In [8]:
# (place type, flag column) pairs: a shop whose types_list contains the place
# type gets the flag column set to 1. Both 'beauty_salon' and 'hair_care'
# feed the same 'ビューティーサロン' flag.
type_to_flag = [
    ('clothing_store', '衣料品'),
    ('beauty_salon', 'ビューティーサロン'),
    ('hair_care', 'ビューティーサロン'),
]
for place_type, flag_column in type_to_flag:
    has_type = df_shop_master['types_list'].apply(lambda types: place_type in types)
    df_shop_master.loc[has_type, flag_column] = 1

In [9]:
# Name fragments that mark a shop as a book / DVD / video store. Any shop whose
# name contains one of them gets the '本DVD' flag set to 1.
# NOTE(review): matching is case- and width-sensitive (only upper-case 'BOOK'
# and full-width 'ＤＶＤ' are listed) — confirm whether 'book' / half-width
# 'DVD' names should match as well.
book_store = [
    '書店', '書房', 'BOOK', 'ブック', 'ゲオ', 'HMV', 'ＤＶＤ', 'ビデオ', 'TSUTAYA'
]
for store in book_store:
    # regex=False: the keywords are literal substrings, not regex patterns.
    # na=False: a missing name counts as "no match"; without it the mask holds
    # NaN and .loc refuses to use it as a boolean indexer.
    name_matches = df_shop_master['name'].str.contains(store, na=False, regex=False)
    df_shop_master.loc[name_matches, '本DVD'] = 1

In [10]:
# Name fragments that mark a shop as a 100-yen shop. Any shop whose name
# contains one of them gets the '100円ショップ' flag set to 1.
hundred_store = [
    'ダイソー', 'セリア', '100円', 'ミーツ', 'ワッツ', 'フレッツ', 'キャンドゥ',
]
for store in hundred_store:
    # regex=False: the keywords are literal substrings, not regex patterns.
    # na=False: a missing name counts as "no match"; without it the mask holds
    # NaN and .loc refuses to use it as a boolean indexer.
    name_matches = df_shop_master['name'].str.contains(store, na=False, regex=False)
    df_shop_master.loc[name_matches, '100円ショップ'] = 1

In [11]:
# Name fragments that disqualify a shop from the '衣料品' category even though
# its place types include 'clothing_store'. Any shop whose name contains one
# of them has the '衣料品' flag reset to 0.
ng_cloth_store = [
    '洋服', '洋品', '呉服', '染', '着物', '繊維', '洋装', 'きもの', 'ドン・キホーテ', 'コーナン', '商事'
]
for store in ng_cloth_store:
    # regex=False: the keywords are literal substrings, not regex patterns.
    # na=False: a missing name counts as "no match"; without it the mask holds
    # NaN and .loc refuses to use it as a boolean indexer.
    name_matches = df_shop_master['name'].str.contains(store, na=False, regex=False)
    df_shop_master.loc[name_matches, '衣料品'] = 0

In [12]:
# Collapse the four flag columns into a single category label per shop.
# Categories are tried in priority order; a shop keeps the first category
# whose flag is 1, and stays None when no flag is set.
df_shop_master['施設分類'] = None

category_priority = ['100円ショップ', '本DVD', '衣料品', 'ビューティーサロン']
for category in category_priority:
    still_unlabelled = df_shop_master['施設分類'].isna()
    flagged = df_shop_master[category] == 1
    df_shop_master.loc[still_unlabelled & flagged, '施設分類'] = category

In [13]:
# Number of shops per assigned category (shops left without a category are
# not counted, since value_counts drops missing values by default).
df_shop_master['施設分類'].value_counts()

ビューティーサロン    14775
衣料品           5625
本DVD           913
100円ショップ       206
Name: 施設分類, dtype: int64

In [14]:
# Keep only shops that received a category, switch to the output column names
# and reduce the frame to the six exported columns.
column_labels = {
    'name': '名称',
    'lat': '緯度',
    'lng': '経度',
    'vicinity': '住所',
}
output_columns = ['名称', '住所', '緯度', '経度', '施設分類', 'rating']

is_classified = df_shop_master['施設分類'].notna()
df_shop_master = (
    df_shop_master.loc[is_classified]
    .rename(columns=column_labels)
    .loc[:, output_columns]
    .reset_index(drop=True)
)
print(df_shop_master.shape)

(21519, 6)


In [15]:
# Write the classified shop master next to the input files. 'utf-8_sig' is
# Python's UTF-8-with-BOM codec; index=False leaves the row index out of the file.
df_shop_master.to_csv(os.path.join(DIR_NAME, 'various_shops_master.csv'), encoding='utf-8_sig', index=False)