## <p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing:2px; color:#4E4FEB; font-size:140%; text-align:left;padding: 0px; border-bottom: 3px solid #4E4FEB">Libraries</p>

In [1]:
import json
import time
import urllib.request as req
import warnings
from itertools import chain

warnings.filterwarnings("ignore")

import chromedriver_autoinstaller
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from IPython.display import display
from pandas.io.formats.style import Styler
from selenium import webdriver
from selenium.webdriver.common.by import By
from tqdm import tqdm, tqdm_notebook
from tqdm.auto import tqdm  # keep after the plain tqdm import so the auto variant is the one bound to `tqdm`

# pandas display limits: show up to 999 columns and 999 characters per cell.
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 999)

# Enable tqdm's pandas integration.
tqdm.pandas()

# Matplotlib rc overrides handed to seaborn below.
rc = {
    # backgrounds
    "axes.facecolor": "#F8F8F8",
    "figure.facecolor": "#F8F8F8",
    # axes, labels and ticks
    "axes.edgecolor": "#000000",
    "axes.labelcolor": "#000000",
    "xtick.color": "#000000",
    "ytick.color": "#000000",
    # grid: 8-digit hex, the trailing "30" is the alpha channel
    "grid.color": "#EBEBE730",
    "grid.alpha": 0.4,
    # typography
    "font.family": "serif",
}
sns.set(rc=rc)

# Colour cycle available to the plots in this notebook.
palette = [
    '#ff7f50', '#ffd700', '#ffdab9', '#9fe2bf',
    '#d2b48c', '#008080', '#98ff98', '#000080',
]

# Bright ANSI colour prefixes for printed text, plus the reset code.
from colorama import Style, Fore
blk, gld, grn, red, blu = (
    Style.BRIGHT + shade
    for shade in (Fore.BLACK, Fore.YELLOW, Fore.GREEN, Fore.RED, Fore.BLUE)
)
res = Style.RESET_ALL

In [2]:
def magnify(is_test: bool = False) -> list:
    """Build the CSS table-style rules applied to the notebook's styled tables.

    Args:
        is_test: when True, only the base rules are returned. When False, one
            extra rule is appended that tints the table's last row
            (``tr:last-child``) with a translucent version of the base colour.

    Returns:
        A list of ``{"selector": ..., "props": [...]}`` dicts, in the shape
        accepted by ``Styler.set_table_styles``.
    """
    base_color = '#b57edc'

    table_styles = [
        # Header cells: white bold text on the base colour.
        dict(selector="th",
             props=[("font-size", "11pt"),
                    ('background-color', f'{base_color}'),
                    ('color', 'white'),
                    ('font-weight', 'bold'),
                    ('border-bottom', '0.1px solid white'),
                    ('border-left', '0.1px solid white'),
                    ('text-align', 'right')]),

        # Blank top-left corner cell.
        dict(selector='th.blank.level0',
             props=[('font-weight', 'bold'),
                    ('border-left', '1.7px solid white'),
                    ('background-color', 'white')]),

        # Data cells.
        dict(selector="td",
             props=[('padding', "0.5em 1em"),
                    ('text-align', 'right')]),

        # Enlarge a header cell while hovered.
        dict(selector="th:hover",
             props=[("font-size", "14pt")]),

        # Enlarge and outline the data cell under the cursor.
        dict(selector="tr:hover td:hover",
             props=[('max-width', '250px'),
                    ('font-size', '14pt'),
                    ('color', f'{base_color}'),
                    ('font-weight', 'bold'),
                    ('background-color', 'white'),
                    ('border', f'1px dashed {base_color}')]),

        # Caption goes below the table.
        dict(selector="caption",
             props=[('caption-side', 'bottom')]),
    ]

    if not is_test:
        # Append the rule as a list element. The previous code concatenated a
        # bare dict onto the list, which raised TypeError whenever
        # is_test was False (including the default call).
        table_styles.append(
            dict(selector='tr:last-child',
                 props=[('background-color', f'{base_color}' + '20')])
        )

    return table_styles

def stylize_simple(df: pd.DataFrame, caption: str) -> Styler:
    """
        Wrap a dataframe in a Styler using the notebook's table styles
        (without the last-row highlight) and attach a caption.

        Args:
            df: any dataframe (train/test/origin)
            caption: text set as the table caption

        Returns:
            The dataframe wrapped into Styler.
    """
    table_styles = magnify(True)
    styled = df.style.set_table_styles(table_styles)
    return styled.set_caption(f"{caption}")

## <p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing:2px; color:#4E4FEB; font-size:140%; text-align:left;padding: 0px; border-bottom: 3px solid #4E4FEB">Intro</p>

Collecting Tokyo Real Estate Case Data

 <a href = 'http://www.oshimaland.co.jp'> 오시마랜드(Oshimaland)</a>
>This website maps properties where incidents such as suicides, corpse disposal incidents, and murders have been reported.


## <p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing:2px; color:#4E4FEB; font-size:140%; text-align:left;padding: 0px; border-bottom: 3px solid #4E4FEB">Data Crawling</p>

In [3]:
# Fetch a chromedriver for the locally installed Chrome (downloads it if needed).
chrome_path = chromedriver_autoinstaller.install()
# install() also puts the driver's folder on PATH, so Chrome() is created with
# no arguments. Passing the path positionally is not portable: recent Selenium
# releases interpret the first positional argument as `options`.
driver = webdriver.Chrome()
url = 'https://www.oshimaland.co.jp/'
driver.get(url)
# Element lookups made after this point wait up to 10 seconds before failing.
driver.implicitly_wait(10)

In [4]:
# NOTE(review): absolute, machine-specific path — this cell only runs where the file exists.
main_district_csv = 'C:\\Users\\lucky\\PycharmProjects\\PROJECT\\AirbnbWise\\Oshimaland_data\\jieun\\main_district_df2.csv'
main_district = pd.read_csv(main_district_csv)

# Preview the first four rows as a styled, captioned table.
main_district_preview = stylize_simple(main_district.head(4), 'main_district')
display(main_district_preview)

Unnamed: 0,neighbourhood_cleansed,count
0,Shinjuku Ku,2278
1,Taito Ku,1597
2,Sumida Ku,1290
3,Toshima Ku,1002


In [5]:
# Region names from the district table, in table order.
regionList = main_district['neighbourhood_cleansed'].tolist()
print(regionList)

['Shinjuku Ku', 'Taito Ku', 'Sumida Ku', 'Toshima Ku', 'Shibuya Ku', 'Minato Ku', 'Setagaya Ku', 'Ota Ku', 'Nakano Ku', 'Chuo Ku', 'Kita Ku', 'Katsushika Ku', 'Suginami Ku', 'Koto Ku', 'Edogawa Ku', 'Bunkyo Ku', 'Arakawa Ku', 'Itabashi Ku', 'Shinagawa Ku', 'Chiyoda Ku', 'Adachi Ku', 'Meguro Ku', 'Nerima Ku', 'Fuchu Shi', 'Hachioji Shi', 'Hino Shi', 'Kokubunji Shi', 'Machida Shi', 'Ome Shi', 'Chofu Shi', 'Musashino Shi', 'Akiruno Shi', 'Mitaka Shi', 'Koganei Shi', 'Higashimurayama Shi', 'Tama Shi', 'Kunitachi Shi', 'Komae Shi', 'Nishitokyo Shi', 'Tachikawa Shi', 'Kodaira Shi', 'Hamura Shi', 'Musashimurayama Shi', 'Okutama Machi', 'Akishima Shi', 'Fussa Shi']


## <p style="font-family:JetBrains Mono; font-weight:normal; letter-spacing:2px; color:#4E4FEB; font-size:140%; text-align:left;padding: 0px; border-bottom: 3px solid #4E4FEB">Shinjuku ku</p>

In [6]:
#TODO shinjuku ku test
# Search the site for the first region in regionList: type the name into the
# '#geocoder-text' input, then click the 'geocoder-button' element.
search_region = driver.find_element(By.CSS_SELECTOR, '#geocoder-text')
search_region.send_keys(regionList[0])
# Fixed 2-second pause between typing and clicking.
time.sleep(2)
driver.find_element(By.ID, "geocoder-button").click()
# Fixed 2-second pause after the click — presumably to let the map update; confirm.
time.sleep(2)

In [7]:
# Endpoint that receives the POST with the key list below.
json_url = 'https://api.oshimaland.co.jp/map'
headers = {
    "User-Agent": "Mozilla/5.0",
    "Origin": "https://www.oshimaland.co.jp"
}
# Hard-coded keys sent as the JSON body.
# NOTE(review): presumably the map tiles covering the searched area — confirm how they were obtained.
data = {
    'keys': ["1330021123012133", "1330021123012311", "1330021123013022", "1330021123013200"]
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(json_url, headers=headers, json=data, timeout=30)
# Fail here on an HTTP error instead of trying to decode an error page as JSON.
response.raise_for_status()
json_data = response.json()


In [8]:
# Gather every value of the request payload (each a list of keys) ...
origin = [*data.values()]
# ... and merge those lists into one flat list of keys.
flatten_list = list(chain.from_iterable(origin))
flatten_list

['1330021123012133',
 '1330021123012311',
 '1330021123013022',
 '1330021123013200']

In [9]:
# For each requested key, take the 'key' field of every marker listed under it
# in the map response, preserving request order.
jsonValueList = [
    marker['key']
    for requested_key in flatten_list
    for marker in json_data['markers'][requested_key]
]

In [10]:
# Download the detail record of every marker key and keep four of its fields.
# Loop-local names are used on purpose: the earlier version reassigned
# `json_data`, `json_url` and `url`, which clobbered the values set by earlier
# cells and broke any re-run of the cell that reads json_data['markers'].
getDataList = []
for marker_key in tqdm(jsonValueList, desc='incident details'):
    detail_url = f'https://www.oshimaland.co.jp/d/{marker_key}.json'
    detail_response = requests.get(
        detail_url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=30,  # do not hang the whole crawl on one stalled request
    )
    # Stop on an HTTP error rather than parsing an error page.
    detail_response.raise_for_status()
    # Decode the body as JSON directly; routing it through an HTML parser can
    # alter text that contains markup characters or entities.
    detail = detail_response.json()
    getDataList.append({
        'info' : detail['info'],
        'address' : detail['ad'],
        'dt' : detail['dt'],
        'cr' : detail['cr'],
     })

  0%|          | 0/135 [00:00<?, ?it/s]

In [13]:
# Turn the collected records into a table: one row per record, one column per dict key.
shinjuku = pd.DataFrame(data=getDataList)
shinjuku

Unnamed: 0,info,address,dt,cr
0,飛び降り自殺,東京都新宿区西新宿七丁目3-10山京ビル,平成27年2月20日,令和2年1月27日
1,2019年10月下旬、40代後半男性が部屋の中で孤高死。\r\n2019年12月下旬に発見される。\r\n死亡から2ヶ月経っていたため、遺体の腐乱が進んでいた模様。\r\n遺体回収時には内廊下のため、建物内1階全体が腐乱臭で充満。\r\n\r\n2020年2月初旬現在、清掃終了、その後内装解体中。,東京都新宿区西新宿七丁目15-14FONTAINE三須102号室,2019年10月下旬,令和2年2月3日
2,飛び降り自殺,東京都新宿区歌舞伎町一丁目20-2アパホテル新宿歌舞伎町タワー↓,令和3年5月11日,令和3年5月11日
3,819号室で女性死亡,東京都新宿区歌舞伎町一丁目20-2,平成30年5月23日,平成30年7月4日
4,飛び降り自殺,東京都新宿区歌舞伎町一丁目20-2アパホテル新宿歌舞伎町タワー↓,令和3年5月18日,令和3年5月26日
...,...,...,...,...
130,遺体発見,東京都新宿区新宿三丁目34-2 個室ビデオ個室内,平成26年6月25日,平成26年6月26日
131,飛び降り自殺,東京都新宿区新宿三丁目17-5カワセビル屋上↓,平成20年3月2日,
132,男性飛び降り自殺,東京都新宿区新宿三丁目18-1,平成18年9月4日,平成26年1月24日
133,清掃業者が転落して死亡,東京都新宿区新宿三丁目32-6,2011年ごろ,平成28年6月16日
