#### 직방의 원룸 매물정보 수집
- 절차
    - 동이름 > 위도, 경도
    - 위도, 경도 > geohash(영역) 변환
    - geohash > 매물 아이디
    - 매물 아이디 > 매물 정보

In [3]:
import requests
import pandas as pd
import geohash2

In [2]:
!pip install geohash2

Collecting geohash2
  Downloading geohash2-1.1.tar.gz (15 kB)
Building wheels for collected packages: geohash2
  Building wheel for geohash2 (setup.py): started
  Building wheel for geohash2 (setup.py): finished with status 'done'
  Created wheel for geohash2: filename=geohash2-1.1-py3-none-any.whl size=15544 sha256=8a65922cefa73cecb9d9c4d44934c8ee660d0baf4895dde1fb6273cae147a932
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\2d\0c\66\dcd768c9e7c26fc81ec59bfe24af9d780fc9dbdf7f90b0cf00
Successfully built geohash2
Installing collected packages: geohash2
Successfully installed geohash2-1.1


In [None]:
# 1. 동이름 > 위도, 경도

In [4]:
# Resolve a neighbourhood name to coordinates with the Zigbang search API.
# The query is passed through `params` so that requests percent-encodes the
# Korean text itself; no manual URL encoding is needed
# (encoder/decoder tool for reference: https://meyerweb.com/eric/tools/dencoder/).
addr = "강남구 역삼동"
url = "https://apis.zigbang.com/v2/search"
query = {"leaseYn": "N", "q": addr, "serviceType": "원룸"}
# A timeout keeps the cell from hanging forever if the server does not answer.
response = requests.get(url, params=query, timeout=10)
response

<Response [200]>

In [16]:
# Take the first search hit and read its coordinates.
data = response.json()["items"][0]
lat = data["lat"]
lng = data["lng"]
(lat, lng)

(37.495365142822266, 127.03306579589844)

In [None]:
# 2. 위도, 경도 > geohash(영역) 변환

In [19]:
# The larger the precision, the smaller the area the geohash covers.
geohash = geohash2.encode(lat, lng, precision=5)
geohash

'wydm6'

In [None]:
# 3. geohash > 매물 아이디

In [22]:
# Ask for every one-room listing inside the geohash area.
# The filters are passed as a dict so that requests builds and percent-encodes
# the query string (Korean text and the `|` separator included) instead of a
# hand-assembled, line-continued f-string.
url = "https://apis.zigbang.com/v2/items"
query = {
    "deposit_gteq": 0,
    "domain": "zigbang",
    "geohash": geohash,
    "needHasNoFiltered": "true",
    "rent_gteq": 0,
    "sales_type_in": "전세|월세",
    "service_type_eq": "원룸",
}

# A timeout keeps the cell from hanging forever if the server does not answer.
response = requests.get(url, params=query, timeout=10)
response

<Response [200]>

In [28]:
# Pull the `items` list out of the JSON body, then display how many entries
# came back together with the first two of them.
items = response.json()['items']
len(items), items[:2]

(2344,
 [{'lat': 37.485141261794524, 'lng': 127.01296495741643, 'item_id': 32819635},
  {'lat': 37.485627384450254, 'lng': 127.0136283391101, 'item_id': 32838798}])

In [33]:
# Keep only the listing ids; the first three are displayed as a quick check.
ids = [item['item_id'] for item in items]
ids[:3]

[32819635, 32838798, 32872608]

In [None]:
# 4. 매물 아이디 > 매물 정보

In [36]:
# Look up the details of the listings by id.
url = "https://apis.zigbang.com/v2/items/list"
# NOTE: despite its name, `params` is sent as the form-encoded request BODY
# (the second positional argument of requests.post is `data`), not as a query
# string. Only the first 900 ids are requested in this call.
params = {
    "domain": "zigbang",
    "withCoalition": "true",
    "item_ids": ids[:900],
}
# `data=` makes the body explicit; the timeout prevents an indefinite hang.
response = requests.post(url, data=params, timeout=10)
response

<Response [200]>

In [37]:
response.text[:300]

'{"items":[{"section_type":null,"item_id":32819635,"images_thumbnail":"https://ic.zigbang.com/ic/items/32819635/1.jpg","sales_type":"전세","sales_title":"전세","deposit":16500,"rent":0,"size_m2":39.89,"공급면적":{"m2":39.89,"p":"12.1"},"전용면적":{"m2":20.82,"p":"6.3"},"계약면적":null,"room_type_title":null,"floor":'

In [45]:
# Let pandas show up to 50 columns when rendering a DataFrame.
pd.set_option("display.max_columns", 50)

In [49]:
# Build a table from the detail response, keeping only the columns of interest.
# (A stray `items[:2]` expression was removed: in the middle of a cell it is
# evaluated and discarded, so it displayed nothing.)
items = response.json()["items"]
columns = ["item_id", "sales_type", "deposit", "rent", "size_m2", "address1", "manage_cost"]
df = pd.DataFrame(items)[columns]
df.tail(5)

Unnamed: 0,item_id,sales_type,deposit,rent,size_m2,address1,manage_cost
895,32850137,월세,300,260,32.91,서울시 강남구 역삼동,10
896,32852414,월세,130,250,33.44,서울시 강남구 역삼동,10
897,32809154,월세,120,70,19.83,서울시 강남구 역삼동,9
898,32844154,월세,100,100,29.75,서울시 강남구 역삼동,9
899,32656329,전세,24000,0,59.5,서울시 강남구 역삼동,10


In [60]:
# 함수 만들기
def oneroom(addr, limit=900):
    """ Crawl one-room (원룸) rental listings from Zigbang around an address.

    The address is resolved to a coordinate, the coordinate to a precision-5
    geohash, and the listings found in that geohash area are looked up. The
    result therefore covers the whole area, not only the requested
    neighbourhood; filter on ``address1`` afterwards if needed.

    Params
    ------
    addr : str : address to search for (e.g. "마포구 합정동")
    limit : int or None : maximum number of listings to fetch details for.
        Defaults to 900, the cut-off this function has always applied.
        Pass None to fetch every listing found, requested in batches.

    Return
    ------
    type : DataFrame : item_id, sales_type, deposit, rent, size_m2, address1, manage_cost columns
        (empty, with the same columns, when no listing is found)

    Raises
    ------
    IndexError : the address search returned no result
    requests.HTTPError : one of the API calls answered with an error status
    requests.Timeout : one of the API calls did not answer in time

    """
    columns = ["item_id", "sales_type", "deposit", "rent", "size_m2", "address1", "manage_cost"]
    # Ids sent per detail request. 900 is the slice size the original code used;
    # presumably the endpoint caps the ids accepted per call - TODO confirm.
    batch_size = 900
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server

    # 1. neighbourhood name -> latitude, longitude
    # `params` lets requests percent-encode the Korean query text.
    response = requests.get(
        "https://apis.zigbang.com/v2/search",
        params={"leaseYn": "N", "q": addr, "serviceType": "원룸"},
        timeout=timeout,
    )
    response.raise_for_status()
    hits = response.json().get("items") or []
    if not hits:
        # Same exception type as indexing the empty list, but with a usable message.
        raise IndexError(f"no Zigbang search result for address: {addr!r}")
    lat, lng = hits[0]["lat"], hits[0]["lng"]

    # 2. latitude, longitude -> geohash (area)
    geohash = geohash2.encode(lat, lng, precision=5)

    # 3. geohash -> listing ids
    response = requests.get(
        "https://apis.zigbang.com/v2/items",
        params={
            "deposit_gteq": 0,
            "domain": "zigbang",
            "geohash": geohash,
            "needHasNoFiltered": "true",
            "rent_gteq": 0,
            "sales_type_in": "전세|월세",
            "service_type_eq": "원룸",
        },
        timeout=timeout,
    )
    response.raise_for_status()
    ids = [item["item_id"] for item in response.json()["items"]]
    if limit is not None:
        ids = ids[:limit]

    # 4. listing ids -> listing details
    # The payload goes in the form-encoded request body (`data=`), one batch at a time.
    rows = []
    for start in range(0, len(ids), batch_size):
        response = requests.post(
            "https://apis.zigbang.com/v2/items/list",
            data={
                "domain": "zigbang",
                "withCoalition": "true",
                "item_ids": ids[start:start + batch_size],
            },
            timeout=timeout,
        )
        response.raise_for_status()
        rows.extend(response.json()["items"])

    # Passing `columns` to the constructor keeps the column set even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)
    

In [61]:
# Fetch the listings for the area around the given address and preview the
# last two rows of the result.
addr = "마포구 합정동"
df = oneroom(addr)
df.tail(2)

Unnamed: 0,item_id,sales_type,deposit,rent,size_m2,address1,manage_cost
744,32862490,전세,43000,0,31.74,서울시 마포구 중동,5
745,32866975,월세,3000,90,49.59,서울시 마포구 중동,2


In [63]:
# Keep only the rows whose address contains the searched address.
# regex=False matches `addr` as plain text (str.contains treats the pattern as
# a regular expression by default); na=False turns a missing address into
# "no match" instead of a NaN entry in the boolean mask.
address_matches = df["address1"].str.contains(addr, regex=False, na=False)
df_filtered = df[address_matches].reset_index(drop=True)
df_filtered.tail(5)

Unnamed: 0,item_id,sales_type,deposit,rent,size_m2,address1,manage_cost
95,32794268,월세,1000,90,59.5,서울시 마포구 합정동,2
96,32794303,월세,3000,80,59.5,서울시 마포구 합정동,2
97,32804404,월세,2000,85,61.24,서울시 마포구 합정동,2
98,32812101,월세,3000,80,61.24,서울시 마포구 합정동,2
99,32836233,월세,1000,45,13.95,서울시 마포구 합정동,5


In [None]:
# 모듈 파일 만들기 : .py

In [64]:
ls

 C 드라이브의 볼륨에는 이름이 없습니다.
 볼륨 일련 번호: 7C6C-FC35

 C:\Users\User\Webcrawing\20220803\code 디렉터리

2022-08-04  오후 12:26    <DIR>          .
2022-08-04  오후 12:26    <DIR>          ..
2022-08-04  오전 11:08    <DIR>          .ipynb_checkpoints
2022-08-04  오전 11:07            58,808 01_naver_api_2.ipynb
2022-08-03  오후 03:37            33,157 01_requests_naver_stock.ipynb
2022-08-03  오후 05:21            23,642 02_requests_api.ipynb
2022-08-04  오후 12:26            19,703 02_zigbang.ipynb
2022-08-03  오후 04:45            10,060 covid.xlsx
2022-08-03  오후 04:55             6,132 covid_en.xlsx
               6개 파일             151,502 바이트
               3개 디렉터리  134,529,159,168 바이트 남음


In [65]:
%%writefile zigbang.py

import requests
import pandas as pd
import geohash2

def oneroom(addr, limit=900):
    """ Crawl one-room (원룸) rental listings from Zigbang around an address.

    The address is resolved to a coordinate, the coordinate to a precision-5
    geohash, and the listings found in that geohash area are looked up. The
    result therefore covers the whole area, not only the requested
    neighbourhood; filter on ``address1`` afterwards if needed.

    Params
    ------
    addr : str : address to search for (e.g. "마포구 합정동")
    limit : int or None : maximum number of listings to fetch details for.
        Defaults to 900, the cut-off this function has always applied.
        Pass None to fetch every listing found, requested in batches.

    Return
    ------
    type : DataFrame : item_id, sales_type, deposit, rent, size_m2, address1, manage_cost columns
        (empty, with the same columns, when no listing is found)

    Raises
    ------
    IndexError : the address search returned no result
    requests.HTTPError : one of the API calls answered with an error status
    requests.Timeout : one of the API calls did not answer in time

    """
    columns = ["item_id", "sales_type", "deposit", "rent", "size_m2", "address1", "manage_cost"]
    # Ids sent per detail request. 900 is the slice size the original code used;
    # presumably the endpoint caps the ids accepted per call - TODO confirm.
    batch_size = 900
    timeout = 10  # seconds; avoids hanging forever on an unresponsive server

    # 1. neighbourhood name -> latitude, longitude
    # `params` lets requests percent-encode the Korean query text.
    response = requests.get(
        "https://apis.zigbang.com/v2/search",
        params={"leaseYn": "N", "q": addr, "serviceType": "원룸"},
        timeout=timeout,
    )
    response.raise_for_status()
    hits = response.json().get("items") or []
    if not hits:
        # Same exception type as indexing the empty list, but with a usable message.
        raise IndexError(f"no Zigbang search result for address: {addr!r}")
    lat, lng = hits[0]["lat"], hits[0]["lng"]

    # 2. latitude, longitude -> geohash (area)
    geohash = geohash2.encode(lat, lng, precision=5)

    # 3. geohash -> listing ids
    response = requests.get(
        "https://apis.zigbang.com/v2/items",
        params={
            "deposit_gteq": 0,
            "domain": "zigbang",
            "geohash": geohash,
            "needHasNoFiltered": "true",
            "rent_gteq": 0,
            "sales_type_in": "전세|월세",
            "service_type_eq": "원룸",
        },
        timeout=timeout,
    )
    response.raise_for_status()
    ids = [item["item_id"] for item in response.json()["items"]]
    if limit is not None:
        ids = ids[:limit]

    # 4. listing ids -> listing details
    # The payload goes in the form-encoded request body (`data=`), one batch at a time.
    rows = []
    for start in range(0, len(ids), batch_size):
        response = requests.post(
            "https://apis.zigbang.com/v2/items/list",
            data={
                "domain": "zigbang",
                "withCoalition": "true",
                "item_ids": ids[start:start + batch_size],
            },
            timeout=timeout,
        )
        response.raise_for_status()
        rows.extend(response.json()["items"])

    # Passing `columns` to the constructor keeps the column set even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)
    

Writing zigbang.py


In [69]:
import zigbang as zb

In [70]:
# Call the crawler through the imported `zb` module and preview the last two
# rows of the result.
df = zb.oneroom("망원동")
df.tail(2)

Unnamed: 0,item_id,sales_type,deposit,rent,size_m2,address1,manage_cost
744,32862490,전세,43000,0,31.74,서울시 마포구 중동,5
745,32866975,월세,3000,90,49.59,서울시 마포구 중동,2


In [71]:
%whos #현재 사용하고 있는 변수목록
%reset #변수 초기화

Variable      Type         Data/Info
------------------------------------
ab            module       <module 'zigbang' from 'C<...>20803\\code\\zigbang.py'>
addr          str          마포구 합정동
addr1         str          마포구 합정동
columns       list         n=7
data          dict         n=13
df            DataFrame          item_id sales_type <...>n\n[746 rows x 7 columns]
df_filtered   DataFrame         item_id sales_type  <...>n\n[100 rows x 7 columns]
geohash       str          wydm6
geohash2      module       <module 'geohash2' from '<...>\\geohash2\\__init__.py'>
ids           list         n=2344
items         list         n=900
lat           float        37.495365142822266
lng           float        127.03306579589844
oneroom       function     <function oneroom at 0x000002A7949A6DC0>
params        dict         n=3
pd            module       <module 'pandas' from 'C:<...>es\\pandas\\__init__.py'>
requests      module       <module 'requests' from '<...>\\requests\\__init__.py'>
resp