In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

In [11]:
def read_files_spark(local_dir,header):
    df = spark.read.csv(local_dir,encoding='cp949',header=header)
    return df

In [12]:
from pyspark.sql import SparkSession
import databricks.koalas as ks

spark = SparkSession.builder.getOrCreate()



In [13]:
spark

----------------

### 상권-영역 데이터 API호출 및 CSV생성

In [14]:
auth_key = '6f666551526a6f6132345a62664e58'
change_keys = {"TRDAR_SE_CD":"상권_구분_코드","TRDAR_SE_CD_NM":"상권_구분_코드_명","TRDAR_CD":"상권_코드","TRDAR_CD_NM":"상권_코드_명","XCNTS_VALUE":"엑스좌표_값","YDNTS_VALUE":"와이좌표_값","SIGNGU_CD":"시군구_코드","ADSTRD_CD":"행정동_코드","STDR_YM_CD":"기준_년월_코드"}

start, end  = 1, 1000
df = pd.DataFrame()
for i in range(2):
    url = f'http://openapi.seoul.go.kr:8088/{auth_key}/json/TbgisTrdarRelm/{start}/{end}/'
    res = requests.get(url)
    data = res.json()['TbgisTrdarRelm']['row']
    data = [{change_keys[k]:v for k,v in row.items()} for row in data]

    df = pd.concat([df,pd.json_normalize(data)])
            
    start += 1000
    end += 1000

df.to_csv("./상권-영역/서울시 우리마을가게 상권분석서비스(상권영역).csv", index=None, encoding='cp949')

In [15]:
df

Unnamed: 0,상권_구분_코드,상권_구분_코드_명,상권_코드,상권_코드_명,엑스좌표_값,와이좌표_값,시군구_코드,행정동_코드,기준_년월_코드
0,A,골목상권,1000275,보문로26길,201886,454183,11290,11290600,201810
1,A,골목상권,1000276,보문로31길,201385,454290,11290,11290555,201810
2,D,발달상권,1001045,동작구 총신대입구역_1,198304,442948,11590,11590630,201810
3,D,발달상권,1001046,서울 관악구 신림역_4,193828,443001,11620,11620695,201810
4,D,발달상권,1001047,뱅뱅사거리_1,202960,443242,11680,11680655,201810
...,...,...,...,...,...,...,...,...,...
491,D,발달상권,1001153,당산역_1,191112,448327,11560,11560620,201810
492,D,발달상권,1001154,용산 전자상가_4,196450,448406,11170,11170560,201810
493,D,발달상권,1001155,용산 전자상가_5,196870,448350,11170,11170560,201810
494,D,발달상권,1001156,서울 강동구 길동역,212214,448512,11740,11740685,201810


### 좌표 변환 모듈 설치 및 import

In [7]:
!pip install pyproj



In [16]:
from pyproj import Transformer

#### 도로명 주소에서 위도,경도 좌표로 변환

In [34]:
# from EPSG5181(GR80) to EPSG4326(WGS84)
transformer = Transformer.from_crs("EPSG:5181", "EPSG:4326")

In [31]:
DataFrame = df.copy()

long_list = []
lat_list = []

for idx, row in DataFrame.iterrows():
    x, y = row['엑스좌표_값'], row['와이좌표_값']
    latitude, longitude = transformer.transform(y, x)
    long_list.append(longitude)
    lat_list.append(latitude)


In [32]:
df['위도'] = lat_list
df['경도'] = long_list

In [25]:
df = df.drop(columns=['기준_년월_코드','상권_구분_코드'])

KeyError: "['기준_년월_코드' '상권_구분_코드'] not found in axis"

In [33]:
df

Unnamed: 0,상권_구분_코드_명,상권_코드,상권_코드_명,엑스좌표_값,와이좌표_값,시군구_코드,행정동_코드,위도,경도
0,골목상권,1000275,보문로26길,201886,454183,11290,11290600,37.587205,127.021354
1,골목상권,1000276,보문로31길,201385,454290,11290,11290555,37.588170,127.015681
2,발달상권,1001045,동작구 총신대입구역_1,198304,442948,11590,11590630,37.485978,126.980824
3,발달상권,1001046,서울 관악구 신림역_4,193828,443001,11620,11620695,37.486436,126.930214
4,발달상권,1001047,뱅뱅사거리_1,202960,443242,11680,11680655,37.488623,127.033469
...,...,...,...,...,...,...,...,...,...
491,발달상권,1001153,당산역_1,191112,448327,11560,11560620,37.534401,126.899440
492,발달상권,1001154,용산 전자상가_4,196450,448406,11170,11170560,37.535149,126.959834
493,발달상권,1001155,용산 전자상가_5,196870,448350,11170,11170560,37.534646,126.964587
494,발달상권,1001156,서울 강동구 길동역,212214,448512,11740,11740685,37.536030,127.138194


In [35]:
df.to_csv("./상권-영역/서울시 우리마을가게 상권분석서비스(상권영역)-위도,경도 추가.csv", index=None, encoding='cp949')

In [36]:
df = df.astype({"상권_구분_코드_명":str,"상권_코드":int,"상권_코드_명":str,"엑스좌표_값":int,"와이좌표_값":int,"시군구_코드":int,"행정동_코드":int,"위도":float,"경도":float})

In [37]:
df.dtypes

상권_구분_코드_명     object
상권_코드           int32
상권_코드_명        object
엑스좌표_값          int32
와이좌표_값          int32
시군구_코드          int32
행정동_코드          int32
위도            float64
경도            float64
dtype: object

In [38]:
import pymongo

In [39]:
connection = pymongo.MongoClient("mongodb+srv://user1:uZGuuMyRngM3izgG@cluster0.cu0c3.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
connection.list_database_names()

['elice', 'admin', 'local']

In [40]:
db = connection.get_database("elice")

In [41]:
collection = db.get_collection("area_info")

In [43]:
df.to_dict('records')

[{'상권_구분_코드_명': '골목상권',
  '상권_코드': 1000275,
  '상권_코드_명': '보문로26길',
  '엑스좌표_값': 201886,
  '와이좌표_값': 454183,
  '시군구_코드': 11290,
  '행정동_코드': 11290600,
  '위도': 37.58720469334632,
  '경도': 127.02135356029717},
 {'상권_구분_코드_명': '골목상권',
  '상권_코드': 1000276,
  '상권_코드_명': '보문로31길',
  '엑스좌표_값': 201385,
  '와이좌표_값': 454290,
  '시군구_코드': 11290,
  '행정동_코드': 11290555,
  '위도': 37.58816964537452,
  '경도': 127.01568136950489},
 {'상권_구분_코드_명': '발달상권',
  '상권_코드': 1001045,
  '상권_코드_명': '동작구 총신대입구역_1',
  '엑스좌표_값': 198304,
  '와이좌표_값': 442948,
  '시군구_코드': 11590,
  '행정동_코드': 11590630,
  '위도': 37.48597765914372,
  '경도': 126.9808235857054},
 {'상권_구분_코드_명': '발달상권',
  '상권_코드': 1001046,
  '상권_코드_명': '서울 관악구 신림역_4',
  '엑스좌표_값': 193828,
  '와이좌표_값': 443001,
  '시군구_코드': 11620,
  '행정동_코드': 11620695,
  '위도': 37.486436139402514,
  '경도': 126.9302137115341},
 {'상권_구분_코드_명': '발달상권',
  '상권_코드': 1001047,
  '상권_코드_명': '뱅뱅사거리_1',
  '엑스좌표_값': 202960,
  '와이좌표_값': 443242,
  '시군구_코드': 11680,
  '행정동_코드': 11680655,
  '위도': 37.4886234383130

In [44]:
collection.insert_many(df.to_dict('records'))

<pymongo.results.InsertManyResult at 0x221df5e6ac8>

In [45]:
for item in collection.find():
    print(item)

{'_id': ObjectId('6194a480a04fa6007b69102e'), '상권_구분_코드_명': '골목상권', '상권_코드': 1000275, '상권_코드_명': '보문로26길', '엑스좌표_값': 201886, '와이좌표_값': 454183, '시군구_코드': 11290, '행정동_코드': 11290600, '위도': 37.58720469334632, '경도': 127.02135356029717}
{'_id': ObjectId('6194a480a04fa6007b69102f'), '상권_구분_코드_명': '골목상권', '상권_코드': 1000276, '상권_코드_명': '보문로31길', '엑스좌표_값': 201385, '와이좌표_값': 454290, '시군구_코드': 11290, '행정동_코드': 11290555, '위도': 37.58816964537452, '경도': 127.01568136950489}
{'_id': ObjectId('6194a480a04fa6007b691030'), '상권_구분_코드_명': '발달상권', '상권_코드': 1001045, '상권_코드_명': '동작구 총신대입구역_1', '엑스좌표_값': 198304, '와이좌표_값': 442948, '시군구_코드': 11590, '행정동_코드': 11590630, '위도': 37.48597765914372, '경도': 126.9808235857054}
{'_id': ObjectId('6194a480a04fa6007b691031'), '상권_구분_코드_명': '발달상권', '상권_코드': 1001046, '상권_코드_명': '서울 관악구 신림역_4', '엑스좌표_값': 193828, '와이좌표_값': 443001, '시군구_코드': 11620, '행정동_코드': 11620695, '위도': 37.486436139402514, '경도': 126.9302137115341}
{'_id': ObjectId('6194a480a04fa6007b691032'), '상권_구분_코드_명': '발달상권

{'_id': ObjectId('6194a480a04fa6007b691093'), '상권_구분_코드_명': '발달상권', '상권_코드': 1001014, '상권_코드_명': '양재동 꽃시장', '엑스좌표_값': 203547, '와이좌표_값': 440820, '시군구_코드': 11650, '행정동_코드': 11650652, '위도': 37.466798926021674, '경도': 127.04009514194523}
{'_id': ObjectId('6194a480a04fa6007b691094'), '상권_구분_코드_명': '발달상권', '상권_코드': 1001015, '상권_코드_명': '서울 금천구 독산1동_1', '엑스좌표_값': 190842, '와이좌표_값': 441181, '시군구_코드': 11545, '행정동_코드': 11545610, '위도': 37.470013027610726, '경도': 126.89647392543884}
{'_id': ObjectId('6194a480a04fa6007b691095'), '상권_구분_코드_명': '발달상권', '상권_코드': 1001016, '상권_코드_명': '서울 관악구 대학동_1', '엑스좌표_값': 194671, '와이좌표_값': 441227, '시군구_코드': 11620, '행정동_코드': 11620735, '위도': 37.47045748675511, '경도': 126.9397582692801}
{'_id': ObjectId('6194a480a04fa6007b691096'), '상권_구분_코드_명': '골목상권', '상권_코드': 1000155, '상권_코드_명': '아차산로78길', '엑스좌표_값': 209529, '와이좌표_값': 449647, '시군구_코드': 11215, '행정동_코드': 11215810, '위도': 37.54628821661367, '경도': 127.10782968128034}
{'_id': ObjectId('6194a480a04fa6007b691097'), '상권_구분_코드_명': 

In [1]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [46]:
import folium

In [61]:
m = folium.Map(location=[37.5335,126.9896], zoom_start=12)
from folium.plugins import MarkerCluster
marker_cluster = MarkerCluster().add_to(m)

for item in collection.find():
    folium.Marker(location=[item['위도'],item['경도']], popup=item['상권_코드_명'], icon=folium.Icon(color='red', icon='ok'), ).add_to(marker_cluster)

In [62]:
m