In [21]:
import csv
import json
import os
import sys
import urllib.parse
import urllib.request

import pandas as pd
import skmob
from skmob.preprocessing import clustering


In [22]:
# 주소 정보 전처리해주기
def process_addr(addr_info):
    name_exist = False
    addr = ''
    adm = ''
    roadaddr = ''

    for info in addr_info['results']:
        # 도로명 주소가 있는 경우 건물 이름 빼오기
        if info['name'] == 'roadaddr':
            building_name = info['land']['addition0']['value']
            if len(building_name) == 0:
                road_name = info['land']['name']
                number1 = info['land']['number1']
                number2 = info['land']['number2']
                roadaddr = ' '.join([road_name, number1, number2])
                roadaddr = roadaddr.strip()
            else:
                name_exist = True
                    
        if info['name'] == 'addr':
            area1 = info['region']['area1']['name']
            area2 = info['region']['area2']['name']
            area3 = info['region']['area3']['name']
            area4 = info['region']['area4']['name']
            addr = ' '.join([area1, area2, area3, area4])
            addr = addr.strip()

        if info['name'] == 'admcode':
            area1 = info['region']['area1']['name']
            area2 = info['region']['area2']['name']
            area3 = info['region']['area3']['name']
            area4 = info['region']['area4']['name']
            adm = ' '.join([area1, area2, area3, area4])
            adm = adm.strip()


    if name_exist:
        search = building_name
    else:
        if len(adm) != 0:
            search = adm
        if len(addr) != 0:
            search = addr
        if len(roadaddr) != 0:
            search = roadaddr

    return search

In [23]:
def coords_to_place(x, y):
    client_id = "ktta1uumf3"
    client_secret = "PDFJ27DxqU6bQSsf40GE9DwybUnlgRzZnni42Dqi"
    coords = ','.join([x, y])
    
    url = "https://naveropenapi.apigw.ntruss.com/map-reversegeocode/v2/gc?request=coordsToaddr&coords=" + \
        coords + "&sourcecrs=epsg:4326&output=json&orders=roadaddr,admcode,roadaddr"

    request = urllib.request.Request(url)
    request.add_header("X-NCP-APIGW-API-KEY-ID", client_id)
    request.add_header("X-NCP-APIGW-API-KEY", client_secret)
    response = urllib.request.urlopen(request)
    rescode = response.getcode()
    if (rescode == 200):
        response_body = response.read()
        addr_info = response_body.decode('utf-8')
        addr_info = json.loads(addr_info)
    else:
        print("Error Code:" + rescode)

    search = process_addr(addr_info)
    
    return search

In [24]:
def place_to_category(place):
    if len(place) == 0 or place == "none":
        return "none"

    client_id = "G3_TXQoFDd0lBFsM8fpG"
    client_secret = "oJaqz0CK0W"
    encText = urllib.parse.quote(place)
    url = "https://openapi.naver.com/v1/search/local?query=" + encText  # JSON 결과
    # url = "https://openapi.naver.com/v1/search/blog.xml?query=" + encText # XML 결과
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    response = urllib.request.urlopen(request)
    rescode = response.getcode()

    if (rescode == 200):
        response_body = response.read()
        category_info = response_body.decode('utf-8')
        category_info = json.loads(category_info)
    else:
        print("Error Code:" + rescode)

    if len(category_info['items']) == 0:
        return "none"
    item = category_info['items'][0]
    category = item['category']

    return category

In [29]:
device_id_list = []
for it in os.scandir('./staypoint'):
    if it.is_dir():
        if it.name.isdigit():
            device_id_list.append(it.name)
device_id_list.sort()

for device_id in device_id_list:
    path_dir = './cluster/'+device_id
    file_list = os.listdir(path_dir)

    fields = ['deviceid', 'latitude', 'longitude', 'cluster', 'address']

    for cluster in file_list:
        if not cluster.startswith("cluster_"):
            continue

        result = pd.DataFrame(columns=fields)
        with open('./cluster/'+device_id+'/'+cluster, newline='') as file:
            df = pd.read_csv(file)
            for idx in range (len(df)):
                latitude = df.loc[idx]['latitude']
                longitude = df.loc[idx]['longitude']
                x = "{:.7f}".format(latitude)
                y = "{:.7f}".format(longitude)
                address = coords_to_place(y, x)
                res = pd.DataFrame({
                    'deviceid': [df.loc[idx]['deviceid']],
                    'latitude': [latitude],
                    'longitude': [longitude],
                    'cluster': [df.loc[idx]['cluster']],
                    'address': [address]
                })
                result = pd.concat([result, res])

        c = cluster.split('_')[1].split('.')[0]
        result.to_csv("./cluster/"+device_id+'/address_'+ c +'.csv', index=False)

In [30]:
for device_id in device_id_list:
    path_dir = './cluster/'+device_id
    file_list = os.listdir(path_dir)

    fields = ['deviceid', 'latitude', 'longitude',
              'datetime', 'leaving_datetime', 'cluster', 'category']
    f = open('./cluster/'+device_id+'/category.csv', 'w', newline='')
    write = csv.writer(f)
    write.writerow(fields)

    for cluster in file_list:
        if not cluster.startswith("cluster_"):
            continue
        with open('./cluster/'+device_id+'/'+cluster, newline='') as file:
            df = pd.read_csv(file)
            df['datetime'] = pd.to_datetime(df['datetime'])
            # df['year'] = df['datetime'].map(lambda x: x.isocalendar()[0])
            # df['week'] = df['datetime'].map(lambda x: x.isocalendar()[1])
            latitude = df['latitude'].mean()
            longitude = df['longitude'].mean()

            idx = 0
            dif = 1e6
            for row in range(len(df)):
                tmp = abs(df.loc[row]['latitude']-latitude) + \
                    abs(df.loc[row]['longitude']-longitude)
                if (dif > tmp):
                    idx = row
                    dif = tmp

            x = "{:.7f}".format(df.loc[idx]['latitude'])
            y = "{:.7f}".format(df.loc[idx]['longitude'])
            place = coords_to_place(y, x)
            category = place_to_category(place)

            df['category'] = category
            write.writerow(df.loc[idx])

    f.close()

In [31]:
for device_id in device_id_list:
    df = pd.read_csv('./cluster/'+device_id+'/category.csv')
    df = df.sort_values(by=['cluster'])
    df.to_csv("./cluster/"+device_id+'/category.csv', index=False)

In [32]:
df = pd.read_csv('./cluster/51/address_0.csv')
for i in range(len(df)):
    x = str(df.loc[i]['latitude'])
    y = str(df.loc[i]['longitude'])
    place = coords_to_place(y, x)
    category = place_to_category(place)
    # print(place)
    # print(category)


In [None]:
# x = str(37.5646166)
# y = str(127.0356865)
# x = str(37.5687204)
# y = str(127.0261365)
# place = coords_to_place(y,x)
# category = place_to_category(place)
# print(place)
# print(category)