In [71]:
import skmob
import pandas as pd
import csv, os, sys, json
import urllib.request
from skmob.preprocessing import clustering


In [72]:
def coords_to_place(x, y):
    """Reverse-geocode a coordinate pair into a building name.

    Calls the Naver ``map-reversegeocode`` endpoint (``sourcecrs=epsg:4326``,
    ``orders=roadaddr``) and returns the building name attached to the first
    road-address result.

    Args:
        x: Longitude, as a string or a number (the caller in this file passes
            the ``lng`` value formatted to 7 decimals).
        y: Latitude, as a string or a number.

    Returns:
        The ``land['addition0']['value']`` field of the first result (an empty
        string when that field is absent or empty), or the sentinel string
        ``"none"`` when the response status is not 200 or the response holds
        no results.

    Raises:
        urllib.error.URLError: Network failures and HTTP error statuses raised
            by ``urlopen`` propagate unchanged.
    """
    # FIXME(security): the literal fallbacks are credentials committed to the
    # notebook. Prefer the environment variables and rotate/remove the literals.
    client_id = os.environ.get("NCP_APIGW_API_KEY_ID", "ktta1uumf3")
    client_secret = os.environ.get(
        "NCP_APIGW_API_KEY", "PDFJ27DxqU6bQSsf40GE9DwybUnlgRzZnni42Dqi")

    # format() accepts both pre-formatted strings and raw numbers.
    coords = "{},{}".format(x, y)
    url = "https://naveropenapi.apigw.ntruss.com/map-reversegeocode/v2/gc?request=coordsToaddr&coords=" + \
        coords + "&sourcecrs=epsg:4326&output=json&orders=roadaddr"

    request = urllib.request.Request(url)
    request.add_header("X-NCP-APIGW-API-KEY-ID", client_id)
    request.add_header("X-NCP-APIGW-API-KEY", client_secret)

    # The context manager closes the connection even when parsing fails.
    with urllib.request.urlopen(request) as response:
        rescode = response.getcode()
        if rescode != 200:
            # Nothing usable to parse; report the status and use the sentinel.
            print("Error Code:" + str(rescode))
            return "none"
        addr_info = json.loads(response.read().decode('utf-8'))

    results = addr_info.get('results') or []
    if not results:
        return "none"

    # Only the building name is used as the search term downstream.
    land = results[0].get('land') or {}
    addition0 = land.get('addition0') or {}
    return addition0.get('value', '')


In [73]:
def place_to_category(place):
    """Look up the category of a place name with the Naver local search API.

    Args:
        place: Search term, normally the building name returned by
            ``coords_to_place``. ``None``, an empty string and the sentinel
            ``"none"`` are treated as "nothing to search for".

    Returns:
        The ``category`` field of the first search hit, or the sentinel string
        ``"none"`` when there is nothing to search for, the response status is
        not 200, or the search returns no items.

    Raises:
        urllib.error.URLError: Network failures and HTTP error statuses raised
            by ``urlopen`` propagate unchanged.
    """
    if not place or place == "none":
        return "none"

    # FIXME(security): the literal fallbacks are credentials committed to the
    # notebook. Prefer the environment variables and rotate/remove the literals.
    client_id = os.environ.get("NAVER_CLIENT_ID", "G3_TXQoFDd0lBFsM8fpG")
    client_secret = os.environ.get("NAVER_CLIENT_SECRET", "oJaqz0CK0W")

    # The query is percent-encoded because place names are typically non-ASCII.
    encText = urllib.parse.quote(place)
    url = "https://openapi.naver.com/v1/search/local?query=" + encText

    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)

    # The context manager closes the connection even when parsing fails.
    with urllib.request.urlopen(request) as response:
        rescode = response.getcode()
        if rescode != 200:
            # Nothing usable to parse; report the status and use the sentinel.
            print("Error Code:" + str(rescode))
            return "none"
        category_info = json.loads(response.read().decode('utf-8'))

    items = category_info.get('items') or []
    if not items:
        return "none"
    return items[0].get('category', "none")


In [74]:
# Device ids are the all-digit sub-directory names of ./staypoint, sorted as
# strings (so '10' sorts before '2').
device_id_list = sorted(
    entry.name
    for entry in os.scandir('./staypoint')
    if entry.is_dir() and entry.name.isdigit()
)


def get_week(device_id):
    """Load one device's stay points and split them into weekly frames.

    Reads ``./staypoint/<device_id>/stay_points.csv`` into a skmob
    TrajDataFrame (``arrive_time`` becomes the ``datetime`` column), parses
    ``leaving_datetime`` as datetimes, and groups the rows by ``datetime``
    with a weekly (``freq='W'``) grouper.

    Args:
        device_id: Directory name of the device, as a string.

    Returns:
        A list with one frame per weekly group, each indexed by ``datetime``.
        Callers should expect that some frames may be empty (``get_cluster``
        skips empty ones).
    """
    csv_path = './staypoint/' + device_id + '/stay_points.csv'
    stay_points = skmob.TrajDataFrame.from_file(
        csv_path,
        latitude='latitude',
        longitude='longitude',
        user_id='deviceid',
        datetime='arrive_time',
    )
    stay_points['leaving_datetime'] = pd.to_datetime(stay_points['leaving_datetime'])

    weekly_groups = stay_points.set_index('datetime').groupby(pd.Grouper(freq='W'))
    return [frame for _, frame in weekly_groups]


def get_cluster(weeks):
    """Cluster the stay points of every non-empty weekly frame.

    Args:
        weeks: Iterable of weekly frames as produced by ``get_week``.

    Returns:
        A list with one clustered frame per non-empty week, in input order.
        Each frame is the result of ``clustering.cluster`` on the week with
        its index reset, using a 0.2 km radius and ``min_samples=1``.
    """
    return [
        clustering.cluster(week.reset_index(), cluster_radius_km=0.2, min_samples=1)
        for week in weeks
        if not week.empty
    ]


def get_rep_cluster(cluster_list):
    """Build one table of labelled cluster centroids per clustered week.

    For every clustered frame, rows of cluster 0 are discarded, the remaining
    rows are averaged per cluster, and each averaged coordinate is labelled
    with a category obtained from ``coords_to_place`` followed by
    ``place_to_category``.

    Args:
        cluster_list: Iterable of clustered frames (as returned by
            ``get_cluster``) with at least the columns ``datetime``, ``uid``,
            ``lat``, ``lng`` and ``cluster``.

    Returns:
        A list of frames indexed by cluster id with exactly the columns
        ``uid``, ``lat``, ``lng``, ``year``, ``week``, ``category``. Weeks in
        which only cluster 0 occurs contribute nothing.
    """
    # Columns averaged per cluster. Selecting them explicitly keeps the output
    # aligned with the six-field CSV header written by the caller; a bare
    # groupby(...).mean() also averages (or rejects) the datetime columns on
    # pandas >= 2.0, where numeric_only defaults to False.
    centroid_columns = ['uid', 'lat', 'lng', 'year', 'week']

    rep_cluster_list = []
    for df in cluster_list:
        # NOTE(review): cluster 0 is dropped on purpose by the original code;
        # presumably it is the most-visited (home) cluster -- confirm.
        ndf = df.drop(df.index[df['cluster'] == 0])
        if ndf.empty:
            continue

        ndf['year'] = ndf['datetime'].map(lambda ts: ts.isocalendar()[0])
        ndf['week'] = ndf['datetime'].map(lambda ts: ts.isocalendar()[1])

        # Take the mean per cluster to get one representative coordinate.
        ndf = ndf.groupby('cluster')[centroid_columns].mean()
        ndf['year'] = ndf['year'].astype('object')
        ndf['category'] = ''

        for cluster_id in ndf.index:
            # The reverse-geocoding helper takes longitude first, then latitude.
            x = "{:.7f}".format(ndf.loc[cluster_id, 'lng'])
            y = "{:.7f}".format(ndf.loc[cluster_id, 'lat'])
            place = coords_to_place(x, y)
            ndf.loc[cluster_id, 'category'] = place_to_category(place)

        rep_cluster_list.append(ndf)

    return rep_cluster_list


for device_id in device_id_list:
    # Header row written first in every device's category.csv.
    fields = ['deviceid', 'latitude', 'longitude', 'year', 'week', 'category']
    path = "./staypoint/" + device_id

    # weekly split -> per-week clustering -> labelled cluster centroids
    weeks = get_week(device_id)
    cluster_list = get_cluster(weeks)
    represent = get_rep_cluster(cluster_list)

    with open(path + '/category.csv', 'w', newline='') as f:
        write = csv.writer(f)
        write.writerow(fields)
        # One CSV row per cluster, across all weekly tables, in order.
        write.writerows(row for table in represent for row in table.values.tolist())


In [76]:
# Spot check: split device '53' into weekly frames, cluster each non-empty
# week, and print the resulting list of clustered frames.
weeks = get_week('53')
cluster_list = get_cluster(weeks)
print(cluster_list)

[              datetime  uid        lat         lng    leaving_datetime  \
0  2019-10-24 11:37:58   53  37.570761  127.034212 2019-10-24 13:42:10   
1  2019-10-24 14:09:18   53  37.570762  127.034217 2019-10-24 17:39:30   
2  2019-10-24 18:21:04   53  37.560932  127.032245 2019-10-24 19:03:08   
3  2019-10-24 19:18:16   53  37.560945  127.032240 2019-10-25 08:21:12   
4  2019-10-25 08:25:18   53  37.564035  127.036923 2019-10-25 09:41:02   
5  2019-10-25 12:17:50   53  38.076584  127.320265 2019-10-25 13:04:01   
6  2019-10-25 14:23:11   53  38.067250  127.325462 2019-10-25 15:27:01   
7  2019-10-25 17:53:39   53  37.562298  127.035562 2019-10-25 21:01:18   
8  2019-10-25 21:12:30   53  37.560922  127.032230 2019-10-26 17:49:13   
9  2019-10-26 17:53:16   53  37.560868  127.038064 2019-10-26 18:27:34   
10 2019-10-26 18:27:34   53  37.559276  127.034890 2019-10-26 22:04:57   
11 2019-10-26 22:07:01   53  37.560921  127.032232 2019-10-27 13:02:46   
12 2019-10-27 14:08:45   53  37.56924