### 00. 필요한 데이터 및 모듈 로드



In [9]:
from PIL import Image
from tqdm import tqdm
import os, json, glob, shutil
import pandas as pd 

import warnings
warnings.filterwarnings('ignore')             # warnings 출력 방지

### 01. 함수 및 변수 정의

In [1]:
img_height = 360
img_width = 640

#### 01-1 ) 하나의 좌표 추출 함수


In [7]:
def get_points(points):
  '''
    | get_points | 좌표 추출 함수
  '''
  area_info = []

  for i in range(len(points)):
    # label을 좌표 정보 추출
    y = points[i][0]
    x = points[i][1]
    area_info.append(x)
    area_info.append(y)

  return area_info

#### 01-2 ) 전체 추출 함수

In [8]:
def get_area_infos(dataset_json, area_code):
  '''
    | get_area_infos | 좌표 추출 함수
  '''

  idx = 0
  area_Infos = pd.DataFrame(columns=['video_code','points'])
  video_list = []
  videos = []

  
  # json 파일 열기
  for file in tqdm(dataset_json):
            
    with open(file) as f:
      json_object = json.load(f)

    clip_name = file.split('\\')[-1].split('.')[0]       # ex) 'S000113_065_0270_C_D_F_0'
    video_code = clip_name.split('_')[0]                 # ex) 'S000113'
    
    if video_code not in videos:
      videos.append(video_code)

    # 이미 추출한 정보이면 추출 x
    if video_code in video_list:
      continue

    areas = json_object['annotations']['environment']

    for a in areas:
      # 1 | 보행자도로, 2 | 횡단보도, 8 | 정지선 위반 구역
      if a['area_code'] in area_code:
        points = a['points']

        # 각 area_좌표 추출
        area_info = get_points(points)
        video_list.append(video_code)

        # 데이터 프레임에 정보 추가 
        area_Infos = area_Infos.append(pd.Series({'video_code':video_code, 'points':area_info}), ignore_index=True)

        '''
        if idx == 0:
          area_Infos[idx] = area_info
        else:
          area_Infos.append(area_info)
        '''
        idx += 1

  print(f'video_list : {len(videos)}')
  return area_Infos

#### 01-3) 이미지 사이즈 변경 함수

In [2]:
def img_resize(path):
    '''
         img_resize | 샘플 이미지 size 조절
    '''
    # image 로드 
    imgs = glob.glob(f'{path}/*.jpg')

    for img_file in tqdm(imgs):

        img = Image.open(img_file)
        img_resize = img.resize((640,360))
        img_resize.save(f'{img_file}')

In [30]:
def get_img_sample(dataset_img):

  videos = []
  # image 로드 
  imgs = glob.glob(f'tmp/images/*.jpg')

  # img 파일 열기
  for file in tqdm(imgs):
    clip_name = file.split('\\')[-1].split('.')[0]       # ex) 'S000113_065_0270_C_D_F_0'
    video_code = clip_name.split('_')[0]                 # ex) 'S000113'
    print(video_code)
    # NEW 이미지 추출
    if video_code not in videos:
      videos.append(video_code)
      
    # 이미 추출한 경우, 추출 x
    else: continue
    
    if os.path.exists(file):
        shutil.copy(file, "./custom_sidewalk/images")
        shutil.copy(file, "./custom_stopline/images")
        #shutil.move(file, "./tmp/images")

    else: pass

#### 01-4) 라벨링 클래스 선언

In [10]:
import glob, json, shutil
import pandas as pd
from tqdm import tqdm
from PIL import Image

class Area():
    def __init__(self):

        self.dataset = glob.glob('D:/Users/user/OneDrive/바탕 화면/CODE/Trading/data/tmp/*.json')
        self.img_width = 640
        self.img_height = 360

        self.all_labels = glob.glob('C:./stopline/*.txt')               # 전체 학습 데이터 약 10만개
        self.sample_train_label_path = './custom_crosswalk/labels'            
        self.sample_train_img_path = './custom_crosswalk/images'         

    def make_label(self, clip_name, area_code, points):
        '''
            | make_label | label 생성을 위한 좌표 정규화 작업
        '''
        area_info = [clip_name, area_code]

        for i in range(len(points)):
            # label을 좌표 정보 추출
            y = float(self.img_height*points[i][0]/1080)/self.img_height
            x = float(self.img_width*points[i][1]/1920)/self.img_width
            area_info.append(x)
            area_info.append(y)

        return area_info

    def get_area_infos(self):
        '''
            | get_area_infos | 좌표 추출 함수
        '''
        idx = 0
        area_Infos = [0]
        sample_image_list = []
        sample_img = glob.glob(f'{self.sample_train_img_path}/*.jpg')

        for i, img in enumerate(sample_img):
            sample_image_list.append(img.split('\\')[-1].split('.')[0])
        print(f'샘플이미지 리스트 원소 개수 : {len(sample_image_list)}')

        for file in tqdm(self.dataset):
            
            with open(file) as f:
                json_object = json.load(f)
    
            clip_name = file.split('\\')[-1].split('.')[0] # ex ) 'S000113_065_0270_C_D_F_0'

            # sample에 해당 데이터가 없으면 좌표 추출 x
            if clip_name not in sample_image_list :
                continue

            areas = json_object['annotations']['environment']

            for a in areas:
                
                # 1 | 보행자도로, 2 | 횡단보도, 8 | 정지선, 11, 12 | 신호등 
                 if a['area_code'] in ['2']:
                    class_num = 0
                    points = a['points']

                    # class 정지선만 설정 => class_num : 0 (임시) 
                    area_info = self.make_label(clip_name, class_num , points)

                    if idx == 0:
                        area_Infos[idx] = area_info
                    else:
                        area_Infos.append(area_info)
                    idx += 1

        return area_Infos
    
    def write_labels(self):
        '''
            | write_labels | 전체 라벨 생성 함수 
        '''
        area_infos = self.get_area_infos()
        print(f'area_infos의 len : {len(area_infos)}')

        for clip in tqdm(area_infos):
            
            path = f"{self.sample_train_label_path}/{clip[0]}.txt"
            points = clip[2:]

            values = f'{str(clip[1])}'
            for point in points:
                values += f" {str(point)}"
            values += '\n'
            
            with open(path, 'w') as f:
                f.write(values)

    def copy_to_custom(self):

        # 샘플 학습 이미지 (약 3만개)
        train_images_sample = glob.glob('C:/custom/images/*.jpg')
        images_list = []

        # train_image_sample 리스트 생성
        for file in train_images_sample:
            images_list.append(file.split('\\')[-1].split('.')[0])

        # train_image
        for file in self.all_labels:
            label_name = file.split('\\')[-1].split('.')[0]

            if label_name in images_list:
                shutil.copy(file, self.sample_train_label_path) 
       
if __name__ == '__main__':
    area = Area()
    # 샘플 이미지 크기 재조정
    # area.img_resize()

    # 샘플 이미지 label 생성
    area.write_labels()

샘플이미지 리스트 원소 개수 : 697


100%|██████████| 36495/36495 [03:07<00:00, 194.37it/s]


area_infos의 len : 204


100%|██████████| 204/204 [00:00<00:00, 1929.15it/s]


In [13]:
import glob, os
imgs = glob.glob('./custom_crosswalk/images/*.jpg')
print(len(imgs))

labels = glob.glob('./custom_crosswalk/labels/*.txt')
print(len(labels))



692
692


In [12]:
label_list = []

for i, label in enumerate(labels):
    label_list.append(label.split('\\')[-1].split('.')[0])
print(f'샘플이미지 리스트 원소 개수 : {len(label_list)}')

for file in tqdm(imgs):

    clip_name = file.split('\\')[-1].split('.')[0] # ex ) 'S000113_065_0270_C_D_F_0'

    # sample에 해당 데이터가 없으면 좌표 추출 x
    if clip_name not in label_list :
        os.remove(file)


샘플이미지 리스트 원소 개수 : 692


100%|██████████| 697/697 [00:00<00:00, 966.59it/s]


### 02. 데이터 준비

#### 02-1) 이미지 사이즈 재설정

In [3]:
path ='D:/Users/user/OneDrive/바탕 화면/CCTV/주간/맑음/실증'
img_resize(path)

100%|██████████| 94277/94277 [1:14:18<00:00, 21.14it/s]


In [4]:
path ='D:/custom/images_30000'
img_resize(path)

100%|██████████| 46751/46751 [36:47<00:00, 21.18it/s]


#### 02-2) 영상 1개씩 추출

In [32]:
path ='D:/Users/user/OneDrive/바탕 화면/CCTV/주간/맑음/실증'
get_img_sample(path)

path ='D:/custom/images_30000'
#get_img_sample(path)

 13%|█▎        | 173/1306 [00:00<00:01, 870.75it/s]

C000002
C000002
C000004
C000004
C000006
C000006
C000007
C000007
C000008
C000008
C000009
C000009
C000011
C000011
C000012
C000033
C000033
C000035
C000035
C000036
C000036
C000037
C000037
C000038
C000038
C000039
C000039
C000040
C000040
C000041
C000041
C000042
C000042
C000043
C000043
C000044
C000044
C000045
C000045
C000046
C000046
C000047
C000047
C000048
C000048
C000049
C000049
C000050
C000050
C000051
C000059
C000060
C000060
C000061
C000061
C000062
C000063
C000063
C000064
C000065
C000065
C000067
C000067
C000068
C000068
C000080
C000080
C000084
C000084
C000089
C000090
C000091
C000092
C000101
C000102
C000102
C000103
C000103
C000104
C000104
C000105
C000105
C000106
C000106
C000107
C000107
C000108
C000108
C000109
C000109
C000110
C000110
C000111
C000111
C000112
C000112
C000113
C000113
C000114
C000114
C000115
C000115
C000116
C000116
C000117
C000117
C000294
C000294
C000295
C000295
C000299
C000299
C000300
C000300
C000301
C000301
C000302
C000302
C000303
C000303
C000304
C000304
C000305
C000306
C000306


 20%|█▉        | 261/1306 [00:00<00:01, 838.06it/s]

C000663
C000664
C000665
C000666
C000666
C000667
C000667
C000670
C000670
C000671
C000671
C000675
C000675
C000676
C000676
C000682
C000682
C000685
C000685
C000686
C000687
C000687
C000689
C000689
C000690
C000690
C000691
C000691
C000692
C000693
C000693
C000694
C000695
C000695
C000696
C000696
C000697
C000697
C000698
C000698
C000699
C000699
C000700
C000700
C000701
C000702
C000702
C000703
C000703
C000704
C000704
C000705
C000705
C000706
C000706
C000707
C000708
C000708
C000709
C000709
C000710
C000711
C000711
C000712
C000712
C000713
C000713
C000715
C000715
C000716
C000716
C000717
C000722
C000722
C000723
C000723
C000725
C000725
C000726
C000727
C000728
C000729
C000730
C000730
C000731
C000731
C000738
C000743
C000743
C000744
C000745
C000746
C000747
C000748
C000748
C000749
C000750
C000750
C000751
C000752
C000752
C000753
C000753
C000754
C000755
C000755
C000756
C000756
C000759
C000759
C000760
C000760
C000761
C000761
C000843
C000843
C000844
C000844
C000845
C000845
C000846
C000846
C000848
C000848
C000849


 33%|███▎      | 429/1306 [00:00<00:01, 788.54it/s]

C000879
C000880
C000880
C000881
C000881
C000885
C000885
C000886
C000886
C000887
C000887
C000888
C000888
C000889
C000889
C000890
C000890
C000891
C000891
C000892
C000892
C000893
C000893
C000894
C000894
C000895
C000895
C000896
C000896
C000897
C000897
C000898
C000898
C000900
C000901
C000902
C000902
C000903
C000903
C000904
C000904
C000905
C000905
C000906
C000907
C000908
C000909
C000909
C000911
C000911
C000912
C000913
C000914
C000914
C000915
C000915
C000916
C000916
C000917
C000919
M090535
M090536
M090537
M090538
M090539
M090541
M090543
M090543
M090544
M090546
M090547
M090549
M090550
M090552
M090552
M090553
M090554
M090556
M090557
M090558
M090560
M090561
M090563
M090566
M090567
M090568
M090569
M090570
M090571
M090572
M090574
M090575
M090576
M090578
M090579
M090580
M090581
M090582
M090583
M090584
M090585
M090586
M090588
M090589
M090590
M090591
M090592
M090593
M090594
M090595
M090596
M090597
M090598
M090599
M090600
M090601
M090602
M090603
M090604
M090605
M090606
M090607
M090608
M090609
M090610


 45%|████▍     | 586/1306 [00:00<00:00, 732.04it/s]

M100652
M100652
M100653
M100653
M100654
M100654
M100655
M100656
M100656
M100657
M100657
M100658
M100658
M100659
M100659
M100660
M100660
M100661
M100661
M100662
M100665
M100665
M100667
M100667
M100668
M100669
M100669
M100670
M100670
M100671
M100671
M100672
M100672
M100673
M100673
M100674
M100674
M100675
M100675
M100676
M100677
M100677
M100678
M100679
M100679
M100680
M100681
M100681
M100682
M100682
M100683
M100684
M100684
M100685
M100685
M100686
M100687
M100688
M100689
M100690
M100690
M100691
M100691
M100692
M100692
M100693
M100695
M100695
M100696
M100696
M100697
M100698
M100699
M100699
M100701
M100701
M100702
M100704
M100704
M100705
M100705
M100706
M100707
M100707
M100708
M100710
M100710
M100711
M100711
M100712
M100713
M100714
M100714
M100715
M100715
M100716
M100716
M100718
M100718
M100719
M100720
M100721
M100721
M100722
M100722
M100723
M100724
M100724
M100725
M100727
M100728
M100728
M100729
M100730
M100730
M100731
M100732
M100732
M100733
M100734
M100734
M100735
M100735
M100736
M100736


 58%|█████▊    | 761/1306 [00:00<00:00, 802.12it/s]

S000050
S000051
S000051
S000052
S000052
S000053
S000053
S000054
S000054
S000055
S000055
S000056
S000056
S000057
S000057
S000058
S000058
S000064
S000065
S000065
S000066
S000066
S000067
S000067
S000068
S000068
S000069
S000070
S000070
S000071
S000071
S000073
S000075
S000075
S000079
S000082
S000083
S000083
S000084
S000084
S000085
S000085
S000088
S000088
S000089
S000089
S000090
S000090
S000091
S000091
S000092
S000092
S000093
S000093
S000094
S000094
S000095
S000095
S000096
S000096
S000097
S000097
S000098
S000098
S000099
S000099
S000100
S000100
S000101
S000102
S000102
S000103
S000103
S000105
S000105
S000106
S000106
S000108
S000109
S000109
S000110
S000110
S000111
S000111
S000112
S000112
S000113
S000113
S000114
S000114
S000115
S000115
S000116
S000116
S000117
S000118
S000118
S000119
S000119
S000120
S000120
S000121
S000122
S000122
S000123
S000123
S000126
S000127
S000127
S000128
S000128
S000129
S000129
S000130
S000130
S000132
S000132
S000133
S000133
S000134
S000134
S000135
S000135
S000136
S000136


 72%|███████▏  | 935/1306 [00:01<00:00, 836.01it/s]

S000190
S000191
S000191
S000193
S000193
S000196
S000196
S000197
S000198
S000198
S000199
S000200
S000201
S000201
S000202
S000202
S000203
S000203
S000204
S000205
S000208
S000208
S000209
S000209
S000214
S000214
S000221
S000221
S000222
S000222
S000223
S000223
S000224
S000227
S000227
S000228
S000228
S000229
S000229
S000230
S000230
S000232
S000233
S000233
S000234
S000235
S000235
S000236
S000236
S000237
S000237
S000238
S000238
S000239
S000239
S000241
S000241
S000242
S000242
S000243
S000243
S000244
S000244
S000478
S000478
S000479
S000479
S000481
S000481
S000483
S000483
S000487
S000487
S000488
S000488
S000494
S000495
S000495
S000496
S000496
S000504
S000504
S000505
S000505
S000506
S000506
S000507
S000507
S000508
S000508
S000509
S000509
S000510
S000510
S000511
S000511
S000512
S000512
S000513
S000513
S000521
S000522
S000526
S000526
S000527
S000527
S000528
S000528
S000529
S000529
S000532
S000532
S000533
S000533
S000534
S000534
S000535
S000535
S000536
S000536
S000537
S000537
S000538
S000538
S000541


 84%|████████▍ | 1101/1306 [00:01<00:00, 673.97it/s]

S000743
S000744
S000744
S000745
S000746
S000747
S000747
S000748
S000749
S000750
S000751
S000752
S000753
S000754
S000754
S000755
S000755
S000756
S000756
S000757
S000758
S000761
S000761
S000762
S000762
S000763
S000763
S000764
S000764
S000765
S000765
S000766
S000766
S000768
S000769
S000769
S000770
S000770
S000772
S000772
S000782
S000783
S000784
S000785
S000786
S000787
S000788
S000789
S000790
S000791
S000792
S000793
S000796
S000797
S000798
S000799
S000800
S000801
S000802
S000805
S000806
S000809
S000810
S000811
S000813
S000814
S000815
S000816
S000817
S000818
S000819
S000820
S000821
S000822
S000823
S000824
S000825
S000932
S000933
S000934
S000935
S000940
S000941
S000942
S000944
S000945
S000946
S000947
S000948
S000949
S000950
S000951
S000952
S000953
S000954
S000955
S000956
S000961
S000962
S000963
S000964
S000965
S000966
S000967
S000968
S000969
S000970
S000971
S000973
S000974
S000975
S000976
S000977
S000979
S000981
S000982
S001016
S001017


 90%|████████▉ | 1172/1306 [00:01<00:00, 604.68it/s]

S001019
S001037
S001049
S001050
S001051
S001052
S001059
S001060
S001061
S001062
S001063
S001065
S001066
S001067
S001068
S001070
S001071
S001073
S001074
S001075
S001076
S001077
S001078
S001096
S001097
S001098
S001099
S001100
S001101
S001102
S001103
S001104
S001105
S001110
S001111
S001113
S001114
S001115
S001116
S001117
S002010
S002011
S002012
S002013
S002015
S002016
S002017
S002018
S002019
S002020
S002021
S002022
S002023
S002025
S002026
S002027
S002028
S002029
S002031
S002032
S002033
S002034
S002036
S002037
S002038
S002039
S002040
S002041
S002042
S002043
S002044
S002045
S002046
S002047
S002048
S002049
S002051
S002053
S002054
S002055
S002058
S002059
S002060
S002061
S002062
S002063
S002064
S002066
S002067
S002069
S002082
S002083
S002084
S002085
S002086


 99%|█████████▉| 1295/1306 [00:01<00:00, 535.05it/s]

S002087
S002091
S002092
S002093
S002094
S002095
S002096
S002097
S002098
S002099
S002100
S002155
S002156
S002163
S002164
S002165
S002166
S002167
S002168
S002169
S002170
S002172
S002173
S002174
S002175
S002176
S002177
S002178
S002179
S002180
S002181
S002182
S002183
S002184
S002185
S002186
S002188
S002189
S002190
S002191
S002193
S002194
S002196
S002197
S002199
S002200
S002201
S002202
S002203
S002204
S002206
S002207
S002208
S002209
S002210
S002211
S002215
S002216
S002220
S002221
S002222
S002223
S002224
S002225
S002226
S002227
S002229
S002230
S002231
S002232
S002233
S002234
S002235
S002236
S002237
S002244
S002245
S002246
S002247
S002248
S002249
S002250
S002251
S002253
S002254
S002255
S002262
S002263
S002264
S002265
S002266
S002267
S002268
S002269
S002270
S002271
S002272
S002273
S002274


100%|██████████| 1306/1306 [00:01<00:00, 691.27it/s]

S002275
S002276
S002277
S002505
S002506
S002507
S002508
S002509
S002510
S002511
S002512





### 03. 좌표 데이터 프레임 생성

02-0 ) 데이터 파일 로드

In [7]:
import glob
import pandas as pd
dataset_json = glob.glob('D:/Users/user/OneDrive/바탕 화면/CODE/Trading/data/*.json')
print(len(dataset_json))

108892


02-1 ) 보행자 도로 파일 생성

In [14]:
sidewalk = get_area_infos(dataset_json,'1')
sidewalk.to_csv('sidewalk_ori.csv',index = False)

100%|██████████| 108892/108892 [08:54<00:00, 203.88it/s]

video_list : 651





02-2 ) 횡단보도 파일 생성

In [15]:
crosswalk = get_area_infos(dataset_json,'2')
crosswalk.to_csv('crosswalk_ori.csv',index = False)

100%|██████████| 108892/108892 [00:35<00:00, 3076.05it/s]

video_list : 651





02-3 ) 정지선 위반 구역 파일 생성

In [16]:
stopline_area = get_area_infos(dataset_json,'8')
stopline_area.to_csv('stopline_area_ori.csv',index = False)

100%|██████████| 108892/108892 [00:35<00:00, 3096.91it/s]

video_list : 651





In [32]:
print(len(sidewalk['video_code'].unique()))
print(len(crosswalk['video_code'].unique()))
print(len(stopline_area['video_code'].unique()))

NameError: name 'sidewalk' is not defined

02-4 ) 신호등 좌표 파일 생성

##### 신호등 전용 함수

In [81]:
def get_points(points):
  '''
    | get_points | 좌표 추출 함수
  '''
  area_info = []

  for i in range(len(points)):
    # label을 좌표 정보 추출
    y = points[i][0]
    x = points[i][1]
    area_info.append(x)
    area_info.append(y)

  return area_info

def get_area_infos(dataset_json, area_code):
  '''
    | get_area_infos | 좌표 추출 함수
  '''

  idx = 0
  area_Infos = pd.DataFrame(columns=['video_code','points'])
  video_list = []
  videos = []

  
  # json 파일 열기
  for file in tqdm(dataset_json):
            
    with open(file) as f:
      json_object = json.load(f)

    clip_name = file.split('\\')[-1].split('.')[0]       # ex) 'S000113_065_0270_C_D_F_0'
    video_code = clip_name.split('_')[0]                 # ex) 'S000113'
    
    if video_code not in videos:
      videos.append(video_code)

    # 이미 추출한 정보이면 추출 x
    if video_code in video_list:
      continue

    areas = json_object['annotations']['environment']

    for a in areas:
      # 1 | 보행자도로, 2 | 횡단보도, 8 | 정지선 위반 구역
      x_list = []
      y_list = []
      if a['area_code'] in area_code:
        points = a['points']

        # 각 area_좌표 추출
        area_info = get_points(points)
        video_list.append(video_code)
      
        # 좌표 리스트 
        for i in range(len(area_info)):
          if i % 2 != 0:      # 홀수 인덱스
            y_list.append(int(area_info[i]))
          else :              # 짝수 인덱스
            x_list.append(int(area_info[i]))

        min_x = min(x_list)
        min_y = min(y_list)
        max_x = max(x_list)
        max_y = max(y_list)
        
        w = max_x - min_x
        h = max_y - min_y
        left_x = min_x
        left_y = min_y


        if w <= 0 or h <= 0:
          print(video_code)
          print("----------------------")
          print('x_list : ', x_list)
          print('y_list : ', y_list)
          print('x_left : ', left_x)
          print('y_left : ', left_y)
          print('w : ', w)
          print('h : ', h)
          print("----------------------")


        # 데이터 프레임에 정보 추가 
        area_Infos = area_Infos.append(pd.Series({'video_code':video_code, 'left_x':left_x, 'left_y': left_y, 'width': w, 'height': h}), ignore_index=True)

        '''
        if idx == 0:
          area_Infos[idx] = area_info
        else:
          area_Infos.append(area_info)
        '''
        idx += 1

  print(f'video_list : {len(videos)}')
  return area_Infos

In [82]:
stopline_area = get_area_infos(dataset_json,['11', '12'])
print(stopline_area)
# stopline_area.to_csv('traffic_ori.csv',index = False)

100%|██████████| 108892/108892 [07:19<00:00, 247.88it/s] 

video_list : 651
    video_code points  left_x  left_y  width  height
0      C000002    NaN   557.0   235.0   21.0     7.0
1      C000004    NaN  1349.0    85.0   79.0    28.0
2      C000006    NaN  1444.0   147.0   86.0    28.0
3      C000007    NaN  1278.0   130.0   88.0    34.0
4      C000008    NaN  1395.0   101.0   27.0    13.0
..         ...    ...     ...     ...    ...     ...
498    S000766    NaN   830.0   134.0   57.0    23.0
499    S000768    NaN   410.0   277.0   53.0    21.0
500    S000782    NaN   872.0    98.0   53.0    16.0
501    S000783    NaN   737.0    99.0   47.0    23.0
502    S000785    NaN  1298.0   113.0   32.0    14.0

[503 rows x 6 columns]





In [31]:
stopline_area = stopline_area[['video_code','left_x', 'left_y', 'width', 'height']]
stopline_area.to_csv('traffic_ori_ver2.csv',index = False)

### 04. 데이터 위도, 경도 생성

04-1 ) json 데이터 불러오기

In [59]:
import glob
import pandas as pd
dataset_json = glob.glob('D:/Users/user/OneDrive/바탕 화면/CODE/Trading/data/*.json')
seoul_info = pd.read_csv('seoul_lat_lon.csv')
print(len(dataset_json))
print(len(seoul_info))

108892
718


04-2) 사용할 함수 정의

In [78]:
def get_video_list(dataset_json):
    '''
    | get_video_list | video list 반환 함수
    '''

    video_list = []
    area_Infos = pd.DataFrame(columns=['video_code'])

    
    # json 파일 열기
    for file in tqdm(dataset_json):

        clip_name = file.split('\\')[-1].split('.')[0]       # ex) 'S000113_065_0270_C_D_F_0'
        video_code = clip_name.split('_')[0]                 # ex) 'S000113'
        
        if video_code not in video_list:
            video_list.append(video_code)
            area_Infos = area_Infos.append(pd.Series({'video_code':video_code}), ignore_index=True)
    return area_Infos

In [79]:
area_Infos = get_video_list(dataset_json)
print(len(area_Infos))
area_Infos

100%|██████████| 108892/108892 [00:01<00:00, 87239.55it/s]

651





Unnamed: 0,video_code
0,C000002
1,C000004
2,C000006
3,C000007
4,C000008
...,...
646,S000772
647,S000782
648,S000783
649,S000784


In [80]:
area_Infos.to_csv('video_infos.csv', index=False)

In [54]:
print(len(seoul_info['시군구'].unique()))
seoul_info['시군구'].unique()

25


array(['종로구', '중구', '용산구', '성동구', '광진구', '동대문구', '중랑구', '성북구', '강북구',
       '도봉구', '노원구', '은평구', '서대문구', '마포구', '양천구', '강서구', '구로구', '금천구',
       '영등포구', '동작구', '관악구', '서초구', '강남구', '송파구', '강동구'], dtype=object)

In [72]:
seoul_info = pd.read_csv('seoul_lat_lon.csv')
print(len(seoul_info))

# 1. 종로구 
종로구 = seoul_info[seoul_info['시군구'] == '종로구']
print('종로구 행정동 수 : ', len(종로구))

# 2. 중구 
중구 = seoul_info[seoul_info['시군구'] == '중구']
print('중구 행정동 수 : ', len(중구))

# 3. 용산구 
용산구 = seoul_info[seoul_info['시군구'] == '용산구']
print('용산구 행정동 수 : ', len(용산구))

# 4. 성동구 
성동구 = seoul_info[seoul_info['시군구'] == '성동구']
print('성동구 행정동 수 : ', len(성동구))

# 5. 광진구 
광진구 = seoul_info[seoul_info['시군구'] == '광진구']
print('광진구 행정동 수 : ', len(광진구))

# 6. 동대문구 
동대문구 = seoul_info[seoul_info['시군구'] == '동대문구']
print('동대문구 행정동 수 : ', len(동대문구))

# 7. 중랑구
중랑구 = seoul_info[seoul_info['시군구'] == '중랑구']
print('중랑구 행정동 수 : ', len(중랑구))

# 8. 성북구
성북구 = seoul_info[seoul_info['시군구'] == '성북구']
print('성북구 행정동 수 : ', len(성북구))

# 9. 성북구
강북구 = seoul_info[seoul_info['시군구'] == '강북구']
print('강북구 행정동 수 : ', len(강북구))

# 10. 도봉구
도봉구 = seoul_info[seoul_info['시군구'] == '도봉구']
print('도봉구 행정동 수 : ', len(도봉구))

# 11. 노원구
노원구 = seoul_info[seoul_info['시군구'] == '노원구']
print('노원구 행정동 수 : ', len(노원구))

# 12. 은평구
은평구 = seoul_info[seoul_info['시군구'] == '은평구']
print('은평구 행정동 수 : ', len(은평구))

# 13. 서대문구
서대문구 = seoul_info[seoul_info['시군구'] == '서대문구']
print('서대문구 행정동 수 : ', len(서대문구))

# 14. 마포구
마포구 = seoul_info[seoul_info['시군구'] == '마포구']
print('마포구 행정동 수 : ', len(마포구))

# 15. 양천구
양천구 = seoul_info[seoul_info['시군구'] == '양천구']
print('양천구 행정동 수 : ', len(양천구))

# 16. 강서구
강서구 = seoul_info[seoul_info['시군구'] == '강서구']
print('강서구 행정동 수 : ', len(강서구))

# 17. 구로구
구로구 = seoul_info[seoul_info['시군구'] == '구로구']
print('구로구 행정동 수 : ', len(구로구))

# 18. 금천구
금천구 = seoul_info[seoul_info['시군구'] == '금천구']
print('금천구 행정동 수 : ', len(금천구))

# 19. 영등포구
영등포구 = seoul_info[seoul_info['시군구'] == '영등포구']
print('영등포구 행정동 수 : ', len(영등포구))

# 20. 동작구
동작구 = seoul_info[seoul_info['시군구'] == '동작구']
print('동작구 행정동 수 : ', len(동작구))

# 21. 관악구
관악구= seoul_info[seoul_info['시군구'] == '관악구']
print('관악구 행정동 수 : ', len(관악구))

# 22. 서초구
서초구= seoul_info[seoul_info['시군구'] == '서초구']
print('서초구 행정동 수 : ', len(서초구))

# 23. 강남구
강남구 = seoul_info[seoul_info['시군구'] == '강남구']
print('강남구 행정동 수 : ', len(강남구))

# 24. 송파구
송파구 = seoul_info[seoul_info['시군구'] == '송파구']
print('송파구 행정동 수 : ', len(송파구))

# 25. 강동구
강동구= seoul_info[seoul_info['시군구'] == '강동구']
print('강동구 행정동 수 : ', len(강동구))




651
종로구 행정동 수 :  49
중구 행정동 수 :  48
용산구 행정동 수 :  46
성동구 행정동 수 :  31
광진구 행정동 수 :  13
동대문구 행정동 수 :  17
중랑구 행정동 수 :  13
성북구 행정동 수 :  47
강북구 행정동 수 :  17
도봉구 행정동 수 :  5
노원구 행정동 수 :  24
은평구 행정동 수 :  20
서대문구 행정동 수 :  26
마포구 행정동 수 :  39
양천구 행정동 수 :  21
강서구 행정동 수 :  19
구로구 행정동 수 :  14
금천구 행정동 수 :  5
영등포구 행정동 수 :  41
동작구 행정동 수 :  15
관악구 행정동 수 :  24
서초구 행정동 수 :  28
강남구 행정동 수 :  36
송파구 행정동 수 :  40
강동구 행정동 수 :  13
