In [1]:
from google.colab import drive
drive.mount('/content/drive')
import os
import shutil

Mounted at /content/drive


# label의 클래스 73개 확인하기


In [2]:
import os
from collections import Counter

def count_classes(label_dir):
    """
    label_dir 경로 내 모든 .txt 파일을 순회하며
    각 라인 첫 번째 값을 클래스 인덱스로 간주하고 개수를 센다.
    """
    class_counts = Counter()
    for root, _, files in os.walk(label_dir):
        for file in files:
            if not file.endswith('.txt'):
                continue
            path = os.path.join(root, file)
            with open(path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if parts:
                        try:
                            cls = int(parts[0])
                            class_counts[cls] += 1
                        except ValueError:
                            # 첫 토큰이 정수가 아닐 경우 무시
                            continue
    return class_counts

In [3]:
train_class_count = count_classes('/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/labels')
val_class_count = count_classes('/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/labels')

print(f'train class count: {len(train_class_count)}')
print(f'val: {len(val_class_count)}')

train class count: 73
val: 0


# 데이터 스플릿

In [4]:
import os
import random

from_ai02_img_path = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/ai02-level1-project/train/images'
to_val_img_path = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/images'
to_train_img_path = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/images'

from_train_labels_path = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/labels'
to_val_labels_path = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/labels'


file_list = os.listdir(from_ai02_img_path)
file_name_list = []
for file in file_list:
  file = os.path.splitext(file)[0]
  file_name_list.append(file)

random.seed(42)
random.shuffle(file_name_list)

num = len(file_name_list)
train_n = int(num * 0.8)

train_names = file_name_list[:train_n]
val_names = file_name_list[train_n:]

In [5]:
print(f'total: {len(file_list)}')
print(f'train_names: {len(train_names)}')
print(f'val_names: {len(val_names)}')

total: 1489
train_names: 1191
val_names: 298


In [6]:
# ai02 -> data/train 이미지 옮기기
for file in train_names:
  if not os.path.exists(f'{from_train_labels_path}/{file}.txt'):
    continue
  shutil.move(f'{from_ai02_img_path}/{file}.png', f'{to_train_img_path}/{file}.png')

# train/label -> val/label으로 txt 파일 옮겨주기
for file in val_names:
  # 만약 train label에 없으면 넘긱기
  if not os.path.exists(f'{from_train_labels_path}/{file}.txt'):
    continue
  shutil.move(f'{from_train_labels_path}/{file}.txt', f'{to_val_labels_path}/{file}.txt')

# ai02 -> data/val 이미지 옮기기
for file in val_names:
  # 만약 label txt가 없으면 이미지 옮기지 말기
  if not os.path.exists(f'{to_val_labels_path}/{file}.txt'):
    continue
  shutil.move(f'{from_ai02_img_path}/{file}.png', f'{to_val_img_path}/{file}.png')



In [7]:
from_train_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/images'
to_ai02_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/ai02-level1-project/train/images'

from_val_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/images'
to_val_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/ai02-level1-project/train/images'

from_val_label = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/labels'
to_train_label = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/labels'


check_train_img_list = os.listdir(from_train_img)
check_val_img_list = os.listdir(from_val_img)
check_val_label_list = os.listdir(from_val_label)
check_train_label_list = os.listdir(to_train_label)

print(f'check_train_img_list: {len(check_train_img_list)}')
print(f'check_train_label_list: {len(check_train_label_list)}')
print(f'check_val_img_list: {len(check_val_img_list)}')
print(f'check_val_label_list: {len(check_val_label_list)}')

check_train_img_list: 1191
check_train_label_list: 1192
check_val_img_list: 298
check_val_label_list: 298


## 클래스 균등 개수 확인

In [8]:
train_class_count = count_classes('/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/labels')
val_class_count = count_classes('/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/labels')

print(f'train class count: {len(train_class_count)}')
print(f'val: {len(val_class_count)}')
print(f'train class count: {train_class_count}')
print(f'val: {val_class_count}')

train class count: 73
val: 73
train class count: Counter({50: 515, 29: 226, 0: 182, 16: 176, 4: 146, 1: 136, 38: 115, 39: 114, 31: 110, 32: 106, 37: 104, 64: 101, 34: 96, 52: 95, 62: 91, 58: 87, 60: 85, 56: 85, 53: 84, 57: 83, 61: 81, 59: 80, 54: 80, 55: 80, 51: 75, 27: 60, 15: 58, 28: 58, 6: 39, 65: 38, 7: 38, 18: 37, 2: 36, 24: 36, 25: 36, 9: 35, 17: 35, 19: 35, 20: 35, 22: 35, 26: 35, 3: 34, 21: 34, 23: 34, 63: 33, 72: 33, 8: 33, 5: 30, 10: 30, 12: 30, 14: 28, 11: 27, 13: 27, 47: 27, 42: 26, 40: 24, 35: 24, 33: 23, 45: 23, 44: 23, 48: 23, 49: 23, 46: 22, 30: 21, 41: 20, 36: 20, 43: 19, 66: 14, 67: 11, 70: 11, 69: 9, 71: 9, 68: 9})
val: Counter({50: 115, 29: 68, 16: 49, 0: 42, 1: 38, 37: 34, 32: 30, 4: 29, 64: 25, 34: 24, 31: 24, 38: 23, 59: 23, 51: 23, 53: 22, 54: 22, 55: 22, 39: 21, 61: 20, 60: 19, 56: 18, 15: 18, 52: 17, 62: 17, 28: 17, 58: 14, 27: 14, 17: 13, 3: 11, 21: 11, 43: 11, 57: 11, 65: 10, 2: 10, 22: 10, 19: 10, 20: 10, 26: 10, 30: 10, 36: 10, 63: 9, 72: 9, 13: 9, 24: 9, 

# train/val img, label 초기화 to ai02

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
import shutil

from_train_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/images'
to_ai02_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/ai02-level1-project/train/images'

from_val_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/images'
to_val_img = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/ai02-level1-project/train/images'

from_val_label = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/val/labels'
to_train_label = '/content/drive/Othercomputers/내 MacBook Air/Documents/ai_project/beginner_project/data/train/labels'

train_img_list = os.listdir(from_train_img)
val_img_list = os.listdir(from_val_img)
label_list = os.listdir(from_val_label)

for file in train_img_list:
  shutil.move(f'{from_train_img}/{file}', f'{to_ai02_img}/{file}')
for file in val_img_list:
  shutil.move(f'{from_val_img}/{file}', f'{to_ai02_img}/{file}')
for file in label_list:
  shutil.move(f'{from_val_label}/{file}', f'{to_train_label}/{file}')

check_train_img_list = os.listdir(from_train_img)
check_val_img_list = os.listdir(from_val_img)
check_val_label_list = os.listdir(from_val_label)
print(f'train img 개수: {len(check_train_img_list)}')
print(f'val img 개수: {len(check_val_img_list)}')
print(f'val label 개수: {len(check_val_label_list)}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
train img 개수: 0
val img 개수: 0
val label 개수: 0
