In [1]:
from tensorflow import keras
import os

### 1. Cats and Dogs 데이터셋 다운로드

In [2]:
url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

In [3]:
path_zip = keras.utils.get_file(fname='cats_and_dogs_filtered.zip',origin=url,
                                extract=True, cache_dir='/content')

Downloading data from https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip


In [4]:
path_zip

'/content/datasets/cats_and_dogs_filtered.zip'

### 2. ImageDataGenerator를 이용해 이미지 파일을 load 하기 위한 경로 지정

In [5]:
train_dir = '/content/datasets/cats_and_dogs_filtered/train'

### 3.ImageDataGenerator 객체 생성

In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [7]:
datagen = ImageDataGenerator()

In [8]:
type(datagen)

tensorflow.python.keras.preprocessing.image.ImageDataGenerator

### 4. ImageDataGenerator.flow_from_directory() 함수 사용
* ImageDataGenerator에 데이터를 읽어오는 경로 연결
* 이미지 데이터를 읽어 텐서 객체로 변환하는 파이프라인 설정
* DirectoryIterator 객체로 변화됨

In [9]:
train_generator = datagen.flow_from_directory(
    directory=train_dir,
    target_size=(150, 150),
    batch_size=32,
    shuffle=True,
    class_mode='binary')

Found 2000 images belonging to 2 classes.


In [10]:
type(train_generator)

tensorflow.python.keras.preprocessing.image.DirectoryIterator

### 5. DirectoryIterator 객체의 속성 및 메서드

* .samples : 연결된 경로에서 읽어들일 이미지 파일의 개수

In [11]:
train_generator.samples

2000

* len() : batch 덩어리의 개수 확인 

In [12]:
len(train_generator)

63

In [13]:
2000/32

62.5

* next() : batch 데이터 호출

In [14]:
x, y = next(train_generator)

In [15]:
x.shape, y.shape

((32, 150, 150, 3), (32,))

In [16]:
y

array([0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0.,
       0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1.],
      dtype=float32)

* .__getitem__(idx) : 원하는 index 의 batch 데이터 호출

In [17]:
x, y = train_generator.__getitem__(0)

In [18]:
x.shape, y.shape

((32, 150, 150, 3), (32,))

In [19]:
x, y = train_generator.__getitem__(62)

In [20]:
x.shape, y.shape

((16, 150, 150, 3), (16,))

In [21]:
train_generator.reset()

In [22]:
for idx, data in enumerate(train_generator):
  print(idx, data[0].shape)

0 (32, 150, 150, 3)
1 (32, 150, 150, 3)
2 (32, 150, 150, 3)
3 (32, 150, 150, 3)
4 (32, 150, 150, 3)
5 (32, 150, 150, 3)
6 (32, 150, 150, 3)
7 (32, 150, 150, 3)
8 (32, 150, 150, 3)
9 (32, 150, 150, 3)
10 (32, 150, 150, 3)
11 (32, 150, 150, 3)
12 (32, 150, 150, 3)
13 (32, 150, 150, 3)
14 (32, 150, 150, 3)
15 (32, 150, 150, 3)
16 (32, 150, 150, 3)
17 (32, 150, 150, 3)
18 (32, 150, 150, 3)
19 (32, 150, 150, 3)
20 (32, 150, 150, 3)
21 (32, 150, 150, 3)
22 (32, 150, 150, 3)
23 (32, 150, 150, 3)
24 (32, 150, 150, 3)
25 (32, 150, 150, 3)
26 (32, 150, 150, 3)
27 (32, 150, 150, 3)
28 (32, 150, 150, 3)
29 (32, 150, 150, 3)
30 (32, 150, 150, 3)
31 (32, 150, 150, 3)
32 (32, 150, 150, 3)
33 (32, 150, 150, 3)
34 (32, 150, 150, 3)
35 (32, 150, 150, 3)
36 (32, 150, 150, 3)
37 (32, 150, 150, 3)
38 (32, 150, 150, 3)
39 (32, 150, 150, 3)
40 (32, 150, 150, 3)
41 (32, 150, 150, 3)
42 (32, 150, 150, 3)
43 (32, 150, 150, 3)
44 (32, 150, 150, 3)
45 (32, 150, 150, 3)
46 (32, 150, 150, 3)
47 (32, 150, 150, 3)
48

KeyboardInterrupt: ignored