*Last Edited: 06/07/2022*

In [1]:
import os
import time
import numpy as np
import pandas as pd

from skimage import io
from PIL import Image

from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from IPython.core.interactiveshell import InteractiveShell
## display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

## every data path in this notebook is relative to the project root;
## set the MEETFRESH_ROOT environment variable to run from another location
## (the default keeps the original author's directory)
PROJECT_ROOT = os.environ.get('MEETFRESH_ROOT', '/Users/hanyin')
os.chdir(PROJECT_ROOT)

### Step 1: Prepare Data

In [2]:
## extract item names: the part before the first '.' of every regular file
## found directly inside the original data folder
## hidden files (e.g. macOS '.DS_Store') are skipped: they would produce an
## empty item name, which is never a valid item folder
data_path = 'meetfresh_data/orig'
items = [f.split('.')[0] for f in os.listdir(data_path)
         if not f.startswith('.') and os.path.isfile(os.path.join(data_path, f))]

In [3]:
## split the original images into train / validation / test folders
## (80% / 10% / 10%), written under 'meetfresh_data'; the fixed seed keeps
## the split reproducible between runs
import splitfolders

splitfolders.ratio(
    'meetfresh_data/orig',
    output = 'meetfresh_data',
    seed = 2022,
    ratio = (0.8, 0.1, 0.1),
)

Copying files: 854 files [00:07, 111.56 files/s]


### Step 2: Augment Training Images
#### (a) define the augmentation function

In [4]:
import glob
from datetime import datetime as dtdt

def augment_img(generator, item, batch_size, image_size = 256, n_batches = 101):
    """Write augmented copies of one item's training images to disk.

    Loads every JPEG found in ``IN_DIR/<item>``, resizes each one to
    ``image_size`` x ``image_size`` RGB, then draws ``n_batches`` batches from
    ``generator.flow(...)``, which saves each generated image into
    ``OUT_DIR/<item>`` with the prefix ``aug``. Files already present in the
    output folder are deleted first, so every call starts from an empty folder.
    A one-line summary is printed when the item is done.

    ``IN_DIR`` and ``OUT_DIR`` are module-level globals read at call time.

    Parameters
    ----------
    generator : object exposing ``flow(x, batch_size=..., save_to_dir=...,
        save_prefix=..., save_format=...)`` (e.g. a Keras ImageDataGenerator)
        The augmenter used to produce the images.
    item : str
        Name of the item sub-folder inside ``IN_DIR`` / ``OUT_DIR``.
    batch_size : int
        Number of images generated per batch.
    image_size : int, default 256
        Side length, in pixels, of the square images fed to the generator.
    n_batches : int, default 101
        Number of batches to generate. The default reproduces the original
        ``i > 100`` stop condition, which consumed 101 batches.

    Raises
    ------
    ValueError
        If the input folder contains no JPEG image. Raised before the output
        folder is touched.
    """
    ## in/out folders (os.path.join works with or without a trailing slash)
    input_folder = os.path.join(IN_DIR, item)
    output_folder = os.path.join(OUT_DIR, item)

    ## orig images
    dataset = []
    for fname in os.listdir(input_folder):
        ## splitext looks at the real extension, so names without a dot or
        ## with several dots no longer crash or get skipped by accident
        if os.path.splitext(fname)[1].lower() not in ('.jpg', '.jpeg'):
            continue
        pixels = io.imread(os.path.join(input_folder, fname))
        ## let PIL infer the mode, then convert: grayscale / RGBA sources
        ## become proper RGB instead of being reinterpreted as raw RGB bytes
        picture = Image.fromarray(pixels).convert('RGB')
        picture = picture.resize((image_size, image_size))
        dataset.append(np.array(picture))

    ## fail early, before the previous output is wiped
    if not dataset:
        raise ValueError(f'no JPG images found in {input_folder}')
    x = np.array(dataset)

    ## check output folder: create it, or empty it of files from earlier runs
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    else:
        for f in glob.glob(os.path.join(output_folder, '*')):
            if os.path.isfile(f):
                os.remove(f)

    ## augment images: the iterator is endless, so stop after n_batches;
    ## each batch is saved to disk by flow() as a side effect of being drawn
    batches = generator.flow(
        x, batch_size = batch_size,
        save_to_dir = output_folder, save_prefix = 'aug', save_format = 'jpg')
    for _ in zip(range(n_batches), batches):
        pass
    print(f'{item}:  created {len(glob.glob(output_folder + "/*"))} images by {len(dataset)}   @   {dtdt.now()}')

#### (b) apply the function

In [5]:
## source folder (train split) and destination folder (augmented images);
## item names are appended to these prefixes by augment_img
IN_DIR = 'meetfresh_data/train/'
OUT_DIR = 'meetfresh_data/train_aug/'

## shared augmenter: each generated image gets a random combination of the
## transforms configured below
datagen = ImageDataGenerator(
    ## geometric transforms
    rotation_range = 15,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = [0.3, 0.7],
    horizontal_flip = True,
    ## how pixels exposed by a transform are filled in
    fill_mode = 'nearest',
    ## photometric transforms
    brightness_range = [0.5, 1.5],
    rescale = 1.0 / 255,
    ## array layout: (height, width, channels)
    data_format = 'channels_last')

In [6]:
## run the augmentation for every item, passing the shared generator and
## 16 images per batch; augment_img prints one summary line per item
for item in items: 
    augment_img(generator = datagen, item = item, batch_size = 16)

杏仁布丁:  created 1482 images by 44   @   2022-06-07 10:01:32.694646
仙草1号:  created 1349 images by 40   @   2022-06-07 10:02:07.199485
抹茶鸡蛋仔:  created 1383 images by 41   @   2022-06-07 10:02:51.272096
小芋圆冬瓜茶:  created 1515 images by 45   @   2022-06-07 10:03:39.913611
焦糖布丁:  created 1463 images by 58   @   2022-06-07 10:25:03.753662
小芋圆豆花:  created 1290 images by 51   @   2022-06-07 10:25:40.139280
鲜奶巧克力黑糖珍珠奶茶:  created 1513 images by 45   @   2022-06-07 10:26:38.319486
双芋招牌:  created 1351 images by 40   @   2022-06-07 10:27:15.271464
小芋圆奶茶:  created 1480 images by 44   @   2022-06-07 10:28:10.361219
芋圆6号:  created 1264 images by 50   @   2022-06-07 10:28:49.301295
仙草4号:  created 1482 images by 44   @   2022-06-07 10:29:33.035390
芋圆招牌:  created 1550 images by 46   @   2022-06-07 10:30:15.604906
Q麻薯拼盘:  created 1316 images by 39   @   2022-06-07 10:31:02.924913
绿豆糕:  created 1613 images by 48   @   2022-06-07 10:31:54.762784
原味冰淇凌鸡蛋仔:  created 1417 images by 42   @   2022-06-07 10:32:48.5