In [1]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt

from PIL import Image

## Load image data

In [2]:
def convert_img_to_npy(file_list):
    """Convert images in file_list to a numpy array.
    
    Only include the color images.
    Return the numpy array and the color image file list.
    """
    data, flist = [], []
    for f in file_list:
        img = np.array(Image.open(f))
        if img.shape == (256, 256, 3):
            data.append(img)
            flist.append(f)
    return np.array(data), flist

In [3]:
val_files = glob.glob("data/val/*.jpg")
len(val_files)

36500

In [4]:
val_files[:5]

['data/val/Places365_val_00030936.jpg',
 'data/val/Places365_val_00001533.jpg',
 'data/val/Places365_val_00011720.jpg',
 'data/val/Places365_val_00007142.jpg',
 'data/val/Places365_val_00008271.jpg']

In [5]:
test, ftest = convert_img_to_npy(val_files[:5])
test.shape

(5, 256, 256, 3)

In [7]:
val_data, val_flist = convert_img_to_npy(val_files)
val_data.shape

(36421, 256, 256, 3)

In [8]:
assert val_data.shape[0] == len(val_flist)

In [9]:
# save color image data in val to a .npy file
np.save("data/val/val_data.npy", val_data)

# save color image filenames in val to a .txt file
with open("data/val/val_files.txt", "w") as f:
    f.writelines(f"{file}\n" for file in val_flist)