In [11]:
class Image(object):
     """Image operations

     Operations for images based on PIL/Pillow
     """
     def __init__(self, expression):
         self.expression = expression

In [7]:
import base64
from copy import deepcopy
from glob import glob
from io import BytesIO

import PIL.Image
import numpy as np
import vaex
from vaex.image import rgba_2_pil

basedir = '../data/images'

filenames = glob(f'{basedir}/**/*[jpg|png|tiff|jpeg|svd]', recursive=True)
'data/images/cat.jpg' in filenames


@vaex.register_dataframe_accessor('image', override=True)
class Image(object):

    def __init__(self, df):
        self.df = df

    def read_image(self, path):
        print('1')
        try:
            return deepcopy(PIL.Image.open(path))
        except:
            return None

    def _open(self, paths):
        images = [self._read_image(path) for path in self.df[paths].tolist()]
        return np.array(images, dtype="O")

    def as_numpy(self, images):
        images = [np.array(image) for image in images]
        return np.array(images)

    def resize(self, images, size, resample=3):
        images = [image.resize(size, resample=resample) for image in images]
        return np.array(images, dtype="O")

    def as_image(self, arrays):
        return [rgba_2_pil(image_array) for image_array in arrays]


@vaex.register_function(as_property=True)
def path(ar):
    images = []
    for path in ar.tolist():
        try:
            if isinstance(path, bytes):
                path = BytesIO(base64.b64decode(path))
            images.append(deepcopy(PIL.Image.open(deepcopy(path))))
        except:
            images.append(None)
    return np.array(images, dtype="O")


@vaex.register_function()
def open_image(ar):
    images = []
    for path in ar.tolist():
        try:
            if isinstance(path, bytes):
                path = BytesIO(base64.b64decode(path))
            images.append(deepcopy(PIL.Image.open(deepcopy(path))))
        except:
            images.append(None)
    return np.array(images, dtype="O")


def read_images(basedir, image_column_name='image', path_column_name='path'):
    paths = glob(f'{basedir}/**/*[jpg|png|tiff|jpeg|svd]', recursive=True)
    df = vaex.from_dict({path_column_name: paths})
    df[image_column_name] = df[path_column_name].open_image()
    return df


df = read_images(basedir).dropna()
df

images = []
for path in filenames[5:7]:
    with open(path, "rb") as f:
        images.append(base64.b64encode(f.read()))

df2 = vaex.from_dict({'path': images})
df2['image'] = df2.path.open_image()


In [9]:
df.dropna()



#,path,image
0,../data/images/cat,
1,../data/images/dog,
2,../data/images/cat.jpg,'<PIL.WebPImagePlugin.WebPImageFile image mode=R...
3,../data/images/cat/9733.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...
4,../data/images/cat/63.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...
...,...,...
18603,../data/images/dog/5676.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...
18604,../data/images/dog/2119.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...
18605,../data/images/dog/5110.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...
18606,../data/images/dog/3561.jpg,'<PIL.JpegImagePlugin.JpegImageFile image mode=R...


In [17]:
.show()



In [19]:
url_data = df2['image'].values[0]
f'<img src="{url_data}"></img>'



'<img src="<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=320x240 at 0x139FD6190>"></img>'