# Cloning the GitHub Repository

In [1]:
%%capture
# Clone the kcg-ml repository and change into it, so the relative './output'
# and './datasets' paths used by the cells below are resolved from the
# repository root.
# NOTE(review): re-running this cell in the same session starts from inside
# kcg-ml/, so the clone and %cd would nest one level deeper — restart first.
!git clone https://github.com/kk-digital/kcg-ml.git
%cd kcg-ml/

# File Cache Module Example

## Creating FileCache Instance and FileCache DB.

In [2]:
from file_cache.file_cache import FileCache

# Create the file cache object; the FileCache cells below all call methods on it.
fileCache = FileCache()

# Create the file cache database. No path is passed here, so the default
# location './output/file_cache.sqlite' is used.
fileCache.create_file_cache()

[INFO]: database ./output/file_cache.sqlite created


## Adding a Folder to Cache DB

In [3]:
# Add the image files contained in a folder or ZIP archive to the file cache
# database. The source used here is the ZIP archive './datasets/testdata.zip'.
fileCache.add_folder_to_file_cache('./datasets/testdata.zip')

[INFO] Processing ZIP archive: ./datasets/testdata.zip
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_ee_d1_f0eed195037b8fc2135a44ceb5ed2044.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f0_71_f0f071d89545fb378150180d9257f306.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f0_b4_f0f0b4f3f1d172a789589915fc4ad212.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f5_b8_f0f5b8c94169977da31274f36b0aa703.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f6_08_f0f608ca8c7e5bcd0b3ffc565442d74f.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_fc_b5_f0fcb58b15c19afc5fc1f33ff2acda74.jpg
 Fetching: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/

## Getting Random Hash from Cache DB

In [4]:
# Get a random file hash_id from the file cache database at the given path.
# The value is stored in `hash_id` because the next cell looks a file up by it.
hash_id = fileCache.get_random_hash('./output/file_cache.sqlite')
print(f"Hash ID: {hash_id}")

Hash ID: d08308e5788f548b0173d83f2cdf69b6b9564aed829adfe279dc1496e98f850e0bac6f775a19e8376d87507a398c285708bb77fee6a1b16d053b9bddc2351033


## Fetch Image File Data from File Cache DB with Specific Hash

In [5]:
# Look up the cached image file data stored under `hash_id` in the file cache database.
img_dict = fileCache.get_img_by_hash('./output/file_cache.sqlite', hash_id)
# Show every entry of the result on its own "key : value" line.
for field in img_dict:
    field_value = img_dict[field]
    print(f"{field} : {field_value}")

file_name : https___i.pinimg.com_originals_f0_fc_b5_f0fcb58b15c19afc5fc1f33ff2acda74.jpg
file_path : ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_fc_b5_f0fcb58b15c19afc5fc1f33ff2acda74.jpg
hash_id : d08308e5788f548b0173d83f2cdf69b6b9564aed829adfe279dc1496e98f850e0bac6f775a19e8376d87507a398c285708bb77fee6a1b16d053b9bddc2351033
file_type : .jpg
is_archive : None
n_content : None
container_archive : ./datasets/testdata.zip


## Fetch Random Image File Data from File Cache DB.

In [6]:
# Pull the data of one randomly chosen image file from the file cache database.
img_dict = fileCache.get_random_image('./output/file_cache.sqlite')
# Show every entry of the result on its own "key : value" line.
for column in img_dict:
    column_value = img_dict[column]
    print(f"{column} : {column_value}")

file_name : https___i.pinimg.com_originals_00_0b_2d_000b2d8351f053664b41ca1024b41b1f.jpg
file_path : ./datasets/testdata.zip/testdata/other-training/https___i.pinimg.com_originals_00_0b_2d_000b2d8351f053664b41ca1024b41b1f.jpg
hash_id : 3092e6075de0b2b9266021b2ca9dd312d3b1624ef40f377a5989a7757d8a1bdabd82f3101e15237c2dcd46edef5e4fcb06b51c347dbeabaf9acfbff5ddc250cc
file_type : .jpg
is_archive : None
n_content : None
container_archive : ./datasets/testdata.zip


## Clearing All Data from the FileCache DB

In [7]:
# Clear all data from the table in the file cache database.
# delete_cache=False is passed explicitly — presumably this empties the table
# but keeps the database file itself; TODO confirm against clear_cache().
fileCache.clear_cache('./output/file_cache.sqlite', delete_cache=False)

[INFO] Table "file_cache" on ./output/file_cache.sqlite database has been cleared.


# ClipCache Module Example

## Installing open_clip Library

In [8]:
%%capture
!pip install open_clip_torch

## Initializing ClipCache Instance and Creating CLIP Cache DB

In [9]:
from clip_cache.cache_clip import ClipCache

# Create the CLIP cache object; the ClipCache cells below all call methods on it.
clipCache = ClipCache()

# Create the CLIP cache database. No path is passed here, so the default
# location './output/clip_cache.sqlite' is used.
clipCache.create_clip_cache()


[INFO]: database ./output/clip_cache.sqlite created


## Adding Image Files Contained In a Folder Or ZIP Archive to CLIP Cache DB

In [10]:
# Add the image files contained in a folder or ZIP archive to the CLIP cache
# database. The source used here is the ZIP archive './datasets/testdata.zip'.
# NOTE(review): the recorded output shows a 354M download before the first
# vector is calculated — presumably model weights fetched on first use; confirm.
clipCache.add_folder_to_clip_cache('./datasets/testdata.zip')

100%|███████████████████████████████████████| 354M/354M [00:03<00:00, 94.9MiB/s]


[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] Calculating CLIP vector for ./datasets/testdata.zip...
[INFO] C

## Get Random File Hash from DB

In [11]:
# Get a random file hash_id from the CLIP cache database at the given path.
# This rebinds `hash_id` (previously set in the FileCache section); the next
# cell uses the new value to look up a CLIP vector.
hash_id = clipCache.get_random_hash('./output/clip_cache.sqlite')
print(f"Hash ID: {hash_id}")

Hash ID: 9b2f1890ea7ec35ec49ab0625b74fa99e86d7185f7776d1683466e8c52123f115add65b5dd7d5d1b48955fa008cf86e3ea6d0756e412e859edb5e3bc2b62d827


## Get CLIP Vector Data from CLIP Cache DB with Specific Hash 

In [12]:
# Look up the CLIP vector data stored under `hash_id` in the CLIP cache database.
clip_dict = clipCache.get_clip_by_hash('./output/clip_cache.sqlite', hash_id)
# Show every entry of the result on its own "key : value" line.
for field in clip_dict:
    field_value = clip_dict[field]
    print(f"{field} : {field_value}")

hash_id : 9b2f1890ea7ec35ec49ab0625b74fa99e86d7185f7776d1683466e8c52123f115add65b5dd7d5d1b48955fa008cf86e3ea6d0756e412e859edb5e3bc2b62d827
clip_vector : [[ 2.36666113e-01 -1.26065508e-01  2.90830135e-01 -7.51582623e-01
   2.49981239e-01 -5.97876132e-01  7.35423490e-02  6.30756855e-01
   2.58274585e-01  3.75845611e-01 -4.35824811e-01  2.19376191e-01
   4.55207109e-01 -2.48262808e-01  1.66447476e-01  4.23831373e-01
  -4.41205129e-02  1.34212971e-01 -3.66661102e-01  1.65257260e-01
  -1.49023384e-02  6.32396564e-02  2.92944312e-01  3.53740811e-01
  -2.19550803e-02 -4.17181253e-01 -4.02600855e-01  4.62936342e-01
  -1.03281327e-01  1.30987957e-01  1.23471923e-01 -2.26849541e-02
   2.26294640e-02  3.46390635e-01 -4.18478176e-02 -3.55137318e-01
  -4.71387655e-01  1.75701082e-01 -2.18381062e-01 -1.23044705e+00
  -2.02351600e-01  2.83436149e-01 -2.35975925e-02 -1.52335996e-02
   1.30977049e-01 -3.84485334e-01 -3.65354359e-01 -3.69191855e-01
  -8.95074382e-02  9.34458300e-02 -1.61364764e-01  2.21

## Get Random CLIP Vector from CLIP Cache DB

In [13]:
# Pull the CLIP vector data of one randomly chosen entry from the CLIP cache database.
clip_dict = clipCache.get_random_clip('./output/clip_cache.sqlite')

# Show every entry of the result on its own "key : value" line.
for column in clip_dict:
    column_value = clip_dict[column]
    print(f"{column} : {column_value}")

hash_id : cbed440a1d639986a1d52ecca508d8ab8b92c27040463702c07a271195e2adaebf1c9f9a82bfa02e421f10cf6fe9a887207f925d7fc856e7df669e4544dfbf71
clip_vector : [[-1.79136395e-01 -1.14715315e-01 -2.01706234e-02  3.21493685e-01
   6.05420828e-01 -2.02845633e-01  9.70829129e-02 -1.06028304e-01
  -2.14949399e-01  4.25283730e-01 -1.08159624e-01  9.63202957e-03
  -2.82802954e-02 -3.81042715e-03  4.94117856e-01  2.84032851e-01
   3.59351426e-01  3.86067390e-01 -3.49933386e-01 -1.96620777e-01
  -2.39127681e-01  9.07916389e-03 -1.64673869e-02  1.67921603e-01
  -3.90282869e-01 -1.35500319e-02 -1.88837424e-01  3.00699115e-01
  -4.91323173e-02  3.47665966e-01  4.14400160e-01 -2.36875996e-01
   1.54079674e-02  4.32490781e-02  2.81052310e-02  1.71977744e-01
  -2.78922647e-01  4.52218294e-01  3.58586669e-01 -1.32136837e-01
   7.10119540e-03  2.61320800e-01  1.69510767e-02 -2.19594300e-01
   4.98262376e-01 -9.02989805e-01  2.85441190e-01 -6.55185282e-02
  -2.55165577e-01  4.89071488e-01 -2.36719057e-01  7.10

## Delete Data from the Table in CLIP Cache DB.

In [14]:
# Clear all data from the table in the CLIP cache database.
# delete_cache=False is passed explicitly — presumably this empties the table
# but keeps the database file itself; TODO confirm against clear_cache().
clipCache.clear_cache('./output/clip_cache.sqlite', delete_cache=False)

[INFO] Table "clip_cache" on ./output/clip_cache.sqlite database has been cleared.


# TagCache Module Example

## Initializing TagCache Instance and Creating TagCache DB

In [15]:
from clip_cache.cache_tag import TagCache

# Create the tag cache object; the TagCache cells below all call methods on it.
tagCache = TagCache()

# Create the tag cache database. No path is passed here, so the default
# location './output/tag_cache.sqlite' is used.
tagCache.create_tag_cache()


[INFO]: database ./output/tag_cache.sqlite created


## Adding Image Files to TagCache DB

In [16]:
# Add the image files contained in a folder or ZIP archive to the tag cache
# database. The source used here is the ZIP archive './datasets/testdata.zip'.
tagCache.add_folder_to_tag_cache('./datasets/testdata.zip')

 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_ee_d1_f0eed195037b8fc2135a44ceb5ed2044.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f0_71_f0f071d89545fb378150180d9257f306.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f0_b4_f0f0b4f3f1d172a789589915fc4ad212.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f5_b8_f0f5b8c94169977da31274f36b0aa703.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_f6_08_f0f608ca8c7e5bcd0b3ffc565442d74f.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_fc_b5_f0fcb58b15c19afc5fc1f33ff2acda74.jpg
 Processing: ./datasets/testdata.zip/testdata/not-pixel-art-real-photo/https___i.pinimg.com_originals_f0_fe_b3_f

## Getting Random Hash from TagCache DB

In [17]:
# Get a random file hash_id from the tag cache database at the given path.
# This rebinds `hash_id` (set earlier in the notebook); the next cell uses the
# new value to look up a tag.
hash_id = tagCache.get_random_hash('./output/tag_cache.sqlite')
print(f"Hash ID: {hash_id}")

Hash ID: 7da1dfa9ea839c749bf8a80f0102ca0ef387bac9a5003acfe3280de88b014a286b3b39d189c5e294a7dd7a8c9ab493f921d6f512355b7300725b55b91a67c512


## Getting Tag for Specific Hash Id

In [18]:
# Fetch the tag string stored for `hash_id` in the tag cache database.
# The result is kept in `tag_str` because the next cell queries by that tag.
tag_str = tagCache.get_tag_by_hash('./output/tag_cache.sqlite', hash_id)
print(f"Image hash: {hash_id}")
print(f"Image tag:  {tag_str}")

Image hash: 7da1dfa9ea839c749bf8a80f0102ca0ef387bac9a5003acfe3280de88b014a286b3b39d189c5e294a7dd7a8c9ab493f921d6f512355b7300725b55b91a67c512
Image tag:  not-pixel-art


## Getting List of Hash Ids for Specific Tag

In [19]:
# Fetch the list of hash ids stored under the tag string `tag_str` and print
# them, numbered from 1.
hash_id_list = tagCache.get_hash_by_tag('./output/tag_cache.sqlite', tag_str)
# The loop variable is deliberately not called `hash`: notebook variables live
# in one shared namespace, so that name would shadow the built-in hash() for
# every cell executed afterwards.
for image_number, tagged_hash_id in enumerate(hash_id_list, start=1):
    print(f"Image {image_number} with hash : {tagged_hash_id}")

Image 1 with hash : e33238b3d3cbd34caaad0b4c44eb67306c6e780fa2323abc643722cd991e6e75b8627543245116363057d213ea66abf898d7c36beae9e74e97c0f91ed553d8f3
Image 2 with hash : 56702d6c6048b8d51147a50137c9cb5956194e5e3dcebb9dd9536897298bc945d49e9a2a2138ecf585bd7f7cc4bbef6cb7c79a2026eea6870365d6d1a78333f1
Image 3 with hash : aa3096d9d84187828a45002e478a8b165add4101e946ff5565d13efa6beae9b684b659bdfba37bc056189cb6e0da9c70ef2eb44f4fc1fbec12e58330b891f0e7
Image 4 with hash : 51b52816d626a97f5281db03607d40ddc55ef13f69323f3408b89a8b3ebd586d984fe5f65d618e3b8af84600a87da35a338293d302763df7e795bd408e0cf825
Image 5 with hash : 09b54922fd5065a9acacfd3c19e7c6bb057f9c2dd55d881fc7a1c5297237495d21e8191d17da2bcabf994ee04454346550b09601c12cee66dee0af8149fd72f2
Image 6 with hash : 1e1d28c15c3d54772d464d7df3aed436f281a27e76519336710a4b4e22e544daa9ccdef8c7d791ce5d8b188b2c58d56c71b9ce03f61f9b15a74a7b2bdbc9a9e4
Image 7 with hash : 50c55bd1e165532436d7296f5bf367d56459f97b3f7af417e781f729d428234d50a51c8f5da17d2cdb2d88

## Getting a Random Hash ID and Tag Pair from TagCache DB

In [20]:
# Fetch a random hash_id and tag pair from the tag cache database.
tag_dict = tagCache.get_random_tag('./output/tag_cache.sqlite')
# The result is indexed by the keys 'hash_id' and 'tag'.
# Note that this rebinds the notebook-level `hash_id` once more.
hash_id = tag_dict['hash_id']
tag = tag_dict['tag']
print(f"Image hash: {hash_id}")
print(f'Image tag: {tag}')

Image hash: 56702d6c6048b8d51147a50137c9cb5956194e5e3dcebb9dd9536897298bc945d49e9a2a2138ecf585bd7f7cc4bbef6cb7c79a2026eea6870365d6d1a78333f1
Image tag: not-pixel-art


## Clear All Data from TagCache DB

In [21]:
# Clear all data from the table in the tag cache database.
# delete_cache=False is passed explicitly — presumably this empties the table
# but keeps the database file itself; TODO confirm against clear_cache().
tagCache.clear_cache('./output/tag_cache.sqlite', delete_cache=False)

[INFO] Table "tag_cache" on ./output/tag_cache.sqlite database has been cleared.
