# Image select

Randomly select `n_images` images for each of `n_folders` folders from an image directory, then copy them to the output location. Set the relevant variables:

In [1]:
# --- Sampling parameters ---
n_images = 200        # number of images per output folder
n_folders = 3         # number of output folders
image_type = "train"  # "train" or "test"

# --- Source bucket and deployment ---
bucket_name = "amlr-imagery-viame-dev"
deployment = "amlr08-20220513"

# --- Derived paths ---
# Blob-name prefix under which the deployment's images are listed
path_full_deployment = f"glider/SANDIEGO/2022/{deployment}-shadowgraph"
# Bucket-qualified destination root (without the gs:// scheme)
bucket_path_out = f"{bucket_name}/viame-projects/shadowgraph/{image_type}"

## Processing

Prep, and list all .jpg files in the specified bucket path

In [2]:
from google.cloud import storage
import random
import subprocess 

# Client used to talk to Google Cloud Storage
client = storage.Client()
# Total number of images to sample across all folders
n = n_images * n_folders

# List every blob under the deployment prefix and keep only names that END in
# ".jpg". A substring test would also accept names that merely contain ".jpg"
# (e.g. "img.jpg.json" or "img.jpg.bak").
blobs = client.list_blobs(bucket_name, prefix=path_full_deployment)
file_name_list = [blob.name for blob in blobs if blob.name.endswith('.jpg')]

Explore the files that were identified

In [3]:
# Optional sanity checks on the listing; uncomment one line at a time to run it.
# Number of .jpg names found:
# len(file_name_list)
# First seven names:
# file_name_list[:7]
# Trailing seven names:
# file_name_list[len(file_name_list)-7:len(file_name_list)]

Sample n random files from the file name list

In [4]:
# random.sample() draws without replacement, so it cannot return more items
# than the list holds. Fail early with an actionable message instead of the
# generic "Sample larger than population" error.
if n > len(file_name_list):
    raise ValueError(
        f"Requested {n} images but only {len(file_name_list)} .jpg files were found"
    )

# Draw n distinct file names at random
files_sample = random.sample(file_name_list, k=n)
# Preview the first few sampled names (a cell displays only its last expression)
files_sample[:7]

['glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220516-093653-005.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220514-184708-004.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220513-224704-009.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220515-115109-002.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220514-070708-020.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220515-144411-004.jpg',
 'glider/SANDIEGO/2022/amlr08-20220513-shadowgraph/SG01 20220514-223235-016.jpg']

### Copy files

Copy the selected files to the output folder(s) for the chosen image type (train or test). A return code of 0 means the copy succeeded.

In [5]:
# Earlier single-folder variant, kept for reference only: it sends every sampled
# file to gs://{bucket_path_out} in one gsutil call, with the source URIs fed on
# stdin via -I.
# files_sample_path = [f"gs://{bucket_name}/{i}" for i in files_sample]
# args = ["gsutil", "-m", "cp", "-I", f"gs://{bucket_path_out}"]
# p = subprocess.run(args, input='\n'.join(files_sample_path).encode(), capture_output=True)

In [6]:
# Turn each sampled blob name into a full gs:// URI
files_sample_path = [
    f"gs://{bucket_name}/{blob_name}" for blob_name in files_sample
]
lst = files_sample_path

# Split the URIs into consecutive batches of n_images (one batch per output
# folder) and sort the URIs within each batch
z = [
    sorted(lst[start:start + n_images])
    for start in range(0, len(lst), n_images)
]

In [9]:
# Copy each batch of sampled images into its own numbered destination folder.
for i in range(n_folders):
    # Destination folder name: <image_type>-<two-digit index>-<deployment>
    folder_name = f"{image_type}-{i+1:02}-{deployment}"
    # gsutil flags: -m runs the copy in parallel, -I reads the source URIs from stdin
    args = ["gsutil", "-m", "cp", "-I", f"gs://{bucket_path_out}/{folder_name}"]
    # Feed this folder's URIs to gsutil, one per line
    p = subprocess.run(args, input='\n'.join(z[i]).encode(), capture_output=True)
    print(f"Subprocess return code for folder {folder_name}: {p.returncode}")
    if p.returncode != 0:
        # capture_output=True hides gsutil's messages; show them when the copy
        # fails so the cause is not lost.
        print(p.stderr.decode(errors="replace"))

Subprocess return code for folder train-01-amlr08-20220513: 0
Subprocess return code for folder train-02-amlr08-20220513: 0
Subprocess return code for folder train-03-amlr08-20220513: 0
