In [34]:
import os
import shutil
import csv
import pandas as pd
import random
import numpy as np

### Step 1: Put all pictures of the same animal type into one folder for further processing:

In [23]:
def combine_datasets(source_dirs, destination_dir):
    '''
    take in a list of directories, and combine all files under
    these directories into one destination folder
    '''

    # Create the destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)

    # Iterate through each source directory
    for source_dir in source_dirs:
        # Iterate through each file in the source directory
        for filename in os.listdir(source_dir):
            source_path = os.path.join(source_dir, filename)
            destination_path = os.path.join(destination_dir, filename)

            # Copy the file to the destination directory
            shutil.copy(source_path, destination_path)

    print("Files copied successfully!")

In [20]:
def get_dataset_dirs(animal_dirs):
    '''
    this function takes in a dataset directory in the standard
    image classification format, and returns a list of source_dirs for the use of
    function "combine_datasets"
    '''
    source_dirs = []
    specific_dirs = [os.path.join(animal_dirs,'data','test'), os.path.join(animal_dirs,'data','train'), os.path.join(animal_dirs,'data','val')]
    for sub_folder_dirs in specific_dirs:
        for folders in os.listdir(sub_folder_dirs):
            source_dirs.append(os.path.join(sub_folder_dirs,folders))
    return source_dirs

In [27]:
destination = 'C:/Users/leiyi/OneDrive/Desktop/Dataset'
for animal_type in os.listdir(general_dirs):
    animal_dirs = os.path.join(general_dirs,animal_type)
    source_dirs = get_dataset_dirs(animal_dirs)
    destination_dir = os.path.join(destination,animal_type)
    combine_datasets(source_dirs,destination_dir)

Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!
Files copied successfully!


### Step 2: Rename the images

In [28]:
def rename_files(directory,newnames):
    '''
    take in a directory containing multiple files that need to be renamed,
    and rename them to what is specified in a list
    '''

    for filename in os.listdir(directory):
        old_path = os.path.join(directory, filename)
        new_filename = newnames.pop(0)
        new_path = os.path.join(directory, new_filename)

        # Rename file
        os.rename(old_path, new_path)

In [34]:
gen_dir = 'C:/Users/leiyi/OneDrive/Desktop/Dataset'
i = 0
for animal_type in os.listdir(gen_dir):
    directory = os.path.join(gen_dir,animal_type)
    image_no = len(os.listdir(directory))
    image_no_list = list(range(1,image_no+1))
    image_name = [str(i) + str(x).zfill(7) + '.jpg' for x in image_no_list]
    rename_files(directory,image_name)
    i += 1

### Step 3: Move all images into one folder and create a metadata. csv file for it

In [36]:
general_dirs = 'C:/Users/leiyi/OneDrive/Desktop/Dataset'
animal_types = os.listdir(general_dirs)
source_dirs = [os.path.join(general_dirs,x) for x in animal_types]

In [38]:
destination_dir = 'C:/Users/leiyi/OneDrive/Desktop/DatasetOriginal'
combine_datasets(source_dirs,destination_dir)

Files copied successfully!


Now we move some of the leopard and hyena pictures that have sufficient light into the dataset

##### First we randomly select 300 leopard and hyena pictures that are sufficiently exposed

In [7]:
df_leo = pd.read_csv('C:/Users/leiyi/Downloads/metadata_leopard.csv')
df_hye = pd.read_csv('C:/Users/leiyi/Downloads/metadata_hyena.csv')

In [56]:
df_hye

Unnamed: 0,file_name,bbox
0,000000000001.jpg,"[[580.1369863013698, 606.8493150684931, 1334.9..."
1,000000000002.jpg,"[[331.1170212765957, 599.2021276595744, 1709.0..."
2,000000000003.jpg,"[[180.8102345415778, 232.83582089552237, 2084...."
3,000000000004.jpg,"[[1380.7453416149067, 733.2298136645962, 654.0..."
4,000000000005.jpg,"[[426.027397260274, 829.4520547945204, 333.561..."
...,...,...
3098,000000003099.jpg,"[[754.7945205479451, 498.63013698630135, 764.3..."
3099,000000003100.jpg,"[[612.3287671232877, 503.42465753424653, 1026...."
3100,000000003101.jpg,"[[0.0, 202.02020202020202, 2338.5858585858587,..."
3101,000000003102.jpg,"[[980.1369863013698, 276.027397260274, 1266.43..."


In [8]:
leo_name = df_leo['file_name'].tolist()
hye_name = df_hye['file_name'].tolist()

In [9]:
import random
leo_rand_list = random.choices(leo_name,k = 300)
hye_rand_list = random.choices(hye_name,k = 300)

Now we move them to a new folder, and change their names

In [60]:
leo_dir = 'C:/Users/leiyi/Downloads/leopard.coco/leopard.coco/images/train2022'
hye_dir = 'C:/Users/leiyi/Downloads/hyena.coco/hyena/images/train'
destination_dir = 'C:/Users/leiyi/OneDrive/Desktop/DatasetLeoHye'
for i in range(300):
    shutil.copy(os.path.join(leo_dir,leo_rand_list[i]), os.path.join(destination_dir,'leopard',leo_rand_list[i]))
    shutil.copy(os.path.join(hye_dir,'1' + hye_rand_list[i]), os.path.join(destination_dir,'hyena',hye_rand_list[i]))

Rename them

In [62]:
number_list = list(range(1,301))
leo_new_name = [str(9) + str(x).zfill(7) + '.jpg' for x in number_list]
hye_new_name = [str(10) + str(x).zfill(7) + '.jpg' for x in number_list]
rename_files(os.path.join(destination_dir,'leopard'),leo_new_name)
rename_files(os.path.join(destination_dir,'hyena'),hye_new_name)

Resize them

In [2]:
from PIL import Image

In [19]:
def resize_image(input_path,output_path, width = 600, height = 400):
    '''
    take in an input path containing multiple images,
    compress them into the size of 600*400,
    and move them to the folder given by output_path
    '''

    image_list = os.listdir(input_path)
    for image_name in image_list:
        image = Image.open(os.path.join(input_path,image_name))
        new_size = (width, height)
        resized_image = image.resize(new_size)
        resized_image.save(os.path.join(output_path,image_name))

In [65]:
leo_input_path = 'C:/Users/leiyi/OneDrive/Desktop/DatasetLeoHye/leopard'
hye_input_path = 'C:/Users/leiyi/OneDrive/Desktop/DatasetLeoHye/hyena'
output_path = 'C:/Users/leiyi/OneDrive/Desktop/DatasetOriginal'
resize_image(leo_input_path,output_path)
resize_image(hye_input_path,output_path)

Now we create a metadata.csv file for this folder

In [31]:
import csv
import torch
from torchvision import transforms
import ast
def create_a_metadata(images_dir):
    '''
    take in an image dir, and create a metadata.csv file for the images contained
    '''

    metadata = [['file_name', 'image_id','width','height','objects']]
    with open("C:/Users/leiyi/OneDrive/Desktop/metadata.csv", "w", newline="") as file:
        writer = csv.writer(file)

        # Write the header row (optional)
        writer.writerow(metadata[0])
    image_id = 0
    transform = transforms.ToTensor()
    for filename in os.listdir(images_dir):
        image = Image.open(os.path.join(images_dir,filename))
        newline = [filename]
        objects = dict()
        newline.append(image_id)
        width, height = image.size
        newline.append(width)
        newline.append(height)
        image_tensor = transform(image)
        objects['pixel_values'] = image_tensor
        objects['labels'] = ast.literal_eval(filename[:-11])
        
        if objects['labels'] == 0:
            objects['category'] = 'antelope'
        elif objects['labels'] == 1:
            objects['category'] = 'bear'
        elif objects['labels'] == 2:
            objects['category'] = 'deer'
        elif objects['labels'] == 3:
            objects['category'] = 'fox'
        elif objects['labels'] == 4:
            objects['category'] = 'hare'
        elif objects['labels'] == 5:
            objects['category'] = 'lion'
        elif objects['labels'] == 6:
            objects['category'] = 'raccoon'
        elif objects['labels'] == 7:
            objects['category'] = 'tiger'
        elif objects['labels'] == 8:
            objects['category'] = 'wolf'
        elif objects['labels'] == 9:
            objects['category'] = 'leopard'
        else:
            objects['category'] = 'hyena'
        
        newline.append(objects)
        
        with open("C:/Users/leiyi/OneDrive/Desktop/metadata.csv", "a", newline="") as file:
            writer = csv.writer(file)
            writer.writerows([newline])
            
        image_id += 1

In [3]:
metadata = [['filename'],['aba'],['here']]

In [11]:
images_dir = 'C:/Users/leiyi/OneDrive/Desktop/Dataset/antelope'

In [16]:
images_dir = 'C:/Users/leiyi/OneDrive/Desktop/DatasetOriginal'
create_a_metadata(images_dir)

#### Create a subset of pictures that are to be darkened 

In [15]:
def create_rand_pictures(input_path, output_path, perc = 0.1):
    '''
    take in an input path of images,
    and randomly choose 10% of the pictures to be copied
    to the output path for later use
    '''

    # Create the destination directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)
    image_list = os.listdir(input_path)
    picture_no = int(perc*len(image_list))
    new_image_list = random.choices(image_list, k = picture_no)
    for image in new_image_list:
        shutil.copy(os.path.join(input_path,image),os.path.join(output_path,image))

In [17]:
input_path_gen = 'C:/Users/leiyi/OneDrive/Desktop/Dataset'
output_path_gen = 'C:/Users/leiyi/OneDrive/Desktop/DatasetDark'
dir_list = os.listdir(input_path_gen)
for animal_type in dir_list:
    input_path = os.path.join(input_path_gen,animal_type)
    output_path = os.path.join(output_path_gen, animal_type)
    create_rand_pictures(input_path,output_path,perc = 0.2)

### Get the darkest pictures from the hyena and leopard dataset

In [18]:
def calculate_brightness(image):
    # Convert image to grayscale
    grayscale_image = image.convert('L')

    # Get pixel values as a numpy array
    pixels = np.array(grayscale_image)

    # Calculate mean pixel value
    mean_brightness = np.mean(pixels)

    return mean_brightness

In [21]:
aba = '10000003103.jpg'
print(aba[-8:])
print('0'*3)

3103.jpg
000


In [30]:
dark_path = 'C:/dark'
image_name_list = os.listdir(dark_path)
output_path = 'C:/Users/leiyi/OneDrive/Desktop/DatasetMixed'
for image_name in image_name_list:
    image = Image.open(os.path.join(dark_path,image_name))
    if calculate_brightness(image) <= 20:
        resized_image = image.resize((600,400))
        if image_name[0] == '0':
            image_name_new = '9' + '0'*3 + image_name[-8:]
            resized_image.save(os.path.join(output_path,image_name_new))
        else:
            image_name_new = '10' + '0'*3 + image_name[-8:]
            resized_image.save(os.path.join(output_path,image_name_new))

UnidentifiedImageError: cannot identify image file 'C:\\dark\\metadata.csv'

In [32]:
images_dir = 'C:/Users/leiyi/OneDrive/Desktop/DatasetMixed'
create_a_metadata(images_dir)

In [33]:
from pathlib import Path

cwd = Path.cwd()
print(cwd)

C:\Users\leiyi\Documents
