In [1]:
import os
import shutil

source_directory = 'faiss/active-learning/remove-by-hand-trial2'
image_directory = os.path.join(source_directory, 'images')
label_directory = os.path.join(source_directory, 'labels')
txt_directory = 'data/filterme/labels'

# Create target directories if they don't exist
os.makedirs(image_directory, exist_ok=True)
os.makedirs(label_directory, exist_ok=True)

# Copy files from source to image_directory
for file in os.listdir(source_directory):
    if file.endswith('.jpg'):
        shutil.copy(os.path.join(source_directory, file), image_directory)

# Copy corresponding txt files from txt_directory to label_directory
for file in os.listdir(txt_directory):
    if file.endswith('.txt'):
        corresponding_image_file = os.path.join(image_directory, file.replace('.txt', '.jpg'))
        if os.path.isfile(corresponding_image_file):
            shutil.copy(os.path.join(txt_directory, file), label_directory)

# Remove non-corresponding jpg files in image_directory
for file in os.listdir(image_directory):
    if file.endswith('.jpg'):
        corresponding_label_file = os.path.join(label_directory, file.replace('.jpg', '.txt'))
        if not os.path.isfile(corresponding_label_file):
            os.remove(os.path.join(image_directory, file))

# Remove non-corresponding txt files in label_directory
for file in os.listdir(label_directory):
    if file.endswith('.txt'):
        corresponding_image_file = os.path.join(image_directory, file.replace('.txt', '.jpg'))
        if not os.path.isfile(corresponding_image_file):
            os.remove(os.path.join(label_directory, file))


In [4]:
import os
from collections import defaultdict

def count_files_with_same_extension(directory):
    extension_count = defaultdict(lambda: defaultdict(int))

    for root, dirs, files in os.walk(directory):
        for file in files:
            try:
                file_extension = os.path.splitext(file)[1]
                extension_count[root][file_extension] += 1
            except PermissionError:
                print(f"Permission denied: {os.path.join(root, file)}, skipping this file.")
            except Exception as e:
                print(f"Error occurred: {e}. Skipping this file.")
    return extension_count

def print_extension_counts(extension_count):
    for dir_path, extensions in extension_count.items():
        print(dir_path + ":")
        for ext, count in extensions.items():
            print(f"- with {ext} files: {count}")

def main():
    try:
        directory = input("Enter the directory path: ")
        if not os.path.exists(directory):
            print("Invalid directory path. Please try again.")
            return
        extension_count = count_files_with_same_extension(directory)
        print_extension_counts(extension_count)
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

faiss/active-learning/remove-by-hand-trial2/set1/model-label-and-remove-similarity:
- with .txt files: 1
faiss/active-learning/remove-by-hand-trial2/set1/model-label-and-remove-similarity/labels:
- with .txt files: 40
faiss/active-learning/remove-by-hand-trial2/set1/model-label-and-remove-similarity/images:
- with .jpg files: 40
faiss/active-learning/remove-by-hand-trial2/set1/manually:
- with .zip files: 1
faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8:
- with .txt files: 2
- with .yaml files: 1
faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/labels:
- with .txt files: 39
faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/images:
- with .jpg files: 39


In [None]:
# data preprocessing

In [10]:
import os
import shutil

def copy_files(src_dir, dest_dir):
    files = os.listdir(src_dir)
    for file_name in files:
        full_file_name = os.path.join(src_dir, file_name)
        if os.path.isfile(full_file_name):
            shutil.copy(full_file_name, dest_dir)
    print(f"Copied files from {src_dir} to {dest_dir}")

def remove_same_name_images(src_dir, reference_dir, labels_dir):
    ref_files = os.listdir(reference_dir)
    img_files = os.listdir(src_dir)
    common_files = set(ref_files).intersection(img_files)
    for file_name in common_files:
        os.remove(os.path.join(src_dir, file_name))
        txt_file_name = file_name.rsplit('.', 1)[0] + '.txt'
        os.remove(os.path.join(labels_dir, txt_file_name))
    print(f"Removed images and corresponding labels from {src_dir} and {labels_dir}")

copy_files('data/filterme', 'data/filterme-backup')

remove_same_name_images('data/filterme/images', 'faiss/human-trial2', 'data/filterme/labels')

copy_files('faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/images', 'data/filterme/images')
copy_files('faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/labels', 'data/filterme/labels')


Copied files from data/filterme to data/filterme-backup
Removed images and corresponding labels from data/filterme/images and data/filterme/labels
Copied files from faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/images to data/filterme/images
Copied files from faiss/active-learning/remove-by-hand-trial2/set1/manually/activ-labeling.v1i.yolov8/train/labels to data/filterme/labels


In [11]:
import os
from collections import defaultdict

def count_files_with_same_extension(directory):
    extension_count = defaultdict(lambda: defaultdict(int))

    for root, dirs, files in os.walk(directory):
        for file in files:
            try:
                file_extension = os.path.splitext(file)[1]
                extension_count[root][file_extension] += 1
            except PermissionError:
                print(f"Permission denied: {os.path.join(root, file)}, skipping this file.")
            except Exception as e:
                print(f"Error occurred: {e}. Skipping this file.")
    return extension_count

def print_extension_counts(extension_count):
    for dir_path, extensions in extension_count.items():
        print(dir_path + ":")
        for ext, count in extensions.items():
            print(f"- with {ext} files: {count}")

def main():
    try:
        directory = input("Enter the directory path: ")
        if not os.path.exists(directory):
            print("Invalid directory path. Please try again.")
            return
        extension_count = count_files_with_same_extension(directory)
        print_extension_counts(extension_count)
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

data/filterme/labels:
- with .txt files: 8104
data/filterme/images:
- with .jpg files: 8104
