In [8]:
import json
import os
import random
import shutil

def update_filename(image, max_id_length):
    """Rename an image record to its zero-padded, zero-based index.

    The new name is ``image['id'] - 1`` left-padded with zeros to at least
    ``max_id_length`` digits, followed by the original extension (the text
    after the last ``.`` of the current ``file_name``). A ``file_name`` that
    contains no ``.`` receives the padded number alone instead of raising.

    Args:
        image (dict): Image record with an integer ``'id'`` and a string
            ``'file_name'``; it is modified in place.
        max_id_length (int): Minimum number of digits of the numeric part.

    Returns:
        None
    """
    # rpartition never fails to unpack: without a dot, `dot` is simply empty.
    _, dot, ext = image['file_name'].rpartition('.')
    # The file name uses the id shifted down by one (zero-based numbering).
    formatted_id = f"{(image['id'] - 1):0{max_id_length}d}"
    image['file_name'] = f"{formatted_id}.{ext}" if dot else formatted_id


def merge_annotations(annotations1, annotations2):
    """Merge two COCO-style annotation dicts into one.

    Records of ``annotations1`` are kept as they are. Image and annotation
    ids of ``annotations2`` are shifted past the highest id used in
    ``annotations1``, and every image of ``annotations2`` is renamed with
    ``update_filename``.

    The shift equals the highest id of the first dataset. It is increased
    only when the second dataset contains ids below 1 (e.g. 0-based ids),
    because such ids would otherwise collide with the first dataset.

    Args:
        annotations1 (dict): First dataset with ``'images'``,
            ``'annotations'`` and ``'categories'`` lists.
        annotations2 (dict): Second dataset with ``'images'`` and
            ``'annotations'`` lists. It is not modified; shifted copies of
            its records are placed in the result.

    Returns:
        dict: ``{'images': ..., 'annotations': ..., 'categories': ...}`` where
        the categories are taken from ``annotations1`` only.

    Note:
        File names follow ``update_filename``'s ``id - 1`` numbering, which
        presumably expects 1-based ids -- TODO confirm against the datasets.
    """
    def id_offset(first, second):
        # `default` keeps an empty list (e.g. a dataset without annotations)
        # from raising ValueError.
        highest = max((item['id'] for item in first), default=0)
        lowest = min((item['id'] for item in second), default=1)
        # Shift by `highest`; add extra only if ids below 1 would collide.
        return highest + max(0, 1 - lowest)

    images1, images2 = annotations1['images'], annotations2['images']
    annos1, annos2 = annotations1['annotations'], annotations2['annotations']

    image_offset = id_offset(images1, images2)
    annotation_offset = id_offset(annos1, annos2)

    # Zero-padding width: digits of the largest zero-based index after merging
    total_image_count = len(images1) + len(images2)
    max_id_length = len(str(total_image_count - 1))

    # Work on shallow copies so the caller's second dataset stays untouched
    shifted_images = []
    for image in images2:
        image = dict(image)
        image['id'] += image_offset
        update_filename(image, max_id_length)
        shifted_images.append(image)

    shifted_annotations = []
    for annotation in annos2:
        annotation = dict(annotation)
        annotation['id'] += annotation_offset
        # Must follow the same shift as the image ids it refers to
        annotation['image_id'] += image_offset
        shifted_annotations.append(annotation)

    return {
        "images": images1 + shifted_images,
        "annotations": annos1 + shifted_annotations,
        "categories": annotations1['categories'],  # assumed identical in both datasets
    }


# File paths for input and output
input_file1 = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\train\\_annotations.coco.json'
input_file2 = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\val\\_annotations.coco.json'
output_file = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json'

# Load annotations from files.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default codec.
# NOTE: the variable names say "cable"/"capacitor" but the data loaded here is
# the Wood train/val annotations given by the paths above.
with open(input_file1, 'r', encoding='utf-8') as file:
    annotations_cable = json.load(file)

with open(input_file2, 'r', encoding='utf-8') as file:
    annotations_capacitor = json.load(file)

# Merge annotations and save to a new file
merged_annotations = merge_annotations(annotations_cable, annotations_capacitor)
with open(output_file, 'w', encoding='utf-8') as file:
    json.dump(merged_annotations, file, indent=4)


In [12]:
def update_ids_in_coco_json(file_path, output_path, increment=1792):
    """Shift every image and annotation id of a COCO JSON file by a constant.

    ``increment`` is added to each image ``id``, each annotation ``id`` and
    each annotation ``image_id``, so references between annotations and
    images stay consistent. All other fields are written back unchanged.

    Args:
        file_path (str): COCO JSON file to read.
        output_path (str): File to write; may be the same as ``file_path``.
        increment (int): Value added to every id (default 1792).

    Returns:
        None

    Note:
        The shift is applied on every call. If ``output_path`` equals
        ``file_path``, calling the function again shifts the ids again.
    """
    # Read as UTF-8 explicitly instead of relying on the platform default codec
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for image in data['images']:
        image['id'] += increment

    for annotation in data['annotations']:
        annotation['id'] += increment
        # Keep the reference aligned with the shifted image ids
        annotation['image_id'] += increment

    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Example usage
# NOTE: input and output are the same file and the default increment (1792)
# is added on every call, so re-running this cell shifts the ids again.
update_ids_in_coco_json(
    'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json',
    'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json'
)


In [None]:
def rename_images(folder_path, start_index):
    """Renumber the numerically named files of a folder, starting at start_index.

    Regular files whose name (without extension) parses as an integer are
    sorted by that integer and renamed to ``start_index``,
    ``start_index + 1``, ... while keeping their extension. Sub-directories
    are ignored, and files with a non-numeric name are skipped with a message
    instead of aborting the whole run.

    A target name that is still occupied by another file waiting to be
    renamed is not an error: that rename is retried once the name is free.
    Only a target occupied by something that will not move is reported.

    Args:
        folder_path (str): Folder containing the files to rename.
        start_index (int): Number given to the first file in numeric order.

    Returns:
        None
    """
    numbered = []
    for name in os.listdir(folder_path):
        if not os.path.isfile(os.path.join(folder_path, name)):
            continue  # sub-directories are not images
        stem, ext = os.path.splitext(name)
        try:
            numbered.append((int(stem), name, ext))
        except ValueError:
            print(f"Skipping '{name}': file name is not a number.")
    numbered.sort()

    # Plan every rename up front; a file already at its target needs no work.
    pending = {}
    for position, (_, name, ext) in enumerate(numbered):
        new_filename = str(start_index + position) + ext
        if new_filename != name:
            pending[name] = new_filename

    order = [name for _, name, _ in numbered]
    while pending:
        progressed = False
        for name in order:
            new_filename = pending.get(name)
            if new_filename is None:
                continue
            new_file_path = os.path.join(folder_path, new_filename)
            if os.path.exists(new_file_path):
                if new_filename not in pending:
                    # Occupied by something that is not going to move away
                    print(f"Error: '{new_filename}' already exists.")
                    del pending[name]
                    progressed = True
                # Otherwise the occupant is renamed later; retry next pass
                continue
            os.rename(os.path.join(folder_path, name), new_file_path)
            del pending[name]
            progressed = True
        if not progressed:
            break
        # Chains of dependent renames run either upwards or downwards, so
        # alternating the direction resolves each chain in few passes.
        order = [name for name in reversed(order) if name in pending]

    # Defensive: report anything that could not be resolved
    for new_filename in pending.values():
        print(f"Error: '{new_filename}' already exists.")

# Update this path to your folder path
# NOTE: the files are renamed on disk, in place, inside this folder.
folder_path = "E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\data"
# First new file name. 1793 is one more than the default id increment (1792)
# of update_ids_in_coco_json -- presumably so names line up with the shifted
# ids; verify before running.
start_index = 1793
rename_images(folder_path, start_index)

In [20]:
def update_file_names(
    json_file,
    output_file_path='E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json',
):
    """Set each image's ``file_name`` to ``<id><extension>``.

    The extension of the current ``file_name`` is kept, so the annotation
    keeps pointing at a file that was renamed without changing its type.
    Records without a usable extension fall back to ``.jpg``.

    Args:
        json_file (str): COCO JSON file to read.
        output_file_path (str): File to write the updated JSON to. Defaults
            to the Wood annotation file this notebook works on.

    Returns:
        None
    """
    # Read as UTF-8 explicitly instead of relying on the platform default codec
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for image in data['images']:
        # Keep the existing extension; '.jpg' only when there is none
        ext = os.path.splitext(image.get('file_name') or '')[1] or '.jpg'
        image['file_name'] = f"{image['id']}{ext}"

    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Call the function with the path to the annotations file
# NOTE: update_file_names writes its result to this same path, so the
# annotation file is overwritten in place.
json_file_path = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json'
update_file_names(json_file_path)


In [39]:
# Define paths
annotation_file = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\_annotations.coco.json'
image_directory = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\data'

# Define output directories
train_dir = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\data\\train'
val_dir = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\data\\val'

# Ensure output directories exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Load annotations (read as UTF-8 explicitly, not with the platform default codec)
with open(annotation_file, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Shuffle a copy of the image list with a dedicated, seeded generator:
# - the split is identical on every run (Restart & Run All is reproducible);
# - data['images'] keeps the order it has in the annotation file.
SPLIT_SEED = 42
image_list = list(data['images'])
random.Random(SPLIT_SEED).shuffle(image_list)

# Calculate split indices
total_images = len(image_list)
train_end = int(total_images * 0.8)  # 80% of the images go to training

# Split image list
train_images = image_list[:train_end]
val_images = image_list[train_end:]  # all remaining images go to validation

# Function to create datasets
def create_dataset(images, annotations, categories, output_dir, source_dir=None):
    """Write one split: its COCO JSON plus a copy of each of its images.

    The JSON is saved as ``_annotations.coco.json`` inside ``output_dir`` and
    contains the given images, the annotations whose ``image_id`` belongs to
    one of those images, and the categories unchanged. Every image file is
    then copied from ``source_dir`` into ``output_dir``; a missing file is
    reported and skipped.

    Args:
        images (list[dict]): Image records with ``'id'`` and ``'file_name'``.
        annotations (list[dict]): Candidate annotations with ``'image_id'``.
        categories (list): Category records, stored as given.
        output_dir (str): Existing directory receiving the JSON and images.
        source_dir (str | None): Directory holding the image files. When
            omitted, the notebook-level ``image_directory`` is used.

    Returns:
        None
    """
    if source_dir is None:
        source_dir = image_directory

    # Build the id lookup once; rebuilding a list per annotation is quadratic.
    image_ids = {img['id'] for img in images}
    dataset = {
        'images': images,
        'annotations': [ann for ann in annotations if ann['image_id'] in image_ids],
        'categories': categories
    }

    # Save dataset to JSON
    json_file_path = os.path.join(output_dir, '_annotations.coco.json')
    with open(json_file_path, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, indent=4)

    # Copy images to their respective folders
    for image in images:
        source_path = os.path.join(source_dir, image['file_name'])
        destination_path = os.path.join(output_dir, image['file_name'])
        try:
            shutil.copy(source_path, destination_path)
        except FileNotFoundError:
            # Best effort: report the missing image and continue with the rest
            print(f"File not found: {source_path}")

# Create and save datasets
# Both calls receive the full annotation list; create_dataset keeps only the
# annotations that belong to the images of the given split.
create_dataset(train_images, data['annotations'], data['categories'], train_dir)
create_dataset(val_images, data['annotations'], data['categories'], val_dir)


In [2]:
def merge_json(file_path1, file_path2, output_file_path):
    """Concatenate two COCO JSON files and save the result.

    Images and annotations of the second file are appended to those of the
    first. Categories are merged without repeating identical entries, so two
    files that share the same category list yield that list once.

    Image and annotation ids are NOT renumbered; the inputs must already use
    ids that do not overlap.

    Args:
        file_path1 (str): First COCO JSON file.
        file_path2 (str): Second COCO JSON file.
        output_file_path (str): File to write; may be one of the inputs.

    Returns:
        None
    """
    # Read as UTF-8 explicitly instead of relying on the platform default codec
    with open(file_path1, 'r', encoding='utf-8') as file1:
        data1 = json.load(file1)

    with open(file_path2, 'r', encoding='utf-8') as file2:
        data2 = json.load(file2)

    # Keep the first occurrence of every distinct category record. Plain
    # concatenation would duplicate every category of identical lists.
    categories = []
    for category in data1["categories"] + data2["categories"]:
        if category not in categories:
            categories.append(category)

    combined_data = {
        "images": data1["images"] + data2["images"],
        "annotations": data1["annotations"] + data2["annotations"],
        "categories": categories
    }

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        json.dump(combined_data, output_file, indent=4)

    print(f"Combined JSON has been saved to {output_file_path}")

# Paths to the input JSON files and the output JSON file
# NOTE: output_file_path is the same file as file_path1, so the first input is
# overwritten; re-running this cell appends the second file's records again.
file_path1 = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\val\\_annotations.json'
file_path2 = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\Wood\\data\\val\\_annotations.coco.json'
output_file_path = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\val\\_annotations.json'

# Merge the JSON files
merge_json(file_path1, file_path2, output_file_path)


In [None]:
def count_jpg_images(directory):
    """
    Return how many entries of a directory have a name ending in ``.jpg``.

    The comparison ignores case, so ``.JPG`` counts as well. Only the direct
    entries of the directory are inspected.

    Args:
        directory (str): Path of the directory to inspect.

    Returns:
        int: Number of matching entries, or 0 (after printing a message) when
        the path does not exist.
    """
    if os.path.exists(directory):
        # Collect the matching names, then report how many there are
        matches = [entry for entry in os.listdir(directory) if entry.lower().endswith('.jpg')]
        return len(matches)

    print(f"Directory not found: {directory}")
    return 0

# Example usage:
# Counts the .jpg entries directly inside the val folder and reports the total.
directory_path = 'E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\val'
jpg_count = count_jpg_images(directory_path)
print(f"There are {jpg_count} .jpg files in the directory {directory_path}")

In [None]:
import filecmp

def find_duplicate_images(folder1, folder2):
    """
    Finds images that exist under the same name, with identical content, in
    both folders.

    Only ``.jpg``, ``.jpeg`` and ``.png`` files (case-insensitive) directly
    inside the folders are considered. Files are compared byte by byte;
    identical images stored under different names are not detected.

    Args:
        folder1 (str): The path to the first folder.
        folder2 (str): The path to the second folder.

    Returns:
        list: Names of the duplicated image files, sorted alphabetically.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # Sets make the "present in both folders" check a single intersection
    names1 = {f for f in os.listdir(folder1) if f.lower().endswith(image_extensions)}
    names2 = {f for f in os.listdir(folder2) if f.lower().endswith(image_extensions)}

    duplicate_images = []
    for common_file in sorted(names1 & names2):
        file1 = os.path.join(folder1, common_file)
        file2 = os.path.join(folder2, common_file)
        # Skip directories that merely carry an image-like name
        if not (os.path.isfile(file1) and os.path.isfile(file2)):
            continue
        # shallow=False forces a content comparison. The default only compares
        # size and modification time when those match, which can report
        # different images as duplicates.
        if filecmp.cmp(file1, file2, shallow=False):
            duplicate_images.append(common_file)

    return duplicate_images

# Example usage: report images stored under the same name in train and val
folder1 = "E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\train"
folder2 = "E:\\research2024\\VISION-Datasets - Copy\\VISION-Datasets extracted\\val"
duplicates = find_duplicate_images(folder1, folder2)
if not duplicates:
    print("No duplicate images found.")
else:
    # One file name per line, after a heading
    print("The following images are duplicates:")
    for image in duplicates:
        print(image)
