## Import

In [None]:
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
import glob
import shutil
from pathlib import Path
import seaborn as sns
import numpy as np
import math

#### Example: preprocessing the whole "ET22-05" experiment, including every subdirectory

In [1]:
def segment_image(top_input_dir, top_output_dir, min_area=100, split_dir_name='ET22-05-split'):
    """
    Extracts individual cell images from every non-binary .tif file found in the input
    directory and its subdirectories, and saves them as numbered .png files in a mirrored
    directory structure under the output directory.

    A file is processed when its name ends with '.tif' and does not contain 'bin'.
    Each extracted image is the bounding rectangle of one external contour of the
    thresholded image; rectangles are numbered top-to-bottom, then left-to-right
    (sorted by their y, then x coordinate). Files that OpenCV cannot read are
    reported and skipped instead of aborting the whole run.

    Parameters:
    - top_input_dir: The top-level directory containing the subdirectories with .tif files to process.
    - top_output_dir: The top-level directory in which the split directory is created for output.
    - min_area: The minimum contour area; only contours strictly larger than this are extracted.
    - split_dir_name: Name of the directory created inside top_output_dir that receives the
      mirrored output tree. Defaults to 'ET22-05-split'.

    Returns:
    - None
    """
    # Everything is written below <top_output_dir>/<split_dir_name>
    top_output_dir = os.path.join(top_output_dir, split_dir_name)
    os.makedirs(top_output_dir, exist_ok=True)

    for subdir, _dirs, files in os.walk(top_input_dir):
        # Mirror the input sub-directory inside the output tree
        relative_path = os.path.relpath(subdir, top_input_dir)
        output_dir = os.path.join(top_output_dir, relative_path)

        for file in files:
            if not file.endswith('.tif') or 'bin' in file:
                continue

            file_path = os.path.join(subdir, file)
            print(f"Processing {file_path}...")

            # Only create the output directory once there is something to process
            os.makedirs(output_dir, exist_ok=True)

            # cv2.imread signals failure by returning None rather than raising
            image = cv2.imread(file_path)
            if image is None:
                print(f"Skipping {file_path}: could not be read as an image")
                continue

            # Grayscale + binary threshold: every pixel brighter than 1 becomes foreground
            # (presumably the collage background is black -- confirm for other data sets)
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            _, binary_threshold = cv2.threshold(gray_image, 1, 255, cv2.THRESH_BINARY)

            # Outer contours only; keep those larger than min_area
            contours, _ = cv2.findContours(binary_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # Compute each bounding rectangle once, then order by (y, x)
            boxes = [cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > min_area]
            boxes.sort(key=lambda box: (box[1], box[0]))

            # Crop and save every rectangle, numbered from 1 in sorted order
            stem = Path(file).stem
            for i, (x, y, w, h) in enumerate(boxes, start=1):
                file_save_path = os.path.join(output_dir, f'{stem}_{i:06}.png')
                if not cv2.imwrite(file_save_path, image[y:y+h, x:x+w]):
                    print(f"Could not write {file_save_path}")

            print(f"Finished processing {file_path}")

# Example usage: split every collage of the ET22-05 experiment;
# the results are written below data/ET22-05-split
top_input_dir, top_output_dir = 'data/ET22-05', 'data'
segment_image(top_input_dir=top_input_dir, top_output_dir=top_output_dir)

Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000001.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000001.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000002.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000002.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000003.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000003.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000004.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000004.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000005.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000005.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000006.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000006.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000007.tif...
Finished processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000007.tif
Processing data/ET22-05\Control\ET-1 C3\ET-1 C3_000008.tif...
Finished processing data/ET2

In [2]:
def process_lst_files(top_input_dir, top_output_dir, skip_rows=58):
    """
    Processes .lst files in the input directory and its subdirectories,
    reading each .lst file into a pandas DataFrame and saving it as a .csv file in the
    corresponding mirrored directory structure under the specified top-level output directory.

    Each .lst file is parsed as '|'-delimited text without a header row; the first
    skip_rows lines are ignored and the remaining rows are labelled with a fixed list
    of 56 column names. The result is written to '<original stem>_df.csv' (without the
    DataFrame index).

    Parameters:
    - top_input_dir: The top-level directory containing the subdirectories with .lst files.
    - top_output_dir: The top-level directory where the processed .lst files will be saved as DataFrames.
    - skip_rows: Number of leading lines to ignore in every .lst file before the data rows
      start. Defaults to 58 (presumably the length of the .lst header for this export --
      confirm when processing files from another source).

    Returns:
    - None
    """
    column_names = [
        'id', 'image_x', 'image_y', 'image_w', 'image_h', 'src_x', 'src_y', 'src_image',
        'cal_image', 'ppc', 'camera', 'cal_const', 'fringe_size', 'sphere_count',
        'raw_sphere_volume', 'raw_sphere_complement', 'raw_sphere_unknown', 'raw_area',
        'raw_feret_max', 'raw_feret_min', 'raw_feret_mean', 'raw_perimeter',
        'raw_convex_perimeter', 'raw_legendre_major', 'raw_legendre_minor', 'abd_area',
        'abd_diameter', 'length', 'width', 'esd_diameter', 'fd_diameter', 'perimeter',
        'convex_perimeter', 'intensity', 'sigma_intensity', 'sum_intensity', 'compactness',
        'elongation', 'roughness', 'edge_gradient', 'elapsed_time', 'circle_fit',
        'symmetry', 'circularity_hu', 'intensity_calimage', 'raw_convex_hull_area',
        'raw_filled_area', 'filled_area', 'feret_max_angle', 'feret_min_angle',
        'avg_red', 'avg_green', 'avg_blue', 'timestamp', 'collage_file', 'image_id',
    ]

    for subdir, _dirs, files in os.walk(top_input_dir):
        # Mirror the input sub-directory inside the output tree
        relative_path = os.path.relpath(subdir, top_input_dir)
        output_dir = os.path.join(top_output_dir, relative_path)

        for file in files:
            if not file.endswith('.lst'):
                continue

            file_path = os.path.join(subdir, file)
            print(f"Processing {file_path}...")

            df = pd.read_csv(file_path, delimiter='|', skiprows=skip_rows, header=None, names=column_names)

            # Only create the output directory once there is something to save
            os.makedirs(output_dir, exist_ok=True)

            # '<stem>_df.csv' next to where the .lst file sits in the mirrored tree
            output_file_path = os.path.join(output_dir, Path(file).stem + '_df.csv')

            df.to_csv(output_file_path, index=False)
            print(f"Saved DataFrame to {output_file_path}")

# Convert every .lst file of the ET22-05 experiment to CSV below data/ET22-05-split
top_input_dir, top_output_dir = 'data/ET22-05', 'data/ET22-05-split'
process_lst_files(top_input_dir=top_input_dir, top_output_dir=top_output_dir)


Processing data/ET22-05\Control\ET-1 C3\ET-1 C3.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-1 C3\ET-1 C3_df.csv
Processing data/ET22-05\Control\ET-1 C4\ET-1 C4.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-1 C4\ET-1 C4_df.csv
Processing data/ET22-05\Control\ET-1 Final\ET-1 Final.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-1 Final\ET-1 Final_df.csv
Processing data/ET22-05\Control\ET-2 C3\ET-2 C3.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-2 C3\ET-2 C3_df.csv
Processing data/ET22-05\Control\ET-2 C4\ET-2 C4.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-2 C4\ET-2 C4_df.csv
Processing data/ET22-05\Control\ET-2 Final\ET-2 Final.lst...
Saved DataFrame to data/ET22-05-split\Control\ET-2 Final\ET-2 Final_df.csv
Processing data/ET22-05\ET-B\ET-B1 C1\ET-B1 In.lst...
Saved DataFrame to data/ET22-05-split\ET-B\ET-B1 C1\ET-B1 In_df.csv
Processing data/ET22-05\ET-B\ET-B1 C2\ET-B1 C2.lst...
Saved DataFrame to data/ET22-05-split\ET-B\ET-B1 C2\ET-B