---

# Cellpose Batch Processing with Custom Model: Soma
* Last edited: 09/20/2025

_Brief overview_: 
- Processes brightfield tile image files named W0001F0001T0001Z001C1.tif etc.
- Saves soma-only masks as filename_seg.npy with preview outputs (.png)

*See the [Cellpose distributed documentation](https://cellpose.readthedocs.io/en/latest/distributed.html) for further details.*

---

---

QUICK START - RUN WITH YOUR EMPIRICAL PARAMETERS:
=================================================    
The parameters will be:
- Logged to console during processing
- Saved in each image's summary.txt file
- Included in the batch processing summary
- Shown in visualization plots

PARAMETER EXPLANATION:
=====================
- flow_threshold: Maximum allowed flow error for a mask to be kept. Range: 0.0-3.0.
  Higher values (e.g., 1.0) are more permissive: masks with less consistent flows are still kept, so more cells are returned.
  Lower values (e.g., 0.2) are stricter: ill-shaped masks are discarded, so fewer cells are returned.
  
- cellprob_threshold: Controls which pixels are considered as potential cell pixels. Range: -6.0 to 6.0.
  Higher values (e.g., 0.4) only keep high-confidence cell pixels.
  Lower values (e.g., -1.0) include more uncertain pixels. 
  
- min_size: Minimum number of pixels for a detected object to be considered a cell. Default = 15
  Smaller values detect tiny objects, larger values filter out small debris.
  
*Note*: These default parameters are overridden when a custom-trained model is used. If you are using a custom model, you can still specify these threshold parameters manually to override the values that come with the custom model.

---

## Setup

In [1]:
import os, re, logging, numpy as np
from pathlib import Path
from tqdm import tqdm
from scipy.ndimage import gaussian_filter
import matplotlib.pyplot as plt

from cellpose import models, io, utils, plot
from cellpose.io import imread

# ── Logging ────────────────────────────────────────────────────────────────
# Let Cellpose configure its own logging first; this is the call that emits the
# "WRITING LOG OUTPUT TO ..." banner and the version summary.
io.logger_setup()
# Then reconfigure the root logger with this notebook's message format.
# force=True removes any handlers already attached to the root logger before
# applying this configuration.
# NOTE(review): that presumably also drops the handlers io.logger_setup() just
# installed (including its log-file handler) — confirm this is intended.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s",
                    force=True)
# Module-level logger used by the helper functions in this notebook.
logger = logging.getLogger(__name__)

# ── Paths ──────────────────────────────────────────────────────────────────
# Repository root, taken as the parent of the current working directory
# (the notebook is expected to be launched from a direct subfolder of the repo).
# Change this if you run from somewhere else.
REPO_ROOT = Path.cwd().parent

# Custom-trained Cellpose model file.
MODEL_PATH = REPO_ROOT.joinpath("models", "cpsam_soma")
# Folder that is searched recursively for input tiles.
INPUT_DIR = REPO_ROOT.joinpath("data", "example", "raw")
# Folder that receives the *_seg.npy files and PNG outputs.
OUTPUT_DIR = REPO_ROOT.joinpath("data", "example", "interim", "soma")

# ── Cellpose configuration ────────────────────────────────────────────────
# Keyword arguments used to construct the Cellpose model.
model_kwargs = {
    "gpu": True,
    "pretrained_model": MODEL_PATH,  # custom-trained network
}

# Keyword arguments forwarded to model.eval() for every image.
eval_kwargs = {
    "diameter": None,            # no fixed diameter supplied
    "channels": 0,               # bright-field, single channel
    "do_3D": False,              # tiles are segmented as 2-D images
    "normalize": True,           # intensity normalisation on (author's note: important for 16-bit data)
    "flow_threshold": 1.5,       # raised: accept weaker flows
    "cellprob_threshold": -0.5,  # lowered: include fainter cell-probability pixels
    # Optional: add "min_size": -1 to disable the minimum-size filter so small
    # neurite segments are kept (the default minimum size is 15).
}
# flow_threshold and cellprob_threshold are set explicitly above; min_size is
# left at its default unless the optional entry is added.

# ── Helpers ────────────────────────────────────────────────────────────────
# Tile file names of the form W…F…T…Z…C1.tif / .tiff (C1 or C01 only, any case),
# so that masks are generated from the brightfield channel only.
TileRegex = re.compile(r"^W\d+F\d+T\d+Z\d+C0?1\.(?:tif|tiff)$", re.IGNORECASE)


def find_image_files(root_dir: str):
    """Collect all tile images below ``root_dir``.

    The directory tree is walked recursively and every file whose name fully
    matches ``TileRegex`` is kept.

    Returns a sorted list of ``(full_path, stem)`` tuples, where ``stem`` is
    the file name without its extension.
    """
    return sorted(
        (os.path.join(folder, name), Path(name).stem)
        for folder, _, names in os.walk(root_dir)
        for name in names
        if TileRegex.fullmatch(name)
    )

def save_segmentation_plot(img, mask, flow, out_png):
    """Create and save a segmentation overview figure.

    Cellpose's ``plot.show_segmentation`` is tried first. If it is skipped
    (no usable flow image) or raises, a three-panel fallback (raw image, mask,
    overlay) is drawn instead. The figure is always closed, even when drawing
    or saving fails, so figures do not accumulate over a long batch.

    Args:
        img: Image array to display.
        mask: Label array; in the fallback view ``mask.max()`` is shown as the
            object count.
        flow: Flow output for this image. It is only used when it is a
            non-empty list whose first element is a 3-D array; that element is
            passed to ``plot.show_segmentation`` as the flow image.
        out_png: Destination path of the PNG file.
    """
    fig = plt.figure(figsize=(15, 10))
    try:
        drawn = False
        try:
            # flow can be list-like or ndarray; pull the first (3-D) layer if present
            flow_hsv = (flow[0]
                        if isinstance(flow, list) and flow and flow[0].ndim == 3
                        else None)
            if flow_hsv is not None:
                plot.show_segmentation(fig, img, mask, flow_hsv)
                drawn = True
        except Exception as e:
            # Best effort: any failure here only means the fallback is used.
            logger.debug(f"show_segmentation failed: {e}")

        if not drawn:
            # Remove anything a failed show_segmentation call may have left
            # on the figure before drawing the fallback panels.
            fig.clf()

            ax = fig.add_subplot(1, 3, 1)
            ax.imshow(img, cmap='gray')
            ax.set_title('Raw')
            ax.axis('off')

            ax = fig.add_subplot(1, 3, 2)
            ax.imshow(mask, cmap='jet' if mask.max() else 'gray')
            ax.set_title(f"Mask (n={int(mask.max())})")
            ax.axis('off')

            ax = fig.add_subplot(1, 3, 3)
            ax.imshow(plot.mask_overlay(img, mask))
            ax.set_title('Overlay')
            ax.axis('off')

        # Operate on this figure explicitly instead of pyplot's "current" one.
        fig.tight_layout()
        fig.savefig(out_png, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if drawing or saving raised.
        plt.close(fig)

def process_with_cellpose(image_path: str,
                          model: models.CellposeModel,
                          save_dir: str,
                          fname_base: str):
    """Segment one image, save ``*_seg.npy`` and a diagnostic plot.

    The image is read, reduced to 2-D if necessary, smoothed with a Gaussian
    filter (sigma=1) and passed to ``model.eval`` together with the
    module-level ``eval_kwargs``. Two files are written to ``save_dir``:

    * ``<fname_base>_seg.npy`` – a dict with the keys ``filename``, ``masks``,
      ``outlines``, ``flows`` and ``diameter``;
    * ``<fname_base>_segmentation.png`` – the overview figure.

    Any exception is logged together with its traceback and then swallowed, so
    one bad tile does not stop a batch. Nothing is returned.

    Args:
        image_path: Path of the image to segment.
        model: Cellpose model used for evaluation.
        save_dir: Existing directory that receives the output files.
        fname_base: File-name prefix for the output files.
    """
    try:
        img = imread(image_path)

        # For >2-D input keep only index 0 of the last axis.
        # NOTE(review): this assumes a channels-last layout — confirm for your TIFFs.
        if img.ndim > 2:
            img = img[..., 0]

        # Light smoothing before segmentation; the smoothed image is also the
        # one shown in the diagnostic plot.
        img = gaussian_filter(img, sigma=1)

        results = model.eval([img], **eval_kwargs)

        # unpack depending on Cellpose version (4-tuple vs. 3-tuple return)
        if len(results) == 4:
            masks, flows, styles, diams = results
            diam = diams[0] if hasattr(diams, "__getitem__") else diams
        else:
            masks, flows, styles = results
            diam = getattr(model, "diam_mean", None)

        # A one-element list went in, so take the first entry of each output.
        mask = masks[0]
        flow = flows[0]
        num = int(mask.max())
        logger.info(f"{Path(image_path).name}: {num} cells")

        # uint16 can hold label ids up to 65535; widen instead of letting
        # larger ids wrap around silently.
        if num <= np.iinfo(np.uint16).max:
            label_dtype = np.uint16
        else:
            label_dtype = np.uint32

        np.save(os.path.join(save_dir, f"{fname_base}_seg.npy"), {
            "filename" : image_path,
            "masks"    : mask.astype(label_dtype),
            "outlines" : utils.masks_to_outlines(mask) if num else np.zeros_like(mask),
            "flows"    : flow,
            "diameter" : diam,
        })

        # save visual diagnostic
        save_segmentation_plot(img, mask, flow,
                               os.path.join(save_dir, f"{fname_base}_segmentation.png"))

    except Exception as e:
        # Batch boundary: keep going with the next tile, but keep the traceback
        # in the log so the failure can be diagnosed afterwards.
        logger.exception(f"Failed on {image_path}: {e}")



Welcome to CellposeSAM, cellpose v
cellpose version: 	4.0.6 
platform:       	win32 
python version: 	3.10.18 
torch version:  	2.8.0+cu126! The neural network component of
CPSAM is much larger than in previous versions and CPU excution is slow. 
We encourage users to use GPU/MPS if available. 


2025-09-20 15:21:28,093 [INFO] WRITING LOG OUTPUT TO C:\Users\neilzhao\.cellpose\run.log
2025-09-20 15:21:28,094 [INFO] 
cellpose version: 	4.0.6 
platform:       	win32 
python version: 	3.10.18 
torch version:  	2.8.0+cu126


In [2]:
# ── Main ───────────────────────────────────────────────────────────────────
def main():
    """Segment every matching tile under INPUT_DIR and write results to OUTPUT_DIR.

    The input folder is searched before the model is loaded, so an empty or
    missing input folder is reported straight away instead of after model
    loading.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    files = find_image_files(INPUT_DIR)
    logger.info(f"Found {len(files)} TIFF tiles.")
    if not files:
        # Nothing to do: skip loading the model altogether.
        logger.warning(f"No matching TIFF tiles under {INPUT_DIR}; nothing to segment.")
        return

    logger.info("Loading Cellpose model …")
    model = models.CellposeModel(**model_kwargs)

    for fpath, stem in tqdm(files, desc="Segmenting"):
        process_with_cellpose(fpath, model, OUTPUT_DIR, stem)


if __name__ == "__main__":
    main()

2025-09-20 15:21:28,115 - INFO - Loading Cellpose model …
2025-09-20 15:21:28,284 - INFO - ** TORCH CUDA version installed and working. **
2025-09-20 15:21:28,285 - INFO - >>>> using GPU (CUDA)
2025-09-20 15:21:30,495 - INFO - >>>> loading model z:\Active_Users_Data\Neil\Analyses\Results\dry\xenium\vista-fish\brightfield-segmentation\models\cpsam_soma
2025-09-20 15:21:49,354 - INFO - Found 3 TIFF tiles.
2025-09-20 15:24:04,013 - INFO - W0001F0062T0001Z001C1.tif: 140 cells
2025-09-20 15:26:28,749 - INFO - W0001F0132T0001Z001C1.tif: 297 cells
2025-09-20 15:28:43,273 - INFO - W0001F0190T0001Z001C1.tif: 577 cells
Segmenting: 100%|██████████| 3/3 [07:06<00:00, 142.03s/it]


## Plotting PNG and Stitched Outputs
Here, we also export the `masks` array stored in each `_seg.npy` file as a 16-bit `.png` label mask.

In [3]:
"""
Convert *_seg.npy -> 16-bit PNG masks.

For every ``*_seg.npy`` file in ``mask_dir`` the stored ``masks`` array is
written next to it as a PNG with the same base name.

Requires
--------
numpy, imageio-v3
"""
import numpy as np
import imageio.v3 as imageio
from pathlib import Path
import sys

# Edit this to point at the folder containing your *_seg.npy files.
mask_dir = OUTPUT_DIR

# Find all *_seg.npy files (non-recursive), in a stable order.
seg_files = sorted(mask_dir.glob("*_seg.npy"))
if not seg_files:
    # Report the actual folder so the problem can be located.
    sys.exit(f"No *_seg.npy files found in {mask_dir}")

# Largest label id that fits into a 16-bit PNG.
_UINT16_MAX = np.iinfo(np.uint16).max

# Dump each "masks" array as a 16-bit PNG.
for seg_path in seg_files:
    # SECURITY NOTE: allow_pickle=True unpickles the file content, which can
    # execute arbitrary code. Only load *_seg.npy files you produced yourself.
    data = np.load(seg_path, allow_pickle=True).item()
    mask = data["masks"]
    if mask.size and mask.max() > _UINT16_MAX:
        # Casting would wrap label ids around silently; skip instead.
        print(f"Skipped {seg_path}: label ids exceed the 16-bit PNG range")
        continue
    mask = mask.astype(np.uint16, copy=False)
    png_path = seg_path.with_suffix(".png")
    imageio.imwrite(png_path, mask)
    print(f"Wrote {png_path}")

Wrote z:\Active_Users_Data\Neil\Analyses\Results\dry\xenium\vista-fish\brightfield-segmentation\data\example\interim\soma\W0001F0062T0001Z001C1_seg.png
Wrote z:\Active_Users_Data\Neil\Analyses\Results\dry\xenium\vista-fish\brightfield-segmentation\data\example\interim\soma\W0001F0132T0001Z001C1_seg.png
Wrote z:\Active_Users_Data\Neil\Analyses\Results\dry\xenium\vista-fish\brightfield-segmentation\data\example\interim\soma\W0001F0190T0001Z001C1_seg.png
