Extend CLI parameters and add GitHub Actions for Docker (#11)
* Add more extensive argument parsing

* Add Dockerfile, environment.yml and update README.md

* Update readme

* Create docker-publish.yml

* Update docker-publish.yml

* Update README.md

* Update environment.yml

* Update README.md

* Update README.md

* Update README.md

Co-authored-by: Albert Dominguez Mantes <31998088+AlbertDominguez@users.noreply.github.com>

* "Rename environment.yml"

* Update dockerfile

* Remove gipod.yml

---------

Co-authored-by: Albert Dominguez Mantes <31998088+AlbertDominguez@users.noreply.github.com>
migueLib and AlbertDominguez committed Jul 2, 2024
1 parent 58b1e99 commit 1b6c228
Showing 5 changed files with 245 additions and 9 deletions.
99 changes: 99 additions & 0 deletions .github/workflows/docker-publish.yml
@@ -0,0 +1,99 @@
name: Docker

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

on:
  # schedule:
  #   - cron: '21 13 * * *'
  push:
    branches: [ "main" ]
    # Publish semver tags as releases.
    tags: [ '*.*.*' ]
  pull_request:
    branches: [ "main" ]

env:
  # Use docker.io for Docker Hub if empty
  REGISTRY: ghcr.io
  # github.repository as <account>/<repo>
  IMAGE_NAME: ${{ github.repository }}


jobs:
  build:

    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Install the cosign tool except on PR
      # https://github.com/sigstore/cosign-installer
      - name: Install cosign
        if: github.event_name != 'pull_request'
        uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 # v3.5.0
        with:
          cosign-release: 'v2.2.4'

      # Set up BuildKit Docker container builder to be able to build
      # multi-platform images and export cache
      # https://github.com/docker/setup-buildx-action
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into registry ${{ env.REGISTRY }}
        if: github.event_name != 'pull_request'
        uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Extract metadata (tags, labels) for Docker
      # https://github.com/docker/metadata-action
      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Sign the resulting Docker image digest except on PRs.
      # This will only write to the public Rekor transparency log when the Docker
      # repository is public to avoid leaking data. If you would like to publish
      # transparency data even for private images, pass --force to cosign below.
      # https://github.com/sigstore/cosign
      - name: Sign the published Docker image
        if: ${{ github.event_name != 'pull_request' }}
        env:
          # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
          TAGS: ${{ steps.meta.outputs.tags }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        # This step uses the identity token to provision an ephemeral certificate
        # against the sigstore community Fulcio instance.
        run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
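The signing step above pipes every tag produced by the metadata action through `cosign sign`, pairing each tag with the immutable image digest as a `<tag>@<digest>` reference. A minimal Python sketch of how that shell pipeline combines tags and digest (the tag and digest values below are hypothetical placeholders, not real image references):

```python
# Mimic: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
# TAGS is a newline-separated list of image tags; DIGEST is the single
# content-addressed digest of the pushed image.

def sign_targets(tags: str, digest: str) -> list[str]:
    """Return the '<tag>@<digest>' references cosign would be invoked on."""
    return [f"{tag}@{digest}" for tag in tags.splitlines() if tag]

# Hypothetical placeholder values:
tags = "ghcr.io/example/spotiflow:main\nghcr.io/example/spotiflow:1.0.0"
digest = "sha256:deadbeef"
print(sign_targets(tags, digest))
```

Signing the digest rather than a bare tag matters: tags are mutable, so the signature stays bound to the exact image content even if the tag is later moved.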
33 changes: 33 additions & 0 deletions Dockerfile
@@ -0,0 +1,33 @@
FROM mambaorg/micromamba:lunar
LABEL authors="Albert Dominguez, Miguel Ibarra"

# Set the base layer for micromamba
USER root
COPY docker-env-config.yml .

RUN apt-get update -qq && apt-get install -y \
    build-essential \
    ffmpeg \
    libsm6 \
    libxext6 \
    procps \
    git

# Set the environment variable for the root prefix
ARG MAMBA_ROOT_PREFIX=/opt/conda

# Add /opt/conda/bin to the PATH
ENV PATH $MAMBA_ROOT_PREFIX/bin:$PATH

# Install stuff with micromamba
RUN micromamba env create -f docker-env-config.yml && \
micromamba clean --all --yes

# Add environment to PATH
ENV PATH="/opt/conda/envs/spotiflow/bin:$PATH"

# Set the working directory
WORKDIR /spotiflow

# Copy contents of the folder to the working directory
COPY . .
22 changes: 22 additions & 0 deletions README.md
@@ -59,6 +59,28 @@ spotiflow-predict PATH

where PATH can be either an image or a folder. By default, the command will use the `general` pretrained model. You can specify a different model by using the `--pretrained-model` flag. Moreover, spots are saved to a subfolder `spotiflow_results` created inside the input folder (this can be changed with the `--out-dir` flag). For more information, please refer to the help message of the CLI (`$ spotiflow-predict -h`).

### Inference (Docker)

As an alternative to installing Spotiflow as a command-line tool on your operating system, you can use it directly from our Docker container (thanks to @migueLib for the contribution!).

To pull the Docker container from Docker Hub, use:
```console
docker pull weigertlab/spotiflow:main
```

Then, run spotiflow-predict with:
```console
docker run -it -v [/local/input/folder]:/spotiflow/input weigertlab/spotiflow:main spotiflow-predict input/your_file.tif -o .
```
Where:
- `-v` is the volume flag, which mounts a folder from your local machine into the container.
- `[/local/input/folder]:/spotiflow/input` maps the local folder containing the images you want to analyze to the `/spotiflow/input` folder inside the container.

Note:
- The current implementation of Spotiflow in Docker only supports CPU inference.

### Inference (API)

The API allows detecting spots in a new image in a few lines of code! Please check the [corresponding example notebook](examples/2_inference.ipynb) and the documentation for a more in-depth explanation.
11 changes: 11 additions & 0 deletions docker-env-config.yml
@@ -0,0 +1,11 @@
name: spotiflow
channels:
  - pytorch
  - conda-forge
dependencies:
  - "python=3.9"
  - "pytorch"
  - "torchvision"
  - "cpuonly"
  - pip:
      - "spotiflow"
89 changes: 80 additions & 9 deletions spotiflow/cli/predict.py
@@ -17,35 +17,93 @@

ALLOWED_EXTENSIONS = ("tif", "tiff", "png", "jpg", "jpeg")

def main():
    parser = argparse.ArgumentParser("spotiflow-predict",
# Argument parser
def get_args():
    parser = argparse.ArgumentParser("spotiflow-predict",
                                     description="Predict spots in image(s) using Spotiflow.")
    parser.add_argument("data_path", type=Path, help="Path to image file or directory of image files. If a directory, will process all images in the directory.")
    parser.add_argument("--pretrained-model", type=str, required=False, default="general", help="Pretrained model name. Defaults to 'general'.")
    parser.add_argument("--model-dir", type=str, required=False, default=None, help="Model directory to load. If provided, will override --pretrained-model.")
    parser.add_argument("--out-dir", type=Path, required=False, default=None, help="Output directory. If not provided, will create a 'spotiflow_results' subfolder in the input folder and write the CSV(s) there.")

    required = parser.add_argument_group(title="Required arguments",
                                         description="Arguments required to run the prediction model")
    required.add_argument("data_path",
                          type=Path,
                          help="Path to image file or directory of image files. If a directory, will process all images in the directory.")
    required.add_argument("-pm", "--pretrained-model",
                          type=str, required=False, default="general",
                          help="Pretrained model name. Defaults to 'general'.")
    required.add_argument("-md", "--model-dir",
                          type=str, required=False, default=None,
                          help="Model directory to load. If provided, will override --pretrained-model.")
    required.add_argument("-o", "--out-dir",
                          type=Path, required=False, default=None,
                          help="Output directory. If not provided, will create a 'spotiflow_results' subfolder in the input folder and write the CSV(s) there.")

    predict = parser.add_argument_group(title="Prediction arguments",
                                        description="Arguments to change the behaviour of spotiflow during prediction. To keep the default behaviour, do not provide these arguments.")
    predict.add_argument("-t", "--probability-threshold",
                         type=float, required=False, default=None,
                         help="Probability threshold for peak detection. If None, will load the optimal one. Defaults to None.")
    predict.add_argument("-n", "--n-tiles",
                         type=int, required=False, default=(1, 1), nargs=2,
                         help="Number of tiles to split the image into. Defaults to (1, 1). This parameter can be used to predict spots on larger images.")
    predict.add_argument("-min", "--min-distance",
                         type=int, required=False, default=1,
                         help="Minimum distance between spots for NMS. Defaults to 1.")
    predict.add_argument("-eb", "--exclude-border",
                         action="store_true", required=False,
                         help="Exclude spots close to the image border. Defaults to False.")
    predict.add_argument("-s", "--scale",
                         type=int, required=False, default=None,
                         help="Scale factor to apply to the image. Defaults to None.")
    predict.add_argument("-sp", "--subpix",
                         action="store_true", required=False,
                         help="Use the stereographic flow to compute subpixel localization. Defaults to False.")
    predict.add_argument("-p", "--peak-mode",
                         type=str, required=False, default="fast", choices=["fast", "skimage"],
                         help="Peak detection mode (can be either 'skimage' or 'fast', which is a faster custom C++ implementation). Defaults to 'fast'.")
    predict.add_argument("-norm", "--normalizer",
                         type=str, required=False, default="auto",
                         help="Normalizer to use. If None, will use the default normalizer. Defaults to 'auto' (percentile-based normalization with p_min=1, p_max=99.8).")
    predict.add_argument("-v", "--verbose",
                         action="store_true", required=False,
                         help="Print verbose output. Defaults to False.")
    predict.add_argument("-d", "--device",
                         type=str, required=False, default="auto", choices=["auto", "cpu", "cuda", "mps"],
                         help="Device to run the model on. Defaults to 'auto'.")

    args = parser.parse_args()
    return args
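The grouped flags above can be exercised without touching any model code. A standalone sketch re-creating a small subset of this parser (it does not import spotiflow; the file name is a hypothetical placeholder) shows how `nargs=2` and the boolean flags parse:

```python
# Minimal subset of the spotiflow-predict CLI defined above, for illustration.
import argparse
from pathlib import Path

parser = argparse.ArgumentParser("spotiflow-predict-sketch")
parser.add_argument("data_path", type=Path)
parser.add_argument("-pm", "--pretrained-model", type=str, default="general")
parser.add_argument("-t", "--probability-threshold", type=float, default=None)
parser.add_argument("-n", "--n-tiles", type=int, default=(1, 1), nargs=2)
parser.add_argument("-sp", "--subpix", action="store_true")

# Parse a sample command line (hypothetical file name).
args = parser.parse_args(["img.tif", "-n", "2", "3", "-sp"])
print(args.n_tiles, args.subpix, args.pretrained_model)
```

Note that `nargs=2` yields a *list* (`[2, 3]`), which is why `main()` converts it with `tuple(args.n_tiles)` before passing it to `model.predict`.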


def main():
    # Get arguments from the command line
    args = get_args()
    log.info(f"Spotiflow - version {__version__}")

    # Choose prediction method: from_folder or from_pretrained
    if args.model_dir is not None:
        model = Spotiflow.from_folder(args.model_dir)
        log.info("Given local model loaded.")
    else:
        model = Spotiflow.from_pretrained(args.pretrained_model)

    # Try to compile the model
    try:
        model = torch.compile(model)
    except RuntimeError:
        log.info("Could not compile model. Will proceed without compilation.")

    # Set out_dir
    out_dir = args.out_dir

    # Check if data_path is a file or a directory.
    # If it's a file, check that it is a valid image file.
    if args.data_path.is_file():
        assert args.data_path.suffix[1:] in ALLOWED_EXTENSIONS, f"File {args.data_path} is not a valid image file. Allowed extensions are: {ALLOWED_EXTENSIONS}"
        image_files = [args.data_path]
        if out_dir is None:
            out_dir = args.data_path.parent/"spotiflow_results"

    # If it's a directory, get all image files in the directory.
    elif args.data_path.is_dir():
        image_files = sorted(
            tuple(chain(*tuple(args.data_path.glob(f"*.{ext}") for ext in ALLOWED_EXTENSIONS)))
@@ -57,15 +115,28 @@ def main():
    else:
        raise ValueError(f"Path {args.data_path} does not exist!")

    # Create out_dir if it doesn't exist
    out_dir.mkdir(exist_ok=True, parents=True)

    # Predict spots in images and write them to CSV
    images = [imread(img) for img in image_files]
    for img, fname in tqdm(zip(images, image_files), desc="Predicting", total=len(images)):
        spots, _ = model.predict(img, verbose=False)
        spots, _ = model.predict(img,
                                 prob_thresh=args.probability_threshold,
                                 n_tiles=tuple(args.n_tiles),
                                 min_distance=args.min_distance,
                                 exclude_border=args.exclude_border,
                                 scale=args.scale,
                                 subpix=args.subpix,
                                 peak_mode=args.peak_mode,
                                 normalizer=args.normalizer,
                                 verbose=args.verbose,
                                 device=args.device)
        write_coords_csv(spots, out_dir/f"{fname.stem}.csv")

    return 0


if __name__ == "__main__":
    import sys
    sys.exit(main())
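The directory branch of `main()` flattens one glob per allowed extension into a single sorted list of image files. A self-contained sketch of that collection step (the file names are hypothetical placeholders created in a temporary directory):

```python
# Collect every file in a folder matching one of the allowed image
# extensions, then sort — mirroring the chain-of-globs used in main().
import tempfile
from itertools import chain
from pathlib import Path

ALLOWED_EXTENSIONS = ("tif", "tiff", "png", "jpg", "jpeg")

def collect_images(folder: Path) -> list[Path]:
    return sorted(chain(*(folder.glob(f"*.{ext}") for ext in ALLOWED_EXTENSIONS)))

with tempfile.TemporaryDirectory() as tmp:
    folder = Path(tmp)
    for name in ("a.tif", "b.png", "notes.txt"):  # placeholder files
        (folder / name).touch()
    names = [p.name for p in collect_images(folder)]
    print(names)  # 'notes.txt' is skipped: its extension is not allowed
```

Sorting after the chained globs matters: each `glob` call returns files in filesystem order, so without `sorted` the per-run ordering of the output CSVs would not be deterministic.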
