nf-core · FloWuenne · Oct 17, 2023 · Sep 27, 2023 · Oct 6, 2023 · Oct 10, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,25 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v1.0.1dev - [2023.12.10]
+
+Molkart now follows most nf-core standards and adds optional parameters, multiple segmentation options, and membrane channel handling. Work has started on functionality for creating training subsets.
+
+### `Added`
+
+- Parameters for pipeline execution
+- `ext.args` logic for almost all modules with external parameters
+- Channel logic for membrane handling
+- Create-stack process for Cellpose when a membrane image is present
+- Optional CLAHE
+- Started work on create-subset functionality
+
+### `Fixed`
+
+### `Dependencies`
+
+### `Deprecated`
+
 ## v1.0dev - [date]
 
 Initial release of nf-core/molkart, created with the [nf-core](https://nf-co.re/) template.

diff --git a/README.md b/README.md
@@ -42,8 +42,8 @@ First, prepare a samplesheet with your input data that looks as follows:
 `samplesheet.csv`:
 
 ```csv
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+sample,nuclear_image,spot_table,membrane_image
+sample1,sample1_DAPI.tif,sample1_spots.txt,sample1_WGA.tif
 ```
 
 Each row represents a fastq file (single-end) or a pair of fastq files (paired end).

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
@@ -1,3 +1,3 @@
-sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+sample,nuclear_image,spot_table
+sample_fov1,/path/to/dapi/file/sample_fov1.DAPI.tiff,/path/to/spots/file/sample_fov1.spots.txt
+sample_fov2,/path/to/dapi/file/sample_fov2.DAPI.tiff,/path/to/spots/file/sample_fov2.spots.txt
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -9,18 +9,32 @@
         "properties": {
             "sample": {
                 "type": "string",
-                "pattern": ".*$",
+                "pattern": "^\\S+$",
                 "errorMessage": "Sample name must be provided and cannot contain spaces"
             },
             "nuclear_image": {
                 "type": "string",
-                "pattern": ".*.tiff$",
-                "errorMessage": "Nuclear image must be provided, cannot contain spaces and must have extension '.tiff'"
+                "pattern": "^\\S+\\.(tif|tiff)$",
+                "format": "file-path",
+                "errorMessage": "Nuclear image must be provided, cannot contain spaces and must have extension '.tif' or '.tiff'"
             },
             "spot_table": {
                 "type": "string",
-                "pattern": ".*.txt$",
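+                "pattern": "^\\S+\\.(txt|tsv)$",
-                "errorMessage": "Spot table must be provided, has to have shape x,y,z,gene with sep = '\t', cannot contain spaces and must have extension '.txt'"
+                "errorMessage": "Spot table must be provided, has to have shape x,y,z,gene with sep = '\t', cannot contain spaces and must have extension '.txt' or '.tsv'"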
+            },
+            "membrane_image": {
+                "errorMessage": "Membrane image is optional; if provided, it cannot contain spaces and must have extension '.tif' or '.tiff'",
+                "anyOf": [
+                    {
+                        "type": "string",
+                        "pattern": "^\\S+\\.(tif|tiff)$"
+                    },
+                    {
+                        "type": "string",
+                        "maxLength": 0
+                    }
+                ]
             }
         },
         "required": ["sample", "nuclear_image", "spot_table"]

diff --git a/bin/collect_QC.py b/bin/collect_QC.py
@@ -73,7 +73,8 @@ def summarize_segmasks(mcquant, spots_summary):
         ]
     )
     summary_df.loc[0] = [
-        args.sample_id + "_" + args.segmentation_method,
+        args.sample_id,
+        ##args.sample_id + "_" + args.segmentation_method,
         args.segmentation_method,
         summary_segmentation[0],
         summary_segmentation[1],

diff --git a/bin/create_stack.py b/bin/create_stack.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+### This script takes a list of images and stacks them into a single image stack using Dask
+
+import numpy as np
+import argparse
+import tifffile
+from aicsimageio.writers import OmeTiffWriter
+from aicsimageio import aics_image as AI
+import aicsimageio
+import dask
+import dask.array as da
+
+
+def main():
+    """Stack the channels of one or more images into a single CYX OME-TIFF.
+
+    Command-line arguments:
+        -i/--input:      one or more image paths; each is opened lazily as a
+                         CYX Dask array and its channels are appended in the
+                         order the paths are given.
+        -o/--output:     path of the OME-TIFF to write.
+        --num-channels:  optional expected total number of channels; when
+                         given, it is checked against the channels read.
+
+    Raises:
+        ValueError: if --num-channels disagrees with the number of channels
+            found in the inputs.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input", nargs="+", required=True, help="List of images to stack")
+    parser.add_argument(
+        "-o",
+        "--output",
+        dest="output",
+        type=str,
+        required=True,
+        help="Path of the stacked OME-TIFF to write",
+    )
+    parser.add_argument(
+        "--num-channels",
+        dest="num_channels",
+        type=int,
+        default=None,
+        help="Expected total number of channels across all inputs",
+    )
+
+    args = parser.parse_args()
+
+    # Concatenate along the channel axis instead of copying plane by plane into
+    # a pre-allocated da.empty() buffer: that buffer defaults to float64, so the
+    # written stack would no longer have the dtype of the input images.
+    images = [AI.AICSImage(path).get_image_dask_data("CYX") for path in args.input]
+    out = da.concatenate(images, axis=0)
+    print(out.shape)
+
+    # Fail loudly on a channel-count mismatch rather than writing
+    # uninitialised planes (too few channels) or hitting an IndexError (too many).
+    if args.num_channels is not None and out.shape[0] != args.num_channels:
+        raise ValueError(
+            f"Expected {args.num_channels} channels but the input images contain {out.shape[0]}"
+        )
+
+    OmeTiffWriter.save(out, args.output, dim_order="CYX")
+
+
+# Run the stacking only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    main()
diff --git a/bin/crop_tiff.py b/bin/crop_tiff.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# importing the module
+import ast
+import tifffile as tiff
+import os
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Write one TIFF per crop region plus an overview plot with all crop regions outlined
+def create_crops(tiff_image, crop_dict, input_path=None):
+    """Save every crop of a channel-first image and an overview figure.
+
+    Args:
+        tiff_image: array indexed as [channel, row, column].
+        crop_dict: mapping whose values are ((row_start, row_stop),
+            (col_start, col_stop)); crops are numbered in iteration order.
+        input_path: path from which the output file names are derived. When
+            omitted, falls back to the module-level ``args.input`` set by the
+            command-line entry point (the behaviour of the two-argument call).
+
+    Writes ``./<basename>_crop<index>.tiff`` for each crop and
+    ``<basename>.crop_overview.png`` into the current working directory.
+    """
+    if input_path is None:
+        # Keeps existing two-argument callers working; only valid when the
+        # script entry point has parsed the command line into ``args``.
+        input_path = args.input
+    # Derived once, before the loop, so it is defined even for an empty crop_dict
+    basename = os.path.splitext(os.path.basename(input_path))[0]
+
+    ## Draw the maximum projection over the channel axis once as the background
+    plt.imshow(np.max(tiff_image, axis=0), cmap="gray")
+
+    for index, crop in enumerate(crop_dict.values()):
+        row_start, row_stop = crop[0][0], crop[0][1]
+        col_start, col_stop = crop[1][0], crop[1][1]
+        # imwrite replaces the deprecated imsave alias
+        tiff.imwrite(
+            f"./{basename}_crop{index}.tiff",
+            tiff_image[:, row_start:row_stop, col_start:col_stop],
+        )
+        # Outline the crop region in red on the overview
+        plt.plot(
+            [col_start, col_stop, col_stop, col_start, col_start],
+            [row_start, row_start, row_stop, row_stop, row_start],
+            "red",
+            linewidth=1,
+        )
+        # Label the box with the crop index in white at its top-left corner
+        plt.text(col_start, row_start, str(index), color="white")
+    plt.savefig(f"{basename}.crop_overview.png", dpi=300)
+
+
+## Command-line entry point
+if __name__ == "__main__":
+    # Command-line interface: the TIFF to crop and the file describing the crops
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input", help="Input tiffile.")
+    parser.add_argument("-c", "--crop_summary", help="Crop summary file.")
+    args = parser.parse_args()
+
+    # The crop summary is parsed as a Python literal and used as the crop dictionary
+    with open(args.crop_summary) as summary_file:
+        crops = ast.literal_eval(summary_file.read())
+
+    ## Load the image; a 2D image gets a leading axis of length one so it is indexed like a stack
+    tiff_image = tiff.imread(args.input)
+    if tiff_image.ndim == 2:
+        tiff_image = tiff_image[np.newaxis, ...]
+
+    create_crops(tiff_image, crops)
diff --git a/bin/project_spots.dask.py b/bin/project_spots.dask.py
@@ -56,7 +56,6 @@ def project_spots(spot_table, img):
 
     # Stack images on the c-axis
     spot_2d_stack = da.stack(spots_2d_list, axis=0)
-
     ## Write a csv file containing the channel names
     channel_names = spots_zsum.gene.unique().tolist()
     pd.DataFrame(channel_names).to_csv(args.sample_id + ".channel_names.csv", index=False, header=False)

diff --git a/conf/modules.config b/conf/modules.config
@@ -34,15 +34,121 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    // Publish the CSV outputs of MOLCART_QC under <outdir>/molcart_qc.
+    // The pattern is a plain glob string, as in the other publishDir blocks.
+    withName: 'MOLCART_QC' {
+        publishDir = [
+            path: { "${params.outdir}/molcart_qc" },
+            pattern: "*.csv"
+        ]
+    }
+
+    // Publish the stacked OME-TIFF produced by CREATE_STACK under <outdir>/stack.
+    withName: 'CREATE_STACK' {
+        publishDir = [
+            path: { "${params.outdir}/stack" },
+            pattern: "*.ome.tif"
+        ]
+    }
+
+    // Publish all outputs of CREATETRAININGTIFF under <outdir>/training_subset/tiff
+    // (no pattern is set, so nothing is filtered out here).
+    withName: "CREATETRAININGTIFF" {
+        publishDir = [
+            path: { "${params.outdir}/training_subset/tiff" }
+        ]
+    }
+
+    // Publish all outputs of TIFFH5CONVERT under <outdir>/ilastik, the same
+    // directory the ILASTIK_* processes publish to.
+    withName: "TIFFH5CONVERT" {
+        publishDir = [
+            path: { "${params.outdir}/ilastik" }
+        ]
+    }
+
+    // Publish the .h5 outputs of ILASTIK_PIXELCLASSIFICATION under <outdir>/ilastik,
+    // renamed to <sample id>_probability_<original file name>.
+    withName: "ILASTIK_PIXELCLASSIFICATION" {
+        publishDir = [
+            path: { "${params.outdir}/ilastik" },
+            pattern: "*.h5",
+            saveAs: { filename -> "${meta.id}_probability_$filename" }
+        ]
+    }
+
+    // Publish the .tiff outputs of ILASTIK_MULTICUT under <outdir>/ilastik,
+    // renamed to <sample id>_ilastik_<original file name>.
+    withName: "ILASTIK_MULTICUT" {
+        publishDir = [
+            path: { "${params.outdir}/ilastik" },
+            pattern: "*.tiff",
+            saveAs: { filename -> "${meta.id}_ilastik_$filename" }
+        ]
+    }
+
+    // Resources, publishing and command-line arguments for CREATEILASTIKTRAININGSUBSET.
+    // Outputs are published under <outdir>/training_subset/hdf5.
+    withName: "CREATEILASTIKTRAININGSUBSET" {
+        memory     = "16GB"
+        publishDir = [
+            path: { "${params.outdir}/training_subset/hdf5" }
+        ]
+        // "--crop" and "--nuclei_index 1" are always passed; the remaining flags
+        // are added only when the corresponding parameter is truthy.
+        // --crop_size needs both crop_size_x and crop_size_y.
+        // NOTE(review): a parameter set to 0 is falsy in Groovy, so its flag is omitted.
+        ext.args = [ "",
+            "--crop",
+            "--nuclei_index 1",
+            params.crop_amount                       ? "--crop_amount ${params.crop_amount}"                     : "",
+            params.crop_size_x && params.crop_size_y ? "--crop_size ${params.crop_size_x} ${params.crop_size_y}" : "",
+            params.crop_nonzero_fraction             ? "--nonzero_fraction ${params.crop_nonzero_fraction}"      : ""
+        ].join(" ").trim()
+    }
+
     withName: "PROJECT_SPOTS" {
-        memory = "16GB"
+        // Publish the .tiff and .csv outputs of PROJECT_SPOTS under <outdir>/projectedspots.
+        // The path is a closure, matching the other publishDir blocks, so that
+        // params.outdir is resolved when the task is configured.
+        memory     = "16GB"
+        publishDir = [
+            path: { "${params.outdir}/projectedspots" },
+            pattern: "*.{tiff,csv}"
+        ]
+    }
+
+    // CLAHE_DASK runs only when params.clahe is truthy.
+    // "--channel 0" is always passed; the other flags are added only when the
+    // corresponding clahe_* parameter is truthy.
+    // NOTE(review): a parameter set to 0 is falsy in Groovy, so its flag is omitted.
+    withName: "CLAHE_DASK" {
+        memory    = "16GB"
+        ext.when  = { params.clahe }
+        ext.args  = [ "",
+            "--channel 0",
+            params.clahe_cliplimit  ? "--cliplimit ${params.clahe_cliplimit}"   : "",
+            params.clahe_nbins      ? "--nbins ${params.clahe_nbins}"           : "",
+            params.clahe_pixel_size ? "--pixel-size ${params.clahe_pixel_size}" : "",
+            params.clahe_kernel     ? "--kernel ${params.clahe_kernel}"         : ""
+        ].join(" ").trim()
     }
 
     withName: "DEEPCELL_MESMER" {
-        memory = "16GB"
-        cpus = 8
-        ext.args = '--image-mpp=0.138 --nuclear-channel 0 --compartment nuclear'
+        // Runs only when 'mesmer' is one of the comma-separated entries of
+        // params.segmentation_method (entries are compared without trimming).
+        ext.when  = { params.segmentation_method.split(',').contains('mesmer') }
+        memory    = "16GB"
+        cpus      = 8
+        // "--nuclear-channel 0" is always passed; compartment and image-mpp
+        // are added only when the corresponding parameter is truthy.
+        ext.args  = [ "",
+            params.mesmer_compartment ? "--compartment ${params.mesmer_compartment}" : "",
+            params.mesmer_image_mpp   ? "--image-mpp ${params.mesmer_image_mpp}"     : "",
+            "--nuclear-channel 0"
+        ].join(" ").trim()
+        containerOptions =  '--entrypoint ""'
+        // Publish *.tif outputs as <sample id>_mesmer_<original file name>.
+        // The path is a closure, matching the other publishDir blocks.
+        publishDir = [
+            path: { "${params.outdir}/mesmer" },
+            pattern: "*.tif",
+            saveAs: { filename -> "${meta.id}_mesmer_$filename" }
+        ]
+    }
+
+    withName: "CELLPOSE" {
+        // Runs only when 'cellpose' is one of the comma-separated entries of
+        // params.segmentation_method (entries are compared without trimming).
+        ext.when  = { params.segmentation_method.split(',').contains('cellpose') }
+        memory    = "16GB"
+        cpus      = 8
+        // "--channel_axis 0" and "--no_npy" are always passed; every other flag
+        // is added only when the corresponding cellpose_* parameter is truthy.
+        ext.args = [ "",
+            "--channel_axis 0",
+            "--no_npy",
+            params.cellpose_save_flows       ? "--save_flows"                                           : "",
+            params.cellpose_diameter         ? "--diameter ${params.cellpose_diameter}"                 : "",
+            params.cellpose_chan             ? "--chan ${params.cellpose_chan}"                         : "",
+            params.cellpose_chan2            ? "--chan2 ${params.cellpose_chan2}"                       : "",
+            // --pretrained_model is passed only when no custom model is set
+            // (how a custom model is supplied is not visible in this section).
+            (!params.cellpose_custom_model && params.cellpose_pretrained_model) ? "--pretrained_model ${params.cellpose_pretrained_model}" : "",
+            params.cellpose_flow_threshold   ? "--flow_threshold ${params.cellpose_flow_threshold}"     : "",
+            params.cellpose_edge_exclude     ? "--exclude_on_edges"                                     : ""
+        ].join(" ").trim()
         containerOptions =  '--entrypoint ""'
+        // Publish the *_cp_masks.tif output as <sample id>_cellpose_mask.tif.
+        // The path is a closure, matching the other publishDir blocks.
+        publishDir = [
+            path: { "${params.outdir}/cellpose" },
+            pattern: "*_cp_masks.tif",
+            saveAs: { filename -> "${meta.id}_cellpose_mask.tif" }
+        ]
     }
 
     withName: "MCQUANT" {

diff --git a/main.nf b/main.nf
@@ -11,17 +11,6 @@
 
 nextflow.enable.dsl = 2
 
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    GENOME PARAMETER VALUES
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-// TODO nf-core: Remove this line if you don't need a FASTA file
-//   This is an example of how to use getGenomeAttribute() to fetch parameters
-//   from igenomes.config using `--genome`
-// params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     VALIDATE & PRINT PARAMETER SUMMARY

diff --git a/modules/local/clahe_dask.nf b/modules/local/clahe_dask.nf
@@ -10,16 +10,17 @@ process CLAHE_DASK{
     output:
     tuple val(meta), path("*.clahe.tiff") , emit: img_clahe
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}_${meta.stain}"
     """
-    apply_clahe.dask.py \
-    --raw ${image} \
-    --output ${image.baseName}.clahe.tiff \
-    --cliplimit 0.01 \
-    --kernel 25 \
-    --nbins 256 \
-    --channel 0 \
-    --pixel-size 0.138
+    apply_clahe.dask.py \\
+        --raw ${image} \\
+        --output ${prefix}.clahe.tiff \\
+        $args
     """
 
 }