From c219b05942714eef33ec3e77c54e5573407db37e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 18 Jun 2025 15:52:21 +0200 Subject: [PATCH 1/6] Add pairgenomealign config to test cases for nf-core pipelines download. --- tests/data/mock_config_containers/nextflow.config | 8 ++++++++ tests/pipelines/test_download.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/data/mock_config_containers/nextflow.config b/tests/data/mock_config_containers/nextflow.config index a761121746..4195a3e68d 100644 --- a/tests/data/mock_config_containers/nextflow.config +++ b/tests/data/mock_config_containers/nextflow.config @@ -26,4 +26,12 @@ process { container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" } } + // example from nf-core/pairgenomealign 2.2.0 + + withName:'ALIGNMENT_.*' { + container = { "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/06/06beccfa4d48e5daf30dd8cee4f7e06fd51594963db0d5087ab695365b79903b/data' : + 'community.wave.seqera.io/library/last_samtools_open-fonts:176a6ab0c8171057'}" } + } + } diff --git a/tests/pipelines/test_download.py b/tests/pipelines/test_download.py index c4cced293b..ada67d3b1b 100644 --- a/tests/pipelines/test_download.py +++ b/tests/pipelines/test_download.py @@ -230,13 +230,20 @@ def test__find_container_images_config_nextflow(self, tmp_path, mock_fetch_wf_co pass mock_fetch_wf_config.return_value = config download_obj.find_container_images("workflow") - assert len(download_obj.containers) == 4 assert "nfcore/methylseq:1.0" in download_obj.containers assert "nfcore/methylseq:1.4" in download_obj.containers assert "nfcore/sarek:dev" in download_obj.containers assert ( "https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1" in download_obj.containers ) + assert ( + "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/06/06beccfa4d48e5daf30dd8cee4f7e06fd51594963db0d5087ab695365b79903b/data" + in download_obj.containers + ) + assert ( + "community.wave.seqera.io/library/last_samtools_open-fonts:176a6ab0c8171057" in download_obj.containers + ) + assert "singularity" not in download_obj.containers # does not yet pick up nfcore/sarekvep:dev.${params.genome}, because that is no valid URL or Docker URI. # From 9ff8fae0ae9b9f8961203e5e3ff98747ad165a2a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 18 Jun 2025 19:47:01 +0200 Subject: [PATCH 2/6] feat:download Eliminate known false positives also from direct matches. --- nf_core/pipelines/download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nf_core/pipelines/download.py b/nf_core/pipelines/download.py index efe6af5c23..677e46b2e4 100644 --- a/nf_core/pipelines/download.py +++ b/nf_core/pipelines/download.py @@ -892,7 +892,9 @@ def rectify_raw_container_matches(self, raw_findings): """ direct_match = re.match(either_url_or_docker, container_value.strip()) if direct_match: - cleaned_matches.append(direct_match.group(0)) + # eliminate known false positives also from direct matches + if direct_match.group(0) not in ["singularity", "apptainer"]: + cleaned_matches.append(direct_match.group(0)) continue # oh yes, that was plain sailing """ From e37652123bd4eaf642c50b788f64627dca8d0b78 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 18 Jun 2025 20:09:43 +0200 Subject: [PATCH 3/6] feat:config_parser Enable parsing of multi-line config values with nextflow config --flat --- nf_core/utils.py | 22 ++++++++++++++-------- tests/pipelines/test_download.py | 15 ++++++++------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/nf_core/utils.py b/nf_core/utils.py index 83cea6e748..803a732120 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -325,13 +325,18 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: result = run_cmd("nextflow", f"config -flat {wf_path}") if result is not None: nfconfig_raw, _ = result - for line in nfconfig_raw.splitlines(): - ul = line.decode("utf-8") - try: - k, v = ul.split(" = ", 1) - config[k] = v.strip("'\"") - except ValueError: - log.debug(f"Couldn't find key=value config pair:\n {ul}") + nfconfig = nfconfig_raw.decode("utf-8") + multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*(.*?)(?=(\n[^\n=]+?\s*=)|$)", re.DOTALL) + + for config_match in multiline_key_value_pattern.finditer(nfconfig): + k = config_match.group(2).strip() + v = config_match.group(3).strip().strip("'\"") + if k and v: + config[k] = v + log.debug(f"Config key: {k}, value: {v}") + else: + log.debug(f"Couldn't find key=value config pair:\n {config_match.group(0)}") + del config_match # Scrape main.nf for additional parameter declarations # Values in this file are likely to be complex, so don't both trying to capture them. Just get the param name. @@ -341,8 +346,9 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: for line in fh: line_str = line.decode("utf-8") match = re.match(r"^\s*(params\.[a-zA-Z0-9_]+)\s*=(?!=)", line_str) - if match: + if match and match.group(1) and config[match.group(1)]: config[match.group(1)] = "null" + except FileNotFoundError as e: log.debug(f"Could not open {main_nf} to look for parameter declarations - {e}") diff --git a/tests/pipelines/test_download.py b/tests/pipelines/test_download.py index ada67d3b1b..610ac69575 100644 --- a/tests/pipelines/test_download.py +++ b/tests/pipelines/test_download.py @@ -221,13 +221,14 @@ def test__find_container_images_config_nextflow(self, tmp_path, mock_fetch_wf_co if result is not None: nfconfig_raw, _ = result config = {} - for line in nfconfig_raw.splitlines(): - ul = line.decode("utf-8") - try: - k, v = ul.split(" = ", 1) - config[k] = v.strip("'\"") - except ValueError: - pass + nfconfig = nfconfig_raw.decode("utf-8") + multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*(.*?)(?=(\n[^\n=]+?\s*=)|$)", re.DOTALL) + + for match in multiline_key_value_pattern.finditer(nfconfig): + k = match.group(2).strip() + v = match.group(3).strip().strip("'\"") + if k and v: + config[k] = v mock_fetch_wf_config.return_value = config download_obj.find_container_images("workflow") assert "nfcore/methylseq:1.0" in download_obj.containers From c313e4315d71b25a169f0fdf8797092f5f2034c0 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 18 Jun 2025 18:26:53 +0000 Subject: [PATCH 4/6] [automated] Update CHANGELOG.md --- CHANGELOG.md | 1 + nf_core/utils.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a111ee48c..39cc8ae229 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ - Update pre-commit hook astral-sh/ruff-pre-commit to v0.12.1 ([#3648](https://github.com/nf-core/tools/pull/3648)) - Update error message for rocrate_readme_sync ([#3652](https://github.com/nf-core/tools/pull/3652)) - Update `nf-core modules info` command after `meta.yml` restructuring ([#3659](https://github.com/nf-core/tools/pull/3659)) +- Enable parsing of multi-line config values ([#3629](https://github.com/nf-core/tools/pull/3629)) ## [v3.3.1 - Tungsten Tamarin Patch](https://github.com/nf-core/tools/releases/tag/3.3.1) - [2025-06-02] diff --git a/nf_core/utils.py b/nf_core/utils.py index 803a732120..180574b6bc 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -346,7 +346,7 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: for line in fh: line_str = line.decode("utf-8") match = re.match(r"^\s*(params\.[a-zA-Z0-9_]+)\s*=(?!=)", line_str) - if match and match.group(1) and config[match.group(1)]: + if match and match.group(1): config[match.group(1)] = "null" except FileNotFoundError as e: From 044e95bc10abc96f7d291a74673ebaa6157526da Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 23 Jun 2025 18:01:51 +0200 Subject: [PATCH 5/6] Handle empty string config values: Change regex to avoid catastrophic backtracking and assign them to the dict as well. --- nf_core/utils.py | 7 +++++-- tests/pipelines/test_download.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/nf_core/utils.py b/nf_core/utils.py index 180574b6bc..4df53420f7 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -326,12 +326,15 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: if result is not None: nfconfig_raw, _ = result nfconfig = nfconfig_raw.decode("utf-8") - multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*(.*?)(?=(\n[^\n=]+?\s*=)|$)", re.DOTALL) + multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*((?:(?!\n[^\n=]+?\s*=).)*)", re.DOTALL) for config_match in multiline_key_value_pattern.finditer(nfconfig): k = config_match.group(2).strip() v = config_match.group(3).strip().strip("'\"") - if k and v: + if k and v == "": + config[k] = "" # or do we want to set it to "null"? + log.debug(f"Config key: {k}, value: empty string") + elif k and v: config[k] = v log.debug(f"Config key: {k}, value: {v}") else: diff --git a/tests/pipelines/test_download.py b/tests/pipelines/test_download.py index 610ac69575..84675c7ca0 100644 --- a/tests/pipelines/test_download.py +++ b/tests/pipelines/test_download.py @@ -222,7 +222,7 @@ def test__find_container_images_config_nextflow(self, tmp_path, mock_fetch_wf_co nfconfig_raw, _ = result config = {} nfconfig = nfconfig_raw.decode("utf-8") - multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*(.*?)(?=(\n[^\n=]+?\s*=)|$)", re.DOTALL) + multiline_key_value_pattern = re.compile(r"(^|\n)([^\n=]+?)\s*=\s*((?:(?!\n[^\n=]+?\s*=).)*)", re.DOTALL) for match in multiline_key_value_pattern.finditer(nfconfig): k = match.group(2).strip() From a9450f2bbb689d31eabd2715eb89b2da4a85cccd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 2 Jul 2025 12:49:44 +0200 Subject: [PATCH 6/6] Set config value to null for empty values like it is done for parameters parsed from main.nf --- nf_core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nf_core/utils.py b/nf_core/utils.py index 4df53420f7..f9a3413ef9 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -332,7 +332,7 @@ def fetch_wf_config(wf_path: Path, cache_config: bool = True) -> dict: k = config_match.group(2).strip() v = config_match.group(3).strip().strip("'\"") if k and v == "": - config[k] = "" # or do we want to set it to "null"? + config[k] = "null" log.debug(f"Config key: {k}, value: empty string") elif k and v: config[k] = v