Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
.DS_Store

# Artifacts potentially generated by tests/docs
.nextflow
null
.coverage
.pytest_cache
docs/api/_build
Expand Down
16 changes: 11 additions & 5 deletions nf_core/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,7 @@
subworkflows_test,
subworkflows_update,
)
from nf_core.commands_test_datasets import (
test_datasets_list_branches,
test_datasets_list_remote,
test_datasets_search,
)
from nf_core.commands_test_datasets import test_datasets_list_branches, test_datasets_list_remote, test_datasets_search
from nf_core.components.components_completion import autocomplete_modules, autocomplete_subworkflows
from nf_core.components.constants import NF_CORE_MODULES_REMOTE
from nf_core.pipelines.download.download import DownloadError
Expand Down Expand Up @@ -384,6 +380,13 @@ def command_pipelines_lint(
default=4,
help="Number of allowed parallel tasks",
)
@click.option(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have some reservations regarding the--api-downloadoption from a user experience perspective. The current solution exposes implementation details that users shouldn't need to think about. Instead, we could focus on what the user actually wants to achieve - authenticated vs. anonymous downloads.

Consider something like --authenticated and a help text like Enable authenticated download (with better rate limits, access to private repos, etc.) instead.

Potentially, even --auth-method <method> to future-proof it, if we want to support multiple authentication methods in the future. For example, the option to download pipelines not hosted on GitHub has also been a long-standing request and similar features could then be successively added without renaming / replacing too many CLI arguments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MatthiasZepper while I agree with the switch to authenticated, I am not sure if auth_method is helpful here, even in the future.
I think there is usually only one method for each SCM/git provider and even if there are more, you would require extensive logic to make sure that people do not use "github authentication" when actually wanting to download from gitlab.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we just use gh_api.get instead of requests.get in nf_core/pipelines/download/download.py, no new flag should be needed at all. Or am I missing something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I vaguely recall that the problem was the need to authenticate at the API even for public repositories. For us developers, who anyway have some key-based authentication or token set up for GitHub, this is essentially unnoticeable.

But for ordinary users, who just get started with nf-core and would like to download their first pipeline, this represents a significant obstacle since they do barely understand the error message or might not even have a GitHub account.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes kind of. I think it is possible to use the API unauthenticated, too but you would be rate limited more easily (which is not the case for the public zip download url, afaik). Since I don't know how many requests the CI or some power user does in a short timeframe I decided to put it behind a flag.

"-a",
"--authenticated",
is_flag=True,
default=False,
help="Enable authenticated download via the API of the SCM provider (with better rate limits, access to private repos, etc.). Requires e.g., GITHUB_TOKEN to be set in the environment.",
)
@click.pass_context
def command_pipelines_download(
ctx,
Expand All @@ -400,6 +403,7 @@ def command_pipelines_download(
container_cache_utilisation,
container_cache_index,
parallel_downloads,
authenticated,
):
"""
Download a pipeline, nf-core/configs and pipeline singularity images.
Expand All @@ -419,6 +423,7 @@ def command_pipelines_download(
container_cache_utilisation,
container_cache_index,
parallel_downloads,
authenticated,
)


Expand Down Expand Up @@ -2387,6 +2392,7 @@ def command_download(
container_cache_utilisation,
container_cache_index,
parallel_downloads,
authenticated=False,
)


Expand Down
29 changes: 17 additions & 12 deletions nf_core/commands_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def pipelines_download(
pipeline,
revision,
outdir,
compress,
compress_type,
force,
platform,
download_configuration,
Expand All @@ -175,6 +175,7 @@ def pipelines_download(
container_cache_utilisation,
container_cache_index,
parallel_downloads,
authenticated,
):
"""
Download a pipeline, nf-core/configs and pipeline singularity images.
Expand All @@ -188,17 +189,18 @@ def pipelines_download(
pipeline,
revision,
outdir,
compress,
force,
platform,
download_configuration,
tag,
container_system,
container_library,
container_cache_utilisation,
container_cache_index,
parallel_downloads,
ctx.obj["hide_progress"],
compress_type=compress_type,
force=force,
platform=platform,
download_configuration=download_configuration,
additional_tags=tag,
container_system=container_system,
container_library=container_library,
container_cache_utilisation=container_cache_utilisation,
container_cache_index=container_cache_index,
parallel=parallel_downloads,
hide_progress=ctx.obj["hide_progress"],
authenticated=authenticated,
)
dl.download_workflow()

Expand Down Expand Up @@ -455,3 +457,6 @@ def pipelines_schema_docs(schema_path, output, format, force, columns):
schema_obj.get_schema_path(schema_path)
schema_obj.load_schema()
schema_obj.print_documentation(output, format, force, columns.split(","))
schema_obj.get_schema_path(schema_path)
schema_obj.load_schema()
schema_obj.print_documentation(output, format, force, columns.split(","))
50 changes: 36 additions & 14 deletions nf_core/pipelines/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
NF_INSPECT_MIN_NF_VERSION,
NFCORE_VER_LAST_WITHOUT_NF_INSPECT,
check_nextflow_version,
gh_api,
pretty_nf_version,
run_cmd,
)
Expand All @@ -47,7 +48,7 @@ class DownloadWorkflow:
Can also download its Singularity container image if required.

Args:
pipeline (str | None): A nf-core pipeline name.
pipeline (str | None): The name of an nf-core-compatible pipeline in the form org/repo
revision (tuple[str] | str | None): The workflow revision(s) to download, like `1.0` or `dev` . Defaults to None.
outdir (Path | None): Path to the local download directory. Defaults to None.
compress_type (str | None): Type of compression for the downloaded files. Defaults to None.
Expand All @@ -61,6 +62,7 @@ class DownloadWorkflow:
container_cache_index (Path | None): An index for the remote container cache. Defaults to None.
parallel (int): The number of parallel downloads to use. Defaults to 4.
hide_progress (bool): Flag to hide the progress bar. Defaults to False.
authenticated (bool): If True, use the API of the SCM provider to download. Requires authentication e.g., via GITHUB_TOKEN. Defaults to False.
"""

def __init__(
Expand All @@ -79,6 +81,7 @@ def __init__(
container_cache_index: Path | None = None,
parallel: int = 4,
hide_progress: bool = False,
authenticated: bool = False,
):
# Verify that the flags provided make sense together
if (
Expand Down Expand Up @@ -148,6 +151,8 @@ def __init__(
self.container_cache_index = container_cache_index
# allows to specify a container library / registry or a respective mirror to download images from
self.parallel = parallel
self.hide_progress = hide_progress
self.authenticated = authenticated
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should still be able to read the authenticated status from gh_api.auth instead of having the new flag.

Suggested change
self.authenticated = authenticated
if not gh_api.has_init:
gh_api.lazy_init()
self.authenticated = gh_api.auth is not None


self.wf_revisions: list[dict[str, Any]] = []
self.wf_branches: dict[str, Any] = {}
Expand Down Expand Up @@ -461,11 +466,19 @@ def get_revision_hash(self) -> None:

if not self.platform:
for revision, wf_sha in self.wf_sha.items():
# Set the download URL and return - only applicable for classic downloads
self.wf_download_url = {
**self.wf_download_url,
revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip",
}
# Set the download URL - only applicable for classic downloads
if self.authenticated:
# For authenticated downloads, use the GitHub API
self.wf_download_url = {
**self.wf_download_url,
revision: f"https://api.github.com/repos/{self.pipeline}/zipball/{wf_sha}",
}
else:
# For unauthenticated downloads, use the archive URL
self.wf_download_url = {
**self.wf_download_url,
revision: f"https://github.com/{self.pipeline}/archive/{wf_sha}.zip",
}

def prompt_config_inclusion(self) -> None:
"""Prompt for inclusion of institutional configurations"""
Expand Down Expand Up @@ -555,22 +568,31 @@ def prompt_compression_type(self) -> None:

def download_wf_files(self, revision: str, wf_sha: str, download_url: str) -> str:
"""Downloads workflow files from GitHub to the :attr:`self.outdir`."""

log.debug(f"Downloading {download_url}")

# Download GitHub zip file into memory and extract
url = requests.get(download_url)
with ZipFile(io.BytesIO(url.content)) as zipfile:
zipfile.extractall(self.outdir)
# Fetch content and determine top-level directory based on authentication method
if self.authenticated:
# GitHub API download: fetch via API and get topdir from zip contents
content = gh_api.get(download_url).content
with ZipFile(io.BytesIO(content)) as zipfile:
topdir = zipfile.namelist()[0] # API zipballs have a generated directory name
zipfile.extractall(self.outdir)
else:
# Direct URL download: fetch and construct expected topdir name
content = requests.get(download_url).content
topdir = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1]
with ZipFile(io.BytesIO(content)) as zipfile:
zipfile.extractall(self.outdir)
Comment on lines +574 to +586
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Fetch content and determine top-level directory based on authentication method
if self.authenticated:
# GitHub API download: fetch via API and get topdir from zip contents
content = gh_api.get(download_url).content
with ZipFile(io.BytesIO(content)) as zipfile:
topdir = zipfile.namelist()[0] # API zipballs have a generated directory name
zipfile.extractall(self.outdir)
else:
# Direct URL download: fetch and construct expected topdir name
content = requests.get(download_url).content
topdir = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1]
with ZipFile(io.BytesIO(content)) as zipfile:
zipfile.extractall(self.outdir)
# GitHub API download: fetch via API and get topdir from zip contents
content = gh_api.get(download_url).content
with ZipFile(io.BytesIO(content)) as zipfile:
topdir = zipfile.namelist()[0] # API zipballs have a generated directory name
zipfile.extractall(self.outdir)

Copy link
Contributor

@JulianFlesch JulianFlesch Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am talking about replacing the old way of downloading with your Github API urls. This works also with unauthenticated requests (within a quota) and that way we can reduce complexity and remove the new parameter.

What are your thoughts @MatthiasZepper ?


# create a filesystem-safe version of the revision name for the directory
# Create a filesystem-safe version of the revision name for the directory
revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", revision)
# account for name collisions, if there is a branch / release named "configs" or container output dir
# Account for name collisions, if there is a branch / release named "configs" or container output dir
if revision_dirname in ["configs", self.get_container_output_dir()]:
revision_dirname = re.sub("[^0-9a-zA-Z]+", "_", self.pipeline + revision_dirname)

# Rename the internal directory name to be more friendly
gh_name = f"{self.pipeline}-{wf_sha if bool(wf_sha) else ''}".split("/")[-1]
((self.outdir / gh_name).rename(self.outdir / revision_dirname),)
(self.outdir / topdir).rename(self.outdir / revision_dirname)

# Make downloaded files executable
for dirpath, _, filelist in os.walk(self.outdir / revision_dirname):
Expand Down
23 changes: 12 additions & 11 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,17 +191,18 @@ def test_cli_download(self, mock_dl):
cmd[-1],
(params["revision"],),
params["outdir"],
params["compress"],
"force" in params,
"platform" in params,
params["download-configuration"],
(params["tag"],),
params["container-system"],
(params["container-library"],),
params["container-cache-utilisation"],
params["container-cache-index"],
params["parallel-downloads"],
"hide-progress" in toplevel_params,
compress_type=params["compress"],
force="force" in params,
platform="platform" in params,
download_configuration=params["download-configuration"],
additional_tags=(params["tag"],),
container_system=params["container-system"],
container_library=(params["container-library"],),
container_cache_utilisation=params["container-cache-utilisation"],
container_cache_index=params["container-cache-index"],
parallel=params["parallel-downloads"],
hide_progress="hide-progress" in toplevel_params,
authenticated=False,
)

mock_dl.return_value.download_workflow.assert_called_once()
Expand Down