From 705a9a45268cd648dcc3a359cbe1d8795b11e73d Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 01:20:57 +0200 Subject: [PATCH 01/12] WIP: New nf-core download command * Validates pipeline input, fetches workflow details and release hash * Download workflow files * If -s specified, automatically finds container names from workflow config * Tries to pull singularity images using first singularity, then docker --- nf_core/download.py | 174 ++++++++++++++++++++++++++++++++++++++++++++ scripts/nf-core | 27 ++++++- 2 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 nf_core/download.py diff --git a/nf_core/download.py b/nf_core/download.py new file mode 100644 index 0000000000..fc333080c6 --- /dev/null +++ b/nf_core/download.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +""" Download a nf-core pipeline """ + +from __future__ import print_function + +from io import BytesIO +import logging +import os +import requests +import subprocess +import sys +from zipfile import ZipFile + + +import nf_core.list, nf_core.lint + +def download_workflow(pipeline, release=None, singularity=False, outdir=None): + """ Main function to download a nf-core workflow """ + + # init + wf = DownloadWorkflow(pipeline, release, singularity, outdir) + + # Get workflow details + if not wf.get_workflow(): + sys.exit(1) + + # Check that the outdir doesn't already exist + if os.path.exists(wf.outdir): + logging.error("Output directory '{}' already exists".format(wf.outdir)) + sys.exit(1) + else: + logging.info("Saving nf-core/{} to '{}'".format(wf.wf_name, wf.outdir)) + + # Download the pipeline files + logging.info("Downloading workflow files from GitHub") + wf.download_wf_files() + + # Download the singularity images + if singularity: + logging.info("Fetching container names for workflow") + wf.find_singularity_images() + if len(wf.containers) == 0: + logging.info("No container names found in workflow") + else: + os.mkdir(os.path.join(wf.outdir, 'singularity-images')) + for container in wf.containers: + wf.download_singularity_image(container) + +class DownloadWorkflow(): + + def __init__(self, pipeline, release=None, singularity=False, outdir=None): + """ Set class variables """ + + self.pipeline = pipeline + self.release = release + self.singularity = singularity + self.outdir = outdir + + self.wf_name = None + self.wf_sha = None + self.wf_download_url = None + self.containers = list() + + + def get_workflow(self): + """ Fetch details of nf-core workflow to download """ + wfs = nf_core.list.Workflows() + wfs.get_remote_workflows() + + # Get workflow download details + for wf in wfs.remote_workflows: + if wf.full_name == self.pipeline or wf.name == self.pipeline: + + # Set pipeline name + self.wf_name = wf.name + + # Find latest release hash + if self.release is None and len(wf.releases) > 0: + self.release = wf.releases[0]['tag_name'] + self.wf_sha = wf.releases[0]['tag_sha'] + # Find specified release hash + elif self.release is not None: + for r in wf.releases: + if r['tag_name'] == self.release.lstrip('v'): + self.wf_sha = r['tag_sha'] + break + else: + logging.error("Not able to find release '{}' for {} (found '{}')".format(self.release, wf.full_name, "', '".join([r['tag_name'] for r in wf.releases]))) + return False + # Must be a dev-only pipeline + elif self.release is None: + self.release = 'dev' + self.wf_sha = 'master' # Cheating a little, but GitHub download link works + + # Set outdir name if not defined + if self.outdir is None: + self.outdir = 'nf-core-{}'.format(wf.name) + if self.release is not None: + self.outdir += '-{}'.format(self.release) + + # Set the download URL + self.wf_download_url = 'https://github.com/{}/archive/{}.zip'.format(wf.full_name, self.wf_sha) + + # Finished + return True + + # If we got this far, must have not found the pipeline + logging.error("Not able to find pipeline '{}'".format(self.pipeline)) + logging.info("Available pipelines: {}".format(', '.join([w.name for w in wfs.remote_workflows]))) + return False + + def download_wf_files(self): + """ Download workflow files from GitHub - save in outdir """ + + # Download GitHub zip file into memory and extract + url = requests.get(self.wf_download_url) + zipfile = ZipFile(BytesIO(url.content)) + zipfile.extractall(self.outdir) + + # Rename the internal directory name to be more friendly + gh_name = '{}-{}'.format(self.wf_name, self.wf_sha) + os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, 'workflow')) + + def find_singularity_images(self): + """ Find singularity image names for workflow """ + + # Use linting code to parse the pipeline nextflow config + lint_obj = nf_core.lint.PipelineLint(os.path.join(self.outdir, 'workflow')) + lint_obj.check_nextflow_config() + + # Find any config variables that look like a container + for k,v in lint_obj.config.items(): + if k.startswith('process.') and k.endswith('.container'): + self.containers.append(v.strip('"').strip("'")) + + def download_singularity_image(self, container): + """ Download singularity images for workflow """ + + out_name = '{}.simg'.format(container.replace('nfcore/', 'nf-core-').replace(':', '-')) + out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) + address = 'docker://{}'.format(container.replace('docker://', '')) + singularity_command = ["singularity", "pull", "--name", out_path, address] + docker_command = [ + 'docker', 'run', + '-v', '/var/run/docker.sock:/var/run/docker.sock', + '-v', '{}:/output'.format(out_path), + '--privileged', '-t', '--rm', + 'singularityware/docker2singularity', + container + ] + + logging.info("Building singularity image '{}', saving to {}".format(container, out_path)) + + # Try to use singularity to pull image + try: + subprocess.call(singularity_command) + except OSError as e: + if e.errno == os.errno.ENOENT: + # Singularity is not installed + logging.warn('Singularity is not installed. Attempting to use Docker instead.') + + # Try to use docker to use singularity to pull image + try: + subprocess.call(docker_command) + except OSError as e: + if e.errno == os.errno.ENOENT: + # Docker is not installed + logging.warn('Docker is not installed.') + else: + # Something else went wrong with docker command + raise + else: + # Something else went wrong with singularity command + raise diff --git a/scripts/nf-core b/scripts/nf-core index b258e36677..3e1d55b7d9 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -8,7 +8,7 @@ import sys import os import nf_core -import nf_core.lint, nf_core.list, nf_core.release +import nf_core.lint, nf_core.list, nf_core.download, nf_core.release import logging @@ -57,6 +57,31 @@ def list(json): """ List nf-core pipelines with local info """ nf_core.list.list_workflows(json) +@nf_core_cli.command() +@click.argument( + 'pipeline', + required = True, + metavar = "" +) +@click.option( + '-r', '--release', + type = str, + help = "Pipeline release" +) +@click.option( + '-s', '--singularity', + is_flag = True, + help = "Pull pipeline singularity containers" +) +@click.option( + '-o', '--outdir', + type = str, + help = "Output directory" +) +def download(pipeline, release, singularity, outdir): + """ Download a nf-core pipeline and singularity container """ + nf_core.download.download_workflow(pipeline, release, singularity, outdir) + @nf_core_cli.command() @click.argument( 'pipeline_dir', From 770a87ef5375e4ffba6f356da162e1bd1fdc2d2e Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 07:57:27 +0200 Subject: [PATCH 02/12] Make command line singularity option explicit and required. --- nf_core/download.py | 9 ++++++++- scripts/nf-core | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index fc333080c6..974dcae030 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -29,7 +29,12 @@ def download_workflow(pipeline, release=None, singularity=False, outdir=None): logging.error("Output directory '{}' already exists".format(wf.outdir)) sys.exit(1) else: - logging.info("Saving nf-core/{} to '{}'".format(wf.wf_name, wf.outdir)) + logging.info( + "Saving nf-core/{}".format(wf.wf_name) + + "\n Pipeline release: {}".format(wf.release) + + "\n Pull singularity containers: {}".format('Yes' if wf.singularity else 'No') + + "\n Output directory: {}".format(wf.outdir) + ) # Download the pipeline files logging.info("Downloading workflow files from GitHub") @@ -78,6 +83,7 @@ def get_workflow(self): if self.release is None and len(wf.releases) > 0: self.release = wf.releases[0]['tag_name'] self.wf_sha = wf.releases[0]['tag_sha'] + logging.debug("No release specified. Using latest release: {}".format(self.release)) # Find specified release hash elif self.release is not None: for r in wf.releases: @@ -91,6 +97,7 @@ def get_workflow(self): elif self.release is None: self.release = 'dev' self.wf_sha = 'master' # Cheating a little, but GitHub download link works + logging.info("Pipeline is in development. Using current code on master branch.") # Set outdir name if not defined if self.outdir is None: diff --git a/scripts/nf-core b/scripts/nf-core index 3e1d55b7d9..efaceae060 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -69,9 +69,10 @@ def list(json): help = "Pipeline release" ) @click.option( - '-s', '--singularity', - is_flag = True, - help = "Pull pipeline singularity containers" + '-s/-n', '--singularity/--no-singularity', + default = None, + required = True, + help = "Pull / don't pull pipeline singularity containers" ) @click.option( '-o', '--outdir', From 9ce2083e61720821b2bb500d4c0f59b1efffc83a Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 08:54:57 +0200 Subject: [PATCH 03/12] Now works with any GH repo. Better logging. --- nf_core/download.py | 38 +++++++++++++++++++++++++++----------- scripts/nf-core | 2 +- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 974dcae030..bcfbf2f2c3 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -30,7 +30,7 @@ def download_workflow(pipeline, release=None, singularity=False, outdir=None): sys.exit(1) else: logging.info( - "Saving nf-core/{}".format(wf.wf_name) + + "Saving {}".format(wf.pipeline) + "\n Pipeline release: {}".format(wf.release) + "\n Pull singularity containers: {}".format('Yes' if wf.singularity else 'No') + "\n Output directory: {}".format(wf.outdir) @@ -105,19 +105,33 @@ def get_workflow(self): if self.release is not None: self.outdir += '-{}'.format(self.release) - # Set the download URL + # Set the download URL and return self.wf_download_url = 'https://github.com/{}/archive/{}.zip'.format(wf.full_name, self.wf_sha) - - # Finished return True - # If we got this far, must have not found the pipeline - logging.error("Not able to find pipeline '{}'".format(self.pipeline)) - logging.info("Available pipelines: {}".format(', '.join([w.name for w in wfs.remote_workflows]))) - return False + # If we got this far, must not be a nf-core pipeline + if self.pipeline.count('/') == 1: + # Looks like a GitHub address - try working with this repo + self.wf_name = self.pipeline + if self.release is None: + self.release = 'master' + self.wf_sha = self.release + if self.outdir is None: + self.outdir = self.pipeline.replace('/', '-').lower() + if self.release is not None: + self.outdir += '-{}'.format(self.release) + # Set the download URL and return + self.wf_download_url = 'https://github.com/{}/archive/{}.zip'.format(self.pipeline, self.release) + return True + else: + logging.error("Not able to find pipeline '{}'".format(self.pipeline)) + logging.info("Available pipelines: {}".format(', '.join([w.name for w in wfs.remote_workflows]))) + return False + def download_wf_files(self): """ Download workflow files from GitHub - save in outdir """ + logging.debug("Downloading {}".format(self.wf_download_url)) # Download GitHub zip file into memory and extract url = requests.get(self.wf_download_url) @@ -125,7 +139,7 @@ def download_wf_files(self): zipfile.extractall(self.outdir) # Rename the internal directory name to be more friendly - gh_name = '{}-{}'.format(self.wf_name, self.wf_sha) + gh_name = '{}-{}'.format(self.wf_name, self.wf_sha).split('/')[-1] os.rename(os.path.join(self.outdir, gh_name), os.path.join(self.outdir, 'workflow')) def find_singularity_images(self): @@ -156,7 +170,8 @@ def download_singularity_image(self, container): container ] - logging.info("Building singularity image '{}', saving to {}".format(container, out_path)) + logging.info("Building singularity image '{}'".format(out_name)) + logging.debug("Singularity command: {}".format(' '.join(singularity_command))) # Try to use singularity to pull image try: @@ -164,7 +179,8 @@ def download_singularity_image(self, container): except OSError as e: if e.errno == os.errno.ENOENT: # Singularity is not installed - logging.warn('Singularity is not installed. Attempting to use Docker instead.') + logging.debug('Singularity is not installed. Attempting to use Docker instead.') + logging.debug("Docker command: {}".format(' '.join(docker_command))) # Try to use docker to use singularity to pull image try: diff --git a/scripts/nf-core b/scripts/nf-core index efaceae060..910237020e 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -80,7 +80,7 @@ def list(json): help = "Output directory" ) def download(pipeline, release, singularity, outdir): - """ Download a nf-core pipeline and singularity container """ + """ Download a pipeline and singularity container """ nf_core.download.download_workflow(pipeline, release, singularity, outdir) @nf_core_cli.command() From fd90e46ef55c585871c08b9cb0119fbea2e0d7b3 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 12:12:38 +0200 Subject: [PATCH 04/12] download improvements, small lint refactor * Moved the nextflow config parsing into new common utils.py module * Made singularity download option a single command line boolean flag * Improved logging for download script --- nf_core/download.py | 12 +++++++----- nf_core/lint.py | 35 ++++++++++++++--------------------- nf_core/utils.py | 27 +++++++++++++++++++++++++++ scripts/nf-core | 6 ++++-- 4 files changed, 52 insertions(+), 28 deletions(-) create mode 100644 nf_core/utils.py diff --git a/nf_core/download.py b/nf_core/download.py index bcfbf2f2c3..414a98cf20 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -12,7 +12,7 @@ from zipfile import ZipFile -import nf_core.list, nf_core.lint +import nf_core.list, nf_core.utils def download_workflow(pipeline, release=None, singularity=False, outdir=None): """ Main function to download a nf-core workflow """ @@ -64,6 +64,7 @@ def __init__(self, pipeline, release=None, singularity=False, outdir=None): self.wf_name = None self.wf_sha = None self.wf_download_url = None + self.config = dict() self.containers = list() @@ -112,6 +113,8 @@ def get_workflow(self): # If we got this far, must not be a nf-core pipeline if self.pipeline.count('/') == 1: # Looks like a GitHub address - try working with this repo + logging.warn("Pipeline name doesn't match any nf-core workflows") + logging.info("Pipeline name looks like a GitHub address - attempting to download anyway") self.wf_name = self.pipeline if self.release is None: self.release = 'master' @@ -146,18 +149,17 @@ def find_singularity_images(self): """ Find singularity image names for workflow """ # Use linting code to parse the pipeline nextflow config - lint_obj = nf_core.lint.PipelineLint(os.path.join(self.outdir, 'workflow')) - lint_obj.check_nextflow_config() + self.config = nf_core.utils.fetch_wf_config(os.path.join(self.outdir, 'workflow')) # Find any config variables that look like a container - for k,v in lint_obj.config.items(): + for k,v in self.config.items(): if k.startswith('process.') and k.endswith('.container'): self.containers.append(v.strip('"').strip("'")) def download_singularity_image(self, container): """ Download singularity images for workflow """ - out_name = '{}.simg'.format(container.replace('nfcore/', 'nf-core-').replace(':', '-')) + out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) address = 'docker://{}'.format(container.replace('docker://', '')) singularity_command = ["singularity", "pull", "--name", out_path, address] diff --git a/nf_core/lint.py b/nf_core/lint.py index a838dbe4a0..b73734072d 100644 --- a/nf_core/lint.py +++ b/nf_core/lint.py @@ -18,6 +18,8 @@ import requests_cache import yaml +import nf_core.utils + # Set up local caching for requests to speed up remote queries cachedir = os.path.join(tempfile.gettempdir(), 'nfcore_cache') if not os.path.exists(cachedir): @@ -278,27 +280,18 @@ def check_nextflow_config(self): 'params.singleEnd' ] - # Call `nextflow config` and pipe stderr to /dev/null - try: - with open(os.devnull, 'w') as devnull: - nfconfig_raw = subprocess.check_output(['nextflow', 'config', '-flat', self.path], stderr=devnull) - except subprocess.CalledProcessError as e: - raise AssertionError("`nextflow config` returned non-zero error code: %s,\n %s", e.returncode, e.output) - else: - for l in nfconfig_raw.splitlines(): - ul = l.decode() - k, v = ul.split(' = ', 1) - self.config[k] = v - for cf in config_fail: - if cf in self.config.keys(): - self.passed.append((4, "Config variable found: {}".format(cf))) - else: - self.failed.append((4, "Config variable not found: {}".format(cf))) - for cf in config_warn: - if cf in self.config.keys(): - self.passed.append((4, "Config variable found: {}".format(cf))) - else: - self.warned.append((4, "Config variable not found: {}".format(cf))) + # Get the nextflow config for this pipeline + self.config = nf_core.utils.fetch_wf_config(self.path) + for cf in config_fail: + if cf in self.config.keys(): + self.passed.append((4, "Config variable found: {}".format(cf))) + else: + self.failed.append((4, "Config variable not found: {}".format(cf))) + for cf in config_warn: + if cf in self.config.keys(): + self.passed.append((4, "Config variable found: {}".format(cf))) + else: + self.warned.append((4, "Config variable not found: {}".format(cf))) # Check the variables that should be set to 'true' for k in ['timeline.enabled', 'report.enabled', 'trace.enabled', 'dag.enabled']: diff --git a/nf_core/utils.py b/nf_core/utils.py new file mode 100644 index 0000000000..2befccb6e8 --- /dev/null +++ b/nf_core/utils.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +""" +Common utility functions for the nf-core python package. +""" + +import logging +import os +import subprocess + +def fetch_wf_config(wf_path): + """ + Use nextflow to retrieve the nf configuration variables from a workflow + """ + + config = dict() + # Call `nextflow config` and pipe stderr to /dev/null + try: + with open(os.devnull, 'w') as devnull: + nfconfig_raw = subprocess.check_output(['nextflow', 'config', '-flat', wf_path], stderr=devnull) + except subprocess.CalledProcessError as e: + raise AssertionError("`nextflow config` returned non-zero error code: %s,\n %s", e.returncode, e.output) + else: + for l in nfconfig_raw.splitlines(): + ul = l.decode() + k, v = ul.split(' = ', 1) + config[k] = v + return config diff --git a/scripts/nf-core b/scripts/nf-core index 910237020e..d9bbb172d1 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -69,10 +69,12 @@ def list(json): help = "Pipeline release" ) @click.option( - '-s/-n', '--singularity/--no-singularity', + '-s', '--singularity', + type = click.BOOL, + metavar = "[y/n]", default = None, required = True, - help = "Pull / don't pull pipeline singularity containers" + help = "Download singularity containers?" ) @click.option( '-o', '--outdir', From b0e5625dda9c96bb899e13e35cc5de1fe4747501 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 12:33:43 +0200 Subject: [PATCH 05/12] Move main download function into class --- nf_core/download.py | 71 ++++++++++++++++++++++----------------------- scripts/nf-core | 3 +- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 414a98cf20..be50a53d9d 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -14,43 +14,6 @@ import nf_core.list, nf_core.utils -def download_workflow(pipeline, release=None, singularity=False, outdir=None): - """ Main function to download a nf-core workflow """ - - # init - wf = DownloadWorkflow(pipeline, release, singularity, outdir) - - # Get workflow details - if not wf.get_workflow(): - sys.exit(1) - - # Check that the outdir doesn't already exist - if os.path.exists(wf.outdir): - logging.error("Output directory '{}' already exists".format(wf.outdir)) - sys.exit(1) - else: - logging.info( - "Saving {}".format(wf.pipeline) + - "\n Pipeline release: {}".format(wf.release) + - "\n Pull singularity containers: {}".format('Yes' if wf.singularity else 'No') + - "\n Output directory: {}".format(wf.outdir) - ) - - # Download the pipeline files - logging.info("Downloading workflow files from GitHub") - wf.download_wf_files() - - # Download the singularity images - if singularity: - logging.info("Fetching container names for workflow") - wf.find_singularity_images() - if len(wf.containers) == 0: - logging.info("No container names found in workflow") - else: - os.mkdir(os.path.join(wf.outdir, 'singularity-images')) - for container in wf.containers: - wf.download_singularity_image(container) - class DownloadWorkflow(): def __init__(self, pipeline, release=None, singularity=False, outdir=None): @@ -67,6 +30,40 @@ def __init__(self, pipeline, release=None, singularity=False, outdir=None): self.config = dict() self.containers = list() + def download_workflow(self): + """ Main function to download a nf-core workflow """ + + # Get workflow details + if not self.get_workflow(): + sys.exit(1) + + # Check that the outdir doesn't already exist + if os.path.exists(self.outdir): + logging.error("Output directory '{}' already exists".format(self.outdir)) + sys.exit(1) + + logging.info( + "Saving {}".format(self.pipeline) + + "\n Pipeline release: {}".format(self.release) + + "\n Pull singularity containers: {}".format('Yes' if self.singularity else 'No') + + "\n Output directory: {}".format(self.outdir) + ) + + # Download the pipeline files + logging.info("Downloading workflow files from GitHub") + self.download_wf_files() + + # Download the singularity images + if self.singularity: + logging.info("Fetching container names for workflow") + self.find_singularity_images() + if len(self.containers) == 0: + logging.info("No container names found in workflow") + else: + os.mkdir(os.path.join(self.outdir, 'singularity-images')) + for container in self.containers: + self.download_singularity_image(container) + def get_workflow(self): """ Fetch details of nf-core workflow to download """ diff --git a/scripts/nf-core b/scripts/nf-core index d9bbb172d1..6ce5386210 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -83,7 +83,8 @@ def list(json): ) def download(pipeline, release, singularity, outdir): """ Download a pipeline and singularity container """ - nf_core.download.download_workflow(pipeline, release, singularity, outdir) + dl = nf_core.download.DownloadWorkflow(pipeline, release, singularity, outdir) + dl.download_workflow() @nf_core_cli.command() @click.argument( From d53214ddf3dec9849db750a9ed59f764245630d9 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 31 May 2018 17:04:22 +0200 Subject: [PATCH 06/12] Tweaks for @senthil10 and update to docker2singularity for singularityware/docker2singularity#30 --- nf_core/download.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index be50a53d9d..92bc4a30fa 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -34,7 +34,7 @@ def download_workflow(self): """ Main function to download a nf-core workflow """ # Get workflow details - if not self.get_workflow(): + if not self.fetch_workflow_details(): sys.exit(1) # Check that the outdir doesn't already exist @@ -65,7 +65,7 @@ def download_workflow(self): self.download_singularity_image(container) - def get_workflow(self): + def fetch_workflow_details(self): """ Fetch details of nf-core workflow to download """ wfs = nf_core.list.Workflows() wfs.get_remote_workflows() @@ -157,15 +157,16 @@ def download_singularity_image(self, container): """ Download singularity images for workflow """ out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) - out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) + out_dir = os.path.abspath(os.path.join(self.outdir, 'singularity-images')) address = 'docker://{}'.format(container.replace('docker://', '')) - singularity_command = ["singularity", "pull", "--name", out_path, address] + singularity_command = ["singularity", "pull", "--name", os.path.join(out_dir, out_name), address] docker_command = [ 'docker', 'run', '-v', '/var/run/docker.sock:/var/run/docker.sock', - '-v', '{}:/output'.format(out_path), + '-v', '{}:/output'.format(out_dir), '--privileged', '-t', '--rm', 'singularityware/docker2singularity', + '--name', out_name, container ] From 48fff1c144855cabc6010ee8386bf0e7fe6f0e86 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 1 Jun 2018 10:52:42 +0200 Subject: [PATCH 07/12] Download: Changes for @sven1103 * if not x: instead of if x is None: * Raise and catch exceptions instead of returning booleans --- nf_core/download.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 92bc4a30fa..55a7df8249 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -34,7 +34,9 @@ def download_workflow(self): """ Main function to download a nf-core workflow """ # Get workflow details - if not self.fetch_workflow_details(): + try: + self.fetch_workflow_details() + except LookupError: sys.exit(1) # Check that the outdir doesn't already exist @@ -89,23 +91,25 @@ def fetch_workflow_details(self): self.wf_sha = r['tag_sha'] break else: - logging.error("Not able to find release '{}' for {} (found '{}')".format(self.release, wf.full_name, "', '".join([r['tag_name'] for r in wf.releases]))) - return False + logging.error("Not able to find release '{}' for {}".format(self.release, wf.full_name)) + logging.info("Available {} releases: {}".format(wf.full_name, ', '.join([r['tag_name'] for r in wf.releases]))) + raise LookupError("Not able to find release '{}' for {}".format(self.release, wf.full_name)) + # Must be a dev-only pipeline - elif self.release is None: + elif not self.release: self.release = 'dev' self.wf_sha = 'master' # Cheating a little, but GitHub download link works logging.info("Pipeline is in development. Using current code on master branch.") # Set outdir name if not defined - if self.outdir is None: + if not self.outdir: self.outdir = 'nf-core-{}'.format(wf.name) if self.release is not None: self.outdir += '-{}'.format(self.release) # Set the download URL and return self.wf_download_url = 'https://github.com/{}/archive/{}.zip'.format(wf.full_name, self.wf_sha) - return True + return # If we got this far, must not be a nf-core pipeline if self.pipeline.count('/') == 1: @@ -113,20 +117,19 @@ def fetch_workflow_details(self): logging.warn("Pipeline name doesn't match any nf-core workflows") logging.info("Pipeline name looks like a GitHub address - attempting to download anyway") self.wf_name = self.pipeline - if self.release is None: + if not self.release: self.release = 'master' self.wf_sha = self.release - if self.outdir is None: + if not self.outdir: self.outdir = self.pipeline.replace('/', '-').lower() if self.release is not None: self.outdir += '-{}'.format(self.release) # Set the download URL and return self.wf_download_url = 'https://github.com/{}/archive/{}.zip'.format(self.pipeline, self.release) - return True else: logging.error("Not able to find pipeline '{}'".format(self.pipeline)) logging.info("Available pipelines: {}".format(', '.join([w.name for w in wfs.remote_workflows]))) - return False + raise LookupError("Not able to find pipeline '{}'".format(self.pipeline)) def download_wf_files(self): From 36a8d48cfefb81380489b66dc158d5e3c7b8b66d Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 5 Jun 2018 00:34:00 +0200 Subject: [PATCH 08/12] Remove docker2singularity command --- nf_core/download.py | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 55a7df8249..61fe778c6c 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -163,15 +163,6 @@ def download_singularity_image(self, container): out_dir = os.path.abspath(os.path.join(self.outdir, 'singularity-images')) address = 'docker://{}'.format(container.replace('docker://', '')) singularity_command = ["singularity", "pull", "--name", os.path.join(out_dir, out_name), address] - docker_command = [ - 'docker', 'run', - '-v', '/var/run/docker.sock:/var/run/docker.sock', - '-v', '{}:/output'.format(out_dir), - '--privileged', '-t', '--rm', - 'singularityware/docker2singularity', - '--name', out_name, - container - ] logging.info("Building singularity image '{}'".format(out_name)) logging.debug("Singularity command: {}".format(' '.join(singularity_command))) @@ -182,19 +173,7 @@ def download_singularity_image(self, container): except OSError as e: if e.errno == os.errno.ENOENT: # Singularity is not installed - logging.debug('Singularity is not installed. Attempting to use Docker instead.') - logging.debug("Docker command: {}".format(' '.join(docker_command))) - - # Try to use docker to use singularity to pull image - try: - subprocess.call(docker_command) - except OSError as e: - if e.errno == os.errno.ENOENT: - # Docker is not installed - logging.warn('Docker is not installed.') - else: - # Something else went wrong with docker command - raise + logging.error('Singularity is not installed!') else: # Something else went wrong with singularity command - raise + raise e From be282563fda10ae8f914d5f099df165045dcd2c1 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 5 Jun 2018 01:35:41 +0200 Subject: [PATCH 09/12] Late night WIP: Download from shub directly. --- nf_core/download.py | 50 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 61fe778c6c..e5426dba21 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -6,6 +6,7 @@ from io import BytesIO import logging import os +import re import requests import subprocess import sys @@ -160,10 +161,53 @@ def download_singularity_image(self, container): """ Download singularity images for workflow """ out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) - out_dir = os.path.abspath(os.path.join(self.outdir, 'singularity-images')) + out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) address = 'docker://{}'.format(container.replace('docker://', '')) - singularity_command = ["singularity", "pull", "--name", os.path.join(out_dir, out_name), address] - + shub_api_url = 'https://www.singularity-hub.org/api/container/{}'.format(container.replace('nfcore', 'nf-core').replace('docker://', '')) + shub_api_url = 'https://www.singularity-hub.org/api/container/ewels/nf-core-methylseq:latest' + singularity_command = ["singularity", "pull", "--name", out_path, address] + + # Try to download the singularity image from singularity-hub first + logging.debug("Checking shub API: {}".format(shub_api_url)) + response = requests.get(shub_api_url, timeout=10) + if response.status_code == 200: + shub_response = response.json() + logging.info("Downloading singularity image from singularity-hub.org") + + # Strip the ?generation= key in URL, as this breaks the download + + # + # TODO: May not need to do this? + # + dl_url = re.sub(r"\?generation=\d+&", '?', shub_response['image']) + + # Stream the download as it's going to be large + logging.debug("Starting download: {}".format(dl_url)) + + # + # TODO: This next line hangs! WHHYYYYYY?Y??????? + # + dl_request = requests.get(dl_url, stream=True) + if dl_request.status_code == 200: + logging.debug("Response code 200. Streaming download to disk.") + with open(out_path, 'wb') as f: + for chunk in dl_request.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + return + else: + logging.error("Error with singularity hub API call: {}".format(response.status_code)) + +# dl_request = requests.get(shub_response['image'], stream=True) +# total_length = int(dl_request.headers.get('content-length')) +# logging.debug("Total image file size: {} bytes".format(total_length)) +# with click.progressbar(dl_request.iter_content(1024), length=total_size) as pbar, open(out_path, 'wb') as f: +# for chunk in pbar: +# if chunk: +# f.write(chunk) +# pbar.update(len(chunk)) + + logging.debug("Singularity image not found on singularity-hub") logging.info("Building singularity image '{}'".format(out_name)) logging.debug("Singularity command: {}".format(' '.join(singularity_command))) From 80ad08736cfeaf15a4c030fc193475f764053dc5 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 5 Jun 2018 12:43:21 +0200 Subject: [PATCH 10/12] Got singularity hub download to work --- nf_core/download.py | 70 ++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e5426dba21..68d7fdaa89 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -4,10 +4,12 @@ from __future__ import print_function from io import BytesIO +import click import logging import os import re import requests +import requests_cache import subprocess import sys from zipfile import ZipFile @@ -58,12 +60,13 @@ def download_workflow(self): # Download the singularity images if self.singularity: - logging.info("Fetching container names for workflow") + logging.debug("Fetching container names for workflow") self.find_singularity_images() if len(self.containers) == 0: logging.info("No container names found in workflow") else: os.mkdir(os.path.join(self.outdir, 'singularity-images')) + logging.info("Downloading {} singularity container{}".format(len(self.containers), 's' if len(self.containers) > 1 else '')) for container in self.containers: self.download_singularity_image(container) @@ -100,7 +103,8 @@ def fetch_workflow_details(self): elif not self.release: self.release = 'dev' self.wf_sha = 'master' # Cheating a little, but GitHub download link works - logging.info("Pipeline is in development. Using current code on master branch.") + logging.warn("Pipeline is in development - downloading current code on master branch.\n" + + "This is likely to change soon should not be considered fully reproducible.") # Set outdir name if not defined if not self.outdir: @@ -162,53 +166,41 @@ def download_singularity_image(self, container): out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) - address = 'docker://{}'.format(container.replace('docker://', '')) shub_api_url = 'https://www.singularity-hub.org/api/container/{}'.format(container.replace('nfcore', 'nf-core').replace('docker://', '')) - shub_api_url = 'https://www.singularity-hub.org/api/container/ewels/nf-core-methylseq:latest' - singularity_command = ["singularity", "pull", "--name", out_path, address] # Try to download the singularity image from singularity-hub first logging.debug("Checking shub API: {}".format(shub_api_url)) response = requests.get(shub_api_url, timeout=10) if response.status_code == 200: shub_response = response.json() - logging.info("Downloading singularity image from singularity-hub.org") - - # Strip the ?generation= key in URL, as this breaks the download - - # - # TODO: May not need to do this? - # - dl_url = re.sub(r"\?generation=\d+&", '?', shub_response['image']) - # Stream the download as it's going to be large - logging.debug("Starting download: {}".format(dl_url)) - - # - # TODO: This next line hangs! WHHYYYYYY?Y??????? - # - dl_request = requests.get(dl_url, stream=True) - if dl_request.status_code == 200: - logging.debug("Response code 200. Streaming download to disk.") - with open(out_path, 'wb') as f: - for chunk in dl_request.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - return - else: - logging.error("Error with singularity hub API call: {}".format(response.status_code)) - -# dl_request = requests.get(shub_response['image'], stream=True) -# total_length = int(dl_request.headers.get('content-length')) -# logging.debug("Total image file size: {} bytes".format(total_length)) -# with click.progressbar(dl_request.iter_content(1024), length=total_size) as pbar, open(out_path, 'wb') as f: -# for chunk in pbar: -# if chunk: -# f.write(chunk) -# pbar.update(len(chunk)) + logging.debug("Starting download: {}".format(shub_response['image'])) + + # Don't use the requests cache for the download + with requests_cache.disabled(): + dl_request = requests.get(shub_response['image'], stream=True) + # Check that we got a good response code + if dl_request.status_code == 200: + total_size = int(dl_request.headers.get('content-length')) + logging.debug("Total image file size: {} bytes".format(total_size)) + dl_label = "{} [{:.2f}MB]".format(out_name, total_size/1024.0/1024) + # Open file in bytes mode + with open(out_path, 'wb') as f: + dl_iter = dl_request.iter_content(1024) + # Use a click progress bar whilst we stream the download + with click.progressbar(dl_iter, length=total_size/1024, label=dl_label, show_pos=True) as pbar: + for chunk in pbar: + if chunk: + f.write(chunk) + return + else: + logging.error("Error with singularity hub API call: {}".format(response.status_code)) logging.debug("Singularity image not found on singularity-hub") - logging.info("Building singularity image '{}'".format(out_name)) + + address = 'docker://{}'.format(container.replace('docker://', '')) + singularity_command = ["singularity", "pull", "--name", out_path, address] + logging.info("Building singularity image from dockerhub: {}".format(address)) logging.debug("Singularity command: {}".format(' '.join(singularity_command))) # Try to use singularity to pull image From 119450c5b0ee5394b2c362faae95c2183aa46b17 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 5 Jun 2018 13:53:03 +0200 Subject: [PATCH 11/12] Check downloaded md5 hash against remote. Refactor a little. --- nf_core/download.py | 47 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 68d7fdaa89..570c1ae052 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -6,6 +6,7 @@ from io import BytesIO import click import logging +import hashlib import os import re import requests @@ -68,7 +69,12 @@ def download_workflow(self): os.mkdir(os.path.join(self.outdir, 'singularity-images')) logging.info("Downloading {} singularity container{}".format(len(self.containers), 's' if len(self.containers) > 1 else '')) for container in self.containers: - self.download_singularity_image(container) + try: + # Download from singularity hub if we can + self.download_shub_image(container) + except RuntimeWarning: + # Try to build from dockerhub + self.pull_singularity_image(container) def fetch_workflow_details(self): @@ -161,14 +167,13 @@ def find_singularity_images(self): if k.startswith('process.') and k.endswith('.container'): self.containers.append(v.strip('"').strip("'")) - def download_singularity_image(self, container): - """ Download singularity images for workflow """ + def download_shub_image(self, container): + """ Download singularity images from singularity-hub """ out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) shub_api_url = 'https://www.singularity-hub.org/api/container/{}'.format(container.replace('nfcore', 'nf-core').replace('docker://', '')) - # Try to download the singularity image from singularity-hub first logging.debug("Checking shub API: {}".format(shub_api_url)) response = requests.get(shub_api_url, timeout=10) if response.status_code == 200: @@ -179,6 +184,7 @@ def download_singularity_image(self, container): # Don't use the requests cache for the download with requests_cache.disabled(): dl_request = requests.get(shub_response['image'], stream=True) + # Check that we got a good response code if dl_request.status_code == 200: total_size = int(dl_request.headers.get('content-length')) @@ -192,12 +198,25 @@ def download_singularity_image(self, container): for chunk in pbar: if chunk: f.write(chunk) - return + f.flush() + + # Check that the downloaded image has the right md5sum hash + self.validate_md5(out_path, shub_response['version']) else: logging.error("Error with singularity hub API call: {}".format(response.status_code)) + raise RuntimeWarning("Error with singularity hub API call: {}".format(response.status_code)) - logging.debug("Singularity image not found on singularity-hub") + elif response.status_code == 404: + logging.debug("Singularity image not found on singularity-hub") + raise RuntimeWarning("Singularity image not found on singularity-hub") + else: + logging.error("Error with singularity hub API call: {}".format(response.status_code)) + raise ImportError("Error with singularity hub API call: {}".format(response.status_code)) + def pull_singularity_image(self, container): + """ Use a local installation of singularity to pull an image from docker hub """ + out_name = '{}.simg'.format(container.replace('nfcore', 'nf-core').replace('/','-').replace(':', '-')) + out_path = os.path.abspath(os.path.join(self.outdir, 'singularity-images', out_name)) address = 'docker://{}'.format(container.replace('docker://', '')) singularity_command = ["singularity", "pull", "--name", out_path, address] logging.info("Building singularity image from dockerhub: {}".format(address)) @@ -213,3 +232,19 @@ def download_singularity_image(self, container): else: # Something else went wrong with singularity command raise e + + def validate_md5(self, fname, expected): + """ Calculate the md5sum for a file on the disk and validate with expected """ + logging.debug("Validating image hash: {}".format(fname)) + + # Calculate the md5 for the file on disk + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + file_hash = hash_md5.hexdigest() + + if file_hash == expected: + logging.debug('md5 sum of image matches expected: {}'.format(expected)) + else: + raise IOError ("{} md5 does not match remote: {} - {}".format(out_path, expected, file_hash)) From 027801899c3c5d2af46468232bbb62005a9740ed Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 7 Jun 2018 15:11:08 +0200 Subject: [PATCH 12/12] Make --singularity a simple enabling flag. --- scripts/nf-core | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/nf-core b/scripts/nf-core index 6ce5386210..973a7e2d68 100755 --- a/scripts/nf-core +++ b/scripts/nf-core @@ -70,11 +70,9 @@ def list(json): ) @click.option( '-s', '--singularity', - type = click.BOOL, - metavar = "[y/n]", - default = None, - required = True, - help = "Download singularity containers?" + is_flag = True, + default = False, + help = "Download singularity containers" ) @click.option( '-o', '--outdir',