|
15 | 15 | "input": { |
16 | 16 | "type": "string", |
17 | 17 | "mimetype": "text/csv", |
18 | | - "format": "file-path-pattern", |
| 18 | + "format": "file-path", |
19 | 19 | "exists": true, |
20 | 20 | "schema": "assets/schema_input.json", |
21 | 21 | "pattern": "^\\S+\\.csv$", |
|
32 | 32 | "assembly_input": { |
33 | 33 | "type": "string", |
34 | 34 | "mimetype": "text/csv", |
35 | | - "format": "file-path-pattern", |
| 35 | + "format": "file-path", |
36 | 36 | "exists": true, |
37 | 37 | "schema": "assets/schema_assembly_input.json", |
38 | 38 | "pattern": "^\\S+\\.csv$", |
|
324 | 324 | "host_fasta": { |
325 | 325 | "type": "string", |
326 | 326 | "description": "Fasta reference file for host contamination removal.", |
327 | | - "help_text": "This parameter is mutually exclusive with `--host_genome`. The reference can be masked. Host read removal is done with Bowtie2." |
| 327 | + "help_text": "This parameter is mutually exclusive with `--host_genome`. The reference can be masked. Host read removal is done with Bowtie2.", |
| 328 | + "format": "file-path", |
| 329 | + "exists": true |
328 | 330 | }, |
329 | 331 | "host_fasta_bowtie2index": { |
330 | 332 | "type": "string", |
331 | 333 | "description": "Bowtie2 index directory corresponding to `--host_fasta` reference file for host contamination removal.", |
332 | | - "help_text": "This parameter must be used in combination with `--host_fasta`, and should be a directory containing files from the output of `bowtie2-build`, i.e. files ending in `.bt2`" |
| 334 | + "help_text": "This parameter must be used in combination with `--host_fasta`, and should be a directory containing files from the output of `bowtie2-build`, i.e. files ending in `.bt2`.", |
| 335 | + "format": "directory-path", |
| 336 | + "exists": true |
333 | 337 | }, |
334 | 338 | "host_removal_verysensitive": { |
335 | 339 | "type": "boolean", |
|
351 | 355 | "type": "string", |
352 | 356 | "default": "${baseDir}/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz", |
353 | 357 | "description": "Genome reference used to remove Illumina PhiX contaminant reads.", |
354 | | - "hidden": true |
| 358 | + "hidden": true, |
| 359 | + "format": "file-path", |
| 360 | + "exists": true |
355 | 361 | }, |
356 | 362 | "skip_clipping": { |
357 | 363 | "type": "boolean", |
|
419 | 425 | "type": "string", |
420 | 426 | "default": "${baseDir}/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz", |
421 | 427 | "hidden": true, |
422 | | - "description": "Genome reference used to remove ONT Lambda contaminant reads." |
| 428 | + "description": "Genome reference used to remove ONT Lambda contaminant reads.", |
| 429 | + "format": "file-path", |
| 430 | + "exists": true |
423 | 431 | }, |
424 | 432 | "save_lambdaremoved_reads": { |
425 | 433 | "type": "boolean", |
|
455 | 463 | "properties": { |
456 | 464 | "centrifuge_db": { |
457 | 465 | "type": "string", |
458 | | - "format": "file-path", |
| 466 | + "format": "path", |
459 | 467 | "exists": true, |
460 | 468 | "description": "Database for taxonomic binning with centrifuge.", |
461 | 469 | "help_text": "Local directory containing `*.cf` files, or a URL or local path to a downloaded compressed tar archive of a Centrifuge database. E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz." |
462 | 470 | }, |
463 | 471 | "kraken2_db": { |
464 | 472 | "type": "string", |
465 | | - "format": "file-path", |
| 473 | + "format": "path", |
| 474 | + "exists": true, |
466 | 475 | "description": "Database for taxonomic binning with kraken2.", |
467 | 476 | "help_text": "Path to a local directory, archive file, or a URL to compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz." |
468 | 477 | }, |
469 | 478 | "krona_db": { |
470 | 479 | "type": "string", |
471 | 480 | "description": "Database for taxonomic binning with krona", |
472 | | - "help_text": "Path to `taxonomy.tab` file for Krona, instead of downloading the default file. Point at the `.tab` file." |
| 481 | + "help_text": "Path to `taxonomy.tab` file for Krona, instead of downloading the default file. Point at the `.tab` file.", |
| 482 | + "format": "file-path", |
| 483 | + "exists": true |
473 | 484 | }, |
474 | 485 | "skip_krona": { |
475 | 486 | "type": "boolean", |
|
478 | 489 | "cat_db": { |
479 | 490 | "type": "string", |
480 | 491 | "description": "Database for taxonomic classification of metagenome assembled genomes. Can be either a zipped file or a directory containing the extracted output of such.", |
481 | | - "help_text": "E.g. https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20210107.tar.gz. This parameter is mutually exclusive with `--cat_db_generate`. The file needs to contain a folder named `*taxonomy*` and `*database*` that hold the respective files." |
| 492 | + "help_text": "E.g. https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20210107.tar.gz. This parameter is mutually exclusive with `--cat_db_generate`. The file needs to contain a folder named `*taxonomy*` and `*database*` that hold the respective files.", |
| 493 | + "format": "path", |
| 494 | + "exists": true |
482 | 495 | }, |
483 | 496 | "cat_db_generate": { |
484 | 497 | "type": "boolean", |
|
501 | 514 | "gtdb_db": { |
502 | 515 | "type": "string", |
503 | 516 | "description": "Specify the location of a GTDBTK database. Can be either an uncompressed directory or a `.tar.gz` archive. If not specified will be downloaded for you when GTDBTK or binning QC is not skipped.", |
504 | | - "default": "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz" |
| 517 | + "default": "https://data.gtdb.ecogenomic.org/releases/release220/220.0/auxillary_files/gtdbtk_package/full_package/gtdbtk_r220_data.tar.gz", |
| 518 | + "format": "path", |
| 519 | + "exists": true |
505 | 520 | }, |
506 | 521 | "gtdb_mash": { |
507 | 522 | "type": "string", |
508 | | - "description": "Specify the location of a GTDBTK mash database. If missing, GTDB-Tk will skip the ani_screening step" |
| 523 | + "description": "Specify the location of a GTDBTK mash database. If missing, GTDB-Tk will skip the ani_screening step", |
| 524 | + "format": "path", |
| 525 | + "exists": true |
509 | 526 | }, |
510 | 527 | "gtdbtk_min_completeness": { |
511 | 528 | "type": "number", |
512 | | - "default": 50.0, |
| 529 | + "default": 50, |
513 | 530 | "description": "Min. bin completeness (in %) required to apply GTDB-tk classification.", |
514 | 531 | "help_text": "Completeness assessed with BUSCO analysis (100% - %Missing). Must be greater than 0 (min. 0.01) to avoid GTDB-tk errors. If too low, GTDB-tk classification results can be impaired due to not enough marker genes!", |
515 | 532 | "minimum": 0.01, |
516 | 533 | "maximum": 100 |
517 | 534 | }, |
518 | 535 | "gtdbtk_max_contamination": { |
519 | 536 | "type": "number", |
520 | | - "default": 10.0, |
| 537 | + "default": 10, |
521 | 538 | "description": "Max. bin contamination (in %) allowed to apply GTDB-tk classification.", |
522 | 539 | "help_text": "Contamination approximated based on BUSCO analysis (%Complete and duplicated). If too high, GTDB-tk classification results can be impaired due to contamination!", |
523 | 540 | "minimum": 0, |
524 | 541 | "maximum": 100 |
525 | 542 | }, |
526 | 543 | "gtdbtk_min_perc_aa": { |
527 | 544 | "type": "number", |
528 | | - "default": 10.0, |
| 545 | + "default": 10, |
529 | 546 | "description": "Min. fraction of AA (in %) in the MSA for bins to be kept.", |
530 | 547 | "minimum": 0, |
531 | 548 | "maximum": 100 |
|
547 | 564 | "type": "boolean", |
548 | 565 | "description": "Speed up pplacer step of GTDB-Tk by loading to memory.", |
549 | 566 | "help_text": "Will be faster than writing to disk (default setting), however at the expense of much larger memory (RAM) requirements for GTDBTK/CLASSIFY."
550 | | - }, |
551 | | - "genomad_db": { |
552 | | - "type": "string", |
553 | | - "description": "Database for virus classification with geNomad", |
554 | | - "help_text": "Must be a directory containing the uncompressed contents from https://zenodo.org/doi/10.5281/zenodo.6994741 (nf-core/mag tested with v1.1)" |
555 | 567 | } |
556 | 568 | } |
557 | 569 | }, |
|
629 | 641 | "metaeuk_db": { |
630 | 642 | "type": "string", |
631 | 643 | "description": "Path to either a local fasta file of protein sequences, or to a directory containing an mmseqs2-formatted database, for annotation of eukaryotic genomes.", |
632 | | - "help_text": "One option would be the databases from the MetaEuk publication (https://wwwuser.gwdg.de/~compbiol/metaeuk/), however it should be noted that these are focused on marine eukaryotes." |
| 644 | + "help_text": "One option would be the databases from the MetaEuk publication (https://wwwuser.gwdg.de/~compbiol/metaeuk/), however it should be noted that these are focused on marine eukaryotes.", |
| 645 | + "format": "file-path", |
| 646 | + "exists": true |
633 | 647 | }, |
634 | 648 | "save_mmseqs_db": { |
635 | 649 | "type": "boolean", |
|
646 | 660 | "type": "boolean", |
647 | 661 | "description": "Run virus identification." |
648 | 662 | }, |
| 663 | + "genomad_db": { |
| 664 | + "type": "string", |
| 665 | + "description": "Database for virus classification with geNomad", |
| 666 | + "help_text": "Must be a directory containing the uncompressed contents from https://zenodo.org/doi/10.5281/zenodo.6994741 (nf-core/mag tested with v1.1)", |
| 667 | + "format": "path", |
| 668 | + "exists": true |
| 669 | + }, |
649 | 670 | "genomad_min_score": { |
650 | 671 | "type": "number", |
651 | 672 | "default": 0.7, |
|
757 | 778 | "busco_db": { |
758 | 779 | "type": "string", |
759 | 780 | "description": "Download URL for BUSCO lineage dataset, or path to a tar.gz archive, or local directory containing already downloaded and unpacked lineage datasets.", |
760 | | - "help_text": "E.g. https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz or '/path/to/buscodb' (files still need to be unpacked manually). Available databases are listed here: https://busco-data.ezlab.org/v5/data/lineages/." |
| 781 | + "help_text": "E.g. https://busco-data.ezlab.org/v5/data/lineages/bacteria_odb10.2024-01-08.tar.gz or '/path/to/buscodb' (files still need to be unpacked manually). Available databases are listed here: https://busco-data.ezlab.org/v5/data/lineages/.", |
| 782 | + "format": "path", |
| 783 | + "exists": true |
761 | 784 | }, |
762 | 785 | "busco_auto_lineage_prok": { |
763 | 786 | "type": "boolean", |
|
783 | 806 | "checkm_db": { |
784 | 807 | "type": "string", |
785 | 808 | "description": "Path to local folder containing already downloaded and uncompressed CheckM database.", |
786 | | - "help_text": "The pipeline can also download this for you if not specified, and you can save the resulting directory into your output directory by specifying `--save_checkm_data`. You should move this directory to somewhere else on your machine (and supply back to the pipeline in future runs again with `--checkm_db`." |
| 809 | + "help_text": "The pipeline can also download this for you if not specified, and you can save the resulting directory into your output directory by specifying `--save_checkm_data`. You should move this directory to somewhere else on your machine (and supply back to the pipeline in future runs again with `--checkm_db`).", |
| 810 | + "format": "directory-path", |
| 811 | + "exists": true |
787 | 812 | }, |
788 | 813 | "save_checkm_data": { |
789 | 814 | "type": "boolean", |
|
793 | 818 | "checkm2_db": { |
794 | 819 | "type": "string", |
795 | 820 | "description": "Path to local folder containing already downloaded and uncompressed CheckM2 database (.dmnd file).", |
796 | | - "help_text": "The pipeline can also download this for you if not specified, and you can save the resulting directory into your output directory by specifying `--save_checkm2_data`. You should move this directory to somewhere else on your machine (and supply back to the pipeline in future runs again with `--checkm2_db`)." |
| 821 | + "help_text": "The pipeline can also download this for you if not specified, and you can save the resulting directory into your output directory by specifying `--save_checkm2_data`. You should move this directory to somewhere else on your machine (and supply back to the pipeline in future runs again with `--checkm2_db`).", |
| 822 | + "format": "directory-path", |
| 823 | + "exists": true |
797 | 824 | }, |
798 | 825 | "checkm2_db_version": { |
799 | 826 | "type": "integer", |
|
828 | 855 | }, |
829 | 856 | "gunc_db": { |
830 | 857 | "type": "string", |
831 | | - "description": "Specify a path to a pre-downloaded GUNC dmnd database file" |
| 858 | + "description": "Specify a path to a pre-downloaded GUNC dmnd database file", |
| 859 | + "format": "file-path", |
| 860 | + "exists": true |
832 | 861 | }, |
833 | 862 | "gunc_database_type": { |
834 | 863 | "type": "string", |
|
0 commit comments