Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,22 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.2dev - Bouncy Basenji [unreleased]

### `Added`

- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome coverage estimation with Nonpareil (added by @jfy133)

### `Fixed`

### `Dependencies`

| Tool | Previous version | New version |
| ------- | ---------------- | ----------- |
| multiqc | 1.15 | 1.18 |

### `Deprecated`

## v1.1.2 - Augmented Akita Patch [2023-10-27]

### `Added`
Expand Down
4 changes: 4 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@

> Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. https://doi.org/10.1186/s13104-016-1900-2

- [Nonpareil](https://doi.org/10.1128/mSystems.00039-18)

> Rodriguez-R, L. M., Gunturu, S., Tiedje, J. M., Cole, J. R., & Konstantinidis, K. T. (2018). Nonpareil 3: Fast Estimation of Metagenomic Coverage and Sequence Diversity. mSystems, 3(3). https://doi.org/10.1128/mSystems.00039-18

- [Porechop](https://github.com/rrwick/Porechop)

> Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. https://doi.org/10.1099/mgen.0.000132
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
3. Supports statistics for metagenome coverage estimation ([Nonpareil](https://nonpareil.readthedocs.io/en/latest/)) and for host-read removal ([Samtools](http://www.htslib.org/))
4. Performs taxonomic classification and/or profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn](https://huttenhower.sph.harvard.edu/metaphlan/)
Expand Down
57 changes: 48 additions & 9 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,44 @@ report_section_order:
order: -1001
"nf-core-taxprofiler-summary":
order: -1002
# Report position of the general statistics section.
general_stats:
order: 1000
fastqc:
order: 900
fastqc-1:
order: 800
fastp:
order: 700
adapterRemoval:
order: 600
nonpareil_all_samples:
order: 500
porechop:
order: 400
bbduk:
order: 300
prinseqplusplus:
order: 200
filtlong:
order: 100
bowtie2:
order: 90
samtools:
order: 80
kraken:
order: 70
bracken:
order: 60
centrifuge:
order: 50
malt:
order: 40
diamond:
order: 30
kaiju:
order: 20
motus:
order: 10

export_plots: true

Expand All @@ -21,6 +59,7 @@ run_modules:
- fastqc
- adapterRemoval
- fastp
- custom_content
- bbduk
- prinseqplusplus
- porechop
Expand All @@ -33,7 +72,6 @@ run_modules:
- diamond
- malt
- motus
- custom_content

sp:
diamond:
Expand All @@ -42,6 +80,13 @@ sp:
fn_re: ".*(fastqc|falco)_data.txt$"
fastqc/zip:
fn: "*_fastqc.zip"
nonpareil_all_samples_mqc:
fn: "nonpareil_all_samples_mqc.png"

custom_data:
nonpareil_all_samples:
section_name: "Nonpareil"
description: "Nonpareil uses the redundancy of the reads in metagenomic datasets to estimate the average coverage and predict the amount of sequences that will be required to achieve “nearly complete coverage”. Plots here are not interactive - being exported directly from the tool. If you have difficulty reading the plot, please see the individual PNG files in the results directory. DOI: https://doi.org/10.1128/mSystems.00039-18"

top_modules:
- "fastqc":
Expand All @@ -58,13 +103,8 @@ top_modules:
path_filters_exclude:
- "*raw*"
extra: "If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
- "fastp"
- "adapterRemoval"
- "porechop":
extra: "ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
- "bbduk"
- "prinseqplusplus"
- "filtlong"
- "bowtie2":
name: "bowtie2"
- "samtools":
Expand Down Expand Up @@ -93,12 +133,11 @@ top_modules:
- "*.centrifuge.txt"
- "malt":
name: "MALT"
- "diamond"
- "kaiju":
name: "Kaiju"
- "motus"

#It is not possible to set placement for custom kraken and centrifuge columns.
# It is not possible to set placement for custom kraken
# and centrifuge columns.

table_columns_placement:
FastQC / Falco (pre-Trimming):
Expand Down
28 changes: 28 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,34 @@ process {
]
}

// Redundancy estimation with nonpareil
// ext.prefix is set to "<meta.id>_<meta.run_accession>"; only files matching
// *.npa, *.npc, *.npl or *.npo are published, into <outdir>/nonpareil/.
withName: NONPAREIL_NONPAREIL {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.np{a,c,l,o}'
]
}

// Nonpareil curve step: ext.prefix is "<meta.id>_<meta.run_accession>";
// only *.png files are published, into <outdir>/nonpareil/.
withName: 'NONPAREIL_CURVE' {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.png'
]
}

// Nonpareil set step: uses a fixed prefix (not per-sample) and publishes only
// *.png files into <outdir>/nonpareil/.
// NOTE(review): the prefix presumably has to stay in sync with the
// "nonpareil_all_samples_mqc.png" file name that the MultiQC config searches for - confirm.
withName: 'NONPAREIL_SET' {
ext.prefix = { "nonpareil_all_samples_mqc" }
publishDir = [
path: { "${params.outdir}/nonpareil/" },
mode: params.publish_dir_mode,
pattern: '*.png'
]
}

// AdapterRemoval separate output merging
withName: CAT_FASTQ {
ext.prefix = { "${meta.id}_${meta.run_accession}" }
Expand Down
60 changes: 32 additions & 28 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,35 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
run_malt = false
run_metaphlan = true
run_centrifuge = true
run_diamond = true
run_krakenuniq = true
run_motus = false
run_ganon = true
run_krona = true
run_kmcp = true
kmcp_mode = 0
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
run_profile_standardisation = true
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_mergepairs = true
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = true
run_malt = false
run_metaphlan = true
run_centrifuge = true
run_diamond = true
run_krakenuniq = true
run_motus = false
run_ganon = true
run_krona = true
run_kmcp = true
kmcp_mode = 0
krona_taxonomy_directory = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
run_profile_standardisation = true
}

process {
Expand All @@ -61,4 +62,7 @@ process {
withName: MEGAN_RMA2INFO_KRONA {
maxForks = 1
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
48 changes: 26 additions & 22 deletions conf/test_adapterremoval.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,37 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'adapterremoval'
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'adapterremoval'
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
}

process {
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
3 changes: 3 additions & 0 deletions conf/test_bbduk.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,7 @@ process {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
3 changes: 3 additions & 0 deletions conf/test_falco.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,7 @@ process {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
50 changes: 27 additions & 23 deletions conf/test_fastp.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,38 @@ params {
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'fastp'
perform_shortread_complexityfilter = true
shortread_complexityfilter_tool = 'fastp'
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_v1.1.csv'
perform_shortread_qc = true
perform_longread_qc = true
shortread_qc_tool = 'fastp'
perform_shortread_redundancyestimation = true
perform_shortread_complexityfilter = true
shortread_complexityfilter_tool = 'fastp'
perform_shortread_hostremoval = true
perform_longread_hostremoval = true
perform_runmerging = true
hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
run_kaiju = true
run_kraken2 = true
run_bracken = false
run_malt = false
run_metaphlan = false
run_centrifuge = false
run_diamond = false
run_krakenuniq = false
run_motus = false
run_ganon = false
run_kmcp = false
kmcp_mode = 0
}

process {
withName: MALT_RUN {
maxForks = 1
ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
}
withName: NONPAREIL_NONPAREIL {
ext.args = { "-k 5" }
}
}
Loading