Commit 75611fdb authored by Nijsse, Bart
Browse files

destination input and docs

parent 071ab123
......@@ -89,6 +89,11 @@ inputs:
label: GTF file
doc: GTF file location
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#########################################
# Workflow for quality and filtering using fastqc, fastp and optionally bbduk
......@@ -225,6 +230,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -94,6 +94,11 @@ inputs:
- GeneCounts
doc: Run with gene quantification
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#########################################
# Workflow for quality and filtering using fastqc, fastp and optionally bbduk
......@@ -216,6 +221,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -28,6 +28,11 @@ inputs:
type: File
doc: Mapping file containing barcode information
label: The mapping file
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
############################
......@@ -61,6 +66,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -45,6 +45,11 @@ inputs:
doc: Codon table number for gene prediction and translation
label: codon table
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
outputs:
x11:
type: File[]
......@@ -120,6 +125,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -131,6 +131,11 @@ inputs:
prefix: -taxonomy
default: /tempZone/References/Databases/UniProt/taxonomy.hdt
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
arguments: ["java", "-Xmx3g", "-jar", "/unlock/infrastructure/binaries/GenomeSync.jar"]
outputs:
......@@ -148,6 +153,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -26,7 +26,7 @@ outputs:
label: Filtered statistics
doc: Statistics on quality and preprocessing of the reads
type: Directory
outputSource: workflow_quality/reports_to_folder
outputSource: workflow_quality/reports_folder
spades_output:
label: SPAdes
doc: Metagenome assembly output by SPADES
......@@ -46,6 +46,10 @@ outputs:
type: Directory
outputSource: busco_files_to_folder/results
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
inputs:
identifier:
......@@ -118,7 +122,7 @@ steps:
identifier: identifier
step:
default: 1
out: [QC_reverse_reads, QC_forward_reads, reports_to_folder]
out: [QC_reverse_reads, QC_forward_reads, reports_folder]
#############################################
#### SPADes assembly
workflow_spades:
......@@ -298,6 +302,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2022-02-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -9,15 +9,22 @@ requirements:
label: Illumina read quality control, trimming and contamination filter.
doc: |
Workflow for Illumina paired read quality control, trimming and filtering.
Multiple read pairs will be merged into single paired dataset.
Steps:
- FastQC (on raw data files)
- fastp (read quality trimming)
- Kraken2 taxonomic classification of reads
- bbduk used for phix and/or rrna filtering
- bbmap for (contamination) filtering
- FastQC (on filtered (merged) data)
**Workflow for Illumina paired read quality control, trimming and filtering.**<br />
Multiple read pairs will be merged into single paired dataset.<br />
Summary:
- FastQC on raw data files<br />
- fastp for read quality trimming<br />
- Kraken2 for taxonomic classification of reads<br />
- BBduk for phix and rRNA filtering<br />
- BBmap for (contamination) filtering using given references<br />
- FastQC on filtered (merged) data<br />
**All tool CWL files and other workflows can be found here:**<br>
Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl<br>
Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows<br>
WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default
outputs:
reports_folder:
......@@ -35,6 +42,11 @@ outputs:
label: Filtered reverse read
doc: Filtered reverse read
outputSource: phix_filter/out_reverse_reads
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
inputs:
identifier:
......@@ -312,7 +324,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-04-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -83,7 +83,12 @@ inputs:
type: int?
doc: For small genomes, the parameter --genomeSAindexNbases must be scaled down.
label: STAR parameter
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#########################################
# Workflow for SAPP rdf2gtf
......@@ -151,6 +156,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -26,7 +26,7 @@ outputs:
label: Filtered statistics
doc: Statistics on quality and preprocessing of the reads
type: Directory
outputSource: workflow_quality/reports_to_folder
outputSource: workflow_quality/reports_folder
spades_output:
label: SPAdes
doc: Metagenome assembly output by SPADES
......@@ -85,6 +85,11 @@ inputs:
doc: Run with contig binning workflow
default: true
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
......@@ -92,17 +97,17 @@ steps:
workflow_quality:
label: Quality and filtering workflow
doc: Quality assessment of illumina reads with rRNA filtering option
run: workflow_quality.cwl
run: workflow_illumina_quality.cwl
in:
forward_reads: forward_reads
reverse_reads: reverse_reads
contamination_references: contamination_references
filter_references: contamination_references
memory: memory
threads: threads
identifier: identifier
step:
default: 1
out: [QC_reverse_reads, QC_forward_reads, reports_to_folder]
out: [QC_reverse_reads, QC_forward_reads, reports_folder]
#############################################
#### assembly using SPADES
workflow_spades:
......@@ -277,7 +282,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-02-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -117,7 +117,11 @@ inputs:
doc: maximum memory usage in megabytes
label: memory usage (MB)
default: 4000
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
......@@ -363,7 +367,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-02-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -8,14 +8,22 @@ requirements:
SubworkflowFeatureRequirement: {}
ScatterFeatureRequirement: {}
label: Metagenomic binning from assembly
label: Metagenomic Binning from Assembly
doc: |
Workflow for Metagenomics from raw reads to annotated bins.
Steps:
- MetaBAT2 (binning)
- CheckM (bin completeness and contamination)
- GTDB-Tk (bin taxonomic classification)
- BUSCO (bin completeness)
Workflow for Metagenomics from raw reads to annotated bins.<br>
Summary:
- MetaBAT2 (binning)
- CheckM (bin completeness and contamination)
- GTDB-Tk (bin taxonomic classification)
- BUSCO (bin completeness)
**All tool CWL files and other workflows can be found here:**<br>
Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl<br>
Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows<br>
The dependencies are either accessible from https://unlock-icat.irods.surfsara.nl (anonymous,anonymous)<br>
and/or<br>
By using the conda / pip environments as shown in https://git.wur.nl/unlock/docker/-/blob/master/kubernetes/scripts/setup.sh<br>
outputs:
metabat2_output:
......@@ -74,12 +82,15 @@ inputs:
default: "/unlock/references/databases/BUSCO/BUSCO_odb10"
label: BUSCO dataset
doc: Path to the BUSCO dataset download location
step:
type: int?
label: CWL base step number
doc: Step number for order of steps
default: 1
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
......@@ -208,6 +219,7 @@ steps:
merge_busco_summaries:
run: ../expressions/merge_file_arrays.cwl
label: Merge BUSCO summaries
in:
input:
source: busco/short_summaries
......@@ -367,7 +379,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2022-00-00"
s:dateModified: "2022-04-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -74,6 +74,11 @@ inputs:
doc: Step number for order of steps
default: 1
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
outputs:
filtered_stats:
label: Filtered statistics
......@@ -118,7 +123,7 @@ steps:
workflow_kraken2:
label: Kraken2 workflow
doc: Read classification using the kraken2 database
run: ../metagenomics/kraken2/kraken2.cwl
run: ../kraken2/kraken2.cwl
in:
identifier: identifier
threads: threads
......@@ -240,6 +245,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -67,6 +67,11 @@ inputs:
type: int
doc: Memory used in megabytes
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
spades:
run: ../assembly/spades.cwl
......@@ -142,6 +147,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -51,8 +51,10 @@ inputs:
doc: UNLOCK assay metadata file
label: Metadata file
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
############################
......@@ -123,6 +125,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -8,21 +8,25 @@ requirements:
SubworkflowFeatureRequirement: {}
ScatterFeatureRequirement: {}
label: nanopore workflow
label: Nanopore Guppy Basecalling Assembly Workflow
doc: |
Workflow for sequencing with ONT nanopore, from basecalling to assembly quality.
Steps:
- Guppy (basecalling of raw reads)
- MinIONQC (quality check)
- FASTQ merging from multi into one file
- Kraken2 (taxonomic classification)
- Krona (classification visualization)
- Flye (de novo assembly)
- Medaka (assembly polishing)
- QUAST (assembly quality reports)
Workflow for sequencing with ONT Nanopore, from basecalling to assembly.
- Guppy (basecalling of raw reads)
- MinIONQC (quality check)
- FASTQ merging from multi into one file
- Kraken2 (taxonomic classification)
- Krona (classification visualization)
- Flye (de novo assembly)
- Medaka (assembly polishing)
- QUAST (assembly quality reports)
The dependencies are either accessible from https://unlock-icat.irods.surfsara.nl (anonymous) or
by using the conda / pip environments as shown in https://unlock-icat.irods.surfsara.nl/infrastructure/binaries/scripts/setup.sh
**All tool CWL files and other workflows can be found here:**<br>
Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl<br>
Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows<br>
The dependencies are either accessible from https://unlock-icat.irods.surfsara.nl (anonymous,anonymous)<br>
and/or<br>
By using the conda / pip environments as shown in https://git.wur.nl/unlock/docker/-/blob/master/kubernetes/scripts/setup.sh<br>
outputs:
guppy_output:
......@@ -109,6 +113,11 @@ inputs:
label: when working with metagenomes
default: true
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
#### basecalling with Guppy for CPU
......@@ -220,6 +229,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2021-12-10"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -10,21 +10,29 @@ requirements:
label: Nanopore assembly workflow
doc: |
Workflow for sequencing with ONT nanopore, from basecalled reads to (meta)assembly and binning
Steps:
- Read filtering
- Kraken2 (taxonomic classification of FASTQ reads)
- Flye (de novo assembly)
- Medaka (assembly polishing)
- metaQUAST (assembly quality reports)
**Workflow for sequencing with ONT Nanopore data, from basecalled reads to (meta)assembly and binning**<br>
- Workflow Nanopore Quality
- Kraken2 taxonomic classification of FASTQ reads
- Flye (de-novo assembly)
- Medaka (assembly polishing)
- metaQUAST (assembly quality reports)
When Illumina reads are provided:
- Assembly polishing with Pilon
Binnning:
- Metabat2
- CheckM
- BUSCO
- GTDB-Tk
**When Illumina reads are provided:**
- Workflow Illumina Quality: https://workflowhub.eu/workflows/336?version=1
- Assembly polishing with Pilon<br>
- Workflow binning https://workflowhub.eu/workflows/64?version=11
- Metabat2
- CheckM
- BUSCO
- GTDB-Tk
**All tool CWL files and other workflows can be found here:**<br>
Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl<br>
Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows<br>
The dependencies are either accessible from https://unlock-icat.irods.surfsara.nl (anonymous,anonymous)<br>
and/or<br>
By using the conda / pip environments as shown in https://git.wur.nl/unlock/docker/-/blob/master/kubernetes/scripts/setup.sh<br>
outputs:
nanopore_quality_output:
......@@ -73,10 +81,10 @@ inputs:
type: string[]?
doc: List of file paths with Nanopore raw reads in fastq format
label: Nanopore reads
# nanopore_fastq_reads:
# type: File[]?
# doc: File(s) of FASTQ reads in gzip format
# label: Nanopore FASTQ reads
nanopore_fastq_reads:
type: File[]?
doc: File(s) of FASTQ reads in gzip format
label: Nanopore FASTQ reads
illumina_forward_reads:
type: string[]?
doc: illumina sequenced forward read file
......@@ -134,6 +142,11 @@ inputs:
doc: Run with contig binning workflow
default: false
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
#### Quality Nanopore
......@@ -500,7 +513,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2021-12-23"
s:dateModified: "2022-04-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -7,14 +7,19 @@ requirements:
MultipleInputFeatureRequirement: {}
ScatterFeatureRequirement: {}
label: Nanopore read quality filter.
label: Nanopore Quality Control and Filtering
doc: |
Workflow for nanopore read quality control and contamination filtering.
Steps:
- FastQC before filtering (read quality control)
- Kraken2 taxonomic read classification
- Minimap2 contamination filter based on given references
- FastQC after filtering (read quality control)
**Workflow for nanopore read quality control and contamination filtering.**
- FastQC before filtering (read quality control)
- Kraken2 taxonomic read classification
- Minimap2 read filtering based on given references
- FastQC after filtering (read quality control)
**All tool CWL files and other workflows can be found here:**<br>
Tools: https://git.wur.nl/unlock/cwl/-/tree/master/cwl<br>
Workflows: https://git.wur.nl/unlock/cwl/-/tree/master/cwl/workflows<br>
WorkflowHub: https://workflowhub.eu/projects/16/workflows?view=default
outputs:
reports_folder:
......@@ -77,6 +82,11 @@ inputs:
doc: Step number for order of steps
default: 1
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
#############################################
#### merging of FASTQ files to only one
......@@ -134,7 +144,7 @@ steps:
run_kraken2: run_kraken2
tmp_id: identifier
identifier:
valueFrom: $(inputs.tmp_id)_nanopore_unfiltered_
valueFrom: $(inputs.tmp_id)_nanopore_unfiltered
threads: threads
nanopore: merge_nanopore_fastq/output
database: kraken_database
......@@ -209,7 +219,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2020-00-00"
s:dateModified: "2022-04-00"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -62,6 +62,11 @@ inputs:
doc: UNLOCK assay metadata file
label: Metadata file
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.
steps:
############################
fastqc:
......@@ -170,6 +175,7 @@ s:author:
s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2021-01-01"
s:dateModified: "2022-05-00"
s:license: https://spdx.org/licenses/Apache-2.0
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"
......
......@@ -68,6 +68,10 @@ inputs:
doc: UNLOCK assay metadata file
label: Metadata file
destination:
type: string?
label: Output Destination
doc: Optional Output destination used for cwl-prov reporting.