workflow_nanopore_assembly.cwl 16.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
#!/usr/bin/env cwltool
cwlVersion: v1.2
class: Workflow
requirements:
  StepInputExpressionRequirement: {}
  InlineJavascriptRequirement: {}
  MultipleInputFeatureRequirement: {}
  SubworkflowFeatureRequirement: {}
  ScatterFeatureRequirement: {}

11
label: Nanopore assembly workflow
12
doc: |
13
    Workflow for sequencing with ONT nanopore, from basecalled reads to (meta)assembly and binning 
14
    Steps:
15
      - Read filtering
16
17
18
      - Kraken2 (taxonomic classification of FASTQ reads)
      - Flye (de novo assembly)
      - Medaka (assembly polishing)
19
      - metaQUAST (assembly quality reports)
20
21
22
23
24
25
26
      
      When Illumina reads are provided:
      - Assembly polishing with Pilon
      - Metabat2 binning
      - CheckM
      - BUSCO
      - GTDB-Tk
27
28

outputs:
29
  nanopore_quality_output:
30
31
    label: Read quality and filtering reports
    doc: Quality reports
32
    type: Directory
Jasper Koehorst's avatar
Jasper Koehorst committed
33
    outputSource: workflow_quality_nanopore/reports_to_folder
34
35
36
37
38
  illumina_quality_stats:
    label: Filtered statistics
    doc: Statistics on quality and preprocessing of the reads
    # Optional: the source step workflow_quality_illumina is guarded by
    # `when: $(inputs.binning)`, so this output is null when that step is skipped.
    type: Directory?
    outputSource: workflow_quality_illumina/reports_to_folder
39
40
41
42
43
  kraken2_output:
    label: Kraken2 reports
    doc: Kraken2 taxonomic classification reports
    type: Directory
    outputSource: kraken2_files_to_folder/results
44
45
46
  assembly_output:
    label: Assembly output
    doc: Output from different assembly steps
47
    type: Directory
48
    outputSource: assembly_files_to_folder/results
49

Nijsse, Bart's avatar
Nijsse, Bart committed
50
51
52
53
54
  binning_output:
    label: Binning output
    doc: Binning output folders
    # Optional: the source step binning_files_to_folder is guarded by
    # `when: $(inputs.binning)`, so this output is null when that step is skipped.
    type: Directory?
    outputSource: binning_files_to_folder/results
55
56

inputs:
Jasper Koehorst's avatar
Jasper Koehorst committed
57
  # General
58
59
60
  identifier:
    type: string
    doc: Identifier for this dataset used in this workflow
61
    label: Identifier used
62
63
  threads:
    type: int?
64
65
    doc: Number of threads to use for computational processes
    label: Number of threads
Jasper Koehorst's avatar
Jasper Koehorst committed
66
67
68
69
70
71
  memory:
    type: int?
    doc: Maximum memory usage in megabytes
    label: Maximum memory in MB
    default: 40000
  nanopore_fastq_files:
Jasper Koehorst's avatar
Jasper Koehorst committed
72
    type: string[]?
73
74
    doc: List of file paths with Nanopore raw reads in fastq format
    label: Nanopore reads
Jasper Koehorst's avatar
Jasper Koehorst committed
75
76
77
78
79
  # nanopore_fastq_reads:
  #   type: File[]?
  #   doc: File(s) of FASTQ reads in gzip format
  #   label: Nanopore FASTQ reads
  illumina_forward_reads:
Nijsse, Bart's avatar
Nijsse, Bart committed
80
    type: string[]?
Jasper Koehorst's avatar
Jasper Koehorst committed
81
82
83
    doc: illumina sequenced forward read file
    label: illumina forward reads
  illumina_reverse_reads:
Nijsse, Bart's avatar
Nijsse, Bart committed
84
    type: string[]?
Jasper Koehorst's avatar
Jasper Koehorst committed
85
86
    doc: illumina sequenced reverse file
    label: illumina reverse reads
87
  use_reference_mapped_reads:
88
89
    type: boolean
    doc: Continue with reads mapped to the given reference
90
    label: Use mapped reads
91
92
93
94
95
96
97
    default: false
  deduplicate:
    type: boolean?
    doc: Remove exact duplicate reads (Illumina) with fastp 
    label: Deduplicate reads
    default: false

Jasper Koehorst's avatar
Jasper Koehorst committed
98
  kraken_database:
99
    type: string
100
101
    doc: Absolute path with database location of kraken2
    label: Kraken2 database
Jasper Koehorst's avatar
Jasper Koehorst committed
102
    default: "/unlock/references/databases/Kraken2/K2_PlusPF_20210517"
Jasper Koehorst's avatar
Jasper Koehorst committed
103
  # Medaka
104
105
  basecall_model:
    type: string
106
107
    doc: Basecalling model used with Guppy
    label: Basecalling model
108
109
110
111
  # bam_workers:
    # type: int
    # doc: number of workers for bam
    # label: number of workers
Jasper Koehorst's avatar
Jasper Koehorst committed
112
  
113
114
115
  # Flye
  metagenome:
    type: boolean?
Nijsse, Bart's avatar
Nijsse, Bart committed
116
    default: true
117
    doc: Metagenome option for the flye assembly
118
    label: When working with metagenomes
Jasper Koehorst's avatar
Jasper Koehorst committed
119
  # Filtering
120
  filter_references:
Jasper Koehorst's avatar
Jasper Koehorst committed
121
    type: string[]
122
123
    doc: Reference fasta file(s) for contamination filtering
    label: Contamination reference file(s)
124
125
126
127
128
  pilon_fixlist:
    type: string
    label: Pilon fix list
    doc: A comma-separated list of categories of issues to try to fix
    default: "snps,gaps,local"
129

Nijsse, Bart's avatar
Nijsse, Bart committed
130
131
132
133
  binning:
    type: boolean?
    label: Run binning workflow
    doc: Run with contig binning workflow
Jasper Koehorst's avatar
Jasper Koehorst committed
134
    default: false
Nijsse, Bart's avatar
Nijsse, Bart committed
135

136
steps:
Jasper Koehorst's avatar
Jasper Koehorst committed
137
#############################################
138
#### Quality Nanopore
Jasper Koehorst's avatar
Jasper Koehorst committed
139
  workflow_quality_nanopore:
140
141
    label: Nanopore quality and filtering
    doc: Quality and filtering workflow for nanopore reads
142
    run: workflow_nanopore_quality.cwl
143
    in:
Jasper Koehorst's avatar
Jasper Koehorst committed
144
      reads: nanopore_fastq_files
145
      filter_references: filter_references
146
      keep_reference_mapped_reads: use_reference_mapped_reads
Jasper Koehorst's avatar
Jasper Koehorst committed
147
148
149
150
151
152
      threads: threads
      identifier: identifier
      step: 
        default: 1
    out: [filtered_reads, reports_to_folder]
#############################################
153
#### Quality Illumina
Jasper Koehorst's avatar
Jasper Koehorst committed
154
  workflow_quality_illumina:
155
156
    label: Illumina quality and filtering workflow
    doc: Quality and filtering workflow for illumina reads
157
    when: $(inputs.binning)
Jasper Koehorst's avatar
Jasper Koehorst committed
158
159
    run: workflow_illumina_quality.cwl
    in:
160
      binning: binning
Jasper Koehorst's avatar
Jasper Koehorst committed
161
162
      forward_reads: illumina_forward_reads
      reverse_reads: illumina_reverse_reads
163
164
      filter_references: filter_references
      deduplicate: deduplicate
165
      keep_reference_mapped_reads: use_reference_mapped_reads
Jasper Koehorst's avatar
Jasper Koehorst committed
166
167
      memory: memory
      threads: threads
168
169
      identifier: identifier
      step: 
170
        default: 2
171
    out: [QC_reverse_reads, QC_forward_reads, reports_to_folder]
172
#############################################
173
174
#### Taxonomic classification with Kraken2
  nanopore_kraken2:
175
176
177
178
    label: Taxonomic classification with Kraken2
    doc: Taxonomic classification of FASTQ reads
    run: ../kraken2/kraken2.cwl
    in:
179
180
181
      tmp_id: identifier
      identifier: 
        valueFrom: $(inputs.tmp_id)_filtered_nanopore
182
      threads: threads
Jasper Koehorst's avatar
Jasper Koehorst committed
183
      nanopore: workflow_quality_nanopore/filtered_reads
Jasper Koehorst's avatar
Jasper Koehorst committed
184
      database: kraken_database
185
    out: [standard_report, sample_report]
186
187

  # Kraken2 classification of the quality-filtered Illumina read pairs.
  # Only runs when the `binning` workflow input is true.
  illumina_kraken2:
    label: Taxonomic classification with Kraken2
    doc: Taxonomic classification of FASTQ reads
    when: $(inputs.binning)
    run: ../kraken2/kraken2.cwl
    in:
      # Passed in so the `when` expression above can read it.
      binning: binning
      # Helper input: carries the workflow identifier into the valueFrom below.
      tmp_id: identifier
      identifier:
        valueFrom: $(inputs.tmp_id)_filtered_illumina
      threads: threads
      database: kraken_database
      forward_reads: workflow_quality_illumina/QC_forward_reads
      # Was wired to QC_forward_reads, which classified the forward mate twice
      # and never looked at the reverse mate.
      reverse_reads: workflow_quality_illumina/QC_reverse_reads
      paired_end:
        default: true
    out: [standard_report, sample_report]

205
  # Compresses each Kraken2 standard report (one scatter job per report).
  kraken2_compress:
    run: ../bash/pigz.cwl
    label: Compress kraken2
    doc: Compress large kraken2 report file
    scatter: inputfile
    in:
      inputfile:
        source:
          - nanopore_kraken2/standard_report
          - illumina_kraken2/standard_report
        # illumina_kraken2 is conditional (`when: $(inputs.binning)`); when it
        # is skipped its output is null. Drop nulls so the scatter only
        # receives reports that actually exist.
        pickValue: all_non_null
      threads: threads
    out: [outfile]
214

215
  # Renders a Krona HTML visualization per Kraken2 sample report
  # (one scatter job per report).
  kraken2_krona:
    label: Krona Kraken2
    doc: Visualization of kraken2 with Krona
    run: ../krona/krona.cwl
    scatter: kraken
    in:
      kraken:
        source:
          - nanopore_kraken2/sample_report
          - illumina_kraken2/sample_report
        # illumina_kraken2 is conditional (`when: $(inputs.binning)`); when it
        # is skipped its output is null. Drop nulls so the scatter only
        # receives reports that actually exist.
        pickValue: all_non_null
    out: [krona_html]
223

Jasper Koehorst's avatar
Jasper Koehorst committed
224
#############################################
225
#### De novo assembly with Flye
226
  flye:
227
228
    label: Nanopore Flye assembly
    doc: De novo assembly of single-molecule reads with Flye
229
230
    run: ../flye/flye.cwl
    in:
Jasper Koehorst's avatar
Jasper Koehorst committed
231
      nano_raw: workflow_quality_nanopore/filtered_reads
232
      threads: threads
233
      metagenome: metagenome
234
    out: [00_assembly, 10_consensus, 20_repeat, 30_contigger, 40_polishing, assembly, assembly_info, flye_log, params]
Jasper Koehorst's avatar
Jasper Koehorst committed
235
#############################################
236
#### Polishing of assembled genome with Medaka
237
  medaka:
238
    label: Medaka polishing of assembly
239
    doc: Medaka for polishing of assembled genome
240
    run: ../medaka/medaka_py.cwl
241
242
    in:
      threads: threads
243
      draft_assembly: flye/assembly
Jasper Koehorst's avatar
Jasper Koehorst committed
244
      reads: workflow_quality_nanopore/filtered_reads
245
      basecall_model: basecall_model
246
    out: [polished_assembly, gaps_in_draft_coords] # probs, calls_to_draft
Jasper Koehorst's avatar
Jasper Koehorst committed
247
#############################################
248
#### Assembly evaluation with QUAST
249
  metaquast_medaka:
250
    label: assembly evaluation
251
252
    doc: evaluation of polished assembly with metaQUAST
    run: ../metaquast/metaquast.cwl
253
    in:
254
      assembly: medaka/polished_assembly
255
    out: [metaquast_outdir, meta_combined_ref, meta_icarusDir, metaquast_krona, not_aligned, meta_downloaded_ref, runs_per_reference, meta_summary, meta_icarus, metaquast_log, metaquast_report, basicStats, quast_icarusDir, quast_icarusHtml, quastReport, quastLog, transposedReport]
Nijsse, Bart's avatar
Nijsse, Bart committed
256

Nijsse, Bart's avatar
Nijsse, Bart committed
257
#############################################
258
#### Workflow Pilon assembly polishing
259
  workflow_pilon:
260
    label: Pilon
261
    doc: Illumina reads assembly polishing with Pilon
262
    when: $(inputs.binning)
263
    run: workflow_pilon_mapping.cwl
264
265
266
    in:
      binning: binning
      identifier: identifier
267
      assembly: medaka/polished_assembly
268
269
      illumina_forward_reads: workflow_quality_illumina/QC_forward_reads
      illumina_reverse_reads: workflow_quality_illumina/QC_reverse_reads
270
271
272
      fixlist: pilon_fixlist
      threads: threads
      memory: memory
273
    out: [pilon_polished_assembly, vcf, log]
274
275
276

#############################################
#### Assembly evaluation with QUAST
277
  metaquast_nanopore_pilon:
278
    label: Illumina assembly evaluation
279
    doc: Illumina evaluation of pilon polished assembly with metaQUAST
Jasper Koehorst's avatar
Jasper Koehorst committed
280
    when: $(inputs.binning)
281
    run: ../metaquast/metaquast.cwl
Nijsse, Bart's avatar
Nijsse, Bart committed
282
    in:
283
      binning: binning
284
      assembly: workflow_pilon/pilon_polished_assembly
285
286
287
288
289
    out: [metaquast_outdir, meta_combined_ref, meta_icarusDir, metaquast_krona, not_aligned, meta_downloaded_ref, runs_per_reference, meta_summary, meta_icarus, metaquast_log, metaquast_report, basicStats, quast_icarusDir, quast_icarusHtml, quastReport, quastLog, transposedReport]

#############################################
#### BBmap read mapping (illumina reads) for binning
  illumina_pilon_readmapping:
290
291
    label: Read mapping
    doc: Illumina read mapping on pilon assembly for binning
292
293
    when: $(inputs.binning)
    run: ../bbmap/bbmap.cwl
Jasper Koehorst's avatar
Jasper Koehorst committed
294
    in:
295
296
      binning: binning
      identifier: identifier
297
      reference: workflow_pilon/pilon_polished_assembly
Jasper Koehorst's avatar
Jasper Koehorst committed
298
299
      forward_reads: workflow_quality_illumina/QC_forward_reads
      reverse_reads: workflow_quality_illumina/QC_reverse_reads
300
      threads: threads
301
      memory: memory
302
    out: [sam, stats, covstats, log]
303
304
305
306
307
308
#############################################
#### Convert sam file to sorted bam
  illumina_pilon_sam_to_sorted_bam:
    label: sam conversion to sorted bam
    doc: Sam file conversion to a sorted bam file
    when: $(inputs.binning)
Jasper Koehorst's avatar
Jasper Koehorst committed
309
310
    run: ../samtools/sam_to_sorted-bam.cwl
    in:
311
      binning: binning
312
313
      identifier: identifier
      sam: illumina_pilon_readmapping/sam
Jasper Koehorst's avatar
Jasper Koehorst committed
314
      threads: threads
315
316
    out: [sortedbam]

Nijsse, Bart's avatar
Nijsse, Bart committed
317
318
319
320
321
322
323
324
325
326
#############################################
#### Binning workflow
  workflow_binning:
    label: Binning workflow
    doc: Binning workflow to create bins
    when: $(inputs.binning)
    run: workflow_metagenomics_binning.cwl
    in:
      binning: binning
      identifier: identifier
327
      assembly: workflow_pilon/pilon_polished_assembly
328
      bam_file: illumina_pilon_sam_to_sorted_bam/sortedbam
Nijsse, Bart's avatar
Nijsse, Bart committed
329
330
331
      threads: threads
      step: 
        default: 1
332
    out: [metabat2_output,checkm_output,gtdbtk_output,busco_output]
Nijsse, Bart's avatar
Nijsse, Bart committed
333

334
335
336
337
338
339
340
341
#############################################    
#### Move to folder if not part of a workflow
  # Gathers the compressed Kraken2 reports, Krona HTML pages and sample
  # reports into a single output directory.
  kraken2_files_to_folder:
    doc: Preparation of Kraken2 output files to a specific output folder
    label: Kraken2 output folder
    run: ../expressions/files_to_folder.cwl
    in:
      files:
        source:
          - kraken2_compress/outfile
          - kraken2_krona/krona_html
          - nanopore_kraken2/sample_report
          - illumina_kraken2/sample_report
        linkMerge: merge_flattened
        # illumina_kraken2 is conditional (`when: $(inputs.binning)`) and
        # yields null when skipped; filter nulls out of the merged list, as the
        # other *_files_to_folder steps in this workflow do.
        pickValue: all_non_null
      destination:
        valueFrom: $("2_Kraken2_classification")
    out:
      [results]
348

349
350
351
352
353
354
355
356
#############################################
#### Move to folder if not part of a workflow
  flye_files_to_folder:
    doc: Preparation of Flye output files to a specific output folder
    label: Flye output folder
    run: ../expressions/files_to_folder.cwl
    in:
      files:
357
        source: [flye/assembly, flye/assembly_info, flye/flye_log, flye/params]
358
        linkMerge: merge_flattened
Jasper Koehorst's avatar
Jasper Koehorst committed
359
360
361
      # folders:
        # source: [workflow_flye/00_assembly, workflow_flye/10_consensus, workflow_flye/20_repeat, workflow_flye/30_contigger, workflow_flye/40_polishing]
        # linkMerge: merge_flattened
362
      destination:
363
        valueFrom: $("1_Fly_Assembly")
364
365
    out:
      [results]
366
367
368
369
370
371
372
373
374

#############################################
#### Move to folder if not part of a workflow
  metaquast_medaka_files_to_folder:
    doc: Preparation of metaQUAST output files to a specific output folder
    label: Nanopore metaQUAST output folder
    run: ../expressions/files_to_folder.cwl
    in:
      files: 
375
        source: [metaquast_medaka/metaquast_report, metaquast_medaka/quastReport]
376
377
378
        linkMerge: merge_flattened
        pickValue: all_non_null
      folders:
379
        source: [metaquast_medaka/metaquast_krona, metaquast_medaka/not_aligned, metaquast_medaka/runs_per_reference]
380
381
382
383
384
385
386
        linkMerge: merge_flattened
        pickValue: all_non_null
      destination:
        valueFrom: $("QUAST_Nanopore_assembly_quality")
    out:
      [results]

387
388
389
390
391
392
393
394
#############################################
#### Move to folder if not part of a workflow
  medaka_files_to_folder:
    doc: Preparation of Medaka output files to a specific output folder
    label: Medaka output folder
    run: ../expressions/files_to_folder.cwl
    in:
      files:
395
        source: [medaka/polished_assembly, medaka/gaps_in_draft_coords] # , workflow_medaka/probs, workflow_medaka/calls_to_draft
396
        linkMerge: merge_flattened
397
398
399
400
401
        pickValue: all_non_null
      folders:
        source: [metaquast_medaka_files_to_folder/results]
        linkMerge: merge_flattened
        pickValue: all_non_null
402
      destination:
403
        valueFrom: $("2_Nanopore_assembly_polishing")
404
405
    out:
      [results]
406

407
408
#############################################
#### Move to folder if not part of a workflow
409
410
411
412
  metaquast_pilon_files_to_folder:
    doc: Preparation of QUAST output files to a specific output folder
    label: Illumina metaQUAST output folder
    when: $(inputs.binning)
413
414
    run: ../expressions/files_to_folder.cwl
    in:
415
      binning: binning
416
      files: 
417
        source: [metaquast_nanopore_pilon/metaquast_report, metaquast_nanopore_pilon/quastReport]
418
419
420
        linkMerge: merge_flattened
        pickValue: all_non_null
      folders:
421
        source: [metaquast_nanopore_pilon/metaquast_krona, metaquast_nanopore_pilon/not_aligned]
422
423
424
        linkMerge: merge_flattened
        pickValue: all_non_null
      destination:
425
        valueFrom: $("QUAST_Illumina_polished_assembly_quality")
426
427
428
429
430
431
432
433
    out:
      [results]

#############################################
#### Move to folder if not part of a workflow
  pilon_files_to_folder:
    doc: Preparation of pilon output files to a specific output folder
    label: Pilon output folder
434
    when: $(inputs.binning)
435
436
    run: ../expressions/files_to_folder.cwl
    in:
437
      binning: binning
438
      files: 
439
        source: [workflow_pilon/vcf, workflow_pilon/pilon_polished_assembly, workflow_pilon/log]
440
        linkMerge: merge_flattened
441
442
443
444
445
        pickValue: all_non_null
      folders:
        source: [metaquast_pilon_files_to_folder/results]
        linkMerge: merge_flattened
        pickValue: all_non_null
446
      destination:
447
        valueFrom: $("3_Illumina_polished_assembly")
448
449
    out:
      [results]
450
  
451
452
#############################################
#### Move to folder if not part of a workflow
453
454
455
  # Top-level assembly folder: nests the per-tool result folders (Flye, Medaka
  # and, when produced, Pilon) under one directory.
  assembly_files_to_folder:
    doc: Preparation of the Flye, Medaka and Pilon output folders to a specific output folder
    label: Assembly output folder
    run: ../expressions/files_to_folder.cwl
    in:
      folders:
        source:
          - flye_files_to_folder/results
          - medaka_files_to_folder/results
          - pilon_files_to_folder/results
        linkMerge: merge_flattened
        # pilon_files_to_folder is conditional (`when: $(inputs.binning)`);
        # its null result is dropped here when the step is skipped.
        pickValue: all_non_null
      destination:
        valueFrom: $("3_Assembly")
    out:
      [results]

Nijsse, Bart's avatar
Nijsse, Bart committed
467
468
469
470
471
472
473
474
475
#### Move to folder if not part of a workflow
  # Collects the output folders of the binning sub-workflow (MetaBAT2, CheckM,
  # GTDB-Tk, BUSCO) into one directory. Only runs when `binning` is true.
  binning_files_to_folder:
    doc: Preparation of binning output folders to a specific output folder
    label: Binning output folder
    when: $(inputs.binning)
    run: ../expressions/files_to_folder.cwl
    in:
      # Passed in so the `when` expression above can read it.
      binning: binning
      folders:
        source:
          - workflow_binning/metabat2_output
          - workflow_binning/checkm_output
          - workflow_binning/gtdbtk_output
          - workflow_binning/busco_output
        linkMerge: merge_flattened
        pickValue: all_non_null
      destination:
        valueFrom: $("4_Binning")
    out:
      [results]
#############################################

485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
s:author:
  - class: s:Person
    s:identifier: https://orcid.org/0000-0002-5516-8391
    s:email: mailto:german.royvalgarcia@wur.nl
    s:name: Germán Royval
  - class: s:Person
    s:identifier: https://orcid.org/0000-0001-8172-8981
    s:email: mailto:jasper.koehorst@wur.nl
    s:name: Jasper Koehorst
  - class: s:Person
    s:identifier: https://orcid.org/0000-0001-9524-5964
    s:email: mailto:bart.nijsse@wur.nl
    s:name: Bart Nijsse

s:citation: https://m-unlock.nl
s:codeRepository: https://gitlab.com/m-unlock/cwl
s:dateCreated: "2021-12-23"
Nijsse, Bart's avatar
text    
Nijsse, Bart committed
502
# NOTE(review): "2022-04-00" has day "00", which is not a valid calendar date;
# the intended day cannot be determined from this file — confirm and correct.
s:dateModified: "2022-04-00"
503
504
505
506
s:license: https://spdx.org/licenses/Apache-2.0 
s:copyrightHolder: "UNLOCK - Unlocking Microbial Potential"

$namespaces:
Jasper Koehorst's avatar
Jasper Koehorst committed
507
  s: https://schema.org/