Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Overduin, Sam
TaxaSPAdes
Commits
f1323363
Commit
f1323363
authored
Mar 05, 2020
by
Overduin, Sam
Browse files
Upload cut_barcode_from_reads.py
parent
f62a6ce7
Changes
1
Hide whitespace changes
Inline
Side-by-side
scripts/cut_barcode_from_reads.py
0 → 100644
View file @
f1323363
#!/usr/bin/env python3
import
gzip
import
argparse
import
datetime
import
subprocess
#./cut_barcode_from_reads.py -i ../data/barcoded/enriched.fastq.gz -o ../data/barcoded/enriched_trimmed.fastq.gz
# Command-line interface: an input path and an output path, both mandatory.
# (A '-p/--threads' integer option exists only as a commented-out idea and
# is not registered.)
parser = argparse.ArgumentParser()
for short_flag, long_flag in (('-i', '--infile'), ('-o', '--outfile')):
    parser.add_argument(short_flag, long_flag, required=True)
args = parser.parse_args()
def write_output(output_file, output_lines):
    """Write lines to a text file and compress it with pigz for '.gz' targets.

    Args:
        output_file: Destination path. When it ends in '.gz' the lines are
            written to the path without that suffix and the file is then
            compressed by running ``pigz -p16`` on it; any other path is
            written as plain text and left uncompressed.
        output_lines: Iterable of strings, written verbatim in order (no
            newline is added).

    Raises:
        FileNotFoundError: compression was requested but no ``pigz``
            executable could be started.
        subprocess.CalledProcessError: ``pigz`` exited with a non-zero status.
    """
    # Always bind the flag: leaving it unset for non-'.gz' paths made the
    # check further down fail with UnboundLocalError.
    compress_file = output_file.endswith('.gz')
    if compress_file:
        # Write to the name without the '.gz' suffix, then compress that file.
        output_file = output_file[:-3]

    with open(output_file, 'w') as f:
        f.writelines(output_lines)
    print('Done writing')

    if compress_file:
        print('Starting compression')
        # Pass an argument list instead of a shell string so that quotes,
        # spaces or '$' in the path cannot be interpreted by a shell, and let
        # a failed compression raise instead of being reported as done.
        subprocess.check_call(['pigz', '-p16', output_file])
        print('Done compression')
# Main script body: read the gzipped FASTQ given by --infile four lines at a
# time (header, sequence, separator, quality).  When the header carries a
# 'BX:Z:' tag, the 16 characters following it are taken as the barcode; if
# that barcode occurs in the sequence line, everything up to and including
# its first occurrence is cut from the sequence line and the same number of
# characters is cut from the quality line.  All lines are collected in
# memory and handed to write_output() at the end.
BARCODE_TAG = 'BX:Z:'
BARCODE_LEN = 16
PROGRESS_EVERY = 100000000  # print a progress line every this many records

new_fasta = []  # output lines in file order
corrected_instances = 0  # records whose sequence contained the barcode
records = 0  # complete four-line records seen

print(datetime.datetime.now(), 'started reading fastq.gz')
with gzip.open(args.infile, 'rt') as f:
    barcode = None
    cut_pos = 0
    for line_number, line in enumerate(f):
        field = line_number % 4
        if field == 0:
            # Header line: always kept unchanged; only read the barcode.
            new_fasta.append(line)
            barcode = None
            tag_pos = line.find(BARCODE_TAG)
            if tag_pos != -1:
                start = tag_pos + len(BARCODE_TAG)
                candidate = line[start:start + BARCODE_LEN].rstrip('\n')
                # A value cut short by the end of the header would drag the
                # newline into the search string and make the fixed-length
                # cut below run past the end of the sequence line, so it is
                # treated as "no barcode" and the read is left untouched.
                if len(candidate) == BARCODE_LEN:
                    barcode = candidate
        elif field == 1:
            # Sequence line: drop everything through the barcode, if found.
            hit = line.find(barcode) if barcode else -1
            if hit != -1:
                cut_pos = hit + BARCODE_LEN
                corrected_instances += 1
            else:
                cut_pos = 0
            new_fasta.append(line[cut_pos:])
        elif field == 2:
            # Separator line: kept unchanged.
            new_fasta.append(line)
        else:
            # Quality line: trimmed by the same amount as the sequence so
            # both stay the same length.
            new_fasta.append(line[cut_pos:])
            records += 1
            if records % PROGRESS_EVERY == 0:
                print(datetime.datetime.now(), records, 'records parsed')

print(datetime.datetime.now(), 'Trimmed', corrected_instances, 'out of', records, 'records')
print(datetime.datetime.now(), 'Writing file')
write_output(args.outfile, new_fasta)
print(datetime.datetime.now(), 'Done writing')
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment