Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
optical-mapping
bnx-info
Commits
ef1027e7
Commit
ef1027e7
authored
Mar 31, 2015
by
Haarst, Jan van
Browse files
First import of script to extract scansize information from a BNX file.
parents
Changes
1
Hide whitespace changes
Inline
Side-by-side
scansize.py
0 → 100644
View file @
ef1027e7
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 27 13:27:16 2015
@author: traca001
"""
import
os
import
sys
import
json
import
fileinput
import
pandas
as
pd
import
numpy
as
np
import
matplotlib.pyplot
as
plt
# Any truthy value makes parse_bnx trace its intermediate state on stdout.
debug = ''
# Summed molecule lengths per run and scan: runsizes[RunId][ScanNumber].
# Module-level on purpose: the script entry point dumps it after parsing.
runsizes = dict()


def parse_bnx(file_path, size_dict=None):
    """Sum molecule lengths per scan from the BNX file(s) at *file_path*.

    Two kinds of tab-separated lines are interpreted; all others are skipped:

    * Lines starting with ``'# Run '`` declare a run.  The last field is the
      RunId and the 8th field the NumberofScans.  For every scan number
      ``1..NumberofScans`` the totals ``runsizes[RunId][scan]`` and
      ``size_dict[scan]`` are (re)set to 0.
    * Lines starting with ``'0\\t'`` describe a molecule.  The 3rd field is
      its Length, the 8th its ScanNumber and the second-to-last its RunId.
      Length is added to ``runsizes[RunId][ScanNumber]`` and to
      ``size_dict[ScanNumber]``.

    Args:
        file_path: Handed to ``fileinput.FileInput(files=...)``: a single
            path or a sequence of paths.  Following ``fileinput``
            conventions, ``'-'`` reads stdin and ``None`` falls back to
            ``sys.argv[1:]``.
        size_dict: Optional dict to accumulate per-scan totals into (it is
            modified in place).  A fresh dict is created when omitted, so
            totals no longer leak from one call into the next.

    Returns:
        ``size_dict``, mapping scan number to the summed molecule length
        over all runs.  The per-run totals are stored in the module-level
        ``runsizes`` as a side effect.

    Raises:
        KeyError: A molecule line names a run or scan that no preceding
            ``'# Run '`` line declared.
        ValueError, IndexError: A recognised line has too few fields or a
            non-numeric value where a number is expected.
    """
    if size_dict is None:
        size_dict = {}

    # A private FileInput (rather than the module-global fileinput.input())
    # honours file_path and cannot clash with another active fileinput user.
    reader = fileinput.FileInput(files=file_path)
    try:
        for line in reader:
            if line.startswith('# Run '):
                if debug:
                    print(line.strip())
                fields = line.split('\t')
                # The SourceFolder of this Run is the 2nd entry of the line
                if debug:
                    print(fields[1])
                # The RunId of this Run is the last entry of the line;
                # int() tolerates the trailing newline.
                run_id = int(fields[-1])
                if debug:
                    print(run_id)
                runsizes[run_id] = dict()
                # The NumberofScans of this Run is the 8th entry of the line
                number_of_scans = int(fields[7])
                for scan in range(1, number_of_scans + 1):
                    runsizes[run_id][scan] = 0
                    size_dict[scan] = 0
                if debug:
                    print(runsizes)
                    print(size_dict)
            elif line.startswith('0\t'):
                fields = line.split('\t')
                if debug:
                    print(fields)
                # Length is the 3rd entry
                length = float(fields[2])
                # ScanNumber is the 8th entry
                scan_number = int(fields[7])
                # RunId is the second to last entry
                run_id = int(fields[-2])
                if debug:
                    print('%s %s' % (scan_number, length))
                runsizes[run_id][scan_number] += length
                size_dict[scan_number] += length
                if debug:
                    print(runsizes)
                    print(size_dict)
    finally:
        reader.close()
    return size_dict
def hist_plot(size_dict, values=None):
    """Draw the totals stored in *size_dict* as a line plot and show it.

    Despite its name this calls ``plt.plot``, so the result is a line
    graph of the totals, not a histogram.

    Args:
        size_dict: Mapping whose values are the numbers to plot, e.g. the
            scan-number -> summed-length dict returned by ``parse_bnx``.
            Its keys must be mutually sortable.
        values: Optional list the totals are appended to before plotting;
            anything already in it is plotted first.  A fresh list is used
            when omitted, so repeated calls no longer re-plot the points of
            earlier calls.

    Returns:
        None.  The figure is displayed via ``plt.show()``.
    """
    if values is None:
        values = []
    # Sort by key so the x-axis follows scan order rather than whatever
    # order the dict happens to iterate in.
    values.extend(size_dict[key] for key in sorted(size_dict))
    plt.plot(values)
    plt.show()
if __name__ == '__main__':
    # Script entry point: parse the BNX file(s) named on the command line
    # and dump the per-run, per-scan length totals as indented JSON.
    if len(sys.argv) < 2:
        # Give a usage line instead of an IndexError traceback.
        sys.exit('usage: %s <bnx-file> [<bnx-file> ...]' % sys.argv[0])
    # Hand over every path given on the command line.
    parse_bnx(sys.argv[1:])
    # Parenthesised single-argument print is valid on Python 2 and 3 alike.
    print(json.dumps(runsizes, sort_keys=True, indent=4,
                     separators=(',', ': ')))
    # Run ids that were parsed; not used any further by this script.
    labels = runsizes.keys()
    # hist_plot() is defined in this file but is deliberately not invoked
    # here.
"""
Example data :
# BNX File Version: 1.2
# Label Channels: 1
# Nickase Recognition Site 1:
# Min Molecule Length (Kb): 150
# Label SNR Filter Type: Static
# Min Label SNR: 2.750
# Software Version: 2.1.0.5973
#rh SourceFolder InstrumentSerial Time NanoChannelPixelsPerScan StretchFactor BasesPerPixel NumberofScans ChipId Flowcell LabelSNRFilterType MinMoleculeLength MinLabelSNR RunId
# Run data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-02_10_24\Detect Molecules ALPHAUNIT07 10/2/2014 10:24:31 AM 68819821 0.85 490.646636962891 12 20249,11887,8/27/2014,850015130 1 Dynamic 100 4.481689 1
# Run Data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-01_10_17\Detect Molecules ALPHAUNIT07 10/1/2014 10:17:14 AM 68819821 0.85 490.646636962891 3 20249,11887,8/27/2014,850015130 1 Dynamic 100 3.669297 2
# Run Data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-02_17_40\Detect Molecules ALPHAUNIT07 10/2/2014 5:40:56 PM 68819821 0.85 490.646636962891 30 20249,11887,8/27/2014,850015130 1 Dynamic 100 4.0552 3
# Run Data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-07_09_52\Detect Molecules ALPHAUNIT07 10/7/2014 9:52:50 AM 68819821 0.85 490.646636962891 30 20249,11887,8/27/2014,850015138 1 Dynamic 100 4.481689 4
# Run Data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-07_15_57\Detect Molecules ALPHAUNIT07 10/7/2014 3:57:36 PM 68819821 0.85 490.646636962891 30 20249,11887,8/27/2014,850015138 1 Dynamic 100 4.481689 5
# Run Data Z:\Labdata\AlphaUnit_07
\201
4-10\92544_tomato_nuclei_2014-10-08_18_51\Detect Molecules ALPHAUNIT07 10/8/2014 6:51:47 PM 68819821 0.85 490.646636962891 30 20249,11897,9/19/2014,850015323 1 Dynamic 100 4.953032 6
# Run Data Z:\Labdata\AlphaUnit_04
\201
4-10\92544_tomato_nuclei_2014-10-09_16_33\Detect Molecules ALPHAUNIT04 10/9/2014 4:33:42 PM 68819821 0.85 490.646636962891 30 20249,11897,9/19/2014,850015349 1 Static 0 0 7
# Run Data foo\swap92571_tomato_nuclei_2014-10-23_17_27
\b
ar B023 10/23/2014 5:27:39 PM 69207771 0.85 560.93212890625 30 20249,11887,8/27/2014,850015110 1 Static 0 0 8
# Run Data foo\swap92571_tomato_nuclei_2014-10-23_18_22
\b
ar B023 10/23/2014 6:22:45 PM 69207771 0.85 560.93212890625 30 20249,11887,8/27/2014,850015110 2 Static 0 0 9
# Quality Score QX01: SNR
# Quality Score QX02: Ave Intensity
#0h LabelChannel MoleculeId Length AvgIntensity SNR NumberofLabels OriginalMoleculeId ScanNumber ScanDirection ChipId Flowcell RunId GlobalScanNumber
#0f int int float float float int int int int string int int int
#1h LabelChannel LabelPositions[N]
#1f int float
#2h LabelChannel LabelPositions[N]
#2h int float
#Qh QualityScoreID QualityScores[N]
#Qf str float
0 3 286801.4 0.177417 42.720 23 3 1 -1 20249,11887,8/27/2014,850015130 1 1 1
1 12681.6 34673.8 41179.9 44849.6 53791.5 67825.2 69610.3 77348.8 82893.8 88369.7 106001.2 112227.1 133172.5 141298.1 161661.7 169396.6 180201.0 212370.5 221516.8 245870.2 248775.1 276038.5 284017.0 286801.4
QX11 37.3425 33.6353 10.9667 29.1389 18.0582 30.1201 19.3122 13.6342 25.4127 27.4380 40.7450 22.8699 29.0980 11.4947 57.1931 8.3590 64.5878 23.0364 52.9536 18.7634 58.5257 22.7799 31.2551
QX12 0.0841 0.0816 0.0458 0.0708 0.0555 0.1042 0.0725 0.0519 0.0638 0.0674 0.1016 0.0647 0.0727 0.0408 0.1263 0.0393 0.1502 0.0704 0.1481 0.0764 0.1518 0.0577 0.0916
0 4 209794.0 0.117181 20.013 13 4 1 -1 20249,11887,8/27/2014,850015130 1 1 1
1 11480.0 13296.0 17692.7 91294.8 100287.0 102576.2 124496.7 142540.4 143437.5 173266.5 178092.0 201619.3 206410.0 209794.0
QX11 8.3894 32.0272 16.9435 70.4016 42.7193 4.9234 29.2025 16.6782 25.7460 16.2417 24.1204 42.1043 8.1092
QX12 0.0467 0.1062 0.0561 0.1684 0.1221 0.0425 0.0632 0.0575 0.0709 0.0515 0.0655 0.1128 0.0391
"""
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment