Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Tu, Beiyu
Fine mapping of QTLs for potato development
Commits
e0573de3
Commit
e0573de3
authored
Nov 17, 2021
by
Tu, Beiyu
Browse files
Upload New File
parent
38addb36
Changes
1
Hide whitespace changes
Inline
Side-by-side
pre_Joinmap.py
0 → 100644
View file @
e0573de3
"""
Scripted by Beiyu.
Pre-processing script for JoinMap.
Handles genotype grids that contain only <nnxnp> markers.
"""
def parse(filename):
    """
    parse the txt tab file saved from excel
    input:
        filename: txt tab file, the head line should be the marker names in order, the first column should be individuals
            genotype     marker1  marker2  ...
            individual1  allele1  allele2  ...
            individual2  allele3  allele4  ...
            ...          ...      ...      ...
    output:
        allele_dict: {individual: [alleles in order]}
        markers: [markers]
    raises:
        ValueError: if no line of the file starts with "genotype"
    """
    allele_dict = {}
    markers = None
    # The context manager closes the file even if a line fails to parse.
    with open(filename, "r") as f:
        for line in f:
            # Fields are split on any run of whitespace, so an empty cell
            # would shift the rest of its row to the left.
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line (e.g. a trailing newline).
                continue
            if line.startswith("genotype"):
                markers = fields[1:]
            else:
                allele_dict[fields[0]] = fields[1:]
    if markers is None:
        raise ValueError(
            "no header line starting with 'genotype' found in " + repr(filename)
        )
    return allele_dict, markers
def nnxnp_table(allele_dict, markers):
    """
    convert the grid table to the nnxnp table used in Joinmap for the CP population type
    input:
        allele_dict: {individual: [calls in marker order]}, each call being "?"
                     or two alleles joined by ":"
        markers: [markers], only its length is used to decide how many columns are read
    output:
        nnxnp_dict: {individual: [codes]}, where "?" becomes "", a call whose two
                    alleles are equal becomes "nn" and any other call becomes "np"
    """
    def encode(call):
        # "?" marks a missing call and is written as an empty cell.
        if call == "?":
            return ""
        first, second = call.split(":")
        return "nn" if first == second else "np"

    n_markers = len(markers)
    return {
        individual: [encode(calls[col]) for col in range(n_markers)]
        for individual, calls in allele_dict.items()
    }
def axb_table(nnxnp_dict, markers):
    """
    convert the nnxnp table to the axb table used in Joinmap for the DH population type
    input:
        nnxnp_dict: {individual: [codes in marker order]} with codes "nn", "np" or ""
        markers: [markers], only its length is used
    output:
        axb_dict: {individual: [codes]} with "nn" -> "a", "np" -> "b", "" -> "";
                  afterwards each marker column (left to right, starting at the
                  second) has its "a"/"b" swapped when fewer individuals agree
                  with the previous column than disagree with it
    """
    recode = {"nn": "a", "np": "b", "": ""}
    swap = {"a": "b", "b": "a"}
    n_markers = len(markers)

    # Step 1: plain recoding; codes outside the table are not copied over.
    axb_dict = {}
    for individual, codes in nnxnp_dict.items():
        row = []
        for col in range(n_markers):
            code = codes[col]
            if code in recode:
                row.append(recode[code])
        axb_dict[individual] = row

    # Step 2: bring every column into the phase of its (already phased)
    # left-hand neighbour.
    for col in range(1, n_markers):
        same = 0
        different = 0
        for row in axb_dict.values():
            here = row[col]
            before = row[col - 1]
            if here == "" or before == "":
                # Pairs with a missing value carry no phase information.
                continue
            if here == before:
                same += 1
            else:
                different += 1
        if same < different:
            for row in axb_dict.values():
                row[col] = swap.get(row[col], row[col])
    return axb_dict
def correct_markers(allele_dict, markers):
    """
    Keep only the markers whose column shows exactly two distinct calls.
    Columns with fewer (no segregation) or more than two distinct calls are dropped;
    "?" cells are ignored when counting.
    input:
        allele_dict: {individual: [calls in marker order]}
        markers: [markers]
    output:
        corrected_allele_dict: {individual: [calls of the kept markers]}
        corrected_markers: [kept markers, original order]
    """
    kept_columns = []
    for col in range(len(markers)):
        observed = {calls[col] for calls in allele_dict.values()}
        observed.discard("?")
        if len(observed) == 2:
            kept_columns.append(col)

    corrected_markers = [markers[col] for col in kept_columns]
    corrected_allele_dict = {
        individual: [calls[col] for col in kept_columns]
        for individual, calls in allele_dict.items()
    }
    return corrected_allele_dict, corrected_markers
def correct_isolated_cells(allele_dict, axb_dict, markers):
    """
    Replace the isolated coloured cells with missing value
    A cell is isolated when it and both of its neighbours are non-missing and it
    differs from both neighbours. The first and last columns are never changed.
    input:
        allele_dict: {individual: [calls]}, modified in place ("?" written)
        axb_dict: {individual: [a/b codes]}, modified in place ("" written)
        markers: [markers], only its length is used
    output:
        allele_dict, axb_dict: the same two objects that were passed in
    """
    last = len(markers) - 1
    for individual, row in axb_dict.items():
        # Left to right: a cell blanked here is seen as missing by the next one.
        for col in range(1, last):
            cell = row[col]
            if cell == "" or row[col - 1] == "" or row[col + 1] == "":
                continue
            if cell == row[col - 1] or cell == row[col + 1]:
                continue
            row[col] = ""
            allele_dict[individual][col] = "?"
    return allele_dict, axb_dict
def output(filename):
    """
    Write the two Joinmap input tables derived from the grid file.
    input:
        filename: txt tab file as accepted by parse()
    output:
        nothing is returned; two files are written next to the input file, named
        after the part of its base name before the first ".":
            <stem>_nnxnp.txt: marker header, a "<nnxnp>" row, then nn/np codes
            <stem>_axb.txt:   marker header, then a/b codes
        Markers rejected by correct_markers() are left out, and cells blanked by
        correct_isolated_cells() are written as empty cells in both files.
    """
    import os  # local import: the script has no module-level import block

    # Compute everything before opening the output files, so a failure while
    # reading the grid does not leave empty or truncated result files behind.
    allele_dict, markers = parse(filename)
    allele_dict, markers = correct_markers(allele_dict, markers)
    nnxnp_dict = nnxnp_table(allele_dict, markers)
    axb_dict = axb_table(nnxnp_dict, markers)
    allele_dict, axb_dict = correct_isolated_cells(allele_dict, axb_dict, markers)
    # Recompute so the cells just set to "?" are also blank in the nnxnp table.
    nnxnp_dict = nnxnp_table(allele_dict, markers)

    # Only the base name is cut at its first ".", so a dot in a directory part
    # (e.g. "./grid.txt") no longer truncates the output path.
    directory, basename = os.path.split(filename)
    stem = os.path.join(directory, basename.split(".")[0])

    header = "\t" + "".join(marker + "\t" for marker in markers)

    with open(stem + "_nnxnp.txt", "w") as f1:
        f1.write(header)
        f1.write("\n\t" + "<nnxnp>\t" * len(markers) + "\n\n")
        for individual, values in nnxnp_dict.items():
            f1.write("\n" + individual + "\t")
            f1.write("".join(value + "\t" for value in values))

    with open(stem + "_axb.txt", "w") as f2:
        f2.write(header)
        f2.write("\n")
        for individual, values in axb_dict.items():
            f2.write("\n" + individual + "\t")
            f2.write("".join(value + "\t" for value in values))
def _breakpoint_index(calls, n_markers):
    """
    Return the signed sort key of one individual from its a/b codes.
    The leading run is counted positively when it is "a" and negatively when it
    is "b"; missing cells extend a run that has started and are skipped before it.
    The scan stops at the first code that does not continue the run.
    A row scanned to the end without such a switch gets +n_markers / -n_markers,
    or 0 when it holds no "a"/"b" code at all.
    """
    count = 0
    for col in range(n_markers):
        call = calls[col]
        if call == "":
            if count > 0:
                count += 1
            elif count < 0:
                count -= 1
        elif call == "a" and count >= 0:
            count += 1
        elif call == "b" and count <= 0:
            count -= 1
        else:
            # The run is broken: treat this as the first recombination breakpoint.
            return count
    # No breakpoint found. Leading missing cells are not counted above, so the
    # run length may be short of n_markers; pin such rows to the extreme key.
    if count > 0:
        return n_markers
    if count < 0:
        return -n_markers
    return 0


def reorder(filename):
    """
    reorder the grid by the positions of recombination breakpoints
    input:
        filename: txt tab file as accepted by parse()
    output:
        nothing is returned; "<stem>_reordered.txt" is written next to the input
        file (stem = base name up to its first "."). It has a "genotype" header
        with the markers kept by correct_markers(), then one row of calls per
        individual, with isolated cells already replaced by "?".
        Row order: keys 0, -1, -2, ..., -len(markers), then 1, 2, ..., len(markers);
        individuals with the same key keep their order from the input file.
    """
    import os  # local import: the script has no module-level import block

    allele_dict, markers = parse(filename)
    allele_dict, markers = correct_markers(allele_dict, markers)
    nnxnp_dict = nnxnp_table(allele_dict, markers)
    axb_dict = axb_table(nnxnp_dict, markers)
    allele_dict, axb_dict = correct_isolated_cells(allele_dict, axb_dict, markers)

    n_markers = len(markers)
    index = {
        individual: _breakpoint_index(calls, n_markers)
        for individual, calls in axb_dict.items()
    }
    # Stable sort: non-positive keys first by growing magnitude, then the
    # positive keys ascending.
    order = sorted(index, key=lambda individual: (index[individual] > 0, abs(index[individual])))

    # Only the base name is cut at its first ".", so a dot in a directory part
    # does not truncate the output path.
    directory, basename = os.path.split(filename)
    stem = os.path.join(directory, basename.split(".")[0])

    with open(stem + "_reordered.txt", "w") as f:
        f.write("genotype\t")
        f.write("".join(marker + "\t" for marker in markers))
        for individual in order:
            calls = allele_dict[individual]
            f.write("\n" + individual + "\t")
            f.write("".join(calls[col] + "\t" for col in range(n_markers)))
if __name__ == "__main__":
    # Pass one or more grid files on the command line, or replace the fallback
    # file name here and run the script without arguments.
    import sys

    for grid_file in sys.argv[1:] or ["filename"]:
        output(grid_file)
        reorder(grid_file)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment