Commit e0573de3 authored by Tu, Beiyu's avatar Tu, Beiyu
Browse files

Upload New File

parent 38addb36
"""
Scripted by Beiyu
For joinmap pre-work
To handle the grid with only nnxnp markers
"""
def parse(filename):
    """
    Parse the tab-delimited txt file saved from Excel.

    input:
    filename: txt tab file, the head line should start with "genotype" followed
    by the marker names in order, the first column should be individuals
    genotype marker1 marker2 ...
    individual1 allele1 allele2 ...
    individual2 allele3 allele4 ...
    ... ... ... ...
    output:
    allele_dict: {individual: [alleles in order]}
    markers: [markers]
    raises:
    ValueError: if the file has no line starting with "genotype"
    """
    allele_dict = {}
    markers = None
    # The context manager guarantees the handle is closed, even on errors.
    with open(filename, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. trailing newlines left by Excel) hold no data.
                continue
            if line.startswith("genotype"):
                markers = fields[1:]
            else:
                allele_dict[fields[0]] = fields[1:]
    if markers is None:
        raise ValueError(
            "no header line starting with 'genotype' found in " + repr(filename)
        )
    return allele_dict, markers
def nnxnp_table(allele_dict, markers):
    """
    Translate the genotype grid into the nn/np coding that Joinmap uses for the
    CP population type.

    For every individual the first len(markers) cells are read in order:
    "?" becomes "", a cell "x:y" becomes "nn" when x equals y and "np" otherwise.
    Returns {individual: [codes in marker order]}.
    """
    def encode(cell):
        # "?" marks a missing observation in the grid.
        if cell == "?":
            return ""
        first, second = cell.split(":")
        return "nn" if first == second else "np"

    marker_count = len(markers)
    return {
        name: [encode(genotypes[pos]) for pos in range(marker_count)]
        for name, genotypes in allele_dict.items()
    }
def axb_table(nnxnp_dict, markers):
    """
    Turn the nn/np table into an a/b table that can be used in Joinmap for the
    DH population type.

    Step 1 recodes every cell ("nn" -> "a", "np" -> "b", "" stays "").
    Step 2 walks the markers left to right and, whenever a marker disagrees
    with its left neighbour in more individuals than it agrees (cells missing
    on either side are not counted), swaps "a" and "b" in that whole column.
    """
    recode = {"nn": "a", "np": "b", "": ""}
    opposite = {"a": "b", "b": "a"}
    marker_count = len(markers)

    # Cells holding anything other than the three known codes are skipped.
    axb_dict = {
        name: [
            recode[calls[pos]]
            for pos in range(marker_count)
            if calls[pos] in recode
        ]
        for name, calls in nnxnp_dict.items()
    }

    for col in range(1, marker_count):
        same = 0
        different = 0
        for phases in axb_dict.values():
            if phases[col] == "" or phases[col - 1] == "":
                continue
            if phases[col] == phases[col - 1]:
                same += 1
            else:
                different += 1
        if different > same:
            # The column is in the opposite phase to its (already settled)
            # left neighbour, so flip it for every individual.
            for phases in axb_dict.values():
                phases[col] = opposite.get(phases[col], phases[col])
    return axb_dict
def correct_markers(allele_dict, markers):
    """
    Drop the markers that do not show segregation or that show three or more
    different values in the grid.

    A marker is kept only when exactly two distinct non-"?" cell values occur
    in its column. Returns the reduced grid and the list of kept markers, both
    in the original marker order.
    """
    kept_columns = []
    for col in range(len(markers)):
        observed = {row[col] for row in allele_dict.values()}
        observed.discard("?")  # missing cells do not count as a value
        if len(observed) == 2:
            kept_columns.append(col)

    corrected_markers = [markers[col] for col in kept_columns]
    corrected_allele_dict = {
        name: [row[col] for col in kept_columns]
        for name, row in allele_dict.items()
    }
    return corrected_allele_dict, corrected_markers
def correct_isolated_cells(allele_dict, axb_dict, markers):
    """
    Blank out isolated cells: a non-missing cell whose two neighbours are both
    non-missing and both different from it is replaced with the missing value
    ("" in axb_dict, "?" in allele_dict).

    Both dictionaries are modified in place and also returned. Cells are
    visited left to right, so a cell that has just been blanked counts as
    missing when its right neighbour is examined. The first and last marker
    are never changed.
    """
    last = len(markers) - 1
    for name, phases in axb_dict.items():
        for pos in range(1, last):
            if phases[pos] == "" or phases[pos - 1] == "" or phases[pos + 1] == "":
                continue
            if phases[pos] not in (phases[pos - 1], phases[pos + 1]):
                phases[pos] = ""
                allele_dict[name][pos] = "?"
    return allele_dict, axb_dict
def output(filename):
    """
    Write the two Joinmap input tables derived from the grid file.

    input:
    filename: txt tab file in the format expected by parse()
    output (files written next to the input file):
    <stem>_nnxnp.txt: nn/np table, after removing unusable markers and
    isolated cells
    <stem>_axb.txt: a/b table of the same markers
    where <stem> is the input path with everything from the first "." of the
    file name onwards removed.
    """
    import os

    # Strip the extension from the base name only, so dots in directory parts
    # (e.g. "./grid.txt" or "../data/grid.txt") do not truncate the path.
    directory, base_name = os.path.split(filename)
    stem = os.path.join(directory, base_name.split(".")[0])

    # Do all the computation before opening the output files, so a parsing
    # failure does not leave empty output files behind.
    allele_dict, markers = parse(filename)
    allele_dict, markers = correct_markers(allele_dict, markers)
    nnxnp_dict = nnxnp_table(allele_dict, markers)
    axb_dict = axb_table(nnxnp_dict, markers)
    allele_dict, axb_dict = correct_isolated_cells(allele_dict, axb_dict, markers)
    # Recompute the nn/np table so the blanked isolated cells show as missing.
    nnxnp_dict = nnxnp_table(allele_dict, markers)

    header = "\t" + "".join(marker + "\t" for marker in markers)
    with open(stem + "_nnxnp.txt", "w") as f1, open(stem + "_axb.txt", "w") as f2:
        f1.write(header)
        f2.write(header)
        # Second header row of the nn/np file: one "<nnxnp>" tag per marker.
        f1.write("\n\t" + "<nnxnp>\t" * len(markers) + "\n\n")
        f2.write("\n")
        for individual, values in nnxnp_dict.items():
            f1.write("\n" + individual + "\t")
            f1.write("".join(value + "\t" for value in values))
        for individual, values in axb_dict.items():
            f2.write("\n" + individual + "\t")
            f2.write("".join(value + "\t" for value in values))
def _breakpoint_index(phases, marker_count):
    """
    Return the signed position of the first recombination breakpoint in one row.

    The row is scanned from the first marker. Leading missing cells are not
    counted; after that every cell (missing ones included) extends the run.
    The run is counted positive when it is "a" and negative when it is "b".
    The scan stops at the first cell of the other phase and returns the signed
    run length. A row without any breakpoint returns +marker_count (run of
    "a") or -marker_count (run of "b", or nothing but missing cells).
    """
    run = 0
    for pos in range(marker_count):
        phase = phases[pos]
        if phase == "":
            if run > 0:
                run += 1
            elif run < 0:
                run -= 1
        elif phase == "a" and run >= 0:
            run += 1
        elif phase == "b" and run <= 0:
            run -= 1
        else:
            # First cell of the other phase: this is the breakpoint.
            return run
    # The whole row was scanned without a breakpoint: place it at the extreme.
    return marker_count if run > 0 else -marker_count


def reorder(filename):
    """
    Reorder the grid by the positions of recombination breakpoints.

    input:
    filename: txt tab file in the format expected by parse()
    output (file written next to the input file):
    <stem>_reordered.txt: the grid (after removing unusable markers and
    isolated cells) with individuals sorted by breakpoint index: first the
    indices 0, -1, -2, ... down to -len(markers), then 1, 2, ... up to
    len(markers). Individuals with equal index keep their original order.
    Non-recombinant individuals get index +/-len(markers), so they end up at
    the end of their group.
    """
    import os

    allele_dict, markers = parse(filename)
    allele_dict, markers = correct_markers(allele_dict, markers)
    nnxnp_dict = nnxnp_table(allele_dict, markers)
    axb_dict = axb_table(nnxnp_dict, markers)
    allele_dict, axb_dict = correct_isolated_cells(allele_dict, axb_dict, markers)

    marker_count = len(markers)
    index = {
        individual: _breakpoint_index(phases, marker_count)
        for individual, phases in axb_dict.items()
    }

    # sorted() is stable, so individuals sharing an index keep dict order.
    descending = sorted(
        (individual for individual in index if index[individual] <= 0),
        key=lambda individual: -index[individual],
    )
    ascending = sorted(
        (individual for individual in index if index[individual] > 0),
        key=lambda individual: index[individual],
    )
    order = descending + ascending

    # Strip the extension from the base name only, so dots in directory parts
    # (e.g. "./grid.txt") do not truncate the path.
    directory, base_name = os.path.split(filename)
    stem = os.path.join(directory, base_name.split(".")[0])

    with open(stem + "_reordered.txt", "w") as f:
        f.write("genotype\t")
        f.write("".join(marker + "\t" for marker in markers))
        for individual in order:
            f.write("\n" + individual + "\t")
            f.write(
                "".join(
                    allele_dict[individual][pos] + "\t"
                    for pos in range(marker_count)
                )
            )
if __name__ == "__main__":
    # Put the name of the tab-delimited grid file here, then run the script.
    # The same file is used for both steps.
    grid_file = "filename"
    output(grid_file)
    reorder(grid_file)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment