Commit def67dda authored by Aflitos, Saulo Alves
Browse files

fasta merge script

parent d228c226
......@@ -3,12 +3,14 @@
*.sql
*.log
*.sqlite
*~
pypy
tosql
win/
data/
data2/
data3/
data4/
introgression_viewer.tgz
introgression_viewer.xz
static/FileSaver.js/demo/
......
......@@ -4,8 +4,8 @@ import os
import sys
import csv
import re
import unicodedata
from unidecode import unidecode
#import unicodedata
#from unidecode import unidecode
from filemanager import checkfile, openfile
......
......@@ -4,8 +4,8 @@ import os
import sys
import csv
import re
import unicodedata
from unidecode import unidecode
#import unicodedata
#from unidecode import unidecode
"""
EX1=vcfmerger/csv_list_multicolumn.py
......@@ -65,7 +65,7 @@ def get_translation(intbl, tbl_k, tbl_vs):
#vs = [ unidecode(v) for v in vs ]
v = "_".join(vs)
k = sanitize(k, ' -.,:()=#&;')
#k = sanitize(k, ' -.,:()=#&;')
v = sanitize(v, ' -.,:()=#&;')
assert k not in data, "key %s found more than once" % ( k )
......@@ -77,6 +77,17 @@ def get_translation(intbl, tbl_k, tbl_vs):
return data, atad
def sanitize(s, k, v="_"):
    """
    Replace unwanted characters in a label and return it as pure ASCII.

    s: text to clean (a UTF-8 byte string, or already-decoded text).
    k: iterable of characters; every occurrence in s is replaced by v.
    v: filler used as the replacement [default: "_"].

    Runs of consecutive fillers are collapsed into a single filler and
    fillers at both ends are removed. Non-ASCII characters are written as
    backslash escapes, so the returned value is an ASCII byte string.
    """
    for r in k:
        s = s.replace(r, v)

    if v:
        # Escape the filler so regex metacharacters ('.', '+', '*', ...) are
        # matched literally, and group it so that a multi-character filler is
        # collapsed as a unit. The callable replacement keeps backslashes in
        # the filler from being interpreted as regex escapes.
        s = re.sub('(?:%s)+' % re.escape(v), lambda _match: v, s)

    s = s.strip(v)

    # Only byte strings need decoding; text that is already decoded would
    # otherwise fail on the implicit ASCII round trip.
    if isinstance(s, bytes):
        s = s.decode('utf8')

    return s.encode('ascii', 'backslashreplace')
def main():
try:
inlst = sys.argv[1]
......@@ -121,15 +132,6 @@ def main():
writer.writerow(cols)
def sanitize(s, k, v="_"):
    """
    Replace unwanted characters in a label and return it as pure ASCII.

    s: text to clean (a UTF-8 byte string, or already-decoded text).
    k: iterable of characters; every occurrence in s is replaced by v.
    v: filler used as the replacement [default: "_"].

    Runs of consecutive fillers are collapsed into a single filler and
    fillers at both ends are removed. Non-ASCII characters are written as
    XML character references, so the returned value is an ASCII byte string.
    """
    for r in k:
        s = s.replace(r, v)

    if v:
        # Escape the filler so regex metacharacters ('.', '+', '*', ...) are
        # matched literally, and group it so that a multi-character filler is
        # collapsed as a unit. The callable replacement keeps backslashes in
        # the filler from being interpreted as regex escapes.
        s = re.sub('(?:%s)+' % re.escape(v), lambda _match: v, s)

    s = s.strip(v)

    # Only byte strings need decoding; text that is already decoded would
    # otherwise fail on the implicit ASCII round trip.
    if isinstance(s, bytes):
        s = s.decode('utf8')

    return s.encode('ascii', 'xmlcharrefreplace')
if __name__ == '__main__':
main()
LST=introgress_moneymaker.lst
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} '$LST';'
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --infile {} --inlist pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --infile {} --inlist '$LST';'
#./pngfolder_to_html.py trees/*_pimp_problems.lst.png
#convert -page A4 -resample 1200 -quality 100 -density 1200 -compress Zip *_pimp_problems.lst.png*.png index_trees_short2.lst.vcf.gz.simplified.vcf.gz.filtered.vcf.gz.SL2.40ch06.0000_.vcf.gz.SL2.40ch06.fasta.tree_pimp_problems.lst.png.pdf
......
LST=introgress_moneymaker.lst
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} '$LST';'
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist pimp_problems.lst; ./newick_to_png.py --input {} cherry.lst;'
ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist '$LST';'
#./pngfolder_to_html.py trees/*_pimp_problems.lst.png
#convert -page A4 -resample 1200 -quality 100 -density 1200 -compress Zip *_pimp_problems.lst.png*.png index_trees_short2.lst.vcf.gz.simplified.vcf.gz.filtered.vcf.gz.SL2.40ch06.0000_.vcf.gz.SL2.40ch06.fasta.tree_pimp_problems.lst.png.pdf
......
#!/usr/bin/python
import sys
import os
from collections import OrderedDict
def main():
    """
    Merge several FASTA files into a single FASTA file.

    Command line: <output fasta> [<input fasta> ...]

    Records that share the same header line, within one input file or across
    several, are concatenated in the order they are read. Records are written
    in first-seen order with the sequence wrapped at 80 columns. The script
    refuses to overwrite an existing output file and exits with status 1 on
    any problem with its arguments or inputs.
    """
    if len(sys.argv) < 2:
        # Without this guard a missing argument ends in a bare IndexError.
        print("usage: %s <output fasta> [<input fasta> ...]" % sys.argv[0])
        sys.exit(1)

    outfile = sys.argv[1]

    if os.path.exists(outfile):
        print("output file %s exists. quitting" % outfile)
        sys.exit(1)
    else:
        print("output file %s" % outfile)

    infiles = sys.argv[2:]

    # Validate every input up front so nothing is read if one is missing.
    for infile in infiles:
        if not os.path.exists(infile):
            print("input file %s does not exists" % infile)
            sys.exit(1)

    # header line -> list of sequence fragments; joined once when saving to
    # avoid rebuilding an ever-growing string for every input line.
    data = OrderedDict()

    for infile in infiles:
        print("reading %s" % infile)

        # Reset per file: sequence lines must never be attached to a header
        # that was read from a previous file.
        name = None

        with open(infile, 'r') as fhd:
            for line in fhd:
                line = line.strip()

                if len(line) == 0:
                    continue

                if line[0] == ">":
                    name = line

                    # A trailing "*" flags a header seen for the first time.
                    if name not in data:
                        data[name] = []
                        marker = "  *"
                    else:
                        marker = ""

                    print("reading %s seq %s%s" % (infile, name, marker))

                else:
                    if name is None:
                        print("input file %s has sequence data before the first header" % infile)
                        sys.exit(1)

                    data[name].append(line)

    with open(outfile, 'w') as fhd:
        for name in data:
            seq = "".join(data[name])
            print("saving %s (%d)" % (name, len(seq)))
            fhd.write(name + "\n")

            for chunk in split_by_n(seq, 80):
                fhd.write(chunk + "\n")
def split_by_n(seq, n):
    """
    A generator to divide a sequence into chunks of n units.

    seq: any sliceable sequence (string, list, ...); an empty or false value
         yields nothing.
    n  : chunk length; must be a positive integer.

    Yields consecutive slices of at most n units; only the last one may be
    shorter. Raises ValueError, on first iteration, if n is not positive.

    http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
    """
    if not seq:
        return

    if n <= 0:
        # A non-positive step can never consume the sequence.
        raise ValueError("chunk size must be positive, got %r" % (n,))

    # Walk an offset instead of re-slicing the remainder on every step: each
    # unit is copied exactly once, which matters for chromosome-sized input.
    total = len(seq)
    start = 0
    while start < total:
        yield seq[start:start + n]
        start += n
if __name__ == '__main__':
main()
......@@ -154,18 +154,18 @@ def listChromsGff(ingff):
def main(args):
parser = argparse.ArgumentParser(description='Create makefile to convert files.')
parser.add_argument( '-i' , '--input', '--inlist' , dest='inlist' , default=None , nargs='?', type=str , help='input tab separated file')
parser.add_argument( '-f' , '--fasta', '--infasta' , dest='infasta' , default=None , nargs='?', type=str , help='input reference fasta. requires split size')
parser.add_argument( '-s' , '--size' , dest='size' , default=0 , nargs='?', type=int , help='split size')
parser.add_argument( '-p' , '--proj' , '--project' , dest='project' , default=None , nargs='?', type=str , help='project name')
parser.add_argument( '-o' , '--out' , '--outfile' , dest='outfile' , default='Makefile', nargs='?', type=str , help='output name [default: makefile]')
parser.add_argument( '-i' , '--input' , '--inlist' , dest='inlist' , default=None , nargs='?', type=str , help='input tab separated file')
parser.add_argument( '-f' , '--fasta' , '--infasta' , dest='infasta' , default=None , nargs='?', type=str , help='input reference fasta. requires split size')
parser.add_argument( '-s' , '--size' , dest='size' , default=0 , nargs='?', type=int , help='split size')
parser.add_argument( '-p' , '--proj' , '--project' , dest='project' , default=None , nargs='?', type=str , help='project name')
parser.add_argument( '-o' , '--out' , '--outfile' , dest='outfile' , default='Makefile', nargs='?', type=str , help='output name [default: makefile]')
parser.add_argument( '-ec' , '--excluded-chrom' , dest='excluded_chroms' , default=[] , action='append' , type=str , help='Do not use the following chromosomes' )
parser.add_argument( '-ic' , '--included-chrom' , dest='included_chroms' , default=[] , action='append' , type=str , help='Use EXCLUSIVELY these chromosomes' )
#parser.add_argument( '-g' , '--gff' , '--ingff' , dest='ingff' , default=None , nargs='?', type=str , help='input gff file')
parser.add_argument( '-n' , '--dry' , '--dry-run' , dest='dry' , default=False , action='store_true' , help='dry-run')
parser.add_argument( '-m' , '--merge', '--cluster_merge' , dest='merge' , default=False , action='store_true' , help='do merged clustering (resource intensive) [default: no]')
parser.add_argument( '-np' , '--no-pickle', dest='dopickle' , default=True , action='store_false', help='do not generate pickle database [default: no]')
parser.add_argument( '-n' , '--dry' , '--dry-run' , dest='dry' , default=False , action='store_true' , help='dry-run')
parser.add_argument( '-m' , '--merge' , '--cluster_merge' , dest='merge' , default=False , action='store_true' , help='do merged clustering (resource intensive) [default: no]')
parser.add_argument( '-np' , '--no-pickle' , dest='dopickle' , default=True , action='store_false', help='do not generate pickle database [default: no]')
parser.add_argument( '-t' , '--sub_threads' , dest='sub_threads' , default=5 , nargs='?', type=int , help='threads of submake to tree building [default: 5]')
parser.add_argument( '-St' , '--smart_threads' , dest='smart_threads' , default=None , nargs='?', type=int , help='threads of submake to tree building [default: 5]')
......@@ -175,12 +175,13 @@ def main(args):
parser.add_argument( '-SS' , '--simplify-include-singleton' , dest='simplify_do_singleton_filter', default=True , action='store_false', help='Do not simplify single SNPS')
parser.add_argument( '-So' , '--simplify-output' , dest='simplify_output' , default=None , nargs='?', type=str , help='Simplify output file')
parser.add_argument( '-Coc', '--concat-chrom', '--concat-chromosome' , dest='concat_chromosome' , default=None , nargs='?', action='store' , type=str , help='Concat - Chromosome to filter [all]')
parser.add_argument( '-Coc', '--concat-chrom' , '--concat-chromosome' , dest='concat_chromosome' , default=None , nargs='?', action='store' , type=str , help='Concat - Chromosome to filter [all]')
parser.add_argument( '-CoI', '--concat-ignore', '--concat-skip' , dest='concat_ignore' , default=[] , nargs='*', action='append' , type=str , help='Concat - Chromosomes to skip')
parser.add_argument( '-Cos', '--concat-start' , dest='concat_start' , default=None , nargs='?', action='store' , type=int , help='Concat - Chromosome start position to filter [0]')
parser.add_argument( '-Coe', '--concat-end' , dest='concat_end' , default=None , nargs='?', action='store' , type=int , help='Concat - Chromosome end position to filter [-1]')
parser.add_argument( '-Cot', '--concat-threads' , dest='concat_threads' , default=None , nargs='?', action='store' , type=int , help='Concat - Number of threads [num chromosomes]')
parser.add_argument( '-Cor', '--concat-noref' , dest='concat_noref' , action='store_false', help='Concat - Do not print reference [default: true]')
parser.add_argument( '-Con', '--concat-ref-name' , dest='concat_refname' , default=None , nargs='?', action='store' , type=str , help='Concat - Reference name [default: ref]')
parser.add_argument( '-CoR', '--concat-RIL' , dest='concat_RIL' , action='store_true' , help='Concat - RIL mode: false]')
parser.add_argument( '-CoRm','--concat-RIL-mads' , dest='concat_RILmads' , default=None , nargs='?', action='store' , type=float, help='Concat - RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]')
parser.add_argument( '-CoRs','--concat-RIL-minsim' , dest='concat_RILminsim' , default=None , nargs='?', action='store' , type=float, help='Concat - RIL percentage of nucleotides identical to reference to classify as reference: 0.75]')
......@@ -249,6 +250,7 @@ def main(args):
concat_end = options.concat_end
concat_threads = options.concat_threads
concat_noref = options.concat_noref
concat_refname = options.concat_refname
concat_RIL = options.concat_RIL
concat_RILmads = options.concat_RILmads
concat_RILminsim = options.concat_RILminsim
......@@ -851,6 +853,9 @@ cleanpickle_%(dirFix)s: cleanok
if not concat_noref:
concat_opts += " --noref"
if concat_refname:
concat_opts += " --ref-name %s" % concat_refname
if concat_RIL:
concat_opts += " --RIL"
......@@ -907,13 +912,13 @@ tree: $(OUTTREE)
png: $(OUTPNG)
%%.vcf.gz.fasta.tree.png: %%.vcf.gz.fasta.tree
\t%(topng)s $^
\t%(topng)s --infile $^
.PHONY: fasta
fasta: $(OUTFASTA)
%%.vcf.gz.fasta: %%.vcf.gz
\t%(concat)s %(concat_opts)s --fasta -i $^
\t%(concat)s %(concat_opts)s -i $^
\tif [ -f "$@" ]; then rm $@; fi
\tln `readlink -f $^.*.fasta` $@
......
......@@ -2,6 +2,8 @@
import sys
import os
import argparse
from ete2 import Tree
try:
print "importing image"
......@@ -19,7 +21,7 @@ except ImportError:
import math
import tempfile
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
print_ascii = False
......@@ -40,11 +42,49 @@ def makeColorTransparent(image, color, thresh2=0):
t=thresh2, d=distance2, c=color, r=red, g=green, b=blue, a=alpha))
return image
def main(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14):
    """
    Render one Newick file by handing every option, unchanged, to add_file().

    Nothing is returned; whatever add_file() produces is discarded.
    """
    settings = dict(
        inlist=inlist,
        capt=capt,
        ofp=ofp,
        output=output,
        ladderize=ladderize,
        addcaption=addcaption,
        extension=extension,
        dpi=dpi,
        fontsize=fontsize,
    )
    add_file(infile, **settings)
def main():
    """
    Command-line entry point: parse the options and hand them over to run().

    Prints the parsed options. If --infile ends up without a value, prints
    the help text and exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Convert Newick file to PNG.')

    # Options that carry a value: (flag, default, type, help text).
    # The destination attribute is the flag name without its dashes.
    valued = (
        ('--infile'   , None , str, 'Input Newick file'),
        ('--inlist'   , None , str, 'Input rename list'),
        ('--caption'  , None , str, 'Image caption'    ),
        ('--prefix'   , None , str, 'File prefix'      ),
        ('--output'   , None , str, 'Output name'      ),
        ('--extension', "png", str, 'Image extension'  ),
        ('--dpi'      , 1200 , int, 'Image DPI'        ),
        ('--fontsize' , 14   , int, 'Font size'        ),
    )

    for flag, default, kind, text in valued:
        parser.add_argument(flag,
                            dest=flag.lstrip('-'),
                            default=default,
                            action='store',
                            nargs='?',
                            required=(flag == '--infile'),
                            type=kind,
                            help=text)

    # Boolean switches: (flag, destination, action, help text).
    switches = (
        ('--no_ladderize' , 'ladderize'    , 'store_false', "Don't ladderize image"     ),
        ('--no_addcaption', 'addcaption'   , 'store_false', 'Do not add caption to image'),
        ('--show_distance', 'show_distance', 'store_true' , 'Plot with distance'         ),
    )

    for flag, dest, action, text in switches:
        parser.add_argument(flag, dest=dest, action=action, help=text)

    options = parser.parse_args()

    print(options)

    # nargs='?' lets "--infile" be given without a value, which leaves None.
    if options.infile is None:
        print("No input file given")
        parser.print_help()
        sys.exit(1)

    forwarded = {
        'inlist'       : options.inlist,
        'capt'         : options.caption,
        'ofp'          : options.prefix,
        'output'       : options.output,
        'ladderize'    : options.ladderize,
        'addcaption'   : options.addcaption,
        'extension'    : options.extension,
        'dpi'          : options.dpi,
        'show_distance': options.show_distance,
        'fontsize'     : options.fontsize,
    }

    run(options.infile, **forwarded)
def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14):
def run(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14, show_distance=False):
    """
    Programmatic entry point: pass every option, unchanged, to add_file().

    Nothing is returned; whatever add_file() produces is discarded.
    """
    settings = dict(
        inlist=inlist,
        capt=capt,
        ofp=ofp,
        output=output,
        ladderize=ladderize,
        addcaption=addcaption,
        extension=extension,
        dpi=dpi,
        fontsize=fontsize,
        show_distance=show_distance,
    )
    add_file(infile, **settings)
def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14, show_distance=False):
if not os.path.exists( infile ):
print "input file %s does not exists" % infile
sys.exit( 1 )
......@@ -62,7 +102,12 @@ def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=Tr
if ofp:
outfile = ofp + "." + extension
tree = Tree(infile, format=9)
if show_distance:
tree = Tree(infile, format=0)
else:
#tree = Tree(infile, format=2)
#tree = Tree(infile, format=5)
tree = Tree(infile, format=9)
#tree = Tree(open(infile, 'r').read())
......@@ -85,8 +130,12 @@ def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=Tr
if ofp:
outfile = ofp + "_" + inlist + "." + extension
if output:
outfile = output
elif ladderize:
tree.ladderize()
if output:
outfile = output
makeimage(infile, outfile, caption, tree, addcaption=addcaption, dpi=dpi, fontsize=fontsize)
......@@ -101,14 +150,17 @@ def add_seq(inseq, inlist=None, capt=None, ladderize=True, addcaption=False, ext
with open(fnm, 'w') as fhi:
fhi.write(inseq)
ofn = add_file(fnm, inlist=inlist, capt=capt, ladderize=ladderize, addcaption=addcaption, extension=extension, dpi=dpi, fontsize=fontsize)
ofn = add_file(fnm, inlist=inlist, capt=capt, ladderize=ladderize, addcaption=addcaption, extension=extension, dpi=dpi, fontsize=fontsize)
data = None
print "opening png", ofn
if os.path.exists( ofn ):
with open(ofn, 'rb') as fho:
data = fho.read()
os.remove(ofn)
else:
print "tree image %s does not exists" % ofn
......@@ -119,7 +171,9 @@ def add_seq(inseq, inlist=None, capt=None, ladderize=True, addcaption=False, ext
def prune(inlist, tree, ladderize=True):
print "pruning", inlist
reqlist = []
with open( inlist, 'r' ) as fhd:
for line in fhd:
line = line.strip()
......@@ -135,7 +189,9 @@ def prune(inlist, tree, ladderize=True):
reqlist.append( line )
print reqlist
tree.prune( reqlist, preserve_branch_length=True )
if ladderize:
tree.ladderize()
......@@ -229,20 +285,4 @@ def makeimage(infile, outfile, caption, tree, addcaption=True, dpi=1200, fontsiz
if __name__ == '__main__':
try:
infile = sys.argv[1]
except:
print "no input file given"
sys.exit( 1 )
try:
inlist = sys.argv[2]
except:
print "no input list given"
inlist = None
main(infile, inlist=inlist)
main()
......@@ -14,8 +14,8 @@ import vcfmerger
import editdist
from treemanager import fixsppname
#GZ=SL2.40ch06g50000_000100001_000150000.vcf.gz.raw.vcf.gz; FA=$GZ.SL2.40ch06.fasta; ../vcfconcat.py -f -RIL -Rg -Rd -i $GZ; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree
#FA=SL2.40ch06g50000_000100001_000150000.vcf.gz.SL2.40ch06.fasta; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree
#GZ=SL2.40ch06g50000_000100001_000150000.vcf.gz.raw.vcf.gz; FA=$GZ.SL2.40ch06.fasta; ../vcfconcat.py -f -RIL -Rg -Rd -i $GZ; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py --infile $FA.tree
#FA=SL2.40ch06g50000_000100001_000150000.vcf.gz.SL2.40ch06.fasta; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py --infile $FA.tree
......@@ -30,21 +30,22 @@ def main(args):
parser = argparse.ArgumentParser(description='Concatenate SNPs as a single sequence for each species.')
parser.add_argument('-c', '--chrom', '--chromosome', dest='chromosome' , default=None , action='store' , nargs='?', type=str , help='Chromosome to filter [all]')
parser.add_argument('-I', '--ignore', '--skip' , dest='ignore' , default=[] , action='append' , nargs='*', type=str , help='Chromosomes to skip')
parser.add_argument('-s', '--start' , dest='start' , default=None , action='store' , nargs='?', type=int , help='Chromosome start position to filter [0]')
parser.add_argument('-e', '--end' , dest='end' , default=None , action='store' , nargs='?', type=int , help='Chromosome end position to filter [-1]')
parser.add_argument('-t', '--threads' , dest='threads' , default=0 , action='store' , nargs='?', type=int , help='Number of threads [num chromosomes]')
parser.add_argument('-f', '--fasta' , dest='fasta' , action='store_true' , help='Output in fasta format [default: clustal alignment .aln format]')
parser.add_argument('-r', '--noref' , dest='noref' , action='store_false', help='Do not print reference [default: true]')
parser.add_argument('-R', '--RIL' , dest='RIL' , action='store_true' , help='RIL mode: false]')
parser.add_argument('-Rm','--RIL-mads' , dest='RILmads' , default=0.25 , action='store' , nargs='?', type=float, help='RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]')
parser.add_argument('-Rs','--RIL-minsim' , dest='RILminsim' , default=0.75 , action='store' , nargs='?', type=float, help='RIL percentage of nucleotides identical to reference to classify as reference: 0.75]')
parser.add_argument('-Rg','--RIL-greedy' , dest='RILgreedy' , action='store_true' , help='RIL greedy convert nucleotides to either the reference sequence or the alternative sequence: false]')
parser.add_argument('-Rd','--RIL-delete' , dest='RILdelete' , action='store_true' , help='RIL delete invalid sequences: false]')
parser.add_argument('-M' ,'--RIL-method' , dest='groupMethod', default=dflmethod, action='store' , nargs='?', choices=methods.keys(), type=str , help='Clustering method for RIL selection of good and bad sequences [' + ','.join(methods.keys()) + ']')
parser.add_argument('-i', '--input' , dest='input' , default=None , nargs='?', type=str , help='Input file')
parser.add_argument('-c', '--chrom' , '--chromosome', dest='chromosome' , default=None , action='store' , nargs='?', type=str , help='Chromosome to filter [all]')
parser.add_argument('-I', '--ignore', '--skip' , dest='ignore' , default=[] , action='append' , nargs='*', type=str , help='Chromosomes to skip')
parser.add_argument('-s', '--start' , dest='start' , default=None , action='store' , nargs='?', type=int , help='Chromosome start position to filter [0]')
parser.add_argument('-e', '--end' , dest='end' , default=None , action='store' , nargs='?', type=int , help='Chromosome end position to filter [-1]')
parser.add_argument('-t', '--threads' , dest='threads' , default=0 , action='store' , nargs='?', type=int , help='Number of threads [num chromosomes]')
parser.add_argument('-a', '--clustal' , dest='fasta' , action='store_false', help='Output in clustal .aln format [default: fasta format]')
parser.add_argument('-r', '--noref' , dest='noref' , action='store_false', help='Do not print reference [default: true]')
parser.add_argument('-n', '--ref-name' , dest='refname' , default='ref' , action='store' , nargs='?', type=str , help='Reference name [default: ref]')
parser.add_argument('-R', '--RIL' , dest='RIL' , action='store_true' , help='RIL mode: false]')
parser.add_argument('-Rm','--RIL-mads' , dest='RILmads' , default=0.25 , action='store' , nargs='?', type=float, help='RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]')
parser.add_argument('-Rs','--RIL-minsim' , dest='RILminsim' , default=0.75 , action='store' , nargs='?', type=float, help='RIL percentage of nucleotides identical to reference to classify as reference: 0.75]')
parser.add_argument('-Rg','--RIL-greedy' , dest='RILgreedy' , action='store_true' , help='RIL greedy convert nucleotides to either the reference sequence or the alternative sequence: false]')
parser.add_argument('-Rd','--RIL-delete' , dest='RILdelete' , action='store_true' , help='RIL delete invalid sequences: false]')
parser.add_argument('-M' ,'--RIL-method' , dest='groupMethod', default=dflmethod, action='store' , nargs='?', choices=methods.keys(), type=str , help='Clustering method for RIL selection of good and bad sequences [' + ','.join(methods.keys()) + ']')
parser.add_argument('-i' , '--input' , dest='input' , default=None , nargs='?', type=str , help='Input file')
#parser.add_argument('input' , default=None , action='store' , nargs='?', metavar='input file', type=str , help='Input file')
options = parser.parse_args(args)
......@@ -55,7 +56,7 @@ def main(args):
parallel = False
config = {
'format' : 'aln',
'format' : 'fasta',
'ignore' : [],
'inchr' : None,
'inend' : None,
......@@ -63,6 +64,7 @@ def main(args):
'infile' : None,
'instart' : None,
'noref' : True,
'refname' : None,
'ouchr' : None,
'oufhd' : None,
'RIL' : False,
......@@ -83,6 +85,7 @@ def main(args):
config['inend' ] = options.end
config['instart' ] = options.start
config['noref' ] = options.noref
config['refname' ] = options.refname
config['threads' ] = options.threads
config['RIL' ] = options.RIL
config['RILmads' ] = options.RILmads
......@@ -110,8 +113,8 @@ def main(args):
sys.exit(1)
if options.fasta:
config['format' ] = 'fasta'
if not options.fasta:
config['format' ] = 'aln'
if ( config['instart'] is not None ) and ( config['inend'] is not None ):
......@@ -465,7 +468,7 @@ def parse(config, refs, chro):
if config['noref' ]:
refsfrag = refsStrs[frag:frag+60]
printfilealn( config, 'ref' , refsfrag, chro )
printfilealn( config, config['refname'], refsfrag, chro )
for spp in sorted( sourcesStrs ):
poses = sourcesStrs[spp]
......@@ -483,7 +486,7 @@ def parse(config, refs, chro):
printfilefasta(config, sppname, poses , chro)
if config['noref' ]:
printfilefasta(config, 'ref' , refsStrs, chro)
printfilefasta(config, config['refname'], refsStrs, chro)
......
......@@ -126,7 +126,7 @@ def main(args):
print "IDX", config['idx']
print "GFF", config['ingffreader'].index
assert set(config['idx'].keys()) <= set(config['ingffreader'].index.keys()), "VCF chromosomes (%s) are not a subset from GFF (%s)" % (", ".join(config['idx'].keys()), ", ".join(config['ingffreader'].index.keys()) )
assert set(config['ingffreader'].index.keys()).issubset( set(config['idx'].keys()) ), "VCF chromosomes (%s) are not a subset from GFF (%s)" % (", ".join(config['idx'].keys()), ", ".join(config['ingffreader'].index.keys()) )
if config['inchr'] is not None:
......
......@@ -309,6 +309,8 @@ class vcfResult(object):
print "register #%d is empty" % register
sys.exit( 1 )
#print register
#if rcount % 100 == 0:
# sys.stderr.write("\n")
# sys.stderr.flush()
......@@ -332,14 +334,14 @@ class vcfResult(object):
if ( vcfResult.excludHET ) and ( desti.find(',') != -1 ):
vcfResult.simpliStats['Heterozygous Dest'] += 1
#print "heretozygous: %s" % desti
print "heterozygous: %s" % desti
self.printprogress('h', key='Heterozygous Dest', skip=vcfResult.print_every)
#return ""
continue
if ( vcfResult.excludHOMO ) and ( set(sourc.split(',')) == set(desti.split(',')) ):
vcfResult.simpliStats['Homozygous'] += 1
#print "heretozygous: %s" % desti
print "homozygous : %s" % desti
self.printprogress('o', key='Homozygous', skip=vcfResult.print_every)
#return ""
continue
......@@ -589,13 +591,13 @@ class vcfFile(object):
descIndex = 9 + self.fileCol - 1
info = cols[8]
desc = cols[descIndex] # 1 based
#print cols
self.register['chrom'] = cols[0]
self.register['pos' ] = int(cols[1])
self.register['src' ] = cols[3]
self.register['dst' ] = cols[4]
self.register['desc' ] = desc
self.register['dst' ] = ",".join(sorted(list(set(self.register['src' ].split(",") + self.register['dst' ].split(",")))))
if ':' in desc and ':' in info and 'GT' in info:
#assert ':' in info, info
......@@ -640,9 +642,12 @@ class vcfFile(object):
#print '.',
continue
if (len(s) == 1) and (gt0 == '0'): # homozygous identical to reference
#print '0',
continue
if (gt0 == '0'): # homozygous identical to reference
if len(s) == 1:
#print '0',
continue
else:
self.register['dst' ] = ",".join(sorted(list(set(self.register['src' ].split(",") + self.register['dst' ].split(",")))))
#print 'v'
#if any([gt == '0' for gt in (gt0, gt1)]): #
......@@ -720,6 +725,7 @@ class vcfHeap(object):
self.noncarefiles = []
self.currResult = vcfResult( simplify=self.simplify, noncarefiles=self.noncarefiles, translation=translation )
self.currResult = None
print self.ctime
def addFile( self, filecare, fileName, filedesc ):
......@@ -991,7 +997,7 @@ def main(incsv, translation_str):
translation = {}
if translation_str is not None:
for pair in translation_str.split(';'):
src, dst = pair.split(',')
src, dst = pair.split(':')
assert src not in translation
translation[ src ] = dst
......
......@@ -3,7 +3,9 @@ import os
import sys
import csv
import datetime
import argparse
from copy import