Commit def67dda authored by Aflitos, Saulo Alves

fasta merge script

parent d228c226
@@ -3,12 +3,14 @@
*.sql
*.log
*.sqlite
+*~
pypy
tosql
win/
data/
data2/
data3/
+data4/
introgression_viewer.tgz
introgression_viewer.xz
static/FileSaver.js/demo/
...
@@ -4,8 +4,8 @@ import os
import sys
import csv
import re
-import unicodedata
-from unidecode import unidecode
+#import unicodedata
+#from unidecode import unidecode
from filemanager import checkfile, openfile
...
@@ -4,8 +4,8 @@ import os
import sys
import csv
import re
-import unicodedata
-from unidecode import unidecode
+#import unicodedata
+#from unidecode import unidecode
"""
EX1=vcfmerger/csv_list_multicolumn.py
@@ -65,7 +65,7 @@ def get_translation(intbl, tbl_k, tbl_vs):
        #vs = [ unidecode(v) for v in vs ]
        v = "_".join(vs)
-        k = sanitize(k, ' -.,:()=#&;')
+        #k = sanitize(k, ' -.,:()=#&;')
        v = sanitize(v, ' -.,:()=#&;')
        assert k not in data, "key %s found more than once" % ( k )
@@ -77,6 +77,17 @@ def get_translation(intbl, tbl_k, tbl_vs):
    return data, atad
+def sanitize(s, k, v="_"):
+    for r in k:
+        s = s.replace(r, v)
+    s = re.sub(v+'+', v, s)
+    s = s.strip(v)
+    s = s.decode('utf8').encode('ascii', 'backslashreplace')#, 'xmlcharrefreplace')
+    return s
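A minimal, self-contained sketch (Python 2, like the rest of the file) of what the sanitize() function added above is expected to produce; the sample header string is made up:

    import re

    def sanitize(s, k, v="_"):
        for r in k:
            s = s.replace(r, v)          # replace every listed character with the filler
        s = re.sub(v+'+', v, s)          # collapse runs of the filler character
        s = s.strip(v)                   # trim leading/trailing fillers
        s = s.decode('utf8').encode('ascii', 'backslashreplace')
        return s

    print sanitize("LA0409 (S. pimpinellifolium); cherry", ' -.,:()=#&;')
    # -> LA0409_S_pimpinellifolium_cherry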
def main():
    try:
        inlst = sys.argv[1]
@@ -121,15 +132,6 @@ def main():
        writer.writerow(cols)
-def sanitize(s, k, v="_"):
-    for r in k:
-        s = s.replace(r, v)
-    s = re.sub(v+'+', v, s)
-    s = s.strip(v)
-    s = s.decode('utf8').encode('ascii', 'xmlcharrefreplace')
-    return s
if __name__ == '__main__':
    main()
LST=introgress_moneymaker.lst
-#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
-ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} '$LST';'
+#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --infile {} --inlist pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
+ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --infile {} --inlist '$LST';'
#./pngfolder_to_html.py trees/*_pimp_problems.lst.png
#convert -page A4 -resample 1200 -quality 100 -density 1200 -compress Zip *_pimp_problems.lst.png*.png index_trees_short2.lst.vcf.gz.simplified.vcf.gz.filtered.vcf.gz.SL2.40ch06.0000_.vcf.gz.SL2.40ch06.fasta.tree_pimp_problems.lst.png.pdf
...
LST=introgress_moneymaker.lst
-#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
-ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} '$LST';'
+#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist pimp_problems.lst; ./newick_to_png.py --input {} cherry.lst;'
+ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist '$LST';'
#./pngfolder_to_html.py trees/*_pimp_problems.lst.png
#convert -page A4 -resample 1200 -quality 100 -density 1200 -compress Zip *_pimp_problems.lst.png*.png index_trees_short2.lst.vcf.gz.simplified.vcf.gz.filtered.vcf.gz.SL2.40ch06.0000_.vcf.gz.SL2.40ch06.fasta.tree_pimp_problems.lst.png.pdf
...
#!/usr/bin/python
import sys
import os

from collections import OrderedDict

def main():
    outfile = sys.argv[1]

    if os.path.exists(outfile):
        print "output file %s exists. quitting" % outfile
        sys.exit(1)
    else:
        print "output file %s" % outfile

    infiles = sys.argv[2:]

    for infile in infiles:
        if not os.path.exists(infile):
            print "input file %s does not exists" % infile
            sys.exit(1)

    data = OrderedDict()

    for infile in infiles:
        print "reading %s" % infile
        with open(infile, 'r') as fhd:
            for line in fhd:
                line = line.strip()

                if len(line) == 0:
                    continue

                if line[0] == ">":
                    name = line
                    print "reading %s seq %s" % (infile, name),
                    if name not in data:
                        data[name] = ""
                        print " *"
                    else:
                        print
                else:
                    data[name] += line

    with open(outfile, 'w') as fhd:
        for name in data:
            line = data[name]
            print "saving %s (%d)" % (name, len(line))
            fhd.write(name + "\n")
            for seq in split_by_n(line, 80):
                fhd.write(seq + "\n")

def split_by_n( seq, n ):
    """
    A generator to divide a sequence into chunks of n units.
    http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character
    """
    while seq:
        yield seq[:n]
        seq = seq[n:]

if __name__ == '__main__':
    main()
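A minimal, self-contained sketch (Python 2, as above) of how the merge script wraps the collected sequences to 80 columns with split_by_n(); the sample sequence is made up:

    def split_by_n(seq, n):
        # yield successive n-character chunks of seq
        while seq:
            yield seq[:n]
            seq = seq[n:]

    seq = "ACGT" * 45                                     # 180 bases
    print [len(chunk) for chunk in split_by_n(seq, 80)]   # -> [80, 80, 20]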
@@ -154,18 +154,18 @@ def listChromsGff(ingff):
def main(args):
    parser = argparse.ArgumentParser(description='Create makefile to convert files.')
    parser.add_argument( '-i' , '--input' , '--inlist' , dest='inlist' , default=None , nargs='?', type=str , help='input tab separated file')
    parser.add_argument( '-f' , '--fasta' , '--infasta' , dest='infasta' , default=None , nargs='?', type=str , help='input reference fasta. requires split size')
    parser.add_argument( '-s' , '--size' , dest='size' , default=0 , nargs='?', type=int , help='split size')
    parser.add_argument( '-p' , '--proj' , '--project' , dest='project' , default=None , nargs='?', type=str , help='project name')
    parser.add_argument( '-o' , '--out' , '--outfile' , dest='outfile' , default='Makefile', nargs='?', type=str , help='output name [default: makefile]')
    parser.add_argument( '-ec' , '--excluded-chrom' , dest='excluded_chroms' , default=[] , action='append' , type=str , help='Do not use the following chromosomes' )
    parser.add_argument( '-ic' , '--included-chrom' , dest='included_chroms' , default=[] , action='append' , type=str , help='Use EXCLUSIVELY these chromosomes' )
    #parser.add_argument( '-g' , '--gff' , '--ingff' , dest='ingff' , default=None , nargs='?', type=str , help='input gff file')
    parser.add_argument( '-n' , '--dry' , '--dry-run' , dest='dry' , default=False , action='store_true' , help='dry-run')
    parser.add_argument( '-m' , '--merge' , '--cluster_merge' , dest='merge' , default=False , action='store_true' , help='do merged clustering (resource intensive) [default: no]')
    parser.add_argument( '-np' , '--no-pickle' , dest='dopickle' , default=True , action='store_false', help='do not generate pickle database [default: no]')
    parser.add_argument( '-t' , '--sub_threads' , dest='sub_threads' , default=5 , nargs='?', type=int , help='threads of submake to tree building [default: 5]')
    parser.add_argument( '-St' , '--smart_threads' , dest='smart_threads' , default=None , nargs='?', type=int , help='threads of submake to tree building [default: 5]')
@@ -175,12 +175,13 @@ def main(args):
    parser.add_argument( '-SS' , '--simplify-include-singleton' , dest='simplify_do_singleton_filter', default=True , action='store_false', help='Do not simplify single SNPS')
    parser.add_argument( '-So' , '--simplify-output' , dest='simplify_output' , default=None , nargs='?', type=str , help='Simplify output file')
    parser.add_argument( '-Coc', '--concat-chrom' , '--concat-chromosome' , dest='concat_chromosome' , default=None , nargs='?', action='store' , type=str , help='Concat - Chromosome to filter [all]')
    parser.add_argument( '-CoI', '--concat-ignore', '--concat-skip' , dest='concat_ignore' , default=[] , nargs='*', action='append' , type=str , help='Concat - Chromosomes to skip')
    parser.add_argument( '-Cos', '--concat-start' , dest='concat_start' , default=None , nargs='?', action='store' , type=int , help='Concat - Chromosome start position to filter [0]')
    parser.add_argument( '-Coe', '--concat-end' , dest='concat_end' , default=None , nargs='?', action='store' , type=int , help='Concat - Chromosome end position to filter [-1]')
    parser.add_argument( '-Cot', '--concat-threads' , dest='concat_threads' , default=None , nargs='?', action='store' , type=int , help='Concat - Number of threads [num chromosomes]')
    parser.add_argument( '-Cor', '--concat-noref' , dest='concat_noref' , action='store_false', help='Concat - Do not print reference [default: true]')
+    parser.add_argument( '-Con', '--concat-ref-name' , dest='concat_refname' , default=None , nargs='?', action='store' , type=str , help='Concat - Reference name [default: ref]')
    parser.add_argument( '-CoR', '--concat-RIL' , dest='concat_RIL' , action='store_true' , help='Concat - RIL mode: false]')
    parser.add_argument( '-CoRm','--concat-RIL-mads' , dest='concat_RILmads' , default=None , nargs='?', action='store' , type=float, help='Concat - RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]')
    parser.add_argument( '-CoRs','--concat-RIL-minsim' , dest='concat_RILminsim' , default=None , nargs='?', action='store' , type=float, help='Concat - RIL percentage of nucleotides identical to reference to classify as reference: 0.75]')
@@ -249,6 +250,7 @@ def main(args):
    concat_end = options.concat_end
    concat_threads = options.concat_threads
    concat_noref = options.concat_noref
+    concat_refname = options.concat_refname
    concat_RIL = options.concat_RIL
    concat_RILmads = options.concat_RILmads
    concat_RILminsim = options.concat_RILminsim
@@ -851,6 +853,9 @@ cleanpickle_%(dirFix)s: cleanok
    if not concat_noref:
        concat_opts += " --noref"
+    if concat_refname:
+        concat_opts += " --ref-name %s" % concat_refname
    if concat_RIL:
        concat_opts += " --RIL"
@@ -907,13 +912,13 @@ tree: $(OUTTREE)
png: $(OUTPNG)
%%.vcf.gz.fasta.tree.png: %%.vcf.gz.fasta.tree
-\t%(topng)s $^
+\t%(topng)s --infile $^
.PHONY: fasta
fasta: $(OUTFASTA)
%%.vcf.gz.fasta: %%.vcf.gz
-\t%(concat)s %(concat_opts)s --fasta -i $^
+\t%(concat)s %(concat_opts)s -i $^
\tif [ -f "$@" ]; then rm $@; fi
\tln `readlink -f $^.*.fasta` $@
...
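A minimal sketch (Python 2; values are hypothetical, variable names mirror the makefile generator above) of how the new --concat-ref-name option ends up in the generated concat command line:

    concat_noref   = True          # default: keep the reference, so no --noref flag
    concat_refname = "Moneymaker"  # hypothetical reference name passed via -Con
    concat_RIL     = False

    concat_opts = ""
    if not concat_noref:
        concat_opts += " --noref"
    if concat_refname:
        concat_opts += " --ref-name %s" % concat_refname
    if concat_RIL:
        concat_opts += " --RIL"

    print concat_opts  # -> ' --ref-name Moneymaker'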
@@ -2,6 +2,8 @@
import sys
import os
+import argparse
from ete2 import Tree
try:
    print "importing image"
@@ -19,7 +21,7 @@ except ImportError:
import math
import tempfile
-#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py {} pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
+#ls trees/*.tree | xargs -I{} -P 20 bash -c 'echo {}; ./newick_to_png.py --input {} --inlist pimp_problems.lst; ./newick_to_png.py {} cherry.lst;'
print_ascii = False
-def main(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14):
-    add_file(infile, inlist=inlist, capt=capt, ofp=ofp, output=output, ladderize=ladderize, addcaption=addcaption, extension=extension, dpi=dpi, fontsize=fontsize)
+def main():
+    parser = argparse.ArgumentParser(description='Convert Newick file to PNG.')
+    parser.add_argument('--infile' , dest='infile' , default=None , action='store' , nargs='?', required=True, type=str , help='Input Newick file' )
+    parser.add_argument('--inlist' , dest='inlist' , default=None , action='store' , nargs='?', type=str , help='Input rename list' )
+    parser.add_argument('--caption' , dest='caption' , default=None , action='store' , nargs='?', type=str , help='Image caption' )
+    parser.add_argument('--prefix' , dest='prefix' , default=None , action='store' , nargs='?', type=str , help='File prefix' )
+    parser.add_argument('--output' , dest='output' , default=None , action='store' , nargs='?', type=str , help='Output name' )
+    parser.add_argument('--extension' , dest='extension' , default="png" , action='store' , nargs='?', type=str , help='Image extension' )
+    parser.add_argument('--dpi' , dest='dpi' , default=1200 , action='store' , nargs='?', type=int , help='Image DPI' )
+    parser.add_argument('--fontsize' , dest='fontsize' , default=14 , action='store' , nargs='?', type=int , help='Font size' )
+    parser.add_argument('--no_ladderize' , dest='ladderize' , action='store_false', help="Don't ladderize image" )
+    parser.add_argument('--no_addcaption', dest='addcaption' , action='store_false', help='Do not add caption to image')
+    parser.add_argument('--show_distance', dest='show_distance', action='store_true' , help='Plot with distance')
+
+    options = parser.parse_args()
+    print options
+
+    if options.infile is None:
+        print "No input file given"
+        parser.print_help()
+        sys.exit(1)
+
+    run(options.infile,
+        inlist = options.inlist ,
+        capt = options.caption ,
+        ofp = options.prefix ,
+        output = options.output ,
+        ladderize = options.ladderize ,
+        addcaption = options.addcaption ,
+        extension = options.extension ,
+        dpi = options.dpi ,
+        show_distance = options.show_distance,
+        fontsize = options.fontsize)
-def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14):
+def run(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14, show_distance=False):
+    add_file(infile, inlist=inlist, capt=capt, ofp=ofp, output=output, ladderize=ladderize, addcaption=addcaption, extension=extension, dpi=dpi, fontsize=fontsize, show_distance=show_distance)
+
+def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=True, addcaption=True, extension="png", dpi=1200, fontsize=14, show_distance=False):
    if not os.path.exists( infile ):
        print "input file %s does not exists" % infile
        sys.exit( 1 )
@@ -62,7 +102,12 @@ def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=Tr
    if ofp:
        outfile = ofp + "." + extension
-    tree = Tree(infile, format=9)
+    if show_distance:
+        tree = Tree(infile, format=0)
+    else:
+        #tree = Tree(infile, format=2)
+        #tree = Tree(infile, format=5)
+        tree = Tree(infile, format=9)
    #tree = Tree(open(infile, 'r').read())
@@ -85,8 +130,12 @@ def add_file(infile, inlist=None, capt=None, ofp=None, output=None, ladderize=Tr
        if ofp:
            outfile = ofp + "_" + inlist + "." + extension
-    if output:
-        outfile = output
+    elif ladderize:
+        tree.ladderize()
+
+    if output:
+        outfile = output
    makeimage(infile, outfile, caption, tree, addcaption=addcaption, dpi=dpi, fontsize=fontsize)
@@ -101,14 +150,17 @@ def add_seq(inseq, inlist=None, capt=None, ladderize=True, addcaption=False, ext
    with open(fnm, 'w') as fhi:
        fhi.write(inseq)

    ofn = add_file(fnm, inlist=inlist, capt=capt, ladderize=ladderize, addcaption=addcaption, extension=extension, dpi=dpi, fontsize=fontsize)

    data = None

    print "opening png", ofn
    if os.path.exists( ofn ):
        with open(ofn, 'rb') as fho:
            data = fho.read()
        os.remove(ofn)
    else:
        print "tree image %s does not exists" % ofn
@@ -119,7 +171,9 @@ def add_seq(inseq, inlist=None, capt=None, ladderize=True, addcaption=False, ext
def prune(inlist, tree, ladderize=True):
    print "pruning", inlist

    reqlist = []

    with open( inlist, 'r' ) as fhd:
        for line in fhd:
            line = line.strip()
@@ -135,7 +189,9 @@ def prune(inlist, tree, ladderize=True):
            reqlist.append( line )

    print reqlist

    tree.prune( reqlist, preserve_branch_length=True )

    if ladderize:
        tree.ladderize()
@@ -229,20 +285,4 @@ def makeimage(infile, outfile, caption, tree, addcaption=True, dpi=1200, fontsiz
if __name__ == '__main__':
-    try:
-        infile = sys.argv[1]
-    except:
-        print "no input file given"
-        sys.exit( 1 )
-
-    try:
-        inlist = sys.argv[2]
-    except:
-        print "no input list given"
-        inlist = None
-
-    main(infile, inlist=inlist)
+    main()
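A minimal sketch (Python 2) of the flag-based interface this commit switches newick_to_png.py to; the tree path is hypothetical, and only a subset of the flags added above is shown:

    import argparse

    # mirrors the new main() above; the argument values below are made up
    parser = argparse.ArgumentParser(description='Convert Newick file to PNG.')
    parser.add_argument('--infile', required=True)
    parser.add_argument('--inlist', default=None)
    parser.add_argument('--show_distance', action='store_true')

    opts = parser.parse_args(['--infile', 'trees/example.tree',
                              '--inlist', 'introgress_moneymaker.lst'])
    print opts.infile, opts.inlist, opts.show_distance
    # -> trees/example.tree introgress_moneymaker.lst False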
@@ -14,8 +14,8 @@ import vcfmerger
import editdist
from treemanager import fixsppname
-#GZ=SL2.40ch06g50000_000100001_000150000.vcf.gz.raw.vcf.gz; FA=$GZ.SL2.40ch06.fasta; ../vcfconcat.py -f -RIL -Rg -Rd -i $GZ; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree
-#FA=SL2.40ch06g50000_000100001_000150000.vcf.gz.SL2.40ch06.fasta; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py $FA.tree
+#GZ=SL2.40ch06g50000_000100001_000150000.vcf.gz.raw.vcf.gz; FA=$GZ.SL2.40ch06.fasta; ../vcfconcat.py -f -RIL -Rg -Rd -i $GZ; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py --infile $FA.tree
+#FA=SL2.40ch06g50000_000100001_000150000.vcf.gz.SL2.40ch06.fasta; ../FastTreeMP -fastest -gamma -nt -bionj -boot 100 -log $FA.log -out $FA.tree $FA; ../FastTreeMP -nt -makematrix $FA > $FA.matrix; ./newick_to_png.py --infile $FA.tree
@@ -30,21 +30,22 @@ def main(args):
    parser = argparse.ArgumentParser(description='Concatenate SNPs as a single sequence for each species.')

    parser.add_argument('-c', '--chrom' , '--chromosome', dest='chromosome' , default=None , action='store' , nargs='?', type=str , help='Chromosome to filter [all]')
    parser.add_argument('-I', '--ignore', '--skip' , dest='ignore' , default=[] , action='append' , nargs='*', type=str , help='Chromosomes to skip')
    parser.add_argument('-s', '--start' , dest='start' , default=None , action='store' , nargs='?', type=int , help='Chromosome start position to filter [0]')
    parser.add_argument('-e', '--end' , dest='end' , default=None , action='store' , nargs='?', type=int , help='Chromosome end position to filter [-1]')
    parser.add_argument('-t', '--threads' , dest='threads' , default=0 , action='store' , nargs='?', type=int , help='Number of threads [num chromosomes]')
-    parser.add_argument('-f', '--fasta' , dest='fasta' , action='store_true' , help='Output in fasta format [default: clustal alignment .aln format]')
+    parser.add_argument('-a', '--clustal' , dest='fasta' , action='store_false', help='Output in clustal .aln format [default: fasta format]')
    parser.add_argument('-r', '--noref' , dest='noref' , action='store_false', help='Do not print reference [default: true]')
+    parser.add_argument('-n', '--ref-name' , dest='refname' , default='ref' , action='store' , nargs='?', type=str , help='Reference name [default: ref]')
    parser.add_argument('-R', '--RIL' , dest='RIL' , action='store_true' , help='RIL mode: false]')
    parser.add_argument('-Rm','--RIL-mads' , dest='RILmads' , default=0.25 , action='store' , nargs='?', type=float, help='RIL percentage of Median Absolute Deviation to use (smaller = more restrictive): 0.25]')
    parser.add_argument('-Rs','--RIL-minsim' , dest='RILminsim' , default=0.75 , action='store' , nargs='?', type=float, help='RIL percentage of nucleotides identical to reference to classify as reference: 0.75]')
    parser.add_argument('-Rg','--RIL-greedy' , dest='RILgreedy' , action='store_true' , help='RIL greedy convert nucleotides to either the reference sequence or the alternative sequence: false]')
    parser.add_argument('-Rd','--RIL-delete' , dest='RILdelete' , action='store_true' , help='RIL delete invalid sequences: false]')
    parser.add_argument('-M' ,'--RIL-method' , dest='groupMethod', default=dflmethod, action='store' , nargs='?', choices=methods.keys(), type=str , help='Clustering method for RIL selection of good and bad sequences [' + ','.join(methods.keys()) + ']')

    parser.add_argument('-i' , '--input' , dest='input' , default=None , nargs='?', type=str , help='Input file')
    #parser.add_argument('input' , default=None , action='store' , nargs='?', metavar='input file', type=str , help='Input file')

    options = parser.parse_args(args)
@@ -55,7 +56,7 @@ def main(args):
    parallel = False

    config = {
-        'format' : 'aln',
+        'format' : 'fasta',
        'ignore' : [],