Commit 398ab7eb authored by Aflitos, Saulo Alves's avatar Aflitos, Saulo Alves
Browse files

converter for multicolumn vcf. again

parent e7aeb5bd
......@@ -10,62 +10,62 @@ timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
#./vcfmerger/aux/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_50k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_50k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_50k_sing
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_10k
#
#./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_10k_sing
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
#./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_genes
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k_introgression
#
#./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k_introgression
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_50k
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_greedy
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete_greedy
#
#./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_10k
SCRIPT_DIR = 'vcfmerger'
AUX_DIR = os.path.join(SCRIPT_DIR, 'aux')
AUX_DIR = os.path.join(SCRIPT_DIR)
merger = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcfmerger.py' ) )
......@@ -76,8 +76,8 @@ walk_ram = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_ram.py' ) )
walk_sql = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_sql.py' ) )
cluster = os.path.abspath( os.path.join( SCRIPT_DIR, 'cluster.py' ) )
topng = os.path.abspath( os.path.join( SCRIPT_DIR, 'newick_to_png.py') )
fasta_spacer = os.path.abspath( os.path.join( AUX_DIR , 'fasta_spacer.py' ) )
tree_maker = os.path.abspath( os.path.join( AUX_DIR , 'FastTreeMP' ) )
fasta_spacer = os.path.abspath( os.path.join( SCRIPT_DIR, 'fasta_spacer.py' ) )
tree_maker = os.path.abspath( os.path.join( SCRIPT_DIR, 'FastTreeMP' ) )
class makewriter(object):
......@@ -372,7 +372,7 @@ def main(args):
if infasta:
#vcfmerger/aux/fasta_spacer.py GENOME.fa 50000
#vcfmerger/fasta_spacer.py GENOME.fa 50000
gff_cmd = "%s %s %s" % (fasta_spacer, infasta, size)
writer.write( infasta, filter_gff, gff_cmd, nick='gff' )
......
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
./vcfmerger/aux/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_50k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_50k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_50k_sing
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_10k
./vcfmerger/aux/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_10k_sing
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k
./vcfmerger/aux/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_genes
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k_introgression
./vcfmerger/aux/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k_introgression
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_50k
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
make -f makefile_RIL_50k_mode_ril
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_greedy
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete_greedy
./vcfmerger/aux/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_10k
......
#!/usr/bin/python
import os
import sys
import string
# Genotype prefixes whose calls are not exported to the per-sample files.
ignores = [
    '0/0',  # reference
    './.',  # nocov
]

# Characters kept verbatim in generated file names: underscore, ASCII
# letters and digits.
valid_chars = frozenset("_" + string.ascii_letters + string.digits)
def sanitize(name):
    """Return *name* with every character not in ``valid_chars`` replaced by ``_``."""
    cleaned = []
    for char in name:
        if char in valid_chars:
            cleaned.append(char)
        else:
            cleaned.append('_')
    return ''.join(cleaned)
def _open_sample_outputs(infile, defs, shared, names, samples):
    """Create one output VCF per sample column plus the ``<infile>.lst`` index.

    Every opened output is appended to *samples* as soon as it exists, so the
    caller can still close it if creating a later file fails.

    infile  -- path of the multi-column input file (used as output prefix)
    defs    -- ``##`` definition lines collected before the column header
    shared  -- the first nine header columns, common to all outputs
    names   -- the sample column names (header columns 10 onwards)
    samples -- list extended in place with one record (dict) per sample
    """
    # Widths are the same for every sample, so compute them once.
    idx_width = len("%d" % len(names))
    name_width = max(len(name) for name in names) if names else 0
    source = os.path.abspath(infile)
    header = "\n".join(defs) + "\n"
    shared_text = "\t".join(shared)

    # The index file is only written here; "with" guarantees it gets closed.
    with open("%s.lst" % infile, 'w') as outlist:
        for num, name in enumerate(names, 1):
            path = "%s_%0*d_%s.vcf" % (infile, idx_width, num, sanitize(name))
            print("creating %*d %-*s to %s" % (idx_width, num, name_width, name, path))

            handle = open(path, 'w')
            samples.append({
                'name': name,
                'path': path,
                'handle': handle,
                'skipped': 0,
                'exported': 0,
            })

            outlist.write("1\t%s\t%s\n" % (os.path.abspath(path), name))
            handle.write(header)
            handle.write("##Split from: %s column %d\n" % (source, num))
            handle.write(shared_text)
            handle.write("\t%s\n" % name)
            handle.flush()


def main():
    """Split a multi-column VCF into one single-column VCF per sample.

    The input path is taken from ``sys.argv[1]``.  Lines starting with ``##``
    are collected as definitions; the single-``#`` line is the column header
    whose first nine tab-separated columns are shared and whose remaining
    columns are sample names.  For each sample a file named
    ``<infile>_<NN>_<sanitized name>.vcf`` is created holding the definitions
    seen up to the column header, a ``##Split from`` line, and the shared
    columns followed by that sample's column.  Data fields starting with one
    of the module-level ``ignores`` prefixes are counted as skipped and not
    written.  ``<infile>.lst`` lists ``1<TAB>absolute path<TAB>name`` for
    every output.  A per-sample summary is printed at the end.

    Exits with status 1 when no input path is given, the path does not exist,
    the path is a directory, or a data line does not have the same number of
    columns as the column header (including data appearing before it).
    """
    try:
        infile = sys.argv[1]
    except IndexError:
        print("no input file given")
        print("%s <INPUT MULTICOLUMN CSV>" % sys.argv[0])
        sys.exit(1)

    if not os.path.exists(infile):
        print("input file %s does not exists" % infile)
        sys.exit(1)

    if os.path.isdir(infile):
        print("input file %s is a folder" % infile)
        sys.exit(1)

    print("splitting %s" % infile)

    # str.startswith accepts a tuple, giving one call per field.
    skip_prefixes = tuple(ignores)
    defs = []
    samples = []
    num_cols = None

    try:
        with open(infile) as fhd:
            for line_no, line in enumerate(fhd, 1):
                line = line.strip()
                if not line:
                    continue

                if line.startswith("##"):  # definition lines
                    defs.append(line)
                    continue

                if line.startswith("#"):  # column description
                    # A repeated column header restarts the outputs; release
                    # the handles of the previous set before reopening.
                    for entry in samples:
                        entry['handle'].close()
                    del samples[:]

                    cols = line.split("\t")
                    num_cols = len(cols)
                    # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT | samples
                    _open_sample_outputs(infile, defs, cols[:9], cols[9:], samples)
                    continue

                cols = line.split("\t")
                if num_cols is None:
                    print("line %d of %s: data found before the column header" % (line_no, infile))
                    sys.exit(1)
                if len(cols) != num_cols:
                    print("line %d of %s: expected %d columns, found %d" % (line_no, infile, num_cols, len(cols)))
                    sys.exit(1)

                shared_text = "\t".join(cols[:9])
                for entry, call in zip(samples, cols[9:]):
                    if call.startswith(skip_prefixes):
                        entry['skipped'] += 1
                        continue
                    entry['exported'] += 1
                    entry['handle'].write(shared_text + "\t%s\n" % call)
    finally:
        # Runs on success, on errors and on sys.exit(), so no output handle
        # is left open.
        for entry in samples:
            entry['handle'].close()

    idx_width = len("%d" % len(samples))
    name_width = max(len(entry['name']) for entry in samples) if samples else 0
    path_width = max(len(entry['path']) for entry in samples) if samples else 0
    for num, entry in enumerate(samples, 1):
        print("closing %*d %-*s :: %-*s :: skipped %6d exported %6d total %7d" % (
            idx_width, num,
            name_width, entry['name'],
            path_width, entry['path'],
            entry['skipped'], entry['exported'],
            entry['skipped'] + entry['exported']))
# Run the splitter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment