Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Aflitos, Saulo Alves
ibrowser
Commits
398ab7eb
Commit
398ab7eb
authored
Jun 01, 2015
by
Aflitos, Saulo Alves
Browse files
converter for multicolumn vcf. again
parent
e7aeb5bd
Changes
3
Hide whitespace changes
Inline
Side-by-side
vcfmerger/gen_makefile.py
View file @
398ab7eb
...
...
@@ -10,62 +10,62 @@ timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_50k_sing
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
#make -f makefile_arabidopsis_xianwen_10k
#
#./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
#make -f makefile_arabidopsis_xianwen_10k_sing
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_genes
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_10k_introgression
#
#./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_tom84_50k_introgression
#
#
#
#/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_50k
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_greedy
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
#make -f makefile_RIL_50k_mode_ril_delete_greedy
#
#./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
#make -f makefile_RIL_10k
SCRIPT_DIR
=
'vcfmerger'
AUX_DIR
=
os
.
path
.
join
(
SCRIPT_DIR
,
'aux'
)
AUX_DIR
=
os
.
path
.
join
(
SCRIPT_DIR
)
merger
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'vcfmerger.py'
)
)
...
...
@@ -76,8 +76,8 @@ walk_ram = os.path.abspath( os.path.join( SCRIPT_DIR, 'vcf_walk_ram.py' ) )
walk_sql
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'vcf_walk_sql.py'
)
)
cluster
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'cluster.py'
)
)
topng
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT_DIR
,
'newick_to_png.py'
)
)
fasta_spacer
=
os
.
path
.
abspath
(
os
.
path
.
join
(
AUX
_DIR
,
'fasta_spacer.py'
)
)
tree_maker
=
os
.
path
.
abspath
(
os
.
path
.
join
(
AUX
_DIR
,
'FastTreeMP'
)
)
fasta_spacer
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT
_DIR
,
'fasta_spacer.py'
)
)
tree_maker
=
os
.
path
.
abspath
(
os
.
path
.
join
(
SCRIPT
_DIR
,
'FastTreeMP'
)
)
class
makewriter
(
object
):
...
...
@@ -372,7 +372,7 @@ def main(args):
if
infasta
:
#vcfmerger/
aux/
fasta_spacer.py GENOME.fa 50000
#vcfmerger/fasta_spacer.py GENOME.fa 50000
gff_cmd
=
"%s %s %s"
%
(
fasta_spacer
,
infasta
,
size
)
writer
.
write
(
infasta
,
filter_gff
,
gff_cmd
,
nick
=
'gff'
)
...
...
vcfmerger/gen_makefile.py.examples
View file @
398ab7eb
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/ara/indata
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis.csv --infasta TAIR10.fasta --size 50000 --project arabidopsis_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_50k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_50k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_50000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_50k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_50k_sing
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols
make -f makefile_arabidopsis_xianwen_10k
./vcfmerger/
aux/
gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
./vcfmerger/gen_makefile.py --input arabidopsis_xianwen.csv --filter-gff TAIR10.fasta_10000.gff.Chr4.gff.inversion.gff --project arabidopsis_xianwen_10k_sing --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --excluded-chrom chloroplast --excluded-chrom mitochondria --cluster-no-cols --simplify-include-singleton
make -f makefile_arabidopsis_xianwen_10k_sing
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/tom85
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 10000 --project tom84_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --infasta S_lycopersicum_chromosomes.2.40.fa --size 50000 --project tom84_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff ITAG2.3_gene_models.gff3.gene.gff3 --project tom84_genes --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_genes
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000_introgression.gff --project tom84_10k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_10k_introgression
./vcfmerger/
aux/
gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input short2.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000_introgression.gff --project tom84_50k_introgression --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_tom84_50k_introgression
/home/assembly/tomato150/programs/vcfmerger_ui/data/src/RIL
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_50k
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --cluster-no-cols
make -f makefile_RIL_50k_mode_ril
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_greedy
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_50000.gff --project RIL_50k_mode_ril_delete_greedy --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --concat-RIL --concat-RIL-greedy --concat-RIL-delete --cluster-no-cols
make -f makefile_RIL_50k_mode_ril_delete_greedy
./vcfmerger/
aux/
gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
./vcfmerger/gen_makefile.py --input RIL.lst --filter-gff S_lycopersicum_chromosomes.2.40.fa_10000.gff --project RIL_10k --no-pickle --cluster-no-svg --smart_threads 25 --cluster-threads 5 --cluster-no-cols
make -f makefile_RIL_10k
...
...
vcfmerger/split_multicolumn_vcf.py
0 → 100755
View file @
398ab7eb
#!/usr/bin/python
import
os
import
sys
import
string
# Genotype prefixes that main() counts as "skipped" instead of exporting.
ignores = [
    '0/0',  # reference
    './.',  # nocov
]
# Characters sanitize() keeps verbatim: ASCII letters, digits and "_".
valid_chars = frozenset(string.ascii_letters + string.digits + "_")


def sanitize(name):
    """Return *name* with every character not in valid_chars replaced by "_"."""
    cleaned = []
    for char in name:
        if char in valid_chars:
            cleaned.append(char)
        else:
            cleaned.append('_')
    return ''.join(cleaned)
def main():
    """Split a multi-sample (multicolumn) VCF into one VCF per sample column.

    The input path is taken from sys.argv[1].  For the column header line
    (a line starting with a single "#") one output file is created per
    column after the first nine shared columns, named
    "<infile>_<NN>_<sanitized name>.vcf", and "<infile>.lst" is written with
    one "1<TAB><absolute path><TAB><name>" line per output file.  Every data
    line is then copied to each sample file whose genotype field does not
    start with one of the prefixes in `ignores`.

    Exits with status 1 (after printing a message) when no input file is
    given, when it does not exist, or when it is a directory.

    Raises:
        AssertionError: a data line does not have the same number of columns
            as the column header (or appears before any column header).

    All print calls use the single-argument parenthesised form so the output
    is identical under Python 2's print statement and the code also parses
    under Python 3.
    """
    try:
        infile = sys.argv[1]
    except IndexError:
        print("no input file given")
        print("%s %s" % (sys.argv[0], "<INPUT MULTICOLUMN CSV>"))
        sys.exit(1)

    if not os.path.exists(infile):
        print("input file %s does not exists" % infile)
        sys.exit(1)

    if os.path.isdir(infile):
        print("input file %s is a folder" % infile)
        sys.exit(1)

    print("splitting %s" % infile)

    defs = []        # "##" definition lines, replayed at the top of each output
    names = []       # sample column names taken from the column header
    outfiles = []    # per sample: [name, path, file handle, skipped, valid]
    num_cols = None  # column count of the header; None until the header is seen

    try:
        with open(infile) as fhd:
            for line in fhd:
                line = line.strip()

                if not line:
                    continue

                if line.startswith("#"):
                    if line.startswith("##"):
                        # definition line
                        defs.append(line)
                        continue

                    # column description line
                    cols = line.split("\t")
                    num_cols = len(cols)
                    shared = cols[:9]  # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
                    names = cols[9:]

                    # A repeated column header re-creates the outputs; close
                    # the earlier handles first so they are not leaked.
                    for entry in outfiles:
                        if entry is not None:
                            entry[2].close()
                    outfiles = [None] * len(names)

                    # Loop-invariant pieces of the file name / log formats.
                    idx_width = str(len("%d" % len(names)))
                    name_width = str(max([len(x) for x in names] or [0]))
                    file_fmt = "%s_%0" + idx_width + "d_%s.vcf"
                    log_fmt = ("creating %" + idx_width + "d %-" +
                               name_width + "s to %s")

                    # The list file is closed as soon as every entry is written.
                    with open("%s.lst" % infile, 'w') as outlist:
                        for idx, name in enumerate(names):
                            nof = file_fmt % (infile, idx + 1, sanitize(name))
                            print(log_fmt % (idx + 1, name, nof))

                            nop = open(nof, 'w')
                            #                                skipped valid
                            outfiles[idx] = [name, nof, nop, 0,      0]

                            outlist.write("1\t%s\t%s\n" %
                                          (os.path.abspath(nof), name))

                            nop.write("\n".join(defs) + "\n")
                            nop.write("##Split from: %s column %d\n" %
                                      (os.path.abspath(infile), idx + 1))
                            nop.write("\t".join(shared))
                            nop.write("\t%s\n" % name)
                            nop.flush()
                    continue

                # data line
                cols = line.split("\t")
                if len(cols) != num_cols:
                    # Raised explicitly (not via `assert`) so the check is
                    # not stripped under "python -O"; the exception type is
                    # kept for backward compatibility.
                    raise AssertionError(
                        "data line has %d columns, column header has %s" %
                        (len(cols), num_cols))

                shared_text = "\t".join(cols[:9])
                data = cols[9:]

                for pos, ndata in enumerate(data):
                    if any([ndata.startswith(x) for x in ignores]):
                        outfiles[pos][3] += 1  # skipped
                        continue

                    outfiles[pos][4] += 1  # valid
                    outfiles[pos][2].write(shared_text + "\t%s\n" % ndata)
    finally:
        # Close every per-sample file even when parsing fails half way.
        for entry in outfiles:
            if entry is not None:
                entry[2].close()

    if outfiles:
        summary_fmt = ("closing %" + str(len("%d" % len(outfiles))) +
                       "d %-" + str(max([len(x[0]) for x in outfiles])) +
                       "s :: %-" + str(max([len(x[1]) for x in outfiles])) +
                       "s :: skipped %6d exported %6d total %7d")

        for idx, entry in enumerate(outfiles):
            print(summary_fmt % (idx + 1, entry[0], entry[1],
                                 entry[3], entry[4], entry[3] + entry[4]))
# Run the splitter only when this file is executed as a script.
if __name__ == '__main__':
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment