csv_list_multicolumn.py 1.91 KB
Newer Older
Aflitos, Saulo Alves's avatar
Aflitos, Saulo Alves committed
1
2
3
4
5
6
#!/usr/bin/python

import os
import sys
import csv
import re
Aflitos, Saulo Alves's avatar
Aflitos, Saulo Alves committed
7
8
#import unicodedata
#from unidecode import unidecode
Aflitos, Saulo Alves's avatar
Aflitos, Saulo Alves committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

from filemanager import checkfile, openfile

"""
EX1=vcfmerger/csv_list_multicolumn.py
EX2=vcfmerger/csv_renamer.py
VCF=1001genomes_snp-short-indel_only_ACGTN.vcf.gz
LST=A_thaliana_master_accession_list_1135_20151008.csv

${EX1} ${VCF}
${EX2} ${VCF}.list.csv ${LST} tg_ecotypeid name,othername,CS_number
"""

def main():
    """List the sample columns of a VCF file in a tab-separated file.

    Reads the input VCF path from ``sys.argv[1]`` and scans the file line by
    line. On reaching the column description line (a line starting with a
    single ``#``), every column after the first nine shared ones is treated
    as a sample name and written as one row to ``<invcf>.list.csv``. Each
    row holds three tab-separated fields: the constant ``1``,
    ``<invcf>|<1-based sample index>`` and the sample name. Scanning stops
    right after that line.

    Prints a usage message and exits with status 1 when no input path is
    given on the command line.
    """
    try:
        invcf = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; a bare except would also
        # hide KeyboardInterrupt and genuine programming errors.
        print("<invcf>")
        print("EG.: csv_list_multicolumn.py 1001genomes_snp-short-indel_only_ACGTN.vcf.gz")
        sys.exit(1)

    print("input vcf              %s" % invcf)

    checkfile(invcf)

    with openfile(invcf, 'r') as fhdi:
        # Binary mode is what the Python 2 csv module expects for its output.
        with open(invcf + '.list.csv', 'wb') as fhdo:
            writer = csv.writer(fhdo, delimiter='\t', quotechar='"')

            for line in fhdi:
                line = line.strip()

                # Blank lines and non-header lines carry nothing we need.
                if not line.startswith("#"):
                    continue

                print("HEADER %s" % line)

                if line.startswith("##"):  # definition lines
                    print("HEADER :: DEF %s" % line)
                    continue

                # Column description line: the last header line of interest.
                print("HEADER :: COL %s" % line)

                cols = line.split("\t")
                shared = cols[:9]  # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT
                names = cols[9:]   # one column per sample

                print("HEADER :: COL :: SHARED %s" % (shared,))
                print("HEADER :: COL :: NAMES %s" % (names,))

                for position, name in enumerate(names, 1):
                    writer.writerow(["1", "%s|%d" % (invcf, position), name])

                # Everything needed has been written; skip the rest of the file.
                break

# Run the listing only when executed as a script, not when imported.
if __name__ == '__main__':
    main()