fasta_merge.py 1.6 KB
Newer Older
Aflitos, Saulo Alves's avatar
Aflitos, Saulo Alves committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python

import sys
import os
from collections import OrderedDict

def main():
    """
    Merge one or more FASTA files into a single output file.

    Usage: fasta_merge.py <output.fasta> [<input.fasta> ...]

    The output path is read from sys.argv[1] and the input paths from
    sys.argv[2:]. Records are keyed by their full header line (including
    the leading ">"). When the same header is seen more than once, in the
    same file or in different files, its sequence data is concatenated in
    reading order. Records are written in order of first appearance, with
    the sequence wrapped at 80 characters per line.

    Exits with status 1 (after printing a message) when no output path is
    given, when the output file already exists, when an input file is
    missing, or when sequence data appears before any header line.
    """
    if len(sys.argv) < 2:
        print("usage: %s <output.fasta> [<input.fasta> ...]" % sys.argv[0])
        sys.exit(1)

    outfile = sys.argv[1]
    if os.path.exists(outfile):
        print("output file %s exists. quitting" % outfile)
        sys.exit(1)
    print("output file %s" % outfile)

    infiles = sys.argv[2:]

    # Validate every input up front so nothing is read if one is missing.
    for infile in infiles:
        if not os.path.exists(infile):
            print("input file %s does not exists" % infile)
            sys.exit(1)

    # header line -> list of sequence fragments; joined once when saving,
    # which avoids re-copying the growing sequence on every input line.
    data = OrderedDict()

    # `name` is not reset between files, so header-less leading lines of a
    # later file keep extending the previous record, as they always did.
    # NOTE(review): confirm this carry-over across files is desired.
    name = None
    for infile in infiles:
        print("reading %s" % infile)
        with open(infile, 'r') as fhd:
            for line in fhd:
                line = line.strip()
                if not line:
                    continue
                if line.startswith(">"):
                    name = line
                    is_new = name not in data
                    if is_new:
                        data[name] = []
                    # First occurrences are flagged with "  *"; the two
                    # spaces match what the former Python 2
                    # `print ...,` / `print " *"` pair emitted.
                    print("reading %s seq %s%s" % (infile, name, "  *" if is_new else ""))
                elif name is None:
                    # Previously this crashed with an unbound `name`.
                    print("input file %s has sequence data before the first header" % infile)
                    sys.exit(1)
                else:
                    data[name].append(line)

    with open(outfile, 'w') as fhd:
        for name in data:
            seq = "".join(data[name])
            print("saving %s (%d)" % (name, len(seq)))
            fhd.write(name + "\n")
            fhd.writelines(chunk + "\n" for chunk in split_by_n(seq, 80))

def split_by_n(seq, n):
    """
    A generator to divide a sequence into chunks of n units.
    http://stackoverflow.com/questions/9475241/split-python-string-every-nth-character

    seq -- any sliceable sequence with a length (str, list, ...).
    n   -- chunk size; must be a positive integer.

    Yields consecutive slices of at most n items; the last slice may be
    shorter. An empty sequence yields nothing.

    Raises ValueError when n is not positive. Because this is a generator,
    the error surfaces on the first iteration, not at call time.
    """
    if n <= 0:
        # A non-positive step would never consume the sequence.
        raise ValueError("n must be a positive integer, got %r" % (n,))

    # Walk by index instead of repeatedly re-slicing the remaining tail,
    # which copied the whole remainder on every step.
    start = 0
    total = len(seq)
    while start < total:
        yield seq[start:start + n]
        start += n


# Run the merge only when this file is executed as a script, so importing
# the module (e.g. to reuse split_by_n) has no side effects.
if __name__ == '__main__':
    main()