# run.py
import os
import shlex
import subprocess
import sys

# Shell command template for an ENA portal API search (result=sample, TSV
# output) by secondary sample accession; retrieval() replaces the XXXXXX
# placeholder with the accession before running it.
curl = """curl -X POST -H "Content-Type: application/x-www-form-urlencoded" -d 'result=sample&query=secondary_sample_accession%3D%22XXXXXX%22&format=tsv' "https://www.ebi.ac.uk/ena/portal/api/search\""""
# Base URL that retrieval() prefixes to a token taken from the search output
# in order to download the XML record.
xml = "https://www.ebi.ac.uk/ena/browser/api/xml/"
# Example MGnify download URLs of the shape produced from the `source` template:
# wget https://www.ebi.ac.uk/metagenomics/api/v1/genomes/MGYG-HGUT-00001/downloads/MGYG-HGUT-00001.fna
# wget https://www.ebi.ac.uk/metagenomics/api/v1/genomes/MGYG-HGUT-00001/downloads/MGYG-HGUT-00001.gff
# wget https://www.ebi.ac.uk/metagenomics/api/v1/genomes/MGYG-HGUT-00001/downloads/MGYG-HGUT-00001_InterProScan.tsv
# wget https://www.ebi.ac.uk/metagenomics/api/v1/genomes/MGYG-HGUT-02750/downloads/MGYG-HGUT-02750_eggNOG.tsv

# Download URL template: retrieval() replaces XXX with the genome identifier
# and the ".type" suffix with the suffix of each file to fetch.
source = "https://www.ebi.ac.uk/metagenomics/api/v1/genomes/XXX/downloads/XXX.type"

def retrieval(identifier, accession):
    """Download the ENA XML record and the MGnify genome files for one genome.

    The XML record is stored as ``./xml/<accession>.xml`` and the genome
    files (``.fna``, ``.gff``, ``_InterProScan.tsv``, ``_eggNOG.tsv``) under
    ``./genome/<identifier>/``. Files that already exist locally are not
    requested again.

    Args:
        identifier: Genome identifier substituted into the ``source`` URL
            template and used as the local folder and file-name prefix.
        accession: Accession substituted into the ``curl`` search command
            and used as the XML file name.

    Raises:
        FileNotFoundError: If the ``curl`` or ``wget`` executable cannot be
            found.
    """
    print("Processing", identifier)
    base_url = source.replace("XXX", identifier)
    suffixes = (".fna", ".gff", "_InterProScan.tsv", "_eggNOG.tsv")
    urls = [base_url.replace(".type", suffix) for suffix in suffixes]

    # ENA XML record
    xml_destination = "./xml/" + accession + ".xml"
    if not os.path.isfile(xml_destination):
        # wget does not create missing parent folders for --output-document.
        os.makedirs("./xml", exist_ok=True)
        fields = run_command(curl.replace("XXXXXX", accession)).split()
        # The third whitespace-separated token of the search output is what
        # gets appended to the XML base URL; with fewer tokens there is
        # nothing to download.
        if len(fields) > 2:
            subprocess.run(
                ["wget", "-nc", xml + fields[2],
                 "--output-document=" + xml_destination],
                check=False,
            )
        else:
            print("No ENA record found for", accession, file=sys.stderr)

    # MGnify genome files
    genome_dir = "./genome/" + identifier + "/"
    os.makedirs(genome_dir, exist_ok=True)
    for url in urls:
        destination = genome_dir + url.split("/")[-1]
        if not os.path.isfile(destination):
            # Argument list instead of a shell string, so characters in the
            # identifier are never interpreted by a shell.
            subprocess.run(
                ["wget", "-nc", url, "--output-document=" + destination],
                check=False,
            )

def run_command(command):
    """Run a command line without a shell and return its standard output.

    Args:
        command: Command line as a single string; it is split into
            arguments with ``shlex.split``.

    Returns:
        The captured standard output decoded as UTF-8. Bytes that are not
        valid UTF-8 are replaced instead of raising, so non-ASCII output
        no longer aborts the caller.

    Raises:
        FileNotFoundError: If the executable named in ``command`` cannot be
            found.
    """
    result = subprocess.run(shlex.split(command), capture_output=True)
    return result.stdout.decode("utf-8", errors="replace")

def conversion(identifier):
    """Run the MGnifyParser ``conversion`` tool on a downloaded genome.

    Passes ``./genome/<identifier>/<identifier>.fna`` and ``.gff`` as inputs
    and ``./genome/<identifier>/<identifier>.conversion.ttl`` as the output.
    The command line is printed before it is executed; the exit status of
    the tool is not checked.

    Args:
        identifier: Genome identifier; names the genome folder, its files
            and the ``-id`` value passed to the tool.

    Raises:
        FileNotFoundError: If the ``java`` executable cannot be found.
    """
    prefix = "./genome/" + identifier + "/" + identifier
    command = [
        "java", "-jar", "./MGnifyParser.jar",
        "-tool", "conversion",
        "-f", prefix + ".fna",
        "-gff2rdf",
        "-i", prefix + ".gff",
        "-id", identifier,
        "-codon", "11",
        "-topology", "linear",
        "-o", prefix + ".conversion.ttl",
    ]
    print(" ".join(command))
    # Argument list instead of a shell string, so characters in the
    # identifier are never interpreted by a shell.
    subprocess.run(command, check=False)


def interproscan(identifier):
    """Run the MGnifyParser ``interpro`` tool for one genome.

    Passes ``<identifier>.conversion.ttl`` as input,
    ``<identifier>_InterProScan.tsv`` as the TSV and
    ``<identifier>.interproscan.ttl`` as the output, all inside
    ``./genome/<identifier>/``. The command line is printed before it is
    executed; the exit status of the tool is not checked.

    Args:
        identifier: Genome identifier naming the genome folder and its files.

    Raises:
        FileNotFoundError: If the ``java`` executable cannot be found.
    """
    prefix = "./genome/" + identifier + "/" + identifier
    command = [
        "java", "-jar", "./MGnifyParser.jar",
        "-tool", "interpro",
        "-i", prefix + ".conversion.ttl",
        "-o", prefix + ".interproscan.ttl",
        "-tsv", prefix + "_InterProScan.tsv",
        "-version", "InterProScan-v5_35-74_0",
    ]
    print(" ".join(command))
    # Argument list instead of a shell string, so characters in the
    # identifier are never interpreted by a shell.
    subprocess.run(command, check=False)

def eggnog(identifier):
    """Run the MGnifyParser ``eggnog`` tool for one genome.

    Passes ``<identifier>.interproscan.ttl`` as input,
    ``<identifier>_eggNOG.tsv`` as the TSV and
    ``<identifier>.interproscan.eggnog.ttl`` as the output, all inside
    ``./genome/<identifier>/``. The command line is printed before it is
    executed; the exit status of the tool is not checked.

    Args:
        identifier: Genome identifier naming the genome folder and its files.

    Raises:
        FileNotFoundError: If the ``java`` executable cannot be found.
    """
    prefix = "./genome/" + identifier + "/" + identifier
    command = [
        "java", "-jar", "./MGnifyParser.jar",
        "-tool", "eggnog",
        "-i", prefix + ".interproscan.ttl",
        "-o", prefix + ".interproscan.eggnog.ttl",
        "-tsv", prefix + "_eggNOG.tsv",
    ]
    print(" ".join(command))
    # Argument list instead of a shell string, so characters in the
    # identifier are never interpreted by a shell.
    subprocess.run(command, check=False)


def main():
    """Run the download and annotation steps for every genome in ``identifiers.tsv``.

    Each line is stripped and split on tabs; the second column is used as
    the genome identifier and the third as the accession. Only identifiers
    starting with ``MGYG`` are processed, in the order retrieval,
    conversion, interproscan, eggnog.
    """
    with open("identifiers.tsv") as handle:
        for line in handle:
            columns = line.strip().split("\t")
            # Blank or truncated lines (e.g. a trailing empty line) carry no
            # identifier/accession columns; skip them instead of crashing.
            if len(columns) < 3:
                continue
            identifier, accession = columns[1], columns[2]
            if not identifier.startswith("MGYG"):
                continue
            retrieval(identifier, accession)
            conversion(identifier)
            interproscan(identifier)
            eggnog(identifier)


if __name__ == "__main__":
    main()