Commit 25ddb525 authored by Koehorst, Jasper
Browse files

eggnog.gz check added

parent c9740098
...@@ -23,7 +23,7 @@ def retrieval(identifier, accession): ...@@ -23,7 +23,7 @@ def retrieval(identifier, accession):
egg = path.replace(".type", "_eggNOG.tsv") egg = path.replace(".type", "_eggNOG.tsv")
# Curl command # Curl command
xml_destination = "./xml/" + accession + ".xml" xml_destination = "./genome/" + identifier + "/" + accession + ".xml"
if not os.path.isfile(xml_destination): if not os.path.isfile(xml_destination):
command = curl.replace("XXXXXX", accession) command = curl.replace("XXXXXX", accession)
output = run_command(command) output = run_command(command)
...@@ -37,7 +37,7 @@ def retrieval(identifier, accession): ...@@ -37,7 +37,7 @@ def retrieval(identifier, accession):
os.makedirs(path) os.makedirs(path)
# Download the files # Download the files
path = "./genome/" + identifier + "/" + element.split("/")[-1] path = "./genome/" + identifier + "/" + element.split("/")[-1]
if not os.path.isfile(path): if not os.path.isfile(path) and not os.path.isfile(path + ".gz"):
command = "wget -nc " + element + " --output-document=" + path command = "wget -nc " + element + " --output-document=" + path
os.system(command) os.system(command)
...@@ -84,10 +84,15 @@ def command(command): ...@@ -84,10 +84,15 @@ def command(command):
os.system(command) os.system(command)
def conversion_stage(identifier): def conversion_stage(identifier):
# When eggnog file is present all can be skipped
output_file = "./genome/" + identifier + "/" + identifier + ".interproscan.eggnog.ttl.gz"
if os.path.isfile(output_file + ".gz"): return
conversion(identifier) conversion(identifier)
interproscan(identifier) interproscan(identifier)
eggnog(identifier) eggnog(identifier)
# Perform gzip compression
# Perform gzip compression
gzip("./genome/" + identifier + "/" + identifier + ".conversion.ttl") gzip("./genome/" + identifier + "/" + identifier + ".conversion.ttl")
gzip("./genome/" + identifier + "/" + identifier + ".interproscan.ttl") gzip("./genome/" + identifier + "/" + identifier + ".interproscan.ttl")
gzip("./genome/" + identifier + "/" + identifier + ".interproscan.eggnog.ttl") gzip("./genome/" + identifier + "/" + identifier + ".interproscan.eggnog.ttl")
...@@ -103,7 +108,7 @@ if __name__ == "__main__": ...@@ -103,7 +108,7 @@ if __name__ == "__main__":
retrieval(identifier, accession) retrieval(identifier, accession)
identifiers.add(identifier) identifiers.add(identifier)
conversion_stage(identifier) conversion_stage(identifier)
break # break
# Number of cores # Number of cores
# num_cores = 3 # multiprocessing.cpu_count() # num_cores = 3 # multiprocessing.cpu_count()
# results = Parallel(n_jobs=num_cores)(delayed(conversion_stage)(identifier) for identifier in identifiers) # results = Parallel(n_jobs=num_cores)(delayed(conversion_stage)(identifier) for identifier in identifiers)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment