Commit 25ddb525 authored by Koehorst, Jasper
Browse files

eggnog.gz check added

parent c9740098
......@@ -23,7 +23,7 @@ def retrieval(identifier, accession):
egg = path.replace(".type", "_eggNOG.tsv")
# Curl command
xml_destination = "./xml/" + accession + ".xml"
xml_destination = "./genome/" + identifier + "/" + accession + ".xml"
if not os.path.isfile(xml_destination):
command = curl.replace("XXXXXX", accession)
output = run_command(command)
......@@ -37,7 +37,7 @@ def retrieval(identifier, accession):
os.makedirs(path)
# Download the files
path = "./genome/" + identifier + "/" + element.split("/")[-1]
if not os.path.isfile(path):
if not os.path.isfile(path) and not os.path.isfile(path + ".gz"):
command = "wget -nc " + element + " --output-document=" + path
os.system(command)
......@@ -84,9 +84,14 @@ def command(command):
os.system(command)
def conversion_stage(identifier):
    """Run the conversion, InterProScan and eggNOG steps for one genome.

    All work is skipped when the gzip-compressed eggNOG output
    ``./genome/<identifier>/<identifier>.interproscan.eggnog.ttl.gz``
    already exists. Otherwise the three processing steps are run in order
    and the conversion and InterProScan outputs are gzip-compressed.

    :param identifier: genome identifier; used both as the directory name
        under ``./genome/`` and as the file-name prefix.
    :return: None
    """
    prefix = "./genome/" + identifier + "/" + identifier

    # When eggnog file is present all can be skipped.
    # The name already carries the ".gz" suffix, so it is tested as-is;
    # appending another ".gz" would look for "...ttl.gz.gz" and never match.
    output_file = prefix + ".interproscan.eggnog.ttl.gz"
    if os.path.isfile(output_file):
        return

    conversion(identifier)
    interproscan(identifier)
    eggnog(identifier)

    # Perform gzip compression
    gzip(prefix + ".conversion.ttl")
    gzip(prefix + ".interproscan.ttl")
......@@ -103,7 +108,7 @@ if __name__ == "__main__":
retrieval(identifier, accession)
identifiers.add(identifier)
conversion_stage(identifier)
break
# break
# Number of cores
# num_cores = 3 # multiprocessing.cpu_count()
# results = Parallel(n_jobs=num_cores)(delayed(conversion_stage)(identifier) for identifier in identifiers)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment