Commit 51aed4fa authored by Koehorst, Jasper
Browse files

gzip fix with command

parent 4665c7c5
......@@ -47,41 +47,50 @@ def run_command(command):
return result.stdout.decode('ascii')
def conversion(identifier):
    """Run the MGnifyParser ``conversion`` tool for one genome.

    Reads ``./genome/<identifier>/<identifier>.fna`` and ``.gff`` and asks the
    tool to write ``<identifier>.conversion.ttl`` in the same directory.

    The step is skipped when the gzipped output (``.conversion.ttl.gz``)
    already exists. The output is deliberately NOT compressed here: the
    next stage reads the uncompressed ``.ttl``.

    Args:
        identifier: Genome identifier; used as both directory and file stem.
    """
    base = "./genome/" + identifier + "/" + identifier
    fasta_file = base + ".fna"
    gff_file = base + ".gff"
    output_file = base + ".conversion.ttl"

    # A compressed result from an earlier run means this stage is done.
    if os.path.isfile(output_file + ".gz"):
        return

    # Run the tool exactly once, through the shared ``command`` helper
    # (which echoes the command line before executing it).
    command(
        "java -jar ./MGnifyParser.jar -tool conversion -f " + fasta_file
        + " -gff2rdf -i " + gff_file
        + " -id " + identifier
        + " -codon 11 -topology linear -o " + output_file
    )
def interproscan(identifier):
    """Run the MGnifyParser ``interpro`` tool for one genome.

    Uses ``<identifier>.conversion.ttl`` as input together with
    ``<identifier>_InterProScan.tsv`` and asks the tool to write
    ``<identifier>.interproscan.ttl`` (all under ``./genome/<identifier>/``).

    The step is skipped when ``.interproscan.ttl.gz`` already exists. The
    output is not compressed here because the next stage reads it.

    Args:
        identifier: Genome identifier; used as both directory and file stem.
    """
    base = "./genome/" + identifier + "/" + identifier
    input_file = base + ".conversion.ttl"
    output_file = base + ".interproscan.ttl"
    tsv_file = base + "_InterProScan.tsv"

    # A compressed result from an earlier run means this stage is done.
    if os.path.isfile(output_file + ".gz"):
        return

    # Run the tool exactly once, through the shared ``command`` helper.
    command(
        "java -jar ./MGnifyParser.jar -tool interpro -i " + input_file
        + " -o " + output_file
        + " -tsv " + tsv_file
        + " -version InterProScan-v5_35-74_0"
    )
def eggnog(identifier):
    """Run the MGnifyParser ``eggnog`` tool for one genome.

    Uses ``<identifier>.interproscan.ttl`` as input together with
    ``<identifier>_eggNOG.tsv`` and asks the tool to write
    ``<identifier>.interproscan.eggnog.ttl`` (all under
    ``./genome/<identifier>/``).

    The step is skipped when ``.interproscan.eggnog.ttl.gz`` already exists.

    Args:
        identifier: Genome identifier; used as both directory and file stem.
    """
    base = "./genome/" + identifier + "/" + identifier
    input_file = base + ".interproscan.ttl"
    tsv_file = base + "_eggNOG.tsv"
    output_file = base + ".interproscan.eggnog.ttl"

    # A compressed result from an earlier run means this stage is done.
    if os.path.isfile(output_file + ".gz"):
        return

    # Run the tool through the shared ``command`` helper; no local variable
    # named ``command`` may exist here or it would shadow that helper.
    command(
        "java -jar ./MGnifyParser.jar -tool eggnog -i " + input_file
        + " -o " + output_file
        + " -tsv " + tsv_file
        + " -v v2-db5.0"
    )
def gzip(file):
    """Write a gzip-compressed copy of ``file`` to ``file + '.gz'``.

    The source file is left in place (unlike the ``gzip`` shell command),
    so later code can still read the uncompressed data.

    Args:
        file: Path of the file to compress.

    Raises:
        OSError: If ``file`` cannot be read or the ``.gz`` cannot be written
            (includes ``FileNotFoundError`` when ``file`` does not exist).
    """
    # This function is itself called ``gzip``, so inside it the bare name
    # ``gzip`` refers to the function, not the standard-library module.
    # Import the module under a different local name to reach ``open``.
    import gzip as gzip_module
    import shutil

    # Binary mode on both sides: the gzip stream accepts bytes only, and
    # copying raw bytes avoids any text decoding/encoding of the payload.
    with open(file, "rb") as f_in, gzip_module.open(file + ".gz", "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
def command(command):
    """Echo a shell command line to stdout, then execute it.

    Args:
        command: The complete command line, passed unchanged to
            ``os.system``.

    Returns:
        The value returned by ``os.system`` (0 when the command reported
        success). Existing callers that ignore the result are unaffected.
    """
    import sys

    print(command)
    status = os.system(command)
    if status != 0:
        # Do not abort (callers treat this as best-effort), but make the
        # failure visible instead of silently discarding the status.
        print(
            "WARNING: command returned non-zero status " + str(status)
            + ": " + command,
            file=sys.stderr,
        )
    return status
def conversion_stage(identifier):
    """Run the three annotation stages for one genome, then gzip the outputs.

    Calls ``conversion``, ``interproscan`` and ``eggnog`` in that order and
    afterwards compresses each stage's ``.ttl`` output with ``gzip``.
    Compression happens only at the end because each stage reads the
    uncompressed output of the previous one.

    Args:
        identifier: Genome identifier; used as both directory and file stem.
    """
    conversion(identifier)
    interproscan(identifier)
    eggnog(identifier)

    # Perform gzip compression
    base = "./genome/" + identifier + "/" + identifier
    for suffix in (".conversion.ttl", ".interproscan.ttl", ".interproscan.eggnog.ttl"):
        ttl_file = base + suffix
        if os.path.isfile(ttl_file):
            gzip(ttl_file)
        else:
            # The uncompressed file can be absent (e.g. the stage was skipped
            # because only the ``.gz`` is present, or the tool produced no
            # output). Report it rather than crashing the whole run.
            print("Skipping gzip, file not found: " + ttl_file)
if __name__ == "__main__":
identifiers = set()
......@@ -92,7 +101,8 @@ if __name__ == "__main__":
if (identifier.startswith("MGYG")):
retrieval(identifier, accession)
identifiers.add(identifier)
conversion_stage(identifier)
break
# Number of cores
num_cores = 3 # multiprocessing.cpu_count()
results = Parallel(n_jobs=num_cores)(delayed(conversion_stage)(identifier) for identifier in identifiers)
# num_cores = 3 # multiprocessing.cpu_count()
# results = Parallel(n_jobs=num_cores)(delayed(conversion_stage)(identifier) for identifier in identifiers)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment