Commit 6d6860f2 authored by Koehorst, Jasper

sync and accession file example added for new ENA approach

parent f1cde315
Pipeline #19299 passed with stage in 1 minute and 49 seconds
@@ -319,33 +319,16 @@ public class Search {
// Get all processed folders; possibly no longer needed now that metadata is being updated
Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
String search = makePath(commandOptionsKubernetes.project, commandOptionsKubernetes.investigation, commandOptionsKubernetes.study, commandOptionsKubernetes.observationUnit, commandOptionsKubernetes.assay, connection);
Set<String> hdts = getAllHDT(search, connection);
// for (String hdt : hdts) {
// System.err.println(hdt);
// }
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Creating search pattern based on PISOSA
String search = makePath(commandOptionsKubernetes.project, commandOptionsKubernetes.investigation, commandOptionsKubernetes.study, commandOptionsKubernetes.observationUnit, commandOptionsKubernetes.assay, connection);
// Obtains unprocessed folders...
// String search = "/" + commandOptionsKubernetes.zone + "/projects/" + commandOptionsKubernetes.project + "%";
// if (commandOptionsKubernetes.investigation != null) {
// search = search + "/" + commandOptionsKubernetes.investigation;
// }
// if (commandOptionsKubernetes.study != null) {
// search = search + "/" + commandOptionsKubernetes.study;
// }
// if (commandOptionsKubernetes.observationUnit != null) {
// search = search + "/" + commandOptionsKubernetes.observationUnit;
// }
// if (commandOptionsKubernetes.assay != null) {
// search = search + "/%" + commandOptionsKubernetes.assay;
// }
// search += "%";
// For hdt check
Set<String> hdts = getAllHDT(search, connection);
log.info("Searching in " + search);
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.LIKE, "cwl");
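For context, a minimal sketch of what makePath presumably assembles, inferred from the commented-out manual path building it replaces above (the exact signature and wildcard handling are assumptions):

// Hypothetical sketch, not part of this commit: builds the PISOSA-style
// search pattern from the optional project/investigation/study/OU/assay parts.
public static String makePath(String project, String investigation, String study,
                              String observationUnit, String assay, Connection connection) {
    String search = "/" + connection.irodsAccount.getZone() + "/projects/" + project + "%";
    if (investigation != null) search += "/" + investigation;
    if (study != null) search += "/" + study;
    if (observationUnit != null) search += "/" + observationUnit;
    if (assay != null) search += "/%" + assay;
    return search + "%";
}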
@@ -371,14 +354,15 @@ public class Search {
log.info(yamlFiles.size() + " yaml files detected");
int count = 0;
for (String yaml : yamlFiles) {
log.info("Processing " + yaml);
count = count + 1;
log.info("Processing " + count + " " + yaml);
Generic.downloadFile(connection, new File(yaml));
Scanner scanner = new Scanner(new File("." + yaml));
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
if (line.startsWith("destination: ")) {
// log.info("Analysing " + line);
String destination = line.split(" ")[1];
if (processed.contains(destination)) {
log.info("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists");
@@ -386,13 +370,13 @@ public class Search {
} else {
// When not in processed, do a final check to see whether it already exists; if not, assign it to kube
if (yaml.endsWith("hdt_creation.yaml")) {
log.info("HDT job detected");
// log.info("HDT job detected and forced to be executed due to code being disabled");
boolean hdtPresent = false;
// String hdtCheck = yaml.replaceAll("/hdt_creation.yaml","");
// if (hdts.contains(hdtCheck)) {
// hdtPresent = true;
// fixAVU(connection, yaml);
// }
String hdtCheck = yaml.replaceAll("/hdt_creation.yaml","");
if (hdts.contains(hdtCheck)) {
hdtPresent = true;
fixAVU(connection, yaml);
}
if (!hdtPresent) {
log.info("Processing " + line);
Kubernetes.yamls.add(yaml);
@@ -510,7 +494,7 @@ public class Search {
}
public static Set<String> getAllHDT(String search, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException {
log.info("Obtain all HDTs");
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
@@ -600,6 +584,8 @@ public class Search {
// Obtains OU folders...
String folderQuery = makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, commandOptionsHDT.observationUnit, commandOptionsHDT.assay, connection);
log.info("Searching in " + folderQuery);
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folderQuery);
@@ -622,6 +608,7 @@ public class Search {
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
folders.add(irodsQueryResultSetResult.getColumn(0));
}
log.info("Detected " + folders.size());
return folders;
}
}
@@ -96,7 +96,7 @@ public class Kubernetes {
}
TimeUnit.MILLISECONDS.sleep(500);
TimeUnit.MILLISECONDS.sleep(5);
totalItems = totalItems + 1;
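A short note on the loop above:

// The per-item delay in the submission loop drops from 500 ms to 5 ms; for a
// batch of e.g. 300 yamls that is ~1.5 s of sleeping instead of ~2.5 minutes.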
@@ -8,8 +8,11 @@ import org.apache.commons.lang.StringUtils;
import java.lang.reflect.Field;
public class CommandOptionsGenomeSync extends CommandOptionsYAML {
@Parameter(names = {"-taxon"}, description = "Taxonomy number of GCA accession number", required = true)
public int taxon;
// @Parameter(names = {"-taxon"}, description = "Taxonomy number of GCA accession number", required = true)
// public int taxon;
@Parameter(names = {"-accession"}, description = "File with a list of accession numbers", required = true)
public String accessionFile;
// @Parameter(names = {"-taxonendpoint"}, description = "Taxonomy RDF file from uniprot in HDT format")
// public String taxonfile = "/unlock/references/databases/ncbi/taxonomy.hdt";
@@ -22,7 +22,7 @@ public class CommandOptionsYAML extends CommandOptionsIRODS {
// @Parameter(names = {"-yaml"}, description = "Yaml irods file name")
// public String yaml = "";
@Parameter(names = {"-overwrite"}, description = "Overwrites already existing yaml fiels")
@Parameter(names = {"-overwrite"}, description = "Overwrites already existing yaml files")
public boolean overwrite = false;
@Parameter(names = {"-threads"}, description = "Number of threads to use")
@@ -42,30 +42,31 @@ public class ENA {
// Iterator?
// Parser for the ebi lookup file?
// Obtained via https://www.ebi.ac.uk/ena/browser/advanced-search
String command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(10239)&fields=accession%2Cassembly_level%2Cgenome_representation%2Csample_accession%2Ctax_id&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(303)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(2039240)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// String command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(10239)&fields=accession%2Cassembly_level%2Cgenome_representation%2Csample_accession%2Ctax_id&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(303)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(2039240)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// Proteobacteria and DSM strains
command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(1224)%20AND%20assembly_title%3D%22*%20DSM%20*%22&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(2)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(1224)%20AND%20assembly_title%3D%22*%20DSM%20*%22&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// command = "curl -X POST -H \"Content-Type: application/x-www-form-urlencoded\" -d 'result=assembly&query=tax_tree(2)&fields=accession%2Cassembly_level%2Cassembly_name%2Cassembly_title%2Cassembly_type%2Cbase_count%2Cgenome_representation%2Clast_updated%2Csample_accession%2Cscientific_name%2Csecondary_sample_accession%2Cstrain%2Cstudy_accession%2Cstudy_description%2Cstudy_name%2Cstudy_title%2Ctax_id%2Cversion&format=tsv' \"https://www.ebi.ac.uk/ena/portal/api/search\"";
// No issue with just copying the ENA curl request
command = command.replaceFirst("curl", "curl --output curl.txt");
// command = command.replaceFirst("curl", "curl --output curl.txt");
PrintWriter printWriter = new PrintWriter(new File("curl.sh"));
printWriter.write(command);
printWriter.close();
// PrintWriter printWriter = new PrintWriter(new File("curl.sh"));
// printWriter.write(command);
// printWriter.close();
// logger.warn("ECHO!, not executing the curl...");
// ExecCommand execCommand = new ExecCommand("echo sh curl.sh");
ExecCommand execCommand = new ExecCommand("sh curl.sh");
// ExecCommand execCommand = new ExecCommand("sh curl.sh");
if (execCommand.getExit() > 0) {
throw new Exception("Execution of curl failed");
}
// if (execCommand.getExit() > 0) {
// throw new Exception("Execution of curl failed");
// }
Scanner scanner = new Scanner(new File("curl.txt"));
Scanner scanner = new Scanner(new File(commandOptions.accessionFile));
String[] header = scanner.nextLine().split("\t");
HashMap<String, Integer> lookup = new HashMap<>();
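For context, a minimal sketch of how the header/lookup pair above is presumably filled and used (column order is an assumption; only the accession and tax_id columns are dereferenced later):

// Hypothetical sketch, not part of this commit: map TSV header names to
// column indices so rows can be addressed by name, e.g. for a file
//   accession<TAB>tax_id
//   GCA_000007565.2<TAB>160488
for (int i = 0; i < header.length; i++) {
    lookup.put(header[i], i);
}
// String accession = row[lookup.get("accession")];
// int taxon = Integer.parseInt(row[lookup.get("tax_id")]);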
@@ -101,19 +102,20 @@ public class ENA {
String codon = line.split(" ")[1];
tax2codon.put(Integer.parseInt(tax), Integer.parseInt(codon));
}
tax2codonFile.close();
int counter = 0;
while (scanner.hasNextLine()) {
String[] line = scanner.nextLine().split("\t");
boolean pass = filter(lookup, line);
boolean pass = true; // filter(lookup, line);
if (pass) {
counter = counter + 1;
commandOptions.id = line[lookup.get("accession")];
commandOptions.taxon = Integer.parseInt(line[lookup.get("tax_id")]);
int taxon = Integer.parseInt(line[lookup.get("tax_id")]);
WorkflowGenomeSync workflow = new WorkflowGenomeSync();
// Set everything to default values and overwrite otherwise (constructor does not work?)
@@ -123,7 +125,7 @@ public class ENA {
workflow.provenance = false;
workflow.setGca(commandOptions.id);
String lineage = getLineage(model, commandOptions.taxon);
String lineage = getLineage(model, taxon);
if (lineage == null) {
continue;
}
@@ -131,7 +133,7 @@ public class ENA {
lineage = lineage.toLowerCase().replaceAll(" +", "_");
if (lineage.startsWith("bacteria")) {
workflow.codon = tax2codon.get(commandOptions.taxon);
workflow.codon = tax2codon.get(taxon);
workflow.bacteria = true;
// logger.info("Codon table " + workflow.codon + " will be used");
}
@@ -174,19 +176,17 @@ public class ENA {
private static boolean filter(HashMap<String, Integer> lookup, String[] line) {
if (lookup.containsKey("assembly_title")) {
// Accept all DSM strains
if (line[lookup.get("assembly_title")].toUpperCase().contains(" DSM ")) {
if (line[lookup.get("assembly_title")].toUpperCase().contains("DSM")) {
return true;
}
if (line[lookup.get("assembly_title")].toUpperCase().contains(" ATCC ")) {
if (line[lookup.get("assembly_title")].toUpperCase().contains("ATCC")) {
return true;
}
}
// if (lookup.containsKey("assembly_level")) {
// if (!line[lookup.get("assembly_level")].endsWith("complete genome"))
// return false;
// }
return false;
}
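A brief note on the relaxed matching above:

// Without the surrounding spaces, "DSM" and "ATCC" now also match embedded
// occurrences (e.g. "DSM-20079"); note the caller currently bypasses this
// filter entirely via `boolean pass = true;`.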
@@ -16,6 +16,7 @@ import org.jermontology.ontology.JERMOntology.domain.Data_sample;
import java.io.File;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -49,9 +50,9 @@ public class FASTQC {
// Only paired data, which in practice means Illumina data, can be run through fastqc
if (files.size() > 0 && files.get(0).getClassTypeIri().endsWith("PairedSequenceDataSet")) {
if (files.get(0).getFileType().getIRI().endsWith("FASTQ")) {
forwardReads.add(files.get(0).getFilePath());
reverseReads.add(files.get(1).getFilePath());
if (files.get(0).getFileFormat().getIRI().endsWith("FASTQ")) {
forwardReads.add(new URL(files.get(0).getContentUrl()).getPath());
reverseReads.add(new URL(files.get(1).getContentUrl()).getPath());
}
for (String filePath : forwardReads) {
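For context, a minimal sketch of the getContentUrl()/getPath() pattern this commit substitutes for getFilePath() here and in the classes below (the example URL is an assumption):

// Hypothetical sketch, not part of this commit: java.net.URL strips the
// scheme, host and port, leaving the absolute path within the zone.
URL contentUrl = new URL("https://data.example.org/unlock/projects/P_demo/reads_1.fastq.gz");
String irodsPath = contentUrl.getPath(); // "/unlock/projects/P_demo/reads_1.fastq.gz"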
@@ -19,7 +19,7 @@ public class HDT {
private static final Logger log = Generic.getLogger(HDT.class, false);
public static void generateHDTWorkflow(CommandOptionsHDT commandOptionsHDT, Connection connection, Set<String> folders) throws Exception {
log.info("Generating HDT workflows for " + folders.size());
DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);
for (String folder : folders) {
@@ -23,6 +23,7 @@ import org.purl.ppeo.PPEO.owl.domain.observation_unit;
import java.io.File;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
@@ -61,26 +62,26 @@ public class MetaGenomics {
// Parse each sequence dataset, as we might have overlapping identifiers in the future...
SequenceDataSet sequenceDataSet = (SequenceDataSet) data_sample;
// If file already processed skip...
if (filePaths.contains(sequenceDataSet.getFilePath())) continue;
if (filePaths.contains(new URL(sequenceDataSet.getContentUrl()).getPath())) continue;
if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("Illumina")) {
PairedSequenceDataSet pairedSequenceDataSet = (PairedSequenceDataSet) data_sample;
workflow.addForward_reads(pairedSequenceDataSet.getFilePath());
workflow.addReverse_reads(pairedSequenceDataSet.getPaired().getFilePath());
workflow.addForward_reads(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
workflow.addReverse_reads(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
// Adding to already processed files due to linked paired dataset
filePaths.add(pairedSequenceDataSet.getFilePath());
filePaths.add(pairedSequenceDataSet.getPaired().getFilePath());
filePaths.add(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
filePaths.add(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
} else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("PacBio")) {
if (sequenceDataSet.getFileType().getIRI().endsWith("FASTQ")) {
workflow.addPacbio(sequenceDataSet.getFilePath());
if (sequenceDataSet.getFileFormat().getIRI().endsWith("FASTQ")) {
workflow.addPacbio(new URL(sequenceDataSet.getContentUrl()).getPath());
} else {
log.error("File type for " + sequenceDataSet.getFileName() + " not supported");
log.error("File type for " + sequenceDataSet.getName() + " not supported");
}
} else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("NanoPore")) {
log.error("NANOPORE NOT YET DONE");
// workflow.addP(assay.getAllFile().get(0).getFilePath());
}
filePaths.add(sequenceDataSet.getFilePath());
filePaths.add(new URL(sequenceDataSet.getContentUrl()).getPath());
}
}
}
@@ -30,6 +30,7 @@ import org.jermontology.ontology.JERMOntology.domain.Data_sample;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.util.*;
import java.util.concurrent.TimeUnit;
@@ -105,7 +106,7 @@ public class NGTax {
continue;
}
// IN PROGRESS... mr..DNA -.-
workflow.addForward_reads(files.get(0).getFilePath());
workflow.addForward_reads(new URL(files.get(0).getContentUrl()).getPath());
SequenceDataSet sequenceDataSet = (SequenceDataSet) ampliconAssay.getAllFile().get(0);
// Evaluate whether the configured read length matches or is smaller than the raw read length
@@ -125,8 +126,8 @@ public class NGTax {
log.debug("Paired data detected");
workflow.setReverse_primer(ampliconAssay.getReversePrimer());
workflow.addForward_reads(files.get(0).getFilePath());
workflow.addReverse_reads(files.get(1).getFilePath());
workflow.addForward_reads(new URL(files.get(0).getContentUrl()).getPath());
workflow.addReverse_reads(new URL(files.get(1).getContentUrl()).getPath());
PairedSequenceDataSet sequenceDataSet1 = (PairedSequenceDataSet) ampliconAssay.getAllFile().get(0);
PairedSequenceDataSet sequenceDataSet2 = sequenceDataSet1.getPaired();
@@ -159,7 +160,7 @@ public class NGTax {
}
} else {
files.forEach(file -> {
System.err.println(file.getFileName());
System.err.println(file.getName());
System.err.println(file.getResource().getURI());
});
throw new Exception("More than 2 files detected...");
@@ -224,8 +225,8 @@ public class NGTax {
// Find amplicon library
String path = "/" + connection.irodsAccount.getZone() + "/landingzone/ampliconlibraries";
HashSet<String> libraryFwdFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(0).getFileName(), path);
HashSet<String> libraryRevFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(1).getFileName(), path);
HashSet<String> libraryFwdFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(0).getName(), path);
HashSet<String> libraryRevFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(1).getName(), path);
if (libraryFwdFile.size() == 0)
continue;
@@ -326,7 +327,7 @@ public class NGTax {
String revBarcode = ampliconAssay.getReverseBarcode();
String libraryNumber = "1";
String direction = "p";
String libraryName = ampliconAssay.getLibrary().getAllFile().get(0).getFileName();
String libraryName = ampliconAssay.getLibrary().getAllFile().get(0).getName();
printWriter.println(sampleID + "\t" + fwdBarcode + "\t" + revBarcode + "\t" + libraryNumber + "\t" + direction + "\t" + libraryName);
}
printWriter.close();
@@ -16,6 +16,7 @@ import org.jermontology.ontology.JERMOntology.domain.Data_sample;
import java.io.File;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
@@ -65,8 +66,8 @@ public class RNASeq {
} else if (files.size() == 2) {
log.info("Paired data detected");
workflow.addForward_reads(files.get(0).getFilePath());
workflow.addReverse_reads(files.get(1).getFilePath());
workflow.addForward_reads(new URL(files.get(0).getContentUrl()).getPath());
workflow.addReverse_reads(new URL(files.get(1).getContentUrl()).getPath());
// SequenceDataSet sequenceDataSet = (SequenceDataSet) rnaSeqAssay.getAllFile().get(0);
// if (commandOptions.read_len == 0 && sequenceDataSet.getReadLength() > 0) {
@@ -21,6 +21,7 @@ import org.purl.ppeo.PPEO.owl.domain.observation_unit;
import java.io.File;
import java.io.FileWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
@@ -57,26 +58,26 @@ public class SPADES {
// Parse each sequence dataset, as we might have overlapping identifiers in the future...
SequenceDataSet sequenceDataSet = (SequenceDataSet) data_sample;
// If file already processed skip...
if (filePaths.contains(sequenceDataSet.getFilePath())) continue;
if (filePaths.contains(new URL(sequenceDataSet.getContentUrl()).getPath())) continue;
if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("Illumina")) {
PairedSequenceDataSet pairedSequenceDataSet = (PairedSequenceDataSet) assay.getAllFile().get(0);
workflow.addForward_reads(pairedSequenceDataSet.getFilePath());
workflow.addReverse_reads(pairedSequenceDataSet.getPaired().getFilePath());
workflow.addForward_reads(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
workflow.addReverse_reads(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
// Adding to already processed files due to linked paired dataset
filePaths.add(pairedSequenceDataSet.getFilePath());
filePaths.add(pairedSequenceDataSet.getPaired().getFilePath());
filePaths.add(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
filePaths.add(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
} else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("PacBio")) {
if (sequenceDataSet.getFileType().getIRI().endsWith("FASTQ")) {
workflow.addPacbio(sequenceDataSet.getFilePath());
if (sequenceDataSet.getFileFormat().getIRI().endsWith("FASTQ")) {
workflow.addPacbio(new URL(sequenceDataSet.getContentUrl()).getPath());
} else {
log.error("File type for " + sequenceDataSet.getFileName() + " not supported");
log.error("File type for " + sequenceDataSet.getName() + " not supported");
}
} else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("NanoPore")) {
log.error("NANOPORE NOT YET DONE");
// workflow.addP(assay.getAllFile().get(0).getFilePath());
}
filePaths.add(sequenceDataSet.getFilePath());
filePaths.add(new URL(sequenceDataSet.getContentUrl()).getPath());
}
}
}
@@ -88,19 +88,23 @@ public class Yaml {
NGTax.generateDemultiplexWorkflow(commandOptionsDemultiplexing, connection, domain);
}
} else if (commandOptions.cwl.contains("irods_hdt.cwl")) {
log.info("Generating HDT workflow files");
CommandOptionsHDT commandOptionsHDT = new CommandOptionsHDT(args);
Set<String> folders = Search.getOUFromiRODS(connection, commandOptionsHDT);
String search = Search.makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, commandOptionsHDT.observationUnit, commandOptionsHDT.assay, connection);
Set<String> hdts = Search.getAllHDT(search, connection);
Set<String> analysis = new HashSet<>();
for (String folder : folders) {
// System.err.println(folder);
if (!hdts.contains(folder + "/hdt")) {
analysis.add(folder);
// overwrite check
if (commandOptions.overwrite) {
analysis.addAll(folders);
} else {
for (String folder : folders) {
if (!hdts.contains(folder + "/hdt")) {
analysis.add(folder);
}
}
}
// generate the workflows
HDT.generateHDTWorkflow(commandOptionsHDT, connection, analysis);
} else {
@@ -29,6 +29,8 @@ public class AppTest {
private String[] ngtaxReadLength = {"100", "70", "120"}; //{"70", "100", "120"};
private String minimumThreshold = "0.001";
private String referenceDB = "/unlock/references/databases/Silva/SILVA_138_SSURef_tax_silva.fasta.gz";
private String study = "%";
private String assay = "%";
// Performing the quality workflow test
@Test
@@ -48,19 +50,25 @@ public class AppTest {
public void testNGTAX() throws InterruptedException {
while (true) {
try {
referenceDB = "/unlock/references/databases/Silva/SILVA_138_SSURef_tax_silva.fasta.gz";
study = "S_MockOnly";
// assay = "A_Amplicon_1430_MOCK3_2017_0004";
for (String readLength : ngtaxReadLength) {
String[] args = {
"-cwl", "workflow_ngtax.cwl",
"-id", "AmpliconAnalysis_NGTAX",
"-id", "AmpliconAnalysis_NGTAX_Silva138",
"-project", project,
"-investigation", investigation,
"-study", study,
"-assay", assay,
"-length", readLength,
"-referenceDB", referenceDB,
"-memory", "6000" //,
"-overwrite",
"-memory", "6000"
};
App.main(args);
// App.main(args);
}
// runKubernetes();
runKubernetes();
return;
} catch (Exception e) {
e.printStackTrace();
@@ -97,18 +105,24 @@ public class AppTest {
}
// Performing the Assembly workflow test
// Performing the HDT workflow
@Test
public void testHDT() throws Exception {
project = "P_MIB-Amplicon";
investigation = "I_Poultry_16S_MIB";
study = "S_MockOnly";
String[] args = {
"-cwl", "/unlock/infrastructure/cwl/irods/irods_hdt.cwl",
"-id", "hdt_creation",
"-project", "P_MIB-Amplicon",
"-investigation", "%", //investigation,
"-project", project,
"-investigation", investigation,
"-study", study,
"-memory", "6000",
"-threads", "2"
}; // "-yaml","",
// App.main(args);
"-overwrite",
"-threads", "2" //,
};
App.main(args);
runKubernetes();
}
@@ -164,7 +178,7 @@ public class AppTest {
String[] args = {
"-cwl", "workflow_ena_annotation.cwl",
"-id", "ID",
"-taxon", "0",
"-accession", "./src/test/resources/accessionFile.txt",
"-threads", "5",
"-memory", "10000",
};
@@ -189,10 +203,10 @@ public class AppTest {
String[] args = {
"-kubernetes",
"-project", project,
// "-investigation", investigation,
// "-study", "S_PREDIMED_Microbiota",
"-investigation", investigation,
"-study", study,
// "-observationUnit", "O_Mock%",
"-limit", "300",
"-limit", "150",
"-reset"
// "-priority", "1000"
};
src/test/resources/accessionFile.txt
accession	tax_id
GCA_000007565.2	160488
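A minimal usage sketch of the new -accession flag (mirroring the test arguments above; App.main is how the tests drive the tool):

// Hypothetical sketch: running the ENA genome sync against the example file;
// each accession row becomes a WorkflowGenomeSync job.
String[] args = {
        "-cwl", "workflow_ena_annotation.cwl",
        "-id", "ID",
        "-accession", "./src/test/resources/accessionFile.txt",
        "-threads", "5",
        "-memory", "10000",
};
App.main(args);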