Commit ad3fe9e7 authored by Koehorst, Jasper's avatar Koehorst, Jasper
Browse files

code sync

parent 5daf99b6
Pipeline #20947 passed with stage
in 1 minute and 12 seconds
......@@ -14,6 +14,8 @@ import nl.munlock.yaml.Yaml;
import nl.wur.ssb.RDFSimpleCon.ResultLine;
import nl.wur.ssb.RDFSimpleCon.api.Domain;
import org.apache.commons.io.IOUtils;
import org.apache.jena.ext.com.google.common.io.Files;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.log4j.Logger;
import org.irods.jargon.core.exception.JargonException;
import org.irods.jargon.core.pub.DataObjectAO;
......@@ -42,16 +44,19 @@ public class Search {
*/
public static ArrayList<Assay> getAssaysFromRDF(CommandOptionsIRODS commandOptions, Connection connection) throws Exception {
log.info("Obtaining assay files");
Domain domain = new Domain("");
File domainDirectory = Files.createTempDir();
Domain domain = new Domain("file://"+domainDirectory);
// Get unprocessed files
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtain all TTL files in /Unprocessed folder
String folderQuery = makePath(commandOptions.project, commandOptions.investigation, commandOptions.study, commandOptions.observationUnit, commandOptions.assay, connection) + "%";
String folderQuery = makePath(commandOptions.project, commandOptions.investigation, commandOptions.study, commandOptions.observationUnit, commandOptions.assay, connection);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folderQuery);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%PROVENANCE%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%/Step_%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_COLL_ATTR_NAME, QueryConditionOperators.LIKE, "type");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_COLL_ATTR_VALUE, QueryConditionOperators.LIKE, "Assay");
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%PROVENANCE%");
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%/Step_%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%ttl");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.NOT_LIKE, ".%");
......@@ -104,9 +109,10 @@ public class Search {
}
if (!new File(path).isHidden()) {
Domain domainTemp = new Domain("file://" + "." + path);
domain.getRDFSimpleCon().getModel().add(domainTemp.getRDFSimpleCon().getModel());
domainTemp.close();
// Domain domainTemp = new Domain("file://" + "." + path);
RDFDataMgr.read(domain.getRDFSimpleCon().getModel(), "."+path);
// domain.getRDFSimpleCon().getModel().add(domainTemp.getRDFSimpleCon().getModel());
// domainTemp.close();
}
}
......@@ -138,30 +144,33 @@ public class Search {
}
public static String makePath(String project, String investigation, String study, String observationUnit, String assay, Connection connection) {
String folderQuery = "/" + connection.irodsAccount.getZone() + "/projects/" + project;
String folderQuery = "/" + connection.irodsAccount.getZone() + "/projects";
if (project != null) {
folderQuery += "/" + project;
}
if (investigation != null) {
folderQuery += "/" + investigation;
} else {
folderQuery += "%";
folderQuery += "/%";
}
if (study != null) {
folderQuery += "/" + study;
} else {
folderQuery += "%";
folderQuery += "/%";
}
if (observationUnit != null) {
folderQuery += "/" + observationUnit;
} else {
folderQuery += "%";
folderQuery += "/%";
}
if (assay != null) {
folderQuery += "/" + assay;
} else {
folderQuery += "%";
folderQuery += "/%";
}
// Replace multiple %%% by one %
folderQuery += "%";
folderQuery = folderQuery.replaceAll("%+","%");
// folderQuery += "/%";
folderQuery = folderQuery.replaceAll("(/%)+", "/%").replaceAll("%+","%");
return folderQuery;
}
......@@ -373,13 +382,13 @@ public class Search {
// When not in processed, will do a final check again to see if it exists if not assign to kube
if (yaml.endsWith("hdt_creation.yaml")) {
// log.info("HDT job detected and forced to be executed due to code being disabled");
boolean hdtPresent = false;
// boolean hdtPresent = false;
String hdtCheck = yaml.replaceAll("/hdt_creation.yaml","");
if (hdts.contains(hdtCheck)) {
hdtPresent = true;
// If folder exists and reset is false we can skip it
if (hdts.contains(hdtCheck) && !commandOptionsKubernetes.reset) {
// hdtPresent = true;
fixAVU(connection, yaml);
}
if (!hdtPresent) {
} else {
log.info("Processing " + line);
Kubernetes.yamls.add(yaml);
log.info("Starting kubernetes");
......@@ -584,9 +593,9 @@ public class Search {
public static Set<String> getOUFromiRODS(Connection connection, CommandOptionsHDT commandOptionsHDT) throws GenQueryBuilderException, JargonException, JargonQueryException {
// Obtains OU folders...
String folderQuery = makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, commandOptionsHDT.observationUnit, commandOptionsHDT.assay, connection);
String folderQuery = makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, null, null, connection);
log.info("Searching in " + folderQuery);
log.info("Searching for OU in " + folderQuery);
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
......@@ -608,7 +617,13 @@ public class Search {
Set<String> folders = new HashSet<>();
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
folders.add(irodsQueryResultSetResult.getColumn(0));
String path = irodsQueryResultSetResult.getColumn(0);
if (commandOptionsHDT.observationUnit != null) {
if (path.contains(commandOptionsHDT.observationUnit))
folders.add(path);
} else {
folders.add(path);
}
}
log.info("Detected " + folders.size());
return folders;
......
......@@ -114,7 +114,8 @@ public class Kubernetes {
Search.getAllUnprocessedReferences(commandOptionsKubernetes, connection, path);
Kubernetes.createJobs(commandOptionsKubernetes, connection);
} else if (commandOptionsKubernetes.project.contains("ampliconlibraries")) {
String path ="/" + commandOptionsKubernetes.zone + "/landingzone%";
String path = Search.makePath(commandOptionsKubernetes.project, commandOptionsKubernetes.investigation, commandOptionsKubernetes.study, commandOptionsKubernetes.observationUnit, commandOptionsKubernetes.assay, connection);
if (commandOptionsKubernetes.reset) {
resetFailures(connection, path);
}
......
......@@ -18,7 +18,6 @@ public class Workflow {
return threads;
}
public void setMemory(int threads) {
this.memory = threads;
}
......
......@@ -18,6 +18,7 @@ public class WorkflowMetagenomics extends Workflow {
public ArrayList<FileClass> reverse_reads = new ArrayList<>();
public ArrayList<FileClass> pacbio_reads = new ArrayList<>();
public String kraken_database; // = new ArrayList<>();
public String bbmap_reference;
public boolean isolate;
public boolean metagenomics;
public Map<String, String> clazz;
......@@ -46,7 +47,6 @@ public class WorkflowMetagenomics extends Workflow {
this.forward_reads.add(fileClass);
}
public boolean isIsolate() {
return isolate;
}
......@@ -98,4 +98,12 @@ public class WorkflowMetagenomics extends Workflow {
// fileClass.setLocation(kraken_database);
// this.kraken_database.add(fileClass);
}
public String getBbmap_reference() {
return bbmap_reference;
}
public void setBbmap_reference(String bbmap_reference) {
this.bbmap_reference = bbmap_reference;
}
}
package nl.munlock.options.irods;
import com.beust.jcommander.Parameter;
import org.apache.log4j.helpers.BoundedFIFO;
public class CommandOptionsIRODS {
......
......@@ -11,6 +11,12 @@ public class CommandOptionsMetagenomics extends CommandOptionsYAML {
@Parameter(names = {"-level"}, description = "Level of analysis (OU or Assay)", required = true)
public String level = "OU";
@Parameter(names = {"-kraken"}, description = "Path to the kraken database")
public String kraken = "/unlock/references/databases/KRAKEN2_STANDARD_20200724";
@Parameter(names = {"-contamination"}, description = "Path to the contamination file (e.g. human genome)")
public String bbmap = "/unlock/references/databases/bbduk/GCA_000001405.28_GRCh38.p13_genomic.fna.gz";
public CommandOptionsMetagenomics(String args[]) {
try {
JCommander jc = new JCommander(this);
......
......@@ -26,7 +26,7 @@ public class CommandOptionsYAML extends CommandOptionsIRODS {
public boolean overwrite = false;
@Parameter(names = {"-threads"}, description = "Number of threads to use")
public float threads = 2;
public int threads = 2;
@Parameter(names = {"-memory"}, description = "Amount of memory needed (in megabytes)")
public int memory = 5000;
......
......@@ -40,7 +40,6 @@ public class HDT {
log.info("Uploading " + new File(yamlName) + " to " + destFile);
// System.err.println(folder);
IRODSFile irodsFile = connection.fileFactory.instanceIRODSFile(folder + "/hdt/");
// Prepare directory for YAML file
irodsFile.mkdirs();
......@@ -65,6 +64,9 @@ public class HDT {
DataObjectAO dataObjectAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataObjectAO(connection.irodsAccount);
AvuData avuMetaData = new AvuData("cwl", commandOptionsHDT.cwl, "waiting");
dataObjectAO.setAVUMetadata(destFile.getAbsolutePath(), avuMetaData);
// Start kubernetes?
}
}
}
......@@ -86,7 +86,8 @@ public class MetaGenomics {
}
workflow.setDestination(destination);
workflow.setKraken_database("/unlock/references/databases/KRAKEN2_STANDARD_20200724");
workflow.setKraken_database(commandOptions.kraken);
workflow.setBbmap_reference(commandOptions.bbmap);
if (workflow.getForward_reads().size() == 0 || workflow.getReverse_reads().size() == 0) {
log.warn("No illumina paired reads detected, cannot run SPADES");
......
......@@ -52,6 +52,7 @@ public class NGTax {
log.info("Class type of " + assay.getClassTypeIri().replaceAll(".*/", "") + " not matching AmpliconAssay");
continue;
} else {
log.info(assay.getResource().getURI());
log.info(assay.getIdentifier());
log.info("Analysing " + assay.getLogicalPath());
}
......@@ -373,7 +374,8 @@ public class NGTax {
if (path.startsWith(supposedPath)) {
paths.add(path);
} else {
System.err.println(path + " not in " + supposedPath);
System.err.println(path + " not in " + supposedPath + " but still added for processing");
paths.add(path);
}
}
......
......@@ -56,7 +56,7 @@ public class Yaml {
for (Domain domain : domains) {
SPADES.generateSpadesWorkflow(commandOptionsSpades, connection, domain);
}
} else if (commandOptions.cwl.matches("workflow_metagenomics.cwl") || commandOptions.cwl.matches("workflow_metagenomics_kraken.cwl")) {
} else if (commandOptions.cwl.matches("workflow_metagenomics.cwl") || commandOptions.cwl.matches("workflow_metagenomics_read_annotation.cwl")) {
log.info("Running metagenomics workflow");
ArrayList<Domain> domains = Search.getProjectFromRDF(commandOptions, connection);
CommandOptionsMetagenomics commandOptionsMetagenomics = new CommandOptionsMetagenomics(args);
......@@ -82,7 +82,7 @@ public class Yaml {
throw new Exception("Investigation is obligatory due to demultiplexing complexity");
}
log.info("Demultiplexing for " + commandOptionsDemultiplexing.project + " " + commandOptionsDemultiplexing.investigation);
String folderQuery = "/" + connection.irodsAccount.getZone() + "/landingzone/projects/" + commandOptions.project;
String folderQuery = "/" + connection.irodsAccount.getZone() + "/landingzone/projects/" + commandOptions.project + "/" + commandOptionsDemultiplexing.investigation;
ArrayList<Domain> domains = Search.getProjectFromRDF(connection, folderQuery, commandOptionsDemultiplexing.investigation);
for (Domain domain : domains) {
NGTax.generateDemultiplexWorkflow(commandOptionsDemultiplexing, connection, domain);
......@@ -91,6 +91,7 @@ public class Yaml {
log.info("Generating HDT workflow files");
CommandOptionsHDT commandOptionsHDT = new CommandOptionsHDT(args);
Set<String> folders = Search.getOUFromiRODS(connection, commandOptionsHDT);
log.info("Found " + folders.size() + " observational units");
String search = Search.makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, commandOptionsHDT.observationUnit, commandOptionsHDT.assay, connection);
Set<String> hdts = Search.getAllHDT(search, connection);
Set<String> analysis = new HashSet<>();
......
......@@ -14,11 +14,9 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Map;
import java.util.Scanner;
import java.util.Timer;
import java.util.*;
import java.util.concurrent.TimeUnit;
import static nl.munlock.kubernetes.Kubernetes.checkKubernetesJobs;
......@@ -26,11 +24,15 @@ import static nl.munlock.kubernetes.Kubernetes.checkKubernetesJobs;
public class AppTest {
private String project = "P_MIB-Amplicon"; // "P_MIB-Amplicon"; // "P_FIRM-Project";// "P_E2BN"; // ""P_EXPLODIV"; // "P_FIRM-Project"; // "P_SIAM"; // "P_NWO_THP"; // "P_UNLOCK";
private String investigation = "I_Poultry_16S_MIB"; // "I_SRA_Amplicon"; // "I_Mocks"; // "I_BIOGAS"; // "I_FIRM-Broilers"; // I_InvestigationID"; // "I_UNCOUPLED"; // "I_CAMI"; // "I_UNCOUPLED";
private String[] ngtaxReadLength = {"70", "100"}; //"100", "70", "120"}; //{"70", "100", "120"};
private String[] ngtaxReadLength = {"100"}; //"100", "70", "120"}; //{"70", "100", "120"};
private String minimumThreshold = "0.001";
private String referenceDB = "/unlock/references/databases/Silva/SILVA_138_SSURef_tax_silva.fasta.gz";
private String study = "%";
private String observationUnit = "%";
private String assay = "%";
private String limit = "150";
private String reset = "";
// Performing the quality workflow test
@Test
......@@ -49,33 +51,35 @@ public class AppTest {
@Test
public void testNGTAX() throws InterruptedException {
while (true) {
// String[] studies = {"S_Mocks_3_4"};
try {
String[] studies = {"S_Cargill_study_Hugo", "S_Carus_study", "S_Chicken_ceca_Hellen", "S_DIMI_CHICKen", "S_Emily_Frehen", "S_EndLESS", "S_FIRM", "S_FIRM2", "S_FIRM3", "S_hiseq_jerine", "S_Jerine", "S_jerine_fmt", "S_MockOnly"};
for (String study : studies) {
// for (String study : studies) {
referenceDB = "/unlock/references/databases/Silva/SILVA_138.1_SSURef_tax_silva.fasta.gz";
project = "P_MIB-Amplicon";
investigation = "I_Poultry_16S_MIB";
project = "P_E2BN";
investigation = "%";
// study = "%";
assay = "%";
// assay = "%";
limit = "150";
for (String readLength : ngtaxReadLength) {
String[] args = {
"-cwl", "workflow_ngtax.cwl",
"-id", "AmpliconAnalysis_NGTAX_Silva138.1",
"-wid", "AmpliconAnalysis_NGTAX_Silva138.1",
"-project", project,
"-investigation", investigation,
"-study", study,
"-assay", assay,
"-length", readLength,
"-referenceDB", referenceDB,
"-overwrite",
// "-overwrite",
"-memory", "6000",
"-threads", "1.5"
"-threads", "2"
};
// App.main(args);
}
App.main(args);
// }
// }
runKubernetes();
return;
}
return;
} catch (Exception e) {
e.printStackTrace();
TimeUnit.MINUTES.sleep(5);
......@@ -86,24 +90,25 @@ public class AppTest {
// Performing the NGTAX demultiplex test
@Test
public void testNGTAXDemultiplexing() throws Exception {
project = "P_MIB-Amplicon";
investigation = "I_Poultry_16S_MIB";
project = "P_E2BN";
investigation = "I_PRED";
String[] args = {
"-cwl", "workflow_demultiplexing.cwl",
"-id", "Demultiplexing_NGTAX",
"-wid", "Demultiplexing_NGTAX",
"-project", project,
"-investigation", investigation,
"-memory", "6000"
};
// App.main(args);
App.main(args);
project = "ampliconlibraries";
String[] args2 = {
"-kubernetes",
"-project", project,
"-investigation", investigation,
"-limit", "100",
"-reset",
"-delay", "30",
"-delay", "0",
// "-debug",
"-disableProv"
};
......@@ -114,26 +119,33 @@ public class AppTest {
// Performing the HDT workflow
@Test
public void testHDT() throws Exception {
// project = "P_MIB-Amplicon";
// investigation = "I_Poultry_16S_MIB";
// study = "S_MockOnly";
// project = "P_MIB-Amplicon"; // P_E2BN
// project = "P_E2BN";
// investigation = "I_PRED";
// investigation = "I_Mocks";
// investigation = "I_Poultry_16S_MIB";
// study = "%"; // "S_PREDIMED_Microbiota"; // "S_MockOnly";
// observationUnit = "Mock";
project = "P_E2BN";
investigation = "I_PRED";
// study = "S_PREDIMED_Microbiota";
project = "P_MIB-Amplicon";
investigation = "I_Mocks";
study = "S_Mocks_3_4";
// project = "P_MIB-Amplicon";
// investigation = "I_Mocks";
// study = "S_Mocks_3_4";
String[] args = {
"-cwl", "/unlock/infrastructure/cwl/irods/irods_hdt.cwl",
"-id", "hdt_creation",
"-wid", "hdt_creation",
"-project", project,
"-investigation", investigation,
"-study", study,
"-memory", "200000", // "6000",
// "-study", study,
// "-observationUnit", observationUnit,
"-memory", "6000", // "200000", // "6000",
"-overwrite",
"-threads", "2" //,
};
App.main(args);
// Ensure hdt overwrite
reset = "-reset";
observationUnit = "%";
runKubernetes();
}
......@@ -146,7 +158,7 @@ public class AppTest {
"-project", project,
"-investigation", investigation,
"-memory", "100000",
"-threads", "30" }; // "-yaml","",
"-threads", "30"}; // "-yaml","",
App.main(args);
// runKubernetes();
}
......@@ -156,17 +168,21 @@ public class AppTest {
public void testMetaGenomics() throws Exception {
// pproject = "P_FIRM-Project";
// investigation = "I_FIRM-Broilers";
project = "P_UNLOCK";
investigation = "I_INVESTIGATION_TEST";
// project = "P_UNLOCK";
// investigation = "I_INVESTIGATION_TEST";
// study = "S_StudyID_1";
project = "P_Deltares";
investigation = "I_vitens";
String[] args = {
"-cwl", "workflow_metagenomics.cwl", // "workflow_metagenomics_kraken.cwl",
"-wid", "metagenomics",
"-cwl", "workflow_metagenomics_read_annotation.cwl", // "workflow_metagenomics.cwl",
"-wid", "metagenomics-read-annotation-samsa2",
"-project", project,
"-investigation", investigation,
// "-study", study,
"-memory", "50000", // "250000"
"-threads", "20",
"-memory", "350000", // "250000"
"-threads", "60",
"-level", "Assay"
};
App.main(args);
......@@ -180,7 +196,7 @@ public class AppTest {
"-cwl", "workflow_indexer.cwl",
"-project", project,
"-investigation", investigation,
"......" }; //, "-id", "Spades_test_run_new_yaml", "-project", "P_NWO_unlock_test", "-memory", "100000", "-threads", "30"}; // "-yaml","",
"......"}; //, "-id", "Spades_test_run_new_yaml", "-project", "P_NWO_unlock_test", "-memory", "100000", "-threads", "30"}; // "-yaml","",
App.main(args);
// runKubernetes();
}
......@@ -198,7 +214,7 @@ public class AppTest {
"-threads", "5",
"-memory", "10000",
};
App.main(args);
// App.main(args);
try {
String[] argsK = {
"-kubernetes",
......@@ -221,17 +237,20 @@ public class AppTest {
"-project", project,
"-investigation", investigation,
"-study", study,
// "-observationUnit", "O_Mock%",
"-limit", "150",
"-reset"
"-observationUnit", observationUnit,
"-limit", limit,
reset
// "-priority", "1000"
};
App.main(args);
// Removing empty elements
List<String> list = new ArrayList<>(Arrays.asList(args));
list.removeAll(Arrays.asList("", null));
App.main(list.toArray(new String[0]));
}
@Test
public void startManager() throws Exception {
String[] args = { "-manager" };
String[] args = {"-manager"};
App.main(args);
}
......@@ -245,6 +264,7 @@ public class AppTest {
};
App.main(args);
}
@Test
public void runReferences() throws Exception {
String[] args = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment