Commit 0ed48686 authored by Koehorst, Jasper's avatar Koehorst, Jasper
Browse files

sync

parent ded0b89e
Pipeline #16547 canceled with stage
......@@ -15,7 +15,10 @@ import nl.wur.ssb.RDFSimpleCon.api.Domain;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.irods.jargon.core.exception.JargonException;
import org.irods.jargon.core.pub.DataObjectAO;
import org.irods.jargon.core.pub.IRODSGenQueryExecutor;
import org.irods.jargon.core.pub.domain.AvuData;
import org.irods.jargon.core.pub.domain.DataObject;
import org.irods.jargon.core.pub.io.IRODSFileInputStream;
import org.irods.jargon.core.query.*;
import org.jermontology.ontology.JERMOntology.domain.Assay;
......@@ -138,23 +141,23 @@ public class Search {
} else {
folderQuery += "%";
}
if (commandOptions.study.length() > 1) {
if (commandOptions.study != null) {
folderQuery += "/" + commandOptions.study;
} else {
folderQuery += "%";
}
if (commandOptions.observationUnit.length() > 1) {
if (commandOptions.observationUnit != null) {
folderQuery += "/" + commandOptions.observationUnit;
} else {
folderQuery += "%";
}
if (commandOptions.assay.length() > 1) {
if (commandOptions.assay != null) {
folderQuery += "/" + commandOptions.assay;
} else {
folderQuery += "%";
}
// Replace multiple %%% by one %
folderQuery += "%";
folderQuery = folderQuery.replaceAll("%+","%");
return folderQuery;
}
......@@ -211,16 +214,9 @@ public class Search {
// }
// }
public static void getAllUnprocessedReferences(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, IOException, InterruptedException, ApiException {
// get All processed folders
// Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
public static void getAllUnprocessedReferences(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection, String search) throws GenQueryBuilderException, JargonException, JargonQueryException, IOException, InterruptedException, ApiException {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtains unprocessed folders...
String search = "/" + commandOptionsKubernetes.zone + "/references/genomes/bacteria%";
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.LIKE, "cwl");
......@@ -252,14 +248,19 @@ public class Search {
// log.info("Analysing " + line);
String destination = line.split(" ")[1] + "/" + new File(yaml).getName().replaceAll(".yaml",".ttl.gz");
// When not in processed, will do a final check again to see if it exists if not assign to kube
if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
if (search.contains("ampliconlibraries")) {
destination = line.split(" ")[1];
if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
Kubernetes.yamls.add(path);
log.info("Processing " + index + " of " + irodsQueryResultSetResults.size() + " " + destination);
}
} else if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
Kubernetes.yamls.add(path);
log.debug("Processing " + index + " of " + irodsQueryResultSetResults.size() + " " + destination);
} else {
fixAVU(connection, yaml);
}
// else {
// log.info("Skipping analysis of " + new File(yaml).getName() + " as destination file already exists, need to change metadata value");
// }
break;
}
if (Kubernetes.yamls.size() > 0) {
......@@ -281,7 +282,22 @@ public class Search {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtains unprocessed folders...
String search = "/" + commandOptionsKubernetes.zone + "/projects/" + commandOptionsKubernetes.project + "/%";
String search = "/" + commandOptionsKubernetes.zone + "/projects/" + commandOptionsKubernetes.project + "%";
if (commandOptionsKubernetes.investigation != null) {
search = search + "/" + commandOptionsKubernetes.investigation;
}
if (commandOptionsKubernetes.study != null) {
search = search + "/" + commandOptionsKubernetes.study;
}
if (commandOptionsKubernetes.observationUnit != null) {
search = search + "/" + commandOptionsKubernetes.observationUnit;
}
if (commandOptionsKubernetes.assay != null) {
search = search + "/%" + commandOptionsKubernetes.assay;
}
search += "%";
log.info("Searching in " + search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
......@@ -299,7 +315,12 @@ public class Search {
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
int counter = 0;
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
counter = counter + 1;
if (counter % 100 == 0) {
log.info("Parsed " + counter + " jobs");
}
// Yaml file... obtain, parse, check destination... if already exists don't do it?
String yaml = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
Generic.downloadFile(connection, new File(yaml));
......@@ -325,7 +346,7 @@ public class Search {
Kubernetes.yamls = new HashSet<>();
}
} else {
log.debug("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists, need to change metadata value");
fixAVU(connection, yaml);
}
break;
}
......@@ -335,6 +356,29 @@ public class Search {
}
}
private static void fixAVU(Connection connection, String yaml) throws JargonException {
log.info("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists");
DataObjectAO dataObjectAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataObjectAO(connection.irodsAccount);
List<MetaDataAndDomainData> metaDataAndDomainDataList = dataObjectAO.findMetadataValuesForDataObject(yaml);
AvuData avu = null;
for (MetaDataAndDomainData metaDataAndDomainData : metaDataAndDomainDataList) {
avu = metaDataAndDomainData.asAvu();
if (metaDataAndDomainData.getAvuAttribute().contains("cwl") &&
metaDataAndDomainData.getAvuUnit().contains("waiting")) {
break;
}
}
if (avu != null) {
log.info("Updating metadata field");
dataObjectAO.deleteAVUMetadata(yaml, avu);
avu.setUnit("finished");
dataObjectAO.addAVUMetadata(yaml, avu);
}
// AvuData avuData = AvuData.instance("cwl", expectedAttribValue, "finished");
// dataObjectAO.addAVUMetadata(yaml,avuData);
}
private static Set<String> getAllProcessed(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException {
// Obtains unprocessed folders...
String search = "/" + commandOptionsKubernetes.zone + "/projects/" + commandOptionsKubernetes.project + "/%/processed/%";
......
......@@ -59,7 +59,7 @@ public class Kubernetes {
// Sleeping for 1 minute...
while (totalItems >= commandOptionsKubernetes.limit) {
log.info("Sleeping for 1 minute as there are " + totalItems + " jobs running");
log.info("Sleeping for 1 minute as there are " + totalItems + " jobs running and have " + yamls.size() + " jobs in total");
// Count down?
for (int i = 60; i >= 0; i--) {
System.out.print("Sleeping for " + i + " seconds\r");
......@@ -90,7 +90,11 @@ public class Kubernetes {
Search.getAllUnprocessed(commandOptionsKubernetes, connection);
Kubernetes.createJobs(commandOptionsKubernetes, connection);
if (commandOptionsKubernetes.project.contains("references")) {
Search.getAllUnprocessedReferences(commandOptionsKubernetes, connection);
Search.getAllUnprocessedReferences(commandOptionsKubernetes, connection, "/" + commandOptionsKubernetes.zone + "/references/genomes/bacteria%");
Kubernetes.createJobs(commandOptionsKubernetes, connection);
}
if (commandOptionsKubernetes.project.contains("ampliconlibraries")) {
Search.getAllUnprocessedReferences(commandOptionsKubernetes, connection, "/" + commandOptionsKubernetes.zone + "/landingzone/ampliconlibraries%");
Kubernetes.createJobs(commandOptionsKubernetes, connection);
}
// Search.getAllYamls(commandOptionsKubernetes, connection);
......@@ -175,10 +179,6 @@ public class Kubernetes {
log.info("Loading ." + yamlFile);
fixClass("." + yamlFile);
// if (!yamlFile.contains("O_105_7.F5.S1.CA.31.8.17/metagenomics.yaml")) {
// return null;
// }
YamlReader reader = new YamlReader(new FileReader("." + yamlFile));
// Hack as ngtax currently cannot handle the -!nl.... paths in the yaml file
Workflow workflow;
......@@ -275,7 +275,9 @@ public class Kubernetes {
containerItem.env(env);
// Set the name
containerItem.name("cwl-" + new File(yamlFile).getName().toLowerCase().replaceAll("[_\\.]", ""));
String name = "cwl-" + new File(yamlFile).getName().toLowerCase().replaceAll("[_\\.]", "");
if (name.length() > 62) name = name.substring(0, 62);
containerItem.name(name);
// Setting secrets
containerItem.addEnvItem(setSecret("irodsHost","unlock-secret", "irodsHost"));
......
......@@ -29,17 +29,18 @@ public class CommandOptionsIRODS {
// ISA
@Parameter(names = {"-project"}, description = "Specify for which project this nl.wur.ssb.workflow should be executed")
public String project = "%";
public String project;
@Parameter(names = {"-investigation"}, description = "Specify for which investigation this nl.wur.ssb.workflow should be executed")
public String investigation = "%";
public String investigation;
@Parameter(names = {"-study"}, description = "Specify for which study this nl.wur.ssb.workflow should be executed")
public String study = "%";
public String study;
@Parameter(names = {"-observationUnit"}, description = "Specify for which observational unit this nl.wur.ssb.workflow should be executed")
public String observationUnit;
@Parameter(names = {"-assay"}, description = "Specify for which assay this nl.wur.ssb.workflow should be executed")
public String assay = "%";
public String assay;
@Parameter(names = {"-observationUnit"}, description = "Specify for which observational unit this nl.wur.ssb.workflow should be executed")
public String observationUnit = "%";
}
......@@ -103,13 +103,14 @@ public class ENA {
}
tax2codonFile.close();
int counter = 0;
while (scanner.hasNextLine()) {
String[] line = scanner.nextLine().split("\t");
boolean pass = filter(lookup, line);
if (pass) {
counter = counter + 1;
commandOptions.id = line[lookup.get("accession")];
commandOptions.taxon = Integer.parseInt(line[lookup.get("tax_id")]);
......@@ -131,13 +132,13 @@ public class ENA {
if (lineage.startsWith("bacteria")) {
workflow.codon = tax2codon.get(commandOptions.taxon);
workflow.bacteria = true;
logger.info("Codon table " + workflow.codon + " will be used");
// logger.info("Codon table " + workflow.codon + " will be used");
}
// Destination
workflow.destination = "/" + connection.irodsAccount.getZone() + "/references/genomes/" + lineage + "/" + commandOptions.id;
logger.info(workflow.destination);
logger.info(counter + " " + workflow.destination);
String yamlFileName = commandOptions.id + ".yaml";
YamlWriter writer = new YamlWriter(new FileWriter(yamlFileName));
......@@ -151,7 +152,7 @@ public class ENA {
connection.fileFactory.instanceIRODSFile(workflow.destination).mkdirs();
}
// if (destFile.exists()) continue;
if (destFile.exists()) continue;
while (destFile.exists()) {
destFile.delete();
......
......@@ -38,8 +38,6 @@ public class MetaGenomics {
observationUnits.forEachRemaining(resultLine -> observationUnitsList.add(resultLine.getIRI("ou")));
for (String observationUnitURL : observationUnitsList) {
// if (!observationUnitURL.endsWith("66_5.F5.S2.CA.31.8.17"))
// continue;
log.info("==============================================");
log.info(observationUnitURL);
......
......@@ -42,8 +42,10 @@ public class NGTax {
public static void generateNGTAXWorkflow(CommandOptionsNGTAX commandOptions, Connection connection, ArrayList<Assay> assays) throws Exception {
DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);
log.info("Processing: " + assays.size());
int assayCount = 1;
for (Assay assay : assays) {
log.info("Processing: " + assayCount + " of " + assays.size());
assayCount = assayCount + 1;
String yamlFileName;
// Only amplicon types allowed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment