Commit 23468a9f authored by Koehorst, Jasper's avatar Koehorst, Jasper
Browse files

search and yaml improvements

parent 7640046a
......@@ -21,7 +21,6 @@ import org.irods.jargon.core.query.*;
import org.jermontology.ontology.JERMOntology.domain.Assay;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
......@@ -32,10 +31,10 @@ public class Search {
/**
*
* @param commandOptions
* @param connection
* @return
* @throws Exception
* @param commandOptions command options object
* @param connection irods connection object
* @return list of assays from an rdf file
* @throws Exception general exceptions
*/
public static ArrayList<Assay> getAssaysFromRDF(CommandOptionsIRODS commandOptions, Connection connection) throws Exception {
log.info("Obtaining assay files");
......@@ -50,6 +49,8 @@ public class Search {
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%PROVENANCE%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "%/Step_%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%ttl");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.NOT_LIKE, ".%");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
......@@ -70,13 +71,16 @@ public class Search {
ArrayList<String> paths = new ArrayList<>();
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
if (path.contains("PROVENANCE")) {
if (path.toLowerCase().contains("provenance")) {
// ignore all PROVENANCE objects
} else {
paths.add(path);
File parentFile = new File("." + new File(path).getParent());
// If file does not exists...
if (!new File("." + path).exists()) {
IRODSFileInputStream irodsFileInputStream = connection.fileFactory.instanceIRODSFileInputStream(path);
new File("." + new File(path).getParent()).mkdirs();
// Creat parent folders to preserve structure
parentFile.mkdirs();
FileOutputStream fileOutputStream = new FileOutputStream(new File("." + path));
IOUtils.copy(irodsFileInputStream, fileOutputStream);
irodsFileInputStream.close();
......@@ -90,8 +94,9 @@ public class Search {
int filePathNumber = 0;
for (String path : paths) {
filePathNumber = filePathNumber + 1;
System.out.print(filePathNumber + "\r");
System.err.println(path);
if (filePathNumber % 10 == 0) {
log.info("Parsed " + filePathNumber + " files");
}
if (!new File(path).isHidden()) {
Domain domainTemp = new Domain("file://" + "." + path);
domain.getRDFSimpleCon().getModel().add(domainTemp.getRDFSimpleCon().getModel());
......@@ -116,7 +121,6 @@ public class Search {
assays.add(domain.make(DNASeqAssay.class, assayIRI));
break;
case "PairedSequenceDataSet":
break;
case "SingleSequenceDataSet":
break;
default :
......@@ -150,73 +154,67 @@ public class Search {
folderQuery += "%";
}
// Replace multiple %%% by one %
folderQuery.replaceAll("%+","%");
folderQuery = folderQuery.replaceAll("%+","%");
return folderQuery;
}
public static void getAllYamls(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, FileNotFoundException {
// get All processed folders
// Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtains all yaml files
String search = "/" + connection.irodsAccount.getZone() +"/projects/" + commandOptionsKubernetes.project + "%";
log.info("Searching in " + search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.LIKE, "cwl");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_UNITS, QueryConditionOperators.LIKE, "waiting");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
// Set limit?
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(50000);
IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
// Yaml file... obtain, parse, check destination... if already exists don't do it?
String yaml = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
log.info("Yaml found: " + yaml);
Generic.downloadFile(connection, new File(yaml));
Scanner scanner = new Scanner(new File("." + yaml));
// boolean destinationCheck = false;
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
if (line.startsWith("destination: ")) {
// destinationCheck = true;
// log.info("Analysing " + line);
String destination = line.split(" ")[1];
// When not in processed, will do a final check again to see if it exists if not assign to kube
if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
log.info("Processing " + yaml);
Kubernetes.yamls.add(yaml);
} else {
log.info("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists, need to change metadata value");
}
break;
}
}
scanner.close();
// If the destination parameter was not there than this is likely an independent yaml run (e.g. maintenance)
// if (!destinationCheck)
// Kubernetes.yamls.add(yaml);
}
}
// public static void getAllYamls(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, FileNotFoundException {
//
// // get All processed folders
// // Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
//
// IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
//
// // Obtains all yaml files
// String search = "/" + connection.irodsAccount.getZone() +"/projects/" + commandOptionsKubernetes.project + "%";
// log.info("Searching in " + search);
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, search);
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.yaml");
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_NAME, QueryConditionOperators.LIKE, "cwl");
// queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_META_DATA_ATTR_UNITS, QueryConditionOperators.LIKE, "waiting");
//
// queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
// queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
//
// // Set limit?
// IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(50000);
//
// IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
// IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
//
// List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
//
// for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
// // Yaml file... obtain, parse, check destination... if already exists don't do it?
// String yaml = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
// log.debug("Yaml found: " + yaml);
// Generic.downloadFile(connection, new File(yaml));
//
// Scanner scanner = new Scanner(new File("." + yaml));
//
// while (scanner.hasNextLine()) {
// String line = scanner.nextLine();
// if (line.startsWith("destination: ")) {
// String destination = line.split(" ")[1];
// // When not in processed, will do a final check again to see if it exists if not assign to kube
// if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
// log.debug("Processing " + yaml);
// Kubernetes.yamls.add(yaml);
// } else {
// log.debug("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists, need to change metadata value");
// }
// break;
// }
// }
// scanner.close();
// }
// }
public static void getAllUnprocessedReferences(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, IOException, InterruptedException, ApiException {
// get All processed folders
// Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
HashSet<String> references = new HashSet<>();
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtains unprocessed folders...
......@@ -243,6 +241,7 @@ public class Search {
index = index + 1;
// Yaml file... obtain, parse, check destination... if already exists don't do it?
String yaml = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
Generic.downloadFile(connection, new File(yaml));
Scanner scanner = new Scanner(new File("." + yaml));
......@@ -254,31 +253,30 @@ public class Search {
// When not in processed, will do a final check again to see if it exists if not assign to kube
if (!connection.fileFactory.instanceIRODSFile(destination).exists()) {
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
references.add(path);
// log.info("Processing " + index + " of " + irodsQueryResultSetResults.size() + " " + destination);
} else {
// log.info("Skipping analysis of " + new File(yaml).getName() + " as destination file already exists, need to change metadata value");
Kubernetes.yamls.add(path);
log.debug("Processing " + index + " of " + irodsQueryResultSetResults.size() + " " + destination);
}
// else {
// log.info("Skipping analysis of " + new File(yaml).getName() + " as destination file already exists, need to change metadata value");
// }
break;
}
if (references.size() > 10) {
if (Kubernetes.yamls.size() == 10) {
log.info("Starting kubernetes");
Kubernetes.yamls = references;
Kubernetes.createJobs(commandOptionsKubernetes, connection);
// Reset references...
references = new HashSet<>();
Kubernetes.yamls = new HashSet<>();
}
}
scanner.close();
}
}
public static HashSet<String> getAllUnprocessed(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, IOException {
public static void getAllUnprocessed(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException, IOException, InterruptedException, ApiException {
// get All processed folders
Set<String> processed = getAllProcessed(commandOptionsKubernetes, connection);
HashSet<String> assays = new HashSet<>();
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtains unprocessed folders...
......@@ -319,8 +317,14 @@ public class Search {
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
log.info("Processing " + line);
Kubernetes.yamls.add(path);
if (Kubernetes.yamls.size() > 10) {
log.info("Starting kubernetes");
Kubernetes.createJobs(commandOptionsKubernetes, connection);
// Reset references...
Kubernetes.yamls = new HashSet<>();
}
} else {
log.info("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists, need to change metadata value");
log.debug("Skipping analysis of " + new File(yaml).getName() + " as destination folder already exists, need to change metadata value");
}
break;
}
......@@ -328,7 +332,6 @@ public class Search {
}
scanner.close();
}
return assays;
}
private static Set<String> getAllProcessed(CommandOptionsKubernetes commandOptionsKubernetes, Connection connection) throws GenQueryBuilderException, JargonException, JargonQueryException {
......
......@@ -10,6 +10,7 @@ import io.kubernetes.client.apis.BatchV1Api;
import io.kubernetes.client.apis.CoreV1Api;
import io.kubernetes.client.custom.Quantity;
import io.kubernetes.client.models.*;
import io.kubernetes.client.proto.V1;
import io.kubernetes.client.util.Config;
import nl.munlock.Generic;
import nl.munlock.irods.Connection;
......@@ -54,6 +55,8 @@ public class Kubernetes {
V1Job v1Job = generateKubernetesJobObject(commandOptionsKubernetes, yaml, connection);
if (v1Job == null) continue;
// Sleeping for 1 minute...
while (totalItems >= commandOptionsKubernetes.limit) {
log.info("Sleeping for 1 minute as there are " + totalItems + " jobs running");
......@@ -90,8 +93,8 @@ public class Kubernetes {
Search.getAllUnprocessedReferences(commandOptionsKubernetes, connection);
Kubernetes.createJobs(commandOptionsKubernetes, connection);
}
Search.getAllYamls(commandOptionsKubernetes, connection);
Kubernetes.createJobs(commandOptionsKubernetes, connection);
// Search.getAllYamls(commandOptionsKubernetes, connection);
// Kubernetes.createJobs(commandOptionsKubernetes, connection);
}
......@@ -171,6 +174,11 @@ public class Kubernetes {
// Load yaml file for cores and memory restrictions?
log.info("Loading ." + yamlFile);
fixClass("." + yamlFile);
// if (!yamlFile.contains("O_105_7.F5.S1.CA.31.8.17/metagenomics.yaml")) {
// return null;
// }
YamlReader reader = new YamlReader(new FileReader("." + yamlFile));
// Hack as ngtax currently cannot handle the -!nl.... paths in the yaml file
Workflow workflow;
......@@ -249,20 +257,11 @@ public class Kubernetes {
command.add("bash");
containerItem.command(command);
// TESTING TO SEE CAN WE MOUNT LOCAL DIRS?
// Unlock mount
containerItem.addVolumeMountsItem(new V1VolumeMount().name("unlock").mountPath("/unlock"));
// HashMap
// HashMap TODO make this a secret
HashMap<String, String> envMap = new HashMap<>();
envMap.put("irodsHost",commandOptions.host);
envMap.put("irodsPort", String.valueOf(commandOptions.port));
envMap.put("irodsUserName",commandOptions.username);
envMap.put("irodsPassword",commandOptions.password);
envMap.put("irodsZone",commandOptions.zone);
envMap.put("irodsAuthScheme",commandOptions.authentication);
envMap.put("irodsSSL",commandOptions.sslPolicyString);
envMap.put("irodsCwd","/");
envMap.put("PATH", "/root/.sdkman/candidates/maven/current/bin:/root/.sdkman/candidates/java/current/bin:/root/.sdkman/candidates/gradle/current/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin");
List<V1EnvVar> env = new ArrayList();
......@@ -274,37 +273,63 @@ public class Kubernetes {
}
containerItem.env(env);
// Set the name
containerItem.name("cwl-" + new File(yamlFile).getName().toLowerCase().replaceAll("[_\\.]", ""));
containerItem.addEnvItem(setSecret("irodsHost","unlock-secret", "irodsHost"));
containerItem.addEnvItem(setSecret("irodsPort","unlock-secret", "irodsPort"));
containerItem.addEnvItem(setSecret("irodsUserName","unlock-secret", "irodsUserName"));
containerItem.addEnvItem(setSecret("irodsZone","unlock-secret", "irodsZone"));
containerItem.addEnvItem(setSecret("irodsAuthScheme","unlock-secret", "irodsAuthScheme"));
containerItem.addEnvItem(setSecret("irodsHome","unlock-secret", "irodsHome"));
containerItem.addEnvItem(setSecret("irodsCwd","unlock-secret", "irodsCwd"));
containerItem.addEnvItem(setSecret("irodsPassword","unlock-secret", "irodsPassword"));
containerItem.addEnvItem(setSecret("irodsSSL","unlock-secret", "irodsSSL"));
// Job specifications
V1JobSpec specs = new V1JobSpec();
specs.ttlSecondsAfterFinished(1000);
specs.setTtlSecondsAfterFinished(1000);
specs.setBackoffLimit(1);
V1JobSpec v1JobSpec = new V1JobSpec();
v1JobSpec.ttlSecondsAfterFinished(1000);
v1JobSpec.setTtlSecondsAfterFinished(1000);
v1JobSpec.setBackoffLimit(1);
// 60 min runtime, disabled as when queue is large jobs get killed when not even started
// specs.activeDeadlineSeconds((long) 3600);
specs.template(template);
v1JobSpec.template(template);
V1PodSpec spec2 = new V1PodSpec();
V1PodSpec v1PodSpec = new V1PodSpec();
// https://rancher.com/docs/k3s/latest/en/storage/
spec2.addVolumesItem(new V1Volume().name("unlock").persistentVolumeClaim(new V1PersistentVolumeClaimVolumeSource().claimName("unlock")));
spec2.addContainersItem(containerItem);
spec2.restartPolicy("Never");
template.spec(spec2);
// spec2.setPriority(1000);
v1PodSpec.addVolumesItem(new V1Volume().name("unlock").persistentVolumeClaim(new V1PersistentVolumeClaimVolumeSource().claimName("unlock")));
v1PodSpec.addContainersItem(containerItem);
v1PodSpec.restartPolicy("Never");
template.spec(v1PodSpec);
// Set priority level
// v1PodSpec.setPriority(commandOptions.priority);
V1ObjectMeta metadata = new V1ObjectMeta();
metadata.generateName("cwl-" + new File(yamlFile).getName().toLowerCase().replaceAll("[_\\.]", "") +"-");
V1ObjectMeta v1ObjectMeta = new V1ObjectMeta();
v1ObjectMeta.generateName("cwl-" + new File(yamlFile).getName().toLowerCase().replaceAll("[_\\.]", "") +"-");
V1Job v1Job = new V1Job();
v1Job.apiVersion("batch/v1");
v1Job.kind("Job");
v1Job.metadata(metadata);
v1Job.spec(specs);
v1Job.metadata(v1ObjectMeta);
v1Job.spec(v1JobSpec);
return v1Job;
}
private static V1EnvVar setSecret(String name, String secretName, String secretKey) {
// Container secrets? TODO validate...
V1EnvVarSource v1EnvVarSource = new V1EnvVarSource();
V1SecretKeySelector v1SecretKeySelector = new V1SecretKeySelector();
v1SecretKeySelector.setName(secretName);
v1SecretKeySelector.setKey(secretKey);
v1EnvVarSource.secretKeyRef(v1SecretKeySelector);
V1EnvVar v1EnvVar = new V1EnvVar();
v1EnvVar.setValueFrom(v1EnvVarSource);
v1EnvVar.setName(name);
return v1EnvVar;
}
}
......@@ -15,35 +15,17 @@ public class CommandOptionsKubernetes extends CommandOptionsIRODS {
@Parameter(names = {"--help", "-h", "-help"})
public boolean help = false;
// @Parameter(names = {"-cwl"}, description = "CWL file stored on iRODS")
// public String cwl;
// @Parameter(names = {"-assayType"}, description = "Specify what the assay type is (RNA/DNA/Amplicon)", required = true)
// public String assayType;
@Parameter(names = {"-limit"}, description = "Total number of items in nl.wur.ssb.kubernetes")
public int limit = 100;
// @Parameter(names = {"-memory"}, description = "Kubernetes pod memory request limit in megabytes")
// public int memoryRequest = 1000;
// @Parameter(names = {"-memorylimit"}, description = "Kubernetes pod memory limit in megabytes")
// public int memoryLimit = 5000;
// @Parameter(names = {"-cpu"}, description = "Kubernetes pod cpu request in megabytes")
// public double cpuRequest = 3;
// @Parameter(names = {"-cpulimit"}, description = "Kubernetes pod cpu limit in megabytes")
// public double cpuLimit = 4;
@Parameter(names = {"-priority"}, description = "Kubernetes priority value")
public int priority = 0;
public CommandOptionsKubernetes(String args[]) {
try {
JCommander jc = new JCommander(this);
jc.parse(args);
System.err.println(this.sslPolicyString);
// if any of the nl.wur.ssb.options is null...
boolean failed = false;
for (Field f : getClass().getDeclaredFields())
......
......@@ -56,8 +56,10 @@ public class ENA {
printWriter.write(command);
printWriter.close();
logger.warn("ECHO!, not executing the curl...");
ExecCommand execCommand = new ExecCommand("echo sh curl.sh");
// logger.warn("ECHO!, not executing the curl...");
// ExecCommand execCommand = new ExecCommand("echo sh curl.sh");
ExecCommand execCommand = new ExecCommand("sh curl.sh");
if (execCommand.getExit() > 0) {
throw new Exception("Execution of curl failed");
......@@ -124,37 +126,13 @@ public class ENA {
continue;
}
lineage = lineage.toLowerCase().replaceAll(" ", "_");
lineage = lineage.toLowerCase().replaceAll(" +", "_");
if (lineage.startsWith("bacteria")) {
workflow.codon = tax2codon.get(commandOptions.taxon);
workflow.bacteria = true;
logger.info("Codon table " + workflow.codon + " will be used");
}
// workflow.bacteria = true;
// // Codon table 4 for sure
// if (lineage.contains("entomoplasmatales") || lineage.contains("mycoplasmatales")) {
// workflow.codon = 4;
// }
// // The unsure ones
// else if (lineage.contains("spirochaetes")) {
// continue;
// }
// // codon table 11 for sure?...
// else if (
// lineage.contains("firmicutes") ||
// lineage.contains("actinobacteria") ||
// lineage.contains("proteobacteria") ||
// lineage.contains("chlamydiae") ||
// lineage.contains("chlorobi") ||
// lineage.contains("fusobacteria") ||
// lineage.contains("bacteroidetes")
// )
// {
// workflow.codon = 11;
// } else {
// throw new Exception("Codon table not set for anything in " + lineage);
// }
// }
// Destination
workflow.destination = "/" + connection.irodsAccount.getZone() + "/references/genomes/" + lineage + "/" + commandOptions.id;
......
......@@ -72,7 +72,7 @@ public class NGTax {
IRODSFile destFile;
log.info("Read length set to " + commandOptions.read_len);
log.debug("Read length set to " + commandOptions.read_len);
// if (commandOptions.read_len > 0) {
// workflow.for_read_len = commandOptions.read_len;
// workflow.rev_read_len = commandOptions.read_len;
......@@ -81,7 +81,12 @@ public class NGTax {
if (files.size() == 0) {
log.warn("No files found");
} else if (files.size() == 1) {
log.error("Single end files not supported but working on it... Amplicon");
log.debug("Single end files detected");
if (files.get(0).getClassTypeIri().endsWith("PairedSequenceDataSet")) {
log.error("Paired sequence dataset detected but only 1 file was found\n" +
"Paired dataset should not be interleaved");
continue;
}
// IN PROGRESS... mr..DNA -.-
workflow.addForward_reads(files.get(0).getFilePath());
SequenceDataSet sequenceDataSet = (SequenceDataSet) ampliconAssay.getAllFile().get(0);
......@@ -100,7 +105,7 @@ public class NGTax {
log.error("RDF file does not contain read length information? Please provide read length information");
}
} else if (files.size() == 2) {
log.info("Paired data detected");
log.debug("Paired data detected");
workflow.setReverse_primer(ampliconAssay.getReversePrimer());
workflow.addForward_reads(files.get(0).getFilePath());
......@@ -110,13 +115,15 @@ public class NGTax {
// Evaluate if the set read length matches / smaller than the raw read length
if (commandOptions.read_len == 0 && sequenceDataSet.getReadLength() > 0) {
log.info("Obtaining read length information from RDF file");
log.debug("Obtaining read length information from RDF file");
workflow.setRev_read_len(Math.toIntExact(sequenceDataSet.getReadLength()));
workflow.setFor_read_len(Math.toIntExact(sequenceDataSet.getReadLength()));
} else if (commandOptions.read_len > 0) {
if (commandOptions.read_len > sequenceDataSet.getReadLength())
throw new Exception("Read length of " + commandOptions.read_len + " is larger than " + sequenceDataSet.getReadLength());
log.info("Setting read length to " + commandOptions.read_len + " of " + sequenceDataSet.getReadLength());
if (commandOptions.read_len > sequenceDataSet.getReadLength()) {
log.error("Skipping!!! As ead length of " + commandOptions.read_len + " is larger than " + sequenceDataSet.getReadLength());
continue;
}
log.debug("Setting read length to " + commandOptions.read_len + " of " + sequenceDataSet.getReadLength());
workflow.setRev_read_len(Math.toIntExact(commandOptions.read_len));
workflow.setFor_read_len(Math.toIntExact(commandOptions.read_len));
} else {
......@@ -137,46 +144,51 @@ public class NGTax {
} else {
yamlFileName = commandOptions.id + "_" + workflow.getFor_read_len() + ".yaml";
}
log.info("Yaml filename: " + yamlFileName);
// Save to iRODS
destFile = connection.fileFactory.instanceIRODSFile(ampliconAssay.getLogicalPath() + "/" + yamlFileName);
// Parent folder check with metadata alignment... TODO remove this as this is a temp check
IRODSFile parent = connection.fileFactory.instanceIRODSFile(ampliconAssay.getLogicalPath());
if (parent.exists()) {
log.debug("Yaml filename: " + yamlFileName);
YamlConfig config = new YamlConfig();
config.writeConfig.setQuoteChar(NONE);
YamlWriter writer = new YamlWriter(new FileWriter(yamlFileName), config);
// Save to iRODS
destFile = connection.fileFactory.instanceIRODSFile(ampliconAssay.getLogicalPath() + "/" + yamlFileName);
writer.write(workflow);
writer.close();
YamlConfig config = new YamlConfig();
config.writeConfig.setQuoteChar(NONE);
YamlWriter writer = new YamlWriter(new FileWriter(yamlFileName), config);
// Fix Clazz > Class
Workflow.fixClazz(yamlFileName);
Workflow.fixComments(yamlFileName);
Workflow.fixSample(yamlFileName);
writer.write(workflow);
writer.close();
if (destFile.exists()) {
if (commandOptions.overwrite) {
log.info("Deleting remote file as destination folder does not exists and nl.wur.ssb.yaml information might have changed");
destFile.delete();
} else {
log.info("Destination file already exists " + destFile);
// Fix Clazz > Class
Workflow.fixClazz(yamlFileName);
Workflow.fixComments(yamlFileName);
Workflow.fixSample(yamlFileName);