Commit 5daf99b6 authored by Koehorst, Jasper

identifier and other elements fixed

parent a2b7009d
Pipeline #19831 passed with stage
in 1 minute and 54 seconds
@@ -59,7 +59,7 @@ public class Search {
         queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
         // Set limit?
-        IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(99999);
+        IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(100000);
         IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
         IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
         List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
@@ -83,12 +83,13 @@ public class Search {
             // If file does not exists...
             if (!new File("." + path).exists()) {
                 IRODSFileInputStream irodsFileInputStream = connection.fileFactory.instanceIRODSFileInputStream(path);
-                // Creat parent folders to preserve structure
+                // Create parent folders to preserve structure
                 parentFile.mkdirs();
                 FileOutputStream fileOutputStream = new FileOutputStream(new File("." + path));
                 IOUtils.copy(irodsFileInputStream, fileOutputStream);
                 irodsFileInputStream.close();
                 fileOutputStream.close();
+                log.info("Obtained ." + path);
             }
         }
     }
@@ -98,9 +99,10 @@ public class Search {
         int filePathNumber = 0;
         for (String path : paths) {
             filePathNumber = filePathNumber + 1;
-            if (filePathNumber % 10 == 0) {
-                log.info("Parsed " + filePathNumber + " files");
+            if (filePathNumber % 100 == 0) {
+                log.info("Parsed " + filePathNumber + " files " + domain.getRDFSimpleCon().getModel().size() + " triples");
             }
             if (!new File(path).isHidden()) {
                 Domain domainTemp = new Domain("file://" + "." + path);
                 domain.getRDFSimpleCon().getModel().add(domainTemp.getRDFSimpleCon().getModel());
@@ -418,7 +420,7 @@ public class Search {
             }
         }
         if (avu != null) {
-            log.info("Updating metadata field");
+            log.info("Updating metadata field to finished");
             dataObjectAO.deleteAVUMetadata(yaml, avu);
             avu.setUnit("finished");
             dataObjectAO.addAVUMetadata(yaml, avu);
@@ -527,7 +529,7 @@ public class Search {
      */
     public static ArrayList<Domain> getProjectFromRDF(CommandOptionsYAML commandOptions, Connection connection) throws Exception {
         // Using default project folder
-        String folderQuery = "/" + connection.irodsAccount.getZone() + "/projects/" + commandOptions.project;
+        String folderQuery = "/" + connection.irodsAccount.getZone() + "/projects/" + commandOptions.project + "/" + commandOptions.investigation;
         return getProjectFromRDF(connection, folderQuery, commandOptions.investigation);
     }
@@ -538,7 +540,7 @@ public class Search {
         // Get unprocessed files
         IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
         queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folderQuery);
-        queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%"+investigation+"%ttl");
+        queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%ttl");
         queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
         queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
...
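Note on the 99999 -> 100000 change above: the surviving "// Set limit?" comment suggests the row cap is still an open question. A minimal paging sketch (an assumption, not part of this commit) that uses Jargon's continuation API instead of one large cap, reusing the queryBuilder and irodsGenQueryExecutor from the hunk:

    IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(1000);
    IRODSQueryResultSet resultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
    List<IRODSQueryResultRow> rows = new ArrayList<>(resultSet.getResults());
    // Keep requesting continuation pages until the server reports no more rows
    while (resultSet.isHasMoreRecords()) {
        resultSet = irodsGenQueryExecutor.getMoreResults(resultSet);
        rows.addAll(resultSet.getResults());
    }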
@@ -4,11 +4,16 @@ public class Workflow {
     public int threads = 2;
     public int memory = 5000;
     public boolean provenance = true;
+    public String identifier;

     public void setThreads(int threads) {
         this.threads = threads;
     }

+    public void setThreads(float threads) {
+        this.threads = Math.round(threads);
+    }
+
     public int getThreads() {
         return threads;
     }
@@ -29,4 +34,12 @@ public class Workflow {
     public boolean getProvenance() {
         return provenance;
     }
+
+    public String getIdentifier() {
+        return identifier;
+    }
+
+    public void setIdentifier(String identifier) {
+        this.identifier = identifier;
+    }
 }
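A short usage sketch of the new pieces in Workflow (values are hypothetical): setThreads(float) rounds a fractional thread count to the nearest whole number via Math.round, and identifier is a plain bean property that ends up in the serialized workflow YAML.

    Workflow workflow = new Workflow();
    workflow.setThreads(1.5f);           // stored as Math.round(1.5f) == 2
    workflow.setIdentifier("ASD123");    // hypothetical assay/OU identifier
    assert workflow.getThreads() == 2;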
@@ -8,6 +8,8 @@ import org.apache.commons.lang.StringUtils;
 import java.lang.reflect.Field;

 public class CommandOptionsMetagenomics extends CommandOptionsYAML {

+    @Parameter(names = {"-level"}, description = "Level of analysis (OU or Assay)", required = true)
+    public String level = "OU";
+
     public CommandOptionsMetagenomics(String args[]) {
         try {
...
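The new -level option is only consulted later in MetaGenomics via matches("OU") / matches("Assay"), so any other value silently produces no work. A defensive check along these lines could fail fast (a sketch, not in the commit; ParameterException is JCommander's):

    if (!level.matches("OU|Assay")) {
        throw new com.beust.jcommander.ParameterException(
                "-level must be 'OU' or 'Assay', got: " + level);
    }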
@@ -16,8 +16,8 @@ public class CommandOptionsYAML extends CommandOptionsIRODS {
     public String cwl;

     // This is done at nl.wur.ssb.yaml level
-    @Parameter(names = {"-id"}, description = "Identifier used for the nl.wur.ssb.workflow (could be the nl.wur.ssb.workflow name + hash)", required = true)
-    public String id;
+    @Parameter(names = {"-wid", "-workflowid"}, description = "Identifier used for the nl.wur.ssb.workflow (could be the nl.wur.ssb.workflow name + hash)", required = true)
+    public String wid;

     // @Parameter(names = {"-yaml"}, description = "Yaml irods file name")
     // public String yaml = "";
@@ -26,7 +26,7 @@ public class CommandOptionsYAML extends CommandOptionsIRODS {
     public boolean overwrite = false;

     @Parameter(names = {"-threads"}, description = "Number of threads to use")
-    public int threads = 2;
+    public float threads = 2;

     @Parameter(names = {"-memory"}, description = "Amount of memory needed (in megabytes)")
     public int memory = 5000;
...
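With this rename every caller that referenced commandOptions.id now uses commandOptions.wid, and -threads may carry a fractional value that is rounded at the point of use. A minimal parsing sketch (a hypothetical standalone mirror of the two changed parameters, for illustration only):

    static class Opts {
        @Parameter(names = {"-wid", "-workflowid"}, required = true)
        String wid;
        @Parameter(names = {"-threads"})
        float threads = 2;
    }

    Opts opts = new Opts();
    JCommander.newBuilder().addObject(opts).build()
            .parse("-wid", "run_ab12cd", "-threads", "1.5");
    int threads = Math.round(opts.threads);   // 1.5f -> 2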
 package nl.munlock.yaml;

 import com.esotericsoftware.yamlbeans.YamlWriter;
-import nl.munlock.App;
 import nl.munlock.Generic;
 import nl.munlock.irods.Connection;
 import nl.munlock.kubernetes.Kubernetes;
 import nl.munlock.objects.WorkflowGenomeSync;
 import nl.munlock.options.workflow.CommandOptionsGenomeSync;
-import nl.wur.ssb.RDFSimpleCon.ExecCommand;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.query.QueryExecution;
 import org.apache.jena.query.QueryExecutionFactory;
 import org.apache.jena.query.QuerySolution;
 import org.apache.jena.rdf.model.Model;
 import org.apache.jena.rdf.model.impl.ModelCom;
-import org.apache.jena.tdb.store.Hash;
 import org.apache.log4j.Logger;
 import org.irods.jargon.core.pub.DataObjectAO;
 import org.irods.jargon.core.pub.DataTransferOperations;
@@ -26,10 +23,8 @@ import org.rdfhdt.hdtjena.HDTGraph;
 import java.io.File;
 import java.io.FileWriter;
-import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Scanner;

 public class ENA {
@@ -114,18 +109,20 @@ public class ENA {
             if (pass) {
                 counter = counter + 1;
-                commandOptions.id = line[lookup.get("accession")];
+                commandOptions.wid = line[lookup.get("accession")];
                 int taxon = Integer.parseInt(line[lookup.get("tax_id")]);

                 WorkflowGenomeSync workflow = new WorkflowGenomeSync();
                 // Set everything to default values and overwrite otherwise (constructor does not work?)
                 workflow = setGenomeSyncWorkflowsDefaults(workflow);
-                workflow.threads = commandOptions.threads;
+                workflow.threads = Math.round(commandOptions.threads);
                 workflow.memory = commandOptions.memory;
                 workflow.provenance = false;
-                workflow.setGca(commandOptions.id);
+                workflow.setGca(commandOptions.wid);

                 String lineage = getLineage(model, taxon);
                 if (lineage == null) {
                     continue;
                 }
@@ -139,11 +136,11 @@ public class ENA {
                 }

                 // Destination
-                workflow.destination = "/" + connection.irodsAccount.getZone() + "/references/genomes/" + lineage + "/" + commandOptions.id;
+                workflow.destination = "/" + connection.irodsAccount.getZone() + "/references/genomes/" + lineage + "/" + commandOptions.wid;
                 logger.info(counter + " " + workflow.destination);

-                String yamlFileName = commandOptions.id + ".yaml";
+                String yamlFileName = commandOptions.wid + ".yaml";
                 YamlWriter writer = new YamlWriter(new FileWriter(yamlFileName));
                 writer.write(workflow);
                 writer.close();
...
@@ -36,10 +36,10 @@ public class FASTQC {
         // Fill object
         WorkflowQuality workflow = new WorkflowQuality();
         workflow.memory = commandOptions.memory;
-        workflow.threads = commandOptions.threads;
+        workflow.threads = Math.round(commandOptions.threads);

         // The parent of unprocessed
-        String destination = sequencingAssay.getLogicalPath().replaceAll("/Unprocessed.*", "/processed/") + "/" + commandOptions.id;
+        String destination = sequencingAssay.getLogicalPath().replaceAll("/Unprocessed.*", "/processed/") + "/" + commandOptions.wid;
         workflow.setDestination(destination);
@@ -65,11 +65,11 @@ public class FASTQC {
         // Save to nl.wur.ssb.yaml format
         String yamlName;
-        if (commandOptions.id.length() == 0) {
+        if (commandOptions.wid.length() == 0) {
             // Generate own nl.wur.ssb.yaml name...
             yamlName = assay.getIdentifier() + ".yaml";
         } else {
-            yamlName = commandOptions.id + ".yaml";
+            yamlName = commandOptions.wid + ".yaml";
         }

         // Save to nl.wur.ssb.yaml format
...
@@ -23,7 +23,7 @@ public class HDT {
         DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);

         for (String folder : folders) {
-            String yamlName = commandOptionsHDT.id + ".yaml";
+            String yamlName = commandOptionsHDT.wid + ".yaml";

             // Save to iRODS
             IRODSFile destFile = connection.fileFactory.instanceIRODSFile(folder + "/hdt/" + yamlName);
@@ -49,7 +49,7 @@ public class HDT {
             workflowHDT.setFolder(folder);
             workflowHDT.setDestination(folder + "/hdt/");
             workflowHDT.setMemory(commandOptionsHDT.memory);
-            workflowHDT.setThreads(commandOptionsHDT.threads);
+            workflowHDT.setThreads(Math.round(commandOptionsHDT.threads));
             workflowHDT.setProvenance(false);

             YamlWriter writer = new YamlWriter(new FileWriter(yamlName));
...
@@ -34,7 +34,7 @@ public class Index {
         WorkflowIndexer workflow = new WorkflowIndexer();
         workflow.memory = commandOptions.memory;
-        workflow.threads = commandOptions.threads;
+        workflow.threads = Math.round(commandOptions.threads);

         workflow.setReference_genome("/Data/" + new File(commandOptions.reference).getName());
@@ -49,11 +49,11 @@ public class Index {
         // Save to nl.wur.ssb.yaml format
         String yamlName;
-        if (commandOptions.id.length() == 0) {
+        if (commandOptions.wid.length() == 0) {
             // Generate own nl.wur.ssb.yaml name...
             yamlName = commandOptions.reference + ".yaml";
         } else {
-            yamlName = commandOptions.id + ".yaml";
+            yamlName = commandOptions.wid + ".yaml";
         }

         YamlWriter writer = new YamlWriter(new FileWriter(yamlName));
...
@@ -4,11 +4,9 @@ import com.esotericsoftware.yamlbeans.YamlWriter;
 import nl.munlock.Generic;
 import nl.munlock.irods.Connection;
 import nl.munlock.objects.WorkflowMetagenomics;
-import nl.munlock.objects.WorkflowSpades;
 import nl.munlock.ontology.domain.PairedSequenceDataSet;
 import nl.munlock.ontology.domain.SequenceDataSet;
 import nl.munlock.options.workflow.CommandOptionsMetagenomics;
-import nl.munlock.options.workflow.CommandOptionsSpades;
 import nl.wur.ssb.RDFSimpleCon.ResultLine;
 import nl.wur.ssb.RDFSimpleCon.api.Domain;
 import org.apache.log4j.Logger;
@@ -38,94 +36,199 @@ public class MetaGenomics {
         ArrayList<String> observationUnitsList = new ArrayList<>();
         observationUnits.forEachRemaining(resultLine -> observationUnitsList.add(resultLine.getIRI("ou")));

-        for (String observationUnitURL : observationUnitsList) {
-            log.info("==============================================");
-            log.info(observationUnitURL);
-            // Fill object
-            WorkflowMetagenomics workflow = new WorkflowMetagenomics();
-            workflow.memory = commandOptions.memory;
-            workflow.threads = commandOptions.threads;
-            observation_unit observationUnit = domain.make(observation_unit.class, observationUnitURL);
-            // This needs work, where do we put it?
-            String destination = observationUnit.getLogicalPath().replaceAll("/Unprocessed.*", "/processed/") +"/"+ commandOptions.id;
-            // Getting the files
-            HashSet<String> filePaths = new HashSet<>();
-            for (Sample sample : observationUnit.getAllSample()) {
-                for (Assay assay : sample.getAllAssay()) {
-                    if (assay.getClassTypeIri().endsWith("DNASeqAssay")) {
-                        for (Data_sample data_sample : assay.getAllFile()) {
-                            // Parse each sequence dataset, as we might have overlapping identifiers in the future...
-                            SequenceDataSet sequenceDataSet = (SequenceDataSet) data_sample;
-                            // If file already processed skip...
-                            if (filePaths.contains(new URL(sequenceDataSet.getContentUrl()).getPath())) continue;
-                            if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("Illumina")) {
-                                PairedSequenceDataSet pairedSequenceDataSet = (PairedSequenceDataSet) data_sample;
-                                workflow.addForward_reads(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
-                                workflow.addReverse_reads(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
-                                // Adding to already processed files due to linked paired dataset
-                                filePaths.add(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
-                                filePaths.add(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
-                            } else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("PacBio")) {
-                                if (sequenceDataSet.getFileFormat().getIRI().endsWith("FASTQ")) {
-                                    workflow.addPacbio(new URL(sequenceDataSet.getContentUrl()).getPath());
-                                } else {
-                                    log.error("File type for " + sequenceDataSet.getName() + " not supported");
-                                }
-                            } else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("NanoPore")) {
-                                log.error("NANOPORE NOT YET DONE");
-                                // workflow.addP(assay.getAllFile().get(0).getFilePath());
-                            }
-                            filePaths.add(new URL(sequenceDataSet.getContentUrl()).getPath());
-                        }
-                    }
-                }
-            }
+        // Two levels as option, at OU level see code below or at assay level
+        if (commandOptions.level.matches("Assay")) {
+            for (String observationUnitURL : observationUnitsList) {
+                observation_unit observationUnit = domain.make(observation_unit.class, observationUnitURL);
+                for (Sample sample : observationUnit.getAllSample()) {
+                    for (Assay assay : sample.getAllAssay()) {
+                        HashSet<String> filePaths = new HashSet<>();
+                        if (assay.getClassTypeIri().endsWith("DNASeqAssay")) {
+                            log.info("==============================================");
+                            log.info(assay.getResource().getURI());
+                            // Fill object
+                            WorkflowMetagenomics workflow = new WorkflowMetagenomics();
+                            workflow.memory = commandOptions.memory;
+                            workflow.threads = Math.round(commandOptions.threads);
+                            workflow.identifier = assay.getIdentifier().replaceAll("^A_", "");
+                            // This needs work, where do we put it?
+                            String destination = assay.getLogicalPath().replaceAll("/Unprocessed.*", "/processed/") + "/" + commandOptions.wid;
+                            for (Data_sample data_sample : assay.getAllFile()) {
+                                log.error(data_sample.getResource().getURI());
+                                // Parse each sequence dataset, as we might have overlapping identifiers in the future...
+                                SequenceDataSet sequenceDataSet = (SequenceDataSet) data_sample;
+                                // If file already processed skip...
+                                if (filePaths.contains(new URL(sequenceDataSet.getContentUrl()).getPath())) continue;
+                                if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("Illumina")) {
+                                    log.info("illumina sequence data detected");
+                                    PairedSequenceDataSet pairedSequenceDataSet = (PairedSequenceDataSet) data_sample;
+                                    workflow.addForward_reads(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
+                                    workflow.addReverse_reads(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
+                                    // Adding to already processed files due to linked paired dataset
+                                    filePaths.add(new URL(pairedSequenceDataSet.getContentUrl()).getPath());
+                                    filePaths.add(new URL(pairedSequenceDataSet.getPaired().getContentUrl()).getPath());
+                                } else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("PacBio")) {
+                                    if (sequenceDataSet.getFileFormat().getIRI().endsWith("FASTQ")) {
+                                        workflow.addPacbio(new URL(sequenceDataSet.getContentUrl()).getPath());
+                                    } else {
+                                        log.error("File type for " + sequenceDataSet.getName() + " not supported");
+                                    }
+                                } else if (sequenceDataSet.getSeqPlatform().getIRI().endsWith("NanoPore")) {
+                                    log.error("NANOPORE NOT YET DONE");
+                                    // workflow.addP(assay.getAllFile().get(0).getFilePath());
+                                }
+                                filePaths.add(new URL(sequenceDataSet.getContentUrl()).getPath());
+                            }
+                            workflow.setDestination(destination);
+                            workflow.setKraken_database("/unlock/references/databases/KRAKEN2_STANDARD_20200724");
+                            if (workflow.getForward_reads().size() == 0 || workflow.getReverse_reads().size() == 0) {
+                                log.warn("No illumina paired reads detected, cannot run SPADES");
+                                continue;
+                            }
+                            // Save to nl.wur.ssb.yaml format
+                            String yamlName;
+                            if (commandOptions.wid.length() == 0) {
+                                // Generate own nl.wur.ssb.yaml name...
+                                yamlName = assay.getIdentifier() + ".yaml";
+                            } else {
+                                yamlName = commandOptions.wid + ".yaml";
+                            }
+                            YamlWriter writer = new YamlWriter(new FileWriter(yamlName));
+                            writer.write(workflow);
+                            writer.close();
+                            // Fix Clazz > Class
+                            Workflow.fixClazz(yamlName);
+                            log.info("NOT FINISHED YET");
+                            // Save to iRODS
+                            IRODSFile destFile = connection.fileFactory.instanceIRODSFile(assay.getLogicalPath() + "/" + yamlName);
+                            log.info("Uploading " + new File(yamlName) + " to " + destFile);
+                            if (destFile.exists()) destFile.delete();
+                            dataTransferOperationsAO.putOperation(new File(yamlName), destFile, null, null);
+                            // Add metadata tag...
+                            DataObjectAO dataObjectAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataObjectAO(connection.irodsAccount);
+                            AvuData avuMetaData = new AvuData("cwl", "/unlock/infrastructure/cwl/workflows/" + commandOptions.cwl, "waiting");
+                            dataObjectAO.setAVUMetadata(destFile.getAbsolutePath(), avuMetaData);
+                        }
+                    }
+                }
+            }
+        }
+
+        if (commandOptions.level.matches("OU")) {
+            // OU level
+            for (String observationUnitURL : observationUnitsList) {
+                log.info("==============================================");
+                log.info(observationUnitURL);
+                // Fill object
+                WorkflowMetagenomics workflow = new WorkflowMetagenomics();
+                workflow.memory = commandOptions.memory;
+                workflow.threads = Math.round(commandOptions.threads);
+                observation_unit observationUnit = domain.make(observation_unit.class, observationUnitURL);
+                workflow.identifier = observationUnit.getIdentifier().replaceAll("^O_", "");
+                // This needs work, where do we put it?
+                String destination = observationUnit.getLogicalPath().replaceAll("/Unprocessed.*", "/processed/") + "/" + commandOptions.wid;
+                // Getting the files
+                HashSet<String> filePaths = new HashSet<>();
+                for (Sample sample : observationUnit.getAllSample()) {
+                    System.err.println(sample);
+                    for (Assay assay : sample.getAllAssay()) {
+                        System.err.println(assay.getClassTypeIri());
+                        if (assay.getClassTypeIri().endsWith("DNASeqAssay")) {
+                            for (Data_sample data_sample : assay.getAllFile()) {
+                                log.error(data_sample.getResource().getURI());
+                                // Parse each sequence dataset, as we might have overlapping identifiers in the future...
+                                SequenceDataSet sequenceDataSet = (SequenceDataSet) data_sample;