Commit ded0b89e authored by Koehorst, Jasper

Improve functionality to support the demultiplexing method

parent 908042dd
Pipeline #15418 passed with stage in 56 seconds
@@ -372,13 +372,17 @@ public class Search {
* @return the project Domain parsed from the TTL files in the project folder
*/
public static Domain getProjectFromRDF(CommandOptionsYAML commandOptions, Connection connection) throws Exception {
// Using default project folder
String folderQuery = "/" + connection.irodsAccount.getZone() + "/projects/" + commandOptions.project;
return getProjectFromRDF(connection, folderQuery);
}
public static Domain getProjectFromRDF(Connection connection, String folderQuery) throws Exception {
log.info("Obtaining project file");
domain = new Domain("");
// Get unprocessed files
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Obtain all TTL files in the given folder (the path is now passed in as folderQuery)
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folderQuery);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%ttl");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
package nl.munlock.objects;
import java.util.ArrayList;
import java.util.HashMap;
public class WorkflowDemultiplex extends Workflow {
public String reference_db;
public String forward_primer;
public String reverse_primer;
public ArrayList<FileClass> forward_reads = new ArrayList<>();
public ArrayList<FileClass> reverse_reads = new ArrayList<>();
// public ArrayList<FileClass> files = new ArrayList<>();
public String destination;
public int rev_read_len;
public int for_read_len;
public String sample;
public double minimum_threshold;
public HashMap<String, String> irods = new HashMap<>();
public ArrayList<FileClass> mapping_file = new ArrayList<>();
public void setReference_db(String reference_db) {
this.reference_db = reference_db;
}
public String getReference_db() {
return reference_db;
}
public void setForward_primer(String forward_primer) {
this.forward_primer = forward_primer;
}
public String getForward_primer() {
return forward_primer;
}
public void setReverse_primer(String reverse_primer) {
this.reverse_primer = reverse_primer;
}
public String getReverse_primer() {
return reverse_primer;
}
public void addIRODS(String irodsPath) {
// Track each iRODS source path under a unique key (0_irods, 1_irods, ...)
irods.put(irods.size() + "_irods", irodsPath);
}
public ArrayList<FileClass> getReverse_reads() {
return reverse_reads;
}
public void addReverse_reads(String reverse_reads) throws Exception {
addIRODS(reverse_reads);
FileClass fileClass = new FileClass();
fileClass.setClazz("File");
fileClass.setLocation(reverse_reads);
this.reverse_reads.add(fileClass);
}
public ArrayList<FileClass> getForward_reads() {
return forward_reads;
}
public void setMappingFile(String mappingFile) throws Exception {
addIRODS(mappingFile);
FileClass fileClass = new FileClass();
fileClass.setClazz("File");
fileClass.setLocation(mappingFile);
this.mapping_file.add(fileClass);
}
public void addForward_reads(String forward_reads) throws Exception {
addIRODS(forward_reads);
FileClass fileClass = new FileClass();
fileClass.setClazz("File");
fileClass.setLocation(forward_reads);
this.forward_reads.add(fileClass);
}
public void setDestination(String destination) {
this.destination = destination;
}
public String getDestination() {
return destination;
}
public void setRev_read_len(int rev_read_len) {
this.rev_read_len = rev_read_len;
}
public int getRev_read_len() {
return rev_read_len;
}
public void setFor_read_len(int for_read_len) {
this.for_read_len = for_read_len;
}
public int getFor_read_len() {
return for_read_len;
}
public void setSample(String sample) {
this.sample = sample.trim();
}
public String getSample() {
return sample;
}
public void setMinimum_threshold(double minimum_threshold) {
this.minimum_threshold = minimum_threshold;
}
public double getMinimum_threshold() {
return minimum_threshold;
}
}
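For orientation, here is a minimal sketch of how this WorkflowDemultiplex bean could be filled in and written out with yamlbeans, mirroring what generateDemultiplexWorkflow does later in this commit. The primer sequences, iRODS paths and output file name are invented placeholders, not values taken from the repository.

import com.esotericsoftware.yamlbeans.YamlConfig;
import com.esotericsoftware.yamlbeans.YamlWriter;
import nl.munlock.objects.WorkflowDemultiplex;

import java.io.FileWriter;

import static com.esotericsoftware.yamlbeans.YamlConfig.Quote.NONE;

public class WorkflowDemultiplexSketch {
    public static void main(String[] args) throws Exception {
        WorkflowDemultiplex workflow = new WorkflowDemultiplex();
        // Placeholder primers and iRODS paths, for illustration only
        workflow.setForward_primer("GTGCCAGCMGCCGCGGTAA");
        workflow.setReverse_primer("GGACTACHVGGGTWTCTAAT");
        workflow.addForward_reads("/zone/landingzone/ampliconlibraries/demo_R1.fastq.gz");
        workflow.addReverse_reads("/zone/landingzone/ampliconlibraries/demo_R2.fastq.gz");
        workflow.setMappingFile("/zone/landingzone/ampliconlibraries/mapping_file_demo.txt");
        workflow.setDestination("/zone/landingzone/ampliconlibraries/demo/demultiplexed/");

        // Serialise to unquoted YAML, as generateDemultiplexWorkflow does further down
        YamlConfig config = new YamlConfig();
        config.writeConfig.setQuoteChar(NONE);
        YamlWriter writer = new YamlWriter(new FileWriter("demo_job.yaml"), config);
        writer.write(workflow);
        writer.close();
    }
}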
package nl.munlock.options.workflow;
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import java.lang.reflect.Field;
public class CommandOptionsDemultiplexing extends CommandOptionsYAML {
public CommandOptionsDemultiplexing(String args[]) {
try {
JCommander jc = new JCommander(this);
jc.parse(args);
// If any of the required command-line options is still null, report it
boolean failed = false;
for (Field f : getClass().getDeclaredFields()) {
if (f.get(this) == null) {
System.err.println(f.getName() + " parameter not set, see help for more information");
failed = true;
}
}
if (this.help || failed) {
throw new ParameterException("Missing required parameters or help requested");
}
} catch (ParameterException | IllegalAccessException pe) {
int exitCode = 64;
if (this.help) {
exitCode = 0;
}
System.out.println(pe.getMessage());
new JCommander(this).usage();
System.out.println(" * required parameter");
System.exit(exitCode);
}
}
}
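A rough usage sketch, assuming the inherited CommandOptionsYAML exposes public cwl and project fields populated from flags of the same name; those flag names are inferred from the commandOptions.cwl and commandOptions.project references elsewhere in this commit and are not confirmed here.

public class DemultiplexOptionsSketch {
    public static void main(String[] args) {
        // Hypothetical flags; the real parameter definitions live in CommandOptionsYAML
        String[] demoArgs = {"-cwl", "workflow_demultiplexing.cwl", "-project", "demo_project"};
        CommandOptionsDemultiplexing options = new CommandOptionsDemultiplexing(demoArgs);
        // On success the parsed values would drive NGTax.generateDemultiplexWorkflow(options, connection, domain)
        System.out.println("Workflow: " + options.cwl + ", project: " + options.project);
    }
}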
@@ -4,23 +4,36 @@ import com.esotericsoftware.yamlbeans.YamlConfig;
import com.esotericsoftware.yamlbeans.YamlWriter;
import nl.munlock.Generic;
import nl.munlock.irods.Connection;
import nl.munlock.objects.WorkflowDemultiplex;
import nl.munlock.objects.WorkflowNgtax;
import nl.munlock.ontology.domain.AmpliconAssay;
import nl.munlock.ontology.domain.AmpliconLibraryAssay;
import nl.munlock.ontology.domain.PairedSequenceDataSet;
import nl.munlock.ontology.domain.SequenceDataSet;
import nl.munlock.options.workflow.CommandOptionsDemultiplexing;
import nl.munlock.options.workflow.CommandOptionsNGTAX;
import nl.wur.ssb.RDFSimpleCon.ResultLine;
import nl.wur.ssb.RDFSimpleCon.api.Domain;
import org.apache.log4j.Logger;
import org.irods.jargon.core.exception.FileIntegrityException;
import org.irods.jargon.core.exception.JargonException;
import org.irods.jargon.core.pub.DataObjectAO;
import org.irods.jargon.core.pub.DataTransferOperations;
import org.irods.jargon.core.pub.IRODSGenQueryExecutor;
import org.irods.jargon.core.pub.domain.AvuData;
import org.irods.jargon.core.pub.io.IRODSFile;
import org.irods.jargon.core.query.*;
import org.jermontology.ontology.JERMOntology.domain.Assay;
import org.jermontology.ontology.JERMOntology.domain.Data_sample;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import static com.esotericsoftware.yamlbeans.YamlConfig.Quote.NONE;
@@ -188,7 +201,6 @@ public class NGTax {
}
// Generic part here for both single and paired end data
dataTransferOperationsAO.putOperation(new File(yamlFileName), destFile, null, null);
// Add metadata tag...
@@ -198,4 +210,180 @@ public class NGTax {
}
}
}
public static void generateDemultiplexWorkflow(CommandOptionsDemultiplexing commandOptionsDemultiplexing, Connection connection, Domain domain) throws Exception {
DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);
Iterator<ResultLine> resultLineIterator = domain.getRDFSimpleCon().runQuery("getAssay.txt", true,"http://m-unlock.nl/ontology/AmpliconLibraryAssay").iterator();
// For each library
while (resultLineIterator.hasNext()) {
ResultLine resultLine = resultLineIterator.next();
AmpliconLibraryAssay ampliconLibraryAssay = domain.make(AmpliconLibraryAssay.class, resultLine.getIRI("assay"));
ResultLine assayLine = domain.getRDFSimpleCon().runQuery("getAssayFromAmpliconLibrary.txt", true, ampliconLibraryAssay.getResource().getURI()).iterator().next();
AmpliconAssay ampliconAssay = domain.make(AmpliconAssay.class, assayLine.getIRI("assay"));
String mappingFile = generateMappingFile(domain, ampliconLibraryAssay, ampliconAssay, commandOptionsDemultiplexing.project);
// Find the amplicon library files in the landing zone (assumes file 0 is the forward reads, file 1 the reverse reads)
String path = "/" + connection.irodsAccount.getZone() + "/landingzone/ampliconlibraries";
HashSet<String> libraryFwdFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(0).getFileName(), path);
HashSet<String> libraryRevFile = findFile(connection, ampliconLibraryAssay.getAllFile().get(1).getFileName(), path);
File libraryFolder = new File(libraryFwdFile.iterator().next()).getParentFile();
String mappingFileDestination = libraryFolder + "/" + mappingFile;
IRODSFile destFile = connection.fileFactory.instanceIRODSFile(mappingFileDestination);
if (destFile.exists()) {
destFile.delete();
}
// TODO: decide whether the mapping file should be uploaded to the library location or to the project folder
dataTransferOperationsAO.putOperation(new File(mappingFile), destFile, null, null);
// Create the YAML job description for the demultiplexing workflow
String fwdPrimer = ampliconAssay.getForwardPrimer();
String revPrimer = ampliconAssay.getReversePrimer();
String libFile1 = libraryFwdFile.iterator().next();
String libFile2 = libraryRevFile.iterator().next();
WorkflowDemultiplex workflowDemultiplex = new WorkflowDemultiplex();
workflowDemultiplex.setForward_primer(fwdPrimer);
workflowDemultiplex.setReverse_primer(revPrimer);
workflowDemultiplex.addForward_reads(libFile1);
workflowDemultiplex.addReverse_reads(libFile2);
workflowDemultiplex.setMappingFile(mappingFileDestination);
workflowDemultiplex.setDestination(libraryFolder + "/" + commandOptionsDemultiplexing.project + "/demultiplexed/");
String yamlFileName = mappingFile + ".yaml";
YamlConfig config = new YamlConfig();
config.writeConfig.setQuoteChar(NONE);
YamlWriter writer = new YamlWriter(new FileWriter(yamlFileName), config);
writer.write(workflowDemultiplex);
writer.close();
// Post-process the YAML: rename clazz to class, strip comments and fix the sample field
Workflow.fixClazz(yamlFileName);
Workflow.fixComments(yamlFileName);
Workflow.fixSample(yamlFileName);
// Upload
destFile = connection.fileFactory.instanceIRODSFile(libraryFolder + "/" + yamlFileName);
if (destFile.exists()) {
destFile.delete();
}
dataTransferOperationsAO.putOperation(new File(yamlFileName), destFile, null, null);
// Add metadata tag...
DataObjectAO dataObjectAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataObjectAO(connection.irodsAccount);
AvuData avuMetaData = new AvuData("cwl", "/unlock/infrastructure/cwl/workflows/" + commandOptionsDemultiplexing.cwl, "waiting");
dataObjectAO.setAVUMetadata(destFile.getAbsolutePath(), avuMetaData);
/*
generateMappingFile(library, ampliconAssay);
String[] commandArgs = {
"java",
"-jar",
commandOptions.ngtax,
"-demultiplex",
"-mapFile", "mapping_file.txt",
"-for_p", ampliconAssay.getForwardPrimer(),
"-rev_p", ampliconAssay.getReversePrimer(),
"-output", output.getName(),
"-fastQ", "." + libraryPath1 + "," + "." + libraryPath2};
*/
}
}
private static String generateMappingFile(Domain domain, AmpliconLibraryAssay ampliconLibraryAssay, AmpliconAssay ampliconAssayReference, String project) throws Exception {
String mappingFile = "mapping_file_"+project+"_"+ampliconLibraryAssay.getIdentifier()+".txt";
PrintWriter printWriter = new PrintWriter(mappingFile);
printWriter.println("#sampleID\tforwardBarcodeSequence\treverseBarcodeSequence\tLibraryNumber\tDirection\tLibraryName");
Iterable<ResultLine> ampliconAssays = domain.getRDFSimpleCon().runQuery("getAssayIDsFromLibrary.txt", true, ampliconLibraryAssay.getResource().getURI());
for (ResultLine assay : ampliconAssays) {
AmpliconAssay ampliconAssay = domain.make(AmpliconAssay.class, assay.getIRI("assay"));
if (!ampliconAssay.getForwardPrimer().contains(ampliconAssayReference.getForwardPrimer())) {
log.info("Assay from same library with different primer detected " + ampliconAssay.getIdentifier() + " vs reference " + ampliconAssayReference.getIdentifier());
continue;
}
if (ampliconAssay.getReversePrimer() != null && !ampliconAssay.getReversePrimer().contains(ampliconAssayReference.getReversePrimer())) {
log.info("Assay from same library with different primer detected " + ampliconAssay.getIdentifier() + " vs reference " + ampliconAssayReference.getIdentifier());
continue;
}
String sampleID = ampliconAssay.getIdentifier();
String fwdBarcode = ampliconAssay.getForwardBarcode();
String revBarcode = ampliconAssay.getReverseBarcode();
String libraryNumber = "1";
String direction = "p";
String libraryName = ampliconAssay.getLibrary().getAllFile().get(0).getFileName();
printWriter.println(sampleID + "\t" + fwdBarcode + "\t" + revBarcode + "\t" + libraryNumber + "\t" + direction + "\t" + libraryName);
}
printWriter.close();
return mappingFile;
}
public static HashSet<String> findFile(Connection connection, String fileName, String supposedPath) throws GenQueryBuilderException, JargonException {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
// Generalised to the landing zone folder; check later whether it is an ENA or a project path
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.EQUAL, fileName);
// Skip files found in trash
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.NOT_LIKE, "/" + connection.irodsAccount.getZone() + "/trash/%");
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_COLL_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
// Set limit?
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(2);
IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
IRODSQueryResultSet irodsQueryResultSet;
try {
irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
TimeUnit.SECONDS.sleep(0);
} catch (JargonException | InterruptedException | JargonQueryException e) {
e.printStackTrace();
return new HashSet<>();
}
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
// If no hits are found
if (irodsQueryResultSetResults.isEmpty()) {
System.err.println("No file named " + fileName + " found under " + supposedPath);
return new HashSet<>();
} else {
// if one hit or more is found collect all paths
HashSet<String> paths = new HashSet<>();
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
String path = irodsQueryResultSetResult.getColumn(0) + "/" + irodsQueryResultSetResult.getColumn(1);
System.err.println(path);
if (path.startsWith(supposedPath)) {
paths.add(path);
} else {
System.err.println(path + " not in " + supposedPath);
}
}
// if more than one hit found
if (paths.size() > 1) {
throw new FileIntegrityException("Multiple libraries found with the same name: " + fileName);
}
return paths;
}
}
}
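For reference, the mapping file written by generateMappingFile is a tab-separated table with the header shown above; a sketch of its shape, with invented sample IDs and barcode sequences (columns are tab-separated in the real file):

#sampleID	forwardBarcodeSequence	reverseBarcodeSequence	LibraryNumber	Direction	LibraryName
SAMPLE_001	ACGTACGTACGT	TGCATGCATGCA	1	p	demo_library_R1.fastq.gz
SAMPLE_002	GTCAGTCAGTCA	CATGCATGCATG	1	p	demo_library_R1.fastq.gz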
@@ -62,13 +62,17 @@ public class Yaml {
RNASeq.generateRNASeqWorkflow(commandOptionsRNAseq, connection, assays);
} else if (commandOptions.cwl.matches("workflow_indexer.cwl")) {
log.info("Running index workflow");
// ArrayList<Assay> assays = Search.getAssaysFromRDF(commandOptions, connection);
CommandOptionsIndexer commandOptionsIndexer = new CommandOptionsIndexer(args);
Index.generateIndexWorkflow(commandOptionsIndexer, connection);
} else if (commandOptions.cwl.matches("workflow_ena_annotation.cwl")) {
log.info("Running ENA workflow");
CommandOptionsGenomeSync commandOptionsGenomeSync = new CommandOptionsGenomeSync(args);
ENA.generateGenomeSyncWorkflows(commandOptionsGenomeSync, connection);
} else if (commandOptions.cwl.matches("workflow_demultiplexing.cwl")) {
String folderQuery = "/" + connection.irodsAccount.getZone() + "/landingzone/projects/" + commandOptions.project;
Domain domain = Search.getProjectFromRDF(connection, folderQuery);
CommandOptionsDemultiplexing commandOptionsDemultiplexing = new CommandOptionsDemultiplexing(args);
NGTax.generateDemultiplexWorkflow(commandOptionsDemultiplexing, connection, domain);
} else {
log.error("Unknown CWL provided " + commandOptions.cwl);
}
SELECT DISTINCT ?assay
WHERE {
?assay unlock:library <%1$s> .
}
\ No newline at end of file
SELECT DISTINCT ?assay
WHERE {
?assay <http://m-unlock.nl/ontology/library> <%1$s>
}
\ No newline at end of file