Commit 3b29e61b authored by Koehorst, Jasper's avatar Koehorst, Jasper
Browse files

search engine imporvements for HDT date time checks and connection reconnect option added

parent ad3fe9e7
Pipeline #21246 passed with stage
in 1 minute and 7 seconds
......@@ -15,14 +15,17 @@ import static org.irods.jargon.core.connection.ClientServerNegotiationPolicy.Ssl
import static org.irods.jargon.core.connection.ClientServerNegotiationPolicy.SslNegotiationPolicy.CS_NEG_REQUIRE;
public class Connection {
public final IRODSFileSystem irodsFileSystem;
public final IRODSAccount irodsAccount;
public final IRODSAccessObjectFactory accessObjectFactory;
public final IRODSFileFactory fileFactory;
public IRODSFileSystem irodsFileSystem;
public IRODSAccount irodsAccount;
public IRODSAccessObjectFactory accessObjectFactory;
public IRODSFileFactory fileFactory;
private static final Logger log = LoggerFactory.getLogger(Connection.class);
public Connection(CommandOptionsIRODS commandOptions) throws JargonException {
connect(commandOptions);
}
private void connect(CommandOptionsIRODS commandOptions) throws JargonException {
// Initialize account object
irodsAccount = IRODSAccount.instance(commandOptions.host,Integer.parseInt(commandOptions.port),commandOptions.username, commandOptions.password,"",commandOptions.zone,"");
......@@ -44,9 +47,20 @@ public class Connection {
accessObjectFactory = irodsFileSystem.getIRODSAccessObjectFactory();
// AuthResponse x = accessObjectFactory.authenticateIRODSAccount(irodsAccount);
// log.info(x.getAuthMessage());
fileFactory = accessObjectFactory.getIRODSFileFactory(irodsAccount);
}
public void close() {
try {
this.irodsFileSystem.close();
this.accessObjectFactory.closeSession();
} catch (JargonException e) {
e.printStackTrace();
}
}
public void reconnect(CommandOptionsIRODS commandOptions) throws JargonException {
this.close();
this.connect(commandOptions);
}
}
......@@ -628,4 +628,87 @@ public class Search {
log.info("Detected " + folders.size());
return folders;
}
/**
* Time stamp check for hdt file and turtle files
* @param connection irods connection object
* @param commandOptionsHDT command options object for hdt files
* @param folder folder containing the hdt files and turtle files
* @return
*/
public static boolean checkTimeStampsHDT(Connection connection, CommandOptionsHDT commandOptionsHDT, String folder) throws GenQueryBuilderException, JargonException, JargonQueryException {
// Obtain time stamp of latest TURTLE file
// Obtains OU folders...
log.info("Searching for turtle timestamps in " + folder);
// Get creation time
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, "%.ttl");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folder + "%");
// queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_DATA_NAME);
queryBuilder.addSelectAsGenQueryValue(RodsGenQueryEnum.COL_D_MODIFY_TIME);
queryBuilder.addOrderByGenQueryField(RodsGenQueryEnum.COL_D_MODIFY_TIME, GenQueryOrderByField.OrderByType.DESC);
// Set limit?
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(1);
IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
if (irodsQueryResultSetResults.size() == 0)
throw new JargonException("No results found");
Set<String> folders = new HashSet<>();
if (irodsQueryResultSetResults.size() > 0) {
Date dateTTL = irodsQueryResultSetResults.get(0).getColumnAsDateOrNull(0); //.getColumnAsDateOrNull(0);
// Get timestamp HDT file
IRODSFile irodsFolder = connection.fileFactory.instanceIRODSFile(folder + "/hdt/");
for (File file : irodsFolder.listFiles()) {
if (file.getName().endsWith(".hdt")) {
IRODSFile irodsHDTFile = connection.fileFactory.instanceIRODSFile(file.getAbsolutePath());
Date dateHDT = new Date(irodsHDTFile.lastModified());
System.err.println(dateTTL);
System.err.println(dateHDT);
if (dateHDT.compareTo(dateTTL) == -1) {
// This is a job to be executed as TTL files that are newer has been identified
return true;
}
}
}
}
return false;
}
public static long getFolderSize(Connection connection, CommandOptionsHDT commandOptionsHDT, String folder, String filter) throws GenQueryBuilderException, JargonException, JargonQueryException {
IRODSGenQueryBuilder queryBuilder = new IRODSGenQueryBuilder(true, null);
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_COLL_NAME, QueryConditionOperators.LIKE, folder + "%");
queryBuilder.addConditionAsGenQueryField(RodsGenQueryEnum.COL_DATA_NAME, QueryConditionOperators.LIKE, filter);
queryBuilder.addSelectAsAgregateGenQueryValue(RodsGenQueryEnum.COL_DATA_SIZE, GenQueryField.SelectFieldTypes.SUM);
IRODSGenQueryFromBuilder query = queryBuilder.exportIRODSQueryFromBuilder(30000);
IRODSGenQueryExecutor irodsGenQueryExecutor = connection.accessObjectFactory.getIRODSGenQueryExecutor(connection.irodsAccount);
IRODSQueryResultSet irodsQueryResultSet = irodsGenQueryExecutor.executeIRODSQuery(query, 0);
List<IRODSQueryResultRow> irodsQueryResultSetResults = irodsQueryResultSet.getResults();
if (irodsQueryResultSetResults.size() == 0)
throw new JargonException("No results found");
for (IRODSQueryResultRow irodsQueryResultSetResult : irodsQueryResultSetResults) {
long size = Long.parseLong(irodsQueryResultSetResult.getColumn(0)) / 1024 / 1024 * 10;
System.err.println("SIZE: " + size);
return size;
}
return -1;
}
}
......@@ -3,6 +3,7 @@ package nl.munlock.yaml;
import com.esotericsoftware.yamlbeans.YamlWriter;
import nl.munlock.Generic;
import nl.munlock.irods.Connection;
import nl.munlock.irods.Search;
import nl.munlock.objects.WorkflowHDT;
import nl.munlock.options.workflow.CommandOptionsHDT;
import org.apache.log4j.Logger;
......@@ -23,6 +24,12 @@ public class HDT {
DataTransferOperations dataTransferOperationsAO = connection.irodsFileSystem.getIRODSAccessObjectFactory().getDataTransferOperations(connection.irodsAccount);
for (String folder : folders) {
// Obtain rough estimate for memory consumption (TEST)
long memory = Search.getFolderSize(connection, commandOptionsHDT, folder, "%ttl");
if (memory > commandOptionsHDT.memory) {
commandOptionsHDT.memory = Math.toIntExact(memory);
}
String yamlName = commandOptionsHDT.wid + ".yaml";
// Save to iRODS
......@@ -66,7 +73,6 @@ public class HDT {
dataObjectAO.setAVUMetadata(destFile.getAbsolutePath(), avuMetaData);
// Start kubernetes?
}
}
}
......@@ -13,8 +13,6 @@ import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import static nl.munlock.irods.Search.makePath;
/*
* Application to generate YAML files used in iRODS for the CWL runners
* Currently the following workflows are in place:
......@@ -27,7 +25,6 @@ public class Yaml {
private static final Logger log = Generic.getLogger(Yaml.class, Yaml.debug);
/**
*
* @param args
* @throws Exception
*/
......@@ -37,6 +34,8 @@ public class Yaml {
CommandOptionsYAML commandOptions = new CommandOptionsYAML(args);
Connection connection = new Connection(commandOptions);
log.info("Logged in as " + connection.irodsAccount.getUserName());
// Generate specific workflows
if (commandOptions.cwl.matches("workflow_quality.cwl")) {
log.info("Running quality analysis");
......@@ -91,23 +90,23 @@ public class Yaml {
log.info("Generating HDT workflow files");
CommandOptionsHDT commandOptionsHDT = new CommandOptionsHDT(args);
Set<String> folders = Search.getOUFromiRODS(connection, commandOptionsHDT);
log.info("Found " + folders.size() + " observational units");
String search = Search.makePath(commandOptionsHDT.project, commandOptionsHDT.investigation, commandOptionsHDT.study, commandOptionsHDT.observationUnit, commandOptionsHDT.assay, connection);
Set<String> hdts = Search.getAllHDT(search, connection);
// For each folder obtain last modified TTL file and last modified HDT file
Set<String> analysis = new HashSet<>();
// overwrite check
if (commandOptions.overwrite) {
analysis.addAll(folders);
} else {
for (String folder : folders) {
if (!hdts.contains(folder + "/hdt")) {
analysis.add(folder);
}
int count = 0;
for (String folder : folders) {
count++;
if (count > 10) {
connection.reconnect(commandOptions);
}
boolean check = Search.checkTimeStampsHDT(connection, commandOptionsHDT, folder);
if (check) {
analysis.add(folder);
}
}
log.info("Found " + folders.size() + " observational units");
// generate the workflows
HDT.generateHDTWorkflow(commandOptionsHDT, connection, analysis);
} else {
log.error("Unknown CWL provided " + commandOptions.cwl);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment