Skip to content

Commit bc31285

Browse files
committed
Merge branch 'release-0.7.0'
Conflicts: opencga-analysis/pom.xml opencga-app/pom.xml opencga-catalog/pom.xml opencga-core/pom.xml opencga-server/pom.xml opencga-storage/opencga-storage-app/pom.xml opencga-storage/opencga-storage-core/pom.xml opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/alignment/tasks/AlignmentRegionCompactorTask.java opencga-storage/opencga-storage-hadoop/pom.xml opencga-storage/opencga-storage-mongodb/pom.xml opencga-storage/pom.xml pom.xml
2 parents 5d85f85 + 7cc9a30 commit bc31285

File tree

464 files changed

+38524
-6004
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

464 files changed

+38524
-6004
lines changed

.gitignore

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1+
12
# OpenCGA folders
2-
opencga-app/build/conf
3-
opencga-app/build/libs
4-
opencga-app/build/conf
5-
opencga-storage/opencga-storage-app/build
3+
build
64

75
# General
86
*.jar
@@ -61,3 +59,6 @@ nb*.xml
6159

6260
# Install
6361
opencga-server/install.sh
62+
63+
# Generated properties
64+
git.properties

opencga-analysis/pom.xml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
<parent>
2323
<groupId>org.opencb.opencga</groupId>
2424
<artifactId>opencga</artifactId>
25-
<version>0.6.0</version>
25+
<version>0.7.0</version>
2626
<relativePath>..</relativePath>
2727
</parent>
2828

@@ -44,10 +44,10 @@
4444
<groupId>org.slf4j</groupId>
4545
<artifactId>slf4j-api</artifactId>
4646
</dependency>
47-
<dependency>
48-
<groupId>org.slf4j</groupId>
49-
<artifactId>slf4j-simple</artifactId>
50-
</dependency>
47+
<!--<dependency>-->
48+
<!--<groupId>org.slf4j</groupId>-->
49+
<!--<artifactId>slf4j-log4j12</artifactId>-->
50+
<!--</dependency>-->
5151
<dependency>
5252
<groupId>com.fasterxml.jackson.core</groupId>
5353
<artifactId>jackson-databind</artifactId>
@@ -76,7 +76,6 @@
7676
<dependency>
7777
<groupId>junit</groupId>
7878
<artifactId>junit</artifactId>
79-
<scope>test</scope>
8079
</dependency>
8180
</dependencies>
8281

opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisJobExecutor.java

Lines changed: 60 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
package org.opencb.opencga.analysis;
1818

1919
import com.fasterxml.jackson.databind.ObjectMapper;
20+
import org.apache.tools.ant.types.Commandline;
2021
import org.opencb.datastore.core.ObjectMap;
2122
import org.opencb.datastore.core.QueryOptions;
2223
import org.opencb.datastore.core.QueryResult;
2324
import org.opencb.opencga.catalog.exceptions.CatalogException;
2425
import org.opencb.opencga.catalog.CatalogManager;
25-
import org.opencb.opencga.catalog.exceptions.CatalogIOException;
2626
import org.opencb.opencga.catalog.io.CatalogIOManager;
2727
import org.opencb.opencga.catalog.models.File;
2828
import org.opencb.opencga.catalog.models.Job;
@@ -38,18 +38,15 @@
3838
import org.opencb.opencga.core.exec.SingleProcess;
3939
import org.slf4j.Logger;
4040
import org.slf4j.LoggerFactory;
41-
import sun.misc.Signal;
42-
import sun.misc.SignalHandler;
4341

44-
import java.io.ByteArrayInputStream;
4542
import java.io.IOException;
4643
import java.io.InputStream;
4744
import java.net.URI;
4845
import java.nio.file.Files;
4946
import java.nio.file.Path;
5047
import java.nio.file.Paths;
5148
import java.util.*;
52-
import java.util.concurrent.atomic.AtomicBoolean;
49+
import java.util.stream.Collectors;
5350

5451
public class AnalysisJobExecutor {
5552

@@ -192,7 +189,7 @@ public String createCommandLine(String executable, Map<String, List<String>> par
192189
validParams.addAll(analysis.getGlobalParams());
193190
validParams.add(new Option(execution.getOutputParam(), "Outdir", false));
194191
if (checkRequiredParams(params, validParams)) {
195-
params = new HashMap<String, List<String>>(removeUnknownParams(params, validParams));
192+
params = new HashMap<>(removeUnknownParams(params, validParams));
196193
} else {
197194
throw new AnalysisExecutionException("ERROR: missing some required params.");
198195
}
@@ -244,16 +241,48 @@ public QueryResult<Job> createJob(String executable, Map<String, List<String>> p
244241
String commandLine = createCommandLine(executable, params);
245242
System.out.println(commandLine);
246243

244+
Map<String, String> plainParams = params.entrySet().stream().collect(Collectors.toMap(
245+
Map.Entry::getKey,
246+
entry -> entry.getValue().stream().collect(Collectors.joining(",")))
247+
);
248+
247249
return createJob(catalogManager, studyId, jobName, analysisName, description, outDir, inputFiles, sessionId,
248-
randomString, temporalOutDirUri, commandLine, false, false, new HashMap<String, Object>(), new HashMap<String, Object>());
250+
randomString, temporalOutDirUri, executionName, plainParams, commandLine, false, false, new HashMap<>(), new HashMap<>());
249251
}
250252

253+
@Deprecated
251254
public static QueryResult<Job> createJob(final CatalogManager catalogManager, int studyId, String jobName, String toolName, String description,
252255
File outDir, List<Integer> inputFiles, final String sessionId,
253256
String randomString, URI temporalOutDirUri, String commandLine,
254257
boolean execute, boolean simulate, Map<String, Object> attributes,
255258
Map<String, Object> resourceManagerAttributes)
256259
throws AnalysisExecutionException, CatalogException {
260+
String[] args = Commandline.translateCommandline(commandLine);
261+
Map<String, String> params = new HashMap<>();
262+
for (int i = 0; i < args.length; i++) {
263+
if (args[i].startsWith("-")) {
264+
String key = args[i].replaceAll("^--?", "");
265+
String value;
266+
if (args.length == i + 1 || args[i + 1].startsWith("-")) {
267+
value = "";
268+
} else {
269+
value = args[i + 1];
270+
i++;
271+
}
272+
params.put(key, value);
273+
}
274+
}
275+
return createJob(catalogManager, studyId, jobName, toolName, description, outDir, inputFiles, sessionId,
276+
randomString, temporalOutDirUri, "", params, commandLine, execute, simulate, attributes, resourceManagerAttributes);
277+
}
278+
279+
public static QueryResult<Job> createJob(final CatalogManager catalogManager, int studyId, String jobName, String toolName, String description,
280+
File outDir, List<Integer> inputFiles, final String sessionId,
281+
String randomString, URI temporalOutDirUri,
282+
String executor, Map<String, String> params, String commandLine,
283+
boolean execute, boolean simulate, Map<String, Object> attributes,
284+
Map<String, Object> resourceManagerAttributes)
285+
throws AnalysisExecutionException, CatalogException {
257286
logger.debug("Creating job {}: simulate {}, execute {}", jobName, simulate, execute);
258287
long start = System.currentTimeMillis();
259288

@@ -271,7 +300,7 @@ public static QueryResult<Job> createJob(final CatalogManager catalogManager, in
271300
} else {
272301
if (execute) {
273302
/** Create a RUNNING job in CatalogManager **/
274-
jobQueryResult = catalogManager.createJob(studyId, jobName, toolName, description, commandLine, temporalOutDirUri,
303+
jobQueryResult = catalogManager.createJob(studyId, jobName, toolName, description, executor, params, commandLine, temporalOutDirUri,
275304
outDir.getId(), inputFiles, null, attributes, resourceManagerAttributes, Job.Status.RUNNING, System.currentTimeMillis(), 0, null, sessionId);
276305
Job job = jobQueryResult.first();
277306

@@ -280,7 +309,7 @@ public static QueryResult<Job> createJob(final CatalogManager catalogManager, in
280309
} else {
281310
/** Create a PREPARED job in CatalogManager **/
282311
resourceManagerAttributes.put(Job.JOB_SCHEDULER_NAME, randomString);
283-
jobQueryResult = catalogManager.createJob(studyId, jobName, toolName, description, commandLine, temporalOutDirUri,
312+
jobQueryResult = catalogManager.createJob(studyId, jobName, toolName, description, executor, params, commandLine, temporalOutDirUri,
284313
outDir.getId(), inputFiles, null, attributes, resourceManagerAttributes, Job.Status.PREPARED, 0, 0, null, sessionId);
285314
}
286315
}
@@ -290,6 +319,12 @@ public static QueryResult<Job> createJob(final CatalogManager catalogManager, in
290319
private static QueryResult<Job> executeLocal(CatalogManager catalogManager, Job job, String sessionId) throws CatalogException {
291320

292321
Command com = new Command(job.getCommandLine());
322+
CatalogIOManager ioManager = catalogManager.getCatalogIOManagerFactory().get(job.getTmpOutDirUri());
323+
URI sout = job.getTmpOutDirUri().resolve(job.getName() + "." + job.getId() + ".out.txt");
324+
com.setOutputOutputStream(ioManager.createOutputStream(sout, false));
325+
URI serr = job.getTmpOutDirUri().resolve(job.getName() + "." + job.getId() + ".err.txt");
326+
com.setErrorOutputStream(ioManager.createOutputStream(serr, false));
327+
293328
final int jobId = job.getId();
294329
Thread hook = new Thread(() -> {
295330
try {
@@ -322,25 +357,24 @@ private static QueryResult<Job> executeLocal(CatalogManager catalogManager, Job
322357

323358
private static QueryResult<Job> postExecuteLocal(CatalogManager catalogManager, Job job, String sessionId, Command com)
324359
throws CatalogException {
325-
/** Write output to file **/
326-
CatalogIOManager ioManager = catalogManager.getCatalogIOManagerFactory().get(job.getTmpOutDirUri());
327-
try {
328-
URI sout = job.getTmpOutDirUri().resolve(job.getName() + "." + job.getId() + ".out.txt");
329-
if (com.getOutput() != null) {
330-
ioManager.createFile(sout, new ByteArrayInputStream(com.getOutput().getBytes()));
331-
com.setOutput(null);
360+
/** Close output streams **/
361+
if (com.getOutputOutputStream() != null) {
362+
try {
363+
com.getOutputOutputStream().close();
364+
} catch (IOException e) {
365+
logger.warn("Error closing OutputStream", e);
332366
}
333-
} catch (CatalogIOException e) {
334-
e.printStackTrace();
367+
com.setOutputOutputStream(null);
368+
com.setOutput(null);
335369
}
336-
try {
337-
URI serr = job.getTmpOutDirUri().resolve(job.getName() + "." + job.getId() + ".err.txt");
338-
if (com.getError() != null) {
339-
ioManager.createFile(serr, new ByteArrayInputStream(com.getError().getBytes()));
340-
com.setError(null);
370+
if (com.getErrorOutputStream() != null) {
371+
try {
372+
com.getErrorOutputStream().close();
373+
} catch (IOException e) {
374+
logger.warn("Error closing OutputStream", e);
341375
}
342-
} catch (CatalogIOException e) {
343-
e.printStackTrace();
376+
com.setErrorOutputStream(null);
377+
com.setError(null);
344378
}
345379

346380
/** Change status to DONE - Add the execution information to the job entry **/
@@ -352,7 +386,7 @@ private static QueryResult<Job> postExecuteLocal(CatalogManager catalogManager,
352386

353387
/** Record output **/
354388
AnalysisOutputRecorder outputRecorder = new AnalysisOutputRecorder(catalogManager, sessionId);
355-
outputRecorder.recordJobOutput(job, com.getExitValue() != 0);
389+
outputRecorder.recordJobOutputAndPostProcess(job, com.getExitValue() != 0);
356390

357391
/** Change status to READY or ERROR **/
358392
if (com.getExitValue() == 0) {

opencga-analysis/src/main/java/org/opencb/opencga/analysis/AnalysisOutputRecorder.java

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@
1616

1717
package org.opencb.opencga.analysis;
1818

19-
import org.opencb.biodata.models.variant.VariantSourceEntry;
19+
import org.opencb.biodata.models.variant.StudyEntry;
2020
import org.opencb.datastore.core.ObjectMap;
2121
import org.opencb.datastore.core.QueryOptions;
2222
import org.opencb.datastore.core.QueryResult;
23+
import org.opencb.opencga.analysis.files.FileMetadataReader;
2324
import org.opencb.opencga.analysis.files.FileScanner;
25+
import org.opencb.opencga.catalog.CatalogManager;
2426
import org.opencb.opencga.catalog.db.api.CatalogSampleDBAdaptor;
2527
import org.opencb.opencga.catalog.exceptions.CatalogException;
26-
import org.opencb.opencga.catalog.CatalogManager;
2728
import org.opencb.opencga.catalog.io.CatalogIOManager;
2829
import org.opencb.opencga.catalog.models.Cohort;
2930
import org.opencb.opencga.catalog.models.File;
@@ -61,7 +62,17 @@ public AnalysisOutputRecorder(CatalogManager catalogManager, String sessionId) {
6162
this.sessionId = sessionId;
6263
}
6364

64-
public void recordJobOutput(Job job, boolean jobFailed) {
65+
public void recordJobOutputAndPostProcess(Job job, boolean jobFailed) {
66+
recordJobOutput(job);
67+
68+
/** Modifies the job to set the output and endTime. **/
69+
try {
70+
postProcessJob(job, jobFailed);
71+
} catch (CatalogException e) {
72+
e.printStackTrace(); //TODO: Handle exception
73+
}
74+
}
75+
public void recordJobOutput(Job job) {
6576

6677
try {
6778
/** Scans the output directory from a job or index to find all files. **/
@@ -73,6 +84,10 @@ public void recordJobOutput(Job job, boolean jobFailed) {
7384
List<File> files = fileScanner.scan(outDir, tmpOutDirUri, fileScannerPolicy, calculateChecksum, true, job.getId(), sessionId);
7485
List<Integer> fileIds = files.stream().map(File::getId).collect(Collectors.toList());
7586
CatalogIOManager ioManager = catalogManager.getCatalogIOManagerFactory().get(tmpOutDirUri);
87+
if (!ioManager.exists(tmpOutDirUri)) {
88+
logger.warn("Output folder doesn't exist");
89+
return;
90+
}
7691
List<URI> uriList = ioManager.listFiles(tmpOutDirUri);
7792
if (uriList.isEmpty()) {
7893
ioManager.deleteDirectory(tmpOutDirUri);
@@ -85,20 +100,13 @@ public void recordJobOutput(Job job, boolean jobFailed) {
85100
parameters.put("output", fileIds);
86101
parameters.put("endTime", System.currentTimeMillis());
87102
catalogManager.modifyJob(job.getId(), parameters, sessionId);
103+
job.setOutput(fileIds);
104+
job.setEndTime(parameters.getLong("endTime"));
88105

89106
//TODO: "input" files could be modified by the tool. Have to be scanned, calculate the new Checksum and
90107
} catch (CatalogException | IOException e) {
91108
e.printStackTrace();
92109
logger.error("Error while processing Job", e);
93-
return;
94-
}
95-
96-
97-
/** Modifies the job to set the output and endTime. **/
98-
try {
99-
postProcessJob(job, jobFailed);
100-
} catch (CatalogException e) {
101-
e.printStackTrace(); //TODO: Handle exception
102110
}
103111
}
104112

@@ -129,6 +137,7 @@ public void postProcessJob(Job job, boolean jobFailed) throws CatalogException {
129137
index.setStatus(Index.Status.NONE);
130138
} else {
131139
index.setStatus(Index.Status.TRANSFORMED);
140+
FileMetadataReader.get(catalogManager).updateVariantFileStats(job, sessionId);
132141
}
133142
break;
134143
case LOADING:
@@ -147,6 +156,7 @@ public void postProcessJob(Job job, boolean jobFailed) throws CatalogException {
147156
index.setStatus(Index.Status.NONE);
148157
} else {
149158
index.setStatus(Index.Status.READY);
159+
FileMetadataReader.get(catalogManager).updateVariantFileStats(job, sessionId);
150160
}
151161
break;
152162
}
@@ -157,7 +167,7 @@ public void postProcessJob(Job job, boolean jobFailed) throws CatalogException {
157167
}
158168
catalogManager.modifyFile(indexedFileId, new ObjectMap("index", index), sessionId); //Modify status
159169
if (index.getStatus().equals(Index.Status.READY) && Boolean.parseBoolean(job.getAttributes().getOrDefault(VariantStorageManager.Options.CALCULATE_STATS.key(), VariantStorageManager.Options.CALCULATE_STATS.defaultValue()).toString())) {
160-
QueryResult<Cohort> queryResult = catalogManager.getAllCohorts(catalogManager.getStudyIdByJobId(job.getId()), new QueryOptions(CatalogSampleDBAdaptor.CohortFilterOption.name.toString(), VariantSourceEntry.DEFAULT_COHORT), sessionId);
170+
QueryResult<Cohort> queryResult = catalogManager.getAllCohorts(catalogManager.getStudyIdByJobId(job.getId()), new QueryOptions(CatalogSampleDBAdaptor.CohortFilterOption.name.toString(), StudyEntry.DEFAULT_COHORT), sessionId);
161171
if (queryResult.getNumResults() != 0) {
162172
logger.debug("Default cohort status set to READY");
163173
Cohort defaultCohort = queryResult.first();

opencga-analysis/src/main/java/org/opencb/opencga/analysis/files/BioformatDetector.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
package org.opencb.opencga.analysis.files;
22

3-
import org.apache.commons.lang.math.NumberUtils;
3+
import org.apache.commons.lang3.math.NumberUtils;
44
import org.opencb.opencga.catalog.models.File;
5-
import org.opencb.opencga.core.common.IOUtils;
65

7-
import java.io.*;
6+
import java.io.BufferedReader;
7+
import java.io.FileInputStream;
8+
import java.io.IOException;
9+
import java.io.InputStreamReader;
810
import java.net.URI;
911
import java.nio.file.Files;
1012
import java.nio.file.Path;
1113
import java.nio.file.Paths;
12-
import java.util.HashMap;
13-
import java.util.Map;
14-
import java.util.regex.Pattern;
1514

1615
/**
1716
* Created by ralonso on 12/03/15.

0 commit comments

Comments
 (0)