-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A new step called fastqprocessing to speed up Optimus (#82)
* added the fastqprocessing folder that merges the first steps (FastqToBam, Attach10XBarcodes, SplitBamFile, SplitBamByCellBarcodes) into one step in Optimus
* added comments and reorganized some files
* added all comments
* added the logic to process when I1 files are not provided
* removed the libStatGen
* modified dockerfile and the patches
* updated patch to v1.0.14
* added the makefile without -Werror
* modified docker file to use only the patches to the libStatGen code and freeze a particular version
* removed the libStatGen folder
* addressed the review comments
* addressed the review comments
* formatted the python
* formatted the python
* formatted the python
* addressed review comments
* refactored process_file into four functions
* added some comments to the new functions
- Loading branch information
1 parent
9d09540
commit fd352ae
Showing
30 changed files
with
1,410 additions
and
156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,9 @@ LABEL maintainer="Ambrose J. Carr <[email protected]>" \ | |
description="python 3.7.7 with pysam, sctools, requests, and a basic science stack" | ||
|
||
COPY requirements.txt . | ||
|
||
RUN apt-get update && apt-get install -y patch && apt-get install -y libhdf5-dev | ||
|
||
RUN pip3 install -r requirements.txt | ||
|
||
RUN mkdir /sctools/ | ||
|
@@ -13,6 +16,24 @@ COPY . /sctools | |
|
||
RUN pip3 install /sctools | ||
|
||
ARG libStatGen_version="1.0.14" | ||
|
||
RUN wget https://github.com/HumanCellAtlas/sctools/archive/kmk-fastqprocessing.zip | ||
|
||
RUN unzip kmk-fastqprocessing.zip && \ | ||
cd sctools-kmk-fastqprocessing/fastqpreprocessing &&\ | ||
wget https://github.com/statgen/libStatGen/archive/v${libStatGen_version}.tar.gz &&\ | ||
tar -zxvf v${libStatGen_version}.tar.gz &&\ | ||
mv libStatGen-${libStatGen_version} libStatGen &&\ | ||
patch libStatGen/fastq/FastQFile.cpp patches/FastQFile.cpp.patch &&\ | ||
patch libStatGen/Makefile patches/Makefile.patch &&\ | ||
patch libStatGen/general/Makefile patches/general.Makefile.patch &&\ | ||
make -C libStatGen &&\ | ||
mkdir src/obj &&\ | ||
make -C src/ | ||
|
||
RUN cp sctools-kmk-fastqprocessing/fastqpreprocessing/src/fastqprocess /usr/local/bin/ | ||
|
||
WORKDIR usr/local/bin/sctools | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
*~ | ||
*.o | ||
*.a | ||
*.bak | ||
dox/ | ||
dox_errors.txt | ||
*# | ||
*nohup.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- libStatGen-1.0.14/fastq/FastQFile.cpp 2015-07-08 20:03:23.000000000 +0000 | ||
+++ ../libStatGen/FastQFile.cpp 2020-09-17 19:35:48.797593411 +0000 | ||
@@ -489,6 +489,7 @@ | ||
// Check to see if the sequenceIdentifier is a repeat by adding | ||
// it to the set and seeing if it already existed. | ||
std::pair<std::map<std::string, unsigned int>::iterator,bool> insertResult; | ||
+ /* | ||
insertResult = | ||
myIdentifierMap.insert(std::make_pair(mySequenceIdentifier.c_str(), | ||
myLineNum)); | ||
@@ -505,6 +506,7 @@ | ||
reportErrorOnLine(); | ||
return(false); | ||
} | ||
+ */ | ||
} | ||
|
||
// Valid, return true. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
--- libStatGen-1.0.14/Makefile 2015-07-08 20:03:23.000000000 +0000 | ||
+++ ../libStatGen/Makefile 2020-09-03 14:15:41.904210140 +0000 | ||
@@ -2,7 +2,8 @@ | ||
|
||
.PHONY: package | ||
|
||
-SUBDIRS=general bam fastq glf samtools vcf | ||
+#SUBDIRS=general bam fastq glf samtools vcf | ||
+SUBDIRS=general fastq samtools bam | ||
|
||
include Makefiles/Makefile.base | ||
|
||
@@ -16,7 +17,8 @@ | ||
general: samtools | ||
|
||
# other subdirectories depend on general | ||
-bam fastq glf vcf: general | ||
+#bam fastq glf vcf: general | ||
+bam fastq : general | ||
|
||
RELEASE_FILE?=libStatGen.$(VERSION).tgz | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
--- libStatGen-1.0.14/general/Makefile 2020-09-17 20:29:00.320563968 +0000 | ||
+++ ../libStatGen/Makefile.general 2020-09-17 20:57:47.982915972 +0000 | ||
@@ -8,7 +8,7 @@ | ||
# an error, but allow unused results and variables for the | ||
# time being. | ||
# | ||
- USER_WARNINGS ?= -Werror $(shell if [ X$(CCVERSION) \> X4.2.0 ] ; then echo " -Wno-strict-overflow" ; fi) | ||
+ USER_WARNINGS ?= $(shell if [ X$(CCVERSION) \> X4.2.0 ] ; then echo " -Wno-strict-overflow" ; fi) | ||
#-Wno-strict-overflow | ||
# -Wno-unused-variable $(shell if [ X$(CCVERSION) \> X4.2.0 ] ; then echo " -Wno-unused-result" ; fi) | ||
endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
IDIR =../libStatGen/include | ||
|
||
CC = g++ -std=c++17 -O4 | ||
CFLAGS = -I$(IDIR) -L../libStatGen | ||
|
||
ODIR=obj | ||
LDIR =../libStatGen/ | ||
|
||
TARGET = fastqprocess | ||
LIBS = -lStatGen -lz -lpthread -lstdc++fs | ||
|
||
_DEPS = fastqprocess.h utilities.h | ||
#DEPS = $(patsubst %,$(IDIR)/%,$(_DEPS)) | ||
|
||
_OBJ = fastqprocess.o utilities.o main.o | ||
OBJ = $(patsubst %,$(ODIR)/%,$(_OBJ)) | ||
|
||
|
||
$(ODIR)/%.o: %.cpp $(DEPS) | ||
$(CC) -c -o $@ $< $(CFLAGS) | ||
|
||
$(TARGET): $(OBJ) $(_DEPS) | ||
$(CC) -o $@ $^ $(CFLAGS) $(LIBS) | ||
|
||
.PHONY: clean | ||
|
||
clean: | ||
rm -f $(ODIR)/*.o *~ core $(INCDIR)/*~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash | ||
./fastqprocess --verbose \ | ||
--bam-size 0.001 \ | ||
--barcode-length 16 \ | ||
--umi-length 10 \ | ||
--sample-id L8TX \ | ||
--white-list ../../../data/L8TX/737K-august-2016.txt \ | ||
--I1 ../../../data/L8TX/A_I1.fastq.gz \ | ||
--R1 ../../../data/L8TX/A_R1.fastq.gz \ | ||
--R2 ../../../data/L8TX/A_R2.fastq.gz \ | ||
--I1 ../../../data/L8TX/B_I1.fastq.gz \ | ||
--R1 ../../../data/L8TX/B_R1.fastq.gz \ | ||
--R2 ../../../data/L8TX/B_R2.fastq.gz \ |
Oops, something went wrong.