Skip to content

Commit b9fdd20

Browse files
committed
allow user to set non-default JSON projects file
1 parent c01ec25 commit b9fdd20

File tree

9 files changed

+67
-30
lines changed

9 files changed

+67
-30
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,7 @@
1616
.DS_Store
1717
*/.DS_Store
1818
*.log
19+
examples/
20+
*.pdf
21+
*.csv
22+

bin/micall

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,12 @@ def parseArgs():
5353
help="<optional> Path to bowtie2 script.")
5454
parser.add_argument('--bt2build', default='bowtie2-build-s',
5555
help="<optional> Path to bowtie2-build script.")
56-
parser.add_argument('--threads', '-p', type=int, default=4,
56+
parser.add_argument('--threads', '-t', type=int, default=4,
5757
help="Number of threads for bowtie2 (default 4)")
5858

59+
parser.add_argument('--projects', '-p', type=argparse.FileType('rb'), required=False,
60+
help='<optional> Specify a custom projects JSON file.')
61+
5962
if len(sys.argv) == 1:
6063
parser.print_help()
6164
sys.exit()
@@ -200,7 +203,6 @@ if __name__ == '__main__':
200203

201204
print("Using {} version {}".format(bowtie2.path, bowtie2.version))
202205

203-
#print(args)
204206
if args.outdir is None:
205207
# default write outputs to same location as inputs
206208
if args.fastq1:

micall/core/aln2counts.py

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -487,25 +487,39 @@ def _create_consensus_writer(self, conseq_file):
487487
def write_consensus_header(self, conseq_file):
488488
self._create_consensus_writer(conseq_file).writeheader()
489489

490-
def write_consensus(self, conseq_file):
490+
def write_consensus(self, conseq_file, min_coverage=100):
491+
"""
492+
Generate nucleotide consensus sequences at varying mixture
493+
cutoffs, and write to user-specified file.
494+
:param conseq_file: csv.DictWriter object
495+
:param min_coverage: depth of coverage below which the nucleotide
496+
will be reported in lower-case.
497+
:return:
498+
"""
491499
conseq_writer = self._create_consensus_writer(conseq_file)
500+
492501
for mixture_cutoff in self.conseq_mixture_cutoffs:
493502
consensus = ''
494503
offset = None
495504
for seed_amino in self.seed_aminos[0]:
496505
if offset is None:
497506
if not seed_amino.counts:
498507
continue
499-
offset = seed_amino.consensus_index*3
508+
offset = seed_amino.consensus_index * 3
509+
500510
for seed_nuc in seed_amino.nucleotides:
501-
consensus += seed_nuc.get_consensus(mixture_cutoff)
511+
cnuc = seed_nuc.get_consensus(mixture_cutoff)
512+
coverage = sum(seed_nuc.counts.values())
513+
consensus += cnuc.upper() if (coverage >= min_coverage) else cnuc.lower()
514+
502515
if offset is not None:
503-
conseq_writer.writerow(
504-
{'region': self.seed,
505-
'q-cutoff': self.qcut,
506-
'consensus-percent-cutoff': format_cutoff(mixture_cutoff),
507-
'offset': offset,
508-
'sequence': consensus})
516+
conseq_writer.writerow({
517+
'region': self.seed,
518+
'q-cutoff': self.qcut,
519+
'consensus-percent-cutoff': format_cutoff(mixture_cutoff),
520+
'offset': offset,
521+
'sequence': consensus
522+
})
509523

510524
def _create_nuc_variants_writer(self, nuc_variants_file):
511525
return csv.DictWriter(nuc_variants_file,
@@ -813,7 +827,8 @@ def aln2counts(aligned_csv,
813827
failed_align_csv=None,
814828
nuc_variants_csv=None,
815829
callback=None,
816-
coverage_summary_csv=None):
830+
coverage_summary_csv=None,
831+
json=None):
817832
"""
818833
Analyze aligned reads for nucleotide and amino acid frequencies.
819834
Generate consensus sequences.
@@ -827,11 +842,15 @@ def aln2counts(aligned_csv,
827842
@param nuc_variants_csv: Open file handle to write the most frequent nucleotide sequence
828843
variants.
829844
@param callback: a function to report progress with three optional
830-
parameters - callback(message, progress, max_progress)
831-
@param coverage_summary_csv Open file handle to write coverage depth.
845+
parameters - callback(message, progress, max_progress)
846+
@param coverage_summary_csv: Open file handle to write coverage depth.
847+
@param json: specify a custom JSON project file; None loads the default file.
832848
"""
833849
# load project information
834-
projects = project_config.ProjectConfig.loadDefault()
850+
if json is None:
851+
projects = project_config.ProjectConfig.loadDefault()
852+
else:
853+
projects = project_config.ProjectConfig.load(json)
835854

836855
# initialize reporter classes
837856
insert_writer = InsertionWriter(coord_ins_csv)

micall/core/prelim_map.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#! /usr/bin/env python
1+
#! /usr/bin/env python3
22

33
"""
44
Shipyard-style bowtie2
@@ -37,7 +37,7 @@ def prelim_map(fastq1, fastq2, prelim_csv,
3737
bt2_path='bowtie2', bt2build_path='bowtie2-build-s',
3838
nthreads=BOWTIE_THREADS, callback=None,
3939
rdgopen=READ_GAP_OPEN, rfgopen=REF_GAP_OPEN, stderr=sys.stderr,
40-
gzip=False, work_path='', keep=False):
40+
gzip=False, work_path='', keep=False, json=None):
4141
""" Run the preliminary mapping step.
4242
4343
@param fastq1: the file name for the forward reads in FASTQ format
@@ -52,6 +52,8 @@ def prelim_map(fastq1, fastq2, prelim_csv,
5252
@param stderr: where to write the standard error output from bowtie2 calls.
5353
@param gzip: if True, FASTQ files are compressed
5454
@param work_path: optional path to store working files
55+
@param keep: if False, delete temporary files
56+
@param json: specify a custom JSON project file; None loads the default file.
5557
"""
5658

5759
bowtie2 = Bowtie2(execname=bt2_path)
@@ -92,7 +94,11 @@ def prelim_map(fastq1, fastq2, prelim_csv,
9294
max_progress=total_reads)
9395

9496
# generate initial reference files
95-
projects = project_config.ProjectConfig.loadDefault()
97+
if json is None:
98+
projects = project_config.ProjectConfig.loadDefault()
99+
else:
100+
projects = project_config.ProjectConfig.load(json)
101+
96102
ref_path = os.path.join(work_path, 'micall.fasta')
97103
with open(ref_path, 'w') as ref:
98104
projects.writeSeedFasta(ref)

micall/core/remap.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ def remap(fastq1, fastq2, prelim_csv, remap_csv, remap_counts_csv=None,
383383
bt2_path='bowtie2', bt2build_path='bowtie2-build-s',
384384
nthreads=BOWTIE_THREADS, callback=None, count_threshold=10,
385385
rdgopen=READ_GAP_OPEN, rfgopen=REF_GAP_OPEN, stderr=sys.stderr,
386-
gzip=False, debug_file_prefix=None, keep=False):
386+
gzip=False, debug_file_prefix=None, keep=False, json=None):
387387
"""
388388
Iterative re-map reads from raw paired FASTQ files to a reference sequence set that
389389
is being updated as the consensus of the reads that were mapped to the last set.
@@ -404,6 +404,7 @@ def remap(fastq1, fastq2, prelim_csv, remap_csv, remap_counts_csv=None,
404404
@param count_threshold: minimum number of reads that map to a region for it to be remapped
405405
@param rdgopen: read gap open penalty
406406
@param rfgopen: reference gap open penalty
407+
@param json: specify a custom JSON project file; None loads the default file.
407408
"""
408409

409410
reffile = os.path.join(work_path, 'temp.fasta')
@@ -441,7 +442,11 @@ def remap(fastq1, fastq2, prelim_csv, remap_csv, remap_counts_csv=None,
441442
worker_pool = multiprocessing.Pool(processes=nthreads) if nthreads > 1 else None
442443

443444
# retrieve reference sequences used for preliminary mapping
444-
projects = project_config.ProjectConfig.loadDefault()
445+
if json is None:
446+
projects = project_config.ProjectConfig.loadDefault()
447+
else:
448+
projects = project_config.ProjectConfig.load(json)
449+
445450
seeds = {}
446451
for seed, vals in projects.config['regions'].items():
447452
seqs = vals['reference']

micall/tests/aln2counts_test.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import csv
2-
import StringIO
2+
from io import StringIO
33
import sys
44
import unittest
55

@@ -30,23 +30,23 @@ def add_override(self,
3030
query,
3131
aligned_query,
3232
aligned_reference,
33-
score=sys.maxint):
33+
score=sys.maxsize):
3434
self.overrides[(reference, query)] = (aligned_reference,
3535
aligned_query,
3636
score)
3737

3838

3939
class SequenceReportTest(unittest.TestCase):
4040
def setUp(self):
41-
self.insertion_file = StringIO.StringIO()
41+
self.insertion_file = StringIO()
4242
insert_writer = InsertionWriter(
4343
insert_file=self.insertion_file)
4444
projects = project_config.ProjectConfig()
4545

4646
# Content of seed regions is irrelevant. For R-NO-COORD, there is
4747
# no coordinate reference, so we use the seed reference for display, but
4848
# only the length matters.
49-
projects.load(StringIO.StringIO("""\
49+
projects.load(StringIO("""\
5050
{
5151
"projects": {
5252
"R1": {
@@ -148,11 +148,11 @@ def setUp(self):
148148
self.report = StubbedSequenceReport(insert_writer,
149149
projects,
150150
conseq_mixture_cutoffs)
151-
self.report_file = StringIO.StringIO()
151+
self.report_file = StringIO()
152152

153153
def prepareReads(self, aligned_reads_text):
154154
full_text = "refname,qcut,rank,count,offset,seq\n" + aligned_reads_text
155-
dummy_file = StringIO.StringIO(full_text)
155+
dummy_file = StringIO(full_text)
156156
return csv.DictReader(dummy_file)
157157

158158
def testEmptyAminoReport(self):
@@ -643,7 +643,7 @@ def testMultipleCoordinateInsertionReport(self):
643643
""" Two coordinate regions map the same seed region, the consensus
644644
has an insertion relative to only one of them.
645645
"""
646-
self.report.projects.load(StringIO.StringIO("""\
646+
self.report.projects.load(StringIO("""\
647647
{
648648
"projects": {
649649
"R3": {
@@ -788,7 +788,7 @@ def testGoodAlignmentWithGiantSeed(self):
788788
Even when the consensus maps to the end of the seed, it should still
789789
only require a low alignment score.
790790
"""
791-
self.report.projects.load(StringIO.StringIO("""\
791+
self.report.projects.load(StringIO("""\
792792
{
793793
"projects": {
794794
"R3": {

micall/tests/censor_fastq_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import StringIO
1+
from io import StringIO
22
import unittest
33

44
from micall.core.censor_fastq import censor

micall/tests/coverage_plots_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from mock import patch, call
22
from unittest import TestCase
3-
from StringIO import StringIO
3+
from io import StringIO
44
from micall.utils.coverage_plots import coverage_plot
55
from micall.core.project_config import ProjectConfig
66

micall/utils/coverage.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ if (length(args) != 2) {
66
input.csv <- args[1]
77
out.prefix <- args[2]
88

9+
910
df <- read.csv(input.csv)
1011

1112
# guess if this is a nuc or amino CSV

0 commit comments

Comments
 (0)