Skip to content

Commit

Permalink
removed the test and logic that deals with explicit determination of …
Browse files Browse the repository at this point in the history
…the gene names for intronic alignments
  • Loading branch information
kishorikonwar committed Mar 24, 2020
1 parent da3c12a commit 109da40
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 50 deletions.
52 changes: 15 additions & 37 deletions src/sctools/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
gene_name_tag: str=consts.GENE_NAME_TAG_KEY, open_mode: str='rb')
from_mtx(matrix_mtx: str, row_index_file: str, col_index_file: str)
Notes
-----
Memory usage of this module can be roughly approximated by the chunk_size parameter in Optimus.
Expand Down Expand Up @@ -251,36 +252,7 @@ def from_sorted_tagged_bam(
) in grouped_records_generator:

# modify alignments to include the gene name to the alignments to INTRONIC regions
if chromosomes_gene_locations_extended:
alignments = []
for alignment in input_alignments:
if alignment.has_tag("XF"):
aln_type = alignment.get_tag("XF")
if (
alignment.reference_name
and aln_type == "INTRONIC"
and alignment.reference_name
in chromosomes_gene_locations_extended
):
gene_name = cls.binary_overlap(
chromosomes_gene_locations_extended[
alignment.reference_name
],
0,
len(
chromosomes_gene_locations_extended[
alignment.reference_name
]
)
- 1,
alignment.reference_start,
)

if gene_name:
alignment.set_tag("GE", gene_name)
alignments.append(alignment)
else:
alignments = input_alignments
alignments = input_alignments

# only keep queries w/ well-formed UMIs
gene_name = None
Expand All @@ -289,8 +261,11 @@ def from_sorted_tagged_bam(

if len(alignments) == 1:
primary_alignment = alignments[0]
if primary_alignment.has_tag(gene_name_tag) and primary_alignment.has_tag('XF') \
and primary_alignment.get_tag('XF')!='INTERGENIC':
if (
primary_alignment.has_tag(gene_name_tag)
and primary_alignment.has_tag('XF')
and primary_alignment.get_tag('XF') != 'INTERGENIC'
):
gene_name = primary_alignment.get_tag(gene_name_tag)
# overlaps multiple genes, drop query, and unfortunately there only one
# one alignment for this query
Expand All @@ -301,20 +276,23 @@ def from_sorted_tagged_bam(
else: # multi-map
implicated_gene_names: Set[str] = set()
for alignment in alignments:
if alignment.has_tag(gene_name_tag) and alignment.has_tag('XF') \
and alignment.get_tag('XF')!='INTERGENIC':
if (
alignment.has_tag(gene_name_tag)
and alignment.has_tag('XF')
and alignment.get_tag('XF') != 'INTERGENIC'
):
# consider its gene name only if it has only gene name
gene_name = alignment.get_tag(gene_name_tag)
if len(gene_name.split(',')) == 1:
if len(gene_name.split(',')) == 1:
implicated_gene_names.add(alignment.get_tag(gene_name_tag))

if len(implicated_gene_names) == 1: # only one gene
gene_name = implicated_gene_names.__iter__().__next__()
else:
continue # drop query

if gene_name==None:
continue
if gene_name is None:
continue

if (
cell_barcode,
Expand Down
4 changes: 2 additions & 2 deletions src/sctools/metrics/gatherer.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ def extract_metrics(self, mode: str = 'rb') -> None:
for gene_iterator, gene_tag in iter_genes(bam_iterator=bam_iterator):
metric_aggregator = GeneMetrics()

# in case of multi-genes ignore
# in case of multi-genes ignore as in the counting stage
if gene_tag and len(gene_tag.split(',')) > 1:
continue
continue

# break up gene ids by cell barcodes
for cell_iterator, cell_tag in iter_cell_barcodes(
Expand Down
11 changes: 0 additions & 11 deletions src/sctools/test/test_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,17 +894,6 @@ def extract_gene_non_exons(
[bam.QueryNameSortOrder(), CellMoleculeGeneQueryNameSortOrder()],
ids=["query_name_sort_order", "cell_molecule_gene_query_name_sort_order"],
)
def test_count_matrix_with_introns(
alignment_sort_order: bam.AlignmentSortOrder, gene_name_to_index
):
_count_matrix_with_introns(
alignment_sort_order, gene_name_to_index, consts.SINGLE_CELL_COUNT_MATRIX
)
_count_matrix_with_introns(
alignment_sort_order, gene_name_to_index, consts.SINGLE_NUCLEI_COUNT_MATRIX
)


def _count_matrix_with_introns(
alignment_sort_order: bam.AlignmentSortOrder, gene_name_to_index, test_index
):
Expand Down

0 comments on commit 109da40

Please sign in to comment.