Skip to content

Commit

Permalink
ignore multi gene alignments (#74)
Browse files Browse the repository at this point in the history
* ignore multi gene alignments

* (a) fixed the gene_name_tag to gene_id_tag in platform.py and (b) added the logic for handling alignments that are tagged with multiple genes

* fix linting

Co-authored-by: Kishori Konwar <[email protected]>
  • Loading branch information
barkasn and kishorikonwar authored Mar 18, 2020
1 parent a3ec39d commit aaed0b9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
8 changes: 7 additions & 1 deletion src/sctools/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,19 @@ def from_sorted_tagged_bam(
primary_alignment = alignments[0]
if primary_alignment.has_tag(gene_name_tag):
gene_name = primary_alignment.get_tag(gene_name_tag)
# overlaps multiple genes: drop the query, since unfortunately there is only
# one alignment for this query
if len(gene_name.split(',')) != 1:
continue
else:
continue # drop query
else: # multi-map
implicated_gene_names: Set[str] = set()
for alignment in alignments:
if alignment.has_tag(gene_name_tag):
implicated_gene_names.add(alignment.get_tag(gene_name_tag))
# consider its gene name only if it has only one gene name
if len(gene_name.split(',')) == 1:
implicated_gene_names.add(alignment.get_tag(gene_name_tag))
if len(implicated_gene_names) == 1: # only one gene
gene_name = implicated_gene_names.__iter__().__next__()
else:
Expand Down
2 changes: 1 addition & 1 deletion src/sctools/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def bam_to_count_matrix(cls, args: Iterable[str] = None) -> int:
chromosomes_gene_locations_extended=gene_locations,
cell_barcode_tag=args.cell_barcode_tag,
molecule_barcode_tag=args.molecule_barcode_tag,
gene_name_tag=args.gene_name_tag,
gene_name_tag=args.gene_id_tag,
open_mode=open_mode,
)
matrix.save(args.output_prefix)
Expand Down

0 comments on commit aaed0b9

Please sign in to comment.