Skip to content

Commit aaed0b9

Browse files
ignore multi gene alignments (#74)
* ignore multi gene alignments * (a) fixed the gene_name_tag to gene_id_tag in platform.py and (b) added the logic for multi genes when there are multiple genes * fix linting Co-authored-by: Kishori Konwar <[email protected]>
1 parent a3ec39d commit aaed0b9

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

src/sctools/count.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,19 @@ def from_sorted_tagged_bam(
290290
primary_alignment = alignments[0]
291291
if primary_alignment.has_tag(gene_name_tag):
292292
gene_name = primary_alignment.get_tag(gene_name_tag)
293+
# overlaps multiple genes, drop query, and unfortunately there is only
294+
# one alignment for this query
295+
if len(gene_name.split(',')) != 1:
296+
continue
293297
else:
294298
continue # drop query
295299
else: # multi-map
296300
implicated_gene_names: Set[str] = set()
297301
for alignment in alignments:
298302
if alignment.has_tag(gene_name_tag):
299-
implicated_gene_names.add(alignment.get_tag(gene_name_tag))
303+
# consider its gene name only if it has only one gene name
304+
if len(gene_name.split(',')) == 1:
305+
implicated_gene_names.add(alignment.get_tag(gene_name_tag))
300306
if len(implicated_gene_names) == 1: # only one gene
301307
gene_name = implicated_gene_names.__iter__().__next__()
302308
else:

src/sctools/platform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ def bam_to_count_matrix(cls, args: Iterable[str] = None) -> int:
450450
chromosomes_gene_locations_extended=gene_locations,
451451
cell_barcode_tag=args.cell_barcode_tag,
452452
molecule_barcode_tag=args.molecule_barcode_tag,
453-
gene_name_tag=args.gene_name_tag,
453+
gene_name_tag=args.gene_id_tag,
454454
open_mode=open_mode,
455455
)
456456
matrix.save(args.output_prefix)

0 commit comments

Comments
 (0)