Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New feature: allow user to collapse R1 and R2 in the same UMI group to separate consensus reads #22

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dynast/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.0.1'
__version__ = '1.0.2.beta'
20 changes: 16 additions & 4 deletions dynast/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ def consensus(
add_RS_RI: bool = False,
n_threads: int = 8,
temp_dir: Optional[str] = None,
collapse_r1_r2: bool = False # <-- NEW parameter
):
"""Main interface for the `consensus` command.
"""
Main interface for the `consensus` command.

Args:
bam_path: Path to BAM to call consensus from
Expand All @@ -40,15 +42,19 @@ def consensus(
add_RS_RI: Add RS and RI tags to BAM. Mostly useful for debugging.
n_threads: Number of threads to use
temp_dir: Temporary directory
collapse_r1_r2: If True, reads from R1 and R2 for the same UMI
will be combined into a single consensus read. If False,
R1 and R2 will each get their own consensus.
"""
stats = Stats()
stats.start()
stats_path = os.path.join(
out_dir, f'{constants.STATS_PREFIX}_{dt.datetime.strftime(stats.start_time, "%Y%m%d_%H%M%S_%f")}.json'
out_dir,
f'{constants.STATS_PREFIX}_{dt.datetime.strftime(stats.start_time, "%Y%m%d_%H%M%S_%f")}.json'
)
os.makedirs(out_dir, exist_ok=True)

# Sort and index bam.
# Sort and index BAM
bam_path = preprocessing.sort_and_index_bam(
bam_path, '{}.sortedByCoord{}'.format(*os.path.splitext(bam_path)), n_threads=n_threads
)
Expand Down Expand Up @@ -105,6 +111,10 @@ def consensus(

consensus_path = os.path.join(out_dir, constants.CONSENSUS_BAM_FILENAME)
logger.info(f'Calling consensus sequences from BAM to {consensus_path}')

# ----------------------------------------------------------------------
# Pass collapse_r1_r2 into preprocessing.call_consensus
# ----------------------------------------------------------------------
preprocessing.call_consensus(
bam_path,
consensus_path,
Expand All @@ -117,7 +127,9 @@ def consensus(
quality=quality,
add_RS_RI=add_RS_RI,
temp_dir=temp_dir,
n_threads=n_threads
n_threads=n_threads,
collapse_r1_r2=collapse_r1_r2
)

stats.end()
stats.save(stats_path)
60 changes: 37 additions & 23 deletions dynast/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,16 +167,7 @@ def setup_align_args(parser: argparse.ArgumentParser, parent: argparse.ArgumentP


def setup_consensus_args(parser: argparse.ArgumentParser, parent: argparse.ArgumentParser) -> argparse.ArgumentParser:
"""Helper function to set up a subparser for the `consensus` command.

Args:
parser: Argparse parser to add the `consensus` command to
parent: Argparse parser parent of the newly added subcommand.
Used to inherit shared commands/flags

Returns:
The newly added parser
"""
"""Helper function to set up a subparser for the `consensus` command."""
parser_consensus = parser.add_parser(
'consensus',
description='Generate consensus sequences',
Expand Down Expand Up @@ -258,6 +249,22 @@ def setup_consensus_args(parser: argparse.ArgumentParser, parent: argparse.Argum
),
action='store_true'
)

# ----------------------------------------------------------------
# NEW ARGUMENT to toggle separate R1/R2. Default is False =>
# (which means R1/R2 are collapsed together internally).
# ----------------------------------------------------------------
parser_consensus.add_argument(
'--separate-r1-r2',
action='store_true',
default=False,
help=(
'If specified, R1 and R2 from the same UMI will be collapsed '
'separately (i.e. produce two consensus reads). By default, '
'they are collapsed into one consensus read.'
),
)

parser_consensus.add_argument(
'bam',
help=(
Expand All @@ -268,7 +275,6 @@ def setup_consensus_args(parser: argparse.ArgumentParser, parent: argparse.Argum
)
return parser_consensus


def setup_count_args(parser: argparse.ArgumentParser, parent: argparse.ArgumentParser) -> argparse.ArgumentParser:
"""Helper function to set up a subparser for the `count` command.

Expand Down Expand Up @@ -727,12 +733,9 @@ def parse_align(parser, args, temp_dir=None):


def parse_consensus(parser: argparse.ArgumentParser, args: argparse.Namespace, temp_dir: Optional[str] = None):
"""Parser for the `consensus` command.

Args:
parser: The parser
args: Command-line arguments dictionary, as parsed by argparse
temp_dir: Temporary directory
"""
Parser for the `consensus` command. We also handle whether to
separate R1/R2 (based on `args.separate_r1_r2`).
"""
# Check quality
if args.quality < 0 or args.quality > 41:
Expand All @@ -759,11 +762,19 @@ def parse_consensus(parser: argparse.ArgumentParser, args: argparse.Namespace, t
else:
logger.info(f'Auto-detected strandedness: {strand}. Use `--strand` to override.')

# ------------------------------------------------------------------
# Convert user-specified --separate-r1-r2 to collapse_r1_r2 bool.
# If separate_r1_r2 == True => we do NOT collapse them together =>
# collapse_r1_r2 = False
# If separate_r1_r2 == False => we collapse them => True
# ------------------------------------------------------------------
collapse_r1_r2 = not args.separate_r1_r2

from .consensus import consensus
consensus(
args.bam,
args.g,
args.o,
bam_path=args.bam,
gtf_path=args.g,
out_dir=args.o,
strand=strand,
umi_tag=args.umi_tag,
barcode_tag=args.barcode_tag,
Expand All @@ -773,6 +784,7 @@ def parse_consensus(parser: argparse.ArgumentParser, args: argparse.Namespace, t
add_RS_RI=args.add_RS_RI,
n_threads=args.t,
temp_dir=temp_dir,
collapse_r1_r2=collapse_r1_r2, # <-- pass the bool down
)


Expand Down Expand Up @@ -995,30 +1007,31 @@ def main():
metavar='<CMD>',
)

# Add common options to this parent parser
# Common parent parser
parent = argparse.ArgumentParser(add_help=False)
parent.add_argument('--tmp', metavar='TMP', help='Override default temporary directory', type=str, default='tmp')
parent.add_argument('--keep-tmp', help='Do not delete the tmp directory', action='store_true')
parent.add_argument('--verbose', help='Print debugging information', action='store_true')
parent.add_argument('-t', metavar='THREADS', help='Number of threads to use (default: 8)', type=int, default=8)

# Command parsers
# Create each subcommand parser
parser_ref = setup_ref_args(subparsers, parent)
parser_align = setup_align_args(subparsers, parent)
parser_consensus = setup_consensus_args(subparsers, parent)
parser_count = setup_count_args(subparsers, parent)
parser_estimate = setup_estimate_args(subparsers, parent)

command_to_parser = {
'ref': parser_ref,
'align': parser_align,
'consensus': parser_consensus,
'count': parser_count,
'estimate': parser_estimate,
}

if '--list' in sys.argv:
print_technologies()

# Show help when no arguments are given
if len(sys.argv) == 1:
parser.print_help(sys.stderr)
sys.exit(1)
Expand All @@ -1045,6 +1058,7 @@ def main():
)
os.makedirs(args.tmp)
os.environ['NUMEXPR_MAX_THREADS'] = str(args.t)

try:
patch_mp_connection_bpo_17560() # Monkeypatch for python 3.7
with warnings.catch_warnings():
Expand Down
Loading