Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

finish bowtie2 rule #153

Open
wants to merge 5 commits into
base: easy_sra_ftp
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion elvers/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,12 @@ failwhale = animalsD['failwhale']
# include all rule files
includeRules = config['include_rules']
for r in includeRules:
include: r
try:
include: r
except:
rule_name = os.path.basename(r).split('.rule')[0]
sys.stderr.write(f"\n\tError: Can't use the {rule_name} rule. Please check rule file code at {r} or file an issue on github!\n\n")
sys.exit(-1)

onstart:
shell('cat {octopus}')
Expand Down
6 changes: 2 additions & 4 deletions elvers/rules/bowtie2/bowtie2-index-wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,5 @@
extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

index = snakemake.output[0]
index_basename = index.split('.1.bt2',1)[0]

shell("bowtie2-build --threads {snakemake.threads} {snakemake.params.extra} {snakemake.input} {index_basename} {log}" )
shell("bowtie2-build --threads {snakemake.threads} {snakemake.params.extra} {snakemake.input} {snakemake.params.index} {log}" )
shell("touch {snakemake.output.donefile}")
28 changes: 19 additions & 9 deletions elvers/rules/bowtie2/bowtie2-map-wrapper.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
__author__ = "Johannes Köster"
__author__ = "Johannes Köster and N Tessa Pierce"
__copyright__ = "Copyright 2016, Johannes Köster"
__email__ = "[email protected]"
__license__ = "MIT"


## modified from original wrapper to enable multiple files per sample
from snakemake.shell import shell

extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

n = len(snakemake.input.sample)
assert n == 1 or n == 2, "input->sample must have 1 (single-end) or 2 (paired-end) elements."
# if multiple files, join them with ',' before mapping with bt2
r1 = snakemake.input.get("r1")
r2 = snakemake.input.get("r2")
r = snakemake.input.get("r")

if n == 1:
reads = "-U {}".format(*snakemake.input.sample)
else:
reads = "-1 {} -2 {}".format(*snakemake.input.sample)
assert (r1 is not None and r2 is not None) or r is not None, "either r1 and r2 (paired), or r (unpaired) are required as input"
if r1:
r1 = [snakemake.input.r1] if isinstance(snakemake.input.r1, str) else snakemake.input.r1
r2 = [snakemake.input.r2] if isinstance(snakemake.input.r2, str) else snakemake.input.r2
assert len(r1) == len(r2), "input-> equal number of files required for r1 and r2" # true?
r1_cmd = ' -1 ' + ",".join(r1) #+ manual_decompression(" ".join(r1), zip_extension)
r2_cmd = ' -2 ' + ",".join(r2) # manual_decompression(" ".join(r2), zip_extension)
read_cmd = " ".join([r1_cmd,r2_cmd])
if r:
assert r1 is None and r2 is None, "Bowtie2 cannot quantify mixed paired/unpaired input files. Please input either r1,r2 (paired) or r (unpaired)"
r = [snakemake.input.r] if isinstance(snakemake.input.r, str) else snakemake.input.r
read_cmd = ' -U ' + ",".join(r) #manual_decompression(" ".join(r), zip_extension)

shell(
"(bowtie2 --threads {snakemake.threads} {snakemake.params.extra} "
"-x {snakemake.params.index} {reads} "
"-x {snakemake.params.index} {read_cmd} "
"| samtools view -Sbh -o {snakemake.output[0]} -) {log}")
117 changes: 117 additions & 0 deletions elvers/rules/bowtie2/bowtie2.rule
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from os.path import join

prog = "bowtie2"

ep_params = config[prog]['elvers_params']
logs_dir = config['elvers_directories']['logs']
prog_params = config[prog]['program_params']

bt2_dir = ep_params['outputs']['outdir']
BASE = config['basename']

# get inputs
assembly_dir = config['elvers_directories']['outdirs']['assemble']
combine_units = ep_params['outputs']['extensions']['read']['combine_units']

if prog_params.get('input_trimmed', True):
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '.trim.fq.gz'
se_ext = 'se'
else:
input_dir = config['elvers_directories']['outdirs']['input_data']
ext = '.fq.gz'
se_ext = '1'

if config.get('ignore_units'):
combine_units = False

# use if you don't want to collapse technical replicates ("units" column; deseq2 not supported)
def get_sample_no_combine(w):
readsD = {}
if not is_single_end(**w): # note, this requires unit to be in outputs!
readsD['r1'] = join(input_dir, f'{w.sample}_{w.unit}_1{ext}')
readsD['r2'] = join(input_dir, f'{w.sample}_{w.unit}_2{ext}')
return readsD
readsD['r'] = join(input_dir, f'{w.sample}_{w.unit}{ext}')
return readsD

def get_sample_combine_units(w):
readsD = {}
r1,r2,r = [],[],[]
unit_list = samples.groupby(level=0).get_group(w.sample)['unit'].tolist()
for unit in unit_list:
if not is_single_end(w.sample, unit):
r1+= [join(input_dir, f"{w.sample}_{unit}_1{ext}")]
r2+= [join(input_dir, f"{w.sample}_{unit}_2{ext}")]
else:
r+= [join(input_dir, f"{w.sample}_{unit}{ext}")]
if r1:
readsD['r1'] = r1
readsD['r2'] = r2
elif r:
readsD['r'] = r
return readsD

rule bowtie2_index:
"""
Index the transcriptome for bowtie2 mapping
"""
input:
join(assembly_dir, "{assembly}.fasta")
output:
donefile = join(bt2_dir, "{assembly}.bowtie2index.done")
# touching flag file should work, but doesn't. snakemake issue?
#touch(join(bt2_dir, "{assembly}.bowtie2index.done")
message:
"""--- Indexing the transcriptome with Bowtie2 ---"""
threads: 10
params:
index = join(bt2_dir,"{assembly}.bowtie2index"),
extra = prog_params['index_params'].get('extra', '')
log:
join(logs_dir, 'bowtie2/{assembly}_index.log')
benchmark:
join(logs_dir, 'bowtie2/{assembly}_index.benchmark')
conda: "environment.yml"
script: "bowtie2-index-wrapper.py"

if combine_units:
rule bowtie2_map_combine_units:
"""
Map to transcripts with Bowtie2
"""
input:
unpack(get_sample_combine_units),
index_donefile = join(bt2_dir, "{assembly}.bowtie2index.done"),
output:
join(bt2_dir,"{sample}_x_{assembly}.bt2.bam"),
message:
"""--- Mapping to transcripts with Bowtie2 ---"""
params:
index = join(bt2_dir, "{assembly}.bowtie2index"),
extra = prog_params['map_params'].get('extra', '')
threads: 20
log: join(logs_dir, 'bowtie2/{sample}_x_{assembly}.log')
benchmark: join(logs_dir, 'bowtie2/{sample}_x_{assembly}.benchmark')
conda: "environment.yml"
script: "bowtie2-map-wrapper.py"
else:
rule bowtie2_map_no_combine:
"""
Map to transcripts with Bowtie2
"""
input:
unpack(get_sample_no_combine),
index_donefile = join(bt2_dir, "{assembly}.bowtie2index.done"),
output:
join(bt2_dir,"{sample}_{unit}_x_{assembly}.bt2.bam"),
message:
"""--- Mapping to transcripts with bowtie2 ---"""
params:
index = join(bt2_dir, "{assembly}.bowtie2index"),
extra = prog_params['map_params'].get('extra', '')
threads: 20
log:join(logs_dir, 'bowtie2/{sample}_{unit}_x_{assembly}.bt2.log')
benchmark:join(logs_dir, 'bowtie2/{sample}_{unit}_x_{assembly}.bt2.benchmark')
conda: "environment.yml"
script: "bowtie2-map-wrapper.py"
110 changes: 0 additions & 110 deletions elvers/rules/bowtie2/bowtie2.rule_init

This file was deleted.

17 changes: 13 additions & 4 deletions elvers/rules/bowtie2/params.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@ bowtie2:
read:
- trimmed
- raw
reference:
- fasta
outputs:
outdir: 'bowtie2'
outdir: bowtie2
extensions:
read:
combine_units: True
pe:
- '.pe.bam'
- _x___reference__.bt2.bam
se:
- '.se.bam'
- _x___reference__.bt2.bam
base:
- .bowtie2index.done
help: "To run the bowtie2 rule, please provide input data, and either 1) run reference generation, or 2) provide a reference via the `get_reference` utility"
citations:
- "Langmead B, Salzberg S. Fast gapped-read alignment with Bowtie 2. Nature Methods. 2012, 9:357-359"
program_params:
extra: ''
input_trimmed: True
index_params:
extra: '' # extra info for the program
map_params:
extra: ' --local '
4 changes: 4 additions & 0 deletions elvers/rules/bowtie2/test/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Additional parameters required for testing the bowtie2 rule

bowtie2:
input_trimmed: False
2 changes: 1 addition & 1 deletion elvers/schemas/elvers_params.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ required:
- elvers_directories
- elvers_workflows
- include_rules
- reference_extensions
# - reference_extensions
8 changes: 8 additions & 0 deletions elvers/tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,11 @@ def test_rcorrector_short():
def test_rcorrector_long():
run_ruletest('rcorrector', short = False)

def test_bowtie2_short():
run_ruletest('bowtie2')

@pytest.mark.long
def test_bowtie2_long():
run_ruletest('bowtie2', short = False)


3 changes: 3 additions & 0 deletions elvers/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ def generate_targs(outdir, basename, samples, ref_exts=[''], base_exts = None, r
base_targets, read_targets, other_targs = [],[],[]
# handle read targets
if read_exts:
if samples is None:
sys.stderr.write(f"Error: attempting to run a read-based workflow without a samples table. Please specify your samples file in the get_samples secton of your config.")
sys.exit(-1)
pe_ext = read_exts.get('pe', None)
se_ext = read_exts.get('se', None)
combine_units = read_exts.get('combine_units', False)
Expand Down