Skip to content

Commit

Permalink
attempt to fix input/output specification
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed Dec 1, 2019
1 parent ae88ba1 commit 58d2229
Show file tree
Hide file tree
Showing 15 changed files with 107 additions and 82 deletions.
14 changes: 7 additions & 7 deletions elvers/rules/khmer/khmer.rule
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ prog_params = config[prog]['program_params']
output_dir = elvers_params['outputs']['outdir']
# set default input dir, extensions
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '.trim.fq.gz'
ext = ['.trim.fq.gz']
# get input we're using in this case
input_name = prog_params.get('inputs', ['trimmed'])[0]
# update input_dir, ext
input_dir = elvers_params['input_options'][input_name].get('indir', input_dir )
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)[0]
diginorm = prog_params.get('diginorm', True)

def get_trimmed(w):
def get_reads(w):
readsD = {}
if not is_single_end(**w):
readsD['r1'] = expand(join(input_dir, '{sample}_{unit}_1{ext}'),**w, ext=ext)
Expand All @@ -35,7 +35,7 @@ if diginorm:
"""
kmer trim and diginorm with khmer
"""
input: unpack(get_trimmed)
input: unpack(get_reads)
output:
paired=join(output_dir,'{sample}_{unit}.paired.khmer.fq.gz'),
single=join(output_dir,'{sample}_{unit}.single.khmer.fq.gz'),
Expand All @@ -62,7 +62,7 @@ if diginorm:
"""
kmer trim and diginorm with khmer
"""
input: get_trimmed
input: get_reads
output: join(output_dir, '{sample}_{unit}.se.khmer.fq.gz'),
message:
"""--- khmer trimming of low-abundance kmers and digital normalization ---"""
Expand All @@ -86,7 +86,7 @@ else:
"""
kmer trim with khmer, no diginorm
"""
input: unpack(get_trimmed)
input: unpack(get_reads)
output:
paired=join(output_dir,'{sample}_{unit}.paired.khmer.fq.gz'),
single=join(output_dir,'{sample}_{unit}.single.khmer.fq.gz'),
Expand All @@ -109,7 +109,7 @@ else:
"""
khmer trim se, no diginorm
"""
input: get_trimmed
input: get_reads
output: join(output_dir, '{sample}_{unit}.se.khmer.fq.gz'),
message:
"""--- khmer trimming of low-abundance kmers ---"""
Expand Down
4 changes: 2 additions & 2 deletions elvers/rules/khmer/params.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ khmer:
output_options:
kmer_raw:
input: raw
outdir: preprocess
outdir: preprocess/khmer
extensions:
read:
common_ext:
Expand All @@ -22,7 +22,7 @@ khmer:
- '.se.khmer.fq.gz'
kmer_trimmed:
input: trimmed
outdir: preprocess
outdir: preprocess/khmer
extensions:
read:
common_ext:
Expand Down
56 changes: 32 additions & 24 deletions elvers/rules/paladin/paladin.rule
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
import os
from os.path import join

prog = 'paladin'

logs_dir = config['elvers_directories']['logs']
assembly_dir = config['elvers_directories']['outdirs']['assemble']

ep_params = config['paladin']['elvers_params']
prog_params = config['paladin']['program_params']
elvers_params = config[prog]['elvers_params']
prog_params = config[prog]['program_params']
index_params = prog_params['index_params']
alignment_params = prog_params['alignment_params']

# get outdir
paladin_dir = ep_params['outputs']['outdir']
assembly_extension = ep_params['outputs']['paladin_mapped']['extensions']['reference_extensions'][0]
# set output dir
output_dir = elvers_params['outputs']['outdir']
# set default input dir, extensions
#input_dir = config['elvers_directories']['outdirs']['preprocess']
#ext = '.trim.fq.gz'
# get input we're using in this case
# update input_dir, ext
#assembly_extension = elvers_params['outputs']['paladin_mapped']['extensions']['reference_extensions'][0]
inputs = prog_params.get('inputs')

# handle "associated_samples"
associated_samples = {}
Expand All @@ -22,19 +30,16 @@ if 'assembly_info' in config.keys():
assemb_info = config['assembly_info']
refs.update(assemb_info)

for ref_ext, ref_info in refs.items(): #in #config['get_reference']['program_params']['reference_list'].items():#config['reference_info'].items():
#for ref_ext, ref_info in config['reference_info'].items():
for ref_ext, ref_info in refs.items():
if ref_info.get('associated_samples'):
sample_list = ref_info['associated_samples']
if not ref_ext.startswith('_'):
ref_ext = '_' + ref_ext
refname = basename + ref_ext
associated_samples[refname] = sample_list


def get_paladin_input(w):
readsD = {}
inputs = prog_params['inputs']
# grab list of only the associated samples
if w.assembly in associated_samples.keys():
# this assembly has some associated samples
Expand All @@ -53,23 +58,26 @@ def get_paladin_input(w):
elif 'raw' in inputs:
ext = '_1.fq.gz'
input_dir = config['elvers_directories']['outdirs']['input_data']
#input_dir = elvers_params['input_options'][input_name].get('indir', input_dir )
#ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)
if w.sample in sample_list:
readsD['r'] = join(input_dir, f'{w.sample}_{w.unit}{ext}')
return readsD
# if single end:
elif 'trimmed' in inputs:
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '.trim.fq.gz'
elif 'raw' in inputs:
ext = '_1.fq.gz'
input_dir = config['elvers_directories']['outdirs']['input_data']
else:
if 'trimmed' in inputs:
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '_se.trim.fq.gz'
elif 'raw' in inputs:
ext = '_1.fq.gz'
input_dir = config['elvers_directories']['outdirs']['input_data']
if w.sample in sample_list:
readsD['r'] = join(input_dir, f'{w.sample}_{w.unit}{ext}')
return readsD

rule paladin_index:
input: join(assembly_dir, "{assembly}.fasta")
output: join(paladin_dir, "{assembly}.fasta.bwt"),
output: join(output_dir, "{assembly}.fasta.bwt"),
params:
reference_type= index_params.get('reference_type', '3'),
gff = index_params.get('gff_file', '')
Expand All @@ -81,9 +89,9 @@ rule paladin_index:
rule paladin_align:
input:
unpack(get_paladin_input),
index = join(paladin_dir, "{assembly}.fasta.bwt"),
index = join(output_dir, "{assembly}.fasta.bwt"),
output:
join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.bam"),
join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.bam"),
threads: 20
params:
f = alignment_params.get('f','125'),
Expand All @@ -94,8 +102,8 @@ rule paladin_align:
script: 'paladin-align.py'

rule samtools_sort_paladin:
input: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.bam")
output: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
input: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.bam")
output: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
conda: "environment.yml"
log: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.log")
benchmark: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.benchmark")
Expand All @@ -105,8 +113,8 @@ rule samtools_sort_paladin:
"""

rule samtools_flagstat_paladin:
input: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
output: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam.flagstat")
input: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
output: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam.flagstat")
log: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.flagstat.log")
benchmark: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.flagstat.benchmark")
conda: "environment.yml"
Expand All @@ -115,8 +123,8 @@ rule samtools_flagstat_paladin:
"""

rule samtools_index_paladin:
input: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
output: join(paladin_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam.bai")
input: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam")
output: join(output_dir,"{sample}_{unit}_x_{assembly}.paladin.sort.bam.bai")
conda: "environment.yml"
log: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.index.log")
benchmark: join(logs_dir, 'paladin',"{sample}_{unit}_x_{assembly}.paladin.sort.bam.index.benchmark")
Expand Down
2 changes: 1 addition & 1 deletion elvers/rules/paladin/params.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ paladin:
program_params:
inputs:
- pear_merged
- fasta
- plass_fasta
index_params:
reference_type: '3'
alignment_params:
Expand Down
2 changes: 1 addition & 1 deletion elvers/rules/paladin/test/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
paladin:
inputs:
- raw
- fasta
- plass_fasta
4 changes: 2 additions & 2 deletions elvers/rules/pear/pear.rule
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ prog_params = config[prog]['program_params']
output_dir = elvers_params['outputs']['outdir']
# set default input dir, extensions
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '.trim.fq.gz'
ext = ['.trim.fq.gz']
# get input we're using in this case
input_name = prog_params.get('inputs', ['trimmed'])[0]
# update input_dir, ext
input_dir = elvers_params['input_options'][input_name].get('indir', input_dir )
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)[0]

def get_pairs(w):
readsD = {}
Expand Down
4 changes: 2 additions & 2 deletions elvers/rules/rcorrector/rcorrector.rule
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ ep_params = config['rcorrector']['elvers_params']
rcorr_dir = ep_params['outputs']['outdir']
# set default input dir, extensions
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = '.trim.fq.gz'
ext = ['.trim.fq.gz']

# get input we're using in this case
input_name = rcorr_params.get('inputs', ['trimmed'])[0]
# update input_dir, ext
input_dir = ep_params['input_options'][input_name].get('indir', input_dir )
ext = ep_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)
ext = ep_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)[0]

# use if you don't want to collapse technical replicates ("units" column)
def get_sample_no_combine(w):
Expand Down
1 change: 1 addition & 0 deletions elvers/rules/sourmash/params.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ sourmash:
elvers_params:
input_options:
read:
- kmer_trimmed
- trimmed
- raw
base:
Expand Down
48 changes: 29 additions & 19 deletions elvers/rules/sourmash/sourmash.rule
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
from os.path import join

logs_dir = config['elvers_directories']['logs']
assembly_dir= config['elvers_directories']['outdirs']['assemble']
preprocess_dir= config['elvers_directories']['outdirs']['preprocess']
prog = "sourmash"

sourmash_params = config['sourmash']['program_params']
sourmash_dir = config['sourmash']['elvers_params']['outputs']['outdir']
logs_dir = config['elvers_directories']['logs']
elvers_params = config[prog]['elvers_params']
prog_params = config[prog]['program_params']

# set output dir
output_dir = elvers_params['outputs']['outdir']
# set default input dir, extensions
input_dir = config['elvers_directories']['outdirs']['preprocess']
ext = ['.trim.fq.gz']
# get input we're using in this case
input_name = prog_params.get('inputs', ['kmer_trimmed'])[0]
# update input_dir, ext
reads_dir = elvers_params['input_options'][input_name].get('indir', input_dir)
assembly_dir= config['elvers_directories']['outdirs']['assemble']
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)[0]

def get_reads(wildcards):
if not is_single_end(**wildcards):
return expand(join(preprocess_dir, '{sample}_{unit}.paired.khmer.fq.gz'), **wildcards)
return expand(join(preprocess_dir, '{sample}_{unit}.se.khmer.fq.gz'), **wildcards)
return expand(join(reads_dir, '{sample}_{unit}.paired.khmer.fq.gz'), **wildcards)
return expand(join(reads_dir, '{sample}_{unit}.se.khmer.fq.gz'), **wildcards)


rule sourmash_compute_assembly:
Expand All @@ -21,14 +31,14 @@ rule sourmash_compute_assembly:
input:
join(assembly_dir, "{assembly}.fasta")
output:
join(sourmash_dir, "{assembly}.fasta.sig")
join(output_dir, "{assembly}.fasta.sig")
message:
"""--- Computing a MinHash signature of the transcriptome with Sourmash ---"""
threads: 2
params:
k_size = sourmash_params.get('k_size', 31),
scaled = sourmash_params.get('scaled', 1000),
extra = sourmash_params.get('extra', '')
k_size = prog_params.get('k_size', 31),
scaled = prog_params.get('scaled', 1000),
extra = prog_params.get('extra', '')
log: join(logs_dir, 'sourmash', '{assembly}.log')
benchmark: join(logs_dir, 'sourmash', '{assembly}.benchmark')
conda:
Expand All @@ -41,14 +51,14 @@ rule sourmash_compute_pe_interleaved:
Computing a MinHash signature of the kmer-trimmed reads with Sourmash
"""
input: get_reads
output: join(sourmash_dir, "{sample}_{unit}.paired.khmer.fq.sig")
output: join(output_dir, "{sample}_{unit}.paired.khmer.fq.sig")
message:
"""--- Computing a MinHash signature of the kmer-trimmed reads with Sourmash ---"""
threads: 6
params:
k_size = sourmash_params.get('k_size', 31),
scaled = sourmash_params.get('scaled', 1000),
extra = sourmash_params.get('extra', '')
k_size = prog_params.get('k_size', 31),
scaled = prog_params.get('scaled', 1000),
extra = prog_params.get('extra', '')
log: join(logs_dir, 'sourmash', '{sample}_{unit}.log')
benchmark: join(logs_dir, 'sourmash', '{sample}_{unit}_pe.benchmark')
conda:
Expand All @@ -61,14 +71,14 @@ rule sourmash_compute_se:
Computing a MinHash signature of the kmer-trimmed reads with Sourmash
"""
input: get_reads
output: join(sourmash_dir, "{sample}_{unit}.se.khmer.fq.sig")
output: join(output_dir, "{sample}_{unit}.se.khmer.fq.sig")
message:
"""--- Computing a MinHash signature of the kmer-trimmed reads with Sourmash ---"""
threads: 6
params:
k_size = sourmash_params.get('k_size', 31),
scaled = sourmash_params.get('scaled', 1000),
extra = sourmash_params.get('extra', '')
k_size = prog_params.get('k_size', 31),
scaled = prog_params.get('scaled', 1000),
extra = prog_params.get('extra', '')
log: join(logs_dir, 'sourmash', '{sample}_{unit}.log')
benchmark: join(logs_dir, 'sourmash', '{sample}_{unit}_se.benchmark')
conda:
Expand Down
6 changes: 3 additions & 3 deletions elvers/rules/trimmomatic/trimmomatic.rule
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ prog_params = config[prog]['program_params']
output_dir = os.path.dirname(elvers_params['outputs']['output_files'][0])
# set default input dir, extensions
input_dir = config['elvers_directories']['outdirs']['input_data']
ext = '.fq.gz'
ext = ['.fq.gz']
# get input we're using in this case
input_name = prog_params.get('input', 'raw').lower()
input_name = prog_params.get('inputs', ['raw'])[0]
# update input_dir, ext
input_dir = elvers_params['input_options'][input_name].get('indir', input_dir)
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)
ext = elvers_params['input_options'][input_name]['extensions']['read'].get('common_ext', ext)[0]

# find adapters --> use utils/find_input_file for this?
base_dir = config['elvers_directories']['base_dir']
Expand Down
4 changes: 2 additions & 2 deletions elvers/rules/trinity/test/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
# Additional parameters required for testing the trinity rule

trinity:
input_kmer_trimmed: False
input_trimmed: False
inputs:
- raw
max_memory: 4G
Loading

0 comments on commit 58d2229

Please sign in to comment.