Skip to content

Commit

Permalink
fixed code structure errors introduced when packaging
Browse files Browse the repository at this point in the history
  • Loading branch information
danisven committed May 27, 2020
1 parent 9f149bf commit 6dd5757
Showing 1 changed file with 27 additions and 25 deletions.
52 changes: 27 additions & 25 deletions stringmeup/stringmeup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

__version__ = "0.1.0"
__version__ = "0.1.1"

import argparse
import stringmeup.taxonomy
Expand Down Expand Up @@ -53,7 +53,7 @@ class ReportNode:
offset: int


def validate_input_file(putative_classifications_file, verbose_input):
def validate_input_file(putative_classifications_file, verbose_input, minimum_hit_groups):
"""
Perform simple validation of the input file.
"""
Expand All @@ -74,12 +74,12 @@ def validate_input_file(putative_classifications_file, verbose_input):
# TODO: This can be done much better.
if not verbose_input:
num_cols = len(line_proc) == 5 # original type of kraken2 output file
if args.minimum_hit_groups:
if minimum_hit_groups:
log.error('You specified --minimum_hit_groups {}, but didn\'t supply an input file that contain a minimizer hit groups column.')
sys.exit()
else:
num_cols = len(line_proc) == 6 # 6 columns if the output was produced with the verbose version of kraken2 that outputs minimizer_groups
if not args.minimum_hit_groups:
if not minimum_hit_groups:
log.error('The input file contains too many columns. Use --minimum_hit_groups if you want to use an input file that contain a minimizer hit groups column.')
sys.exit()

Expand Down Expand Up @@ -148,7 +148,7 @@ def process_kmer_string(kmer_info_string):
return taxa_kmer_dict


def reclassify_read(read, confidence_threshold, taxonomy_tree, verbose_input):
def reclassify_read(read, confidence_threshold, taxonomy_tree, verbose_input, minimum_hit_groups, taxa_lineages):
"""
Sums the number of kmers that hit in the clade rooted at "current_node",
and divides it with the total number of kmers queried against the database:
Expand Down Expand Up @@ -191,7 +191,7 @@ def reclassify_read(read, confidence_threshold, taxonomy_tree, verbose_input):

# Filter minimizer_hit_groups
if verbose_input:
if read.minimizer_hit_groups < args.minimum_hit_groups:
if read.minimizer_hit_groups < minimum_hit_groups:
doomed_to_fail = True

# The nr of kmers that hit within the clade rooted at the current node:
Expand Down Expand Up @@ -250,20 +250,20 @@ def reclassify_read(read, confidence_threshold, taxonomy_tree, verbose_input):
if doomed_to_fail:
read.recalculated_conf = max_confidence
read.reclassified_taxid = 0
return read
return read, taxa_lineages

# If the confidence at this node is sufficient, we classify it to
# the current node (TaxID).
if read.recalculated_conf >= confidence_threshold:
read.classified = True
read.reclassified_taxid = read.current_node
return read
return read, taxa_lineages

# If the current node is the root, we stop (can't go higher up in the
# taxonomy).
elif read.current_node == 1:
read.reclassified_taxid = 0
return read
return read, taxa_lineages

# Otherwise, set the current_node to the parent and keep going.
else:
Expand Down Expand Up @@ -452,7 +452,7 @@ def format_kraken2_report_row(report_node):
return report_row


def make_kraken2_report(tax_reads, taxonomy_tree, total_reads):
def make_kraken2_report(tax_reads, taxonomy_tree, total_reads, output_report):
"""
Gets the information that should be printed from
get_kraken2_report_content. Formats the information and prints it to file
Expand All @@ -465,15 +465,15 @@ def make_kraken2_report(tax_reads, taxonomy_tree, total_reads):
tax_reads, taxonomy_tree, total_reads)

# If the output should go to file
if args.output_report:
with open(args.output_report, 'w') as f:
if output_report:
with open(output_report, 'w') as f:
for node in report_node_list:

# Format the output
report_row = format_kraken2_report_row(node)
f.write(report_row + '\n')

log.info('Report saved in {}.'.format(args.output_report))
log.info('Report saved in {}.'.format(output_report))

# Otherwise, print to stdout
else:
Expand Down Expand Up @@ -548,7 +548,7 @@ def create_read(kraken2_read, verbose_input=False):
return read


def main_loop(f_handle, tax_reads_dict, taxonomy_tree, verbose_input=False, o_handle=None, v_handle=None):
def main_loop(f_handle, tax_reads_dict, taxonomy_tree, args, report_frequency, taxa_lineages, verbose_input=False, o_handle=None, v_handle=None):
"""
f_handle: classifications input file to read from.
o_handle: output_classifications file to write to.
Expand All @@ -568,7 +568,7 @@ def write_read_output(read):
row_items.insert(4, read.minimizer_hit_groups)

row_string = '\t'.join([str(x) for x in row_items]) + '\n'
_ = o.write(row_string) # gzip write fnc returns output, therefore send to "_"
_ = o_handle.write(row_string) # gzip write fnc returns output, therefore send to "_"

def write_verbose_output(read):
# read is an instance of ReadClassification
Expand All @@ -591,7 +591,7 @@ def write_verbose_output(read):
row_items.insert(2, read.minimizer_hit_groups)

row_string = '\t'.join([str(x) for x in row_items]) + '\n'
_ = v.write(row_string) # gzip write fnc returns output, therefore send to "_"
_ = v_handle.write(row_string) # gzip write fnc returns output, therefore send to "_"

# Parse the input file, read per read
i = 0
Expand All @@ -605,11 +605,13 @@ def write_verbose_output(read):
read = create_read(read_pair, verbose_input)

# Reclassify the read pair based on confidence
read = reclassify_read(
read, taxa_lineages = reclassify_read(
read,
args.confidence_threshold,
taxonomy_tree,
verbose_input)
verbose_input,
args.minimum_hit_groups,
taxa_lineages)

# Counter for number of reads per taxon/node
if read.reclassified_taxid in tax_reads_dict['hits_at_node']:
Expand Down Expand Up @@ -640,7 +642,7 @@ def write_verbose_output(read):
log.info('Done processing reads. They were {} in total.'.format(i))

# Output a report file
make_kraken2_report(tax_reads_dict, taxonomy_tree, i) # i is used to calculate the ratio of classified reads (col 1 in output file).
make_kraken2_report(tax_reads_dict, taxonomy_tree, i, args.output_report) # i is used to calculate the ratio of classified reads (col 1 in output file).


def read_file(filename):
Expand All @@ -653,11 +655,11 @@ def read_file(filename):
return open(filename, 'r')


def write_file(filename):
def write_file(filename, gz_output):
"""
Wrapper to write either gzipped or ordinary text file output.
"""
if args.gz_output:
if gz_output:
return gzip.open(filename, 'wt')
else:
return open(filename, 'w')
Expand Down Expand Up @@ -749,7 +751,7 @@ def stringmeup():
log.info("The input file appears to contain minimizer_hit_groups.")

# Perform a naive check of the input file
validate_input_file(args.original_classifications_file, verbose_input)
validate_input_file(args.original_classifications_file, verbose_input, args.minimum_hit_groups)

# If user provided names.dmp and nodes.dmp, create taxonomy tree from that,
# otherwise, create i from a pickled taxonomy file
Expand All @@ -773,18 +775,18 @@ def stringmeup():
if not args.output_classifications.endswith('.gz'):
args.output_classifications += '.gz'
log.info('Saving reclassified reads in {}.'.format(args.output_classifications))
o = write_file(args.output_classifications)
o = write_file(args.output_classifications, args.gz_output)

# If user wants to save the verbose classification output to file, open file
if args.output_verbose:
if args.gz_output:
if not args.output_verbose.endswith('.gz'):
args.output_verbose += '.gz'
log.info('Saving verbose classification information in {}.'.format(args.output_verbose))
v = write_file(args.output_verbose)
v = write_file(args.output_verbose, args.gz_output)

# Run the main loop (reclassification)
main_loop(f, tax_reads_dict, taxonomy_tree, verbose_input, o, v)
main_loop(f, tax_reads_dict, taxonomy_tree, args, report_frequency, taxa_lineages, verbose_input, o, v)

# Remember to close files
if o:
Expand Down

0 comments on commit 6dd5757

Please sign in to comment.