-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_tsv_processing.py
94 lines (66 loc) · 4.42 KB
/
test_tsv_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import unittest
import tsv_processing
import pathlib
import os
import subprocess
import pandas as pd
import numpy as np
class TestFileBase(unittest.TestCase):
    """Base test case that adds file-system assertions for fixture checks."""

    def assertIsFile(self, file_path):
        """Raise AssertionError unless ``file_path`` resolves to an existing file.

        The error message reports the path exactly as it was passed in.
        """
        resolved = pathlib.Path(file_path).resolve()
        if resolved.is_file():
            return
        raise AssertionError('The file does not exist: %s' % (file_path,))

    def assertIsReadable(self, file_path):
        """Raise AssertionError unless the resolved path passes an ``os.R_OK`` check.

        The error message reports the resolved path.
        """
        resolved = pathlib.Path(file_path).resolve()
        if os.access(resolved, os.R_OK):
            return
        raise AssertionError('The file is not readable: %s' % (resolved,))
class TestFilePath(TestFileBase):
    """Check that the fixture files used by this test module are available."""

    # Fixture files that must be kept in the ./test/ directory (resolved
    # relative to the current working directory).
    REQUIRED_TEST_FILES = (
        'alignment.b6',
        'test_ref_dataframe_best_alignments_for_alignment.b6.tsv',
        'test_ref_tsv_processing_output_histogram_data.csv',
        'tmp_dataframe_5_columns_no_header.tsv',
        'tmp_dataframe_5_columns.tsv',
        'tmp_df_max_bitscore_per_read.tsv',
        'tmp_empty_file.tsv',
        'tmp_df_alignment.b6_with_header',
        'test_ref_dataframe_sns_histogram_values',
    )

    def test_input_test_files(self):
        """Assert that every required fixture exists as a file and is readable.

        Each fixture is checked in its own sub-test so that one missing file
        does not hide problems with the remaining ones.
        """
        test_dir = pathlib.Path('./test')
        for test_file in self.REQUIRED_TEST_FILES:
            with self.subTest(test_file=test_file):
                f_path = test_dir / test_file
                self.assertIsFile(f_path)
                self.assertIsReadable(f_path)
class TestTsvProcessing(unittest.TestCase):
    """Tests for the ``tsv_processing`` module, driven by fixtures in ./test/."""

    # Column names applied to the header-less 'alignment.b6' fixture. They
    # match the default field names of BLAST tabular output (-outfmt 6).
    ALIGNMENT_COLUMNS = (
        'qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen',
        'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore',
    )

    def test_preprocess_aln_file(self):
        """Check ``preprocess_aln_file`` on malformed, empty and valid inputs.

        Files with insufficient columns must raise ``AssertionError``, an
        empty file must raise ``pandas.errors.EmptyDataError``, and the
        alignment file (with or without a pre-existing header) must come
        back equal to the frame read directly with pandas.
        """
        # Files with insufficient columns, without and with a header row.
        for bad_file in ('./test/tmp_dataframe_5_columns_no_header.tsv',
                         './test/tmp_dataframe_5_columns.tsv'):
            with self.assertRaises(AssertionError):
                tsv_processing.preprocess_aln_file(bad_file)
        with self.assertRaises(pd.errors.EmptyDataError):
            tsv_processing.preprocess_aln_file('./test/tmp_empty_file.tsv')

        # Header-less alignment file: the expected frame is the raw table
        # with the alignment column names applied.
        expected_no_header = pd.read_table(
            './test/alignment.b6', names=list(self.ALIGNMENT_COLUMNS))
        actual_no_header = tsv_processing.preprocess_aln_file('./test/alignment.b6')
        pd.testing.assert_frame_equal(actual_no_header, expected_no_header)

        # Alignment file that already contains a header row.
        expected_with_header = pd.read_table('./test/tmp_df_alignment.b6_with_header')
        actual_with_header = tsv_processing.preprocess_aln_file(
            './test/tmp_df_alignment.b6_with_header')
        pd.testing.assert_frame_equal(actual_with_header, expected_with_header)

    def test_return_best_alignment(self):
        """Compare ``return_best_alignment`` output with the reference table."""
        test_in_df = pd.read_table(
            './test/alignment.b6', names=list(self.ALIGNMENT_COLUMNS))
        expected_df = pd.read_table(
            './test/test_ref_dataframe_best_alignments_for_alignment.b6.tsv')
        # Both frames get the same -log10 transform on 'evalue' before they
        # are compared.
        # NOTE(review): presumably this keeps very small e-values from being
        # treated as equal by assert_frame_equal's absolute tolerance - confirm.
        expected_df.loc[:, 'evalue'] = -np.log10(expected_df.loc[:, 'evalue'])
        actual_df = tsv_processing.return_best_alignment(test_in_df)
        actual_df.loc[:, 'evalue'] = -np.log10(actual_df.loc[:, 'evalue'])

        # Exact per-column comparison; sub-tests report every mismatching
        # column instead of stopping at the first one.
        for col in expected_df.columns:
            with self.subTest(column=col):
                self.assertTrue(
                    actual_df[col].equals(expected_df[col]),
                    msg='column %r differs from the reference' % (col,))
        pd.testing.assert_frame_equal(expected_df, actual_df)

    def test_save_csv_file(self):
        """Compare the frame returned by ``save_csv_file`` with the reference CSV."""
        test_in_df = pd.read_table(
            './test/test_ref_dataframe_best_alignments_for_alignment.b6.tsv')
        expected_hist_df = pd.read_csv(
            './test/test_ref_tsv_processing_output_histogram_data.csv')
        actual_df = tsv_processing.save_csv_file(test_in_df)
        pd.testing.assert_frame_equal(actual_df, expected_hist_df)

    def test_plot_histogram(self):
        """Compare the frame returned by ``plot_histogram`` with the reference table."""
        expected_hist_df = pd.read_table('./test/test_ref_dataframe_sns_histogram_values')
        func_in_df = pd.read_table(
            './test/test_ref_dataframe_best_alignments_for_alignment.b6.tsv')
        actual_hist_df = tsv_processing.plot_histogram(func_in_df)
        pd.testing.assert_frame_equal(actual_hist_df, expected_hist_df)
# Run the whole test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()