diff --git a/include/valik/search/producer_threads_parallel.hpp b/include/valik/search/producer_threads_parallel.hpp index 8e6423db..81a4e1a6 100644 --- a/include/valik/search/producer_threads_parallel.hpp +++ b/include/valik/search/producer_threads_parallel.hpp @@ -49,19 +49,23 @@ inline void prefilter_queries_parallel(index_t const & index, std::unordered_set const & bin_hits) { auto & ibf = index.ibf(); - auto max_bin_hits = std::max((size_t) 1, (size_t) std::round(ibf.bin_count() * arguments.best_bin_entropy_cutoff)); + auto max_bin_hits = std::max((size_t) 1, (size_t) std::round(ibf.bin_count() * 0.1)); + if ((bin_hits.size() > max_bin_hits) && arguments.verbose) + { + verbose_out.write_warning(record, bin_hits.size()); + } + max_bin_hits = std::max((size_t) 1, (size_t) std::round(ibf.bin_count() * arguments.best_bin_entropy_cutoff)); if (bin_hits.size() > max_bin_hits) { - if (arguments.verbose) - verbose_out.write_warning(record, bin_hits.size()); + auto const & entropy_ranking = index.entropy_ranking(); if (arguments.best_bin_entropy_cutoff == 0) { return; } - else if (arguments.best_bin_entropy_cutoff < 1.0) // keep hits for bins with the highest entropy + else if ((entropy_ranking.size() > 0) && // only count minimisers not k-mers -> entropy unknown if k-mer index + (arguments.best_bin_entropy_cutoff < 1.0)) // keep hits for bins with the highest entropy { - auto const & entropy_ranking = index.entropy_ranking(); size_t inserted_bins{0}; size_t i{0}; while (inserted_bins < max_bin_hits) @@ -74,9 +78,8 @@ inline void prefilter_queries_parallel(index_t const & index, } i++; } + return; } - - return; } for (auto const bin : bin_hits) diff --git a/test/cli/dream_test.cpp b/test/cli/dream_test.cpp index 3b475261..64c7253a 100644 --- a/test/cli/dream_test.cpp +++ b/test/cli/dream_test.cpp @@ -12,6 +12,8 @@ struct dream_short_search : public app_test_cli_base, public testing::WithParamI {}; struct dream_split_search : public app_test_cli_base, public testing::WithParamInterface> {}; +struct dream_adaptive_search : public app_test_cli_base, public testing::WithParamInterface> +{}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////// DREAM short shared memory /////////////////////////////////////////////// @@ -207,3 +209,109 @@ INSTANTIATE_TEST_SUITE_P(split_shared_memory_gapped_dream_suite, std::string name = std::to_string(std::get<0>(info.param)) + "_error"; return name; }); + +TEST_P(dream_adaptive_search, adapt_threshold_kmer) +{ + auto const [adaptive_cutoff, entropy_cutoff] = GetParam(); + size_t pattern_size = 50; + float max_error_rate = 0.04; + + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + std::filesystem::path ref_meta_path = "ref.bin"; + std::filesystem::path index_path = "ref.ibf"; + + app_test_result const build = execute_app("dream-stellar", "build", + data("repetitive_reference.fasta"), + "--output ", index_path, + "--seg-count 64", + "--fpr 0.001", + "--shape 1111110110110111111", + "--pattern ", std::to_string(pattern_size), + "--error-rate ", std::to_string(max_error_rate)); + + EXPECT_EQ(build.exit_code, 0); + valik::metadata reference(ref_meta_path); + + app_test_result const search = execute_app("dream-stellar", "search", + "--output search.gff", + "--error-rate ", std::to_string(max_error_rate), + "--index ", index_path, + "--query ", data("repetitive_query.fasta"), + "--repeatPeriod 1", + "--repeatLength 1000", + "--numMatches 100", + "--bin-cutoff", std::to_string(adaptive_cutoff), + "--bin-entropy-cutoff", std::to_string(entropy_cutoff)); + + EXPECT_EQ(search.exit_code, 0); + EXPECT_EQ(search.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 1 database sequence.\n"}); + EXPECT_EQ(search.err, std::string{}); + + /* + auto distributed = valik::read_alignment_output(search_result_path(number_of_errors), reference, std::ios::binary); + auto local = valik::read_alignment_output("search.gff", reference); + + compare_gff_out(distributed, local); + */ +} + + +TEST_P(dream_adaptive_search, adapt_threshold_minimiser) +{ + auto const [adaptive_cutoff, entropy_cutoff] = GetParam(); + size_t pattern_size = 50; + float max_error_rate = 0.04; + + setup_tmp_dir(); + setenv("VALIK_MERGE", "cat", true); + + std::filesystem::path ref_meta_path = "ref.bin"; + std::filesystem::path index_path = "ref.ibf"; + + app_test_result const build = execute_app("dream-stellar", "build", + data("repetitive_reference.fasta"), + "--output ", index_path, + "--seg-count 64", + "--fpr 0.001", + "--fast", + "--shape 1111110110110111111", + "--pattern ", std::to_string(pattern_size), + "--error-rate ", std::to_string(max_error_rate)); + + EXPECT_EQ(build.exit_code, 0); + valik::metadata reference(ref_meta_path); + + app_test_result const search = execute_app("dream-stellar", "search", + "--output search.gff", + "--error-rate ", std::to_string(max_error_rate), + "--index ", index_path, + "--query ", data("repetitive_query.fasta"), + "--repeatPeriod 1", + "--repeatLength 1000", + "--numMatches 100", + "--bin-cutoff", std::to_string(adaptive_cutoff), + "--bin-entropy-cutoff", std::to_string(entropy_cutoff)); + + EXPECT_EQ(search.exit_code, 0); + EXPECT_EQ(search.out, std::string{"Launching stellar search on a shared memory machine...\nLoaded 1 database sequence.\n"}); + EXPECT_EQ(search.err, std::string{}); + + /* + auto distributed = valik::read_alignment_output(search_result_path(number_of_errors), reference, std::ios::binary); + auto local = valik::read_alignment_output("search.gff", reference); + + compare_gff_out(distributed, local); + */ +} + +INSTANTIATE_TEST_SUITE_P(shared_memory_adaptive_dream_suite, + dream_adaptive_search, + testing::Combine(testing::Values(0.1, 1.0), testing::Values(0.1, 1.0)), + [] (testing::TestParamInfo const & info) + { + std::string name = std::to_string((size_t) std::round(std::get<0>(info.param)*100)) + "_adaptive_" + + std::to_string((size_t) std::round(std::get<1>(info.param)*100)) + "_entropy"; + return name; + }); diff --git a/test/data/search/repetitive_query.fasta b/test/data/search/repetitive_query.fasta new file mode 100644 index 00000000..c1d00b2c --- /dev/null +++ b/test/data/search/repetitive_query.fasta @@ -0,0 +1,2 @@ +>1 +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT diff --git a/test/data/search/repetitive_reference.fasta b/test/data/search/repetitive_reference.fasta new file mode 100644 index 00000000..56608ce2 --- /dev/null +++ b/test/data/search/repetitive_reference.fasta @@ -0,0 +1,163 @@ +>1 +TATGCACCAGAGTATGGAAGCATAAGCTCTGCATGCAAAGGTACATCAGATCCTGCGGTTGGGTGCCAAC +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +CCAAGTGTGTTCACGGGCGCTTGACAGACATCGGAGGATGGTGCACACTCACTCGACCAGCGCAAAGCAC +AGGATCTCACGGGCGGACATCTCTTAGGTCAGTCATCGTGGAGGAATGCTTGTACGTTCTTTTGGCTTCC +ACGTACGTTCGTACGTACGTACGTACGTACCTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +CCTAACACGGCGGGCGTCTCCGGTACGTATCCTGTCGGTACACCCCTTAAGCCCCTAGGCCCGAAGAACA +TAGCGCATTTCACGCTCTCTACGAATGACCGCAACGATCAAATGGGCGAGAACAACTAATTCCGATTCAT +GGGGTTTGTGGATTGTGACACAGCGCGCCCGCTACTGCGGGACGTGAGGACGCCCAATTCTGCCAAGGAT +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +TATTTAGGGTGTTTCACTAGAGTTATGCGCCGACCCCGGTTGGACCAGCTTGCATTCGAAACTGCGTTAC +ACAGCACCCCACCGCAATCGTATGACTCTCGCTGAAAAAGGGTGGTCAACCATTACACCTCTTATGCCTG +TTGTGGGAGGCTCGGTCTTAAGCAGCGCGCGAGCTGTGATCCAGGCTACCACGGACATAGTGTATGGAAA +GTGATCCAGAGTAGACCCGCGGGGGCCTGACCTAACCTATATAAGTTGTATCGTGGCTATGAGGGTAGTC +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +GCCGGAGAAAACGTATGCTTACTGATTTTTAAGTCGGCGTGGCGCCGAAGCCGGATCGGTTGTAAGCTAG +CCGGGCCTAGGGGTTCACCGTAACGGATTAGTCAAATTAAAATCCAGCGATGACTTCCTGATAGAACTCA +AGTCGTGACCCCTCCGCTGCGGGCCTACATCTGTTTTCGCAGGCGTGGTTGTTTACCAGGTATGGTGCTC +ACGTACGTACGTACGTACGTACGTAGGTGCGTACGTACGTACGTACGTACCTACGTACGTACGTACGTAC +ATCTCTATTAGTCACGGGCAGCATGGTGTCACCGAACCGCGCGTCTCCTAATATCTGGTCTACCGATTTA +GCCCCGGCAAATAACTTTGGATTGTGGTTGGAGAGTGCCAGAACTGACGGGCGCTGCCGTGGGGCTCCTA +ACTAAAAACGCCACGGACCTGGCTAACATTCGTTGTTGACTATAACATTTGAGGGCGCTTCGGATTCCCC +ATACTGCCAGAGTATTATGTGGGTGGTAAACATAGATTCTATATAGTCAACGACATACACTCATTATTAT +GCAATTGCGGCATCTCAACTATGTCTTAATTAGTTTTCCCGGATGGCGAAAACGATCTTACAGGAGAAGC +GCTACGCTGGTTTGGAAGACACTTAGTATCCTAGTAGTATGGGCTTGTGCGGGTCAACGGGCGCCGTCAA +AGCGCACACATATCTGGTGGGGACGGTGTCCCCTATCGGCGCACACGGGAGCCTAGGCAATCCCGACGTC +CCGCGTGCTGGATAAAGAAAAGGCCGACTGCGCGAAATGAAGAATCGTCAATTTATTGTTGGCAGCTTTA +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +CAGTTCTTCTCCGCGGGCGGGCAGAGTGGTTTTAAGACCGGGGTCTATGCACAAGGGTGGAGCTTGATTA +CTATCATCGAAGGGTGACTTGCCGTGTTACAATCGACAAGCGAACGGCCGACTGCTTCGGCCCGCTGAGC +GGACAACCTCCGATGTACCTACTCCATAGTAGACTTGGAAAACCCAGTCTTATGGCGCGGGGGAATCAAA +TGTGCCGATTCTTGACGAATAGTTCTAAGCCGAACTGCCAATCAGGGAAAAGTGTCGCGCACTAACTGGA +GCTGAAACCGCCAATAGTGTCTAAGTTACTCTTCCCATCTATCAAGGTAAGCCTTTTCCGTACAGTTATG +ACCATCTCACACTGGAAAGACGCACTGCATGCTCGGATGGAACTCGGAGATCACCTGGAAAGTCAGTGTC +ATGCGTGGCGGTTTAGTGTTCGACGTAAGAAAAACCTGGAAGACGGACGAGGTATGCAGACATTGCAGCA +GTGGTAGTAGGGGTTATACCCCTGGATTTAAAACTCACAACCTTCCTCATAGGCCTAAGGATCTTGCTAG +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +ACCTTAAGCAGGTACGTGGATCAGAATCGCTACTTCCTGTTAAGACTGTGGACCCTCCCACAAACTCCGA +TGCGAGCTAGGACGTCTTTAGCTCAGCTTGAGAATACTCCTATTTGCCTTGAAAGCTGAGCGGTTCAGAC +AGAGTAACTACATCTTATATGTAACCACACTCACATAGTTGTTGGGGGCAAACAGCTAAGGATTCCTGGT +CCCTGGCACGGATATAGATCACAATCTGGAATTCCCTCCTAAGTACCCGCCCGGTATTCCCACACTCTGT +GAGACTACGTGCGCGTGTAGTATCGTGAGGTCCGCGGTGGAAAAGGGTTTGGCACTTACTACTCAGTGAC +CGTATACACGGAGATTCGCACTGATGTGGAATATGAAATCCCACATCCCCTGAGAATTTCGAATCTGAGG +ATGAGTATATGCCTCGATGTAGGCCAGGAGCATTGATCCCGGCGTCGTCCGGTCTAAGCACCATAGTTAT +GGGTCGGTTATAAACGAATTTTGACGCGACGGGGAATGATACAGGATCCACAGTGAAACTAGTCTGGGCA +CCGATGCATTGCCAATGGTGCCTATTAGTGTTCCTGAAGTTGACTACAGTCCGTACCTCAGTATAGCGCT +GGTTACTAGTAGCGAAGTTGAGATTGTAGCTCGTACTCCAATGACCACCCGAGGGGGTGGTGCAATGTGC +AGGTAGGGGTAGGTTCCTGTAGTTCGGAGGTCAACCTCTTGTTGACGTCTGATGCGAGCCTGACTAAAAT +GCGCTTCTTCACTTTTGTTCGTATAGTCACTATATTCGCGAAACCGTCGCTTTTATTATAGACGGCCTAC +TTCTTTGACCGAGCCTCATAGTCTGCACTCGGGACGAAACTAACGGCTGTTCCACTCATGACCTACGCGC +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +CTGAGTGATCAAATAATCAAAAGAATGCGCCGCTATATGTAGGGGGCCCATGTATTGGCTGACTTTGAAA +ACACTCTGACACGAACTTGATTGTCTTACTAGTGAGAGAGGGGGGAGATACGCAGGGCGCGGGAAAACGA +TATGCAGCTGTAGGGAGCGCTATTTGCTAGGGAGTATACAATCCAGGCCTCCCTCGCTCTCATCTTTATC +TGTCGATCGTGAGCCAACGACTAGTTGGTCTATCGCGATTATTGCCACTATCGTTCCCACCCGCCCGTGT +CGCCGTAGGAACAACCGGTCTTGCTACGGATGCTCTGATGTTTGTGCTAGTGGTCTAACGGTCCACGACA +ACAGTGTCGTATACAGGGCCACTCTTCGAAAGCGGCGTAAGCGCCACGCTTGTCCGCGAGTTGGGGACCT +CGGGGAGTCCGTATTCTTTGAACCCTTGTTCAGGCGAGTGTTTTTGACGCCTTTCAACAATGCAGATCCC +AGGGAGGTCCGGCCCTCTGTTCTATGAGTAGGTCAAGCTAGAACCGTGTTGTGAGAGGAATGAGGGTGCT +TAAAGGGTTACCCGTCGTCACATACGTAAAAAGAATCCCTATTATCATGGACTCCAATCTCGCCACGTGA +AAGTTAGCGGGGATAGCGTTTGGGTCTTGCATATCCTGGGCTCTTCGGCTACGGGTGGACACGACATTGT +TGATGGTGGTCGCGTAGAGCTAGCTTTTTACCTTATGGAAGGAGTTTCCGCACCCCCAGGGACGGGCTCC +GGCCTCACTATGTTCAAAGCCCTAAATGCACATTCTAGTCTCCACGTGTTACAGTAGGGCCAAAGATGGC +GTACTAACAGATTGGTATTGGACTATAAAAGATGAGTGATTAAGATCTGAAGTTCAAGACGTCAGGCTTA +ACGTACGTCCGTACCCACGTACGTACGTACGTACGTACGTGCGTACGTACGTACGTACGTACGTACGTAC +AGTAGGAGTACCTTCTGGCACGATAAGGACTCCTACCCCACATCCTGATAATGCAGCTATGTGGCAATTC +ACGCTTACCCAATCCTTAGCTGGCTAACAATTCCCCATACATGTTCAGCGAAGGTAGAACAACCAAAAGC +CCGTTGCGGAACTGGCCAATGCTATCCCAAGTTAGAAGGAATACGAACTGGTTCCGGGGACCAAGGCCCA +GTTGGACATAATTTAACAACTCGGCGAAGATAGCAAGTTCTGGCGTCTGAACGTATTATTGTTGCTGGCT +ATCACAGTTAATTCCCTGCCTACGAATTTTGTCGTACCAATTCAATCCCCCTGGCGGAGCTCTATGTCCT +AAGGCCCTGTTCAACTGCCGGATGATATTGTCTGGTTTCTGTGCCGTCGCTCAAACAATTCTAGGCCCCG +AAACGATCAGAGGAACAACCGGGAATGTCGGCCGACTTCCAGCTCCGTCCTATATGCTAAGCGAAAACTT +TACACGAATCCAATACACATTTCTGCGAAGAGCTTTCACAAGGTACATTTTGCCGCCTTTTGAAGGATAC +GAACTCGGATATGTACAAGAGACTCGGTCGTGCGGGAATCGCGAATTGCGGGAGTTAGGCACCCGGGTCA +GACAATCCAGCGCAGACCTAGACCTAACAAGAAGCACGCCAGCGTGCCCAATCCGGATAGTGGACGCTCC +CCGGGTTTAGAAGCAAATAACCTAGGTGTACCTAACAATCGGCAACGCAGTTTTCTAGGATTCTAGCGTT +TCTATCAGAACAAACAGATTCTTTGAGGGCTTAAATGGCGCCTTTTCCGTACAGCCACTAGGTATGTTAA +CGAACAACACCCTGCCTTTGACATGGCTCACGCTAGGCTTGTGAGCAAGGAGTAATCGACGGTTTGAGAT +CGCTTCTCGTCTAATATTAGACAACCTTGGTCGTCGTTGCCATATAACGACCAATGATTGGTTTCGGACT +GCGACTAATACTGGTAGGTGGCCTGGCGGTGTTGGTGTGGACTGGTGGTGACCCGCACAGTGCAAGAGAT +CATCAGGGACCATGCGATAAGGGACCATACGCGTATGCCCGTTTTGCGAGCAACGCGCGAAGCACAGCGT +CTGGGGGCAACCCAGTTCGTCCTGCAATGCAGGATACGCTGTAACGGTGTGCCGAAGTATTCAAGGTGAA +GACCAACCAGTCTCGTGTAATCCGTTCTAAGACGCACCCTTCTACTCTCTCCCGGTCACGTCGCGTACTT +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +GGCGTGCACAACAAGAACAGATTGGTGACTCAGTAGAGCCTCGCCGACAGTTCCCTATCTTTCACCAGCG +GATACCTACGAGTTAACCAATTACCAGGTGGGCGCAACTACCGATTGGACTACGCTATGTCGTCCGGCTC +CTGTTCAAGTAAATCCGTGGAGGTAAACTACCACCCGGATGTAAAACTCCGAGGTTCGTTCTATAGTCCC +TTGGTCCGCAAATCGCCCTGTGGGTTAGGGATTATAAAATACACAGATTATAAACGTCGATAAAAGTAGG +TACTGCACGGAGGGGTCTAACAAATAGATGATGTGGTCATCCACTCCGTCCTGAACACTAGCTGAGATTC +CCGCCCAGAGAAGTTAATCATTTTTAAGGCCCGACGGAGGCACCTAGACGCCACGCTGCACTCTCTATCC +CCTAAGCGGCTCAGGCGACTACTTCGGTTACCTGGTAACCAAGTGGCTTGAAAGTTCTTCCTCTTGTGGC +ACGAAAGTTCTGGGATCCGCCGAGGGGGGGTGTACTCAGAATGCTTCCTGGTGCGCTGAGCCGCTAGTGC +TCACATATTAGACATTAATCTGCTCTTAATTATCCTATAGAGGAGCATCGGGACCGTGAAAACAAAAAAC +GACCAACGAATCATTCCAAGACTTAGCTCAACCTGCCATTTTAAGTAATACGAATGTCAGGATACACCGG +TAAAAGGTACCAGTCTGAGCAGTCCTTGATAGAGCCCAGTTAGGCAGTGCGATGATCTCACCAGGGAGAG +ATGGCACTTAGGGAACTCCTGTCATCACACTTTACATCGGCCTTCCTACAGACAGTTTGAATCAGCCTTC +ACGTACGTACGTACGTACGGGCGTACGTACGTACGTGCGTACGTACGTACGTACGTACGTACGTACGTAC +TGGAAGTGAATAGAAGGCATAGTATAGCTCCTGGCGAAGCTTGTGATACTGTAGAGTGCAAGTAGTGGTC +CATTTACAAGTTGACCTTCCACCACTGAACCCGGTCTCAGGTGTTGGGTTATACAGTCACGCCACCGAGA +CCGTTGCTGATCACCGAAAGGCAATCCAGGAACTTGGAGGATACGGGATGCTGATAACTAGAGGGAGTGC +TAGTACAGGCCCCATATGCGGCACAACACAAATAGATATGATGCTTCCTGTGATACGGATCCTCAATACG +CAGCTACCCGAAGGCAAAAGTCCACATGTTCGACTATCTTGGCTTTGGTGTGACCAAGACGTGTGCGGCA +GAGAGGACGTTCGCCAGCAGGCACCGCGTATCCCAACTTGAGGTTTCGCTAAATCCTTCATTGGCTGGAT +GAGGGGGTAAAGGTGGTCTCGGGACGTCTCGTGTGGACCGAAAACTTGTCTACGAGCTCAGCCAGGCTTG +GCGAATGCCGAAACGAGAGTGTTCGTCCTGACTGGTGGGGGCCTACCTACAGACCTCAGTGTTAGCGCCT +ATAATGGAGGGCTGTGGACTAACATTCAGGTGCGTGGAGTCCCCACGACTGGCGATAGCGGCAGTTACTG +GTTGCCGCACAGGCCTATTCGTAGCCTTTAAGAGGGGCACGGTTTCACGCTGTTACGCCGAGGACCGATC +ACCCGGAAGGCTCTCATCTCGTATGGGTACTTAGGCCGATTCCCCGCGCCAAACTTCAACCCAAAAGACG +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +TTGAGTAGTCAACAGGATCATTGTGTCGTGAGGATAATCAGGATGGGGCCGTAGTAACGACGGCCTTTGC +CAAGCCTAAAGGCGTTTTGAAAGTGAGTGTTTTGAAGATCTCCTCCAGCGGGGGCTCGCTCCGGAGGTGC +ATTAAGCCACCGCCCCACGAAAGTATATCGGCTTCAATGCGTGTTCCTCTAATGCTGGATTATTGGCCGT +GGAACAGCTCGGAACTAGTTATACTGCCCGCTTTCGTTCAAGGTCACCAGGGCCCTATCTTCGATCCGTA +GCCGCCTAGAAAGGAATATAGCTGCAGGAATCGCTTTATCGGATAGTTACTCCCCTCGCCACCGCCCATG +AATATATTGACCCCAGGCATCTCACCTGCTCTTACCGGTGAAGATCCAGATCTGATACATTGGTATATGT +GCGAGGTGACGACTGGCGCGATCCGGATCTTCTCATGCCCTACCCCAACTATTTTCACCGGTATCCCTCT +TTATTAAAAGTCGTCTGGCGATCTCAGATTTAGGTACGACCTTGCCTTTTCTCCGAACCTAAAAAGCTTG +AATGTTTTGCTGAGGTGCTTGCCGACGCCACTCAGTGACTAGCATGGAAGGAAATTTATAGTCCAAGGTG +GAATAGAGGACGCAGATGAGGCCCCACCGGGTTGGCTACGAAGGGTAAGCGACGGGCCAACCGCGTCAAG +CAGAGGGATGAACTTGTAATTCTTTTTATATCGCACTTCTTGATGTTTGCCCAGGTAGAAAGAGATTTTT +TGACAGGTTCGTCTAGCTGAACTTCCGGTCGGTTTGCTCCCTACCAATTCTTCGTCATGAGGTGCGGAGT +AGCCCCTCGATGGCCATCGGCGGGCCATGCTATTTTCAGCATCTTTTTTGCTTCAGATTAGAGCGCACAG +AGTTTTAAGTTACATGACGTATGTTAAGATGTCCGAGTCTACTTGTGCCTAGGGCCAGGGGATGACTTCG +AAGGTCACAACCCGAAGAGACAAGTACGCTATCCTGGGGAGATTCCAATACTTGGGTGGAAGTTTGTTGC +ACGTACGTACGTACGGACGTACGTACGTACGTACGGGCGTACGTACGTACGGGCGTACGTACGTACGTAC +CTCTAGGGCTGCGCCCTGGCCTGTAATGACTCGCCTAGTACGCCCGGGAGTGGGTTACCCATAGGATTCC +AGAGTATATAGCTGAGCGTCCAGCCACTCGTGCGTACACCGAGGCGCTCGTCAGCTTCCCCTTCACCTAT +GGCGGTCCGACTACCGAGAAGCTACACTTAGGAATTTTGCAAGTTTTTATGCTTAAGGTAACAATCCACT +GTCCCCTCGAGTGGTGCCGGCCTACATAGAGTCGCCTCGAACGCGAATTAGCTTTTCTGACTTAGTCCGC +AGTGTCAGCTACCTAAATAAAGACCGGGTTCTGTCGCTAACAGACTCGAATTCTTTCAGGATACCGCTGT +TGTACACTAGGGCTACGGCTCCAGGACTCAGATAGCTAGGCCGTAATATGGAAGTTAAATCAACTGACCA +AGAACTGGGGATACTCTCTTTTTGCAGTGTTAACAGTTCACGCACCTAAACGGCGCGACTTGAATAAACA +CCTACGGCTATCATGAAAGAAAGTTATGTCCTAGTTTGAACGAAGTTTTGTGTTTCAAAAACGGAGGGTA +ACAGCCACGGAAAATAGTACTTCTTCTCGTTCAGGGTTCACGGGCCCGACCGATGGGCCTTTGAAGCCTC +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC +CTTCTTCAAGCTTTTTCAGCCGTCTCGATAGACCCACGGAGAGCGCTCACTGGTTGATAGGTGGTACGTA +AAATGGGTACCACCAATATATTTTCCCAATGGTGGTACTTTCTAACCCCACTGACGGCGTACCGAACTGG +TATGCTTTATTCAATGCCCGCTGTCACCATCAACTCCGCAACAATATTGCAGGGATACCACTTTCGACAC +CCGTCTTGGATGTGTTCCCCATGTTGTGTCCCGGCCTGCGTTCACCTCTCTATTTAAGAAAGGAACGACT +TTTCGCGCGGATACACATCCCGTGGTTGAGGGTGTAACTTGGGTTGGAGCATGTCCCACCACAGAGTCTG +GATTCACGCGATCAGAACGTTCAGGCACTCTCACCAAAACTGGCCAGCATCCAGTCAGAGGAAGCATCCA +GCTGACCGTGAGATGCCTTCCGAAAAGTGAGCATCTCGACATTTGTACGGGAATGATTATTTTGCAACCA +CATCGGATGCACACGTGAACAATATATGGATTGGGAGTCACGAAATAGCTTACTCATTGCCATTTAGACT +AGGTTTGACGGGCCAGGGCCTTCGCGTGGCGGGGATTCGAACGTGGCAATTCGTATGGCGTAGATGTCAA +CAGAGCGGGTACCCGCGCTTCATTCCTGCGTTGGATCTCTCGAGTGGATGGTGCGGTTACAAGTACTTGT +GAAACTTCCCGTAAGGCGGCCCTTTGGGTAGGGCGTCTCTGGAAGCGATTAGTGCGACAGATCACCGCAC +CATGGGGTAAAAAGCTACTTCGTCATGAGCTATGCAGTCAGTAGGCCATGGCCTCGGCCAATAATAGCAC +ATGAACACGTCGTATTCTCCTCCATTGTCAGTAATTGCTTGTTTTGGCGTTCGCGGAGAATCCAATCTCC +CGGTGCTTCAGCTCATTATCCTGACGAGGCGTCCCGTCCCGATCGGGAGAAGGAATTAGCGTATCGGTAA +ATGGATTGCAGGAGCTGAGCATACAACAGAGAACTGTATAAGCTATGGACTCGTCGGAACCGATAGGAAT +TCCCAGTCTAGCCCCGGCTCGAACGGCACGGAATAAACGGCGATGGAACGATTGTAAAGATACTAAGCAC +CTTCCCTACGTAAAAGGGAAACTAACCCTAGCCTAGCGTTTGAGGACTCCTAAGCGACATCCAACAGCTG +CAGGATAAATGTACCTAAATCCCTTGCATCCCTCGTCATTTGCTCTTGTTAGAGACTTCGGTCGTCTTTC +CTCGAACGCGGGGCCAGGCCGATATCCCACAGCTATGACAAGGTGTTTCAAGCACAGAGTAGTCACGGAC +CTCTCGGGCTCCGTGCGGTTGCTGTAAAGTAGGGCTATCCTCAGGGCACATACAAGGAAATGACGTTCAA +TTCCCCCGACGAACTTAGCGGTCTTTGCCAAGGTAATCATTCTGTTAGAATGGTTTCCACCGGTGGCACT +ACGTACGTACGTACGTACGTACGTTCGTACGTACGTACGTACGTACGTACGTTCGTACGTACGTACGTAC +AGGACCCTGGTGTAGCCAGAATTGTTGATGCCCTCTTGGCAATCACTCTCGAGGAGGGTCGTTTCTGATG +AATAACATCGTGCTCCGTTTTTTACGGACCGTTTTCCCGAGGCTGACGCAGTCTTAGCCGGATTTCTCAT +GGCAGCGCTGGTGAAGCGACGTGTTACTTGTACGCACAGTTGCGACATTGGAGGCATGTTTAACAGGGCT +ACGTCAGAAGCCCTTCCCCGGCCACCGGGCGCAGCGGTTATGGATATAGCAACACGTGACCTTCATTTTC +ACCCGCCCACTTCGAGCCTCAACCTGGCCCCGCTCAAACAAGGACGTGAGGATCTGGGCGTGTTTGGAGC +CATTGCGGCCCGGCTACCAC