Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SSHash-based representation #479

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@
[submodule "metagraph/external-libraries/htslib"]
path = metagraph/external-libraries/htslib
url = https://github.com/samtools/htslib
[submodule "metagraph/external-libraries/sshash"]
path = metagraph/external-libraries/sshash
url = https://github.com/ratschlab/sshash.git
3 changes: 3 additions & 0 deletions metagraph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,8 @@ IF(APPLE)
set(THREADS_PREFER_PTHREAD_FLAG ON)
ENDIF()
add_subdirectory(external-libraries/spdlog)
add_subdirectory(external-libraries/sshash)
target_include_directories(sshash_static PUBLIC external-libraries/sshash/include)
add_subdirectory(external-libraries/DYNAMIC)
add_subdirectory(external-libraries/zlib)
target_compile_options(zlib
Expand Down Expand Up @@ -409,6 +411,7 @@ set(METALIBS ${METALIBS}
mersenne_twister
sdust
spdlog::spdlog
sshash_static
XXSDS_DYNAMIC
ips4o
caches
Expand Down
1 change: 1 addition & 0 deletions metagraph/external-libraries/sshash
Submodule sshash added at 962461
104 changes: 104 additions & 0 deletions metagraph/src/graph/representation/hash/dbg_sshash.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "dbg_sshash.hpp"

namespace mtg {
namespace graph {

// Every DBGSSHash member defined in this file is currently a placeholder:
// the bodies are empty or return a fixed value and never read their
// arguments. Parameter names are therefore left commented out.

// Currently a no-op: `on_insertion` is never invoked.
void DBGSSHash::add_sequence(std::string_view /* sequence */,
                             const std::function<void(node_index)> & /* on_insertion */) {
}

// Currently a no-op: neither `callback` nor `terminate` is invoked.
void DBGSSHash::map_to_nodes(std::string_view /* sequence */,
                             const std::function<void(node_index)> & /* callback */,
                             const std::function<bool()> & /* terminate */) const {
}

// Currently a no-op: neither `callback` nor `terminate` is invoked.
void DBGSSHash::map_to_nodes_sequentially(
        std::string_view /* sequence */,
        const std::function<void(node_index)> & /* callback */,
        const std::function<bool()> & /* terminate */) const {
}

// Currently returns 0 for every input.
DBGSSHash::node_index DBGSSHash::traverse(node_index /* node */,
                                          char /* next_char */) const {
    return 0;
}

// Currently returns 0 for every input.
DBGSSHash::node_index DBGSSHash::traverse_back(node_index /* node */,
                                               char /* prev_char */) const {
    return 0;
}

// Currently a no-op: `callback` is never invoked.
void DBGSSHash::adjacent_outgoing_nodes(
        node_index /* node */,
        const std::function<void(node_index)> & /* callback */) const {
}

// Currently a no-op: `callback` is never invoked.
void DBGSSHash::adjacent_incoming_nodes(
        node_index /* node */,
        const std::function<void(node_index)> & /* callback */) const {
}

// Currently a no-op: `callback` is never invoked.
void DBGSSHash::call_outgoing_kmers(node_index /* node */,
                                    const OutgoingEdgeCallback & /* callback */) const {
}

// Currently a no-op: `callback` is never invoked.
void DBGSSHash::call_incoming_kmers(node_index /* node */,
                                    const IncomingEdgeCallback & /* callback */) const {
}

// Currently reports an outdegree of 0 for every node.
size_t DBGSSHash::outdegree(node_index /* node */) const {
    return 0;
}

// Currently always false.
bool DBGSSHash::has_single_outgoing(node_index /* node */) const {
    return false;
}

// Currently always false.
bool DBGSSHash::has_multiple_outgoing(node_index /* node */) const {
    return false;
}

// Currently reports an indegree of 0 for every node.
size_t DBGSSHash::indegree(node_index /* node */) const {
    return 0;
}

// Currently always true.
bool DBGSSHash::has_no_incoming(node_index /* node */) const {
    return true;
}

// Currently always false.
bool DBGSSHash::has_single_incoming(node_index /* node */) const {
    return false;
}

// Currently a no-op: `callback` is never invoked.
void DBGSSHash::call_kmers(
        const std::function<void(node_index, const std::string &)> & /* callback */) const {
}

// Currently returns 0 for every k-mer.
DBGSSHash::node_index DBGSSHash::kmer_to_node(std::string_view /* kmer */) const {
    return 0;
}

// Currently returns an empty string for every node.
std::string DBGSSHash::get_node_sequence(node_index /* node */) const {
    return "";
}

// Currently a no-op: nothing is written to the stream.
void DBGSSHash::serialize(std::ostream & /* out */) const {
}

// Currently a no-op: no file is written.
void DBGSSHash::serialize(const std::string & /* filename */) const {
}

// Currently reads nothing and always returns false.
bool DBGSSHash::load(std::istream & /* in */) {
    return false;
}

// Currently reads nothing and always returns false.
bool DBGSSHash::load(const std::string & /* filename */) {
    return false;
}

// Currently always false, whatever `other` is.
bool DBGSSHash::operator==(const DeBruijnGraph & /* other */) const {
    return false;
}

// Alphabet string shared by all instances; currently empty.
const std::string DBGSSHash::alphabet_ = "";

// Returns a reference to the shared static alphabet string.
const std::string &DBGSSHash::alphabet() const {
    return alphabet_;
}

} // namespace graph
} // namespace mtg
91 changes: 91 additions & 0 deletions metagraph/src/graph/representation/hash/dbg_sshash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#ifndef __DBG_SSHASH_HPP__
#define __DBG_SSHASH_HPP__

#include <iostream>
#include <dictionary.hpp>
#include <tsl/ordered_set.h>

#include "graph/representation/base/sequence_graph.hpp"


namespace mtg {
namespace graph {

/**
 * De Bruijn graph representation intended to be backed by SSHash.
 *
 * Only the interface is declared here; the member functions are defined in
 * dbg_sshash.cpp. The inline members below report fixed values except for
 * get_k(), which returns the k passed to the constructor.
 */
class DBGSSHash : public DeBruijnGraph {
  public:
    /**
     * @param k  k-mer length; stored and later reported by get_k().
     */
    explicit DBGSSHash(size_t k) : k_(k) {}

    // SequenceGraph overrides
    void add_sequence(
            std::string_view sequence,
            const std::function<void(node_index)> &on_insertion = [](node_index) {}) override;

    void map_to_nodes(
            std::string_view sequence,
            const std::function<void(node_index)> &callback,
            const std::function<bool()> &terminate = []() { return false; }) const override;

    void map_to_nodes_sequentially(
            std::string_view sequence,
            const std::function<void(node_index)> &callback,
            const std::function<bool()> &terminate = []() { return false; }) const override;

    void adjacent_outgoing_nodes(node_index node,
                                 const std::function<void(node_index)> &callback) const override;

    void adjacent_incoming_nodes(node_index node,
                                 const std::function<void(node_index)> &callback) const override;

    // Currently always reports 0 nodes.
    uint64_t num_nodes() const override { return 0; }

    // The stream overloads are not marked `override`; the filename overloads are.
    bool load(std::istream &in);
    bool load(const std::string &filename) override;

    void serialize(std::ostream &out) const;
    void serialize(const std::string &filename) const override;

    // File extension reported by file_extension().
    static constexpr auto kExtension = ".sshashdbg";
    std::string file_extension() const override { return kExtension; }

    std::string get_node_sequence(node_index node) const override;

    // DeBruijnGraph overrides

    // Returns the k-mer length supplied at construction.
    size_t get_k() const override { return k_; }

    // TODO: add the support for the canonical mode
    Mode get_mode() const override { return BASIC; }

    node_index traverse(node_index node, char next_char) const override;
    node_index traverse_back(node_index node, char prev_char) const override;

    void call_kmers(const std::function<void(node_index, const std::string &)> &callback) const override;

    size_t outdegree(node_index) const override;
    bool has_single_outgoing(node_index) const override;
    bool has_multiple_outgoing(node_index) const override;

    size_t indegree(node_index) const override;
    bool has_no_incoming(node_index) const override;
    bool has_single_incoming(node_index) const override;

    node_index kmer_to_node(std::string_view kmer) const override;

    void call_outgoing_kmers(node_index node,
                             const OutgoingEdgeCallback &callback) const override;

    void call_incoming_kmers(node_index node,
                             const IncomingEdgeCallback &callback) const override;

    bool operator==(const DeBruijnGraph &other) const override;

    const std::string &alphabet() const override;

  private:
    // Alphabet string shared by all instances (defined in dbg_sshash.cpp).
    static const std::string alphabet_;

    // k-mer length given to the constructor.
    size_t k_;
};

} // namespace graph
} // namespace mtg

#endif // __DBG_SSHASH_HPP__
26 changes: 26 additions & 0 deletions metagraph/tests/graph/all/test_dbg_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ template<> size_t max_test_k<DBGHashFast>() {
// max_test_k specialization for DBGHashString: fixed at 100.
template <>
size_t max_test_k<DBGHashString>() {
    constexpr size_t kMaxTestK = 100;
    return kMaxTestK;
}
// max_test_k specialization for DBGSSHash: fixed at 100.
// NOTE(review): confirm that the SSHash backend supports k this large.
template <>
size_t max_test_k<DBGSSHash>() {
    constexpr size_t kMaxTestK = 100;
    return kMaxTestK;
}

template <class Graph>
std::vector<std::string>
Expand Down Expand Up @@ -104,6 +107,24 @@ build_graph<DBGHashString>(uint64_t k,
return graph;
}

// Builds a DBGSSHash with the given k and adds every sequence to it.
// While inserting, asserts that each index passed to the insertion callback
// does not exceed a running counter that starts at the graph's initial
// max_index() and is incremented once per callback. Afterwards asserts that
// the counter equals the graph's final max_index(). The mode argument is
// ignored.
template <>
std::shared_ptr<DeBruijnGraph>
build_graph<DBGSSHash>(uint64_t k,
                       std::vector<std::string> sequences,
                       DeBruijnGraph::Mode) {
    auto graph = std::make_shared<DBGSSHash>(k);
    uint64_t max_index = graph->max_index();

    // Named callback instead of an inline lambda. The identifiers inside the
    // assertion are kept so gtest failure messages stay unchanged.
    const auto on_insertion = [&](auto i) { ASSERT_TRUE(i <= ++max_index); };

    for (const std::string &seq : sequences) {
        graph->add_sequence(seq, on_insertion);
    }

    // ASSERT_* needs a void-returning scope, hence the immediately invoked lambda.
    [&]() { ASSERT_EQ(max_index, graph->max_index()); }();

    return graph;
}

template <>
std::shared_ptr<DeBruijnGraph>
build_graph<DBGBitmap>(uint64_t k,
Expand Down Expand Up @@ -283,6 +304,10 @@ template
std::shared_ptr<DeBruijnGraph>
build_graph_batch<DBGHashString>(uint64_t, std::vector<std::string>, DeBruijnGraph::Mode);

// Explicit instantiation of build_graph_batch for DBGSSHash.
template
std::shared_ptr<DeBruijnGraph>
build_graph_batch<DBGSSHash>(uint64_t, std::vector<std::string>, DeBruijnGraph::Mode);

template <>
std::shared_ptr<DeBruijnGraph>
build_graph_batch<DBGBitmap>(uint64_t k,
Expand Down Expand Up @@ -488,6 +513,7 @@ template bool check_graph<DBGBitmap>(const std::string &, DeBruijnGraph::Mode, b
template bool check_graph<DBGHashOrdered>(const std::string &, DeBruijnGraph::Mode, bool);
template bool check_graph<DBGHashFast>(const std::string &, DeBruijnGraph::Mode, bool);
template bool check_graph<DBGHashString>(const std::string &, DeBruijnGraph::Mode, bool);
template bool check_graph<DBGSSHash>(const std::string &, DeBruijnGraph::Mode, bool);

} // namespace test
} // namespace mtg
2 changes: 2 additions & 0 deletions metagraph/tests/graph/all/test_dbg_helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "graph/representation/hash/dbg_hash_string.hpp"
#include "graph/representation/hash/dbg_hash_ordered.hpp"
#include "graph/representation/hash/dbg_hash_fast.hpp"
#include "graph/representation/hash/dbg_sshash.hpp"
#include "graph/representation/bitmap/dbg_bitmap.hpp"


Expand Down Expand Up @@ -91,6 +92,7 @@ typedef ::testing::Types<DBGBitmap,
DBGHashString,
DBGHashOrdered,
DBGHashFast,
DBGSSHash,
DBGSuccinct,
DBGSuccinctIndexed<1>,
DBGSuccinctIndexed<2>,
Expand Down
Loading