Skip to content

Commit

Permalink
Add padding module
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Dec 6, 2023
1 parent f5d8483 commit 1806c0c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/CommandDeclarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ extern int convertkb(int argc, const char **argv, const Command& command);
// Command entry points. Each takes the raw argument vector plus the Command
// descriptor it is registered under and returns an int status code.
extern int convertmsa(int argc, const char **argv, const Command& command);
extern int convertprofiledb(int argc, const char **argv, const Command& command);
extern int createdb(int argc, const char **argv, const Command& command);
extern int makepaddedseqdb(int argc, const char **argv, const Command& command);
extern int createindex(int argc, const char **argv, const Command& command);
extern int createlinindex(int argc, const char **argv, const Command& command);
extern int createseqfiledb(int argc, const char **argv, const Command& command);
Expand Down
9 changes: 8 additions & 1 deletion src/MMseqsBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,14 +130,21 @@ std::vector<Command> baseCommands = {
"<i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB>",
CITATION_MMSEQS2, {{"fast[a|q]File[.gz|bz2]|stdin", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::flatfileStdinAndGeneric },
{"sequenceDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::flatfile }}},
{"makepaddedseqdb", makepaddedseqdb, &par.onlyverbosity, COMMAND_HIDDEN,
"Generate a padded sequence DB",
"Generate a padded sequence DB",
"Martin Steinegger <[email protected]>",
"<i:sequenceDB> <o:sequenceDB>",
CITATION_MMSEQS2, {{"sequenceDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA|DbType::NEED_HEADER, &DbValidator::sequenceDb },
{"sequenceIndexDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::sequenceDb }}},
{"appenddbtoindex", appenddbtoindex, &par.appenddbtoindex, COMMAND_HIDDEN,
NULL,
NULL,
"Milot Mirdita <[email protected]>",
"<i:DB1> ... <i:DBN> <o:DB>",
CITATION_MMSEQS2, {{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA | DbType::VARIADIC, &DbValidator::allDb },
{"DB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::allDb }}},
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
{"indexdb", indexdb, &par.indexdb, COMMAND_HIDDEN,
NULL,
NULL,
"Martin Steinegger <[email protected]>",
Expand Down
1 change: 1 addition & 0 deletions src/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ set(util_source_files
util/filterdb.cpp
util/gff2db.cpp
util/renamedbkeys.cpp
util/makepaddedseqdb.cpp
util/masksequence.cpp
util/maskbygff.cpp
util/mergeclusters.cpp
Expand Down
31 changes: 31 additions & 0 deletions src/util/makepaddedseqdb.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "Parameters.h"
#include "DBReader.h"
#include "DBWriter.h"
#include "Debug.h"
#include "Util.h"

/**
 * Generate a padded sequence DB.
 *
 * Reads every entry of the input sequence DB (par.db1), appends space
 * characters until the entry length is a multiple of ALIGN bytes, and writes
 * the padded entry under its original key to the output DB (par.db2).
 * Entries are visited in reverse of the reader's SORT_BY_LENGTH order.
 * After the data is written, the ancillary files of the input DB are
 * soft-linked to the output DB.
 *
 * @param argc     number of command-line arguments
 * @param argv     command-line arguments, forwarded to the parameter parser
 * @param command  descriptor of this command, forwarded to the parameter parser
 * @return EXIT_SUCCESS
 */
int makepaddedseqdb(int argc, const char **argv, const Command &command) {
    Parameters &par = Parameters::getInstance();
    par.parseParameters(argc, argv, command, true, 0, 0);

    DBReader<unsigned int> dbr(par.db1.c_str(), par.db1Index.c_str(), 1,
                               DBReader<unsigned int>::USE_INDEX | DBReader<unsigned int>::USE_DATA);
    dbr.open(DBReader<unsigned int>::SORT_BY_LENGTH);

    DBWriter writer(par.db2.c_str(), par.db2Index.c_str(), 1, false, dbr.getDbtype());
    writer.open();

    // Every written entry is padded up to a multiple of this many bytes.
    const size_t ALIGN = 4;

    // Scratch buffer reused across iterations; clear() keeps its capacity.
    std::string result;

    // Count down with an unsigned index so an empty DB simply skips the loop.
    for (size_t remaining = dbr.getSize(); remaining > 0; remaining--) {
        const size_t id = remaining - 1;
        const unsigned int key = dbr.getDbKey(id);
        const char *data = dbr.getData(id, 0);
        const size_t seqLen = dbr.getSeqLen(id);
        const size_t sequencepadding = (seqLen % ALIGN == 0) ? 0 : ALIGN - seqLen % ALIGN;

        result.clear();
        result.append(data, seqLen);
        result.append(sequencepadding, ' ');

        // Write the padded copy, not the reader's buffer: reading
        // seqLen + sequencepadding bytes from `data` would run past the
        // sequence and emit whatever follows it instead of the padding.
        writer.writeData(result.c_str(), result.size(), key, 0, false);
    }
    writer.close(true);
    DBReader<unsigned int>::softlinkDb(par.db1, par.db2, DBFiles::SEQUENCE_ANCILLARY);

    dbr.close();
    return EXIT_SUCCESS;
}

0 comments on commit 1806c0c

Please sign in to comment.