Skip to content

Commit

Permalink
Add the too long string names sanitization
Browse files Browse the repository at this point in the history
  • Loading branch information
winseros committed May 17, 2024
1 parent 039216d commit fef04ea
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 28 deletions.
27 changes: 21 additions & 6 deletions pbom/io/bb/__test__/sanitizedstring_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ namespace pboman3::io::test {
SanitizedStringTestParam{"*1*", "%2a1%2a"},
SanitizedStringTestParam{"1///", "1%2f%2f%2f"},
SanitizedStringTestParam{"\\2", "%5c2"},
SanitizedStringTestParam{" ", "%20%20%20%20"}
SanitizedStringTestParam{" ", "%20%20%20%20"},
SanitizedStringTestParam{"1111.", "1111%2e"},
SanitizedStringTestParam{"1111 ", "1111%20"}
));

class SanitizedStringRestrictedKeywordsTest : public testing::TestWithParam<SanitizedStringTestParam> {
Expand All @@ -37,10 +39,23 @@ namespace pboman3::io::test {
}

INSTANTIATE_TEST_SUITE_P(TestSuite, SanitizedStringRestrictedKeywordsTest, testing::Values(
SanitizedStringTestParam{"COM1.c", "^COM1-\\d{1,4}.c"},
SanitizedStringTestParam{"COn", "^COn-\\d{1,4}"},
SanitizedStringTestParam{"COM1", "^COM1-\\d{1,4}"},
SanitizedStringTestParam{"lPt2", "^lPt2-\\d{1,4}"},
SanitizedStringTestParam{"NUL", "^NUL-\\d{1,4}"}
SanitizedStringTestParam{"COM1.c", "^COM1-\\d{1,4}.c"},
SanitizedStringTestParam{"COn", "^COn-\\d{1,4}"},
SanitizedStringTestParam{"COM1", "^COM1-\\d{1,4}"},
SanitizedStringTestParam{"lPt2", "^lPt2-\\d{1,4}"},
SanitizedStringTestParam{"NUL", "^NUL-\\d{1,4}"}
));

// Parameterised fixture for the length-limit cases; each parameter pairs a
// source string with the exact text expected after sanitization.
class SanitizedStringLengthTest
    : public testing::TestWithParam<SanitizedStringTestParam> {};

// Sanitizes the parameter's source text with a 50-character limit and checks
// the result against the expected text exactly.
TEST_P(SanitizedStringLengthTest, Deals_With_Long_Strings) {
    const auto& testCase = GetParam();
    SanitizedString sanitizedName(testCase.sourceText, 50);
    const QString actualText = sanitizedName;
    ASSERT_EQ(actualText, testCase.expectedTextOrPattern);
}

// Input: 51 characters, one more than the limit of 50 used by the test body.
// Expected: the first 14 characters, '-', the hex digest of the input, then
// "~37" (51 - 14 = 37 characters dropped from the original text).
INSTANTIATE_TEST_SUITE_P(TestSuite, SanitizedStringLengthTest, testing::Values(
    SanitizedStringTestParam{
        "123456789a" "123456789a" "123456789a" "123456789a" "123456789ab",
        "123456789a1234" "-" "d642eb4f7beba2ee9fda95f3ed39de8" "~37"
    }
));
}
113 changes: 92 additions & 21 deletions pbom/io/bb/sanitizedstring.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "sanitizedstring.h"
#include "util/filenames.h"
#include "util/numbers.h"
#include <QRandomGenerator>
#include <QCryptographicHash>

namespace pboman3::io {
const QList<QString> SanitizedString::restrictedFileNames_ = {
Expand All @@ -9,22 +11,72 @@ namespace pboman3::io {
"LPT8", "LPT9"
};

SanitizedString::SanitizedString(const QString& text) {
if (qsizetype firstInvalidCharIndex; needsCharacterSanitization(text, &firstInvalidCharIndex)) {
sanitizedText_ = doCharacterSanitization(text, firstInvalidCharIndex);
} else if (QString keyWord; needsKeywordSanitization(text, &keyWord)) {
sanitizedText_ = doKeywordSanitization(text, keyWord);
} else if (needsWhitespaceSanitization(text)) {
sanitizedText_ = doWhitespaceSanitization(text);
} else {
sanitizedText_ = text;
// Builds the sanitized form of `text` in three stages:
//   1. one content rewrite (illegal characters, else reserved keyword, else whitespace),
//   2. escaping of a trailing '.' or ' ',
//   3. shortening to at most `maxStringLength` characters.
SanitizedString::SanitizedString(const QString& text, int maxStringLength)
    : sanitizedText_(text) {
    // Stage 1: the checks are mutually exclusive - the first one that fires wins.
    qsizetype invalidCharPos{};
    QString reservedName;
    if (needsCharacterSanitization(sanitizedText_, &invalidCharPos)) {
        sanitizedText_ = doCharacterSanitization(sanitizedText_, invalidCharPos);
    } else if (needsKeywordSanitization(sanitizedText_, &reservedName)) {
        sanitizedText_ = doKeywordSanitization(sanitizedText_, reservedName);
    } else if (needsWhitespaceSanitization(sanitizedText_)) {
        sanitizedText_ = doWhitespaceSanitization(sanitizedText_);
    }

    // Stage 2: always evaluated, on whatever stage 1 produced.
    QChar trailingChar;
    if (needsEndingSanitization(sanitizedText_, &trailingChar)) {
        sanitizedText_ = doEndingSanitization(sanitizedText_, trailingChar);
    }

    // Stage 3: ideally the text would be truncated first, before any other
    // processing. However, unescaped binary character sequences make QString
    // report its length incorrectly, so length sanitization gives wrong results
    // on strings that still contain binary symbols. It therefore runs last.
    if (needsLengthSanitization(sanitizedText_, maxStringLength)) {
        sanitizedText_ = doLengthSanitization(sanitizedText_, maxStringLength);
    }
}

// Implicit conversion to the sanitized text computed by the constructor.
// Returns a reference to the sanitizedText_ member rather than a copy.
// NOTE(review): the operator is not const-qualified, so it cannot be invoked on a
// const SanitizedString; making it const also requires changing the declaration in the header.
SanitizedString::operator const QString&() {
return sanitizedText_;
}

bool SanitizedString::needsLengthSanitization(const QString& text, int maxStringLength) {
return text.length() > maxStringLength;
}

// Shortens `text` so that it fits into `maxStringLength` characters while staying
// distinguishable from other long names:
//   "a-very-long-string"  ->  "a-very-{md5}~{number-chars-chopped}"
// The MD5 is taken over the UTF-8 bytes of the whole original text.
//
// When `maxStringLength` is too small to hold the suffix at all, the prefix is
// dropped entirely and the result is just "-{md5}~{number-chars-chopped}"
// (which is then longer than the limit, but never longer than prefix + suffix).
QString SanitizedString::doLengthSanitization(const QString& text, int maxStringLength) {
    // Nothing to shorten; also keeps the arithmetic below away from negative cut lengths.
    if (text.length() <= maxStringLength) {
        return text;
    }

    const int md5TextLength = 16 * 2;   // 16 digest bytes, up to two hex digits each
    const int separatorCharCount = 2;   // the '-' and the '~'

    // Characters that must go to make room for '-' + digest + '~',
    // not yet counting the digits of the counter that follows the '~'.
    auto stringCutLength = text.length() - maxStringLength + md5TextLength + separatorCharCount;

    // The counter's own digits also consume space, and adding them may push the
    // counter into the next order of magnitude - iterate until the width is stable.
    auto stringCutDigits = util::NumberUtils::GetNumberOfDigits(stringCutLength);
    while (true) {
        const auto widenedDigits = util::NumberUtils::GetNumberOfDigits(stringCutLength + stringCutDigits);
        if (widenedDigits == stringCutDigits) {
            break;
        }
        stringCutDigits = widenedDigits;
    }
    stringCutLength += stringCutDigits;

    // QString::left() returns the WHOLE string for a negative count, so a cut that
    // exceeds the text length must be clamped explicitly instead of passed through.
    const qsizetype keptLength = stringCutLength < text.length() ? text.length() - stringCutLength : 0;
    const qsizetype choppedLength = text.length() - keptLength;

    const auto hashBytes = QCryptographicHash::hash(text.toUtf8(), QCryptographicHash::Algorithm::Md5);

    QString name = text.left(keptLength);
    name.reserve(name.length() + md5TextLength + separatorCharCount + stringCutDigits);
    name += "-";

    // NOTE(review): bytes below 0x10 are rendered as a single hex digit, so the
    // digest text can be shorter than md5TextLength (the result then undershoots the
    // limit by a few characters). Kept as-is because existing expected values depend
    // on this exact format; zero-padding would require regenerating them.
    for (const auto b: hashBytes) {
        name += QString::number(static_cast<unsigned char>(b), 16);
    }
    name += "~" + QString::number(choppedLength);

    return name;
}

bool SanitizedString::needsCharacterSanitization(const QString& text, qsizetype* firstInvalidCharIndex) {
auto it = text.constBegin();
while (it != text.constEnd()) {
Expand All @@ -37,18 +89,6 @@ namespace pboman3::io {
return false;
}

// Checks whether the file name of `text`, taken without its extension, equals one of
// the entries of restrictedFileNames_ (compared case-insensitively).
// On a match, the matching list entry (not the spelling used in `text`) is written
// to `*keyword` and true is returned; otherwise `*keyword` is left untouched.
bool SanitizedString::needsKeywordSanitization(const QString& text, QString* keyword) {
const QString& fileNameWithoutExtension = util::FileNames::getFileNameWithoutExtension(text);
const auto found = std::find_if(restrictedFileNames_.constBegin(), restrictedFileNames_.constEnd(), [&fileNameWithoutExtension](const QString& kwd){
return QString::compare(kwd, fileNameWithoutExtension, Qt::CaseInsensitive) == 0;
});
if (found != restrictedFileNames_.constEnd()){
*keyword = *found;
return true;
}
return false;
}

QString SanitizedString::doCharacterSanitization(const QString& text, qsizetype firstInvalidCharIndex) {
QString sanitized;
sanitized.reserve(static_cast<qsizetype>(static_cast<double>(text.size()) * 1.2));
Expand All @@ -72,6 +112,20 @@ namespace pboman3::io {
return sanitized;
}

bool SanitizedString::needsKeywordSanitization(const QString& text, QString* keyword) {
const QString& fileNameWithoutExtension = util::FileNames::getFileNameWithoutExtension(text);
const auto found = std::find_if(restrictedFileNames_.constBegin(), restrictedFileNames_.constEnd(),
[&fileNameWithoutExtension](const QString& kwd) {
return QString::compare(kwd, fileNameWithoutExtension,
Qt::CaseInsensitive) == 0;
});
if (found != restrictedFileNames_.constEnd()) {
*keyword = *found;
return true;
}
return false;
}

QString SanitizedString::doKeywordSanitization(const QString& text, const QString& keyword) {
QString result(text);
const qint32 rnd = QRandomGenerator::global()->bounded(1000);
Expand All @@ -89,6 +143,23 @@ namespace pboman3::io {
return result;
}

bool SanitizedString::needsEndingSanitization(const QString& text, QChar* ending) {
if (text.endsWith('.')) {
*ending = '.';
return true;
} else if (text.endsWith(' ')) {
*ending = ' ';
return true;
}
return false;
}

// Replaces the last character of `text` with the sanitizeChar() form of `ending`.
// Expects a non-empty `text` whose last character is `ending`
// (as reported by needsEndingSanitization).
QString SanitizedString::doEndingSanitization(const QString& text, const QChar& ending) {
    const QString escapedEnding = sanitizeChar(ending);
    QString head = text.chopped(1);
    head += escapedEnding;
    return head;
}

bool SanitizedString::isCharLegal(const QChar& chr) {
if (chr == '<' || chr == '>' || chr == ':' || chr == ':' || chr == '"' || chr == '\\' || chr == '/' ||
chr == '|' || chr == '?' || chr == '*' || chr == '{' || chr == '}') {
Expand Down
10 changes: 9 additions & 1 deletion pbom/io/bb/sanitizedstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace pboman3::io {
class SanitizedString {
public:
explicit SanitizedString(const QString& text);
explicit SanitizedString(const QString& text, int maxStringLength = 255);

operator const QString&();

Expand All @@ -15,6 +15,10 @@ namespace pboman3::io {

QString sanitizedText_;

static bool needsLengthSanitization(const QString& text, int maxStringLength);

static QString doLengthSanitization(const QString& text, int maxStringLength);

static bool needsCharacterSanitization(const QString& text, qsizetype* firstInvalidCharIndex);

static QString doCharacterSanitization(const QString& text, qsizetype firstInvalidCharIndex);
Expand All @@ -27,6 +31,10 @@ namespace pboman3::io {

static QString doWhitespaceSanitization(const QString& text);

static bool needsEndingSanitization(const QString& text, QChar* ending);

static QString doEndingSanitization(const QString& text, const QChar& ending);

static bool isCharLegal(const QChar& chr);

static QString sanitizeChar(const QChar& chr);
Expand Down
1 change: 1 addition & 0 deletions pbom/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ set(PROJECT_SOURCES ${PROJECT_SOURCES} PARENT_SCOPE)
# Add the unit-test sources of the util module to the TEST_SOURCES list.
list(APPEND TEST_SOURCES
"util/__test__/filenames_test.cpp"
"util/__test__/json_test.cpp"
"util/__test__/numbers_test.cpp"
"util/__test__/qpointerlistiterator_test.cpp")

# Propagate the extended list to the parent scope; list(APPEND) alone only changes the local variable.
set(TEST_SOURCES ${TEST_SOURCES} PARENT_SCOPE)
28 changes: 28 additions & 0 deletions pbom/util/__test__/numbers_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include <gtest/gtest.h>
#include <QObject>
#include "util/numbers.h"

namespace pboman3::util::test {
struct NumberUtilsTestParam {
qsizetype value;
int expectedResult;
};

class NumberUtilsTest : public ::testing::TestWithParam<NumberUtilsTestParam> {
};

TEST_P(NumberUtilsTest, GetNumberOfDigits_Returns_Correct_Result) {
const auto param = GetParam();
const auto result = NumberUtils::GetNumberOfDigits(param.value);
ASSERT_EQ(result, param.expectedResult);
}

INSTANTIATE_TEST_SUITE_P(TestSuite, NumberUtilsTest, ::testing::Values(
NumberUtilsTestParam{0, 1},
NumberUtilsTestParam{1, 1},
NumberUtilsTestParam{10, 2},
NumberUtilsTestParam{99, 2},
NumberUtilsTestParam{100, 3},
NumberUtilsTestParam{101, 3}
));
}
18 changes: 18 additions & 0 deletions pbom/util/numbers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#include <concepts>

namespace pboman3::util {
class NumberUtils {
public:
template<std::integral T>
[[nodiscard]] static int GetNumberOfDigits(T number) {
int res = 1;
while (number >= 10) {
number = number * 0.1;
res++;
}
return res;
}
};
}

0 comments on commit fef04ea

Please sign in to comment.