Skip to content

Commit

Permalink
Add indexOf with offset, fix codepoints/surrogates
Browse files Browse the repository at this point in the history
  • Loading branch information
kohlschuetter committed Jan 1, 2024
1 parent aed3c00 commit f63a258
Show file tree
Hide file tree
Showing 2 changed files with 257 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -725,15 +725,46 @@ default boolean isCacheable() {
* @return The position, or {@code -1} if not found.
*/
default int indexOf(int c) {
  // Searching without an explicit offset is the same as searching from offset 0,
  // so hand over to the offset-aware overload.
  final int fromBeginning = 0;
  return indexOf(c, fromBeginning);
}

/**
* Returns the index within this string of the first occurrence of the specified
* character/codepoint, starting with the given character offset, or {@code -1} if not found.
*
* @param c The character/codepoint to look for.
* @param start The character offset.
* @return The position, or {@code -1} if not found.
*/
default int indexOf(int c, int start) {
if (isString()) {
return toString().indexOf(c);
return toString().indexOf(c, start);
} else if (isKnownEmpty()) {
return -1;
}

boolean isSurrogatePair = c > 0xFFFF;

int i = 0;
for (PrimitiveIterator.OfInt it = codePoints().iterator(); it.hasNext(); i++) {
int ch = it.next();
int next = -1;
for (PrimitiveIterator.OfInt it = chars().skip(start).iterator(); next != -1 || it
.hasNext(); i++) {
int ch;
if (next != -1) {
ch = next;
next = -1;
} else {
ch = it.nextInt();
}
if (isSurrogatePair && Character.isHighSurrogate((char) ch) && it.hasNext()) {
char ch2 = (char) it.nextInt();
if (Character.isLowSurrogate(ch2)) {
ch = Character.toCodePoint((char) ch, ch2);
} else {
// We detected an invalid UTF-16 surrogate pair, restart search from here
next = ch2;
}
}
if (ch == c) {
return i;
}
Expand All @@ -750,12 +781,23 @@ default int indexOf(int c) {
*/
@SuppressWarnings("PMD.CognitiveComplexity")
default int indexOf(CharSequence str) {
if (str == this) { // NOPMD.CompareObjectsWithEquals
return 0;
}
return indexOf(str, 0);
}

if (CharSequenceReleaseShim.isEmpty(str)) {
return 0;
/**
* Returns the index within this StringHolder of the first occurrence of the specified
* CharSequence, starting with the given character offset, or {@code -1} if not found.
*
* @param str The char sequence to look for.
* @param start The character offset.
* @return The position, or {@code -1} if not found.
*/
@SuppressWarnings({"PMD.CognitiveComplexity", "PMD.CyclomaticComplexity", "PMD.NPathComplexity"})
default int indexOf(CharSequence str, int start) {
if (str == this) { // NOPMD.CompareObjectsWithEquals
return start == 0 || isEmpty() ? 0 : -1;
} else if (CharSequenceReleaseShim.isEmpty(str)) {
return Math.min(start, length());
} else if (isKnownEmpty()) {
return -1;
}
Expand All @@ -768,19 +810,29 @@ default int indexOf(CharSequence str) {
}

int strLen = str.length();
if (isLengthKnown() && length() < strLen) {
if (isLengthKnown() && length() < (strLen + start)) {
return -1;
}

char firstChar = str.charAt(0);
if (str.length() == 1) {
return indexOf(firstChar);
char firstChar = str.charAt(start);
switch (strLen) {
case 1:
return indexOf(firstChar);
case 2:
char secondChar = str.charAt(start + 1);
if (Character.isSurrogatePair(firstChar, secondChar)) {
return indexOf(Character.toCodePoint(firstChar, secondChar), start);
}
break;
default:
// continue below
break;
}

int max = length() - strLen;
int max = length() - strLen - start;

boolean found = false;
loop : for (int i = 0; i <= max; i++) {
loop : for (int i = start; i <= max; i++) {
char myChar = charAt(i);
if (myChar != firstChar) {
// seek ahead
Expand Down Expand Up @@ -809,7 +861,7 @@ default int indexOf(CharSequence str) {

/**
* Checks if this {@link StringHolder} contains the given {@link CharSequence}.
*
*
* @param s The char sequence to look for.
* @return {@code true} if found (also if the sequence is empty).
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1331,13 +1331,8 @@ public void testIndexOfCharSequenceSingleChar() throws Exception {
.append(StringHolder.withSupplier(() -> "bar")).indexOf(" "));
}

@Test
public void testIndexOfEmptyCharSequence() throws Exception {
assertEquals(0, "".indexOf(""));
assertEquals(0, "Foo bar".indexOf(""));

assertEquals(0, StringHolder.withContent("Foo bar").indexOf(""));
assertEquals(0, StringHolder.withContent("Foo bar").indexOf(new CharSequence() {
private static CharSequence newCustomEmptyCharSequence() {
return new CharSequence() {

@Override
public CharSequence subSequence(int start, int end) {
Expand All @@ -1353,13 +1348,30 @@ public int length() {
public char charAt(int index) {
return (char) -1;
}
}));
};
}

@Test
public void testIndexOfEmptyCharSequence() throws Exception {
assertEquals(0, "".indexOf(""));
assertEquals(0, "".indexOf("", 1));
assertEquals(0, "".indexOf("", 2));
assertEquals(0, "Foo bar".indexOf(""));
assertEquals(1, "Foo bar".indexOf("", 1));

assertEquals(0, StringHolder.withContent("Foo bar").indexOf(""));
assertEquals(1, StringHolder.withContent("Foo bar").indexOf("", 1));
assertEquals(0, StringHolder.withContent("Foo bar").indexOf(newCustomEmptyCharSequence()));
assertEquals(1, StringHolder.withContent("Foo bar").indexOf(newCustomEmptyCharSequence(), 1));

assertEquals(0, StringHolder.withContent("").indexOf(""));
assertEquals(0, StringHolder.withContent("").indexOf("", 1));
assertEquals(0, StringHolder.withContent(new StringBuilder()).indexOf(""));
assertEquals(0, StringHolder.withSupplierFixedLength(0, () -> "").indexOf(""));
assertEquals(0, emptyStringHolderButNotAString(false).indexOf(""));
assertEquals(0, emptyStringHolderButNotAString(true).indexOf(""));
assertEquals(0, emptyStringHolderButNotAString(false).indexOf("", 1));
assertEquals(0, emptyStringHolderButNotAString(true).indexOf("", 1));

assertEquals(0, StringHolder.newSequence().indexOf(""));
assertEquals(0, StringHolder.newSequence().append(StringHolder.withContent("")).indexOf(""));
Expand Down Expand Up @@ -1416,9 +1428,11 @@ public void testIndexOfSelf() throws Exception {

sh = StringHolder.withContent("Foo bar");
assertEquals(0, sh.indexOf(sh));
assertEquals(-1, sh.indexOf(sh, 1));

sh = StringHolder.withContent("");
assertEquals(0, sh.indexOf(sh));
assertEquals(0, sh.indexOf(sh, 1)); // note that behavior is different for empty strings

sh = StringHolder.withSupplier(() -> "");
assertEquals(0, sh.indexOf(sh));
Expand All @@ -1434,4 +1448,172 @@ public void testContains() throws Exception {
public void testIndexOfPartialMatch() {
  // The haystack begins with "Fo", which matches only the first two chars of the needle;
  // the complete occurrence of "Fobar" starts at index 4.
  StringHolder haystack = StringHolder.withContent("Foo Fobar");
  StringBuilder needle = new StringBuilder("Fobar");
  int position = haystack.indexOf(needle);
  assertEquals(4, position);
}

/**
 * Pins down how {@code java.lang.String.indexOf} treats the supplementary code point U+1F408,
 * its two UTF-16 surrogate halves, and the two-char string forms, with and without a start
 * offset.
 *
 * Only plain {@code String} instances are used here.
 *
 * @throws Exception on unexpected error.
 */
@Test
public void testIndexOfSurrogatePairNative() throws Exception {
// U+1F408, a code point above 0xFFFF that needs two chars (a surrogate pair) in a String
int cat = 0x1f408;
// The individual halves of the pair, widened to int so they are searched as single chars
int catHigh = Character.highSurrogate(cat);
int catLow = Character.lowSurrogate(cat);
// The code point as a two-char string (high surrogate followed by low surrogate)
String catStr = "\ud83d\udc08";
// The same two chars in the wrong order (low surrogate first); not a valid pair
String catStrInvalidReversed = "\udc08\ud83d";

String testStr;

// Case 1: the string consists of nothing but the surrogate pair
testStr = "\ud83d\udc08";
assertEquals(0, testStr.indexOf(testStr));
assertEquals(0, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(0, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(1, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
// starting at the low surrogate, the full code point can no longer be found
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));

// Case 2: the pair is surrounded by ordinary chars ("c" before, "at" after)
testStr = "c\ud83d\udc08at";
assertEquals(0, testStr.indexOf(testStr));
assertEquals(1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(1, testStr.indexOf(cat));
assertEquals(1, testStr.indexOf(catHigh));
assertEquals(2, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(1, testStr.indexOf(cat, 1));
assertEquals(2, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(2, testStr.indexOf(catLow, 2));

// Case 3: an unpaired high surrogate directly precedes the valid pair
testStr = "\ud83d\ud83d\udc08at";
assertEquals(0, testStr.indexOf(testStr));
assertEquals(1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
// the code point is found at index 1, i.e. the leading lone high surrogate is not part of it
assertEquals(1, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(1, testStr.indexOf(catHigh, 1));
assertEquals(2, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(1, testStr.indexOf(cat, 1));
assertEquals(2, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(2, testStr.indexOf(catLow, 2));

// Case 4: two high surrogates separated by 'X'; the string contains no low surrogate at all
testStr = "\ud83dX\ud83d";
assertEquals(0, testStr.indexOf(testStr));
assertEquals(-1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(-1, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(2, testStr.indexOf(catHigh, 1));
assertEquals(-1, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(-1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(2, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));

// Case 5: both halves are present but reversed, so they never form the code point
testStr = "\udc08\ud83d"; // wrong order of surrogate pair
assertEquals(0, testStr.indexOf(testStr));
assertEquals(-1, testStr.indexOf(catStr));
assertEquals(0, testStr.indexOf(catStrInvalidReversed)); // 2 individual (albeit invalid) chars
assertEquals(-1, testStr.indexOf(cat));
assertEquals(1, testStr.indexOf(catHigh));
assertEquals(0, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(-1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));
}

/**
 * Checks {@code StringHolder.indexOf} (code point and {@code CharSequence} variants, with and
 * without a start offset) on content containing the supplementary code point U+1F408, its two
 * UTF-16 surrogate halves, and invalid surrogate arrangements.
 *
 * Every holder under test is created via {@code StringHolder.withSupplier}.
 * NOTE(review): presumably this is meant to exercise the code path for holders that are not
 * (yet) plain Strings; confirm against the StringHolder implementation.
 *
 * @throws Exception on unexpected error.
 */
@Test
public void testIndexOfSurrogatePair() throws Exception {
// U+1F408, a code point above 0xFFFF that needs two chars (a surrogate pair)
int cat = 0x1f408;
// The individual halves of the pair, widened to int so they are searched as single chars
int catHigh = Character.highSurrogate(cat);
int catLow = Character.lowSurrogate(cat);
// The code point as a two-char string (high surrogate followed by low surrogate)
String catStr = "\ud83d\udc08";
// The same two chars in the wrong order (low surrogate first); not a valid pair
String catStrInvalidReversed = "\udc08\ud83d";

StringHolder testStr;

// Case 1: the content consists of nothing but the surrogate pair
testStr = StringHolder.withSupplier(() -> "\ud83d\udc08");
assertEquals(0, testStr.indexOf(testStr));
assertEquals(0, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(0, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(1, testStr.indexOf(catLow));
// searching for the holder itself from a non-zero offset in non-empty content yields -1
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));

// Case 2: the pair is surrounded by ordinary chars ("c" before, "at" after)
testStr = StringHolder.withSupplier(() -> "c\ud83d\udc08at");
assertEquals(0, testStr.indexOf(testStr));
assertEquals(1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(1, testStr.indexOf(cat));
assertEquals(1, testStr.indexOf(catHigh));
assertEquals(2, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(1, testStr.indexOf(cat, 1));
assertEquals(2, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(2, testStr.indexOf(catLow, 2));

// Case 3: an unpaired high surrogate directly precedes the valid pair
testStr = StringHolder.withSupplier(() -> "\ud83d\ud83d\udc08at");
assertEquals(0, testStr.indexOf(testStr));
assertEquals(1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
// the code point is expected at index 1; the leading lone high surrogate is not part of it
assertEquals(1, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(1, testStr.indexOf(catHigh, 1));
assertEquals(2, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(1, testStr.indexOf(cat, 1));
assertEquals(2, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(2, testStr.indexOf(catLow, 2));

// Case 4: two high surrogates separated by 'X'; the content contains no low surrogate at all
testStr = StringHolder.withSupplier(() -> "\ud83dX\ud83d");
assertEquals(0, testStr.indexOf(testStr));
assertEquals(-1, testStr.indexOf(catStr));
assertEquals(-1, testStr.indexOf(catStrInvalidReversed));
assertEquals(-1, testStr.indexOf(cat));
assertEquals(0, testStr.indexOf(catHigh));
assertEquals(2, testStr.indexOf(catHigh, 1));
assertEquals(-1, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(-1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(2, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));

// Case 5: both halves are present but reversed, so they never form the code point
testStr = StringHolder.withSupplier(() -> "\udc08\ud83d"); // wrong order of surrogate pair
assertEquals(0, testStr.indexOf(testStr));
assertEquals(-1, testStr.indexOf(catStr));
assertEquals(0, testStr.indexOf(catStrInvalidReversed)); // 2 individual (albeit invalid) chars
// NOTE(review): the holder is re-created and the same lookup repeated, presumably because the
// earlier indexOf calls may have changed the holder's internal state; confirm.
testStr = StringHolder.withSupplier(() -> "\udc08\ud83d"); // fresh instance
assertEquals(0, testStr.indexOf(catStrInvalidReversed)); // 2 individual (albeit invalid) chars
assertEquals(-1, testStr.indexOf(cat));
assertEquals(1, testStr.indexOf(catHigh));
assertEquals(0, testStr.indexOf(catLow));
assertEquals(-1, testStr.indexOf(testStr, 1));
assertEquals(-1, testStr.indexOf(cat, 1));
assertEquals(-1, testStr.indexOf(catLow, 1));
assertEquals(-1, testStr.indexOf(cat, 2));
assertEquals(-1, testStr.indexOf(catHigh, 2));
assertEquals(-1, testStr.indexOf(catLow, 2));
}
}

0 comments on commit f63a258

Please sign in to comment.