Skip to content
This repository has been archived by the owner on Apr 4, 2024. It is now read-only.

LiteralString #41

Closed
SDelgado-21 opened this issue Mar 15, 2024 · 2 comments
Closed

LiteralString #41

SDelgado-21 opened this issue Mar 15, 2024 · 2 comments
Assignees

Comments

@SDelgado-21
Copy link
Collaborator

From Epic: inline snapshot #8
Implementation:

// Three consecutive double-quote characters: the delimiter of a triple-quoted literal.
private const val TRIPLE_QUOTE = "\"\"\""
// The seven characters ${'$'} - written in place of '$' by the Kotlin encoders in this file.
private const val KOTLIN_DOLLAR = "\${'\$'}"
// The seven characters ${'"'} - encodeMultiKotlin writes three of these in place of an embedded triple quote.
private const val KOTLIN_DOLLARQUOTE = "\${'\"'}"
internal object LiteralString : LiteralFormat<String>() {
/**
 * Encodes [value] as a string literal for [language].
 *
 * A value without `'\n'` is always written as a single-line literal. A value containing `'\n'`
 * is written triple-quoted for JAVA and KOTLIN only; SCALA, GROOVY and JAVA_PRE15 still get the
 * single-line form.
 *
 * @param encodingPolicy forwarded to the multi-line encoders only.
 */
override fun encode(
    value: String,
    language: Language,
    encodingPolicy: EscapeLeadingWhitespace
): String {
  val hasNewline = '\n' in value
  return when (language) {
    // scala only does $ substitution for s" and f" strings
    // TODO: support triple-quoted strings in scala
    // https://github.com/diffplug/selfie/issues/106
    Language.SCALA,
    // TODO: support triple-quoted strings in groovy
    // https://github.com/diffplug/selfie/issues/105
    Language.GROOVY,
    Language.JAVA_PRE15 -> encodeSingleJava(value)
    Language.JAVA ->
        if (hasNewline) encodeMultiJava(value, encodingPolicy) else encodeSingleJava(value)
    Language.KOTLIN ->
        if (hasNewline) encodeMultiKotlin(value, encodingPolicy)
        else encodeSingleJavaWithDollars(value)
  }
}
/**
 * Parses the literal [str] (including its surrounding quotes) as written for [language].
 *
 * The literal is treated as triple-quoted when it starts with three double quotes, otherwise as
 * a single-line literal.
 *
 * @throws UnsupportedOperationException for a triple-quoted literal in SCALA or GROOVY.
 */
override fun parse(str: String, language: Language): String {
  val tripleQuoted = str.startsWith(TRIPLE_QUOTE)
  return when (language) {
    Language.SCALA ->
        if (tripleQuoted)
            throw UnsupportedOperationException(
                "Selfie doesn't support triple-quoted strings in Scala, yet - help wanted: https://github.com/diffplug/selfie/issues/106")
        else parseSingleJava(str)
    Language.GROOVY ->
        if (tripleQuoted)
            throw UnsupportedOperationException(
                "Selfie doesn't support triple-quoted strings in Groovy, yet - help wanted: https://github.com/diffplug/selfie/issues/105")
        else parseSingleJavaWithDollars(str)
    Language.JAVA_PRE15,
    Language.JAVA -> if (tripleQuoted) parseMultiJava(str) else parseSingleJava(str)
    Language.KOTLIN ->
        if (tripleQuoted) parseMultiKotlin(str) else parseSingleJavaWithDollars(str)
  }
}
/** Encodes [value] as a double-quoted single-line literal, leaving `$` characters as they are. */
fun encodeSingleJava(value: String): String {
  return encodeSingleJavaish(value, escapeDollars = false)
}
/** Encodes [value] as a double-quoted single-line literal in which every `$` is escaped. */
fun encodeSingleJavaWithDollars(value: String): String {
  return encodeSingleJavaish(value, escapeDollars = true)
}
/**
 * Builds a double-quoted single-line literal for [value].
 *
 * Backspace, newline, carriage return, tab, double quote and backslash get their two-character
 * backslash escapes. Any other character accepted by [isControlChar] becomes a `\u` escape with
 * four lower-case hex digits. Everything else is copied through.
 *
 * @param escapeDollars when true, each `$` is written as [KOTLIN_DOLLAR]; when false it is kept.
 */
private fun encodeSingleJavaish(value: String, escapeDollars: Boolean): String = buildString {
  append('"')
  for (c in value) {
    val replacement =
        when (c) {
          '\b' -> "\\b"
          '\n' -> "\\n"
          '\r' -> "\\r"
          '\t' -> "\\t"
          '\"' -> "\\\""
          '\\' -> "\\\\"
          '$' -> if (escapeDollars) KOTLIN_DOLLAR else "\$"
          else ->
              if (isControlChar(c)) "\\u" + c.code.toString(16).padStart(4, '0')
              else c.toString()
        }
    append(replacement)
  }
  append('"')
}
/** Returns true when [c] is in U+0000..U+001F or is U+007F. */
private fun isControlChar(c: Char): Boolean = c <= '\u001F' || c == '\u007F'
/** Parses a double-quoted single-line literal without interpreting any `$` sequences. */
fun parseSingleJava(sourceWithQuotes: String): String {
  return parseSingleJavaish(sourceWithQuotes, removeDollars = false)
}
/** Parses a double-quoted single-line literal, first inlining `${'x'}` character templates. */
fun parseSingleJavaWithDollars(sourceWithQuotes: String): String {
  return parseSingleJavaish(sourceWithQuotes, removeDollars = true)
}
/**
 * Strips the surrounding double quotes from [sourceWithQuotes] and unescapes the remainder.
 *
 * @param removeDollars when true, `${'x'}` character templates are inlined before the backslash
 *   escapes are processed.
 * @throws IllegalStateException if the input is not wrapped in a pair of double quotes. A lone
 *   `"` is rejected here too, rather than failing later inside `substring`.
 */
private fun parseSingleJavaish(sourceWithQuotes: String, removeDollars: Boolean): String {
  check(
      sourceWithQuotes.length >= 2 &&
          sourceWithQuotes.startsWith('"') &&
          sourceWithQuotes.endsWith('"')) {
        "Expected a literal wrapped in double quotes, but was: $sourceWithQuotes"
      }
  val source = sourceWithQuotes.substring(1, sourceWithQuotes.length - 1)
  val toUnescape = if (removeDollars) inlineDollars(source) else source
  return unescapeJava(toUnescape)
}
/**
 * Encodes [arg] as a triple-quoted literal.
 *
 * Steps, in order: every `$` becomes [KOTLIN_DOLLAR]; every embedded triple quote becomes three
 * [KOTLIN_DOLLARQUOTE]; then, per line, one trailing space or tab is replaced by a character
 * template and the line is passed to [escapeLeadingWhitespace] with the same two templates.
 */
fun encodeMultiKotlin(arg: String, escapeLeadingWhitespace: EscapeLeadingWhitespace): String {
  val spaceTemplate = "\${' '}"
  val tabTemplate = "\${'\\t'}"
  val body =
      arg.replace("$", KOTLIN_DOLLAR)
          .replace(TRIPLE_QUOTE, KOTLIN_DOLLARQUOTE.repeat(3))
          .lines()
          .joinToString("\n") { line ->
            val trailingProtected =
                when {
                  line.endsWith(" ") -> line.dropLast(1) + spaceTemplate
                  line.endsWith("\t") -> line.dropLast(1) + tabTemplate
                  else -> line
                }
            escapeLeadingWhitespace.escapeLine(trailingProtected, spaceTemplate, tabTemplate)
          }
  return TRIPLE_QUOTE + body + TRIPLE_QUOTE
}
/**
 * Encodes [arg] as a triple-quoted text block: the opening delimiter, a newline, the escaped
 * content, then the closing delimiter directly after the last content line.
 *
 * Backslashes are doubled and embedded triple quotes are backslash-escaped. Per line, one
 * trailing space or tab becomes `\s` or `\t`, and the line is passed to [escapeLeadingWhitespace].
 *
 * If every non-blank line still starts with whitespace afterwards, a text-block reader would
 * strip that shared indentation. To stop that, the first whitespace character of the last line
 * is escaped. When the last line is empty (the value ends with a newline) the closing delimiter
 * already sits at column 0 of the emitted literal, so nothing can be stripped and the content is
 * left as is. Previously that case threw.
 *
 * @throws UnsupportedOperationException if a shared indentation exists but the non-empty last
 *   line starts with neither a space nor a tab.
 */
fun encodeMultiJava(arg: String, escapeLeadingWhitespace: EscapeLeadingWhitespace): String {
  val escapeBackslashes = arg.replace("\\", "\\\\")
  val escapeTripleQuotes = escapeBackslashes.replace(TRIPLE_QUOTE, "\\\"\\\"\\\"")
  val protectWhitespace =
      escapeTripleQuotes.lines().joinToString("\n") { line ->
        val protectTrailingWhitespace =
            if (line.endsWith(" ")) {
              line.dropLast(1) + "\\s"
            } else if (line.endsWith("\t")) {
              line.dropLast(1) + "\\t"
            } else line
        escapeLeadingWhitespace.escapeLine(protectTrailingWhitespace, "\\s", "\\t")
      }
  val lines = protectWhitespace.lines()
  // Equivalent to the old "smallest leading-whitespace prefix is non-empty" test.
  val hasCommonIndent =
      lines.any { it.isNotBlank() } &&
          lines.all { line -> line.isBlank() || line.first().isWhitespace() }
  val last = lines.last()
  if (!hasCommonIndent || last.isEmpty()) {
    return "$TRIPLE_QUOTE\n$protectWhitespace$TRIPLE_QUOTE"
  }
  // Only the last line is touched, addressed by position rather than by object identity.
  val protectedLast =
      when {
        last.startsWith(" ") -> "\\s${last.drop(1)}"
        last.startsWith("\t") -> "\\t${last.drop(1)}"
        else ->
            throw UnsupportedOperationException(
                "How did it end up with a common whitespace prefix?")
      }
  val protectedContent = (lines.dropLast(1) + protectedLast).joinToString("\n")
  return "$TRIPLE_QUOTE\n$protectedContent$TRIPLE_QUOTE"
}
// Matches ${'x'} or ${'\x'}; group 1 is the text between the single quotes.
private val charLiteralRegex = Regex("""\$\{'(\\?.)'\}""")
/**
 * Replaces each `${'x'}` character template in [source] with the character it denotes.
 *
 * A one-character body is used as is. A backslash body is translated for `t`, `b`, `n`, `r`,
 * `'`, `\` and `$`; any other backslash pair is left as the two characters it was written with.
 * Returns [source] itself when it contains no `$`.
 */
private fun inlineDollars(source: String): String {
  if ('$' !in source) return source
  return charLiteralRegex.replace(source) { match ->
    val body = match.groupValues[1]
    if (body.length == 1) {
      body
    } else if (body.length == 2 && body[0] == '\\') {
      when (body[1]) {
        't' -> "\t"
        'b' -> "\b"
        'n' -> "\n"
        'r' -> "\r"
        '\'' -> "'"
        '\\' -> "\\"
        '$' -> "$"
        else -> body
      }
    } else {
      throw IllegalArgumentException("Unknown character literal $body")
    }
  }
}
/**
 * Resolves backslash escapes in [source]: `\"`, `\\`, `\b`, `\f`, `\n`, `\r`, `\s` (space), `\t`
 * and `\u` followed by exactly four hex digits. Returns [source] itself when it has no backslash.
 *
 * @throws IllegalArgumentException for an unknown escape, a backslash that is the last character
 *   of the input, or a `\u` escape with fewer than four characters after it.
 * @throws NumberFormatException (a subtype of the above) when the four characters after `\u`
 *   are not hex digits; a leading sign is rejected as well.
 */
private fun unescapeJava(source: String): String {
  val firstEscape = source.indexOf('\\')
  if (firstEscape == -1) {
    return source
  }
  val value = StringBuilder(source.length)
  value.append(source, 0, firstEscape)
  var i = firstEscape
  while (i < source.length) {
    var c = source[i]
    if (c == '\\') {
      i++
      // A backslash with nothing after it used to fail with an index error.
      if (i >= source.length) {
        throw IllegalArgumentException("Dangling backslash at the end of: $source")
      }
      c = source[i]
      when (c) {
        '\"' -> value.append('\"')
        '\\' -> value.append('\\')
        'b' -> value.append('\b')
        'f' -> value.append('\u000c')
        'n' -> value.append('\n')
        'r' -> value.append('\r')
        's' -> value.append(' ')
        't' -> value.append('\t')
        'u' -> {
          if (i + 5 > source.length) {
            throw IllegalArgumentException("Truncated unicode escape in: $source")
          }
          val hex = source.substring(i + 1, i + 5)
          // The radix parser would accept a leading sign, which is not a valid escape.
          val code =
              hex.takeIf { it[0] != '+' && it[0] != '-' }?.toIntOrNull(16)
                  ?: throw NumberFormatException("Invalid unicode escape \\u$hex")
          value.append(code.toChar())
          i += 4
        }
        else -> throw IllegalArgumentException("Unknown escape sequence $c")
      }
    } else {
      value.append(c)
    }
    i++
  }
  return value.toString()
}
/**
 * Parses a triple-quoted text block, including its delimiters, into the string it denotes.
 *
 * The shared indentation is the smallest count of leading whitespace characters over all
 * non-blank lines plus the last line. The last line carries the closing delimiter, so it counts
 * even when it is blank - this follows the text-block indentation rule of `String.stripIndent`.
 * That many characters are dropped from each non-blank line, trailing whitespace is trimmed, and
 * backslash escapes are resolved. Blank lines become empty strings.
 *
 * Earlier versions took the lexicographically smallest prefix string and ignored a blank last
 * line, which went wrong for mixed tab/space indentation and for a closing delimiter indented
 * less than the content.
 *
 * @throws IllegalStateException if the input does not start with the opening delimiter followed
 *   by a newline, or does not end with the closing delimiter.
 */
fun parseMultiJava(sourceWithQuotes: String): String {
  check(sourceWithQuotes.startsWith("$TRIPLE_QUOTE\n"))
  check(sourceWithQuotes.endsWith(TRIPLE_QUOTE))
  val source =
      sourceWithQuotes.substring(
          TRIPLE_QUOTE.length + 1, sourceWithQuotes.length - TRIPLE_QUOTE.length)
  val lines = source.lines()
  val lastIndex = lines.lastIndex
  // `lines` always has at least one element, and the last one always takes part.
  val commonIndent =
      lines
          .filterIndexed { index, line -> index == lastIndex || line.isNotBlank() }
          .minOf { line -> line.takeWhile { it.isWhitespace() }.length }
  return lines.joinToString("\n") { line ->
    if (line.isBlank()) {
      ""
    } else {
      // A non-blank line has at least `commonIndent` leading whitespace characters.
      unescapeJava(line.drop(commonIndent).trimEnd())
    }
  }
}
/**
 * Parses a triple-quoted literal: removes the delimiter at each end and inlines every `${'x'}`
 * character template in between.
 *
 * @throws IllegalStateException if the input does not start and end with the delimiter.
 */
fun parseMultiKotlin(sourceWithQuotes: String): String {
  check(sourceWithQuotes.startsWith(TRIPLE_QUOTE))
  check(sourceWithQuotes.endsWith(TRIPLE_QUOTE))
  val bodyStart = TRIPLE_QUOTE.length
  val bodyEnd = sourceWithQuotes.length - TRIPLE_QUOTE.length
  val body = sourceWithQuotes.substring(bodyStart, bodyEnd)
  return inlineDollars(body)
}
}

Test:
/**
 * Tests for the encoders and parsers of [LiteralString].
 *
 * Expected values are written with `'` or a backtick standing in for `"`, which the helpers
 * swap back before comparing, so the cases need less escaping.
 */
class LiteralStringTest {
  // The text ${'$'}, assembled without writing a dollar template in this file.
  private val KOTLIN_DOLLAR = "s{'s'}".replace('s', '$')

  @Test
  fun encodeSingleJava() {
    assertEncodeSingleJava("1", "'1'")
    assertEncodeSingleJava("\\", "'\\\\'")
    assertEncodeSingleJava("1\n\tABC", "'1\\n\\tABC'")
  }

  private fun assertEncodeSingleJava(value: String, expected: String) {
    val wanted = expected.replace("'", "\"")
    LiteralString.encodeSingleJava(value) shouldBe wanted
  }

  @Test
  fun encodeSingleJavaWithDollars() {
    assertEncodeSingleJavaWithDollars("1", "`1`")
    assertEncodeSingleJavaWithDollars("\\", "`\\\\`")
    assertEncodeSingleJavaWithDollars("$", "`s{'s'}`".replace('s', '$'))
    assertEncodeSingleJavaWithDollars("1\n\tABC", "`1\\n\\tABC`")
  }

  private fun assertEncodeSingleJavaWithDollars(value: String, expected: String) {
    val wanted = expected.replace("`", "\"")
    LiteralString.encodeSingleJavaWithDollars(value) shouldBe wanted
  }

  @Test
  fun encodeMultiJava() {
    assertEncodeMultiJava("1", "'''\n1'''")
    assertEncodeMultiJava("\\", "'''\n\\\\'''")
    assertEncodeMultiJava(
        " leading\ntrailing ", "'''\n" + "\\s leading\n" + "trailing \\s'''")
  }

  private fun assertEncodeMultiJava(value: String, expected: String) {
    val wanted = expected.replace("'", "\"")
    LiteralString.encodeMultiJava(value, EscapeLeadingWhitespace.ALWAYS) shouldBe wanted
  }

  @Test
  fun encodeMultiKotlin() {
    assertEncodeMultiKotlin("1", "```1```")
    assertEncodeMultiKotlin("$", "```$KOTLIN_DOLLAR```")
  }

  private fun assertEncodeMultiKotlin(value: String, expected: String) {
    val wanted = expected.replace("`", "\"")
    LiteralString.encodeMultiKotlin(value, EscapeLeadingWhitespace.ALWAYS) shouldBe wanted
  }

  @Test
  fun parseSingleJava() {
    assertParseSingleJava("1", "1")
    assertParseSingleJava("\\\\", "\\")
    assertParseSingleJava("1\\n\\tABC", "1\n\tABC")
  }

  private fun assertParseSingleJava(value: String, expected: String) {
    val literal = "\"${value.replace("'", "\"")}\""
    LiteralString.parseSingleJava(literal) shouldBe expected
  }

  @Test
  fun parseMultiJava() {
    assertParseMultiJava("\n123\nabc", "123\nabc")
    assertParseMultiJava("\n 123\n abc", "123\nabc")
    // NOTE(review): the next two cases are identical as given here - confirm whether they were
    // meant to differ in whitespace.
    assertParseMultiJava("\n 123 \n abc\t", "123\nabc")
    assertParseMultiJava("\n 123 \n abc\t", "123\nabc")
    assertParseMultiJava("\n 123 \\s\n abc\t\\s", "123 \nabc\t ")
  }

  private fun assertParseMultiJava(value: String, expected: String) {
    val literal = "\"\"\"${value.replace("'", "\"")}\"\"\""
    LiteralString.parseMultiJava(literal) shouldBe expected
  }

  @Test
  fun parseSingleJavaWithDollars() {
    assertParseSingleJavaWithDollars("1", "1")
    assertParseSingleJavaWithDollars("\\\\", "\\")
    assertParseSingleJavaWithDollars("s{'s'}".replace('s', '$'), "$")
    assertParseSingleJavaWithDollars("1\\n\\tABC", "1\n\tABC")
  }

  private fun assertParseSingleJavaWithDollars(value: String, expected: String) {
    val literal = "\"${value}\""
    LiteralString.parseSingleJavaWithDollars(literal) shouldBe expected
  }
}

@nedtwigg
Copy link
Member

Note that Python strings are fairly different from Java and Kotlin strings. The Python implementation should be a little easier than the Kotlin code, but it means that a straight-up port won't work as well.

@nedtwigg nedtwigg reopened this Mar 15, 2024
@nedtwigg
Copy link
Member

nedtwigg commented Apr 4, 2024

@nedtwigg nedtwigg closed this as completed Apr 4, 2024
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants