From a690c8adf068bb188df0cb8b6ec6e32d00762fe3 Mon Sep 17 00:00:00 2001 From: Alex Ehlke Date: Fri, 2 Aug 2024 01:15:25 -0400 Subject: [PATCH] optimizations --- Sources/CharacterReader.swift | 12 +++++------- Sources/Entities.swift | 2 +- Sources/TokeniserState.swift | 14 +++++++------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/Sources/CharacterReader.swift b/Sources/CharacterReader.swift index 1ef8d13..93adc5a 100644 --- a/Sources/CharacterReader.swift +++ b/Sources/CharacterReader.swift @@ -59,14 +59,12 @@ public final class CharacterReader { return String(scalar) } - public func consumeToAny(_ chars: Set) -> String { + public func consumeToAny(_ chars: Set) -> String { let start = pos - let utf8CodeUnits = Set(chars.flatMap { $0.utf8 }) - while pos < input.endIndex { let utf8Byte = input[pos] - if utf8CodeUnits.contains(utf8Byte) { + if chars.contains(utf8Byte) { break } input.formIndex(after: &pos) @@ -276,19 +274,19 @@ public final class CharacterReader { } } - static let dataTerminators: Set = [.Ampersand, .LessThan, TokeniserStateVars.nullScalr] + static let dataTerminators = Set([.Ampersand, .LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) public func consumeData() -> String { return consumeToAny(CharacterReader.dataTerminators) } - static let tagNameTerminators: Set = [.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr] + static let tagNameTerminators = Set([.BackslashT, .BackslashN, .BackslashR, .BackslashF, .Space, .Slash, .GreaterThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) public func consumeTagName() -> String { return consumeToAny(CharacterReader.tagNameTerminators) } - public func consumeToAnySorted(_ chars: Set) -> String { + public func consumeToAnySorted(_ chars: Set) -> String { return consumeToAny(chars) } } diff --git a/Sources/Entities.swift b/Sources/Entities.swift index 8e91531..84fbe4e 100644 --- a/Sources/Entities.swift +++ b/Sources/Entities.swift @@ -51,7 +51,7 @@ public class Entities { return left.value != right.value } - private static let codeDelims: Set = Set([",", ";"]) + private static let codeDelims = Set([",", ";"].flatMap { $0.utf8 }) init(string: String, size: Int, id: Int) { diff --git a/Sources/TokeniserState.swift b/Sources/TokeniserState.swift index d58a651..4c0d9fe 100644 --- a/Sources/TokeniserState.swift +++ b/Sources/TokeniserState.swift @@ -15,14 +15,14 @@ protocol TokeniserStateProtocol { public class TokeniserStateVars { public static let nullScalr: UnicodeScalar = "\u{0000}" - static let attributeSingleValueChars = Set(["'", UnicodeScalar.Ampersand, nullScalr]) - static let attributeDoubleValueChars = Set(["\"", UnicodeScalar.Ampersand, nullScalr]) - static let attributeNameChars = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", "/", "=", ">", nullScalr, "\"", "'", UnicodeScalar.LessThan]) - static let attributeValueUnquoted = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", UnicodeScalar.Ampersand, ">", nullScalr, "\"", "'", UnicodeScalar.LessThan, "=", "`"]) + static let attributeSingleValueChars = Set(["'", UnicodeScalar.Ampersand, nullScalr].flatMap { $0.utf8 }) + static let attributeDoubleValueChars = Set(["\"", UnicodeScalar.Ampersand, nullScalr].flatMap { $0.utf8 }) + static let attributeNameChars = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", "/", "=", ">", nullScalr, "\"", "'", UnicodeScalar.LessThan].flatMap { $0.utf8 }) + static let attributeValueUnquoted = Set([UnicodeScalar.BackslashT, "\n", "\r", UnicodeScalar.BackslashF, " ", UnicodeScalar.Ampersand, ">", nullScalr, "\"", "'", UnicodeScalar.LessThan, "=", "`"].flatMap { $0.utf8 }) - static let dataDefaultStopChars: Set = [UnicodeScalar.Ampersand, UnicodeScalar.LessThan, TokeniserStateVars.nullScalr] - static let commentDefaultStopChars: Set = ["-", TokeniserStateVars.nullScalr] - static let readDataDefaultStopChars: Set = [UnicodeScalar.LessThan, TokeniserStateVars.nullScalr] + static let dataDefaultStopChars = Set([UnicodeScalar.Ampersand, UnicodeScalar.LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + static let commentDefaultStopChars = Set(["-", TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) + static let readDataDefaultStopChars = Set([UnicodeScalar.LessThan, TokeniserStateVars.nullScalr].flatMap { $0.utf8 }) static let replacementChar: UnicodeScalar = Tokeniser.replacementChar