-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathto_remove.js
148 lines (139 loc) · 4.46 KB
/
to_remove.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// I keep this unneeded code there because I can, who're we to force code to go
// away?
// Yeah I know VCS exist but it is the inferior paradigm compared to commenting
// out code and/or moving it elsewhere.
//
// Jokes aside, this file is just here for helping with brainstorming.
// UTF-16 code units of the ASCII characters that delimit or belong to the
// character classes checked by the name-validation code below.
const CHAR_CODE_UPPER_A = 65; // "A"
const CHAR_CODE_UPPER_Z = 90; // "Z"
const CHAR_CODE_LOWER_A = 97; // "a"
const CHAR_CODE_LOWER_Z = 122; // "z"
const CHAR_CODE_0 = 48; // "0"
const CHAR_CODE_9 = 57; // "9"
const CHAR_CODE_DASH = 45; // "-"
const CHAR_CODE_UNDERSCORE = 95; // "_"
const CHAR_CODE_COLON = 58; // ":"
const CHAR_CODE_DOT = 46; // "."
/* eslint-disable max-len */
/**
 * Tell whether `str` is a valid SGML ID / NAME token.
 *
 * NOTE: Thrown away because there is no point in validating names when relying
 * on `document.createElement`, and HTML5 isn't even based on SGML.
 *
 * As per SGML:
 * ID and NAME tokens must begin with a letter ([A-Za-z]) and may be followed
 * by any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
 * colons (":"), and periods (".").
 *
 * NOTE: Browsers seem to be much more tolerant, the reason is not known yet.
 *
 * Both Firefox:
 * https://searchfox.org/mozilla-central/rev/fb43eb3bdf5b51000bc7dfe3474cbe56ca2ab63c/parser/expat/lib/moz_extensions.c#27
 *
 * and Chrome:
 * https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/blink/renderer/core/dom/document.cc;l=599
 *
 * actually authorize many more characters. The current guess is that this is
 * only to better interact with XML-related APIs, though it is perfectly fine
 * to add those elements to the DOM, for whatever reason.
 *
 * This check deliberately stays on the strict (SGML) side for now.
 * @param {string} str - Candidate name.
 * @returns {boolean} `true` if `str` is non-empty, starts with an ASCII letter
 * and only contains ASCII letters, digits, "-", "_", ":" and "." afterwards.
 */
/* eslint-enable max-len */
function isValidSgmlName(str) {
  if (str.length === 0) {
    return false;
  }

  // Shared by the first-character check and the loop so that both use the
  // exact same (fully bounded) ranges.
  const isAsciiLetter = (code) =>
    (code >= CHAR_CODE_UPPER_A && code <= CHAR_CODE_UPPER_Z) ||
    (code >= CHAR_CODE_LOWER_A && code <= CHAR_CODE_LOWER_Z);

  if (!isAsciiLetter(str.charCodeAt(0))) {
    return false;
  }

  for (let i = 1; i < str.length; i++) {
    const code = str.charCodeAt(i);
    const isAllowed =
      isAsciiLetter(code) ||
      (code >= CHAR_CODE_0 && code <= CHAR_CODE_9) ||
      code === CHAR_CODE_UNDERSCORE ||
      code === CHAR_CODE_DASH ||
      code === CHAR_CODE_COLON ||
      code === CHAR_CODE_DOT;
    if (!isAllowed) {
      return false;
    }
  }
  return true;
}
/**
 * Skip over a character reference (`&...;`) starting at `baseOffset`.
 *
 * NOTE: Thrown away because: why did I need this?
 *
 * The returned value is always an absolute offset in `currStr`:
 *   - `baseOffset` itself when the character at `baseOffset` is not "&"
 *     (nothing to skip),
 *   - the offset just after the first ";" found after the "&",
 *   - `currStr.length` when no ";" follows the "&" (the remainder of the
 *     string is skipped).
 *
 * NOTE(review): returning the offset *after* the ";" (rather than the offset
 * of the ";" itself) is chosen to be consistent with the two other return
 * paths, which both point at the first character left to process.
 * @param {string} currStr - String being parsed.
 * @param {number} baseOffset - Offset in `currStr` where a character
 * reference may start.
 * @returns {number} Absolute offset in `currStr` of the first character
 * following the character reference.
 */
function skipCharacterReference(currStr, baseOffset) {
  if (currStr[baseOffset] !== "&") {
    return baseOffset;
  }
  // Search from just after the "&" directly in `currStr` so that the index
  // found is absolute and not relative to a substring.
  const semicolonOffset = currStr.indexOf(";", baseOffset + 1);
  if (semicolonOffset === -1) {
    return currStr.length;
  }
  return semicolonOffset + 1;
}
/**
 * Validate an element's closing tag and return the offset right after it.
 *
 * NOTE: thrown away because it's easier to understand when just inlined for
 * now.
 *
 * Only the first string of `remainingStr` is inspected here. The tag name is
 * read starting at `baseOffset + 2` (presumably to skip a leading "</" --
 * TODO confirm against callers), compared with `lastTagName`, then optional
 * white space and a mandatory ">" are consumed.
 *
 * Relies on `skipToElementNameDeclarationEnd`, `skipWhiteSpace` and
 * `checkExprWrongPlace`, which are defined outside of this block; they are
 * assumed to respectively return the offset ending the tag name, return the
 * offset of the next non-white-space character, and report an expression
 * found at a forbidden place -- TODO confirm.
 * @param {Array.<string>} remainingStr - Remaining strings to parse; the
 * closing tag is looked up in `remainingStr[0]`.
 * @param {number} baseOffset - Offset in `remainingStr[0]` where the closing
 * tag starts.
 * @param {string} lastTagName - Name of the last opened element, which the
 * closing tag has to match.
 * @returns {number} Offset in `remainingStr[0]` just after the closing ">".
 * @throws {SyntaxError} If the tag name differs from `lastTagName` or if the
 * closing tag is not terminated by ">".
 */
function processTagClosingElement(remainingStr, baseOffset, lastTagName) {
  const currStr = remainingStr[0];
  const nameStart = baseOffset + 2;
  const nameEnd = skipToElementNameDeclarationEnd(currStr, nameStart);
  if (currStr.substring(nameStart, nameEnd) !== lastTagName) {
    checkExprWrongPlace(remainingStr, "in an element's closing tag");
    throw new SyntaxError(
      "str-html: Closing tag does not correspond to last opened tag."
    );
  }
  const closingOffset = skipWhiteSpace(currStr, nameEnd);
  if (currStr[closingOffset] !== ">") {
    checkExprWrongPlace(remainingStr, "in an element's closing tag");
    throw new SyntaxError("str-html: Malformed closing tag.");
  }
  return closingOffset + 1;
}
/**
 * Measure the HTML newline sequence found at `offset` in `str`, if any.
 *
 * NOTE: Thrown away because no point now.
 *
 * In HTML, a newline is either a lone U+000D CARRIAGE RETURN (CR), a lone
 * U+000A LINE FEED (LF), or a CR immediately followed by an LF.
 * @param {string} str - String to inspect.
 * @param {number} offset - Offset in `str` where a newline may start.
 * @returns {number} `2` for a CR LF pair, `1` for a lone CR or LF, `0` when
 * no newline starts at `offset`.
 */
function getHtmlNewLineLengthAtOffset(str, offset) {
  switch (str[offset]) {
    case "\r":
      // A CR may be the first half of a CR LF pair.
      return str[offset + 1] === "\n" ? 2 : 1;
    case "\n":
      return 1;
    default:
      return 0;
  }
}
/**
 * Check that the character at `offset` in `str` is the ">" ending a tag.
 * @param {string} str - String to inspect.
 * @param {number} offset - Offset in `str` where ">" is expected.
 * @returns {number} The offset right after the ">", or `-1` if the character
 * at `offset` is not ">".
 */
function checkTagOpeningEndElement(str, offset) {
  return str[offset] === ">" ? offset + 1 : -1;
}