Skip to content

Commit

Permalink
Closing tags behavior (#492)
Browse files Browse the repository at this point in the history
* Change behavior to match previously removed XHTML serializer

* Add test for XHTML behvior

* Update tests for output serialization regression

* Add serializeElement override form DOM output

* Automatic tidy refactor
  • Loading branch information
spassarop authored Nov 18, 2024
1 parent 0b6878c commit 03652a1
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 25 deletions.
168 changes: 157 additions & 11 deletions src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,31 @@

import java.io.IOException;
import java.io.Writer;
import java.util.Locale;
import org.apache.xml.serialize.ElementState;
import org.apache.xml.serialize.HTMLdtd;
import org.apache.xml.serialize.OutputFormat;
import org.owasp.validator.html.InternalPolicy;
import org.owasp.validator.html.TagMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

@SuppressWarnings("deprecation")
public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer {

private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class);
private boolean encodeAllPossibleEntities;
private final TagMatcher allowedEmptyTags;
private final TagMatcher requireClosingTags;

public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) {
super(w, format);
this.allowedEmptyTags = policy.getAllowedEmptyTags();
this.requireClosingTags = policy.getRequiresClosingTags();
this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters();
}

Expand All @@ -27,6 +37,128 @@ protected String getEntityRef(int charToPrint) {
return null;
}

/**
* Called to serialize a DOM element. Equivalent to calling {@link #startElement}, {@link
* #endElement} and serializing everything inbetween, but better optimized.
*/
@Override
protected void serializeElement(Element elem) throws IOException {
Attr attr;
NamedNodeMap attrMap;
int i;
Node child;
ElementState state;
boolean preserveSpace;
String name;
String value;
String tagName;

tagName = elem.getTagName();
state = getElementState();
if (isDocumentState()) {
// If this is the root element handle it differently.
// If the first root element in the document, serialize
// the document's DOCTYPE. Space preserving defaults
// to that of the output format.
if (!_started) startDocument(tagName);
} else {
// For any other element, if first in parent, then
// close parent's opening tag and use the parnet's
// space preserving.
if (state.empty) _printer.printText('>');
// Indent this element on a new line if the first
// content of the parent element or immediately
// following an element.
if (_indenting && !state.preserveSpace && (state.empty || state.afterElement))
_printer.breakLine();
}
preserveSpace = state.preserveSpace;

// Do not change the current element state yet.
// This only happens in endElement().

// XHTML: element names are lower case, DOM will be different
_printer.printText('<');
_printer.printText(tagName);
_printer.indent();

// Lookup the element's attribute, but only print specified
// attributes. (Unspecified attributes are derived from the DTD.
// For each attribute print it's name and value as one part,
// separated with a space so the element can be broken on
// multiple lines.
attrMap = elem.getAttributes();
if (attrMap != null) {
for (i = 0; i < attrMap.getLength(); ++i) {
attr = (Attr) attrMap.item(i);
name = attr.getName().toLowerCase(Locale.ENGLISH);
value = attr.getValue();
if (attr.getSpecified()) {
_printer.printSpace();
// HTML: Empty values print as attribute name, no value.
// HTML: URI attributes will print unescaped
if (value == null) {
value = "";
}
if (!_format.getPreserveEmptyAttributes() && value.length() == 0)
_printer.printText(name);
else if (HTMLdtd.isURI(tagName, name)) {
_printer.printText(name);
_printer.printText("=\"");
_printer.printText(escapeURI(value));
_printer.printText('"');
} else if (HTMLdtd.isBoolean(tagName, name)) _printer.printText(name);
else {
_printer.printText(name);
_printer.printText("=\"");
printEscaped(value);
_printer.printText('"');
}
}
}
}
if (HTMLdtd.isPreserveSpace(tagName)) preserveSpace = true;

// If element has children, or if element is not an empty tag,
// serialize an opening tag.
if (elem.hasChildNodes() || !HTMLdtd.isEmptyTag(tagName)) {
// Enter an element state, and serialize the children
// one by one. Finally, end the element.
state = enterElementState(null, null, tagName, preserveSpace);

// Prevents line breaks inside A/TD
if (tagName.equalsIgnoreCase("A") || tagName.equalsIgnoreCase("TD")) {
state.empty = false;
_printer.printText('>');
}

// Handle SCRIPT and STYLE specifically by changing the
// state of the current element to CDATA (XHTML) or
// unescaped (HTML).
if (tagName.equalsIgnoreCase("SCRIPT") || tagName.equalsIgnoreCase("STYLE")) {
// HTML: Print contents unescaped
state.unescaped = true;
}
child = elem.getFirstChild();
while (child != null) {
serializeNode(child);
child = child.getNextSibling();
}
endElementIO(null, null, tagName);
} else {
_printer.unindent();
// XHTML: Close empty tag with ' />' so it's XML and HTML compatible.
// HTML: Empty tags are defined as such in DTD no in document.
if (!elem.hasChildNodes() && isAllowedEmptyTag(tagName) && !requiresClosingTag(tagName))
_printer.printText("/>");
else _printer.printText('>');
// After element but parent element is no longer empty.
state.afterElement = true;
state.empty = false;
if (isDocumentState()) _printer.flush();
}
}

@Override
public void endElementIO(String namespaceURI, String localName, String rawName)
throws IOException {
Expand All @@ -38,17 +170,23 @@ public void endElementIO(String namespaceURI, String localName, String rawName)
_printer.unindent();
state = getElementState();

if (state.empty) _printer.printText('>');
// This element is not empty and that last content was another element, so print a line break
// before that last element and this element's closing tag. [keith] Provided this is not an
// anchor. HTML: some elements do not print closing tag (e.g. LI)
if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) {
if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine();
// Must leave CData section first (Illegal in HTML, but still)
if (state.inCData) _printer.printText("]]>");
_printer.printText("</");
_printer.printText(state.rawName);
_printer.printText('>');
if (state.empty && isAllowedEmptyTag(rawName) && !requiresClosingTag(rawName)) { //
_printer.printText("/>");
} else {
if (state.empty) _printer.printText('>');
// This element is not empty and that last content was another element, so print a line break
// before that last element and this element's closing tag. [keith] Provided this is not an
// anchor. HTML: some elements do not print closing tag (e.g. LI)
if (rawName == null
|| !HTMLdtd.isOnlyOpening(rawName)
|| HTMLdtd.isOptionalClosing(rawName)) {
if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine();
// Must leave CData section first (Illegal in HTML, but still)
if (state.inCData) _printer.printText("]]>");
_printer.printText("</");
_printer.printText(state.rawName);
_printer.printText('>');
}
}

// Leave the element state and update that of the parent (if we're not root) to not empty and
Expand Down Expand Up @@ -76,4 +214,12 @@ protected String escapeURI(String uri) {
}
return "";
}

private boolean requiresClosingTag(String tagName) {
return requireClosingTags.matches(tagName);
}

private boolean isAllowedEmptyTag(String tagName) {
return "head".equals(tagName) || allowedEmptyTags.matches(tagName);
}
}
51 changes: 37 additions & 14 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2023, Arshan Dabirsiaghi, Jason Li
* Copyright (c) 2007-2024, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
Expand Down Expand Up @@ -48,7 +48,10 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Base64;
Expand All @@ -63,6 +66,7 @@
import org.owasp.validator.html.model.Attribute;
import org.owasp.validator.html.model.Property;
import org.owasp.validator.html.model.Tag;
import org.owasp.validator.html.scan.Constants;

/**
* This class tests AntiSamy functionality and the basic policy file which should be immune to XSS
Expand Down Expand Up @@ -100,6 +104,7 @@ public class AntiSamyTest {

private AntiSamy as = new AntiSamy();
private TestPolicy policy = null;
private ResourceBundle messages = null;

@Before
public void setUp() throws Exception {
Expand All @@ -111,6 +116,13 @@ public void setUp() throws Exception {
// get Policy instance from a URL.
URL url = getClass().getResource("/antisamy.xml");
policy = TestPolicy.getInstance(url);
try {
messages = ResourceBundle.getBundle("AntiSamy", Locale.getDefault());
} catch (MissingResourceException mre) {
messages =
ResourceBundle.getBundle(
"AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, Constants.DEFAULT_LOCALE_LOC));
}
}

@Test
Expand Down Expand Up @@ -868,8 +880,8 @@ public void issue12() throws ScanException, PolicyException {
assertFalse(p.matcher(s1).matches());
assertFalse(p.matcher(s2).matches());

assertThat(s1, containsString("<hr>"));
assertThat(s2, containsString("<hr>"));
assertThat(s1, containsString("<hr/>"));
assertThat(s2, containsString("<hr/>"));
}

@Test
Expand Down Expand Up @@ -1431,7 +1443,7 @@ public void issue112() throws ScanException, PolicyException {

StringBuilder sb = new StringBuilder();
sb.append("<html><head><title>foobar</title></head><body>");
sb.append("<img src=\"http://foobar.com/pic.gif\"></body></html>");
sb.append("<img src=\"http://foobar.com/pic.gif\"/></body></html>");

html = sb.toString();

Expand Down Expand Up @@ -1572,12 +1584,12 @@ public void validateParamAsEmbed() throws ScanException, PolicyException {
String input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
String expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"></embed></object>";
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
CleanResults cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));

String saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
cr = as.scan(input, revised, AntiSamy.SAX);
assertThat(cr.getCleanHTML(), equalTo(saxExpectedOutput));

Expand All @@ -1586,9 +1598,9 @@ public void validateParamAsEmbed() throws ScanException, PolicyException {
input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://supermaliciouscode.com/badstuff.swf\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"></embed></object>";
"<object height=\"340\" width=\"560\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed allowfullscreen=\"true\" allowscriptaccess=\"always\" height=\"340\" src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" width=\"560\"/></object>";
saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
"<object width=\"560\" height=\"340\"><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/><embed src=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"/></object>";
cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));

Expand All @@ -1600,9 +1612,9 @@ public void validateParamAsEmbed() throws ScanException, PolicyException {
input =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&hl=en&fs=1&\"></param><param name=\"allowFullScreen\" value=\"true\"></param><param name=\"allowscriptaccess\" value=\"always\"></param><embed src=\"http://hereswhereikeepbadcode.com/ohnoscary.swf\" type=\"application/x-shockwave-flash\" allowscriptaccess=\"always\" allowfullscreen=\"true\" width=\"560\" height=\"340\"></embed></object>";
expectedOutput =
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"></object>";
"<object height=\"340\" width=\"560\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";
saxExpectedOutput =
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"><param name=\"allowFullScreen\" value=\"true\"><param name=\"allowscriptaccess\" value=\"always\"></object>";
"<object width=\"560\" height=\"340\"><param name=\"movie\" value=\"http://www.youtube.com/v/IyAyd4WnvhU&amp;hl=en&amp;fs=1&amp;\"/><param name=\"allowFullScreen\" value=\"true\"/><param name=\"allowscriptaccess\" value=\"always\"/></object>";

cr = as.scan(input, revised, AntiSamy.DOM);
assertThat(cr.getCleanHTML(), containsString(expectedOutput));
Expand Down Expand Up @@ -1799,10 +1811,10 @@ public void testXSSInAntiSamy151() throws ScanException, PolicyException {
CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX);
CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM);

assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML());
assertEquals(
"whatever<img src=\"https://ssl.gstatic.com/codesite/ph/images/defaultlogo.png\">",
results_dom.getCleanHTML());
"whatever<img src=\"https://ssl.gstatic.com/codesite/ph/images/defaultlogo.png\"/>",
results_sax.getCleanHTML());
assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML());
}

@Test
Expand Down Expand Up @@ -1845,7 +1857,7 @@ public void testStreamScan() throws ScanException {
Writer writer = new StringWriter();
as.scan(reader, writer, policy);
String cleanHtml = writer.toString().trim();
assertEquals("whatever" + testImgSrcURL + ">", cleanHtml);
assertEquals("whatever" + testImgSrcURL + "/>", cleanHtml);
}

@Test
Expand Down Expand Up @@ -2702,4 +2714,15 @@ public void testGithubIssue453() throws ScanException, PolicyException {
+ "<select name=\"Lang\"> "
+ "<option value=\"da\">Dansk</option> "));
}

@Test
public void testGithubIssue484() throws ScanException, PolicyException {
String s = "<p>this is para data</p><br/><p>this is para data 2</p>";
CleanResults crDom = as.scan(s, policy, AntiSamy.DOM);
CleanResults crSax = as.scan(s, policy, AntiSamy.SAX);
String domValue = crDom.getCleanHTML();
String saxValue = crSax.getCleanHTML();
assertEquals("<p>this is para data</p>\n" + "<br/>\n" + "<p>this is para data 2</p>", domValue);
assertEquals("<p>this is para data</p>\n" + "<br/>\n" + "<p>this is para data 2</p>", saxValue);
}
}

0 comments on commit 03652a1

Please sign in to comment.