From b635ecf29c93c2b4e8923c8e33f9c163c6124b5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Wed, 2 Feb 2022 20:12:42 -0300 Subject: [PATCH 01/36] Add constants for two directives names --- src/main/java/org/owasp/validator/html/Policy.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 42b90c08..64f1353a 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -144,6 +144,8 @@ public class Policy { public static final String PRESERVE_COMMENTS = "preserveComments"; public static final String ENTITY_ENCODE_INTL_CHARS = "entityEncodeIntlChars"; public static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes"; + public static final String MAX_INPUT_SIZE = "maxInputSize"; + public static final String MAX_STYLESHEET_IMPORTS = "maxStyleSheetImports"; public static final String EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities"; public static final String EXTERNAL_PARAM_ENTITIES = "http://xml.org/sax/features/external-parameter-entities"; From 01a27b266c4a09f9ddff07de31f984a12dfdbd04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Wed, 2 Feb 2022 20:13:32 -0300 Subject: [PATCH 02/36] Add tests to improve coverage CssScannerTest is a new class for specific test cases on CssScanner. 
--- .../owasp/validator/css/CssScannerTest.java | 122 ++++++++++++++++++ .../validator/html/test/AntiSamyTest.java | 26 ++++ 2 files changed, 148 insertions(+) create mode 100644 src/test/java/org/owasp/validator/css/CssScannerTest.java diff --git a/src/test/java/org/owasp/validator/css/CssScannerTest.java b/src/test/java/org/owasp/validator/css/CssScannerTest.java new file mode 100644 index 00000000..0473ae43 --- /dev/null +++ b/src/test/java/org/owasp/validator/css/CssScannerTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Sebastián Passaro + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this list + * of conditions and the following disclaimer. Redistributions in binary form must + * reproduce the above copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of OWASP nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.owasp.validator.css; + +import org.junit.Before; +import org.junit.Test; +import org.owasp.validator.html.*; +import org.owasp.validator.html.scan.Constants; +import org.owasp.validator.html.test.TestPolicy; + +import java.net.URL; +import java.util.Locale; +import java.util.MissingResourceException; +import java.util.ResourceBundle; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; +import static org.hamcrest.MatcherAssert.assertThat; + +public class CssScannerTest { + private TestPolicy policy = null; + private ResourceBundle messages = null; + + @Before + public void setUp() throws Exception { + // Load the policy. You may have to change the path to find the Policy file for your environment. 
+ URL url = getClass().getResource("/antisamy.xml"); + policy = TestPolicy.getInstance(url); + // Load resource bundle + try { + messages = ResourceBundle.getBundle("AntiSamy", Locale.getDefault()); + } catch (MissingResourceException mre) { + messages = ResourceBundle.getBundle("AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, + Constants.DEFAULT_LOCALE_LOC)); + } + } + + @Test + public void testAvoidImportingStyles() throws ScanException { + final String input = "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/classic.css);\n" + + ".very-specific-antisamy {font: 15pt \"Arial\"; color: blue;}"; + // If not passing "shouldParseImportedStyles" then it's false by default. + CssScanner scanner = new CssScanner(policy, messages); + String result = scanner.scanStyleSheet(input, 1000).getCleanHTML(); + // If style sheet was imported, .grid_1 class should be there. + assertThat(result, not(containsString(".grid_1"))); + assertThat(result, containsString(".very-specific-antisamy")); + } + + @Test + public void testAvoidCdataWhenUsingXhtml() throws ScanException { + final String input = ""; + + TestPolicy revised = policy.cloneWithDirective(Policy.USE_XHTML,"true"); + CssScanner scanner = new CssScanner(revised, messages); + assertThat(scanner.scanStyleSheet(input, 1000).getCleanHTML(), not(containsString("CDATA"))); + + revised = policy.cloneWithDirective(Policy.USE_XHTML,"false"); + scanner = new CssScanner(revised, messages); + assertThat(scanner.scanStyleSheet(input, 1000).getCleanHTML(), containsString("CDATA")); + } + + @Test + public void testImportLimiting() throws ScanException { + final String input = "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/classic.css);\n" + + "@import url(https://raw.githubusercontent.com/nahsra/antisamy/main/src/test/resources/s/slashdot.org_files/providers.css);\n" + + ".very-specific-antisamy {font: 15pt 
\"Arial\"; color: blue;}"; + TestPolicy revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.MAX_INPUT_SIZE,"500") + .cloneWithDirective(Policy.MAX_STYLESHEET_IMPORTS,"2"); + CssScanner scanner = new CssScanner(revised, messages, true); + CleanResults result = scanner.scanStyleSheet(input, 500); + // Both sheets are larger than 500 bytes + assertThat(result.getErrorMessages().size(), is(2)); + assertThat(result.getErrorMessages().get(0), containsString("500")); + + // Limit to only one import + revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.MAX_STYLESHEET_IMPORTS,"1"); + scanner = new CssScanner(revised, messages, true); + result = scanner.scanStyleSheet(input, 500000); + // If only first style sheet was imported, .grid_1 class should be there and .janrain-provider150-sprit classes should not. + assertThat(result.getCleanHTML(), containsString(".grid_1")); + assertThat(result.getCleanHTML(), not(containsString(".janrain-provider150-sprit"))); + + // Force timeout errors + revised = policy.cloneWithDirective(Policy.EMBED_STYLESHEETS,"true") + .cloneWithDirective(Policy.CONNECTION_TIMEOUT,"1"); + scanner = new CssScanner(revised, messages, true); + result = scanner.scanStyleSheet(input, 500000); + assertThat(result.getErrorMessages().size(), is(2)); + // If style sheets were imported, .grid_1 and .janrain-provider150-sprit classes should be there. 
+ assertThat(result.getCleanHTML(), not(containsString(".grid_1"))); + assertThat(result.getCleanHTML(), not(containsString(".janrain-provider150-sprit"))); + } +} diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index e15a2bc7..cca9d8d2 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -44,11 +44,13 @@ import org.junit.Before; import org.junit.Test; +import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; +import java.net.URISyntaxException; import java.net.URL; import java.util.Arrays; import java.util.Collections; @@ -1662,5 +1664,29 @@ public void testLeadingDashOnPropertyName() throws ScanException, PolicyExceptio assertThat(as.scan(input, revised, AntiSamy.DOM).getCleanHTML(), both(containsString("-webkit-border-radius")).and(containsString("-moz-border-radius"))); assertThat(as.scan(input, revised, AntiSamy.SAX).getCleanHTML(), both(containsString("-webkit-border-radius")).and(containsString("-moz-border-radius"))); } + + @Test + public void testScansWithDifferentPolicyLoading() throws ScanException, PolicyException, URISyntaxException { + final String input = "text"; + // Preload policy, do not specify scan type. + AntiSamy asInstance = new AntiSamy(policy); + assertThat(asInstance.scan(input).getCleanHTML(), is(input)); + // Pass policy, assume DOM scan type. + assertThat(asInstance.scan(input, policy).getCleanHTML(), is(input)); + // Pass policy as File. + File policyFile = new File(getClass().getResource("/antisamy.xml").toURI()); + assertThat(asInstance.scan(input, policyFile).getCleanHTML(), is(input)); + // Pass policy filename. 
+ String path = getClass().getResource("/antisamy.xml").getPath(); + path = System.getProperty("file.separator").equals("\\") && path.startsWith("/") ? path.substring(1) : path; + assertThat(asInstance.scan(input, path).getCleanHTML(), is(input)); + // No preloaded nor passed policy, expected to fail. + try { + as.scan(input, null, AntiSamy.DOM); + fail("Scan with no policy must have thrown an exception."); + } catch (PolicyException e) { + // An error is expected. Pass. + } + } } From 78e77ae1376d68b0029b6ccb2193b3e5ca398b0b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Feb 2022 05:03:51 +0000 Subject: [PATCH 03/36] Bump maven-project-info-reports-plugin from 3.1.2 to 3.2.1 Bumps [maven-project-info-reports-plugin](https://github.com/apache/maven-project-info-reports-plugin) from 3.1.2 to 3.2.1. - [Release notes](https://github.com/apache/maven-project-info-reports-plugin/releases) - [Commits](https://github.com/apache/maven-project-info-reports-plugin/compare/maven-project-info-reports-plugin-3.1.2...maven-project-info-reports-plugin-3.2.1) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-project-info-reports-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index aea7a118..26c6358c 100644 --- a/pom.xml +++ b/pom.xml @@ -496,7 +496,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.1.2 + 3.2.1 From 6752ca927fc80e7efd6f5fb152ea067fa6d89692 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 9 Feb 2022 05:03:26 +0000 Subject: [PATCH 04/36] Bump version.slf4j from 1.7.35 to 1.7.36 Bumps `version.slf4j` from 1.7.35 to 1.7.36. 
Updates `slf4j-api` from 1.7.35 to 1.7.36 - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.35...v_1.7.36) Updates `jcl-over-slf4j` from 1.7.35 to 1.7.36 - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.35...v_1.7.36) Updates `slf4j-simple` from 1.7.35 to 1.7.36 - [Release notes](https://github.com/qos-ch/slf4j/releases) - [Commits](https://github.com/qos-ch/slf4j/compare/v_1.7.35...v_1.7.36) --- updated-dependencies: - dependency-name: org.slf4j:slf4j-api dependency-type: direct:production update-type: version-update:semver-patch - dependency-name: org.slf4j:jcl-over-slf4j dependency-type: direct:development update-type: version-update:semver-patch - dependency-name: org.slf4j:slf4j-simple dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index aea7a118..ee8e4099 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,7 @@ true 2.6 - 1.7.35 + 1.7.36 4.5.3.0 4.5.3 From 988134d26a3c16a09b4e3e4e83cfe063428b4e8d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Feb 2022 05:04:33 +0000 Subject: [PATCH 05/36] Bump maven-javadoc-plugin from 3.3.1 to 3.3.2 Bumps [maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.3.1 to 3.3.2. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.3.1...maven-javadoc-plugin-3.3.2) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-javadoc-plugin dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index aea7a118..390af230 100644 --- a/pom.xml +++ b/pom.xml @@ -218,7 +218,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.3.1 + 3.3.2 true From 36fe73fe89b0580c21d1a2aacd50bf7a0b91acf8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 15 Feb 2022 05:03:20 +0000 Subject: [PATCH 06/36] Bump maven-compiler-plugin from 3.9.0 to 3.10.0 Bumps [maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.9.0 to 3.10.0. - [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.9.0...maven-compiler-plugin-3.10.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-compiler-plugin dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index aea7a118..0394356f 100644 --- a/pom.xml +++ b/pom.xml @@ -241,7 +241,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.9.0 + 3.10.0 1.7 1.7 From c11fdb04238029e865497a5f25a6233d8a67be35 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 15 Feb 2022 21:11:59 -0500 Subject: [PATCH 07/36] Change Java targets to Java 8, and upgrade 1 dependency that requires Java 8. Remove all @Deprecated methods, all uses of such methods, including the test cases. 
--- pom.xml | 26 ++- .../org/owasp/validator/css/CssHandler.java | 25 +-- .../java/org/owasp/validator/html/Policy.java | 160 +++--------------- .../test/AntiSamyMalformedPolicyTest.java | 33 +--- .../html/test/ESAPIInvalidPolicyTest.java | 85 +--------- .../owasp/validator/html/test/PolicyTest.java | 116 +------------ 6 files changed, 51 insertions(+), 394 deletions(-) diff --git a/pom.xml b/pom.xml index de87a0ec..40645b71 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.owasp.antisamy antisamy jar - 1.6.5 + 1.7.0-dev ossrh @@ -42,10 +42,9 @@ UTF-8 - 2022-01-31T23:13:00Z + 2022-02-30T14:33:00Z true - - 2.6 + 2.11.0 1.7.36 4.5.3.0 4.5.3 @@ -243,10 +242,10 @@ maven-compiler-plugin 3.10.0 - 1.7 - 1.7 - 1.7 - 1.7 + 1.8 + 1.8 + 1.8 + 1.8 -Xlint:unchecked @@ -258,7 +257,6 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.0.0 @@ -274,10 +272,10 @@ - 1.7 + 1.8 true test - Dependencies shouldn't require Java 8+. + Dependencies shouldn't require Java 9+. 3.3.9 @@ -292,8 +290,8 @@ - 1.7 - Antisamy is written to support Java 7+. + 1.8 + Antisamy is written to support Java 8+. @@ -483,7 +481,7 @@ org.apache.maven.plugins maven-pmd-plugin - 1.7 + 1.8 utf-8 diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index a6081a82..8341516d 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -114,29 +114,6 @@ public class CssHandler implements DocumentHandler { */ private boolean selectorOpen = false; - /** - * Constructs a handler for stylesheets using the given policy and queue for - * imported stylesheets. 
- * - * @param policy - * the policy to use - * @param embeddedStyleSheets - * the queue of stylesheets imported - * @param errorMessages - * the List of error messages to add error messages too if there are errors - * @param messages - * the error message bundle to pull from - * - * @deprecated The embeddedStyleSheets List parameter is removed in the newer version of - * this constructor as the handler has its own internal list that can be accessed through - * the getImportedStylesheetsURIList() method. - */ - @Deprecated - public CssHandler(Policy policy, LinkedList embeddedStyleSheets, - List errorMessages, ResourceBundle messages) { - this(policy, embeddedStyleSheets, errorMessages, null, messages); - } - /** * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets * produced by this constructor is now available via the getImportedStylesheetsURIList() method. diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 42b90c08..33a6538f 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold * * All rights reserved. * @@ -80,40 +80,16 @@ * *

## Schema validation behavior change starting with AntiSamy 1.6.0 ##

* - *

Prior to v1.6.0 AntiSamy was not actually enforcing it's defined XSD. Now, by default AntiSamy enforce the schema, - * and won't continue if the AntiSamy policy is invalid. However, we recognize that it might not be possible for - * developers to fix their AntiSamy policies right away if they are non-compliant, and yet still want to upgrade - * AntiSamy to pick up any security improvements, feature enhancements, and bug fixes. As such, we now provide two - * ways to (temporarily!) disable schema validation:

- * - *

1) Set the Java System property: owasp.validator.validateschema to false. This can be done at the command line - * (e.g., -Dowasp.validator.validateschema=false) or via the Java System properties file. Neither requires a code - * change.

- * - *

2) Change the code using AntiSamy to invoke: Policy.setSchemaValidation(false) before loading the AntiSamy policy. - * This is a static call so once disabled, it is disabled for all new Policy instances.

- * - *

To encourage AntiSamy users to only use XSD compliant policies, AntiSamy will always log some type of warning - * when schema validation is disabled. It will either WARN that the policy is non-compliant so it can be fixed, or - * it will WARN that the policy is compliant, but schema validation is OFF, so validation should be turned back on - * (i.e., stop disabling it). We also added INFO level logging when AntiSamy schema's are loaded and validated.

- * - *

## Disabling schema validation is deprecated immediately, and will go away in AntiSamy 1.7+ ##

- * - *

The ability to disable the new schema validation feature is intended to be temporary, to smooth the transition to - * properly valid AntiSamy policy files. We plan to drop this feature in the next major release. We estimate that - * this will be some time mid-late 2022, so not any time soon. The idea is to give dev teams using AntiSamy directly, - * or through other libraries like ESAPI, plenty of time to get their policy files schema compliant before schema - * validation becomes required.

+ *

Prior to v1.6.0 AntiSamy was not actually enforcing its defined XSD. For all of v1.6.x, by default AntiSamy + * enforced the schema, and wouldn't continue if the AntiSamy policy was invalid. However, we recognized that it might + * not be possible for developers to fix their AntiSamy policies right away, so we provided two + * ways to (temporarily!) disable schema validation: via a direct method call and via a System property.

* - *

Logging: The logging introduced in 1.6+ uses slf4j. AntiSamy includes the slf4j-simple library for its logging, - * but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also - * then exclude slf4j-simple if they want to.

+ *

## Starting with AntiSamy 1.7.0, schema validation is mandatory ##

* - *

WARNING:: AntiSamy's use of slf4j-simple, without any configuration file, logs messages in a buffered - * manner to standard output. As such, some or all of these log messages may get lost if an Exception, such as a - * PolicyException is thrown. This can likely be rectified by configuring slf4j-simple to log to standard error - * instead, or use an alternate slf4j logger that does so.

+ *

Logging: The logging introduced in 1.6+ uses slf4j. But AntiSamy doesn't actually include an slf4j implementation + * library. AntiSamy users must import and properly configure an slf4j logging library if they want to see the very + * few log messages generated by AntiSamy.

* * @author Arshan Dabirsiaghi */ @@ -168,25 +144,6 @@ public class Policy { * XML Schema for policy validation */ private static volatile Schema schema = null; - private static boolean validateSchema = true; // Default is to validate schemas - public static final String VALIDATIONPROPERTY = "owasp.validator.validateschema"; - - // Support the ability to change the default schema validation behavior by setting the - // System property "owasp.antisamy.validateschema". - static { - loadValidateSchemaProperty(); - } - - // encapsulated to be simulated from test cases - private static void loadValidateSchemaProperty() { - String validateProperty = System.getProperty(VALIDATIONPROPERTY); - if (validateProperty != null) { - setSchemaValidation(Boolean.parseBoolean(validateProperty)); - logger.warn("Setting AntiSamy policy schema validation to '" + getSchemaValidation() + "' because '" - + VALIDATIONPROPERTY + "' system property set to: '" + validateProperty - + "'. Note: this feature is temporary and will go away in AntiSamy v1.7.0 (~mid/late 2022) when validation will become mandatory."); - } else validateSchema = true; // default (or back to default if invoked multiple times during testing) - } /** * Get the Tag specified by the provided tag name. @@ -227,33 +184,6 @@ public Property getPropertyByName(String propertyName) { return cssRules.get(propertyName.toLowerCase()); } - /** - * Is XSD schema validation across all policies enabled or not? It is enabled by default. - * - * @return True if schema validation enabled. False otherwise. - * - * @deprecated Temporary method to enable AntiSamy users to upgrade to 1.6.x while still using policy files that aren't - * schema compliant. AntiSamy plans to make schema validation mandatory starting with v1.7.0 (~mid/late 2022). 
- */ - @Deprecated - public static boolean getSchemaValidation() { - return validateSchema; - } - - /** - * This can enable/disable the schema validation against AntiSamy XSD for the instantiated - * policies. It is enabled by default. - * - * @param enable boolean value to specify if the schema validation should be performed. Use false to disable. - * - * @deprecated Temporary method to enable AntiSamy users to upgrade to 1.6.x while still using policy files that aren't - * schema compliant. AntiSamy plans to make schema validation mandatory starting with v1.7.0 (~mid/late 2022). - */ - @Deprecated - public static void setSchemaValidation(boolean enable) { - validateSchema = enable; - } - /** * Construct a Policy using the default policy file location ("antisamy.xml"). * @@ -284,10 +214,7 @@ public static Policy getInstance(String filename) throws PolicyException { * @throws PolicyException If there is a problem parsing the input stream. */ public static Policy getInstance(InputStream inputStream) throws PolicyException { - final String logMsg = "Attempting to load AntiSamy policy from an input stream."; - // If schema validation is disabled, we elevate this msg to the warn level to match the - // level of the mandatory warning that will follow. We do the same below. - if (validateSchema) logger.info(logMsg); else logger.warn(logMsg); + logger.info("Attempting to load AntiSamy policy from an input stream."); return new InternalPolicy(getSimpleParseContext(getTopLevelElement(inputStream))); } @@ -325,8 +252,7 @@ public static Policy getInstance(File file) throws PolicyException { * @throws PolicyException If the file is not found or there is a problem parsing the file. 
*/ public static Policy getInstance(URL url) throws PolicyException { - String logMsg = "Attempting to load AntiSamy policy from URL: " + url.toString(); - if (validateSchema) logger.info(logMsg); else logger.warn(logMsg); + logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); return new InternalPolicy(getParseContext(getTopLevelElement(url), url)); } @@ -426,30 +352,10 @@ public InputSource call() throws IOException { protected static Element getTopLevelElement(InputSource source, Callable getResetSource) throws PolicyException { // Track whether an exception was ever thrown while processing policy file - Exception thrownException = null; try { return getDocumentElementFromSource(source, true); - } catch (SAXException e) { - thrownException = e; - if (!validateSchema) { - try { - source = getResetSource.call(); - Element theElement = getDocumentElementFromSource(source, false); - // We warn when the policy has an invalid schema, but schema validation is disabled. - logger.warn("Invalid AntiSamy policy file: " + e.getMessage()); - return theElement; - } catch (Exception e2) { - throw new PolicyException(e2); - } - } else throw new PolicyException(e); - } catch (ParserConfigurationException | IOException e) { - thrownException = e; + } catch (SAXException | ParserConfigurationException | IOException e) { throw new PolicyException(e); - } finally { - if (!validateSchema && thrownException == null) { - // We warn when the policy has a valid schema, but schema validation is disabled. - logger.warn("XML schema validation is disabled for a valid AntiSamy policy. 
Please reenable policy validation."); - } } } @@ -529,38 +435,17 @@ private static void parsePolicy(Element topLevelElement, ParseContext parseConte + "a vulnerability because only local file URLs are allowed.") private static Element getPolicy(String href, URL baseUrl) throws PolicyException { // Track whether an exception was ever thrown while processing policy file - Exception thrownException = null; try { - return getDocumentElementByUrl(href, baseUrl, true); - } catch (SAXException e) { - thrownException = e; - if (!validateSchema) { - try { - Element theElement = getDocumentElementByUrl(href, baseUrl, false); - // We warn when the policy has an invalid schema, but schema validation is disabled. - logger.warn("Invalid AntiSamy policy file: " + e.getMessage()); - return theElement; - } catch (SAXException | ParserConfigurationException | IOException e2) { - throw new PolicyException(e2); - } - } else { - throw new PolicyException(e); - } - } catch (ParserConfigurationException | IOException e) { - thrownException = e; + return getDocumentElementByUrl(href, baseUrl); + } catch (SAXException | ParserConfigurationException | IOException e) { throw new PolicyException(e); - } finally { - if (!validateSchema && thrownException == null) { - // We warn when the policy has a valid schema, but schema validation is disabled. - logger.warn("XML schema validation is disabled for a valid AntiSamy policy. Please reenable policy validation."); - } } } // TODO: Add JavaDocs for this new method. 
@SuppressFBWarnings(value = "SECURITY", justification="Opening a stream to the provided URL is not " + "a vulnerability because only local file URLs are allowed.") - private static Element getDocumentElementByUrl(String href, URL baseUrl, boolean schemaValidationEnabled) + private static Element getDocumentElementByUrl(String href, URL baseUrl) throws IOException, ParserConfigurationException, SAXException { InputSource source = null; @@ -575,8 +460,7 @@ private static Element getDocumentElementByUrl(String href, URL baseUrl, boolean try { url = new URL(baseUrl, href); - final String logMsg = "Attempting to load AntiSamy policy from URL: " + url.toString(); - if (validateSchema) logger.info(logMsg); else logger.warn(logMsg); + logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); source = new InputSource(url.openStream()); source.setSystemId(href); } catch (MalformedURLException | FileNotFoundException e) { @@ -603,13 +487,11 @@ private static Element getDocumentElementByUrl(String href, URL baseUrl, boolean dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); dbf.setFeature(LOAD_EXTERNAL_DTD, false); - // This code doesn't have the retry logic if schema validation fails because schemaValidationEnabled is - // passed in. It is up to the caller to try again, if this fails the first time (if they want to). - if (schemaValidationEnabled) { - getPolicySchema(); - dbf.setNamespaceAware(true); - dbf.setSchema(schema); - } + // This code doesn't have the retry logic if schema validation fails. It is up to the caller to try again, + // if this fails the first time (if they want to). 
+ getPolicySchema(); + dbf.setNamespaceAware(true); + dbf.setSchema(schema); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(new SAXErrorHandler()); diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyMalformedPolicyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyMalformedPolicyTest.java index 98e345d6..90dbddd4 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyMalformedPolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyMalformedPolicyTest.java @@ -14,50 +14,25 @@ public class AntiSamyMalformedPolicyTest { @Test - public void testDirectConfigMalformedValidationOn() throws Exception { - Policy.setSchemaValidation(false); + public void testDirectConfigMalformed() throws Exception { InputStream stream = AntiSamyMalformedPolicyTest.class.getResourceAsStream("/invalidPolicyMalformedXml.xml"); try { Policy.getInstance(stream); - fail("For Malformed XML, PolicyException expected whether schema validation is enabled or not."); - } catch (PolicyException e) { - // This is expected, so do nothing. Any other kind of exception is a failed test case. - } - } - - @Test - public void testDirectConfigMalformedValidationOff() throws Exception { - Policy.setSchemaValidation(true); - InputStream stream = AntiSamyMalformedPolicyTest.class.getResourceAsStream("/invalidPolicyMalformedXml.xml"); - try { - Policy.getInstance(stream); - fail("For Malformed XML, PolicyException expected whether schema validation is enabled or not."); + fail("For Malformed XML, PolicyException expected."); } catch (PolicyException e) { // This is expected, so do nothing. Any other kind of exception is a failed test case. 
} } @Test - public void testDirectConfigAsBaisMalformedValidationOn() throws Exception { - Policy.setSchemaValidation(true); + public void testDirectConfigAsBaisMalformed() throws Exception { InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/invalidPolicyMalformedXml.xml"); try { Policy.getInstance(ESAPIInvalidPolicyTest.toByteArrayStream(stream)); - fail("For Malformed XML, PolicyException expected whether schema validation is enabled or not."); + fail("For Malformed XML, PolicyException expected."); } catch (PolicyException e) { // This is expected, so do nothing. Any other kind of exception is a failed test case. } } - @Test - public void testDirectConfigAsBaisMalformedValidationOff() throws Exception { - Policy.setSchemaValidation(false); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/invalidPolicyMalformedXml.xml"); - try { - Policy.getInstance(ESAPIInvalidPolicyTest.toByteArrayStream(stream)); - fail("For Malformed XML, PolicyException expected whether schema validation is enabled or not."); - } catch (PolicyException e) { - // This is expected, so do nothing. Any other kind of exception is a failed test case. 
- } - } } diff --git a/src/test/java/org/owasp/validator/html/test/ESAPIInvalidPolicyTest.java b/src/test/java/org/owasp/validator/html/test/ESAPIInvalidPolicyTest.java index 33ee27b2..a06d66fc 100644 --- a/src/test/java/org/owasp/validator/html/test/ESAPIInvalidPolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/ESAPIInvalidPolicyTest.java @@ -4,10 +4,7 @@ import java.io.ByteArrayOutputStream; import java.io.InputStream; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import org.junit.After; import org.junit.Test; import org.owasp.validator.html.Policy; @@ -15,102 +12,28 @@ public class ESAPIInvalidPolicyTest { - @After - public void resetSystemProp() throws Exception { - System.clearProperty(Policy.VALIDATIONPROPERTY); - PolicyTest.reloadSchemaValidation(); - if (!Policy.getSchemaValidation()) System.out.println( - "ERROR: resetSystemProp() test method NOT properly enabling AntiSamy policy schema validation!"); - } - @Test - public void testDirectConfigAsBaisValidationOn() throws Exception { - Policy.setSchemaValidation(true); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); + public void testBaisValidation() throws Exception { InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); try { Policy.getInstance(toByteArrayStream(stream)); - fail("Invalid policy with schema validation ON should throw exception."); - } catch (PolicyException e) { - // This is expected, so do nothing. Any other kind of exception is a failed test case. 
- } - } - - @Test - public void testDirectConfigAsBaisValidationOff() throws Exception { - Policy.setSchemaValidation(false); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - Policy.getInstance(toByteArrayStream(stream)); - } - - @Test - public void testDirectConfigValidationOn() throws Exception { - Policy.setSchemaValidation(true); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - try { - Policy.getInstance(stream); - fail("Invalid policy with schema validation ON should throw exception."); + fail("Invalid policy with schema should throw exception."); } catch (PolicyException e) { // This is expected, so do nothing. Any other kind of exception is a failed test case. } } @Test - public void testDirectConfigValidationOff() throws Exception { - Policy.setSchemaValidation(false); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - Policy.getInstance(stream); - } - - @Test - public void testSystemPropValidationOn() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "true"); - PolicyTest.reloadSchemaValidation(); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); + public void testDirectValidation() throws Exception { InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); try { Policy.getInstance(stream); - fail("Invalid policy with schema validation ON should throw exception."); - } catch (PolicyException e) { - // This is expected, so do nothing. Any other kind of exception is a failed test case. 
- } - } - - @Test - public void testSystemPropValidationOff() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "false"); - PolicyTest.reloadSchemaValidation(); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - Policy.getInstance(stream); - } - - @Test - public void testSystemPropAsBaisValidationOn() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "true"); - PolicyTest.reloadSchemaValidation(); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - try { - Policy.getInstance(toByteArrayStream(stream)); - fail("Invalid policy with schema validation ON should throw exception."); + fail("Invalid policy should throw exception."); } catch (PolicyException e) { // This is expected, so do nothing. Any other kind of exception is a failed test case. 
} } - @Test - public void testSystemPropAsBaisValidationOff() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "false"); - PolicyTest.reloadSchemaValidation(); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - InputStream stream = ESAPIInvalidPolicyTest.class.getResourceAsStream("/esapi-antisamy-InvalidPolicy.xml"); - Policy.getInstance(toByteArrayStream(stream)); - } - static InputStream toByteArrayStream(InputStream in) throws Exception { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); int nRead; diff --git a/src/test/java/org/owasp/validator/html/test/PolicyTest.java b/src/test/java/org/owasp/validator/html/test/PolicyTest.java index 1d95b485..b7f3a06a 100644 --- a/src/test/java/org/owasp/validator/html/test/PolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/PolicyTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Jacob Coulter, Mark Oberhaus + * Copyright (c) 2007-2022, Jacob Coulter, Mark Oberhaus * * All rights reserved. * @@ -33,13 +33,11 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import org.junit.Before; import org.junit.Test; import org.owasp.validator.html.AntiSamy; @@ -77,11 +75,6 @@ private String assembleFile(String allowedEmptyTagsSection) { allowedEmptyTagsSection + FOOTER; } - @Before - public void resetSystemProp() throws Exception { - Policy.setSchemaValidation(true); - } - @Test public void testGetAllowedEmptyTags() throws PolicyException { String allowedEmptyTagsSection = "\n" + @@ -136,13 +129,13 @@ public void testGetAllowedEmptyTags_NoSection() throws PolicyException { @Test public void testInvalidPolicies() { - // Default is to now enforce schema validation on policy files. 
+ // Starting with v1.7.0, schema validation is always enforced on policy files. // These tests verify various schema violations are detected and flagged. String notSupportedTagsSection = "\n" + "\n"; String policyFile = assembleFile(notSupportedTagsSection); try { policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown for with schema validation enabled."); + fail("No PolicyException thrown for ."); } catch (PolicyException e) { assertNotNull(e); } @@ -151,7 +144,7 @@ public void testInvalidPolicies() { policyFile = assembleFile(duplicatedTagsSection); try { policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown when duplicated and schema validation enabled."); + fail("No PolicyException thrown when duplicated."); } catch (PolicyException e) { assertNotNull(e); } @@ -159,71 +152,17 @@ public void testInvalidPolicies() { policyFile = assembleFile("").replace("", "").replace("", ""); try { policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown when missing and schema validation enabled."); + fail("No PolicyException thrown when missing."); } catch (PolicyException e) { assertNotNull(e); } } - // Test various Policy schema validation static initializer settings: - - @Test - public void testPolicyStaticInitializerTrue() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "True"); - reloadSchemaValidation(); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); - } - - @Test - public void testPolicyStaticInitializerFalse() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "False"); - reloadSchemaValidation(); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - } - - @Test - public void testPolicyStaticInitializerBlank() throws Exception { - System.clearProperty(Policy.VALIDATIONPROPERTY); - 
reloadSchemaValidation(); - assertTrue("AntiSamy XSD Validation should be enabled", Policy.getSchemaValidation()); - } - - @Test - public void testPolicyStaticInitializerJunk() throws Exception { - System.setProperty(Policy.VALIDATIONPROPERTY, "junk"); - reloadSchemaValidation(); - assertFalse("AntiSamy XSD Validation should be disabled", Policy.getSchemaValidation()); - } - - @Test public void testSchemaValidationToggleWithSource() { String notSupportedTagsSection = "\n" + "\n"; String policyFile = assembleFile(notSupportedTagsSection); - // Disable validation - Policy.setSchemaValidation(false); - - try { - System.out.println("TESTING: A schema invalid WARNING should mention the invalid tag: "); - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - assertNotNull(policy); - } catch (PolicyException e) { - fail("Policy creation should not fail when schema validation is disabled."); - } - - // This one should only print a warning on the console because validation is disabled - try { - System.out.println("TESTING: A WARNING should mention that schema validation should not be disabled."); - policy = Policy.getInstance(new ByteArrayInputStream(assembleFile("").getBytes())); - assertNotNull(policy); - } catch (PolicyException e) { - fail("Policy creation should not fail when schema validation is disabled."); - } - - // Enable validation again - Policy.setSchemaValidation(true); - try { policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); fail("Not supported tag on policy, but no PolicyException occurred."); @@ -233,63 +172,26 @@ public void testSchemaValidationToggleWithSource() { } @Test - public void testSchemaValidationToggleWithUrl() { + public void testSchemaValidationWithUrl() { URL urlOfValidPolicy = getClass().getResource("/antisamy.xml"); URL urlOfInvalidPolicy = getClass().getResource("/invalidPolicy.xml"); - // Disable validation - Policy.setSchemaValidation(false); - - try { - System.out.println("TESTING: 
A schema invalid WARNING should follow:"); - policy = TestPolicy.getInstance(urlOfInvalidPolicy); - assertNotNull(policy); - } catch (PolicyException e) { - fail("Policy creation should not fail for invalid policy when schema validation disabled."); - } - - // This one should only print a warning on the console because validation is disabled - try { - System.out.println("TESTING: A WARNING should mention that schema validation should not be disabled."); - policy = TestPolicy.getInstance(urlOfValidPolicy); - assertNotNull(policy); - } catch (PolicyException e) { - fail("Policy creation should not fail for valid policy when schema validation disabled."); - } - - // Enable validation again - Policy.setSchemaValidation(true); - try { policy = TestPolicy.getInstance(urlOfInvalidPolicy); - fail("PolicyException not thrown for policy w/invalid schema and schema validation enabled."); + fail("PolicyException not thrown for policy w/invalid schema."); } catch (PolicyException e) { assertNotNull(e); } } @Test - public void testSchemaValidationToggleWithInclude() { + public void testSchemaValidationWithInclude() { // This policy will also include invalidPolicy.xml URL url = getClass().getResource("/emptyPolicyWithInclude.xml"); - // Disable validation - Policy.setSchemaValidation(false); - - try { - System.out.println("TESTING: A schema invalid WARNING should follow:"); - policy = TestPolicy.getInstance(url); - assertNotNull(policy); - } catch (PolicyException e) { - fail("Policy creation should not fail for invalid policy when schema validation disabled."); - } - - // Enable validation again - Policy.setSchemaValidation(true); - try { policy = TestPolicy.getInstance(url); - fail("PolicyException not thrown for policy w/invalid schema and schema validation enabled."); + fail("PolicyException not thrown for policy w/invalid schema."); } catch (PolicyException e) { assertNotNull(e); } From 17d9bf3077ceddac0b9ecffa3e997befbfbffc88 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 19 Feb 2022 12:18:02 -0300 Subject: [PATCH 08/36] Add explicit write permission for SL Scan workflow --- .github/workflows/shiftleft-analysis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/shiftleft-analysis.yml b/.github/workflows/shiftleft-analysis.yml index 7c34a8e0..2198335b 100644 --- a/.github/workflows/shiftleft-analysis.yml +++ b/.github/workflows/shiftleft-analysis.yml @@ -12,6 +12,8 @@ jobs: Scan-Build: # Scan runs on ubuntu, mac and windows runs-on: ubuntu-latest + permissions: + security-events: write steps: - uses: actions/checkout@v1 # Instructions From 32c7838224456cc51474142699045c4c3c83bddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= <30912689+spassarop@users.noreply.github.com> Date: Sat, 19 Feb 2022 12:19:29 -0300 Subject: [PATCH 09/36] Fix indentation in shiftleft-analysis.yml --- .github/workflows/shiftleft-analysis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/shiftleft-analysis.yml b/.github/workflows/shiftleft-analysis.yml index 2198335b..32b0d714 100644 --- a/.github/workflows/shiftleft-analysis.yml +++ b/.github/workflows/shiftleft-analysis.yml @@ -12,7 +12,7 @@ jobs: Scan-Build: # Scan runs on ubuntu, mac and windows runs-on: ubuntu-latest - permissions: + permissions: security-events: write steps: - uses: actions/checkout@v1 From a0f3a20e59380a0a51e51ac260e3d49e210f2fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Thu, 3 Mar 2022 20:33:32 -0300 Subject: [PATCH 10/36] Add require-closing-tags to default schema With tests to verify it is accepted and parsed in the policy. 
--- src/main/resources/antisamy.xsd | 5 +- .../owasp/validator/html/test/PolicyTest.java | 77 +++++++++++++++++-- 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/src/main/resources/antisamy.xsd b/src/main/resources/antisamy.xsd index 40d9502b..6d82fd23 100644 --- a/src/main/resources/antisamy.xsd +++ b/src/main/resources/antisamy.xsd @@ -13,7 +13,8 @@ - + + @@ -65,7 +66,7 @@ - + diff --git a/src/test/java/org/owasp/validator/html/test/PolicyTest.java b/src/test/java/org/owasp/validator/html/test/PolicyTest.java index b7f3a06a..83a1cebf 100644 --- a/src/test/java/org/owasp/validator/html/test/PolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/PolicyTest.java @@ -70,9 +70,9 @@ public class PolicyTest { private static final String FOOTER = ""; // Returns a valid policy file with the specified allowedEmptyTags - private String assembleFile(String allowedEmptyTagsSection) { + private String assembleFile(String finalTagsSection) { return HEADER + DIRECTIVES + COMMON_REGEXPS + COMMON_ATTRIBUTES + GLOBAL_TAG_ATTRIBUTES + DYNAMIC_TAG_ATTRIBUTES + TAG_RULES + CSS_RULES + - allowedEmptyTagsSection + FOOTER; + finalTagsSection + FOOTER; } @Test @@ -119,14 +119,64 @@ public void testGetAllowedEmptyTags_emptySection() throws PolicyException { @Test public void testGetAllowedEmptyTags_NoSection() throws PolicyException { String allowedEmptyTagsSection = ""; - String policyFile = assembleFile(allowedEmptyTagsSection); policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); assertTrue(policy.getAllowedEmptyTags().size() == Constants.defaultAllowedEmptyTags.size()); } - + + @Test + public void testGetRequireClosingTags() throws PolicyException { + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + TagMatcher actualTags = 
policy.getRequiresClosingTags(); + + assertTrue(actualTags.matches("td")); + assertTrue(actualTags.matches("span")); + } + + @Test + public void testGetRequireClosingTags_emptyList() throws PolicyException { + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_emptySection() throws PolicyException { + String requireClosingTagsSection = "\n" + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_NoSection() throws PolicyException { + String requireClosingTagsSection = ""; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertTrue(policy.getRequiresClosingTags().size() == Constants.defaultRequireClosingTags.size()); + } + @Test public void testInvalidPolicies() { // Starting with v1.7.0, schema validation is always enforced on policy files. 
@@ -173,7 +223,6 @@ public void testSchemaValidationToggleWithSource() { @Test public void testSchemaValidationWithUrl() { - URL urlOfValidPolicy = getClass().getResource("/antisamy.xml"); URL urlOfInvalidPolicy = getClass().getResource("/invalidPolicy.xml"); try { @@ -197,6 +246,24 @@ public void testSchemaValidationWithInclude() { } } + @Test + public void testSchemaValidationWithOptionallyDefinedTags() throws PolicyException { + String allowedEmptyTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String requireClosingTagsSection = "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection + requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + // If it reaches this point, it passed schema validation, which is what we want. + } + @Test public void testGithubIssue66() { // Concern is that LSEP characters are not being considered on .* pattern From a2a004d52279a2d17d81d7e9a82ae66a14ad0529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 12 Mar 2022 21:08:23 -0300 Subject: [PATCH 11/36] Use errors list passed to CssHandler constructor --- .../java/org/owasp/validator/css/CssHandler.java | 2 +- .../owasp/validator/html/test/AntiSamyTest.java | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index 8341516d..6a669daf 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -144,7 +144,7 @@ public CssHandler(Policy policy, List errorMessages, ResourceBundle mess * the tag name associated with this inline style */ public CssHandler(Policy policy, List errorMessages, ResourceBundle messages, String tagName) { - this(policy, null, new ArrayList(), tagName, messages); + this(policy, null, errorMessages, tagName, 
messages); } /** diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index cca9d8d2..c9472169 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1688,5 +1688,19 @@ public void testScansWithDifferentPolicyLoading() throws ScanException, PolicyEx // An error is expected. Pass. } } + + @Test + public void testGithubIssue151() throws ScanException, PolicyException { + // Concern is error messages when parsing stylesheets are no longer returned in AntiSamy 1.6.5 + String input = ""; + + CleanResults result = as.scan(input, policy, AntiSamy.DOM); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + + result = as.scan(input, policy, AntiSamy.SAX); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + } } From f837cc2556f5f0b2e7a486c0aa5fc39036445e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= <30912689+spassarop@users.noreply.github.com> Date: Sun, 13 Mar 2022 13:59:32 -0300 Subject: [PATCH 12/36] Edit copyright date on AntiSamyTest.java --- src/test/java/org/owasp/validator/html/test/AntiSamyTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index c9472169..f74c9f20 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* From 2f0df8581de997270fe4efe7e3cc853a04659ef9 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Mon, 14 Mar 2022 21:04:58 -0400 Subject: [PATCH 13/36] Upgrade spotbugs. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 40645b71..3e5583f9 100644 --- a/pom.xml +++ b/pom.xml @@ -47,7 +47,7 @@ 2.11.0 1.7.36 4.5.3.0 - 4.5.3 + 4.6.0 From c642a76d0ba45bd6cc6259f4d3f40c043b67d7e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:20:58 -0300 Subject: [PATCH 14/36] Update 1.7.0 --- .../owasp/validator/html/test/AntiSamyTest.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index cca9d8d2..49fcd79b 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1688,5 +1688,22 @@ public void testScansWithDifferentPolicyLoading() throws ScanException, PolicyEx // An error is expected. Pass. 
} } +<<<<<<< Updated upstream +======= + + @Test + public void testGithubIssue151() throws ScanException, PolicyException { + // Concern is error messages when parsing stylesheets are no longer returned in AntiSamy 1.6.5 + String input = ""; + + CleanResults result = as.scan(input, policy, AntiSamy.DOM); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + + result = as.scan(input, policy, AntiSamy.SAX); + assertThat(result.getErrorMessages().size(), is(1)); + assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); + } +>>>>>>> Stashed changes } From 0b77106350c731c31ae68f8b834a7927baab6225 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:21:27 -0300 Subject: [PATCH 15/36] Change neko html dependecy --- pom.xml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 40645b71..0f1fbd88 100644 --- a/pom.xml +++ b/pom.xml @@ -61,16 +61,9 @@ - net.sourceforge.nekohtml - nekohtml - 1.9.22 - - - - xerces - xercesImpl - - + net.sourceforge.htmlunit + neko-htmlunit + 2.60.0 org.apache.httpcomponents From fc3658c1d002752f25489d7dbdd6ab9aacc91b2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:35:47 -0300 Subject: [PATCH 16/36] Update neko html imports and tests --- .../org/owasp/validator/html/scan/AntiSamyDOMScanner.java | 4 ++-- .../org/owasp/validator/html/scan/AntiSamySAXScanner.java | 4 ++-- .../java/org/owasp/validator/html/scan/MagicSAXFilter.java | 4 ++-- .../java/org/owasp/validator/html/test/AntiSamyTest.java | 6 ++++-- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index cac79688..976c1931 100644 --- 
a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -25,7 +25,7 @@ import org.apache.batik.css.parser.ParseException; import org.apache.xerces.dom.DocumentImpl; -import org.cyberneko.html.parsers.DOMFragmentParser; +import net.sourceforge.htmlunit.cyberneko.parsers.DOMFragmentParser; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index f66bade1..87d10d8f 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -41,7 +41,7 @@ import javax.xml.transform.sax.SAXSource; import org.apache.xerces.xni.parser.XMLDocumentFilter; -import org.cyberneko.html.parsers.SAXParser; +import net.sourceforge.htmlunit.cyberneko.parsers.SAXParser; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index 819b3b6a..a163dcc3 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* @@ -36,7 +36,7 @@ import org.apache.xerces.xni.XMLString; import org.apache.xerces.xni.XNIException; import org.apache.xerces.xni.parser.XMLDocumentFilter; -import org.cyberneko.html.filters.DefaultFilter; +import net.sourceforge.htmlunit.cyberneko.filters.DefaultFilter; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index d941dac1..8b71b722 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1511,11 +1511,13 @@ public void entityReferenceEncodedInHtmlAttribute() throws ScanException, Policy // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" // so the validations based on regexp passed and a browser would load "&:" together. // All this when not using the XHTML serializer. + + // UPDATE: Using a new HTML parser library starts decoding entities like #00058 Policy revised = policy.cloneWithDirective("useXHTML","false"); assertThat(as.scan("

xss

", revised, AntiSamy.DOM).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); assertThat(as.scan("

xss

", revised, AntiSamy.SAX).getCleanHTML(), - containsString("javascript&#00058")); + not(containsString("javascript"))); } @Test From 857202e75330abb9242ff321c9b9ec7da0f3bfd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:04:31 -0300 Subject: [PATCH 17/36] Support multiple children handling on style tags --- .../html/scan/AntiSamyDOMScanner.java | 28 +++++++++++++------ .../validator/html/test/AntiSamyTest.java | 14 ++++++++++ .../owasp/validator/html/test/TestPolicy.java | 2 +- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 976c1931..3cf63f6a 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -407,10 +407,17 @@ private boolean processStyleTag(Element ele, Node parentNode) { CssScanner styleScanner = new CssScanner(policy, messages, policy.isEmbedStyleSheets()); try { - Node firstChild = ele.getFirstChild(); - if (firstChild != null) { + if (ele.getChildNodes().getLength() > 0) { + String toScan = ""; + + for (int i = 0; i < ele.getChildNodes().getLength(); i++) { + Node childNode = ele.getChildNodes().item(i); + if (!toScan.isEmpty()){ + toScan += "\n"; + } + toScan += childNode.getTextContent(); + } - String toScan = firstChild.getNodeValue(); CleanResults cr = styleScanner.scanStyleSheet(toScan, policy.getMaxInputSize()); errorMessages.addAll(cr.getErrorMessages()); @@ -422,12 +429,17 @@ private boolean processStyleTag(Element ele, Node parentNode) { * break all CSS. To prevent that, we have this check. */ - final String cleanHTML = cr.getCleanHTML(); + String cleanHTML = cr.getCleanHTML(); + cleanHTML = cleanHTML == null || cleanHTML.equals("") ? 
"/* */" : cleanHTML; - if (cleanHTML == null || cleanHTML.equals("")) { - firstChild.setNodeValue("/* */"); - } else { - firstChild.setNodeValue(cleanHTML); + ele.getFirstChild().setNodeValue(cleanHTML); + /* + * Remove every other node after cleaning CSS, there will + * be only one node in the end, as it always should have. + */ + for (int i = 1; i < ele.getChildNodes().getLength(); i++) { + Node childNode = ele.getChildNodes().item(i); + ele.removeChild(childNode); } } diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 8b71b722..51e8a0ed 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1704,5 +1704,19 @@ public void testGithubIssue151() throws ScanException, PolicyException { assertThat(result.getErrorMessages().size(), is(1)); assertThat(result.getCleanHTML(), both(containsString("img")).and(not(containsString("CURSOR")))); } + + @Test + public void testSmuggledTagsInStyleContent() throws ScanException, PolicyException { + // HTML tags may be smuggled into a style tag after parsing input to an internal representation. + // If that happens, they should be treated as text content and not as children nodes. 
+ + Policy revised = policy.cloneWithDirective(Policy.USE_XHTML,"true"); + assertThat(as.scan("test", revised, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript"))); + assertThat(as.scan("test", revised, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript"))); + + Policy revised2 = policy.cloneWithDirective(Policy.USE_XHTML,"false"); + assertThat(as.scan("Walert(1)", revised2, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); + assertThat(as.scan("Walert(1)", revised2, AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); + } } diff --git a/src/test/java/org/owasp/validator/html/test/TestPolicy.java b/src/test/java/org/owasp/validator/html/test/TestPolicy.java index 4b71ebbb..435175bc 100644 --- a/src/test/java/org/owasp/validator/html/test/TestPolicy.java +++ b/src/test/java/org/owasp/validator/html/test/TestPolicy.java @@ -43,7 +43,7 @@ */ public class TestPolicy extends InternalPolicy { - protected TestPolicy(Policy.ParseContext parseContext) throws PolicyException { + protected TestPolicy(Policy.ParseContext parseContext) { super(parseContext); } From 6ed9b29e6e0c0ba7d7a17361e2495da9545cb444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sun, 27 Mar 2022 11:49:07 -0300 Subject: [PATCH 18/36] Add malformed PI test --- .../org/owasp/validator/html/test/AntiSamyTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index 51e8a0ed..f6015e31 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1718,5 +1718,16 @@ public void testSmuggledTagsInStyleContent() throws ScanException, PolicyExcepti assertThat(as.scan("Walert(1)", revised2, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); assertThat(as.scan("Walert(1)", revised2, 
AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); } -} + @Test(timeout = 3000) + public void testMalformedPIScan() { + // Certain malformed input including a malformed processing instruction may lead the parser to an internal memory error. + + try { + as.scan(" Date: Sun, 27 Mar 2022 11:49:56 -0300 Subject: [PATCH 19/36] Update last test --- .../java/org/owasp/validator/html/test/AntiSamyTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index f6015e31..29109d4f 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1722,12 +1722,13 @@ public void testSmuggledTagsInStyleContent() throws ScanException, PolicyExcepti @Test(timeout = 3000) public void testMalformedPIScan() { // Certain malformed input including a malformed processing instruction may lead the parser to an internal memory error. - try { as.scan(" Date: Thu, 31 Mar 2022 21:15:13 -0300 Subject: [PATCH 20/36] Upgrade 2x Apache httpcomponent libraries Used from their old 4.x versions to the latest 5.x versions. Required updating imports and rewriting a bit of code in the CssScanner class. --- pom.xml | 44 ++++------- .../org/owasp/validator/css/CssScanner.java | 76 ++++++++++++------- 2 files changed, 63 insertions(+), 57 deletions(-) diff --git a/pom.xml b/pom.xml index 1fc4c8d5..7a92752b 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,7 @@ true 2.11.0 1.7.36 - 4.5.3.0 + 4.6.0.0 4.6.0 @@ -66,30 +66,14 @@ 2.60.0
- org.apache.httpcomponents - httpclient - 4.5.13 - - - - commons-codec - commons-codec - - - - commons-logging - commons-logging - - - org.apache.httpcomponents - httpcore - - + org.apache.httpcomponents.client5 + httpclient5 + 5.1.3 - org.apache.httpcomponents - httpcore - 4.4.15 + org.apache.httpcomponents.core5 + httpcore5 + 5.1.3 org.apache.xmlgraphics @@ -198,7 +182,7 @@ org.apache.maven.plugins maven-dependency-plugin - 3.2.0 + 3.3.0 commons-io:commons-io @@ -233,7 +217,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.10.0 + 3.10.1 1.8 1.8 @@ -342,7 +326,7 @@ org.apache.maven.plugins maven-site-plugin - 3.10.0 + 3.11.0 org.apache.maven.plugins @@ -424,7 +408,7 @@ Delete this entire build plugin block in 3.16+ as maven-pmd-plugin is only needed in the reporting block. --> org.apache.maven.plugins maven-pmd-plugin - 3.15.0 + 3.16.0 Date: Sat, 23 Apr 2022 12:40:56 -0300 Subject: [PATCH 23/36] GitHub actions update from main --- .github/workflows/codeql-analysis.yml | 3 +-- .github/workflows/maven.yml | 4 ++-- .github/workflows/shiftleft-analysis.yml | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d8064ffc..760ae80b 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -37,7 +37,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. 
@@ -76,4 +76,3 @@ jobs: - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v1 - diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 0d95a437..e86f04e5 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -10,9 +10,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up JDK 1.8 - uses: actions/setup-java@v2 + uses: actions/setup-java@v3 with: java-version: 8 distribution: zulu diff --git a/.github/workflows/shiftleft-analysis.yml b/.github/workflows/shiftleft-analysis.yml index 32b0d714..1d0c785e 100644 --- a/.github/workflows/shiftleft-analysis.yml +++ b/.github/workflows/shiftleft-analysis.yml @@ -15,7 +15,7 @@ jobs: permissions: security-events: write steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 # Instructions # 1. Setup JDK, Node.js, Python etc depending on your project type # 2. Compile or build the project before invoking scan From 925acf49e58e426e45d46344a744648b00350524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 23 Apr 2022 12:48:19 -0300 Subject: [PATCH 24/36] Update pom.xml from main --- pom.xml | 172 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 99 insertions(+), 73 deletions(-) diff --git a/pom.xml b/pom.xml index 7a92752b..0f627f2f 100644 --- a/pom.xml +++ b/pom.xml @@ -41,9 +41,11 @@ + true UTF-8 2022-02-30T14:33:00Z - true + 1.8 + 1.12.0 2.11.0 1.7.36 4.6.0.0 @@ -63,12 +65,19 @@ net.sourceforge.htmlunit neko-htmlunit - 2.60.0 + 2.61.0 org.apache.httpcomponents.client5 httpclient5 5.1.3 + + + + org.slf4j + slf4j-api + + org.apache.httpcomponents.core5 @@ -194,7 +203,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.3.2 + 3.4.0 true @@ -212,18 +221,21 @@ org.apache.maven.plugins maven-clean-plugin - 3.1.0 + 3.2.0 org.apache.maven.plugins maven-compiler-plugin 3.10.1 - 1.8 - 1.8 - 1.8 - 1.8 - -Xlint:unchecked + ${project.java.target} + 
${project.java.target} + ${project.java.target} + ${project.java.target} + + + -Xlint:unchecked + @@ -241,6 +253,11 @@ extra-enforcer-rules 1.5.1 + + org.codehaus.mojo + animal-sniffer-enforcer-rule + 1.21 + @@ -267,8 +284,8 @@ - 1.8 - Antisamy is written to support Java 8+. + ${project.java.target} + Antisamy source code is written to support Java 8+. @@ -318,6 +335,21 @@ + + org.apache.maven.plugins + maven-jxr-plugin + 3.2.0 + + + org.apache.maven.plugins + maven-pmd-plugin + 3.16.0 + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.2.2 + org.apache.maven.plugins maven-resources-plugin @@ -326,7 +358,7 @@ org.apache.maven.plugins maven-site-plugin - 3.11.0 + 3.12.0 org.apache.maven.plugins @@ -343,7 +375,12 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M5 + 3.0.0-M6 + + + org.codehaus.mojo + versions-maven-plugin + 2.10.0 org.cyclonedx @@ -359,7 +396,7 @@ org.jacoco jacoco-maven-plugin - 0.8.7 + 0.8.8 prepare-agent @@ -404,74 +441,55 @@
- - org.apache.maven.plugins - maven-pmd-plugin - 3.16.0 - - - - org.ow2.asm - asm - 9.2 - - + com.h3xstream.findsecbugs + findsecbugs-plugin + ${version.findsecbugs} + + + - org.codehaus.mojo - versions-maven-plugin - 2.10.0 - - - - dependency-updates-report - plugin-updates-report - property-updates-report - - - + com.github.spotbugs + spotbugs-maven-plugin + + + + com.h3xstream.findsecbugs + findsecbugs-plugin + ${version.findsecbugs} + + + Max + false + src/test/spotbugsFilterFile.xml + - - org.jacoco - jacoco-maven-plugin - - - report - - + org.apache.maven.plugins + maven-javadoc-plugin org.apache.maven.plugins - maven-javadoc-plugin + maven-jxr-plugin org.apache.maven.plugins maven-pmd-plugin - 1.8 + ${project.java.target} utf-8 - - org.apache.maven.plugins - maven-jxr-plugin - 3.2.0 - org.apache.maven.plugins maven-project-info-reports-plugin - 3.2.2 @@ -485,20 +503,28 @@
- com.github.spotbugs - spotbugs-maven-plugin - - - - com.h3xstream.findsecbugs - findsecbugs-plugin - 1.11.0 - - - Max - false - src/test/spotbugsFilterFile.xml - + org.codehaus.mojo + versions-maven-plugin + + + + dependency-updates-report + plugin-updates-report + property-updates-report + + + + + + + org.jacoco + jacoco-maven-plugin + + + report + + From 1416ce94bc2f3c22974843650d80365d144b924f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 7 May 2022 22:23:23 -0300 Subject: [PATCH 25/36] Remove XHTML based logic --- .../org/owasp/validator/css/CssScanner.java | 4 +- .../html/scan/ASXHTMLSerializer.java | 82 ------------------- .../html/scan/AbstractAntiSamyScanner.java | 5 -- .../html/scan/AntiSamySAXScanner.java | 3 +- 4 files changed, 3 insertions(+), 91 deletions(-) delete mode 100644 src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index 5b537e5a..d6dd5e5b 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -184,8 +184,8 @@ public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws Scan String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); - if ( isCdata && !policy.isUseXhtml()) { - cleaned = ""; + if (isCdata) { + cleaned = ""; } return new CleanResults(startOfScan, cleaned, null, errorMessages); diff --git a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java deleted file mode 100644 index 85cf18f4..00000000 --- a/src/main/java/org/owasp/validator/html/scan/ASXHTMLSerializer.java +++ /dev/null @@ -1,82 +0,0 @@ -package org.owasp.validator.html.scan; - -import org.apache.xml.serialize.ElementState; -import org.apache.xml.serialize.OutputFormat; -import 
org.owasp.validator.html.InternalPolicy; -import org.owasp.validator.html.TagMatcher; - -import java.io.IOException; -import java.io.Writer; -import java.util.Locale; - -/** - * This is an extension of the default XHTMLSerializer class that's had it's endElementIO() - * method tweaked to serialize closing tags and self-closing tags the way we require. - */ -@SuppressWarnings("deprecation") -public class ASXHTMLSerializer extends org.apache.xml.serialize.XHTMLSerializer { - - private boolean encodeAllPossibleEntities; - private final TagMatcher allowedEmptyTags; - private final TagMatcher requireClosingTags; - - public ASXHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { - super(w, format); - this.allowedEmptyTags = policy.getAllowedEmptyTags(); - this.requireClosingTags = policy.getRequiresClosingTags(); - this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters(); - } - - protected String getEntityRef(int charToPrint) { - if(encodeAllPossibleEntities || Constants.big5CharsToEncode.indexOf(charToPrint) != -1) - return super.getEntityRef(charToPrint); - return null; - } - - public void endElementIO(String namespaceURI, String localName, - String rawName) throws IOException { - - ElementState state; - - // Works much like content() with additions for closing - // an element. Note the different checks for the closed - // element's state and the parent element's state. - _printer.unindent(); - state = getElementState(); - - if (state.empty && isAllowedEmptyTag(rawName) && !requiresClosingTag(rawName)) { // - _printer.printText(" />"); - } else { - if(state.empty) - _printer.printText('>'); - // Must leave CData section first - if (state.inCData) - _printer.printText("]]>"); - // XHTML: element names are lower case, DOM will be different - _printer.printText("'); - } - - // Leave the element state and update that of the parent - // (if we're not root) to not empty and after element. 
- state = leaveElementState(); - // Temporary hack to prevent line breaks inside A/TD - if (rawName == null - || (!rawName.equalsIgnoreCase("A") && !rawName - .equalsIgnoreCase("TD"))) - - state.afterElement = true; - state.empty = false; - if (isDocumentState()) - _printer.flush(); - } - - private boolean requiresClosingTag(String tagName) { - return requireClosingTags.matches(tagName); - } - - private boolean isAllowedEmptyTag(String tagName) { - return "head".equals(tagName) || allowedEmptyTags.matches( tagName); - } -} diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index ae257e9d..457a613d 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -96,11 +96,6 @@ protected OutputFormat getOutputFormat() { } protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer(Writer w, OutputFormat format) { - - if (policy.isUseXhtml()) { - return new ASXHTMLSerializer(w, format, policy); - } - return new ASHTMLSerializer(w, format, policy); } diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java index 87d10d8f..da4aba58 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamySAXScanner.java @@ -164,12 +164,11 @@ public CleanResults scan(Reader reader, Writer writer) throws ScanException { final Transformer transformer = cachedItem.transformer; boolean formatOutput = policy.isFormatOutput(); - boolean useXhtml = policy.isUseXhtml(); boolean omitXml = policy.isOmitXmlDeclaration(); transformer.setOutputProperty(OutputKeys.INDENT, formatOutput ? "yes" : "no"); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, omitXml ? 
"yes" : "no"); - transformer.setOutputProperty(OutputKeys.METHOD, useXhtml ? "xml" : "html"); + transformer.setOutputProperty(OutputKeys.METHOD, "html"); //noinspection deprecation final org.apache.xml.serialize.OutputFormat format = getOutputFormat(); From 254a8431e7553be84e9d16895b3b32b97dde9197 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 7 May 2022 22:24:28 -0300 Subject: [PATCH 26/36] Avoid duplicate attributes on SAX parser --- .../validator/html/scan/MagicSAXFilter.java | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java index a163dcc3..fe0bcf6e 100644 --- a/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java +++ b/src/main/java/org/owasp/validator/html/scan/MagicSAXFilter.java @@ -312,18 +312,18 @@ public void startElement(QName element, XMLAttributes attributes, Augmentations } else if (attribute != null) { // validate the values against the policy boolean isValid = false; - if (attribute.containsAllowedValue(value.toLowerCase())) { - validattributes.addAttribute(makeSimpleQname(name), "CDATA", value); + if (attribute.containsAllowedValue(value.toLowerCase()) + || attribute.matchesAllowedExpression(value)) { + int attrIndex; + if ((attrIndex = validattributes.getIndex(name)) > 0) { + // If attribute is repeated, use last value. 
+ validattributes.setValue(attrIndex, value); + } else { + validattributes.addAttribute(makeSimpleQname(name), "CDATA", value); + } isValid = true; } - if (!isValid) { - isValid = attribute.matchesAllowedExpression(value); - if (isValid) { - validattributes.addAttribute(makeSimpleQname(name), "CDATA", value); - } - } - // if value or regexp matched, attribute is already // copied, but what happens if not if (!isValid && "removeTag".equals(attribute.getOnInvalid())) { @@ -386,7 +386,12 @@ public void startElement(QName element, XMLAttributes attributes, Augmentations } String relValue = Attribute.mergeRelValuesInAnchor(addNofollow, addNoopenerAndNoreferrer, currentRelValue); if (!relValue.isEmpty()){ - validattributes.addAttribute(makeSimpleQname("rel"), "CDATA", relValue); + int relIndex; + if ((relIndex = validattributes.getIndex("rel")) > 0) { + validattributes.setValue(relIndex, relValue); + } else { + validattributes.addAttribute(makeSimpleQname("rel"), "CDATA", relValue); + } } } From 815a945503c9f4689fb27a747f24788f9407a8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebasti=C3=A1n=20Passaro?= Date: Sat, 7 May 2022 22:25:16 -0300 Subject: [PATCH 27/36] Remove useXHTML usage from policies --- src/main/java/org/owasp/validator/html/InternalPolicy.java | 7 ------- src/main/java/org/owasp/validator/html/Policy.java | 1 - src/main/java/org/owasp/validator/html/scan/Constants.java | 1 - src/main/resources/antisamy-anythinggoes.xml | 1 - src/main/resources/antisamy-ebay.xml | 1 - src/main/resources/antisamy-myspace.xml | 1 - src/main/resources/antisamy-slashdot.xml | 1 - src/main/resources/antisamy-tinymce.xml | 1 - src/main/resources/antisamy.xml | 1 - 9 files changed, 15 deletions(-) diff --git a/src/main/java/org/owasp/validator/html/InternalPolicy.java b/src/main/java/org/owasp/validator/html/InternalPolicy.java index cf5e4051..cd2807cf 100644 --- a/src/main/java/org/owasp/validator/html/InternalPolicy.java +++ 
b/src/main/java/org/owasp/validator/html/InternalPolicy.java @@ -22,7 +22,6 @@ public class InternalPolicy extends Policy { private final boolean omitXmlDeclaration; private final boolean omitDoctypeDeclaration; private final boolean entityEncodeIntlCharacters; - private final boolean useXhtml; private final Tag embedTag; private final Tag styleTag; private final String onUnknownTag; @@ -43,7 +42,6 @@ protected InternalPolicy(ParseContext parseContext) { this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); - this.useXhtml = isTrue(Policy.USE_XHTML); this.embedTag = getTagByLowercaseName("embed"); this.onUnknownTag = getDirective("onUnknownTag"); this.isEncodeUnknownTag = "encode".equals(onUnknownTag); @@ -69,7 +67,6 @@ protected InternalPolicy(Policy old, Map directives, Map defaultAllowedEmptyTags; public static final List defaultRequireClosingTags; diff --git a/src/main/resources/antisamy-anythinggoes.xml b/src/main/resources/antisamy-anythinggoes.xml index 1ec5350f..da6548e1 100644 --- a/src/main/resources/antisamy-anythinggoes.xml +++ b/src/main/resources/antisamy-anythinggoes.xml @@ -14,7 +14,6 @@ http://www.w3.org/TR/html401/struct/global.html - UTF-8 - 2022-02-30T14:33:00Z + 2022-06-26T19:54:30Z 1.8 1.12.0 2.11.0 1.7.36 - 4.6.0.0 - 4.6.0 + 4.7.0.0 + 4.7.0 @@ -65,7 +65,7 @@ net.sourceforge.htmlunit neko-htmlunit - 2.61.0 + 2.62.0 org.apache.httpcomponents.client5 @@ -212,7 +212,7 @@ org.apache.maven.plugins maven-release-plugin - 3.0.0-M5 + 3.0.0-M6 @@ -246,7 +246,7 @@ org.apache.maven.plugins maven-enforcer-plugin - 3.0.0 + 3.1.0 org.codehaus.mojo @@ -343,12 +343,12 @@ org.apache.maven.plugins maven-pmd-plugin - 3.16.0 + 3.17.0 org.apache.maven.plugins maven-project-info-reports-plugin - 3.2.2 + 3.3.0 org.apache.maven.plugins @@ -358,7 +358,7 @@ org.apache.maven.plugins maven-site-plugin - 3.12.0 + 
4.0.0-M1 org.apache.maven.plugins @@ -375,17 +375,17 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M6 + 3.0.0-M7 org.codehaus.mojo versions-maven-plugin - 2.10.0 + 2.11.0 org.cyclonedx cyclonedx-maven-plugin - 2.5.3 + 2.7.0 package @@ -445,7 +445,6 @@ findsecbugs-plugin ${version.findsecbugs} - @@ -470,6 +469,7 @@ src/test/spotbugsFilterFile.xml + org.apache.maven.plugins maven-javadoc-plugin @@ -502,6 +502,7 @@ false + org.codehaus.mojo versions-maven-plugin From 873979213a8984d48325d7b5735a51ea09ba7ba6 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Sat, 9 Jul 2022 15:19:05 -0400 Subject: [PATCH 31/36] Remove a deprecated CssHandler constructor to address 1 task in issue #195. Most of the deprecated items listed in this issue have already been addressed by @spassarop. --- .../org/owasp/validator/css/CssHandler.java | 31 ++----------------- .../java/org/owasp/validator/html/Policy.java | 10 +++--- .../org/owasp/validator/html/TagMatcher.java | 4 +-- .../validator/html/test/AntiSamyTest.java | 2 -- 4 files changed, 10 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index 6a669daf..18543611 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -127,12 +127,12 @@ public class CssHandler implements DocumentHandler { * the error message bundle to pull from */ public CssHandler(Policy policy, List errorMessages, ResourceBundle messages) { - this(policy, null, errorMessages, null, messages); + this(policy, errorMessages, messages, null); } /** * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets - * produced by this constructor is now available via the getImportedStylesheetsURIList() method. + * produced by this constructor is available via the getImportedStylesheetsURIList() method. 
* * @param policy * the policy to use @@ -144,31 +144,6 @@ public CssHandler(Policy policy, List errorMessages, ResourceBundle mess * the tag name associated with this inline style */ public CssHandler(Policy policy, List errorMessages, ResourceBundle messages, String tagName) { - this(policy, null, errorMessages, tagName, messages); - } - - /** - * Constructs a handler for inline style declarations using the given policy - * and queue for imported stylesheets. - * - * @param policy - * the policy to use - * @param embeddedStyleSheets - * the queue of stylesheets imported - * @param errorMessages - * the List of error messages to add error messages too if there are errors - * @param tagName - * the tag name associated with this inline style - * @param messages - * the error message bundle to pull from - * - * @deprecated The embeddedStyleSheets List parameter is removed in the newer version of - * this constructor as the handler has its own internal list that can be accessed through - * the getImportedStylesheetsURIList() method. - */ - @Deprecated - public CssHandler(Policy policy, LinkedList embeddedStyleSheets, - List errorMessages, String tagName, ResourceBundle messages) { assert policy instanceof InternalPolicy : policy.getClass(); this.policy = (InternalPolicy) policy; this.errorMessages = errorMessages; @@ -176,7 +151,7 @@ public CssHandler(Policy policy, LinkedList embeddedStyleSheets, this.validator = new CssValidator(policy); // Create a queue of all style sheets that need to be validated to // account for any sheets that may be imported by the current CSS - this.importedStyleSheets = (embeddedStyleSheets != null ? 
embeddedStyleSheets : new LinkedList()); + this.importedStyleSheets = new LinkedList(); this.tagName = tagName; this.isInline = (tagName != null); } diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 79e4628a..cba081e9 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -386,6 +386,7 @@ private static InputStream toByteArrayStream(InputStream in) throws PolicyExcept private static Element getDocumentElementFromSource(InputSource source, boolean schemaValidationEnabled) throws ParserConfigurationException, SAXException, IOException { + // FIXME: remove boolean schemaValidationEnabled from this API and refactor all callers. DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); @@ -397,11 +398,10 @@ private static Element getDocumentElementFromSource(InputSource source, boolean dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); dbf.setFeature(LOAD_EXTERNAL_DTD, false); - if (schemaValidationEnabled) { - getPolicySchema(); - dbf.setNamespaceAware(true); - dbf.setSchema(schema); - } + // Schema validation is always required now. So turn it on. + getPolicySchema(); + dbf.setNamespaceAware(true); + dbf.setSchema(schema); DocumentBuilder db = dbf.newDocumentBuilder(); db.setErrorHandler(new SAXErrorHandler()); diff --git a/src/main/java/org/owasp/validator/html/TagMatcher.java b/src/main/java/org/owasp/validator/html/TagMatcher.java index 3f55cab2..0695efc6 100644 --- a/src/main/java/org/owasp/validator/html/TagMatcher.java +++ b/src/main/java/org/owasp/validator/html/TagMatcher.java @@ -43,9 +43,9 @@ public TagMatcher(Iterable allowedValues) { /** * Examines if this tag matches the values in this matcher. 
* - * Please note that this is case-insensitive, which is ok for html and xhtml, but not really for xml + * Please note that this is case-insensitive, which is OK for HTML, but not really for XML * @param tagName The tag name to look for - * @return true if the tag name matches this mach + * @return true if the tag name matches this matcher */ public boolean matches(String tagName) { return allowedLowercase.contains(tagName.toLowerCase()); diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java index ed6008f6..9a6d19cf 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyTest.java @@ -1502,10 +1502,8 @@ public void testGithubIssue81() throws ScanException, PolicyException { public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException { // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" // so the validations based on regexp passed and a browser would load "&:" together. - // All this when not using the XHTML serializer. // UPDATE: Using a new HTML parser library starts decoding entities like #00058 - // UPDATE 2: XHTML is no longer used assertThat(as.scan("

xss

", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript"))); assertThat(as.scan("

xss

", policy, AntiSamy.SAX).getCleanHTML(), From b85dd706c886269fb9e1c7c909b380e1ae5ae2fd Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Sat, 9 Jul 2022 16:15:44 -0400 Subject: [PATCH 32/36] Update the copyright date on all files not yet marked 2022. Eliminate one more deprecated item. Update the pom to match the changes made in the main branch. --- LICENSE | 2 +- pom.xml | 8 +-- .../org/owasp/validator/css/CssScanner.java | 2 +- .../org/owasp/validator/css/CssValidator.java | 2 +- .../css/UnknownSelectorException.java | 4 +- .../org/owasp/validator/html/AntiSamy.java | 5 +- .../owasp/validator/html/CleanResults.java | 5 +- .../owasp/validator/html/InternalPolicy.java | 4 +- .../java/org/owasp/validator/html/Policy.java | 5 +- .../owasp/validator/html/PolicyException.java | 2 +- .../owasp/validator/html/ScanException.java | 2 +- .../validator/html/model/AntiSamyPattern.java | 2 +- .../owasp/validator/html/model/Attribute.java | 15 +++-- .../owasp/validator/html/model/Property.java | 2 +- .../org/owasp/validator/html/model/Tag.java | 2 +- .../html/scan/AbstractAntiSamyScanner.java | 2 +- .../html/scan/AntiSamyDOMScanner.java | 2 +- .../owasp/validator/html/scan/Constants.java | 2 +- .../validator/html/util/ErrorMessageUtil.java | 2 +- .../html/util/HTMLEntityEncoder.java | 2 +- .../owasp/validator/html/util/URIUtils.java | 2 +- .../owasp/validator/html/util/XMLUtil.java | 2 +- src/main/resources/antisamy.xml | 55 +++++++++---------- .../html/test/AntiSamyPerformanceTest.java | 2 +- .../validator/html/test/LiteralTest.java | 2 +- 25 files changed, 68 insertions(+), 67 deletions(-) diff --git a/LICENSE b/LICENSE index 4e8f246f..b6d9437f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2007-2020, Arshan Dabirsiaghi, Jason Li +Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li All rights reserved. 
diff --git a/pom.xml b/pom.xml index dec28722..8dacb3eb 100644 --- a/pom.xml +++ b/pom.xml @@ -48,8 +48,8 @@ 1.12.0 2.11.0 1.7.36 - 4.7.0.0 - 4.7.0 + 4.7.1.0 + 4.7.1 @@ -186,7 +186,7 @@ org.apache.maven.plugins maven-assembly-plugin - 3.3.0 + 3.4.1 org.apache.maven.plugins @@ -358,7 +358,7 @@ org.apache.maven.plugins maven-site-plugin - 4.0.0-M1 + 4.0.0-M2 org.apache.maven.plugins diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index 90543d73..0f0e67ec 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/css/CssValidator.java b/src/main/java/org/owasp/validator/css/CssValidator.java index 8c92a500..f07bc31c 100644 --- a/src/main/java/org/owasp/validator/css/CssValidator.java +++ b/src/main/java/org/owasp/validator/css/CssValidator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/css/UnknownSelectorException.java b/src/main/java/org/owasp/validator/css/UnknownSelectorException.java index e127c726..31b7f640 100644 --- a/src/main/java/org/owasp/validator/css/UnknownSelectorException.java +++ b/src/main/java/org/owasp/validator/css/UnknownSelectorException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -24,12 +24,10 @@ package org.owasp.validator.css; /** - * * This exception gets thrown when there is an unrecognized Selector type parsing * the tainted CSS. 
* * @author Jason Li - * */ import org.owasp.validator.html.ScanException; diff --git a/src/main/java/org/owasp/validator/html/AntiSamy.java b/src/main/java/org/owasp/validator/html/AntiSamy.java index d9c2fd95..b1803308 100644 --- a/src/main/java/org/owasp/validator/html/AntiSamy.java +++ b/src/main/java/org/owasp/validator/html/AntiSamy.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -32,12 +32,11 @@ import java.io.Writer; /** - * * This is the only class from which the outside world should be calling. The * scan() method holds the meat and potatoes of AntiSamy. The file * contains a number of ways for scan()'ing depending on the * accessibility of the policy file. - * + * * @author Arshan Dabirsiaghi */ diff --git a/src/main/java/org/owasp/validator/html/CleanResults.java b/src/main/java/org/owasp/validator/html/CleanResults.java index ffa0a381..57498013 100644 --- a/src/main/java/org/owasp/validator/html/CleanResults.java +++ b/src/main/java/org/owasp/validator/html/CleanResults.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * @@ -98,7 +98,8 @@ public CleanResults(long startOfScan, Callable cleanHTML, /** * Return the DOM version of the clean HTML. - * return The XML Document fragment version of the clean HTML produced during the sanitization process. + * + * @return The XML Document fragment version of the clean HTML produced during the sanitization process. * This may be null, even if the clean HTML String is not null. 
*/ public DocumentFragment getCleanXMLDocumentFragment() { diff --git a/src/main/java/org/owasp/validator/html/InternalPolicy.java b/src/main/java/org/owasp/validator/html/InternalPolicy.java index cd2807cf..96a9efc3 100644 --- a/src/main/java/org/owasp/validator/html/InternalPolicy.java +++ b/src/main/java/org/owasp/validator/html/InternalPolicy.java @@ -52,7 +52,7 @@ protected InternalPolicy(ParseContext parseContext) { if (!isNoopenerAndNoreferrerAnchors) { logger.warn("The directive \"" + Policy.ANCHORS_NOOPENER_NOREFERRER + - "\" is not enabled by default. It is recommended to enable it to prevent reverse tabnabbing attacks."); + "\" is enabled by default, but disabled in this policy. It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); } } @@ -77,7 +77,7 @@ protected InternalPolicy(Policy old, Map directives, Map getResetSource) throws PolicyException { // Track whether an exception was ever thrown while processing policy file try { - return getDocumentElementFromSource(source, true); + return getDocumentElementFromSource(source); } catch (SAXException | ParserConfigurationException | IOException e) { throw new PolicyException(e); } @@ -384,9 +384,8 @@ private static InputStream toByteArrayStream(InputStream in) throws PolicyExcept return new ByteArrayInputStream(byteArray); } - private static Element getDocumentElementFromSource(InputSource source, boolean schemaValidationEnabled) + private static Element getDocumentElementFromSource(InputSource source) throws ParserConfigurationException, SAXException, IOException { - // FIXME: remove boolean schemaValidationEnabled from this API and refactor all callers. 
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/org/owasp/validator/html/PolicyException.java b/src/main/java/org/owasp/validator/html/PolicyException.java index 64087626..be840913 100644 --- a/src/main/java/org/owasp/validator/html/PolicyException.java +++ b/src/main/java/org/owasp/validator/html/PolicyException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/ScanException.java b/src/main/java/org/owasp/validator/html/ScanException.java index 47afe396..71e7ce6d 100644 --- a/src/main/java/org/owasp/validator/html/ScanException.java +++ b/src/main/java/org/owasp/validator/html/ScanException.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java b/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java index 48677af9..7a5d9bd8 100644 --- a/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java +++ b/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/model/Attribute.java b/src/main/java/org/owasp/validator/html/model/Attribute.java index 03818a91..cfd70d1f 100644 --- a/src/main/java/org/owasp/validator/html/model/Attribute.java +++ b/src/main/java/org/owasp/validator/html/model/Attribute.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold * * All rights reserved. 
* @@ -24,20 +24,25 @@ package org.owasp.validator.html.model; -import java.util.*; -import java.util.regex.Pattern; - import static org.owasp.validator.html.model.Tag.ANY_NORMAL_WHITESPACES; import static org.owasp.validator.html.model.Tag.ATTRIBUTE_DIVIDER; import static org.owasp.validator.html.model.Tag.CLOSE_ATTRIBUTE; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; + /** * A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in * order to be considered valid. * * @author Arshan Dabirsiaghi * @author Kristian Rosenvold - * */ public class Attribute { diff --git a/src/main/java/org/owasp/validator/html/model/Property.java b/src/main/java/org/owasp/validator/html/model/Property.java index 757d92d4..1f752b32 100644 --- a/src/main/java/org/owasp/validator/html/model/Property.java +++ b/src/main/java/org/owasp/validator/html/model/Property.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/model/Tag.java b/src/main/java/org/owasp/validator/html/model/Tag.java index 4c76accf..e6643a17 100644 --- a/src/main/java/org/owasp/validator/html/model/Tag.java +++ b/src/main/java/org/owasp/validator/html/model/Tag.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index 457a613d..cd81e5a3 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2021, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index a6c2c652..6c25ad4c 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -115,7 +115,7 @@ public AntiSamyDOMScanner() throws PolicyException { public CleanResults scan(String html) throws ScanException { if (html == null) { - throw new ScanException(new NullPointerException("Null html input")); + throw new ScanException(new NullPointerException("Null HTML input")); } errorMessages.clear(); diff --git a/src/main/java/org/owasp/validator/html/scan/Constants.java b/src/main/java/org/owasp/validator/html/scan/Constants.java index f84d164b..9a975fc5 100644 --- a/src/main/java/org/owasp/validator/html/scan/Constants.java +++ b/src/main/java/org/owasp/validator/html/scan/Constants.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2020, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* diff --git a/src/main/java/org/owasp/validator/html/util/ErrorMessageUtil.java b/src/main/java/org/owasp/validator/html/util/ErrorMessageUtil.java index e3ad5054..bd5afeb2 100644 --- a/src/main/java/org/owasp/validator/html/util/ErrorMessageUtil.java +++ b/src/main/java/org/owasp/validator/html/util/ErrorMessageUtil.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/util/HTMLEntityEncoder.java b/src/main/java/org/owasp/validator/html/util/HTMLEntityEncoder.java index 8526e1a8..beddae9b 100644 --- a/src/main/java/org/owasp/validator/html/util/HTMLEntityEncoder.java +++ b/src/main/java/org/owasp/validator/html/util/HTMLEntityEncoder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/util/URIUtils.java b/src/main/java/org/owasp/validator/html/util/URIUtils.java index 12bb9bc2..14140134 100644 --- a/src/main/java/org/owasp/validator/html/util/URIUtils.java +++ b/src/main/java/org/owasp/validator/html/util/URIUtils.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2020, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/main/java/org/owasp/validator/html/util/XMLUtil.java b/src/main/java/org/owasp/validator/html/util/XMLUtil.java index 8db5b47b..901256fd 100644 --- a/src/main/java/org/owasp/validator/html/util/XMLUtil.java +++ b/src/main/java/org/owasp/validator/html/util/XMLUtil.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. 
* diff --git a/src/main/resources/antisamy.xml b/src/main/resources/antisamy.xml index 72742c93..80b5ef14 100644 --- a/src/main/resources/antisamy.xml +++ b/src/main/resources/antisamy.xml @@ -5,8 +5,8 @@ W3C rules retrieved from: http://www.w3.org/TR/html401/struct/global.html --> - - + + @@ -17,8 +17,8 @@ http://www.w3.org/TR/html401/struct/global.html - - + + - + - + - + - + @@ -72,32 +72,32 @@ http://www.w3.org/TR/html401/struct/global.html - + - + - + - + - + - + - + - + - + @@ -111,28 +111,28 @@ http://www.w3.org/TR/html401/struct/global.html - + - + - + - + - - + - + - + diff --git a/src/test/java/org/owasp/validator/html/test/AntiSamyPerformanceTest.java b/src/test/java/org/owasp/validator/html/test/AntiSamyPerformanceTest.java index e2adaedb..af13761f 100644 --- a/src/test/java/org/owasp/validator/html/test/AntiSamyPerformanceTest.java +++ b/src/test/java/org/owasp/validator/html/test/AntiSamyPerformanceTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2019, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * diff --git a/src/test/java/org/owasp/validator/html/test/LiteralTest.java b/src/test/java/org/owasp/validator/html/test/LiteralTest.java index 217b42ab..94d674a6 100644 --- a/src/test/java/org/owasp/validator/html/test/LiteralTest.java +++ b/src/test/java/org/owasp/validator/html/test/LiteralTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2020, Arshan Dabirsiaghi, Jason Li + * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li * * All rights reserved. * From 8eb24879c10e8db754638ed303b976d18ca6d4b7 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Sat, 9 Jul 2022 17:48:07 -0400 Subject: [PATCH 33/36] Add spotless apply maven plugin to configuration. It will now reformat everything automatically every time mvn compile is run. And reformat everything so pretty much every file has had its format changed some. 
--- DevStyleXml.prefs | 5 + SECURITY.md | 4 +- pom.xml | 432 +- .../org/owasp/validator/css/CssHandler.java | 951 ++- .../org/owasp/validator/css/CssScanner.java | 538 +- .../org/owasp/validator/css/CssValidator.java | 674 +- .../css/UnknownSelectorException.java | 37 +- .../org/owasp/validator/html/AntiSamy.java | 286 +- .../owasp/validator/html/CleanResults.java | 258 +- .../owasp/validator/html/InternalPolicy.java | 317 +- .../java/org/owasp/validator/html/Policy.java | 1843 +++--- .../owasp/validator/html/PolicyException.java | 34 +- .../owasp/validator/html/ScanException.java | 36 +- .../org/owasp/validator/html/TagMatcher.java | 37 +- .../validator/html/model/AntiSamyPattern.java | 56 +- .../owasp/validator/html/model/Attribute.java | 298 +- .../owasp/validator/html/model/Property.java | 101 +- .../org/owasp/validator/html/model/Tag.java | 220 +- .../validator/html/scan/ASHTMLSerializer.java | 130 +- .../html/scan/AbstractAntiSamyScanner.java | 117 +- .../html/scan/AntiSamyDOMScanner.java | 1345 ++-- .../html/scan/AntiSamySAXScanner.java | 316 +- .../owasp/validator/html/scan/Constants.java | 107 +- .../validator/html/scan/MagicSAXFilter.java | 793 +-- .../validator/html/util/ErrorMessageUtil.java | 113 +- .../html/util/HTMLEntityEncoder.java | 79 +- .../owasp/validator/html/util/URIUtils.java | 347 +- .../owasp/validator/html/util/XMLUtil.java | 350 +- src/main/resources/antisamy-anythinggoes.xml | 5094 ++++++++------- src/main/resources/antisamy-ebay.xml | 4796 ++++++++------- src/main/resources/antisamy-myspace.xml | 5106 ++++++++------- src/main/resources/antisamy-slashdot.xml | 376 +- src/main/resources/antisamy-tinymce.xml | 413 +- src/main/resources/antisamy.xml | 5460 ++++++++--------- .../owasp/validator/css/CssScannerTest.java | 155 +- .../test/AntiSamyMalformedPolicyTest.java | 46 +- .../html/test/AntiSamyPerformanceTest.java | 153 +- .../validator/html/test/AntiSamyTest.java | 4003 +++++++----- .../html/test/ESAPIInvalidPolicyTest.java | 75 +- 
.../validator/html/test/LiteralTest.java | 124 +- .../owasp/validator/html/test/PolicyTest.java | 566 +- .../owasp/validator/html/test/TestPolicy.java | 95 +- 42 files changed, 18679 insertions(+), 17607 deletions(-) create mode 100644 DevStyleXml.prefs diff --git a/DevStyleXml.prefs b/DevStyleXml.prefs new file mode 100644 index 00000000..5805e757 --- /dev/null +++ b/DevStyleXml.prefs @@ -0,0 +1,5 @@ +eclipse.preferences.version=1 +indentationChar=space +indentationSize=4 +lineWidth=140 +formatCommentJoinLines=true diff --git a/SECURITY.md b/SECURITY.md index 7b7dc0a2..e37a0537 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -7,8 +7,8 @@ GitHut issues list (for both open and closed issues) to see if it has already be If it has not, then please contact Dave Wichers (dave.wichers at owasp.org) _directly_. Please do **not** report any suspected vulnerabilities via GitHub issues -as we wish to keep our users secure while a patch is implemented and deployed. -This is because if this is reported as a GitHub issue, it more or less is equivalent +as we wish to keep our users secure while a patch is implemented and deployed. +This is because if this is reported as a GitHub issue, it more or less is equivalent to dropping a 0-day on all applications using AntiSamy. Instead, we encourage responsible disclosure. 
diff --git a/pom.xml b/pom.xml index 8dacb3eb..2e2734b6 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 org.owasp.antisamy antisamy @@ -20,20 +21,20 @@ https://github.com/nahsra/antisamy - - nahsra - Arshan Dabirsiaghi - arshan.dabirsiaghi@gmail.com - + + nahsra + Arshan Dabirsiaghi + arshan.dabirsiaghi@gmail.com + - - BSD 3 - https://opensource.org/licenses/BSD-3-Clause - + + BSD 3 + https://opensource.org/licenses/BSD-3-Clause + - + scm:git:git@github.com:nahsra/antisamy.git scm:git:git@github.com:nahsra/antisamy.git @@ -53,12 +54,12 @@ - - release - - false - - + + release + + false + + @@ -124,8 +125,8 @@ ${version.slf4j} test - + xerces xercesImpl @@ -183,15 +184,15 @@ - - org.apache.maven.plugins - maven-assembly-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-dependency-plugin - 3.3.0 + + org.apache.maven.plugins + maven-assembly-plugin + 3.4.1 + + + org.apache.maven.plugins + maven-dependency-plugin + 3.3.0 commons-io:commons-io @@ -199,21 +200,21 @@ org.slf4j:slf4j-simple - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.4.0 - - - true - - - - org.apache.maven.plugins - maven-release-plugin - 3.0.0-M6 - + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.4.0 + + + true + + + + org.apache.maven.plugins + maven-release-plugin + 3.0.0-M6 + @@ -262,7 +263,9 @@ enforce-bytecode-version - enforce + + enforce + @@ -280,7 +283,9 @@ enforce-jdk-version - enforce + + enforce + @@ -300,7 +305,9 @@ sign-artifacts verify - sign + + sign + @@ -325,30 +332,32 @@ - org.apache.maven.plugins - maven-javadoc-plugin - - - attach-javadocs - package - jar - - + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + package + + jar + + + org.apache.maven.plugins maven-jxr-plugin - 3.2.0 + 3.2.0 - org.apache.maven.plugins - maven-pmd-plugin - 3.17.0 + org.apache.maven.plugins + maven-pmd-plugin + 3.17.0 - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.3.0 + org.apache.maven.plugins + 
maven-project-info-reports-plugin + 3.3.0 org.apache.maven.plugins @@ -361,16 +370,18 @@ 4.0.0-M2 - org.apache.maven.plugins - maven-source-plugin - 3.2.1 - - - attach-sources - package - jar-no-fork - - + org.apache.maven.plugins + maven-source-plugin + 3.2.1 + + + attach-sources + package + + jar-no-fork + + + org.apache.maven.plugins @@ -387,10 +398,12 @@ cyclonedx-maven-plugin 2.7.0 - - package - makeBom - + + package + + makeBom + + @@ -400,44 +413,123 @@ prepare-agent - prepare-agent + + prepare-agent + report test - report + + report + jacoco-check test - check + + check + - false - - CLASS - - METHOD - MISSEDCOUNT - 0 - - + false + + + CLASS + + + METHOD + MISSEDCOUNT + 0 + + + + + + com.diffplug.spotless + spotless-maven-plugin + 2.22.8 + + + origin/main + + + + + + *.md + + + target/**/*.* + + + + + + false + true + 4 + + + + + + **/*.xml + + + target/**/*.* + + + XML + + DevStyleXml.prefs + + + + + + + + + + + + + + 1.7 + + com.google.googlejavaformat:google-java-format + + + + + + + spotless-apply + compile + + apply + + + + + com.github.spotbugs spotbugs-maven-plugin ${version.spotbugs.maven} - - - com.github.spotbugs - spotbugs - ${version.spotbugs} - + + + com.github.spotbugs + spotbugs + ${version.spotbugs} + @@ -449,84 +541,84 @@ - + - - com.github.spotbugs - spotbugs-maven-plugin - - - - com.h3xstream.findsecbugs - findsecbugs-plugin - ${version.findsecbugs} - - - Max - false - src/test/spotbugsFilterFile.xml - - + + com.github.spotbugs + spotbugs-maven-plugin + + + + com.h3xstream.findsecbugs + findsecbugs-plugin + ${version.findsecbugs} + + + Max + false + src/test/spotbugsFilterFile.xml + + - - org.apache.maven.plugins - maven-javadoc-plugin - - - org.apache.maven.plugins - maven-jxr-plugin - - - org.apache.maven.plugins - maven-pmd-plugin - - ${project.java.target} - utf-8 - - - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - - index - dependency-convergence - - - - - false - - + + org.apache.maven.plugins + 
maven-javadoc-plugin + + + org.apache.maven.plugins + maven-jxr-plugin + + + org.apache.maven.plugins + maven-pmd-plugin + + ${project.java.target} + utf-8 + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + + index + dependency-convergence + + + + + false + + - - org.codehaus.mojo - versions-maven-plugin - - - - dependency-updates-report - plugin-updates-report - property-updates-report - - - - - - - org.jacoco - jacoco-maven-plugin - - - report - - - + + org.codehaus.mojo + versions-maven-plugin + + + + dependency-updates-report + plugin-updates-report + property-updates-report + + + + + + + org.jacoco + jacoco-maven-plugin + + + + report + + + + diff --git a/src/main/java/org/owasp/validator/css/CssHandler.java b/src/main/java/org/owasp/validator/css/CssHandler.java index 18543611..6704c54d 100644 --- a/src/main/java/org/owasp/validator/css/CssHandler.java +++ b/src/main/java/org/owasp/validator/css/CssHandler.java @@ -1,11 +1,11 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without + * + * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, + * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation @@ -35,13 +35,11 @@ import java.util.LinkedList; import java.util.List; import java.util.ResourceBundle; - import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.util.ErrorMessageUtil; import org.owasp.validator.html.util.HTMLEntityEncoder; - import org.w3c.css.sac.CSSException; import org.w3c.css.sac.DocumentHandler; import org.w3c.css.sac.InputSource; @@ -51,487 +49,472 @@ import org.w3c.css.sac.SelectorList; /** - * A implementation of a SAC DocumentHandler for CSS validation. The appropriate - * validation method is called whenever the handler is invoked by the parser. - * The handler also builds a clean CSS document as the original CSS is scanned. - * - * NOTE: keeping state in this class is not ideal as handler style parsing a la - * SAX should generally be event driven. However, there is not a fully - * implemented "DOM" equivalent to CSS at this time. Java has a StyleSheet class - * that could accomplish this "DOM" like behavior but it has yet to be fully - * implemented. - * + * A implementation of a SAC DocumentHandler for CSS validation. The appropriate validation method + * is called whenever the handler is invoked by the parser. The handler also builds a clean CSS + * document as the original CSS is scanned. + * + *

NOTE: keeping state in this class is not ideal as handler style parsing a la SAX should + * generally be event driven. However, there is not a fully implemented "DOM" equivalent to CSS at + * this time. Java has a StyleSheet class that could accomplish this "DOM" like behavior but it has + * yet to be fully implemented. + * * @see javax.swing.text.html.StyleSheet * @author Jason Li */ public class CssHandler implements DocumentHandler { - /** - * The style sheet as it is being built by the handler - */ - private StringBuffer styleSheet = new StringBuffer(); - - /** - * The validator to use when CSS constituents are encountered - */ - private final CssValidator validator; - - /** - * The policy file to use in validation - */ - private final InternalPolicy policy; - - /** - * The error messages - */ - private final Collection errorMessages; - - /** - * The error message bundle to pull from. - */ - private ResourceBundle messages; - - /** - * A queue of imported stylesheets; used to track imported stylesheets - */ - private final LinkedList importedStyleSheets; - - /** - * The tag currently being examined (if any); used for inline stylesheet - * error messages - */ - private final String tagName; - - /** - * Indicates whether we are scanning a stylesheet or an inline declaration. - * true if this is an inline declaration; false otherwise - */ - private final boolean isInline; - - /** - * Indicates whether the handler is currently parsing the contents between - * an open selector tag and an close selector tag - */ - private boolean selectorOpen = false; - - /** - * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets - * produced by this constructor is now available via the getImportedStylesheetsURIList() method. - * This constructor to be used when there is no tag name associated with this inline style. 
- * - * @param policy - * the policy to use - * @param errorMessages - * the List of error messages to add error messages too if there are errors - * @param messages - * the error message bundle to pull from - */ - public CssHandler(Policy policy, List errorMessages, ResourceBundle messages) { - this(policy, errorMessages, messages, null); - } - - /** - * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets - * produced by this constructor is available via the getImportedStylesheetsURIList() method. - * - * @param policy - * the policy to use - * @param errorMessages - * the List of error messages to add error messages too if there are errors - * @param messages - * the error message bundle to pull from - * @param tagName - * the tag name associated with this inline style - */ - public CssHandler(Policy policy, List errorMessages, ResourceBundle messages, String tagName) { - assert policy instanceof InternalPolicy : policy.getClass(); - this.policy = (InternalPolicy) policy; - this.errorMessages = errorMessages; - this.messages = messages; - this.validator = new CssValidator(policy); - // Create a queue of all style sheets that need to be validated to - // account for any sheets that may be imported by the current CSS - this.importedStyleSheets = new LinkedList(); - this.tagName = tagName; - this.isInline = (tagName != null); - } - - /** - * Returns the cleaned stylesheet. - * - * @return the cleaned stylesheet. - */ - public String getCleanStylesheet() { - // Always ensure results contain most recent generation of stylesheet - return styleSheet.toString(); - } - - /** - * Returns a list of imported stylesheets from the main parsed stylesheet. - * - * @return the import stylesheet URI list. - */ - public LinkedList getImportedStylesheetsURIList() { - return importedStyleSheets; - } - - /** - * Empties the stylesheet buffer. 
- */ - public void emptyStyleSheet() { - styleSheet.delete(0, styleSheet.length()); - } - - /** - * Returns the error messages generated during parsing. - * @return the error messages generated during parsing - */ - public Collection getErrorMessages() { - return new ArrayList(errorMessages); - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#comment(java.lang.String) - */ - public void comment(String text) throws CSSException { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_COMMENT_REMOVED, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(text) })); - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#ignorableAtRule(java.lang.String) - */ - public void ignorableAtRule(String atRule) throws CSSException { - // this method is called when the parser hits an unrecognized - // @-rule. Like the page/media/font declarations, this is - // CSS2+ stuff - if (tagName != null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_TAG_RULE_NOTFOUND, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(tagName), - HTMLEntityEncoder.htmlEntityEncode(atRule) - })); - } else { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_STYLESHEET_RULE_NOTFOUND, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(atRule) - })); - } - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#importStyle(java.lang.String, - * org.w3c.css.sac.SACMediaList, java.lang.String) - */ - public void importStyle(String uri, SACMediaList media, - String defaultNamespaceURI) throws CSSException { - - if (!policy.isEmbedStyleSheets()) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_DISABLED, - new Object[] {})); - return; - } - - try { - // check for non-nullness (validate after canonicalization) - if (uri == null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - 
ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, - new Object[] {})); - return; - } - - URI importedStyleSheet = new URI(uri); - - // canonicalize the URI - importedStyleSheet.normalize(); - - // validate the URL - - if (!policy.getCommonRegularExpressions("offsiteURL").matches(importedStyleSheet.toString()) - && !policy.getCommonRegularExpressions("onsiteURL").matches(importedStyleSheet.toString())) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(uri) })); - return; - } - - if (!importedStyleSheet.isAbsolute()) { - // we have no concept of relative reference for free form - // text as an end user can't know where the corresponding - // free form will end up - if (tagName != null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_TAG_RELATIVE, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(tagName), - HTMLEntityEncoder.htmlEntityEncode(uri) })); - } else { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_STYLESHEET_RELATIVE, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(uri) })); - } - return; - } - - importedStyleSheets.add(importedStyleSheet); - } catch (URISyntaxException use) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(uri) })); - } - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#namespaceDeclaration(java.lang.String, - * java.lang.String) - */ - public void namespaceDeclaration(String prefix, String uri) - throws CSSException { - // CSS3 - Namespace declaration - ignore for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#startDocument(org.w3c.css.sac.InputSource) - */ - public void startDocument(InputSource arg0) throws CSSException { - // no-op - } - - /* - * (non-Javadoc) - * - * 
@see org.w3c.css.sac.DocumentHandler#endDocument(org.w3c.css.sac.InputSource) - */ - public void endDocument(InputSource source) throws CSSException { - // no-op - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#startFontFace() - */ - public void startFontFace() throws CSSException { - // CSS2 Font Face declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#endFontFace() - */ - public void endFontFace() throws CSSException { - // CSS2 Font Face declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#startMedia(org.w3c.css.sac.SACMediaList) - */ - public void startMedia(SACMediaList media) throws CSSException { - // CSS2 Media declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#endMedia(org.w3c.css.sac.SACMediaList) - */ - public void endMedia(SACMediaList media) throws CSSException { - // CSS2 Media declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#startPage(java.lang.String, - * java.lang.String) - */ - public void startPage(String name, String pseudoPage) throws CSSException { - // CSS2 Page declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#endPage(java.lang.String, - * java.lang.String) - */ - public void endPage(String name, String pseudoPage) throws CSSException { - // CSS2 Page declaration - ignore this for now - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#startSelector(org.w3c.css.sac.SelectorList) - */ - public void startSelector(SelectorList selectors) throws CSSException { - - // keep track of number of valid selectors from this rule - int selectorCount = 0; - - // check each selector from this rule - for (int i = 0; i < selectors.getLength(); i++) { - Selector selector = selectors.item(i); - - if (selector != null) { - String 
selectorName = selector.toString(); - - boolean isValidSelector = false; - - try { - isValidSelector = validator.isValidSelector(selectorName, - selector); - } catch (ScanException se) { - if (tagName != null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_NOTFOUND, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(selector.toString()) - })); - } else { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_NOTFOUND, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(tagName), - HTMLEntityEncoder.htmlEntityEncode(selector.toString()) - })); - } - } - - // if the selector is valid, add to list - if (isValidSelector) { - if (selectorCount > 0) { - styleSheet.append(','); - styleSheet.append(' '); - } - styleSheet.append(selectorName); - - selectorCount++; - - } else if (tagName != null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_DISALLOWED, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(tagName), - HTMLEntityEncoder.htmlEntityEncode(selector.toString()) - })); - - } else { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_DISALLOWED, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(selector.toString()) - })); - } - } - } - - // if and only if there were selectors that were valid, append - // appropriate open brace and set state to within selector - if (selectorCount > 0) { - styleSheet.append(' '); - styleSheet.append('{'); - styleSheet.append('\n'); - selectorOpen = true; - } - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#endSelector(org.w3c.css.sac.SelectorList) - */ - public void endSelector(SelectorList selectors) throws CSSException { - // if we are in a state within a selector, close brace - if (selectorOpen) { - styleSheet.append('}'); - styleSheet.append('\n'); - } - - // reset state - 
selectorOpen = false; - } - - /* - * (non-Javadoc) - * - * @see org.w3c.css.sac.DocumentHandler#property(java.lang.String, - * org.w3c.css.sac.LexicalUnit, boolean) - */ - public void property(String name, LexicalUnit value, boolean important) - throws CSSException { - // only bother validating and building if we are either inline or within - // a selector tag - - if (!selectorOpen && !isInline) { - return; - } - - // validate the property - if (validator.isValidProperty(name, value)) { - - if (!isInline) { styleSheet.append('\t'); } - styleSheet.append(name); - styleSheet.append(':'); - - // append all values - while (value != null) { - styleSheet.append(' '); - styleSheet.append(validator.lexicalValueToString(value)); - value = value.getNextLexicalUnit(); - } - if (important) { styleSheet.append(" !important"); } - styleSheet.append(';'); - if (!isInline) { styleSheet.append('\n'); } - - } else if (tagName != null) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_TAG_PROPERTY_INVALID, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(tagName), - HTMLEntityEncoder.htmlEntityEncode(name), - HTMLEntityEncoder.htmlEntityEncode(validator - .lexicalValueToString(value)) })); - } else { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_STYLESHEET_PROPERTY_INVALID, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(name), - HTMLEntityEncoder.htmlEntityEncode(validator - .lexicalValueToString(value)) })); - } - } + /** The style sheet as it is being built by the handler */ + private StringBuffer styleSheet = new StringBuffer(); + + /** The validator to use when CSS constituents are encountered */ + private final CssValidator validator; + + /** The policy file to use in validation */ + private final InternalPolicy policy; + + /** The error messages */ + private final Collection errorMessages; + + /** The error message bundle to pull from. 
*/ + private ResourceBundle messages; + + /** A queue of imported stylesheets; used to track imported stylesheets */ + private final LinkedList importedStyleSheets; + + /** The tag currently being examined (if any); used for inline stylesheet error messages */ + private final String tagName; + + /** + * Indicates whether we are scanning a stylesheet or an inline declaration. true if this is an + * inline declaration; false otherwise + */ + private final boolean isInline; + + /** + * Indicates whether the handler is currently parsing the contents between an open selector tag + * and an close selector tag + */ + private boolean selectorOpen = false; + + /** + * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets + * produced by this constructor is now available via the getImportedStylesheetsURIList() method. + * This constructor to be used when there is no tag name associated with this inline style. + * + * @param policy the policy to use + * @param errorMessages the List of error messages to add error messages too if there are errors + * @param messages the error message bundle to pull from + */ + public CssHandler(Policy policy, List errorMessages, ResourceBundle messages) { + this(policy, errorMessages, messages, null); + } + + /** + * Constructs a handler for stylesheets using the given policy. The List of embedded stylesheets + * produced by this constructor is available via the getImportedStylesheetsURIList() method. 
+ * + * @param policy the policy to use + * @param errorMessages the List of error messages to add error messages too if there are errors + * @param messages the error message bundle to pull from + * @param tagName the tag name associated with this inline style + */ + public CssHandler( + Policy policy, List errorMessages, ResourceBundle messages, String tagName) { + assert policy instanceof InternalPolicy : policy.getClass(); + this.policy = (InternalPolicy) policy; + this.errorMessages = errorMessages; + this.messages = messages; + this.validator = new CssValidator(policy); + // Create a queue of all style sheets that need to be validated to + // account for any sheets that may be imported by the current CSS + this.importedStyleSheets = new LinkedList(); + this.tagName = tagName; + this.isInline = (tagName != null); + } + + /** + * Returns the cleaned stylesheet. + * + * @return the cleaned stylesheet. + */ + public String getCleanStylesheet() { + // Always ensure results contain most recent generation of stylesheet + return styleSheet.toString(); + } + + /** + * Returns a list of imported stylesheets from the main parsed stylesheet. + * + * @return the import stylesheet URI list. + */ + public LinkedList getImportedStylesheetsURIList() { + return importedStyleSheets; + } + + /** Empties the stylesheet buffer. */ + public void emptyStyleSheet() { + styleSheet.delete(0, styleSheet.length()); + } + + /** + * Returns the error messages generated during parsing. 
+ * + * @return the error messages generated during parsing + */ + public Collection getErrorMessages() { + return new ArrayList(errorMessages); + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#comment(java.lang.String) + */ + public void comment(String text) throws CSSException { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_COMMENT_REMOVED, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(text)})); + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#ignorableAtRule(java.lang.String) + */ + public void ignorableAtRule(String atRule) throws CSSException { + // this method is called when the parser hits an unrecognized @-rule. Like the page/media/font + // declarations, this is CSS2+ stuff + if (tagName != null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_TAG_RULE_NOTFOUND, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(tagName), + HTMLEntityEncoder.htmlEntityEncode(atRule) + })); + } else { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_STYLESHEET_RULE_NOTFOUND, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(atRule)})); + } + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#importStyle(java.lang.String, + * org.w3c.css.sac.SACMediaList, java.lang.String) + */ + public void importStyle(String uri, SACMediaList media, String defaultNamespaceURI) + throws CSSException { + + if (!policy.isEmbedStyleSheets()) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, ErrorMessageUtil.ERROR_CSS_IMPORT_DISABLED, new Object[] {})); + return; + } + + try { + // check for non-nullness (validate after canonicalization) + if (uri == null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, new Object[] {})); + return; + } + + URI importedStyleSheet = new URI(uri); + + // canonicalize the URI + 
importedStyleSheet.normalize(); + + // validate the URL + + if (!policy.getCommonRegularExpressions("offsiteURL").matches(importedStyleSheet.toString()) + && !policy + .getCommonRegularExpressions("onsiteURL") + .matches(importedStyleSheet.toString())) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); + return; + } + + if (!importedStyleSheet.isAbsolute()) { + // we have no concept of relative reference for free form text as an end user can't know + // where the corresponding free form will end up + if (tagName != null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_TAG_RELATIVE, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(tagName), + HTMLEntityEncoder.htmlEntityEncode(uri) + })); + } else { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_STYLESHEET_RELATIVE, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); + } + return; + } + + importedStyleSheets.add(importedStyleSheet); + } catch (URISyntaxException use) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_IMPORT_URL_INVALID, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(uri)})); + } + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#namespaceDeclaration(java.lang.String, + * java.lang.String) + */ + public void namespaceDeclaration(String prefix, String uri) throws CSSException { + // CSS3 - Namespace declaration - ignore for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#startDocument(org.w3c.css.sac.InputSource) + */ + public void startDocument(InputSource arg0) throws CSSException { + // no-op + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#endDocument(org.w3c.css.sac.InputSource) + */ + public void endDocument(InputSource source) throws CSSException { 
+ // no-op + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#startFontFace() + */ + public void startFontFace() throws CSSException { + // CSS2 Font Face declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#endFontFace() + */ + public void endFontFace() throws CSSException { + // CSS2 Font Face declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#startMedia(org.w3c.css.sac.SACMediaList) + */ + public void startMedia(SACMediaList media) throws CSSException { + // CSS2 Media declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#endMedia(org.w3c.css.sac.SACMediaList) + */ + public void endMedia(SACMediaList media) throws CSSException { + // CSS2 Media declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#startPage(java.lang.String, + * java.lang.String) + */ + public void startPage(String name, String pseudoPage) throws CSSException { + // CSS2 Page declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#endPage(java.lang.String, + * java.lang.String) + */ + public void endPage(String name, String pseudoPage) throws CSSException { + // CSS2 Page declaration - ignore this for now + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#startSelector(org.w3c.css.sac.SelectorList) + */ + public void startSelector(SelectorList selectors) throws CSSException { + + // keep track of number of valid selectors from this rule + int selectorCount = 0; + + // check each selector from this rule + for (int i = 0; i < selectors.getLength(); i++) { + Selector selector = selectors.item(i); + + if (selector != null) { + String selectorName = selector.toString(); + + boolean isValidSelector = false; + + try { + isValidSelector = validator.isValidSelector(selectorName, selector); + 
} catch (ScanException se) { + if (tagName != null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_NOTFOUND, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(selector.toString())})); + } else { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_NOTFOUND, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(tagName), + HTMLEntityEncoder.htmlEntityEncode(selector.toString()) + })); + } + } + + // if the selector is valid, add to list + if (isValidSelector) { + if (selectorCount > 0) { + styleSheet.append(','); + styleSheet.append(' '); + } + styleSheet.append(selectorName); + + selectorCount++; + + } else if (tagName != null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_TAG_SELECTOR_DISALLOWED, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(tagName), + HTMLEntityEncoder.htmlEntityEncode(selector.toString()) + })); + + } else { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_STYLESHEET_SELECTOR_DISALLOWED, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(selector.toString())})); + } + } + } + + // if and only if there were selectors that were valid, append appropriate open brace and set + // state to within selector + if (selectorCount > 0) { + styleSheet.append(' '); + styleSheet.append('{'); + styleSheet.append('\n'); + selectorOpen = true; + } + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#endSelector(org.w3c.css.sac.SelectorList) + */ + public void endSelector(SelectorList selectors) throws CSSException { + // if we are in a state within a selector, close brace + if (selectorOpen) { + styleSheet.append('}'); + styleSheet.append('\n'); + } + + // reset state + selectorOpen = false; + } + + /* + * (non-Javadoc) + * + * @see org.w3c.css.sac.DocumentHandler#property(java.lang.String, + * org.w3c.css.sac.LexicalUnit, 
boolean) + */ + public void property(String name, LexicalUnit value, boolean important) throws CSSException { + // only bother validating and building if we are either inline or within a selector tag + + if (!selectorOpen && !isInline) { + return; + } + + // validate the property + if (validator.isValidProperty(name, value)) { + + if (!isInline) { + styleSheet.append('\t'); + } + styleSheet.append(name); + styleSheet.append(':'); + + // append all values + while (value != null) { + styleSheet.append(' '); + styleSheet.append(validator.lexicalValueToString(value)); + value = value.getNextLexicalUnit(); + } + if (important) { + styleSheet.append(" !important"); + } + styleSheet.append(';'); + if (!isInline) { + styleSheet.append('\n'); + } + + } else if (tagName != null) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_TAG_PROPERTY_INVALID, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(tagName), + HTMLEntityEncoder.htmlEntityEncode(name), + HTMLEntityEncoder.htmlEntityEncode(validator.lexicalValueToString(value)) + })); + } else { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_STYLESHEET_PROPERTY_INVALID, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(name), + HTMLEntityEncoder.htmlEntityEncode(validator.lexicalValueToString(value)) + })); + } + } } diff --git a/src/main/java/org/owasp/validator/css/CssScanner.java b/src/main/java/org/owasp/validator/css/CssScanner.java index 0f0e67ec..2aba67da 100644 --- a/src/main/java/org/owasp/validator/css/CssScanner.java +++ b/src/main/java/org/owasp/validator/css/CssScanner.java @@ -40,17 +40,16 @@ import java.util.ResourceBundle; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.apache.batik.css.parser.ParseException; import org.apache.batik.css.parser.Parser; -import org.apache.hc.core5.http.ClassicHttpResponse; -import org.apache.hc.core5.http.HttpEntity; -import 
org.apache.hc.core5.http.HttpStatus; import org.apache.hc.client5.http.ClientProtocolException; import org.apache.hc.client5.http.classic.HttpClient; -import org.apache.hc.client5.http.config.RequestConfig; import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.config.RequestConfig; import org.apache.hc.client5.http.impl.classic.HttpClientBuilder; +import org.apache.hc.core5.http.ClassicHttpResponse; +import org.apache.hc.core5.http.HttpEntity; +import org.apache.hc.core5.http.HttpStatus; import org.apache.hc.core5.http.io.HttpClientResponseHandler; import org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.util.Timeout; @@ -63,307 +62,290 @@ import org.w3c.css.sac.InputSource; /** - * Encapsulates the parsing and validation of a CSS stylesheet or inline - * declaration. To make use of this class, instantiate the scanner with the - * desired policy and call either scanInlineSheet() or - * scanStyleSheet as appropriate. + * Encapsulates the parsing and validation of a CSS stylesheet or inline declaration. To make use of + * this class, instantiate the scanner with the desired policy and call either + * scanInlineSheet() or scanStyleSheet as appropriate. * * @see #scanInlineStyle(String, String, int) * @see #scanStyleSheet(String, int) - * * @author Jason Li */ public class CssScanner { - protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000); - - private static final String CDATA = "^\\s*\\s*$"; - - /** - * The parser to be used in any scanning + protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000); + + private static final String CDATA = "^\\s*\\s*$"; + + /** The parser to be used in any scanning */ + private final Parser parser = new CssParser(); + + /** The policy file to be used in any scanning */ + private final InternalPolicy policy; + + /** The message bundled to pull error messages from. 
*/ + private final ResourceBundle messages; + + /** The message bundled to pull error messages from. */ + private final boolean shouldParseImportedStyles; + + private static final Pattern cdataMatchPattern = Pattern.compile(CDATA, Pattern.DOTALL); + + /** + * Constructs a scanner based on the given policy. This version of the constructor defaults + * shouldParseImportedStyles to false. Look at the other constructor for a description of that + * parameter. + * + * @param policy the policy to follow when scanning + * @param messages the error message bundle to pull from + */ + public CssScanner(InternalPolicy policy, ResourceBundle messages) { + this(policy, messages, false); + } + + /** + * Constructs a scanner based on the given policy. + * + * @param policy the policy to follow when scanning + * @param messages the error message bundle to pull from + * @param shouldParseImportedStyles Flag to indicate if styles within @import directives should be + * imported and parsed in the resulting style sheet. This boolean determines if URLs should be + * recognized when parsing styles (i.e., to fetch them or ignore them). + */ + public CssScanner( + InternalPolicy policy, ResourceBundle messages, boolean shouldParseImportedStyles) { + this.policy = policy; + this.messages = messages; + this.shouldParseImportedStyles = shouldParseImportedStyles; + } + + /** + * Scans the contents of a full stylesheet (ex. 
a file based stylesheet or the complete stylesheet + * contents as declared within <style> tags) + * + * @param taintedCss a String containing the contents of the CSS stylesheet to + * validate + * @param sizeLimit the limit on the total size in bytes of any imported stylesheets + * @return a CleanResuts object containing the results of the scan + * @throws ScanException if an error occurs during scanning + */ + public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws ScanException { + + long startOfScan = System.currentTimeMillis(); + List errorMessages = new ArrayList(); + + /* Check to see if the text starts with (\s)*(\s)*. */ - private final Parser parser = new CssParser(); - /** - * The policy file to be used in any scanning - */ - private final InternalPolicy policy; + Matcher m = cdataMatchPattern.matcher(taintedCss); + boolean isCdata = m.matches(); - /** - * The message bundled to pull error messages from. - */ - private final ResourceBundle messages; - - /** - * The message bundled to pull error messages from. - */ - private final boolean shouldParseImportedStyles; - - private static final Pattern cdataMatchPattern = Pattern.compile(CDATA, Pattern.DOTALL); - - /** - * Constructs a scanner based on the given policy. This version of the constructor defaults - * shouldParseImportedStyles to false. Look at the other constructor for a description of that parameter. - * - * @param policy - * the policy to follow when scanning - * @param messages - * the error message bundle to pull from - */ - public CssScanner(InternalPolicy policy, ResourceBundle messages) { - this(policy, messages, false); + if (isCdata) { + taintedCss = m.group(1); } - /** - * Constructs a scanner based on the given policy. 
- * - * @param policy - * the policy to follow when scanning - * @param messages - * the error message bundle to pull from - * @param shouldParseImportedStyles - * Flag to indicate if styles within @import directives should be imported and - * parsed in the resulting style sheet. This boolean determines if URLs should be - * recognized when parsing styles (i.e., to fetch them or ignore them). - */ - public CssScanner(InternalPolicy policy, ResourceBundle messages, boolean shouldParseImportedStyles) { - this.policy = policy; - this.messages = messages; - this.shouldParseImportedStyles = shouldParseImportedStyles; + CssHandler handler = new CssHandler(policy, errorMessages, messages); + + // parse the stylesheet + parser.setDocumentHandler(handler); + + try { + // parse the style declaration + // note this does not count against the size limit because it + // should already have been counted by the caller since it was + // embedded in the HTML + parser.parseStyleSheet(new InputSource(new StringReader(taintedCss))); + } catch (IOException | ParseException e) { + /* + * ParseException, from batik, is unfortunately a RuntimeException. + */ + throw new ScanException(e); } - /** - * Scans the contents of a full stylesheet (ex. a file based stylesheet - * or the complete stylesheet contents as declared within <style> - * tags) - * - * @param taintedCss - * a String containing the contents of the - * CSS stylesheet to validate - * @param sizeLimit - * the limit on the total size in bytes of any imported - * stylesheets - * @return a CleanResuts object containing the results of - * the scan - * @throws ScanException - * if an error occurs during scanning - */ - public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws ScanException { - - long startOfScan = System.currentTimeMillis(); - List errorMessages = new ArrayList(); - - /* Check to see if the text starts with (\s)*(\s)*. 
- */ - - Matcher m = cdataMatchPattern.matcher(taintedCss); - boolean isCdata = m.matches(); - - if (isCdata) { - taintedCss = m.group(1); - } - - CssHandler handler = new CssHandler(policy, errorMessages, messages); + String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); - // parse the stylesheet - parser.setDocumentHandler(handler); - - try { - // parse the style declaration - // note this does not count against the size limit because it - // should already have been counted by the caller since it was - // embedded in the HTML - parser.parseStyleSheet(new InputSource(new StringReader(taintedCss))); - } catch (IOException | ParseException e) { - /* - * ParseException, from batik, is unfortunately a RuntimeException. - */ - throw new ScanException(e); - } - - String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); - - if (isCdata) { - cleaned = ""; - } + if (isCdata) { + cleaned = ""; + } - return new CleanResults(startOfScan, cleaned, null, errorMessages); + return new CleanResults(startOfScan, cleaned, null, errorMessages); + } + + /** + * Scans the contents of an inline style declaration (ex. in the style attribute of an HTML tag) + * and validates the style sheet according to this CssScanner's policy file. 
+ * + * @param taintedCss a String containing the contents of the CSS stylesheet to + * validate + * @param tagName the name of the tag for which this inline style was declared + * @param sizeLimit the limit on the total size in bites of any imported stylesheets + * @return a CleanResuts object containing the results of the scan + * @throws ScanException if an error occurs during scanning + */ + public CleanResults scanInlineStyle(String taintedCss, String tagName, int sizeLimit) + throws ScanException { + + long startOfScan = System.currentTimeMillis(); + + List errorMessages = new ArrayList(); + + CssHandler handler = new CssHandler(policy, errorMessages, messages, tagName); + + parser.setDocumentHandler(handler); + + try { + // parse the inline style declaration + // note this does not count against the size limit because it + // should already have been counted by the caller since it was + // embedded in the HTML + parser.parseStyleDeclaration(taintedCss); + } catch (IOException ioe) { + throw new ScanException(ioe); } - /** - * Scans the contents of an inline style declaration (ex. in the style - * attribute of an HTML tag) and validates the style sheet according to - * this CssScanner's policy file. 
- * - * @param taintedCss - * a String containing the contents of the - * CSS stylesheet to validate - * @param tagName - * the name of the tag for which this inline style was - * declared - * - * @param sizeLimit - * the limit on the total size in bites of any imported - * stylesheets - * @return a CleanResuts object containing the results of - * the scan - * @throws ScanException - * if an error occurs during scanning - */ - public CleanResults scanInlineStyle(String taintedCss, String tagName, - int sizeLimit) throws ScanException { + String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); - long startOfScan = System.currentTimeMillis(); + return new CleanResults(startOfScan, cleaned, null, errorMessages); + } - List errorMessages = new ArrayList(); + private String getCleanStylesheetWithImports( + int sizeLimit, List errorMessages, CssHandler handler) throws ScanException { + String cleaned = handler.getCleanStylesheet(); + if (shouldParseImportedStyles) { + handler.emptyStyleSheet(); + parseImportedStylesheets(handler.getImportedStylesheetsURIList(), errorMessages, sizeLimit); + // If there are styles to import they must be added to the beginning + cleaned = handler.getCleanStylesheet() + cleaned; + } + return cleaned; + } + + /** + * Parses through a LinkedList of imported stylesheet URIs, this method parses + * through those stylesheets and validates them + * + * @param stylesheets the LinkedList of stylesheet URIs to parse + * @param errorMessages the list of error messages to append to + * @param sizeLimit the limit on the total size in bites of any imported stylesheets + * @throws ScanException if an error occurs during scanning + */ + private void parseImportedStylesheets( + LinkedList stylesheets, List errorMessages, int sizeLimit) throws ScanException { + // if stylesheets were imported by the inline style declaration, + // continue parsing the nested styles. 
Note this only happens + // if CSS importing was enabled in the policy file + if (!stylesheets.isEmpty()) { + int importedStylesheets = 0; + + // Ensure that we have appropriate timeout values so we don't + // get DoSed waiting for returns + Timeout timeout = DEFAULT_TIMEOUT; + try { + timeout = + Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT))); + } catch (NumberFormatException nfe) { + } + + RequestConfig requestConfig = + RequestConfig.custom() + .setConnectTimeout(timeout) + .setResponseTimeout(timeout) + .setConnectionRequestTimeout(timeout) + .build(); + + HttpClient httpClient = + HttpClientBuilder.create() + .disableAutomaticRetries() + .disableConnectionState() + .disableCookieManagement() + .setDefaultRequestConfig(requestConfig) + .build(); + + int allowedImports = Policy.DEFAULT_MAX_STYLESHEET_IMPORTS; + try { + allowedImports = Integer.parseInt(policy.getDirective("maxStyleSheetImports")); + } catch (NumberFormatException nfe) { + } + + while (!stylesheets.isEmpty()) { + + URI stylesheetUri = stylesheets.removeFirst(); + + if (++importedStylesheets > allowedImports) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_IMPORT_EXCEEDED, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), + String.valueOf(allowedImports) + })); + continue; + } - CssHandler handler = new CssHandler(policy, errorMessages, messages, tagName); + // Pulled directly from: + // https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java + // Create a custom response handler to read in the stylesheet + final HttpClientResponseHandler responseHandler = + new HttpClientResponseHandler() { + + @Override + public String handleResponse(final ClassicHttpResponse response) throws IOException { + final int status = response.getCode(); + if (status >= HttpStatus.SC_SUCCESS && status < 
HttpStatus.SC_REDIRECTION) { + final HttpEntity entity = response.getEntity(); + try { + return entity != null ? EntityUtils.toString(entity) : null; + } catch (final ParseException | org.apache.hc.core5.http.ParseException ex) { + throw new ClientProtocolException(ex); + } + } else { + throw new ClientProtocolException("Unexpected response status: " + status); + } + } + }; - parser.setDocumentHandler(handler); + byte[] stylesheet = null; try { - // parse the inline style declaration - // note this does not count against the size limit because it - // should already have been counted by the caller since it was - // embedded in the HTML - parser.parseStyleDeclaration(taintedCss); + String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler); + // pull down stylesheet, observing size limit + stylesheet = responseBody.getBytes(); + if (stylesheet != null && stylesheet.length > sizeLimit) { + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_IMPORT_INPUT_SIZE, + new Object[] { + HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), + String.valueOf(policy.getMaxInputSize()) + })); + stylesheet = null; + } } catch (IOException ioe) { - throw new ScanException(ioe); + errorMessages.add( + ErrorMessageUtil.getMessage( + messages, + ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE, + new Object[] {HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString())})); } - String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler); + if (stylesheet != null) { + // decrease the size limit based on the + sizeLimit -= stylesheet.length; - return new CleanResults(startOfScan, cleaned, null, errorMessages); - } + try { + InputSource nextStyleSheet = + new InputSource( + new InputStreamReader( + new ByteArrayInputStream(stylesheet), Charset.forName("UTF8"))); + parser.parseStyleSheet(nextStyleSheet); - private String getCleanStylesheetWithImports(int sizeLimit, List errorMessages, CssHandler 
handler) - throws ScanException { - String cleaned = handler.getCleanStylesheet(); - if (shouldParseImportedStyles) { - handler.emptyStyleSheet(); - parseImportedStylesheets(handler.getImportedStylesheetsURIList(), errorMessages, sizeLimit); - // If there are styles to import they must be added to the beginning - cleaned = handler.getCleanStylesheet() + cleaned; + } catch (IOException ioe) { + throw new ScanException(ioe); + } } - return cleaned; - } - - /** - * Parses through a LinkedList of imported stylesheet - * URIs, this method parses through those stylesheets and validates them - * - * @param stylesheets the LinkedList of stylesheet URIs to parse - * @param errorMessages the list of error messages to append to - * @param sizeLimit the limit on the total size in bites of any imported stylesheets - * @throws ScanException if an error occurs during scanning - */ - private void parseImportedStylesheets(LinkedList stylesheets, List errorMessages, int sizeLimit) - throws ScanException { - // if stylesheets were imported by the inline style declaration, - // continue parsing the nested styles. Note this only happens - // if CSS importing was enabled in the policy file - if (!stylesheets.isEmpty()) { - int importedStylesheets = 0; - - // Ensure that we have appropriate timeout values so we don't - // get DoSed waiting for returns - Timeout timeout = DEFAULT_TIMEOUT; - try { - timeout = Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT))); - } catch (NumberFormatException nfe) { - } - - RequestConfig requestConfig = RequestConfig.custom() - .setConnectTimeout(timeout) - .setResponseTimeout(timeout) - .setConnectionRequestTimeout(timeout) - .build(); - - HttpClient httpClient = HttpClientBuilder.create(). - disableAutomaticRetries(). - disableConnectionState(). - disableCookieManagement(). - setDefaultRequestConfig(requestConfig). 
- build(); - - int allowedImports = Policy.DEFAULT_MAX_STYLESHEET_IMPORTS; - try { - allowedImports = Integer.parseInt(policy.getDirective("maxStyleSheetImports")); - } catch (NumberFormatException nfe) { - } - - while (!stylesheets.isEmpty()) { - - URI stylesheetUri = stylesheets.removeFirst(); - - if (++importedStylesheets > allowedImports) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_EXCEEDED, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), - String.valueOf(allowedImports) })); - continue; - } - - // Pulled directly from: https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java - // Create a custom response handler to read in the stylesheet - final HttpClientResponseHandler responseHandler = new HttpClientResponseHandler() { - - @Override - public String handleResponse( - final ClassicHttpResponse response) throws IOException { - final int status = response.getCode(); - if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) { - final HttpEntity entity = response.getEntity(); - try { - return entity != null ? 
EntityUtils.toString(entity) : null; - } catch (final ParseException | org.apache.hc.core5.http.ParseException ex) { - throw new ClientProtocolException(ex); - } - } else { - throw new ClientProtocolException("Unexpected response status: " + status); - } - } - }; - - byte[] stylesheet = null; - - try { - String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler); - // pull down stylesheet, observing size limit - stylesheet = responseBody.getBytes(); - if (stylesheet != null && stylesheet.length > sizeLimit) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_INPUT_SIZE, - new Object[] { - HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()), - String.valueOf(policy.getMaxInputSize()) })); - stylesheet = null; - } - } catch (IOException ioe) { - errorMessages.add(ErrorMessageUtil.getMessage( - messages, - ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE, - new Object[] { HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()) })); - } - - if (stylesheet != null) { - // decrease the size limit based on the - sizeLimit -= stylesheet.length; - - try { - InputSource nextStyleSheet = new InputSource( - new InputStreamReader(new ByteArrayInputStream(stylesheet), Charset.forName("UTF8"))); - parser.parseStyleSheet(nextStyleSheet); - - } catch (IOException ioe) { - throw new ScanException(ioe); - } - } - } // end while - } // end if - } // end parseImportedStylesheets() + } // end while + } // end if + } // end parseImportedStylesheets() } diff --git a/src/main/java/org/owasp/validator/css/CssValidator.java b/src/main/java/org/owasp/validator/css/CssValidator.java index f07bc31c..d6107a61 100644 --- a/src/main/java/org/owasp/validator/css/CssValidator.java +++ b/src/main/java/org/owasp/validator/css/CssValidator.java @@ -1,11 +1,11 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without + * + * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, + * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation @@ -30,13 +30,11 @@ import java.util.Iterator; import java.util.regex.Pattern; - import org.owasp.validator.html.Policy; import org.owasp.validator.html.ScanException; import org.owasp.validator.html.model.AntiSamyPattern; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.util.HTMLEntityEncoder; - import org.w3c.css.sac.AttributeCondition; import org.w3c.css.sac.CombinatorCondition; import org.w3c.css.sac.Condition; @@ -50,351 +48,329 @@ import org.w3c.css.sac.SimpleSelector; /** - * Encapsulates all the necessary operations for validating individual elements - * of a stylesheet (namely: selectors, conditions and properties). - * + * Encapsulates all the necessary operations for validating individual elements of a stylesheet + * (namely: selectors, conditions and properties). + * * @author Jason Li */ public class CssValidator { - private final Policy policy; - - /** - * Constructs a validator for CSS selectors, conditions and properties based - * on the given policy. - * - * @param policy - * the policy file to use in this validator - */ - public CssValidator(Policy policy) { - this.policy = policy; - } - - /** - * Determines whether the given property (both name and value) are valid - * according to this validator's policy. 
- * - * @param name - * the name of the property - * @param lu - * the value of the property - * @return true if this property name/value is valid; false otherwise - */ - public boolean isValidProperty(String name, LexicalUnit lu) { - boolean isValid = false; - Property property = null; - - if (name != null) { - property = policy.getPropertyByName(name.toLowerCase()); - } - - // if we were able to find the property by name, validate the value - if (property != null) { - - // validate all values attached to this property - isValid = true; - while (lu != null) { - String value = lexicalValueToString(lu); - - if (value == null || !validateValue(property, value)) { - isValid = false; - break; - } - - lu = lu.getNextLexicalUnit(); - } - } - - return isValid; - } - - /** - * Determines whether the given selector name is valid according to this - * validator's policy. - * - * @param selectorName - * the name of the selector - * @param selector - * the object representation of the selector - * @return true if this selector name is valid; false otherwise - * @throws ScanException When there is a problem encountered - * while scanning this selector - */ - public boolean isValidSelector(String selectorName, Selector selector) - throws ScanException { - - // determine correct behavior - switch (selector.getSelectorType()) { - case Selector.SAC_ANY_NODE_SELECTOR: - case Selector.SAC_ELEMENT_NODE_SELECTOR: - case Selector.SAC_PSEUDO_ELEMENT_SELECTOR: - case Selector.SAC_ROOT_NODE_SELECTOR: - // these selectors are the most base selectors - return validateSimpleSelector((SimpleSelector) selector); - case Selector.SAC_CHILD_SELECTOR: - case Selector.SAC_DESCENDANT_SELECTOR: - // these are compound selectors - decompose into simple selectors - DescendantSelector descSelector = (DescendantSelector) selector; - return isValidSelector(selectorName, descSelector - .getSimpleSelector()) - && isValidSelector(selectorName, descSelector - .getAncestorSelector()); - case 
Selector.SAC_CONDITIONAL_SELECTOR: - // this is a compound selector - decompose into simple selectors - ConditionalSelector condSelector = (ConditionalSelector) selector; - return isValidSelector(selectorName, condSelector - .getSimpleSelector()) - && isValidCondition(selectorName, condSelector - .getCondition()); - case Selector.SAC_DIRECT_ADJACENT_SELECTOR: - // this is a compound selector - decompose into simple selectors - SiblingSelector sibSelector = (SiblingSelector) selector; - return isValidSelector(selectorName, sibSelector - .getSiblingSelector()) - && isValidSelector(selectorName, sibSelector.getSelector()); - case Selector.SAC_NEGATIVE_SELECTOR: - // this is a compound selector with one simple selector - return validateSimpleSelector((NegativeSelector) selector); - case Selector.SAC_CDATA_SECTION_NODE_SELECTOR: - case Selector.SAC_COMMENT_NODE_SELECTOR: - case Selector.SAC_PROCESSING_INSTRUCTION_NODE_SELECTOR: - case Selector.SAC_TEXT_NODE_SELECTOR: - default: - - throw new UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selector.toString())); - } - } - - /** - * Validates a basic selector against the policy - * - * @param selector - * the object representation of the selector - * @return true if this selector name is valid; false otherwise - */ - private boolean validateSimpleSelector(SimpleSelector selector) { - // ensure the name follows the valid pattern and is not blacklisted - // by the exclusion pattern. - // NOTE: intentionally using non-short-circuited AND operator to - // generate all relevant error messages - - String selectorLowerCase = selector.toString().toLowerCase(); - return policy.getCommonRegularExpressions("cssElementSelector").matches(selectorLowerCase) - && !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase); - } - - /** - * Determines whether the given condition is valid according to this - * validator's policy. 
- * - * @param selectorName - * the name of the selector that contains this condition - * @param condition - * the object representation of this condition - * @return true if this condition is valid; false otherwise - * @throws ScanException When there is a problem encountered - * while scanning this condition - */ - public boolean isValidCondition(String selectorName, Condition condition) - throws ScanException { - switch (condition.getConditionType()) { - case Condition.SAC_AND_CONDITION: - case Condition.SAC_OR_CONDITION: - // these are compound condition - decompose into simple conditions - CombinatorCondition comboCondition = (CombinatorCondition) condition; - return isValidCondition(selectorName, comboCondition - .getFirstCondition()) - && isValidCondition(selectorName, comboCondition - .getSecondCondition()); - case Condition.SAC_CLASS_CONDITION: - // this is a basic class condition; compare condition against - // valid pattern and is not blacklisted by exclusion pattern - - return validateCondition((AttributeCondition) condition, policy.getCommonRegularExpressions("cssClassSelector"), policy.getCommonRegularExpressions("cssClassExclusion")); - case Condition.SAC_ID_CONDITION: - // this is a basic ID condition; compare condition against - // valid pattern and is not blacklisted by exclusion pattern - - return validateCondition((AttributeCondition) condition, policy.getCommonRegularExpressions("cssIDSelector"), policy.getCommonRegularExpressions("cssIDExclusion")); - case Condition.SAC_PSEUDO_CLASS_CONDITION: - // this is a basic psuedo element condition; compare condition - // against valid pattern and is not blacklisted by exclusion pattern - - return validateCondition((AttributeCondition) condition, policy.getCommonRegularExpressions("cssPseudoElementSelector"), policy.getCommonRegularExpressions("cssPsuedoElementExclusion")); - case Condition.SAC_BEGIN_HYPHEN_ATTRIBUTE_CONDITION: - case Condition.SAC_ONE_OF_ATTRIBUTE_CONDITION: - case 
Condition.SAC_ATTRIBUTE_CONDITION: - // this is a basic class condition; compare condition against - // valid pattern and is not blacklisted by exclusion pattern - - return validateCondition((AttributeCondition) condition, policy.getCommonRegularExpressions("cssAttributeSelector"), policy.getCommonRegularExpressions("cssAttributeExclusion")); - case Condition.SAC_NEGATIVE_CONDITION: - // this is a compound condition; decompose to simple condition - return isValidCondition(selectorName, - ((NegativeCondition) condition).getCondition()); - case Condition.SAC_ONLY_CHILD_CONDITION: - case Condition.SAC_ONLY_TYPE_CONDITION: - // :only-child and :only-of-type are constants - return true; - case Condition.SAC_POSITIONAL_CONDITION: - case Condition.SAC_CONTENT_CONDITION: - case Condition.SAC_LANG_CONDITION: - default: - throw new UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selectorName)); - } - } - - /** - * Validates a basic condition against the white list pattern and the - * blacklist pattern - * - * @param condition - * the object representation of the condition - * @param pattern - * the positive pattern of valid conditions - * @param exclusionPattern - * the negative pattern of excluded conditions - * @return true if this selector name is valid; false otherwise - */ - private boolean validateCondition(AttributeCondition condition, - AntiSamyPattern pattern, AntiSamyPattern exclusionPattern) { - // check that the name of the condition matches valid pattern and does - // not match exclusion pattern - // NOTE: intentionally using non-short-circuited AND operator to - // generate all relevant error messages - String otherLower = condition.toString().toLowerCase(); - return pattern.matches(otherLower) && !exclusionPattern.matches(otherLower); - } - - /** - * Determines whether the given property value is valid according to this - * validator's policy. 
- * - * @param property - * the object representation of the property and its associated - * policy - * @param value - * the string representation of the value - * @return true if the property is valid; false otherwise - */ - private boolean validateValue(Property property, String value) { - boolean isValid = false; - - // normalize the value to lowercase - value = value.toLowerCase(); - - // check if the value matches any of the allowed literal values - Iterator allowedValues = property.getAllowedValues().iterator(); - while (allowedValues.hasNext() && !isValid) { - String allowedValue = (String) allowedValues.next(); - - if (allowedValue != null && allowedValue.equals(value)) { - isValid = true; - } - } - - // check if the value matches any of the allowed regular expressions - Iterator allowedRegexps = property.getAllowedRegExp().iterator(); - while (allowedRegexps.hasNext() && !isValid) { - Pattern pattern = (Pattern) allowedRegexps.next(); - - if (pattern != null && pattern.matcher(value).matches()) { - isValid = true; - } - } - - // check if the value matches any of the allowed shorthands - Iterator shorthandRefs = property.getShorthandRefs().iterator(); - while (shorthandRefs.hasNext() && !isValid) { - String shorthandRef = (String) shorthandRefs.next(); - Property shorthand = policy.getPropertyByName(shorthandRef); - - if (shorthand != null) { - isValid = validateValue(shorthand, value); - } - } - - return isValid; - } - - /** - * Converts the given lexical unit to a String - * representation. This method does not perform any validation - it is meant - * to be used in conjunction with the validator/logging methods. 
- * - * @param lu - * the lexical unit to convert - * @return a String representation of the given lexical unit - */ - public String lexicalValueToString(LexicalUnit lu) { - switch (lu.getLexicalUnitType()) { - case LexicalUnit.SAC_PERCENTAGE: - case LexicalUnit.SAC_DIMENSION: - case LexicalUnit.SAC_EM: - case LexicalUnit.SAC_EX: - case LexicalUnit.SAC_PIXEL: - case LexicalUnit.SAC_INCH: - case LexicalUnit.SAC_CENTIMETER: - case LexicalUnit.SAC_MILLIMETER: - case LexicalUnit.SAC_POINT: - case LexicalUnit.SAC_PICA: - case LexicalUnit.SAC_DEGREE: - case LexicalUnit.SAC_GRADIAN: - case LexicalUnit.SAC_RADIAN: - case LexicalUnit.SAC_MILLISECOND: - case LexicalUnit.SAC_SECOND: - case LexicalUnit.SAC_HERTZ: - case LexicalUnit.SAC_KILOHERTZ: - // these are all measurements - return lu.getFloatValue() + lu.getDimensionUnitText(); - case LexicalUnit.SAC_INTEGER: - // just a number - return String.valueOf(lu.getIntegerValue()); - case LexicalUnit.SAC_REAL: - // just a number - return String.valueOf(lu.getFloatValue()); - case LexicalUnit.SAC_STRING_VALUE: - case LexicalUnit.SAC_IDENT: - // just a string/identifier - String stringValue = lu.getStringValue(); - if(stringValue.indexOf(" ") != -1) - stringValue = "\""+stringValue+"\""; - return stringValue; - case LexicalUnit.SAC_URI: - // this is a URL - return "url(" + lu.getStringValue() + ")"; - case LexicalUnit.SAC_RGBCOLOR: - // this is a rgb encoded color - StringBuffer sb = new StringBuffer("rgb("); - LexicalUnit param = lu.getParameters(); - sb.append(param.getIntegerValue()); // R value - sb.append(','); - param = param.getNextLexicalUnit(); // comma - param = param.getNextLexicalUnit(); // G value - sb.append(param.getIntegerValue()); - sb.append(','); - param = param.getNextLexicalUnit(); // comma - param = param.getNextLexicalUnit(); // B value - sb.append(param.getIntegerValue()); - sb.append(')'); - - return sb.toString(); - case LexicalUnit.SAC_INHERIT: - // constant - return "inherit"; - case 
LexicalUnit.SAC_OPERATOR_COMMA: - return ","; - case LexicalUnit.SAC_ATTR: - case LexicalUnit.SAC_COUNTER_FUNCTION: - case LexicalUnit.SAC_COUNTERS_FUNCTION: - case LexicalUnit.SAC_FUNCTION: - case LexicalUnit.SAC_RECT_FUNCTION: - case LexicalUnit.SAC_SUB_EXPRESSION: - case LexicalUnit.SAC_UNICODERANGE: - default: - // these are properties that shouldn't be necessary for most run - // of the mill HTML/CSS - return null; - } - } + private final Policy policy; + + /** + * Constructs a validator for CSS selectors, conditions and properties based on the given policy. + * + * @param policy the policy file to use in this validator + */ + public CssValidator(Policy policy) { + this.policy = policy; + } + + /** + * Determines whether the given property (both name and value) are valid according to this + * validator's policy. + * + * @param name the name of the property + * @param lu the value of the property + * @return true if this property name/value is valid; false otherwise + */ + public boolean isValidProperty(String name, LexicalUnit lu) { + boolean isValid = false; + Property property = null; + + if (name != null) { + property = policy.getPropertyByName(name.toLowerCase()); + } + + // if we were able to find the property by name, validate the value + if (property != null) { + + // validate all values attached to this property + isValid = true; + while (lu != null) { + String value = lexicalValueToString(lu); + + if (value == null || !validateValue(property, value)) { + isValid = false; + break; + } + + lu = lu.getNextLexicalUnit(); + } + } + + return isValid; + } + + /** + * Determines whether the given selector name is valid according to this validator's policy. 
+ * + * @param selectorName the name of the selector + * @param selector the object representation of the selector + * @return true if this selector name is valid; false otherwise + * @throws ScanException When there is a problem encountered while scanning this selector + */ + public boolean isValidSelector(String selectorName, Selector selector) throws ScanException { + + // determine correct behavior + switch (selector.getSelectorType()) { + case Selector.SAC_ANY_NODE_SELECTOR: + case Selector.SAC_ELEMENT_NODE_SELECTOR: + case Selector.SAC_PSEUDO_ELEMENT_SELECTOR: + case Selector.SAC_ROOT_NODE_SELECTOR: + // these selectors are the most base selectors + return validateSimpleSelector((SimpleSelector) selector); + case Selector.SAC_CHILD_SELECTOR: + case Selector.SAC_DESCENDANT_SELECTOR: + // these are compound selectors - decompose into simple selectors + DescendantSelector descSelector = (DescendantSelector) selector; + return isValidSelector(selectorName, descSelector.getSimpleSelector()) + && isValidSelector(selectorName, descSelector.getAncestorSelector()); + case Selector.SAC_CONDITIONAL_SELECTOR: + // this is a compound selector - decompose into simple selectors + ConditionalSelector condSelector = (ConditionalSelector) selector; + return isValidSelector(selectorName, condSelector.getSimpleSelector()) + && isValidCondition(selectorName, condSelector.getCondition()); + case Selector.SAC_DIRECT_ADJACENT_SELECTOR: + // this is a compound selector - decompose into simple selectors + SiblingSelector sibSelector = (SiblingSelector) selector; + return isValidSelector(selectorName, sibSelector.getSiblingSelector()) + && isValidSelector(selectorName, sibSelector.getSelector()); + case Selector.SAC_NEGATIVE_SELECTOR: + // this is a compound selector with one simple selector + return validateSimpleSelector((NegativeSelector) selector); + case Selector.SAC_CDATA_SECTION_NODE_SELECTOR: + case Selector.SAC_COMMENT_NODE_SELECTOR: + case 
Selector.SAC_PROCESSING_INSTRUCTION_NODE_SELECTOR: + case Selector.SAC_TEXT_NODE_SELECTOR: + default: + throw new UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selector.toString())); + } + } + + /** + * Validates a basic selector against the policy + * + * @param selector the object representation of the selector + * @return true if this selector name is valid; false otherwise + */ + private boolean validateSimpleSelector(SimpleSelector selector) { + // ensure the name follows the valid pattern and is not blacklisted + // by the exclusion pattern. + // NOTE: intentionally using non-short-circuited AND operator to + // generate all relevant error messages + + String selectorLowerCase = selector.toString().toLowerCase(); + return policy.getCommonRegularExpressions("cssElementSelector").matches(selectorLowerCase) + && !policy.getCommonRegularExpressions("cssElementExclusion").matches(selectorLowerCase); + } + + /** + * Determines whether the given condition is valid according to this validator's policy. 
+ * + * @param selectorName the name of the selector that contains this condition + * @param condition the object representation of this condition + * @return true if this condition is valid; false otherwise + * @throws ScanException When there is a problem encountered while scanning this condition + */ + public boolean isValidCondition(String selectorName, Condition condition) throws ScanException { + switch (condition.getConditionType()) { + case Condition.SAC_AND_CONDITION: + case Condition.SAC_OR_CONDITION: + // these are compound condition - decompose into simple conditions + CombinatorCondition comboCondition = (CombinatorCondition) condition; + return isValidCondition(selectorName, comboCondition.getFirstCondition()) + && isValidCondition(selectorName, comboCondition.getSecondCondition()); + case Condition.SAC_CLASS_CONDITION: + // this is a basic class condition; compare condition against + // valid pattern and is not blacklisted by exclusion pattern + + return validateCondition( + (AttributeCondition) condition, + policy.getCommonRegularExpressions("cssClassSelector"), + policy.getCommonRegularExpressions("cssClassExclusion")); + case Condition.SAC_ID_CONDITION: + // this is a basic ID condition; compare condition against + // valid pattern and is not blacklisted by exclusion pattern + + return validateCondition( + (AttributeCondition) condition, + policy.getCommonRegularExpressions("cssIDSelector"), + policy.getCommonRegularExpressions("cssIDExclusion")); + case Condition.SAC_PSEUDO_CLASS_CONDITION: + // this is a basic psuedo element condition; compare condition + // against valid pattern and is not blacklisted by exclusion pattern + + return validateCondition( + (AttributeCondition) condition, + policy.getCommonRegularExpressions("cssPseudoElementSelector"), + policy.getCommonRegularExpressions("cssPsuedoElementExclusion")); + case Condition.SAC_BEGIN_HYPHEN_ATTRIBUTE_CONDITION: + case Condition.SAC_ONE_OF_ATTRIBUTE_CONDITION: + case 
Condition.SAC_ATTRIBUTE_CONDITION: + // this is a basic class condition; compare condition against + // valid pattern and is not blacklisted by exclusion pattern + + return validateCondition( + (AttributeCondition) condition, + policy.getCommonRegularExpressions("cssAttributeSelector"), + policy.getCommonRegularExpressions("cssAttributeExclusion")); + case Condition.SAC_NEGATIVE_CONDITION: + // this is a compound condition; decompose to simple condition + return isValidCondition(selectorName, ((NegativeCondition) condition).getCondition()); + case Condition.SAC_ONLY_CHILD_CONDITION: + case Condition.SAC_ONLY_TYPE_CONDITION: + // :only-child and :only-of-type are constants + return true; + case Condition.SAC_POSITIONAL_CONDITION: + case Condition.SAC_CONTENT_CONDITION: + case Condition.SAC_LANG_CONDITION: + default: + throw new UnknownSelectorException(HTMLEntityEncoder.htmlEntityEncode(selectorName)); + } + } + + /** + * Validates a basic condition against the white list pattern and the blacklist pattern + * + * @param condition the object representation of the condition + * @param pattern the positive pattern of valid conditions + * @param exclusionPattern the negative pattern of excluded conditions + * @return true if this selector name is valid; false otherwise + */ + private boolean validateCondition( + AttributeCondition condition, AntiSamyPattern pattern, AntiSamyPattern exclusionPattern) { + // check that the name of the condition matches valid pattern and does + // not match exclusion pattern + // NOTE: intentionally using non-short-circuited AND operator to + // generate all relevant error messages + String otherLower = condition.toString().toLowerCase(); + return pattern.matches(otherLower) && !exclusionPattern.matches(otherLower); + } + + /** + * Determines whether the given property value is valid according to this validator's policy. 
+ * + * @param property the object representation of the property and its associated policy + * @param value the string representation of the value + * @return true if the property is valid; false otherwise + */ + private boolean validateValue(Property property, String value) { + boolean isValid = false; + + // normalize the value to lowercase + value = value.toLowerCase(); + + // check if the value matches any of the allowed literal values + Iterator allowedValues = property.getAllowedValues().iterator(); + while (allowedValues.hasNext() && !isValid) { + String allowedValue = (String) allowedValues.next(); + + if (allowedValue != null && allowedValue.equals(value)) { + isValid = true; + } + } + + // check if the value matches any of the allowed regular expressions + Iterator allowedRegexps = property.getAllowedRegExp().iterator(); + while (allowedRegexps.hasNext() && !isValid) { + Pattern pattern = (Pattern) allowedRegexps.next(); + + if (pattern != null && pattern.matcher(value).matches()) { + isValid = true; + } + } + + // check if the value matches any of the allowed shorthands + Iterator shorthandRefs = property.getShorthandRefs().iterator(); + while (shorthandRefs.hasNext() && !isValid) { + String shorthandRef = (String) shorthandRefs.next(); + Property shorthand = policy.getPropertyByName(shorthandRef); + + if (shorthand != null) { + isValid = validateValue(shorthand, value); + } + } + + return isValid; + } + + /** + * Converts the given lexical unit to a String representation. This method does not + * perform any validation - it is meant to be used in conjunction with the validator/logging + * methods. 
+ * + * @param lu the lexical unit to convert + * @return a String representation of the given lexical unit + */ + public String lexicalValueToString(LexicalUnit lu) { + switch (lu.getLexicalUnitType()) { + case LexicalUnit.SAC_PERCENTAGE: + case LexicalUnit.SAC_DIMENSION: + case LexicalUnit.SAC_EM: + case LexicalUnit.SAC_EX: + case LexicalUnit.SAC_PIXEL: + case LexicalUnit.SAC_INCH: + case LexicalUnit.SAC_CENTIMETER: + case LexicalUnit.SAC_MILLIMETER: + case LexicalUnit.SAC_POINT: + case LexicalUnit.SAC_PICA: + case LexicalUnit.SAC_DEGREE: + case LexicalUnit.SAC_GRADIAN: + case LexicalUnit.SAC_RADIAN: + case LexicalUnit.SAC_MILLISECOND: + case LexicalUnit.SAC_SECOND: + case LexicalUnit.SAC_HERTZ: + case LexicalUnit.SAC_KILOHERTZ: + // these are all measurements + return lu.getFloatValue() + lu.getDimensionUnitText(); + case LexicalUnit.SAC_INTEGER: + // just a number + return String.valueOf(lu.getIntegerValue()); + case LexicalUnit.SAC_REAL: + // just a number + return String.valueOf(lu.getFloatValue()); + case LexicalUnit.SAC_STRING_VALUE: + case LexicalUnit.SAC_IDENT: + // just a string/identifier + String stringValue = lu.getStringValue(); + if (stringValue.indexOf(" ") != -1) stringValue = "\"" + stringValue + "\""; + return stringValue; + case LexicalUnit.SAC_URI: + // this is a URL + return "url(" + lu.getStringValue() + ")"; + case LexicalUnit.SAC_RGBCOLOR: + // this is a rgb encoded color + StringBuffer sb = new StringBuffer("rgb("); + LexicalUnit param = lu.getParameters(); + sb.append(param.getIntegerValue()); // R value + sb.append(','); + param = param.getNextLexicalUnit(); // comma + param = param.getNextLexicalUnit(); // G value + sb.append(param.getIntegerValue()); + sb.append(','); + param = param.getNextLexicalUnit(); // comma + param = param.getNextLexicalUnit(); // B value + sb.append(param.getIntegerValue()); + sb.append(')'); + + return sb.toString(); + case LexicalUnit.SAC_INHERIT: + // constant + return "inherit"; + case 
LexicalUnit.SAC_OPERATOR_COMMA: + return ","; + case LexicalUnit.SAC_ATTR: + case LexicalUnit.SAC_COUNTER_FUNCTION: + case LexicalUnit.SAC_COUNTERS_FUNCTION: + case LexicalUnit.SAC_FUNCTION: + case LexicalUnit.SAC_RECT_FUNCTION: + case LexicalUnit.SAC_SUB_EXPRESSION: + case LexicalUnit.SAC_UNICODERANGE: + default: + // these are properties that shouldn't be necessary for most run + // of the mill HTML/CSS + return null; + } + } } diff --git a/src/main/java/org/owasp/validator/css/UnknownSelectorException.java b/src/main/java/org/owasp/validator/css/UnknownSelectorException.java index 31b7f640..a3f1bf1b 100644 --- a/src/main/java/org/owasp/validator/css/UnknownSelectorException.java +++ b/src/main/java/org/owasp/validator/css/UnknownSelectorException.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -24,27 +24,24 @@ package org.owasp.validator.css; /** - * This exception gets thrown when there is an unrecognized Selector type parsing - * the tainted CSS. - * + * This exception gets thrown when there is an unrecognized Selector type parsing the tainted CSS. 
+ * * @author Jason Li */ import org.owasp.validator.html.ScanException; public class UnknownSelectorException extends ScanException { - - private final String selectorName; - - public UnknownSelectorException(String selectorName) { - super("Unknown selector " + selectorName); - - this.selectorName = selectorName; - } - /** - * @return the selectorName - */ - public String getSelectorName() { - return selectorName; - } + private final String selectorName; + + public UnknownSelectorException(String selectorName) { + super("Unknown selector " + selectorName); + + this.selectorName = selectorName; + } + + /** @return the selectorName */ + public String getSelectorName() { + return selectorName; + } } diff --git a/src/main/java/org/owasp/validator/html/AntiSamy.java b/src/main/java/org/owasp/validator/html/AntiSamy.java index b1803308..3d0d71b8 100644 --- a/src/main/java/org/owasp/validator/html/AntiSamy.java +++ b/src/main/java/org/owasp/validator/html/AntiSamy.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
@@ -24,157 +24,151 @@ package org.owasp.validator.html; -import org.owasp.validator.html.scan.AntiSamyDOMScanner; -import org.owasp.validator.html.scan.AntiSamySAXScanner; - import java.io.File; import java.io.Reader; import java.io.Writer; +import org.owasp.validator.html.scan.AntiSamyDOMScanner; +import org.owasp.validator.html.scan.AntiSamySAXScanner; /** - * This is the only class from which the outside world should be calling. The - * scan() method holds the meat and potatoes of AntiSamy. The file - * contains a number of ways for scan()'ing depending on the - * accessibility of the policy file. + * This is the only class from which the outside world should be calling. The scan() + * method holds the meat and potatoes of AntiSamy. The file contains a number of ways for + * scan()'ing depending on the accessibility of the policy file. * * @author Arshan Dabirsiaghi */ - public class AntiSamy { - public static final int DOM = 0; - public static final int SAX = 1; - - private Policy policy = null; - - public AntiSamy() { - } - - public AntiSamy(Policy policy) { - this.policy = policy; - } - - /** - * The meat and potatoes. The scan() family of methods are the - * only methods the outside world should be calling to invoke AntiSamy. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. - */ - public CleanResults scan(String taintedHTML) throws ScanException, PolicyException { - return this.scan(taintedHTML, this.policy, SAX); - } - - /** - * This method sets scan() to use the specified scan type. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @param scanType The type of scan (DOM or SAX). 
- * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. - */ - public CleanResults scan(String taintedHTML, int scanType) throws ScanException, PolicyException { - - return this.scan(taintedHTML, this.policy, scanType); - } - - /** - * This method wraps scan() using the Policy object passed in. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @param policy The custom policy to enforce. - * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. - */ - public CleanResults scan(String taintedHTML, Policy policy) throws ScanException, PolicyException { - return this.scan(taintedHTML, policy, DOM); - } - - /** - * This method wraps scan() using the Policy object passed in and the specified scan type. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @param policy The custom policy to enforce. - * @param scanType The type of scan (DOM or SAX). - * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. 
- */ - public CleanResults scan(String taintedHTML, Policy policy, int scanType) throws ScanException, PolicyException { - if (policy == null) { - throw new PolicyException("No policy loaded"); - } - - if (scanType == DOM) { - return new AntiSamyDOMScanner(policy).scan(taintedHTML); - } else { - return new AntiSamySAXScanner(policy).scan(taintedHTML); - } - } - - /** - * Use this method if caller has Streams rather than Strings for I/O - * Useful for servlets where the response is very large and we don't validate, - * simply encode as bytes are consumed from the stream. - * @param reader Reader that produces the input, possibly a little at a time - * @param writer Writer that receives the cleaned output, possibly a little at a time - * @param policy Policy that directs the scan - * @return CleanResults where the cleanHtml is null. If caller wants the clean HTML, it - * must capture the writer's contents. When using Streams, caller generally - * doesn't want to create a single string containing clean HTML. - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - */ - public CleanResults scan(Reader reader, Writer writer, Policy policy) throws ScanException { - return (new AntiSamySAXScanner(policy)).scan(reader, writer); - } - - /** - * This method wraps scan() using the Policy in the specified file. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @param filename The file name of the custom policy to enforce. - * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. 
- */ - public CleanResults scan(String taintedHTML, String filename) throws ScanException, PolicyException { - - Policy policy = Policy.getInstance(filename); - - return this.scan(taintedHTML, policy); - } - - /** - * This method wraps scan() using the policy File object passed in. - * - * @param taintedHTML Untrusted HTML which may contain malicious code. - * @param policyFile The File object of the custom policy to enforce. - * @return A CleanResults object which contains information - * about the scan (including the results). - * @throws ScanException When there is a problem encountered - * while scanning the HTML. - * @throws PolicyException When there is a problem reading the policy file. - */ - public CleanResults scan(String taintedHTML, File policyFile) throws ScanException, PolicyException { - - Policy policy = Policy.getInstance(policyFile); - - return this.scan(taintedHTML, policy); - } - + public static final int DOM = 0; + public static final int SAX = 1; + + private Policy policy = null; + + public AntiSamy() {} + + public AntiSamy(Policy policy) { + this.policy = policy; + } + + /** + * The meat and potatoes. The scan() family of methods are the only methods the + * outside world should be calling to invoke AntiSamy. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ + public CleanResults scan(String taintedHTML) throws ScanException, PolicyException { + return this.scan(taintedHTML, this.policy, SAX); + } + + /** + * This method sets scan() to use the specified scan type. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param scanType The type of scan (DOM or SAX). 
+ * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ + public CleanResults scan(String taintedHTML, int scanType) throws ScanException, PolicyException { + + return this.scan(taintedHTML, this.policy, scanType); + } + + /** + * This method wraps scan() using the Policy object passed in. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policy The custom policy to enforce. + * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ + public CleanResults scan(String taintedHTML, Policy policy) + throws ScanException, PolicyException { + return this.scan(taintedHTML, policy, DOM); + } + + /** + * This method wraps scan() using the Policy object passed in and the specified scan + * type. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policy The custom policy to enforce. + * @param scanType The type of scan (DOM or SAX). + * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. 
+ */ + public CleanResults scan(String taintedHTML, Policy policy, int scanType) + throws ScanException, PolicyException { + if (policy == null) { + throw new PolicyException("No policy loaded"); + } + + if (scanType == DOM) { + return new AntiSamyDOMScanner(policy).scan(taintedHTML); + } else { + return new AntiSamySAXScanner(policy).scan(taintedHTML); + } + } + + /** + * Use this method if caller has Streams rather than Strings for I/O Useful for servlets where the + * response is very large and we don't validate, simply encode as bytes are consumed from the + * stream. + * + * @param reader Reader that produces the input, possibly a little at a time + * @param writer Writer that receives the cleaned output, possibly a little at a time + * @param policy Policy that directs the scan + * @return CleanResults where the cleanHtml is null. If caller wants the clean HTML, it must + * capture the writer's contents. When using Streams, caller generally doesn't want to create + * a single string containing clean HTML. + * @throws ScanException When there is a problem encountered while scanning the HTML. + */ + public CleanResults scan(Reader reader, Writer writer, Policy policy) throws ScanException { + return (new AntiSamySAXScanner(policy)).scan(reader, writer); + } + + /** + * This method wraps scan() using the Policy in the specified file. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param filename The file name of the custom policy to enforce. + * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. 
+ */ + public CleanResults scan(String taintedHTML, String filename) + throws ScanException, PolicyException { + + Policy policy = Policy.getInstance(filename); + + return this.scan(taintedHTML, policy); + } + + /** + * This method wraps scan() using the policy File object passed in. + * + * @param taintedHTML Untrusted HTML which may contain malicious code. + * @param policyFile The File object of the custom policy to enforce. + * @return A CleanResults object which contains information about the scan (including + * the results). + * @throws ScanException When there is a problem encountered while scanning the HTML. + * @throws PolicyException When there is a problem reading the policy file. + */ + public CleanResults scan(String taintedHTML, File policyFile) + throws ScanException, PolicyException { + + Policy policy = Policy.getInstance(policyFile); + + return this.scan(taintedHTML, policy); + } } diff --git a/src/main/java/org/owasp/validator/html/CleanResults.java b/src/main/java/org/owasp/validator/html/CleanResults.java index 57498013..0dcb68cd 100644 --- a/src/main/java/org/owasp/validator/html/CleanResults.java +++ b/src/main/java/org/owasp/validator/html/CleanResults.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
@@ -28,136 +28,142 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.Callable; - import org.w3c.dom.DocumentFragment; /** * This class contains the results of a scan. - * - * The list of error messages (errorMessages) will let the user - * know what, if any HTML errors existed, and what, if any, security or - * validation-related errors existed, and what was done about them. - * + * + *

The list of error messages (errorMessages) will let the user know what, if any + * HTML errors existed, and what, if any, security or validation-related errors existed, and what + * was done about them. + * * @author Arshan Dabirsiaghi */ - public class CleanResults { - private List errorMessages; - private Callable cleanHTML; - private long startOfScan; - private long elapsedScan; - - /* - * A DOM object version of the clean HTML String. May be null even if clean HTML is set. - */ - private DocumentFragment cleanXMLDocumentFragment; - - /* - * For extension. - */ - public CleanResults() { - this.errorMessages = new ArrayList(); - } - - /** - * Create a clean set of results. - * @param startOfScan - The time when the scan started. - * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. - * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced during the sanitzation process. - * @param errorMessages - Messages describing any errors that occurred during sanitization. - */ - public CleanResults(long startOfScan, final String cleanHTML, - DocumentFragment XMLDocumentFragment, List errorMessages) { - - this(startOfScan, - new Callable() { - public String call() throws Exception { - return cleanHTML; - } - }, - XMLDocumentFragment, - errorMessages); - } - - /** - * Create a clean set of results. - * @param startOfScan - The time when the scan started. - * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. - * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced during the sanitization process. - * @param errorMessages - Messages describing any errors that occurred during sanitization. 
- */ - public CleanResults(long startOfScan, Callable cleanHTML, - DocumentFragment XMLDocumentFragment, List errorMessages) { - this.startOfScan = startOfScan; - this.elapsedScan = System.currentTimeMillis() - startOfScan; - this.cleanXMLDocumentFragment = XMLDocumentFragment; - this.cleanHTML = cleanHTML; - this.errorMessages = Collections.unmodifiableList(errorMessages); - } - - /** - * Return the DOM version of the clean HTML. - * - * @return The XML Document fragment version of the clean HTML produced during the sanitization process. - * This may be null, even if the clean HTML String is not null. - */ - public DocumentFragment getCleanXMLDocumentFragment() { - return cleanXMLDocumentFragment; - } - - /** - * Return the filtered HTML as a String. - * - * @return A String object which contains the serialized, safe HTML. - */ - public String getCleanHTML() { - try { - return cleanHTML.call(); - } catch (Exception e) { - throw new RuntimeException(e); - } + private List errorMessages; + private Callable cleanHTML; + private long startOfScan; + private long elapsedScan; + + /* + * A DOM object version of the clean HTML String. May be null even if clean HTML is set. + */ + private DocumentFragment cleanXMLDocumentFragment; + + /* + * For extension. + */ + public CleanResults() { + this.errorMessages = new ArrayList(); + } + + /** + * Create a clean set of results. + * + * @param startOfScan - The time when the scan started. + * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. + * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced + * during the sanitzation process. + * @param errorMessages - Messages describing any errors that occurred during sanitization. 
+ */ + public CleanResults( + long startOfScan, + final String cleanHTML, + DocumentFragment XMLDocumentFragment, + List errorMessages) { + + this( + startOfScan, + new Callable() { + public String call() throws Exception { + return cleanHTML; + } + }, + XMLDocumentFragment, + errorMessages); + } + + /** + * Create a clean set of results. + * + * @param startOfScan - The time when the scan started. + * @param cleanHTML - The resulting clean HTML produced per the AntiSamy policy. + * @param XMLDocumentFragment - The XML Document fragment version of the clean HTML produced + * during the sanitization process. + * @param errorMessages - Messages describing any errors that occurred during sanitization. + */ + public CleanResults( + long startOfScan, + Callable cleanHTML, + DocumentFragment XMLDocumentFragment, + List errorMessages) { + this.startOfScan = startOfScan; + this.elapsedScan = System.currentTimeMillis() - startOfScan; + this.cleanXMLDocumentFragment = XMLDocumentFragment; + this.cleanHTML = cleanHTML; + this.errorMessages = Collections.unmodifiableList(errorMessages); + } + + /** + * Return the DOM version of the clean HTML. + * + * @return The XML Document fragment version of the clean HTML produced during the sanitization + * process. This may be null, even if the clean HTML String is not null. + */ + public DocumentFragment getCleanXMLDocumentFragment() { + return cleanXMLDocumentFragment; + } + + /** + * Return the filtered HTML as a String. + * + * @return A String object which contains the serialized, safe HTML. + */ + public String getCleanHTML() { + try { + return cleanHTML.call(); + } catch (Exception e) { + throw new RuntimeException(e); } - - /** - * Return a list of error messages -- but an empty list returned does not mean there was no attack - * present, due to the serialization and deserialization process automatically cleaning up some - * attacks. See the README for more discussion. 
- * - * @return An ArrayList object which contain the error messages after a - * scan. - */ - public List getErrorMessages() { - return errorMessages; - } - - /** - * Return the time elapsed during the scan. - * - * @return A double primitive indicating the amount of time elapsed between - * the beginning and end of the scan in seconds. - */ - public double getScanTime() { - return elapsedScan / 1000D; - } - - /** - * Return the number of errors encountered during filtering. - * - * @return The number of errors encountered during filtering. - */ - public int getNumberOfErrors() { - return errorMessages.size(); - } - - /** - * Get the time the scan started. - * - * @return time that scan started in milliseconds since epoch. - */ - public long getStartOfScan() - { - return startOfScan; - } - + } + + /** + * Return a list of error messages -- but an empty list returned does not mean there was no attack + * present, due to the serialization and deserialization process automatically cleaning up some + * attacks. See the README for more discussion. + * + * @return An ArrayList object which contain the error messages after a scan. + */ + public List getErrorMessages() { + return errorMessages; + } + + /** + * Return the time elapsed during the scan. + * + * @return A double primitive indicating the amount of time elapsed between the beginning and end + * of the scan in seconds. + */ + public double getScanTime() { + return elapsedScan / 1000D; + } + + /** + * Return the number of errors encountered during filtering. + * + * @return The number of errors encountered during filtering. + */ + public int getNumberOfErrors() { + return errorMessages.size(); + } + + /** + * Get the time the scan started. + * + * @return time that scan started in milliseconds since epoch. 
+ */ + public long getStartOfScan() { + return startOfScan; + } } diff --git a/src/main/java/org/owasp/validator/html/InternalPolicy.java b/src/main/java/org/owasp/validator/html/InternalPolicy.java index 96a9efc3..33017f29 100644 --- a/src/main/java/org/owasp/validator/html/InternalPolicy.java +++ b/src/main/java/org/owasp/validator/html/InternalPolicy.java @@ -1,168 +1,175 @@ package org.owasp.validator.html; +import java.util.Map; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.model.Tag; -import java.util.Map; - /** * Contains a bunch of optimized lookups over the regular Policy Class. For internal use only. * - * Not part of any public API and may explode or self-destruct at any given moment, preferably both. + *

Not part of any public API and may explode or self-destruct at any given moment, preferably + * both. * * @author Kristian Rosenvold */ public class InternalPolicy extends Policy { - private final int maxInputSize; - private final boolean isNofollowAnchors; - private final boolean isNoopenerAndNoreferrerAnchors; - private final boolean isValidateParamAsEmbed; - private final boolean formatOutput; - private final boolean preserveSpace; - private final boolean omitXmlDeclaration; - private final boolean omitDoctypeDeclaration; - private final boolean entityEncodeIntlCharacters; - private final Tag embedTag; - private final Tag styleTag; - private final String onUnknownTag; - private final boolean preserveComments; - private final boolean embedStyleSheets; - private final boolean isEncodeUnknownTag; - private final boolean allowDynamicAttributes; - - - protected InternalPolicy(ParseContext parseContext) { - super(parseContext); - this.maxInputSize = determineMaxInputSize(); - this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); - this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); - this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); - this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); - this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); - this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); - this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); - this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); - this.embedTag = getTagByLowercaseName("embed"); - this.onUnknownTag = getDirective("onUnknownTag"); - this.isEncodeUnknownTag = "encode".equals(onUnknownTag); - this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); - this.styleTag = getTagByLowercaseName("style"); - this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); - this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); - - if (!isNoopenerAndNoreferrerAnchors) { - logger.warn("The directive 
\"" + Policy.ANCHORS_NOOPENER_NOREFERRER + - "\" is enabled by default, but disabled in this policy. It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); - } - } - - protected InternalPolicy(Policy old, Map directives, Map tagRules, Map cssRules) { - super(old, directives, tagRules, cssRules); - this.maxInputSize = determineMaxInputSize(); - this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); - this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); - this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); - this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); - this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); - this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); - this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); - this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); - this.embedTag = getTagByLowercaseName("embed"); - this.onUnknownTag = getDirective("onUnknownTag"); - this.isEncodeUnknownTag = "encode".equals(onUnknownTag); - this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); - this.styleTag = getTagByLowercaseName("style"); - this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); - this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); - - if (!isNoopenerAndNoreferrerAnchors) { - logger.warn("The directive \"" + Policy.ANCHORS_NOOPENER_NOREFERRER + - "\" is enabled by default, but disabled in this policy. 
It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); - } - } - - public Tag getEmbedTag() { - return embedTag; - } - - public Tag getStyleTag() { - return styleTag; - } - - public boolean isEmbedStyleSheets() { - return embedStyleSheets; - } - - public boolean isPreserveComments() { - return preserveComments; - } - - public int getMaxInputSize() { - return maxInputSize; - } - - public boolean isEntityEncodeIntlCharacters() { - return entityEncodeIntlCharacters; - } - - public boolean isNofollowAnchors() { - return isNofollowAnchors; - } - - public boolean isNoopenerAndNoreferrerAnchors() { - return isNoopenerAndNoreferrerAnchors; - } - - public boolean isValidateParamAsEmbed() { - return isValidateParamAsEmbed; - } - - public boolean isFormatOutput() { - return formatOutput; - } - - public boolean isPreserveSpace() { - return preserveSpace; - } - - public boolean isOmitXmlDeclaration() { - return omitXmlDeclaration; - } - - public boolean isOmitDoctypeDeclaration() { - return omitDoctypeDeclaration; - } - - private boolean isTrue(String anchorsNofollow) { - return "true".equals(getDirective(anchorsNofollow)); - } - - public String getOnUnknownTag() { - return onUnknownTag; - } - - public boolean isEncodeUnknownTag() { - return isEncodeUnknownTag; - } - - public boolean isAllowDynamicAttributes() { - return allowDynamicAttributes; - } - - /** - * Returns the maximum input size. If this value is not specified by - * the policy, the DEFAULT_MAX_INPUT_SIZE is used. - * - * @return the maximum input size. 
- */ - public int determineMaxInputSize() { - int maxInputSize = Policy.DEFAULT_MAX_INPUT_SIZE; - - try { - maxInputSize = Integer.parseInt(getDirective("maxInputSize")); - } catch (NumberFormatException ignore) { - } - - return maxInputSize; - } + private final int maxInputSize; + private final boolean isNofollowAnchors; + private final boolean isNoopenerAndNoreferrerAnchors; + private final boolean isValidateParamAsEmbed; + private final boolean formatOutput; + private final boolean preserveSpace; + private final boolean omitXmlDeclaration; + private final boolean omitDoctypeDeclaration; + private final boolean entityEncodeIntlCharacters; + private final Tag embedTag; + private final Tag styleTag; + private final String onUnknownTag; + private final boolean preserveComments; + private final boolean embedStyleSheets; + private final boolean isEncodeUnknownTag; + private final boolean allowDynamicAttributes; + + protected InternalPolicy(ParseContext parseContext) { + super(parseContext); + this.maxInputSize = determineMaxInputSize(); + this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); + this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); + this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); + this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); + this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); + this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); + this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); + this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); + this.embedTag = getTagByLowercaseName("embed"); + this.onUnknownTag = getDirective("onUnknownTag"); + this.isEncodeUnknownTag = "encode".equals(onUnknownTag); + this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); + this.styleTag = getTagByLowercaseName("style"); + this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); + this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); + + if 
(!isNoopenerAndNoreferrerAnchors) { + logger.warn( + "The directive \"" + + Policy.ANCHORS_NOOPENER_NOREFERRER + + "\" is enabled by default, but disabled in this policy. It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); + } + } + + protected InternalPolicy( + Policy old, + Map directives, + Map tagRules, + Map cssRules) { + super(old, directives, tagRules, cssRules); + this.maxInputSize = determineMaxInputSize(); + this.isNofollowAnchors = isTrue(Policy.ANCHORS_NOFOLLOW); + this.isNoopenerAndNoreferrerAnchors = isTrue(Policy.ANCHORS_NOOPENER_NOREFERRER); + this.isValidateParamAsEmbed = isTrue(Policy.VALIDATE_PARAM_AS_EMBED); + this.formatOutput = isTrue(Policy.FORMAT_OUTPUT); + this.preserveSpace = isTrue(Policy.PRESERVE_SPACE); + this.omitXmlDeclaration = isTrue(Policy.OMIT_XML_DECLARATION); + this.omitDoctypeDeclaration = isTrue(Policy.OMIT_DOCTYPE_DECLARATION); + this.entityEncodeIntlCharacters = isTrue(Policy.ENTITY_ENCODE_INTL_CHARS); + this.embedTag = getTagByLowercaseName("embed"); + this.onUnknownTag = getDirective("onUnknownTag"); + this.isEncodeUnknownTag = "encode".equals(onUnknownTag); + this.preserveComments = isTrue(Policy.PRESERVE_COMMENTS); + this.styleTag = getTagByLowercaseName("style"); + this.embedStyleSheets = isTrue(Policy.EMBED_STYLESHEETS); + this.allowDynamicAttributes = isTrue(Policy.ALLOW_DYNAMIC_ATTRIBUTES); + + if (!isNoopenerAndNoreferrerAnchors) { + logger.warn( + "The directive \"" + + Policy.ANCHORS_NOOPENER_NOREFERRER + + "\" is enabled by default, but disabled in this policy. 
It is recommended to leave it enabled to prevent reverse tabnabbing attacks."); + } + } + + public Tag getEmbedTag() { + return embedTag; + } + + public Tag getStyleTag() { + return styleTag; + } + + public boolean isEmbedStyleSheets() { + return embedStyleSheets; + } + + public boolean isPreserveComments() { + return preserveComments; + } + + public int getMaxInputSize() { + return maxInputSize; + } + + public boolean isEntityEncodeIntlCharacters() { + return entityEncodeIntlCharacters; + } + + public boolean isNofollowAnchors() { + return isNofollowAnchors; + } + + public boolean isNoopenerAndNoreferrerAnchors() { + return isNoopenerAndNoreferrerAnchors; + } + + public boolean isValidateParamAsEmbed() { + return isValidateParamAsEmbed; + } + + public boolean isFormatOutput() { + return formatOutput; + } + + public boolean isPreserveSpace() { + return preserveSpace; + } + + public boolean isOmitXmlDeclaration() { + return omitXmlDeclaration; + } + + public boolean isOmitDoctypeDeclaration() { + return omitDoctypeDeclaration; + } + + private boolean isTrue(String anchorsNofollow) { + return "true".equals(getDirective(anchorsNofollow)); + } + + public String getOnUnknownTag() { + return onUnknownTag; + } + + public boolean isEncodeUnknownTag() { + return isEncodeUnknownTag; + } + + public boolean isAllowDynamicAttributes() { + return allowDynamicAttributes; + } + + /** + * Returns the maximum input size. If this value is not specified by the policy, the + * DEFAULT_MAX_INPUT_SIZE is used. + * + * @return the maximum input size. 
+ */ + public int determineMaxInputSize() { + int maxInputSize = Policy.DEFAULT_MAX_INPUT_SIZE; + + try { + maxInputSize = Integer.parseInt(getDirective("maxInputSize")); + } catch (NumberFormatException ignore) { + } + + return maxInputSize; + } } diff --git a/src/main/java/org/owasp/validator/html/Policy.java b/src/main/java/org/owasp/validator/html/Policy.java index 7a373bcd..2a9c2916 100644 --- a/src/main/java/org/owasp/validator/html/Policy.java +++ b/src/main/java/org/owasp/validator/html/Policy.java @@ -24,12 +24,13 @@ package org.owasp.validator.html; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import static org.owasp.validator.html.util.XMLUtil.getAttributeValue; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import java.io.ByteArrayInputStream; import java.io.File; -import java.io.IOException; import java.io.FileNotFoundException; +import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; @@ -46,7 +47,6 @@ import java.util.Set; import java.util.concurrent.Callable; import java.util.regex.Pattern; - import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -55,14 +55,12 @@ import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; import javax.xml.validation.SchemaFactory; - import org.owasp.validator.html.model.AntiSamyPattern; import org.owasp.validator.html.model.Attribute; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.model.Tag; import org.owasp.validator.html.scan.Constants; import org.owasp.validator.html.util.URIUtils; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; @@ -73,944 +71,1051 @@ import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -import static org.owasp.validator.html.util.XMLUtil.getAttributeValue; - /** - *

Policy.java - This file holds the model for our policy engine.

- * - *

## Schema validation behavior change starting with AntiSamy 1.6.0 ##

- * - *

Prior to v1.6.0 AntiSamy was not actually enforcing it's defined XSD. For all of v1.6.x, by default AntiSamy - * enforced the schema, and wouldn't continue if the AntiSamy policy was invalid. However, we recognized that it might - * not be possible for developers to fix their AntiSamy policies right away so we provided two - * ways to (temporarily!) disable schema validation. Via a direct method call and via a System property.

- * - *

## Starting with AntiSamy 1.7.0, schema validation is Mandatory.

- * - *

Logging: The logging introduced in 1.6+ uses slf4j. But AntiSamy doesn't actually include an slf4j implementation - * library. AntiSamy users must import and properly configure an slf4j logging library if they want to see the very - * few log messages generated by AntiSamy.

- * + * Policy.java - This file holds the model for our policy engine. + * + *

## Schema validation behavior change starting with AntiSamy 1.6.0 ## + * + *

Prior to v1.6.0 AntiSamy was not actually enforcing it's defined XSD. For all of v1.6.x, by + * default AntiSamy enforced the schema, and wouldn't continue if the AntiSamy policy was invalid. + * However, we recognized that it might not be possible for developers to fix their AntiSamy + * policies right away so we provided two ways to (temporarily!) disable schema validation. Via a + * direct method call and via a System property. + * + *

## Starting with AntiSamy 1.7.0, schema validation is Mandatory. + * + *

Logging: The logging introduced in 1.6+ uses slf4j. But AntiSamy doesn't actually include an + * slf4j implementation library. AntiSamy users must import and properly configure an slf4j logging + * library if they want to see the very few log messages generated by AntiSamy. + * * @author Arshan Dabirsiaghi */ - public class Policy { - protected static final Logger logger = LoggerFactory.getLogger(Policy.class); - - public static final Pattern ANYTHING_REGEXP = Pattern.compile(".*", Pattern.DOTALL); - - private static final String POLICY_SCHEMA_URI = "antisamy.xsd"; - protected static final String DEFAULT_POLICY_URI = "antisamy.xml"; - private static final String DEFAULT_ONINVALID = "removeAttribute"; - - public static final int DEFAULT_MAX_INPUT_SIZE = 100000; - public static final int DEFAULT_MAX_STYLESHEET_IMPORTS = 1; - - public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration"; - public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration"; - public static final String FORMAT_OUTPUT = "formatOutput"; - public static final String EMBED_STYLESHEETS = "embedStyleSheets"; - public static final String CONNECTION_TIMEOUT = "connectionTimeout"; - public static final String ANCHORS_NOFOLLOW = "nofollowAnchors"; - public static final String ANCHORS_NOOPENER_NOREFERRER = "noopenerAndNoreferrerAnchors"; - public static final String VALIDATE_PARAM_AS_EMBED = "validateParamAsEmbed"; - public static final String PRESERVE_SPACE = "preserveSpace"; - public static final String PRESERVE_COMMENTS = "preserveComments"; - public static final String ENTITY_ENCODE_INTL_CHARS = "entityEncodeIntlChars"; - public static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes"; - public static final String MAX_INPUT_SIZE = "maxInputSize"; - public static final String MAX_STYLESHEET_IMPORTS = "maxStyleSheetImports"; - - public static final String EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities"; - public static 
final String EXTERNAL_PARAM_ENTITIES = "http://xml.org/sax/features/external-parameter-entities"; - public static final String DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl"; - public static final String LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd"; - - public static final String ACTION_VALIDATE = "validate"; - public static final String ACTION_FILTER = "filter"; - public static final String ACTION_TRUNCATE = "truncate"; - - private final Map commonRegularExpressions; - protected final Map tagRules; - protected final Map cssRules; - protected final Map directives; - private final Map globalAttributes; - private final Map dynamicAttributes; - - private final TagMatcher allowedEmptyTagsMatcher; - private final TagMatcher requiresClosingTagsMatcher; - - /** - * XML Schema for policy validation - */ - private static volatile Schema schema = null; - - /** - * Get the Tag specified by the provided tag name. - * - * @param tagName - * The name of the Tag to return. - * @return The requested Tag, or null if it doesn't exist. - */ - public Tag getTagByLowercaseName(String tagName) { - return tagRules.get(tagName); - } - - protected static class ParseContext { - Map commonRegularExpressions = new HashMap(); - Map commonAttributes = new HashMap(); - Map tagRules = new HashMap(); - Map cssRules = new HashMap(); - Map directives = new HashMap(); - Map globalAttributes = new HashMap(); - Map dynamicAttributes = new HashMap(); - - List allowedEmptyTags = new ArrayList(); - List requireClosingTags = new ArrayList(); - - public void resetParamsWhereLastConfigWins() { - allowedEmptyTags.clear(); - requireClosingTags.clear(); - } - } - - /** - * Retrieves a CSS Property from the Policy. - * - * @param propertyName The name of the CSS Property to look up. - * @return The CSS Property associated with the name specified, or null if none is found. 
- */ - public Property getPropertyByName(String propertyName) { - return cssRules.get(propertyName.toLowerCase()); + protected static final Logger logger = LoggerFactory.getLogger(Policy.class); + + public static final Pattern ANYTHING_REGEXP = Pattern.compile(".*", Pattern.DOTALL); + + private static final String POLICY_SCHEMA_URI = "antisamy.xsd"; + protected static final String DEFAULT_POLICY_URI = "antisamy.xml"; + private static final String DEFAULT_ONINVALID = "removeAttribute"; + + public static final int DEFAULT_MAX_INPUT_SIZE = 100000; + public static final int DEFAULT_MAX_STYLESHEET_IMPORTS = 1; + + public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration"; + public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration"; + public static final String FORMAT_OUTPUT = "formatOutput"; + public static final String EMBED_STYLESHEETS = "embedStyleSheets"; + public static final String CONNECTION_TIMEOUT = "connectionTimeout"; + public static final String ANCHORS_NOFOLLOW = "nofollowAnchors"; + public static final String ANCHORS_NOOPENER_NOREFERRER = "noopenerAndNoreferrerAnchors"; + public static final String VALIDATE_PARAM_AS_EMBED = "validateParamAsEmbed"; + public static final String PRESERVE_SPACE = "preserveSpace"; + public static final String PRESERVE_COMMENTS = "preserveComments"; + public static final String ENTITY_ENCODE_INTL_CHARS = "entityEncodeIntlChars"; + public static final String ALLOW_DYNAMIC_ATTRIBUTES = "allowDynamicAttributes"; + public static final String MAX_INPUT_SIZE = "maxInputSize"; + public static final String MAX_STYLESHEET_IMPORTS = "maxStyleSheetImports"; + + public static final String EXTERNAL_GENERAL_ENTITIES = + "http://xml.org/sax/features/external-general-entities"; + public static final String EXTERNAL_PARAM_ENTITIES = + "http://xml.org/sax/features/external-parameter-entities"; + public static final String DISALLOW_DOCTYPE_DECL = + "http://apache.org/xml/features/disallow-doctype-decl"; + public 
static final String LOAD_EXTERNAL_DTD = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; + + public static final String ACTION_VALIDATE = "validate"; + public static final String ACTION_FILTER = "filter"; + public static final String ACTION_TRUNCATE = "truncate"; + + private final Map commonRegularExpressions; + protected final Map tagRules; + protected final Map cssRules; + protected final Map directives; + private final Map globalAttributes; + private final Map dynamicAttributes; + + private final TagMatcher allowedEmptyTagsMatcher; + private final TagMatcher requiresClosingTagsMatcher; + + /** XML Schema for policy validation */ + private static volatile Schema schema = null; + + /** + * Get the Tag specified by the provided tag name. + * + * @param tagName The name of the Tag to return. + * @return The requested Tag, or null if it doesn't exist. + */ + public Tag getTagByLowercaseName(String tagName) { + return tagRules.get(tagName); + } + + protected static class ParseContext { + Map commonRegularExpressions = new HashMap(); + Map commonAttributes = new HashMap(); + Map tagRules = new HashMap(); + Map cssRules = new HashMap(); + Map directives = new HashMap(); + Map globalAttributes = new HashMap(); + Map dynamicAttributes = new HashMap(); + + List allowedEmptyTags = new ArrayList(); + List requireClosingTags = new ArrayList(); + + public void resetParamsWhereLastConfigWins() { + allowedEmptyTags.clear(); + requireClosingTags.clear(); } - - /** - * Construct a Policy using the default policy file location ("antisamy.xml"). - * - * @return A populated Policy object based on the XML policy file located in the default location. - * @throws PolicyException If the file is not found or there is a problem parsing the file. - */ - public static Policy getInstance() throws PolicyException { - return getInstance(Policy.class.getClassLoader().getResource(DEFAULT_POLICY_URI)); - } - - /** - * Construct a Policy based on the file whose name is passed in. 
- * - * @param filename The path to the XML policy file. - * @return A populated Policy object based on the XML policy file located in the location passed in. - * @throws PolicyException If the file is not found or there is a problem parsing the file. - */ - public static Policy getInstance(String filename) throws PolicyException { - File file = new File(filename); - return getInstance(file); + } + + /** + * Retrieves a CSS Property from the Policy. + * + * @param propertyName The name of the CSS Property to look up. + * @return The CSS Property associated with the name specified, or null if none is found. + */ + public Property getPropertyByName(String propertyName) { + return cssRules.get(propertyName.toLowerCase()); + } + + /** + * Construct a Policy using the default policy file location ("antisamy.xml"). + * + * @return A populated Policy object based on the XML policy file located in the default location. + * @throws PolicyException If the file is not found or there is a problem parsing the file. + */ + public static Policy getInstance() throws PolicyException { + return getInstance(Policy.class.getClassLoader().getResource(DEFAULT_POLICY_URI)); + } + + /** + * Construct a Policy based on the file whose name is passed in. + * + * @param filename The path to the XML policy file. + * @return A populated Policy object based on the XML policy file located in the location passed + * in. + * @throws PolicyException If the file is not found or there is a problem parsing the file. + */ + public static Policy getInstance(String filename) throws PolicyException { + File file = new File(filename); + return getInstance(file); + } + + /** + * Construct a Policy from the InputStream object passed in. + * + * @param inputStream An InputStream which contains the XML policy information. + * @return A populated Policy object based on the XML policy file pointed to by the inputStream + * parameter. + * @throws PolicyException If there is a problem parsing the input stream. 
+ */ + public static Policy getInstance(InputStream inputStream) throws PolicyException { + logger.info("Attempting to load AntiSamy policy from an input stream."); + return new InternalPolicy(getSimpleParseContext(getTopLevelElement(inputStream))); + } + + /** + * Construct a Policy from the File object passed in. + * + * @param file A File object which contains the XML policy information. + * @return A populated Policy object based on the XML policy file pointed to by the File + * parameter. + * @throws PolicyException If the file is not found or there is a problem parsing the file. + */ + public static Policy getInstance(File file) throws PolicyException { + try { + URI uri = file.toURI(); + return getInstance(uri.toURL()); + } catch (IOException e) { + throw new PolicyException(e); } - - /** - * Construct a Policy from the InputStream object passed in. - * - * @param inputStream An InputStream which contains the XML policy information. - * @return A populated Policy object based on the XML policy file pointed to by the inputStream parameter. - * @throws PolicyException If there is a problem parsing the input stream. - */ - public static Policy getInstance(InputStream inputStream) throws PolicyException { - logger.info("Attempting to load AntiSamy policy from an input stream."); - return new InternalPolicy(getSimpleParseContext(getTopLevelElement(inputStream))); + } + + /** + * Construct a Policy from the target of the URL passed in.
+ *
+ * NOTE: This is the only factory method that will work with <include> tags in AntiSamy + * policy files.
+ *
+ * For security reasons, the provided URL must point to a local file. Currently only 'file:' and + * 'jar:' URL prefixes are allowed. If you want to use a different URL format, and are confident + * that the URL points to a safe source, you can open the target of the URL with URL.openStream(), + * and use the getInstance(InputStream) constructor instead. For example, Spring has classpath: + * and Wildfly/Jboss supports vfs: for accessing local files. Just be aware that this alternate + * constructor doesn't support the use of <include> tags, per the NOTE above. + * + * @param url A URL object which contains the XML policy information. + * @return A populated Policy object based on the XML policy file pointed to by the File + * parameter. + * @throws PolicyException If the file is not found or there is a problem parsing the file. + */ + public static Policy getInstance(URL url) throws PolicyException { + logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); + return new InternalPolicy(getParseContext(getTopLevelElement(url), url)); + } + + protected Policy(ParseContext parseContext) { + this.allowedEmptyTagsMatcher = new TagMatcher(parseContext.allowedEmptyTags); + this.requiresClosingTagsMatcher = new TagMatcher(parseContext.requireClosingTags); + this.commonRegularExpressions = + Collections.unmodifiableMap(parseContext.commonRegularExpressions); + this.tagRules = Collections.unmodifiableMap(parseContext.tagRules); + this.cssRules = Collections.unmodifiableMap(parseContext.cssRules); + this.directives = Collections.unmodifiableMap(parseContext.directives); + this.globalAttributes = Collections.unmodifiableMap(parseContext.globalAttributes); + this.dynamicAttributes = Collections.unmodifiableMap(parseContext.dynamicAttributes); + } + + protected Policy( + Policy old, + Map directives, + Map tagRules, + Map cssRules) { + this.allowedEmptyTagsMatcher = old.allowedEmptyTagsMatcher; + this.requiresClosingTagsMatcher = 
old.requiresClosingTagsMatcher; + this.commonRegularExpressions = old.commonRegularExpressions; + this.tagRules = tagRules; + this.cssRules = cssRules; + this.directives = directives; + this.globalAttributes = old.globalAttributes; + this.dynamicAttributes = old.dynamicAttributes; + } + + protected static ParseContext getSimpleParseContext(Element topLevelElement) + throws PolicyException { + ParseContext parseContext = new ParseContext(); + if (getByTagName(topLevelElement, "include").iterator().hasNext()) { + throw new IllegalArgumentException( + "A policy file loaded with an InputStream cannot contain include references"); } + parsePolicy(topLevelElement, parseContext); + return parseContext; + } - /** - * Construct a Policy from the File object passed in. - * - * @param file A File object which contains the XML policy information. - * @return A populated Policy object based on the XML policy file pointed to by the File parameter. - * @throws PolicyException If the file is not found or there is a problem parsing the file. - */ - public static Policy getInstance(File file) throws PolicyException { - try { - URI uri = file.toURI(); - return getInstance(uri.toURL()); - } catch (IOException e) { - throw new PolicyException(e); - } - } + protected static ParseContext getParseContext(Element topLevelElement, URL baseUrl) + throws PolicyException { + ParseContext parseContext = new ParseContext(); /** - * Construct a Policy from the target of the URL passed in. - *

- * NOTE: This is the only factory method that will work with <include> tags - * in AntiSamy policy files. - *

- * For security reasons, the provided URL must point to a local file. Currently only 'file:' and 'jar:' - * URL prefixes are allowed. If you want to use a different URL format, and are confident that the URL - * points to a safe source, you can open the target of the URL with URL.openStream(), and use the - * getInstance(InputStream) constructor instead. For example, Spring has classpath: and Wildfly/Jboss - * supports vfs: for accessing local files. Just be aware that this alternate constructor doesn't support - * the use of <include> tags, per the NOTE above. + * Are there any included policies? These are parsed here first so that rules in _this_ policy + * file will override included rules. * - * @param url A URL object which contains the XML policy information. - * @return A populated Policy object based on the XML policy file pointed to by the File parameter. - * @throws PolicyException If the file is not found or there is a problem parsing the file. + *

NOTE that by this being here we only support one level of includes. To support recursion, + * move this into the parsePolicy method. */ - public static Policy getInstance(URL url) throws PolicyException { - logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); - return new InternalPolicy(getParseContext(getTopLevelElement(url), url)); - } + for (Element include : getByTagName(topLevelElement, "include")) { + String href = getAttributeValue(include, "href"); - protected Policy(ParseContext parseContext) { - this.allowedEmptyTagsMatcher = new TagMatcher(parseContext.allowedEmptyTags); - this.requiresClosingTagsMatcher = new TagMatcher(parseContext.requireClosingTags); - this.commonRegularExpressions = Collections.unmodifiableMap(parseContext.commonRegularExpressions); - this.tagRules = Collections.unmodifiableMap(parseContext.tagRules); - this.cssRules = Collections.unmodifiableMap(parseContext.cssRules); - this.directives = Collections.unmodifiableMap(parseContext.directives); - this.globalAttributes = Collections.unmodifiableMap(parseContext.globalAttributes); - this.dynamicAttributes = Collections.unmodifiableMap(parseContext.dynamicAttributes); + Element includedPolicy = getPolicy(href, baseUrl); + parsePolicy(includedPolicy, parseContext); } - protected Policy(Policy old, Map directives, Map tagRules, Map cssRules) { - this.allowedEmptyTagsMatcher = old.allowedEmptyTagsMatcher; - this.requiresClosingTagsMatcher = old.requiresClosingTagsMatcher; - this.commonRegularExpressions = old.commonRegularExpressions; - this.tagRules = tagRules; - this.cssRules = cssRules; - this.directives = directives; - this.globalAttributes = old.globalAttributes; - this.dynamicAttributes = old.dynamicAttributes; - } - - protected static ParseContext getSimpleParseContext(Element topLevelElement) throws PolicyException { - ParseContext parseContext = new ParseContext(); - if (getByTagName(topLevelElement, "include").iterator().hasNext()) { - throw new 
IllegalArgumentException( - "A policy file loaded with an InputStream cannot contain include references"); - } - parsePolicy(topLevelElement, parseContext); - return parseContext; - } - - protected static ParseContext getParseContext(Element topLevelElement, URL baseUrl) throws PolicyException { - ParseContext parseContext = new ParseContext(); - - /** - * Are there any included policies? These are parsed here first so that - * rules in _this_ policy file will override included rules. - * - * NOTE that by this being here we only support one level of includes. - * To support recursion, move this into the parsePolicy method. - */ - for (Element include : getByTagName(topLevelElement, "include")) { - String href = getAttributeValue(include, "href"); - - Element includedPolicy = getPolicy(href, baseUrl); - parsePolicy(includedPolicy, parseContext); - } - - parsePolicy(topLevelElement, parseContext); - return parseContext; - } - - protected static Element getTopLevelElement(final URL baseUrl) throws PolicyException { - final InputSource source = getSourceFromUrl(baseUrl); - return getTopLevelElement(source, new Callable() { - @Override - public InputSource call() throws PolicyException { - return getSourceFromUrl(baseUrl); - } + parsePolicy(topLevelElement, parseContext); + return parseContext; + } + + protected static Element getTopLevelElement(final URL baseUrl) throws PolicyException { + final InputSource source = getSourceFromUrl(baseUrl); + return getTopLevelElement( + source, + new Callable() { + @Override + public InputSource call() throws PolicyException { + return getSourceFromUrl(baseUrl); + } }); + } + + @SuppressFBWarnings( + value = "SECURITY", + justification = + "Opening a stream to the provided URL is not " + + "a vulnerability because it points to a local JAR file.") + protected static InputSource getSourceFromUrl(URL baseUrl) throws PolicyException { + try { + InputSource source = resolveEntity(baseUrl.toExternalForm(), baseUrl); + if (source == null) 
{ + source = new InputSource(baseUrl.toExternalForm()); + source.setByteStream(baseUrl.openStream()); + } else { + source.setSystemId(baseUrl.toExternalForm()); + } + + return source; + } catch (SAXException | IOException e) { + // SAXException can't actually happen. See JavaDoc for resolveEntity(String, URL) + throw new PolicyException(e); } + } - @SuppressFBWarnings(value = "SECURITY", justification="Opening a stream to the provided URL is not " - + "a vulnerability because it points to a local JAR file.") - protected static InputSource getSourceFromUrl(URL baseUrl) throws PolicyException { - try { - InputSource source = resolveEntity(baseUrl.toExternalForm(), baseUrl); - if (source == null) { - source = new InputSource(baseUrl.toExternalForm()); - source.setByteStream(baseUrl.openStream()); - } else { - source.setSystemId(baseUrl.toExternalForm()); - } + private static Element getTopLevelElement(InputStream is) throws PolicyException { + final InputSource source = new InputSource(toByteArrayStream(is)); + return getTopLevelElement( + source, + new Callable() { + @Override + public InputSource call() throws IOException { + source.getByteStream().reset(); return source; - } catch (SAXException | IOException e) { - // SAXException can't actually happen. 
See JavaDoc for resolveEntity(String, URL) - throw new PolicyException(e); - } - } - - private static Element getTopLevelElement(InputStream is) throws PolicyException { - final InputSource source = new InputSource(toByteArrayStream(is)); - - return getTopLevelElement(source, new Callable() { - @Override - public InputSource call() throws IOException { - source.getByteStream().reset(); - return source; - } + } }); + } + + protected static Element getTopLevelElement( + InputSource source, Callable getResetSource) throws PolicyException { + // Track whether an exception was ever thrown while processing policy file + try { + return getDocumentElementFromSource(source); + } catch (SAXException | ParserConfigurationException | IOException e) { + throw new PolicyException(e); } - - protected static Element getTopLevelElement(InputSource source, Callable getResetSource) throws PolicyException { - // Track whether an exception was ever thrown while processing policy file - try { - return getDocumentElementFromSource(source); - } catch (SAXException | ParserConfigurationException | IOException e) { - throw new PolicyException(e); - } + } + + /* + * This method takes an arbitrary input stream, copies its contents into a byte[], then returns it + * in a ByteArrayInputStream, closing the provided InputStream in the process. It's purpose is to + * ensure that the InputStream we are using can be reset to the beginning, as not all InputStream's properly + * allow this. We use this for AntiSamy XML policy files, which we never expect to get that large + * (e.g., a few Kb at most). 
+ */ + private static InputStream toByteArrayStream(InputStream in) throws PolicyException { + byte[] byteArray; + try (Reader reader = new InputStreamReader(in, Charset.forName("UTF8"))) { + char[] charArray = new char[8 * 1024]; + StringBuilder builder = new StringBuilder(); + int numCharsRead; + while ((numCharsRead = reader.read(charArray, 0, charArray.length)) != -1) { + builder.append(charArray, 0, numCharsRead); + } + byteArray = builder.toString().getBytes(Charset.forName("UTF8")); + } catch (IOException ioe) { + throw new PolicyException(ioe); } - /* - * This method takes an arbitrary input stream, copies its contents into a byte[], then returns it - * in a ByteArrayInputStream, closing the provided InputStream in the process. It's purpose is to - * ensure that the InputStream we are using can be reset to the beginning, as not all InputStream's properly - * allow this. We use this for AntiSamy XML policy files, which we never expect to get that large - * (e.g., a few Kb at most). - */ - private static InputStream toByteArrayStream(InputStream in) throws PolicyException { - byte[] byteArray; - try (Reader reader = new InputStreamReader(in, Charset.forName("UTF8"))) { - char[] charArray = new char[8 * 1024]; - StringBuilder builder = new StringBuilder(); - int numCharsRead; - while ((numCharsRead = reader.read(charArray, 0, charArray.length)) != -1) { - builder.append(charArray, 0, numCharsRead); - } - byteArray = builder.toString().getBytes(Charset.forName("UTF8")); - } catch (IOException ioe) { - throw new PolicyException(ioe); - } - - return new ByteArrayInputStream(byteArray); - } - - private static Element getDocumentElementFromSource(InputSource source) - throws ParserConfigurationException, SAXException, IOException { - - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - - /** - * Disable external entities, etc. 
- */ - dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); - dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); - dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); - dbf.setFeature(LOAD_EXTERNAL_DTD, false); - - // Schema validation is always required now. So turn it on. - getPolicySchema(); - dbf.setNamespaceAware(true); - dbf.setSchema(schema); - - DocumentBuilder db = dbf.newDocumentBuilder(); - db.setErrorHandler(new SAXErrorHandler()); - Document dom = db.parse(source); - - return dom.getDocumentElement(); + return new ByteArrayInputStream(byteArray); + } + + private static Element getDocumentElementFromSource(InputSource source) + throws ParserConfigurationException, SAXException, IOException { + + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + + /** Disable external entities, etc. */ + dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); + dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); + dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); + dbf.setFeature(LOAD_EXTERNAL_DTD, false); + + // Schema validation is always required now. So turn it on. 
+ getPolicySchema(); + dbf.setNamespaceAware(true); + dbf.setSchema(schema); + + DocumentBuilder db = dbf.newDocumentBuilder(); + db.setErrorHandler(new SAXErrorHandler()); + Document dom = db.parse(source); + + return dom.getDocumentElement(); + } + + private static void parsePolicy(Element topLevelElement, ParseContext parseContext) + throws PolicyException { + + if (topLevelElement == null) return; + + parseContext.resetParamsWhereLastConfigWins(); + + parseCommonRegExps( + getFirstChild(topLevelElement, "common-regexps"), parseContext.commonRegularExpressions); + parseDirectives(getFirstChild(topLevelElement, "directives"), parseContext.directives); + parseCommonAttributes( + getFirstChild(topLevelElement, "common-attributes"), + parseContext.commonAttributes, + parseContext.commonRegularExpressions); + parseGlobalAttributes( + getFirstChild(topLevelElement, "global-tag-attributes"), + parseContext.globalAttributes, + parseContext.commonAttributes); + parseDynamicAttributes( + getFirstChild(topLevelElement, "dynamic-tag-attributes"), + parseContext.dynamicAttributes, + parseContext.commonAttributes); + parseTagRules( + getFirstChild(topLevelElement, "tag-rules"), + parseContext.commonAttributes, + parseContext.commonRegularExpressions, + parseContext.tagRules); + parseCSSRules( + getFirstChild(topLevelElement, "css-rules"), + parseContext.cssRules, + parseContext.commonRegularExpressions); + + parseAllowedEmptyTags( + getFirstChild(topLevelElement, "allowed-empty-tags"), parseContext.allowedEmptyTags); + parseRequireClosingTags( + getFirstChild(topLevelElement, "require-closing-tags"), parseContext.requireClosingTags); + } + + /** Returns the top level element of a loaded policy Document */ + @SuppressFBWarnings( + value = "SECURITY", + justification = + "Opening a stream to the provided URL is not " + + "a vulnerability because only local file URLs are allowed.") + private static Element getPolicy(String href, URL baseUrl) throws PolicyException { + // Track 
whether an exception was ever thrown while processing policy file + try { + return getDocumentElementByUrl(href, baseUrl); + } catch (SAXException | ParserConfigurationException | IOException e) { + throw new PolicyException(e); } + } - private static void parsePolicy(Element topLevelElement, ParseContext parseContext) - throws PolicyException { + // TODO: Add JavaDocs for this new method. + @SuppressFBWarnings( + value = "SECURITY", + justification = + "Opening a stream to the provided URL is not " + + "a vulnerability because only local file URLs are allowed.") + private static Element getDocumentElementByUrl(String href, URL baseUrl) + throws IOException, ParserConfigurationException, SAXException { - if (topLevelElement == null) return; + InputSource source = null; - parseContext.resetParamsWhereLastConfigWins(); + // Can't resolve public id, but might be able to resolve relative + // system id, since we have a base URI. + if (href != null && baseUrl != null) { - parseCommonRegExps(getFirstChild(topLevelElement, "common-regexps"), parseContext.commonRegularExpressions); - parseDirectives(getFirstChild(topLevelElement, "directives"), parseContext.directives); - parseCommonAttributes(getFirstChild(topLevelElement, "common-attributes"), parseContext.commonAttributes, parseContext.commonRegularExpressions); - parseGlobalAttributes(getFirstChild(topLevelElement, "global-tag-attributes"), parseContext.globalAttributes, parseContext.commonAttributes); - parseDynamicAttributes(getFirstChild(topLevelElement, "dynamic-tag-attributes"), parseContext.dynamicAttributes, parseContext.commonAttributes); - parseTagRules(getFirstChild(topLevelElement, "tag-rules"), parseContext.commonAttributes, parseContext.commonRegularExpressions, parseContext.tagRules); - parseCSSRules(getFirstChild(topLevelElement, "css-rules"), parseContext.cssRules, parseContext.commonRegularExpressions); + verifyLocalUrl(baseUrl); - parseAllowedEmptyTags(getFirstChild(topLevelElement, 
"allowed-empty-tags"), parseContext.allowedEmptyTags); - parseRequireClosingTags(getFirstChild(topLevelElement, "require-closing-tags"), parseContext.requireClosingTags); - } + URL url; - /** - * Returns the top level element of a loaded policy Document - */ - @SuppressFBWarnings(value = "SECURITY", justification="Opening a stream to the provided URL is not " - + "a vulnerability because only local file URLs are allowed.") - private static Element getPolicy(String href, URL baseUrl) throws PolicyException { - // Track whether an exception was ever thrown while processing policy file + try { + url = new URL(baseUrl, href); + logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); + source = new InputSource(url.openStream()); + source.setSystemId(href); + } catch (MalformedURLException | FileNotFoundException e) { try { - return getDocumentElementByUrl(href, baseUrl); - } catch (SAXException | ParserConfigurationException | IOException e) { - throw new PolicyException(e); + String absURL = URIUtils.resolveAsString(href, baseUrl.toString()); + url = new URL(absURL); + source = new InputSource(url.openStream()); + source.setSystemId(href); + } catch (MalformedURLException ex2) { + // nothing to do + // TODO: Is this true? Or should we at least log the original exception, or + // rethrow it? } + } } - // TODO: Add JavaDocs for this new method. - @SuppressFBWarnings(value = "SECURITY", justification="Opening a stream to the provided URL is not " - + "a vulnerability because only local file URLs are allowed.") - private static Element getDocumentElementByUrl(String href, URL baseUrl) - throws IOException, ParserConfigurationException, SAXException { - - InputSource source = null; - - // Can't resolve public id, but might be able to resolve relative - // system id, since we have a base URI. 
- if (href != null && baseUrl != null) { - - verifyLocalUrl(baseUrl); - - URL url; - - try { - url = new URL(baseUrl, href); - logger.info("Attempting to load AntiSamy policy from URL: " + url.toString()); - source = new InputSource(url.openStream()); - source.setSystemId(href); - } catch (MalformedURLException | FileNotFoundException e) { - try { - String absURL = URIUtils.resolveAsString(href, baseUrl.toString()); - url = new URL(absURL); - source = new InputSource(url.openStream()); - source.setSystemId(href); - } catch (MalformedURLException ex2) { - // nothing to do - // TODO: Is this true? Or should we at least log the original exception, or - // rethrow it? - } - } - } - - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - - /** - * Disable external entities, etc. - */ - dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); - dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); - dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); - dbf.setFeature(LOAD_EXTERNAL_DTD, false); + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - // This code doesn't have the retry logic if schema validation fails. It is up to the caller to try again, - // if this fails the first time (if they want to). - getPolicySchema(); - dbf.setNamespaceAware(true); - dbf.setSchema(schema); + /** Disable external entities, etc. */ + dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); + dbf.setFeature(EXTERNAL_PARAM_ENTITIES, false); + dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); + dbf.setFeature(LOAD_EXTERNAL_DTD, false); - DocumentBuilder db = dbf.newDocumentBuilder(); - db.setErrorHandler(new SAXErrorHandler()); + // This code doesn't have the retry logic if schema validation fails. It is up to the caller + // to try again, + // if this fails the first time (if they want to). + getPolicySchema(); + dbf.setNamespaceAware(true); + dbf.setSchema(schema); - // Load and parse the file. 
- if (source != null) { - Document dom = db.parse(source); + DocumentBuilder db = dbf.newDocumentBuilder(); + db.setErrorHandler(new SAXErrorHandler()); - // Get the policy information out of it! - return dom.getDocumentElement(); - } + // Load and parse the file. + if (source != null) { + Document dom = db.parse(source); - return null; + // Get the policy information out of it! + return dom.getDocumentElement(); } - /** - * Creates a copy of this policy with an added/changed directive. - * @param name The directive to add/modify - * @param value The value - * @return A clone of the policy with the updated directive - */ - public Policy cloneWithDirective(String name, String value) { - Map directives = new HashMap(this.directives); - directives.put(name, value); - return new InternalPolicy(this, Collections.unmodifiableMap(directives), tagRules, cssRules); + return null; + } + + /** + * Creates a copy of this policy with an added/changed directive. + * + * @param name The directive to add/modify + * @param value The value + * @return A clone of the policy with the updated directive + */ + public Policy cloneWithDirective(String name, String value) { + Map directives = new HashMap(this.directives); + directives.put(name, value); + return new InternalPolicy(this, Collections.unmodifiableMap(directives), tagRules, cssRules); + } + + /** + * Go through section of the policy file. + * + * @param root Top level of + * @param directives The directives map to update + */ + private static void parseDirectives(Element root, Map directives) { + for (Element ele : getByTagName(root, "directive")) { + String name = getAttributeValue(ele, "name"); + String value = getAttributeValue(ele, "value"); + directives.put(name, value); } - - /** - * Go through section of the policy file. 
- * - * @param root Top level of - * @param directives The directives map to update - */ - private static void parseDirectives(Element root, Map directives) { - for (Element ele : getByTagName(root, "directive")) { - String name = getAttributeValue(ele, "name"); - String value = getAttributeValue(ele, "value"); - directives.put(name, value); + } + + /** + * Go through section of the policy file. + * + * @param allowedEmptyTagsListNode Top level of + * @param allowedEmptyTags The tags that can be empty + */ + private static void parseAllowedEmptyTags( + Element allowedEmptyTagsListNode, List allowedEmptyTags) { + if (allowedEmptyTagsListNode != null) { + for (Element literalNode : + getGrandChildrenByTagName(allowedEmptyTagsListNode, "literal-list", "literal")) { + + String value = getAttributeValue(literalNode, "value"); + if (value != null && value.length() > 0) { + allowedEmptyTags.add(value); } - } - - /** - * Go through section of the policy file. - * - * @param allowedEmptyTagsListNode Top level of - * @param allowedEmptyTags The tags that can be empty - */ - private static void parseAllowedEmptyTags(Element allowedEmptyTagsListNode, - List allowedEmptyTags) { - if (allowedEmptyTagsListNode != null) { - for (Element literalNode : - getGrandChildrenByTagName(allowedEmptyTagsListNode, "literal-list", "literal")) { - - String value = getAttributeValue(literalNode, "value"); - if (value != null && value.length() > 0) { - allowedEmptyTags.add(value); - } - } - } else allowedEmptyTags.addAll(Constants.defaultAllowedEmptyTags); - } - - /** - * Go through section of the policy file. 
- * - * @param requireClosingTagsListNode Top level of - * @param requireClosingTags The list of tags that require closing - */ - private static void parseRequireClosingTags(Element requireClosingTagsListNode, - List requireClosingTags) { - if (requireClosingTagsListNode != null) { - for (Element literalNode : - getGrandChildrenByTagName(requireClosingTagsListNode, "literal-list", "literal")) { - - String value = getAttributeValue(literalNode, "value"); - if (value != null && value.length() > 0) { - requireClosingTags.add(value); - } - } - } else requireClosingTags.addAll(Constants.defaultRequireClosingTags); - } - - /** - * Go through section of the policy file. - * - * @param root Top level of - * @param globalAttributes1 A HashMap of global Attributes that need validation for every tag. - * @param commonAttributes The common attributes - * @throws PolicyException - */ - private static void parseGlobalAttributes(Element root, Map globalAttributes1, Map commonAttributes) throws PolicyException { - for (Element ele : getByTagName(root, "attribute")) { - - String name = getAttributeValue(ele, "name"); - Attribute toAdd = commonAttributes.get(name.toLowerCase()); - - if (toAdd != null) globalAttributes1.put(name.toLowerCase(), toAdd); - else throw new PolicyException("Global attribute '" + name - + "' was not defined in "); + } + } else allowedEmptyTags.addAll(Constants.defaultAllowedEmptyTags); + } + + /** + * Go through section of the policy file. 
+ * + * @param requireClosingTagsListNode Top level of + * @param requireClosingTags The list of tags that require closing + */ + private static void parseRequireClosingTags( + Element requireClosingTagsListNode, List requireClosingTags) { + if (requireClosingTagsListNode != null) { + for (Element literalNode : + getGrandChildrenByTagName(requireClosingTagsListNode, "literal-list", "literal")) { + + String value = getAttributeValue(literalNode, "value"); + if (value != null && value.length() > 0) { + requireClosingTags.add(value); } + } + } else requireClosingTags.addAll(Constants.defaultRequireClosingTags); + } + + /** + * Go through section of the policy file. + * + * @param root Top level of + * @param globalAttributes1 A HashMap of global Attributes that need validation for every tag. + * @param commonAttributes The common attributes + * @throws PolicyException + */ + private static void parseGlobalAttributes( + Element root, + Map globalAttributes1, + Map commonAttributes) + throws PolicyException { + for (Element ele : getByTagName(root, "attribute")) { + + String name = getAttributeValue(ele, "name"); + Attribute toAdd = commonAttributes.get(name.toLowerCase()); + + if (toAdd != null) globalAttributes1.put(name.toLowerCase(), toAdd); + else + throw new PolicyException( + "Global attribute '" + name + "' was not defined in "); } - - /** - * Go through section of the policy file. - * - * @param root Top level of - * @param dynamicAttributes A HashMap of dynamic Attributes that need validation for every tag. 
- * @param commonAttributes The common attributes - * @throws PolicyException - */ - private static void parseDynamicAttributes(Element root, Map dynamicAttributes, Map commonAttributes) throws PolicyException { - for (Element ele : getByTagName(root, "attribute")) { - - String name = getAttributeValue(ele, "name"); - Attribute toAdd = commonAttributes.get(name.toLowerCase()); - - if (toAdd != null) { - String attrName = name.toLowerCase().substring(0, name.length() - 1); - dynamicAttributes.put(attrName, toAdd); - } else throw new PolicyException("Dynamic attribute '" + name - + "' was not defined in "); - } + } + + /** + * Go through section of the policy file. + * + * @param root Top level of + * @param dynamicAttributes A HashMap of dynamic Attributes that need validation for every tag. + * @param commonAttributes The common attributes + * @throws PolicyException + */ + private static void parseDynamicAttributes( + Element root, + Map dynamicAttributes, + Map commonAttributes) + throws PolicyException { + for (Element ele : getByTagName(root, "attribute")) { + + String name = getAttributeValue(ele, "name"); + Attribute toAdd = commonAttributes.get(name.toLowerCase()); + + if (toAdd != null) { + String attrName = name.toLowerCase().substring(0, name.length() - 1); + dynamicAttributes.put(attrName, toAdd); + } else + throw new PolicyException( + "Dynamic attribute '" + name + "' was not defined in "); } - - /** - * Go through the section of the policy file. - * - * @param root Top level of - * @param commonRegularExpressions1 the antisamy pattern objects - */ - private static void parseCommonRegExps(Element root, Map commonRegularExpressions1) { - for (Element ele : getByTagName(root, "regexp")) { - - String name = getAttributeValue(ele, "name"); - Pattern pattern = Pattern.compile(getAttributeValue(ele, "value"), Pattern.DOTALL); - commonRegularExpressions1.put(name, new AntiSamyPattern(pattern)); - } + } + + /** + * Go through the section of the policy file. 
+ * + * @param root Top level of + * @param commonRegularExpressions1 the antisamy pattern objects + */ + private static void parseCommonRegExps( + Element root, Map commonRegularExpressions1) { + for (Element ele : getByTagName(root, "regexp")) { + + String name = getAttributeValue(ele, "name"); + Pattern pattern = Pattern.compile(getAttributeValue(ele, "value"), Pattern.DOTALL); + commonRegularExpressions1.put(name, new AntiSamyPattern(pattern)); } - - private static void parseCommonAttributes(Element root, Map commonAttributes1, - Map commonRegularExpressions1) { - - for (Element ele : getByTagName(root, "attribute")) { - String onInvalid = getAttributeValue(ele, "onInvalid"); - String name = getAttributeValue(ele, "name"); - - List allowedRegexps = getAllowedRegexps(commonRegularExpressions1, ele); - List allowedValues = getAllowedLiterals(ele); - - final String onInvalidStr; - if (onInvalid != null && onInvalid.length() > 0) { - onInvalidStr = onInvalid; - } else onInvalidStr = DEFAULT_ONINVALID; - - String description = getAttributeValue(ele, "description"); - Attribute attribute = new Attribute(getAttributeValue(ele, "name"), allowedRegexps, - allowedValues, onInvalidStr, description); - commonAttributes1.put(name.toLowerCase(), attribute); - } + } + + private static void parseCommonAttributes( + Element root, + Map commonAttributes1, + Map commonRegularExpressions1) { + + for (Element ele : getByTagName(root, "attribute")) { + String onInvalid = getAttributeValue(ele, "onInvalid"); + String name = getAttributeValue(ele, "name"); + + List allowedRegexps = getAllowedRegexps(commonRegularExpressions1, ele); + List allowedValues = getAllowedLiterals(ele); + + final String onInvalidStr; + if (onInvalid != null && onInvalid.length() > 0) { + onInvalidStr = onInvalid; + } else onInvalidStr = DEFAULT_ONINVALID; + + String description = getAttributeValue(ele, "description"); + Attribute attribute = + new Attribute( + getAttributeValue(ele, "name"), + allowedRegexps, 
+ allowedValues, + onInvalidStr, + description); + commonAttributes1.put(name.toLowerCase(), attribute); } - - private static List getAllowedLiterals(Element ele) { - List allowedValues = new ArrayList(); - for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) { - String value = getAttributeValue(literalNode, "value"); - - if (value != null && value.length() > 0) { - allowedValues.add(value); - } else if (literalNode.getNodeValue() != null) { - allowedValues.add(literalNode.getNodeValue()); - } - } - return allowedValues; + } + + private static List getAllowedLiterals(Element ele) { + List allowedValues = new ArrayList(); + for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) { + String value = getAttributeValue(literalNode, "value"); + + if (value != null && value.length() > 0) { + allowedValues.add(value); + } else if (literalNode.getNodeValue() != null) { + allowedValues.add(literalNode.getNodeValue()); + } } - - private static List getAllowedRegexps(Map commonRegularExpressions1, Element ele) { - List allowedRegExp = new ArrayList(); - for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) { - String regExpName = getAttributeValue(regExpNode, "name"); - String value = getAttributeValue(regExpNode, "value"); - - if (regExpName != null && regExpName.length() > 0) { - allowedRegExp.add(commonRegularExpressions1.get(regExpName).getPattern()); - } else allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL)); - } - return allowedRegExp; + return allowedValues; + } + + private static List getAllowedRegexps( + Map commonRegularExpressions1, Element ele) { + List allowedRegExp = new ArrayList(); + for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) { + String regExpName = getAttributeValue(regExpNode, "name"); + String value = getAttributeValue(regExpNode, "value"); + + if (regExpName != null && regExpName.length() > 0) { + 
allowedRegExp.add(commonRegularExpressions1.get(regExpName).getPattern()); + } else allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL)); } - - private static List getAllowedRegexps2(Map commonRegularExpressions1, - Element attributeNode, String tagName) throws PolicyException { - List allowedRegexps = new ArrayList(); - for (Element regExpNode : getGrandChildrenByTagName(attributeNode, "regexp-list", "regexp")) { - String regExpName = getAttributeValue(regExpNode, "name"); - String value = getAttributeValue(regExpNode, "value"); - - /* - * Look up common regular expression specified - * by the "name" field. They can put a common - * name in the "name" field or provide a custom - * value in the "value" field. They must choose - * one or the other, not both. - */ - if (regExpName != null && regExpName.length() > 0) { - AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName); - if (pattern != null) { - allowedRegexps.add(pattern.getPattern()); - } else throw new PolicyException("Regular expression '" + regExpName + - "' was referenced as a common regexp in definition of '" + tagName + - "', but does not exist in "); - } else if (value != null && value.length() > 0) { - allowedRegexps.add(Pattern.compile(value, Pattern.DOTALL)); - } - } - return allowedRegexps; + return allowedRegExp; + } + + private static List getAllowedRegexps2( + Map commonRegularExpressions1, Element attributeNode, String tagName) + throws PolicyException { + List allowedRegexps = new ArrayList(); + for (Element regExpNode : getGrandChildrenByTagName(attributeNode, "regexp-list", "regexp")) { + String regExpName = getAttributeValue(regExpNode, "name"); + String value = getAttributeValue(regExpNode, "value"); + + /* + * Look up common regular expression specified + * by the "name" field. They can put a common + * name in the "name" field or provide a custom + * value in the "value" field. They must choose + * one or the other, not both. 
+ */ + if (regExpName != null && regExpName.length() > 0) { + AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName); + if (pattern != null) { + allowedRegexps.add(pattern.getPattern()); + } else + throw new PolicyException( + "Regular expression '" + + regExpName + + "' was referenced as a common regexp in definition of '" + + tagName + + "', but does not exist in "); + } else if (value != null && value.length() > 0) { + allowedRegexps.add(Pattern.compile(value, Pattern.DOTALL)); + } } - - private static List getAllowedRegexp3(Map commonRegularExpressions1, - Element ele, String name) throws PolicyException { - - List allowedRegExp = new ArrayList(); - for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) { - String regExpName = getAttributeValue(regExpNode, "name"); - String value = getAttributeValue(regExpNode, "value"); - - AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName); - - if (pattern != null) { - allowedRegExp.add(pattern.getPattern()); - } else if (value != null) { - allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL)); - } else throw new PolicyException("Regular expression '" + regExpName + - "' was referenced as a common regexp in definition of '" + name + - "', but does not exist in "); - } - return allowedRegExp; + return allowedRegexps; + } + + private static List getAllowedRegexp3( + Map commonRegularExpressions1, Element ele, String name) + throws PolicyException { + + List allowedRegExp = new ArrayList(); + for (Element regExpNode : getGrandChildrenByTagName(ele, "regexp-list", "regexp")) { + String regExpName = getAttributeValue(regExpNode, "name"); + String value = getAttributeValue(regExpNode, "value"); + + AntiSamyPattern pattern = commonRegularExpressions1.get(regExpName); + + if (pattern != null) { + allowedRegExp.add(pattern.getPattern()); + } else if (value != null) { + allowedRegExp.add(Pattern.compile(value, Pattern.DOTALL)); + } else + throw new PolicyException( + 
"Regular expression '" + + regExpName + + "' was referenced as a common regexp in definition of '" + + name + + "', but does not exist in "); } + return allowedRegExp; + } - private static void parseTagRules(Element root, Map commonAttributes1, Map commonRegularExpressions1, Map tagRules1) throws PolicyException { + private static void parseTagRules( + Element root, + Map commonAttributes1, + Map commonRegularExpressions1, + Map tagRules1) + throws PolicyException { - if (root == null) return; + if (root == null) return; - for (Element tagNode : getByTagName(root, "tag")) { - String name = getAttributeValue(tagNode, "name"); - String action = getAttributeValue(tagNode, "action"); + for (Element tagNode : getByTagName(root, "tag")) { + String name = getAttributeValue(tagNode, "name"); + String action = getAttributeValue(tagNode, "action"); - NodeList attributeList = tagNode.getElementsByTagName("attribute"); - Map tagAttributes = getTagAttributes(commonAttributes1, commonRegularExpressions1, attributeList, name); - Tag tag = new Tag(name, tagAttributes, action); + NodeList attributeList = tagNode.getElementsByTagName("attribute"); + Map tagAttributes = + getTagAttributes(commonAttributes1, commonRegularExpressions1, attributeList, name); + Tag tag = new Tag(name, tagAttributes, action); - tagRules1.put(name.toLowerCase(), tag); - } + tagRules1.put(name.toLowerCase(), tag); } - - private static Map getTagAttributes(Map commonAttributes1, Map commonRegularExpressions1, NodeList attributeList, String tagName) throws PolicyException { - - Map tagAttributes = new HashMap(); - for (int j = 0; j < attributeList.getLength(); j++) { - Element attributeNode = (Element) attributeList.item(j); - - String attrName = getAttributeValue(attributeNode, "name").toLowerCase(); - if (!attributeNode.hasChildNodes()) { - Attribute attribute = commonAttributes1.get(attrName); - - // All they provided was the name, so they must want a common attribute. 
- if (attribute != null) { - /* - * If they provide onInvalid/description values here they will - * override the common values. - */ - - String onInvalid = getAttributeValue(attributeNode, "onInvalid"); - String description = getAttributeValue(attributeNode, "description"); - Attribute changed = attribute.mutate(onInvalid, description); - commonAttributes1.put(attrName, changed); - tagAttributes.put(attrName, changed); - - } else throw new PolicyException("Attribute '" + getAttributeValue(attributeNode, "name") + - "' was referenced as a common attribute in definition of '" + tagName + - "', but does not exist in "); - - } else { - List allowedRegexps2 = getAllowedRegexps2(commonRegularExpressions1, attributeNode, tagName); - List allowedValues2 = getAllowedLiterals(attributeNode); - String onInvalid = getAttributeValue(attributeNode, "onInvalid"); - String description = getAttributeValue(attributeNode, "description"); - Attribute attribute = new Attribute(getAttributeValue(attributeNode, "name"), allowedRegexps2, allowedValues2, onInvalid, description); - - // Add fully built attribute. - tagAttributes.put(attrName, attribute); - } - } - return tagAttributes; + } + + private static Map getTagAttributes( + Map commonAttributes1, + Map commonRegularExpressions1, + NodeList attributeList, + String tagName) + throws PolicyException { + + Map tagAttributes = new HashMap(); + for (int j = 0; j < attributeList.getLength(); j++) { + Element attributeNode = (Element) attributeList.item(j); + + String attrName = getAttributeValue(attributeNode, "name").toLowerCase(); + if (!attributeNode.hasChildNodes()) { + Attribute attribute = commonAttributes1.get(attrName); + + // All they provided was the name, so they must want a common attribute. + if (attribute != null) { + /* + * If they provide onInvalid/description values here they will + * override the common values. 
+ */ + + String onInvalid = getAttributeValue(attributeNode, "onInvalid"); + String description = getAttributeValue(attributeNode, "description"); + Attribute changed = attribute.mutate(onInvalid, description); + commonAttributes1.put(attrName, changed); + tagAttributes.put(attrName, changed); + + } else + throw new PolicyException( + "Attribute '" + + getAttributeValue(attributeNode, "name") + + "' was referenced as a common attribute in definition of '" + + tagName + + "', but does not exist in "); + + } else { + List allowedRegexps2 = + getAllowedRegexps2(commonRegularExpressions1, attributeNode, tagName); + List allowedValues2 = getAllowedLiterals(attributeNode); + String onInvalid = getAttributeValue(attributeNode, "onInvalid"); + String description = getAttributeValue(attributeNode, "description"); + Attribute attribute = + new Attribute( + getAttributeValue(attributeNode, "name"), + allowedRegexps2, + allowedValues2, + onInvalid, + description); + + // Add fully built attribute. + tagAttributes.put(attrName, attribute); + } } - - private static void parseCSSRules(Element root, Map cssRules1, Map commonRegularExpressions1) throws PolicyException { - - for (Element ele : getByTagName(root, "property")) { - String name = getAttributeValue(ele, "name"); - String description = getAttributeValue(ele, "description"); - - List allowedRegexp3 = getAllowedRegexp3(commonRegularExpressions1, ele, name); - - List allowedValue = new ArrayList(); - for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) { - allowedValue.add(getAttributeValue(literalNode, "value")); - } - - List shortHandRefs = new ArrayList(); - for (Element shorthandNode : getGrandChildrenByTagName(ele, "shorthand-list", "shorthand")) { - shortHandRefs.add(getAttributeValue(shorthandNode, "name")); - } - - String onInvalid = getAttributeValue(ele, "onInvalid"); - final String onInvalidStr; - if (onInvalid != null && onInvalid.length() > 0) { - onInvalidStr = onInvalid; - } 
else onInvalidStr = DEFAULT_ONINVALID; - - Property property = new Property(name,allowedRegexp3, allowedValue, shortHandRefs, description, onInvalidStr); - cssRules1.put(name.toLowerCase(), property); - } + return tagAttributes; + } + + private static void parseCSSRules( + Element root, + Map cssRules1, + Map commonRegularExpressions1) + throws PolicyException { + + for (Element ele : getByTagName(root, "property")) { + String name = getAttributeValue(ele, "name"); + String description = getAttributeValue(ele, "description"); + + List allowedRegexp3 = getAllowedRegexp3(commonRegularExpressions1, ele, name); + + List allowedValue = new ArrayList(); + for (Element literalNode : getGrandChildrenByTagName(ele, "literal-list", "literal")) { + allowedValue.add(getAttributeValue(literalNode, "value")); + } + + List shortHandRefs = new ArrayList(); + for (Element shorthandNode : getGrandChildrenByTagName(ele, "shorthand-list", "shorthand")) { + shortHandRefs.add(getAttributeValue(shorthandNode, "name")); + } + + String onInvalid = getAttributeValue(ele, "onInvalid"); + final String onInvalidStr; + if (onInvalid != null && onInvalid.length() > 0) { + onInvalidStr = onInvalid; + } else onInvalidStr = DEFAULT_ONINVALID; + + Property property = + new Property( + name, allowedRegexp3, allowedValue, shortHandRefs, description, onInvalidStr); + cssRules1.put(name.toLowerCase(), property); } - - /** - * A simple method for returning on of the <global-attribute> entries by - * name. - * - * @param name The name of the global-attribute we want to look up. - * @return An Attribute associated with the global-attribute lookup name specified. - */ - public Attribute getGlobalAttributeByName(String name) { - return globalAttributes.get(name.toLowerCase()); - } - - /** - * A method for returning one of the dynamic <global-attribute> entries by name. - * - * @param name The name of the dynamic global-attribute we want to look up. 
- * @return An Attribute associated with the global-attribute lookup name specified, - * or null if not found. - */ - public Attribute getDynamicAttributeByName(String name) { - Attribute dynamicAttribute = null; - Set> entries = dynamicAttributes.entrySet(); - for (Map.Entry entry : entries) { - if (name.startsWith(entry.getKey())) { - dynamicAttribute = entry.getValue(); - break; - } - } - return dynamicAttribute; - } - - /** - * Return all the allowed empty tags configured in the Policy. - * - * @return A String array of all the he allowed empty tags configured in the Policy. - */ - public TagMatcher getAllowedEmptyTags() { - return allowedEmptyTagsMatcher; - } - - /** - * Return all the tags that are required to be closed with an end tag, even if they have no child content. - * - * @return A String array of all the tags that are required to be closed with an end tag, even if they have no child content. - */ - public TagMatcher getRequiresClosingTags() { - return requiresClosingTagsMatcher; + } + + /** + * A simple method for returning on of the <global-attribute> entries by name. + * + * @param name The name of the global-attribute we want to look up. + * @return An Attribute associated with the global-attribute lookup name specified. + */ + public Attribute getGlobalAttributeByName(String name) { + return globalAttributes.get(name.toLowerCase()); + } + + /** + * A method for returning one of the dynamic <global-attribute> entries by name. + * + * @param name The name of the dynamic global-attribute we want to look up. + * @return An Attribute associated with the global-attribute lookup name specified, or null if not + * found. 
+ */ + public Attribute getDynamicAttributeByName(String name) { + Attribute dynamicAttribute = null; + Set> entries = dynamicAttributes.entrySet(); + for (Map.Entry entry : entries) { + if (name.startsWith(entry.getKey())) { + dynamicAttribute = entry.getValue(); + break; + } } - - /** - * Return a directive value based on a lookup name. - * - * @param name The name of the directive we want to look up. - * @return A String object containing the directive associated with the lookup name, or null if none is found. - */ - public String getDirective(String name) { - return directives.get(name); - } - - /** - * Resolves public and system IDs to files stored within the JAR. - * - * @param systemId The name of the entity we want to look up. - * @param baseUrl The base location of the entity. - * @return A String object containing the directive associated with the lookup name, or null if none is found. - * @throws IOException if the specified URL can't be opened. - * @throws SAXException This exception can't actually be thrown, but left in the method signature for - * API compatibility reasons. - */ - @SuppressFBWarnings(value = "SECURITY", justification="Opening a stream to the provided URL is not " - + "a vulnerability because only local file URLs are allowed.") - public static InputSource resolveEntity(final String systemId, URL baseUrl) throws IOException, SAXException { - - InputSource source; - - // Can't resolve public id, but might be able to resolve relative - // system id, since we have a base URI. 
- if (systemId != null && baseUrl != null) { - - verifyLocalUrl(baseUrl); - - URL url; - - try { - url = new URL(baseUrl, systemId); - source = new InputSource(url.openStream()); - source.setSystemId(systemId); - return source; - } catch (MalformedURLException | FileNotFoundException e) { - try { - String absURL = URIUtils.resolveAsString(systemId, baseUrl.toString()); - url = new URL(absURL); - source = new InputSource(url.openStream()); - source.setSystemId(systemId); - return source; - } catch (MalformedURLException ex2) { - // nothing to do - } - } - return null; - } - - // No resolving. - return null; - } - - /** - * Verify that the target of the URL is a local file only. Currently, we allow file: and jar: URLs. - * The target of the URL is typically an AntiSamy policy file. - * @param url The URL to verify. - * @throws MalformedURLException If the supplied URL does not reference a local file directly, or one inside - * a local JAR file. - */ - private static void verifyLocalUrl(URL url) throws MalformedURLException { - - switch (url.getProtocol()) { - case "file": - case "jar" : break; // These are OK. - - default: throw new MalformedURLException( - "Only local files can be accessed with a policy URL. Illegal value supplied was: " + url); + return dynamicAttribute; + } + + /** + * Return all the allowed empty tags configured in the Policy. + * + * @return A String array of all the he allowed empty tags configured in the Policy. + */ + public TagMatcher getAllowedEmptyTags() { + return allowedEmptyTagsMatcher; + } + + /** + * Return all the tags that are required to be closed with an end tag, even if they have no child + * content. + * + * @return A String array of all the tags that are required to be closed with an end tag, even if + * they have no child content. + */ + public TagMatcher getRequiresClosingTags() { + return requiresClosingTagsMatcher; + } + + /** + * Return a directive value based on a lookup name. 
+ * + * @param name The name of the directive we want to look up. + * @return A String object containing the directive associated with the lookup name, or null if + * none is found. + */ + public String getDirective(String name) { + return directives.get(name); + } + + /** + * Resolves public and system IDs to files stored within the JAR. + * + * @param systemId The name of the entity we want to look up. + * @param baseUrl The base location of the entity. + * @return A String object containing the directive associated with the lookup name, or null if + * none is found. + * @throws IOException if the specified URL can't be opened. + * @throws SAXException This exception can't actually be thrown, but left in the method signature + * for API compatibility reasons. + */ + @SuppressFBWarnings( + value = "SECURITY", + justification = + "Opening a stream to the provided URL is not " + + "a vulnerability because only local file URLs are allowed.") + public static InputSource resolveEntity(final String systemId, URL baseUrl) + throws IOException, SAXException { + + InputSource source; + + // Can't resolve public id, but might be able to resolve relative + // system id, since we have a base URI. 
+ if (systemId != null && baseUrl != null) { + + verifyLocalUrl(baseUrl); + + URL url; + + try { + url = new URL(baseUrl, systemId); + source = new InputSource(url.openStream()); + source.setSystemId(systemId); + return source; + } catch (MalformedURLException | FileNotFoundException e) { + try { + String absURL = URIUtils.resolveAsString(systemId, baseUrl.toString()); + url = new URL(absURL); + source = new InputSource(url.openStream()); + source.setSystemId(systemId); + return source; + } catch (MalformedURLException ex2) { + // nothing to do } + } + return null; } - private static Element getFirstChild(Element element, String tagName) { - if (element == null) return null; - NodeList elementsByTagName = element.getElementsByTagName(tagName); - if (elementsByTagName != null && elementsByTagName.getLength() > 0) - return (Element) elementsByTagName.item(0); - else return null; + // No resolving. + return null; + } + + /** + * Verify that the target of the URL is a local file only. Currently, we allow file: and jar: + * URLs. The target of the URL is typically an AntiSamy policy file. + * + * @param url The URL to verify. + * @throws MalformedURLException If the supplied URL does not reference a local file directly, or + * one inside a local JAR file. + */ + private static void verifyLocalUrl(URL url) throws MalformedURLException { + + switch (url.getProtocol()) { + case "file": + case "jar": + break; // These are OK. + + default: + throw new MalformedURLException( + "Only local files can be accessed with a policy URL. 
Illegal value supplied was: " + + url); } + } + + private static Element getFirstChild(Element element, String tagName) { + if (element == null) return null; + NodeList elementsByTagName = element.getElementsByTagName(tagName); + if (elementsByTagName != null && elementsByTagName.getLength() > 0) + return (Element) elementsByTagName.item(0); + else return null; + } + + private static Iterable getGrandChildrenByTagName( + Element parent, String immediateChildName, String subChild) { + NodeList elementsByTagName = parent.getElementsByTagName(immediateChildName); + if (elementsByTagName.getLength() == 0) return Collections.emptyList(); + Element regExpListNode = (Element) elementsByTagName.item(0); + return getByTagName(regExpListNode, subChild); + } + + private static Iterable getByTagName(Element parent, String tagName) { + if (parent == null) return Collections.emptyList(); + + final NodeList nodes = parent.getElementsByTagName(tagName); + return new Iterable() { + public Iterator iterator() { + return new Iterator() { + int pos = 0; + int len = nodes.getLength(); + + public boolean hasNext() { + return pos < len; + } - private static Iterable getGrandChildrenByTagName(Element parent, String immediateChildName, String subChild){ - NodeList elementsByTagName = parent.getElementsByTagName(immediateChildName); - if (elementsByTagName.getLength() == 0) return Collections.emptyList(); - Element regExpListNode = (Element) elementsByTagName.item(0); - return getByTagName( regExpListNode, subChild); - } + public Element next() { + return (Element) nodes.item(pos++); + } - private static Iterable getByTagName(Element parent, String tagName) { - if (parent == null) return Collections.emptyList(); - - final NodeList nodes = parent.getElementsByTagName(tagName); - return new Iterable() { - public Iterator iterator() { - return new Iterator() { - int pos = 0; - int len = nodes.getLength(); - - public boolean hasNext() { - return pos < len; - } - - public Element next() { - 
return (Element) nodes.item(pos++); - } - - public void remove() { - throw new UnsupportedOperationException("Cant remove"); - } - }; - } + public void remove() { + throw new UnsupportedOperationException("Cant remove"); + } }; + } + }; + } + + public AntiSamyPattern getCommonRegularExpressions(String name) { + return commonRegularExpressions.get(name); + } + + private static void getPolicySchema() throws SAXException { + if (schema == null) { + InputStream schemaStream = + Policy.class.getClassLoader().getResourceAsStream(POLICY_SCHEMA_URI); + Source schemaSource = new StreamSource(schemaStream); + schema = + SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(schemaSource); } - - public AntiSamyPattern getCommonRegularExpressions(String name) { - return commonRegularExpressions.get(name); + } + + /** + * This class is implemented to just throw an exception when validating the policy schema while + * parsing the document. + */ + static class SAXErrorHandler implements ErrorHandler { + @Override + public void error(SAXParseException arg0) throws SAXException { + throw arg0; } - private static void getPolicySchema() throws SAXException { - if (schema == null) { - InputStream schemaStream = Policy.class.getClassLoader().getResourceAsStream(POLICY_SCHEMA_URI); - Source schemaSource = new StreamSource(schemaStream); - schema = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) - .newSchema(schemaSource); - } + @Override + public void fatalError(SAXParseException arg0) throws SAXException { + throw arg0; } - /** - * This class is implemented to just throw an exception when - * validating the policy schema while parsing the document. 
- */ - static class SAXErrorHandler implements ErrorHandler { - @Override - public void error(SAXParseException arg0) throws SAXException { - throw arg0; - } - - @Override - public void fatalError(SAXParseException arg0) throws SAXException { - throw arg0; - } - - @Override - public void warning(SAXParseException arg0) throws SAXException { - throw arg0; - } + @Override + public void warning(SAXParseException arg0) throws SAXException { + throw arg0; } + } } diff --git a/src/main/java/org/owasp/validator/html/PolicyException.java b/src/main/java/org/owasp/validator/html/PolicyException.java index be840913..ab61c0fd 100644 --- a/src/main/java/org/owasp/validator/html/PolicyException.java +++ b/src/main/java/org/owasp/validator/html/PolicyException.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -25,27 +25,21 @@ package org.owasp.validator.html; /** - * This exception gets thrown when there is a problem validating or parsing - * the policy file. Any validation errors not caught by the XML validation - * will be thrown with this exception. - * - * @author Arshan Dabirsiaghi + * This exception gets thrown when there is a problem validating or parsing the policy file. 
Any + * validation errors not caught by the XML validation will be thrown with this exception. * + * @author Arshan Dabirsiaghi */ - public class PolicyException extends Exception { - /** - * - */ - private static final long serialVersionUID = 1L; - - public PolicyException(Exception e) { - super(e); - } + /** */ + private static final long serialVersionUID = 1L; - public PolicyException(String string) { - super(string); - } + public PolicyException(Exception e) { + super(e); + } + public PolicyException(String string) { + super(string); + } } diff --git a/src/main/java/org/owasp/validator/html/ScanException.java b/src/main/java/org/owasp/validator/html/ScanException.java index 71e7ce6d..fd41edb9 100644 --- a/src/main/java/org/owasp/validator/html/ScanException.java +++ b/src/main/java/org/owasp/validator/html/ScanException.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -25,28 +25,22 @@ package org.owasp.validator.html; /** - * - * This exception gets thrown when there is an unexpected error parsing - * the tainted HTML. The code is sturdy, but the unlikely IOException or - * SAX exceptions are always theoretically possible. 
- * - * @author Arshan Dabirsiaghi + * This exception gets thrown when there is an unexpected error parsing the tainted HTML. The code + * is sturdy, but the unlikely IOException or SAX exceptions are always theoretically + * possible. * + * @author Arshan Dabirsiaghi */ - public class ScanException extends Exception { - /** - * - */ - private static final long serialVersionUID = 1L; + /** */ + private static final long serialVersionUID = 1L; - public ScanException(Exception e) { - super(e); - } - - public ScanException(String s) { - super(s); - } + public ScanException(Exception e) { + super(e); + } + public ScanException(String s) { + super(s); + } } diff --git a/src/main/java/org/owasp/validator/html/TagMatcher.java b/src/main/java/org/owasp/validator/html/TagMatcher.java index 0695efc6..195c2fd9 100644 --- a/src/main/java/org/owasp/validator/html/TagMatcher.java +++ b/src/main/java/org/owasp/validator/html/TagMatcher.java @@ -32,26 +32,27 @@ * @author Kristian Rosenvold */ public class TagMatcher { - private final Set allowedLowercase = new HashSet(); + private final Set allowedLowercase = new HashSet(); - public TagMatcher(Iterable allowedValues) { - for (String item : allowedValues) { - allowedLowercase.add(item.toLowerCase()); - } + public TagMatcher(Iterable allowedValues) { + for (String item : allowedValues) { + allowedLowercase.add(item.toLowerCase()); } + } - /** - * Examines if this tag matches the values in this matcher. - * - * Please note that this is case-insensitive, which is OK for HTML, but not really for XML - * @param tagName The tag name to look for - * @return true if the tag name matches this matcher - */ - public boolean matches(String tagName) { - return allowedLowercase.contains(tagName.toLowerCase()); - } + /** + * Examines if this tag matches the values in this matcher. + * + *

Please note that this is case-insensitive, which is OK for HTML, but not really for XML + * + * @param tagName The tag name to look for + * @return true if the tag name matches this matcher + */ + public boolean matches(String tagName) { + return allowedLowercase.contains(tagName.toLowerCase()); + } - public int size() { - return allowedLowercase.size(); - } + public int size() { + return allowedLowercase.size(); + } } diff --git a/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java b/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java index 7a5d9bd8..1d7e662e 100644 --- a/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java +++ b/src/main/java/org/owasp/validator/html/model/AntiSamyPattern.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -28,35 +28,33 @@ import java.util.regex.Pattern; /** - * An extension of the Pattern with helper methods. + * An extension of the Pattern with helper methods. * * @author Arshan Dabirsiaghi */ public class AntiSamyPattern { - private final Pattern pattern; - - /** - * Constructor for AntiSamyPattern. - * - * @param pattern The Pattern to lookup based on the "name". 
- */ - public AntiSamyPattern(Pattern pattern) { - this.pattern = pattern; - } - - /** - * @return Return the Pattern of the AntiSamyPattern. - */ - public Pattern getPattern() { - return pattern; - } - - public Matcher matcher(CharSequence input) { - return pattern.matcher(input); - } - - public boolean matches(String other) { - return matcher(other).matches(); - } + private final Pattern pattern; + + /** + * Constructor for AntiSamyPattern. + * + * @param pattern The Pattern to lookup based on the "name". + */ + public AntiSamyPattern(Pattern pattern) { + this.pattern = pattern; + } + + /** @return Return the Pattern of the AntiSamyPattern. */ + public Pattern getPattern() { + return pattern; + } + + public Matcher matcher(CharSequence input) { + return pattern.matcher(input); + } + + public boolean matches(String other) { + return matcher(other).matches(); + } } diff --git a/src/main/java/org/owasp/validator/html/model/Attribute.java b/src/main/java/org/owasp/validator/html/model/Attribute.java index cfd70d1f..32bcbf58 100644 --- a/src/main/java/org/owasp/validator/html/model/Attribute.java +++ b/src/main/java/org/owasp/validator/html/model/Attribute.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li, Kristian Rosenvold - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
@@ -38,155 +38,171 @@ import java.util.regex.Pattern; /** - * A model for HTML attributes and the "rules" they must follow (either literals or regular expressions) in - * order to be considered valid. - * + * A model for HTML attributes and the "rules" they must follow (either literals or regular + * expressions) in order to be considered valid. + * * @author Arshan Dabirsiaghi * @author Kristian Rosenvold */ - -public class Attribute { - - private final String name; - private final String description; - private final String onInvalid; - private final List allowedValues; - private final Pattern[] allowedRegExps; - private final Set allowedValuesLower; - - public Attribute(String name, List allowedRegexps, List allowedValues, String onInvalidStr, String description) { - this.name = name; - this.allowedRegExps = allowedRegexps.toArray(new Pattern[ allowedRegexps.size()]); - this.allowedValues = Collections.unmodifiableList( allowedValues); - Set allowedValuesLower = new HashSet(); - for (String allowedValue : allowedValues) { - allowedValuesLower.add( allowedValue.toLowerCase()); - } - - this.allowedValuesLower = allowedValuesLower; - this.onInvalid = onInvalidStr; - this.description = description; +public class Attribute { + + private final String name; + private final String description; + private final String onInvalid; + private final List allowedValues; + private final Pattern[] allowedRegExps; + private final Set allowedValuesLower; + + public Attribute( + String name, + List allowedRegexps, + List allowedValues, + String onInvalidStr, + String description) { + this.name = name; + this.allowedRegExps = allowedRegexps.toArray(new Pattern[allowedRegexps.size()]); + this.allowedValues = Collections.unmodifiableList(allowedValues); + Set allowedValuesLower = new HashSet(); + for (String allowedValue : allowedValues) { + allowedValuesLower.add(allowedValue.toLowerCase()); } - public boolean matchesAllowedExpression(String value){ - String input = 
value.toLowerCase(); - for (Pattern pattern : allowedRegExps) { - if (pattern != null && pattern.matcher(input).matches()) { - return true; - } - } - return false; + this.allowedValuesLower = allowedValuesLower; + this.onInvalid = onInvalidStr; + this.description = description; + } + + public boolean matchesAllowedExpression(String value) { + String input = value.toLowerCase(); + for (Pattern pattern : allowedRegExps) { + if (pattern != null && pattern.matcher(input).matches()) { + return true; + } } + return false; + } + + public boolean containsAllowedValue(String valueInLowerCase) { + return allowedValuesLower.contains(valueInLowerCase); + } + + public String getName() { + return name; + } + + /** + * @return The onInvalid value a tag could have, from the list of "filterTag", + * "removeTag" and "removeAttribute" + */ + public String getOnInvalid() { + return onInvalid; + } + + public Attribute mutate(String onInvalid, String description) { + return new Attribute( + name, + Arrays.asList(allowedRegExps), + allowedValues, + onInvalid != null && onInvalid.length() != 0 ? onInvalid : this.onInvalid, + description != null && description.length() != 0 ? description : this.description); + } + + public String matcherRegEx(boolean hasNext) { + //

+ + StringBuilder regExp = new StringBuilder(); + regExp + .append(this.getName()) + .append(ANY_NORMAL_WHITESPACES) + .append("=") + .append(ANY_NORMAL_WHITESPACES) + .append("\"") + .append(Tag.OPEN_ATTRIBUTE); + + boolean hasRegExps = allowedRegExps.length > 0; + + if (allowedRegExps.length + allowedValues.size() > 0) { + + /* + * Go through and add static values to the regular expression. + */ + Iterator allowedValues = this.allowedValues.iterator(); + while (allowedValues.hasNext()) { + String allowedValue = allowedValues.next(); + + regExp.append(Tag.escapeRegularExpressionCharacters(allowedValue)); + + if (allowedValues.hasNext() || hasRegExps) { + regExp.append(ATTRIBUTE_DIVIDER); + } + } + + /* + * Add the regular expressions for this attribute value to the mother regular expression. + */ + Iterator allowedRegExps = Arrays.asList(this.allowedRegExps).iterator(); + while (allowedRegExps.hasNext()) { + Pattern allowedRegExp = allowedRegExps.next(); + regExp.append(allowedRegExp.pattern()); + + if (allowedRegExps.hasNext()) { + regExp.append(ATTRIBUTE_DIVIDER); + } + } - public boolean containsAllowedValue(String valueInLowerCase){ - return allowedValuesLower.contains(valueInLowerCase); - } - - public String getName() { - return name; - } - - /** - * - * @return The onInvalid value a tag could have, from the list of "filterTag", "removeTag" and "removeAttribute" - */ - public String getOnInvalid() { - return onInvalid; - } + if (this.allowedRegExps.length + this.allowedValues.size() > 0) { + regExp.append(CLOSE_ATTRIBUTE); + } + regExp.append("\"" + ANY_NORMAL_WHITESPACES); - public Attribute mutate(String onInvalid, String description) { - return new Attribute(name, Arrays.asList(allowedRegExps), allowedValues, onInvalid != null && onInvalid.length() != 0 ? onInvalid : this.onInvalid, - description != null && description.length() != 0 ? 
description : this.description); + if (hasNext) { + regExp.append(ATTRIBUTE_DIVIDER); + } } - - public String matcherRegEx(boolean hasNext){ - //

- - StringBuilder regExp = new StringBuilder(); - regExp.append(this.getName()).append(ANY_NORMAL_WHITESPACES).append("=").append(ANY_NORMAL_WHITESPACES).append("\"").append(Tag.OPEN_ATTRIBUTE); - - boolean hasRegExps = allowedRegExps.length > 0; - - if (allowedRegExps.length + allowedValues.size() > 0) { - - /* - * Go through and add static values to the regular expression. - */ - Iterator allowedValues = this.allowedValues.iterator(); - while (allowedValues.hasNext()) { - String allowedValue = allowedValues.next(); - - regExp.append(Tag.escapeRegularExpressionCharacters(allowedValue)); - - if (allowedValues.hasNext() || hasRegExps) { - regExp.append(ATTRIBUTE_DIVIDER); - } - } - - /* - * Add the regular expressions for this attribute value to the mother regular expression. - */ - Iterator allowedRegExps = Arrays.asList(this.allowedRegExps).iterator(); - while (allowedRegExps.hasNext()) { - Pattern allowedRegExp = allowedRegExps.next(); - regExp.append(allowedRegExp.pattern()); - - if (allowedRegExps.hasNext()) { - regExp.append(ATTRIBUTE_DIVIDER); - } - } - - if (this.allowedRegExps.length + this.allowedValues.size() > 0) { - regExp.append(CLOSE_ATTRIBUTE); - } - - regExp.append("\"" + ANY_NORMAL_WHITESPACES); - - if (hasNext) { - regExp.append(ATTRIBUTE_DIVIDER); - } + return regExp.toString(); + } + + /** + * This method takes the current rel attribute values and, depending on which ones to + * add, appends the corresponding values if they are not already present. It is meant to be used + * with anchor tags. + * + * @param addNofollow Specifies if "nofollow" value should be added in case it is not + * present. + * @param addNoopenerAndNoreferrer Specifies if "noopener noreferrer" value should be + * added in case it is not present. + * @param currentRelValue Current rel attribute value, it will be merged with the + * values specified from the previous parameters. + * @return The new rel attribute value to replace in an anchor tag. 
+ */ + public static String mergeRelValuesInAnchor( + boolean addNofollow, boolean addNoopenerAndNoreferrer, String currentRelValue) { + String newRelValue = ""; + if (currentRelValue == null || currentRelValue.isEmpty()) { + if (addNofollow) newRelValue = "nofollow"; + if (addNoopenerAndNoreferrer) newRelValue += " noopener noreferrer"; + } else { + ArrayList relTokens = new ArrayList<>(); + newRelValue = currentRelValue; + for (String value : currentRelValue.split(" ")) { + relTokens.add(value.toLowerCase()); + } + + if (addNofollow && !relTokens.contains("nofollow")) { + newRelValue += " nofollow"; + } + + if (addNoopenerAndNoreferrer) { + if (!relTokens.contains("noopener")) { + newRelValue += " noopener"; } - return regExp.toString(); - } - - /** - * This method takes the current rel attribute values and, depending on which ones to add, - * appends the corresponding values if they are not already present. It is meant to be used with anchor tags. - * - * @param addNofollow Specifies if "nofollow" value should be added in case it is not present. - * @param addNoopenerAndNoreferrer Specifies if "noopener noreferrer" value should be added in case - * it is not present. - * @param currentRelValue Current rel attribute value, it will be merged with the values specified - * from the previous parameters. - * @return The new rel attribute value to replace in an anchor tag. 
- */ - public static String mergeRelValuesInAnchor(boolean addNofollow, boolean addNoopenerAndNoreferrer, String currentRelValue) { - String newRelValue = ""; - if (currentRelValue == null || currentRelValue.isEmpty()) { - if (addNofollow) newRelValue = "nofollow"; - if (addNoopenerAndNoreferrer) newRelValue += " noopener noreferrer"; - } else { - ArrayList relTokens = new ArrayList<>(); - newRelValue = currentRelValue; - for (String value: currentRelValue.split(" ")) { - relTokens.add(value.toLowerCase()); - } - - if (addNofollow && !relTokens.contains("nofollow")) { - newRelValue += " nofollow"; - } - - if (addNoopenerAndNoreferrer) { - if (!relTokens.contains("noopener")){ - newRelValue += " noopener"; - } - if (!relTokens.contains("noreferrer")){ - newRelValue += " noreferrer"; - } - } + if (!relTokens.contains("noreferrer")) { + newRelValue += " noreferrer"; } - - return newRelValue.trim(); + } } + + return newRelValue.trim(); + } } diff --git a/src/main/java/org/owasp/validator/html/model/Property.java b/src/main/java/org/owasp/validator/html/model/Property.java index 1f752b32..7dfed870 100644 --- a/src/main/java/org/owasp/validator/html/model/Property.java +++ b/src/main/java/org/owasp/validator/html/model/Property.java @@ -1,11 +1,11 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without + * + * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * - Redistributions of source code must retain the above copyright notice, + * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation @@ -33,57 +33,66 @@ import java.util.regex.Pattern; /** - * A model for CSS properties and the "rules" they must follow (either literals - * or regular expressions) in order to be considered valid. - * + * A model for CSS properties and the "rules" they must follow (either literals or regular + * expressions) in order to be considered valid. + * * @author Jason Li */ public class Property { - private final String name; - - private final List allowedRegExp; + private final String name; - private final List allowedValues; + private final List allowedRegExp; - private final List shorthandRefs; + private final List allowedValues; - public Property(String name, List allowedRegexp3, List allowedValue, List shortHandRefs, String description, String onInvalidStr) { - this.name = name; - this.allowedRegExp = Collections.unmodifiableList(allowedRegexp3); - this.allowedValues = Collections.unmodifiableList(allowedValue); - this.shorthandRefs = Collections.unmodifiableList(shortHandRefs); - } + private final List shorthandRefs; - /** - * Return a List of allowed regular expressions - * @return The List of allowed regular expressions. - */ - public List getAllowedRegExp() { - return allowedRegExp; - } + public Property( + String name, + List allowedRegexp3, + List allowedValue, + List shortHandRefs, + String description, + String onInvalidStr) { + this.name = name; + this.allowedRegExp = Collections.unmodifiableList(allowedRegexp3); + this.allowedValues = Collections.unmodifiableList(allowedValue); + this.shorthandRefs = Collections.unmodifiableList(shortHandRefs); + } - /** - * Return a List of allowed literal values - * @return The List of allowed literal values. 
- */ - public List getAllowedValues() { - return allowedValues; - } + /** + * Return a List of allowed regular expressions + * + * @return The List of allowed regular expressions. + */ + public List getAllowedRegExp() { + return allowedRegExp; + } - /** - * Return a List of allowed shorthand references - * @return The List of allowed shorthand references. - */ - public List getShorthandRefs() { - return shorthandRefs; - } + /** + * Return a List of allowed literal values + * + * @return The List of allowed literal values. + */ + public List getAllowedValues() { + return allowedValues; + } - /** - * Get the name of the property. - * @return The name of the property. - */ - public String getName() { - return name; - } + /** + * Return a List of allowed shorthand references + * + * @return The List of allowed shorthand references. + */ + public List getShorthandRefs() { + return shorthandRefs; + } + /** + * Get the name of the property. + * + * @return The name of the property. + */ + public String getName() { + return name; + } } diff --git a/src/main/java/org/owasp/validator/html/model/Tag.java b/src/main/java/org/owasp/validator/html/model/Tag.java index e6643a17..fef0af6c 100644 --- a/src/main/java/org/owasp/validator/html/model/Tag.java +++ b/src/main/java/org/owasp/validator/html/model/Tag.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -27,129 +27,131 @@ import java.util.*; /** - * A model for HTML "tags" and the rules dictating their validation/filtration. Also contains information - * about their allowed attributes. - *

- * There is also some experimental (unused) code in here for generating a valid regular expression according to a policy - * file on a per-tag basis. + * A model for HTML "tags" and the rules dictating their validation/filtration. Also contains + * information about their allowed attributes.
+ *
+ * There is also some experimental (unused) code in here for generating a valid regular expression + * according to a policy file on a per-tag basis. * * @author Arshan Dabirsiaghi */ public class Tag { - /* - * These are the fields pulled from the policy XML. - */ - private final Map allowedAttributes; - private final String name; - private final String action; - + /* + * These are the fields pulled from the policy XML. + */ + private final Map allowedAttributes; + private final String name; + private final String action; + + public Tag(String name, Map tagAttributes, String action) { + this.name = name; + this.allowedAttributes = Collections.unmodifiableMap(tagAttributes); + this.action = action; + } + + /** + * @return The action for this tag which is one of filter, validate or + * remove. + */ + public String getAction() { + return action; + } + + /** + * Indicates if the action for this tag matches the supplied action + * + * @param action The action to match against + * @return True if it matches + */ + public boolean isAction(String action) { + return action.equals(this.action); + } + + public Tag mutateAction(String action) { + return new Tag(this.name, this.allowedAttributes, action); + } + + /* --------------------------------------------------------------------------------------------------*/ + + /** + * Returns a regular expression for validating individual tags. Not used by the AntiSamy scanner, + * but you might find some use for this. + * + * @return A regular expression for the tag, i.e., "^<b>$" or + * "<hr(\s)*(width='((\w){2,3}(\%)*)'>" + */ + public String getRegularExpression() { - public Tag(String name, Map tagAttributes, String action) { - this.name = name; - this.allowedAttributes = Collections.unmodifiableMap(tagAttributes); - this.action = action; - } - - /** - * @return The action for this tag which is one of filter, validate or remove. 
+ /* + * For such tags as , , */ - public String getAction() { - return action; + if (allowedAttributes.size() == 0) { + return "^<" + name + ">$"; } - /** Indicates if the action for this tag matches the supplied action - * @param action The action to match against - * @return True if it matches - */ - public boolean isAction(String action){ - return action.equals( this.action); + StringBuilder regExp = + new StringBuilder("<" + ANY_NORMAL_WHITESPACES + name + OPEN_TAG_ATTRIBUTES); + + List values = new ArrayList(allowedAttributes.values()); + Collections.sort( + values, + new Comparator() { + public int compare(Attribute o1, Attribute o2) { + return o1.getName().compareTo(o2.getName()); + } + }); + Iterator attributes = values.iterator(); + while (attributes.hasNext()) { + Attribute attr = attributes.next(); + regExp.append(attr.matcherRegEx(attributes.hasNext())); } - public Tag mutateAction(String action) { - return new Tag(this.name, this.allowedAttributes, action); - } + regExp.append(CLOSE_TAG_ATTRIBUTES + ANY_NORMAL_WHITESPACES + ">"); + return regExp.toString(); + } - /* --------------------------------------------------------------------------------------------------*/ + static String escapeRegularExpressionCharacters(String allowedValue) { + String toReturn = allowedValue; - /** - * Returns a regular expression for validating individual tags. Not used by the AntiSamy scanner, but you might find some use for this. 
- * - * @return A regular expression for the tag, i.e., - * "^<b>$" - * or "<hr(\s)*(width='((\w){2,3}(\%)*)'>" - */ - public String getRegularExpression() { - - /* - * For such tags as , , - */ - if (allowedAttributes.size() == 0) { - return "^<" + name + ">$"; - } - - StringBuilder regExp = new StringBuilder("<" + ANY_NORMAL_WHITESPACES + name + OPEN_TAG_ATTRIBUTES); - - List values = new ArrayList(allowedAttributes.values()); - Collections.sort(values, new Comparator() { - public int compare(Attribute o1, Attribute o2) { - return o1.getName().compareTo(o2.getName()); - } - } ); - Iterator attributes = values.iterator(); - while (attributes.hasNext()) { - Attribute attr = attributes.next(); - regExp.append( attr.matcherRegEx(attributes.hasNext())); - } - - regExp.append(CLOSE_TAG_ATTRIBUTES + ANY_NORMAL_WHITESPACES + ">"); - - return regExp.toString(); + if (toReturn == null) { + return null; } - static String escapeRegularExpressionCharacters(String allowedValue) { - - String toReturn = allowedValue; - - if (toReturn == null) { - return null; - } - - for (int i = 0; i < REGEXP_CHARACTERS.length(); i++) { - toReturn = toReturn.replaceAll("\\" + String.valueOf(REGEXP_CHARACTERS.charAt(i)), "\\" + REGEXP_CHARACTERS.charAt(i)); - } - - return toReturn; + for (int i = 0; i < REGEXP_CHARACTERS.length(); i++) { + toReturn = + toReturn.replaceAll( + "\\" + String.valueOf(REGEXP_CHARACTERS.charAt(i)), + "\\" + REGEXP_CHARACTERS.charAt(i)); } - /** - * Begin Variables Needed For Generating Regular Expressions * - */ - final static String ANY_NORMAL_WHITESPACES = "(\\s)*"; - final static String OPEN_ATTRIBUTE = "("; - final static String ATTRIBUTE_DIVIDER = "|"; - final static String CLOSE_ATTRIBUTE = ")"; - private final static String OPEN_TAG_ATTRIBUTES = ANY_NORMAL_WHITESPACES + OPEN_ATTRIBUTE; - private final static String CLOSE_TAG_ATTRIBUTES = ")*"; - private final static String REGEXP_CHARACTERS = "\\(){}.*?$^-+"; - - /** - * @return The String name of the tag. 
- */ - public String getName() { - return name; - } - - - /** - * Returns an Attribute associated with a lookup name. - * - * @param name The name of the allowed attribute by name. - * @return The Attribute object associated with the name, or - */ - public Attribute getAttributeByName(String name) { - return allowedAttributes.get(name); - } + return toReturn; + } + + /** Begin Variables Needed For Generating Regular Expressions * */ + static final String ANY_NORMAL_WHITESPACES = "(\\s)*"; + + static final String OPEN_ATTRIBUTE = "("; + static final String ATTRIBUTE_DIVIDER = "|"; + static final String CLOSE_ATTRIBUTE = ")"; + private static final String OPEN_TAG_ATTRIBUTES = ANY_NORMAL_WHITESPACES + OPEN_ATTRIBUTE; + private static final String CLOSE_TAG_ATTRIBUTES = ")*"; + private static final String REGEXP_CHARACTERS = "\\(){}.*?$^-+"; + + /** @return The String name of the tag. */ + public String getName() { + return name; + } + + /** + * Returns an Attribute associated with a lookup name. + * + * @param name The name of the allowed attribute by name. 
+ * @return The Attribute object associated with the name, or + */ + public Attribute getAttributeByName(String name) { + return allowedAttributes.get(name); + } } diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java index f0a2ff69..a7ba50ee 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java @@ -1,5 +1,7 @@ package org.owasp.validator.html.scan; +import java.io.IOException; +import java.io.Writer; import org.apache.xml.serialize.ElementState; import org.apache.xml.serialize.HTMLdtd; import org.apache.xml.serialize.OutputFormat; @@ -7,82 +9,72 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.Writer; - @SuppressWarnings("deprecation") public class ASHTMLSerializer extends org.apache.xml.serialize.HTMLSerializer { - private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class); - private boolean encodeAllPossibleEntities; + private static final Logger logger = LoggerFactory.getLogger(ASHTMLSerializer.class); + private boolean encodeAllPossibleEntities; + + public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { + super(w, format); + this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters(); + } + + protected String getEntityRef(int charToPrint) { + if (encodeAllPossibleEntities || Constants.big5CharsToEncode.indexOf(charToPrint) != -1) + return super.getEntityRef(charToPrint); + return null; + } - public ASHTMLSerializer(Writer w, OutputFormat format, InternalPolicy policy) { - super(w, format); - this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters(); - } - - protected String getEntityRef(int charToPrint) { - if(encodeAllPossibleEntities || Constants.big5CharsToEncode.indexOf(charToPrint) != -1) - return super.getEntityRef(charToPrint); - return 
null; - } + public void endElementIO(String namespaceURI, String localName, String rawName) + throws IOException { - public void endElementIO(String namespaceURI, String localName, - String rawName) throws IOException { - - ElementState state; + ElementState state; - // Works much like content() with additions for closing - // an element. Note the different checks for the closed - // element's state and the parent element's state. - _printer.unindent(); - state = getElementState(); + // Works much like content() with additions for closing + // an element. Note the different checks for the closed + // element's state and the parent element's state. + _printer.unindent(); + state = getElementState(); - if (state.empty) - _printer.printText('>'); - // This element is not empty and that last content was - // another element, so print a line break before that - // last element and this element's closing tag. - // [keith] Provided this is not an anchor. - // HTML: some elements do not print closing tag (e.g. LI) - if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { - if (_indenting && !state.preserveSpace && state.afterElement) - _printer.breakLine(); - // Must leave CData section first (Illegal in HTML, but still) - if (state.inCData) - _printer.printText("]]>"); - _printer.printText("'); - } - - // Leave the element state and update that of the parent - // (if we're not root) to not empty and after element. - state = leaveElementState(); - // Temporary hack to prevent line breaks inside A/TD - if (rawName == null - || (!rawName.equalsIgnoreCase("A") && !rawName - .equalsIgnoreCase("TD"))) + if (state.empty) _printer.printText('>'); + // This element is not empty and that last content was + // another element, so print a line break before that + // last element and this element's closing tag. + // [keith] Provided this is not an anchor. + // HTML: some elements do not print closing tag (e.g. 
LI) + if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { + if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine(); + // Must leave CData section first (Illegal in HTML, but still) + if (state.inCData) _printer.printText("]]>"); + _printer.printText("'); + } - state.afterElement = true; - state.empty = false; - if (isDocumentState()) - _printer.flush(); - } + // Leave the element state and update that of the parent + // (if we're not root) to not empty and after element. + state = leaveElementState(); + // Temporary hack to prevent line breaks inside A/TD + if (rawName == null || (!rawName.equalsIgnoreCase("A") && !rawName.equalsIgnoreCase("TD"))) + state.afterElement = true; + state.empty = false; + if (isDocumentState()) _printer.flush(); + } - /* - The override is to use printEscaped() which already escapes entity references - and writes them in the final serialized string. As escapeURI() is called like - "printer.printText(escapeURI(value))", if the URI is returned here it would - be double-printed and that is why the return value is an empty string. - */ - @Override - protected String escapeURI(String uri) { - try { - printEscaped(uri); - } catch (IOException e) { - logger.error("URI escaping failed for value: " + uri); - } - return ""; - } + /* + The override is to use printEscaped() which already escapes entity references + and writes them in the final serialized string. As escapeURI() is called like + "printer.printText(escapeURI(value))", if the URI is returned here it would + be double-printed and that is why the return value is an empty string. 
+ */ + @Override + protected String escapeURI(String uri) { + try { + printEscaped(uri); + } catch (IOException e) { + logger.error("URI escaping failed for value: " + uri); + } + return ""; + } } diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java index cd81e5a3..df53199b 100644 --- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java @@ -1,10 +1,10 @@ /* * Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - * + * * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
@@ -30,9 +30,7 @@ import java.util.Locale; import java.util.MissingResourceException; import java.util.ResourceBundle; - import org.apache.xml.serialize.OutputFormat; - import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; @@ -42,74 +40,75 @@ public abstract class AbstractAntiSamyScanner { - protected final InternalPolicy policy; - protected final List errorMessages = new ArrayList(); + protected final InternalPolicy policy; + protected final List errorMessages = new ArrayList(); - protected static final ResourceBundle messages = getResourceBundle(); - protected final Locale locale = Locale.getDefault(); + protected static final ResourceBundle messages = getResourceBundle(); + protected final Locale locale = Locale.getDefault(); - protected boolean isNofollowAnchors = false; - protected boolean isNoopenerAndNoreferrerAnchors = false; - protected boolean isValidateParamAsEmbed = false; + protected boolean isNofollowAnchors = false; + protected boolean isNoopenerAndNoreferrerAnchors = false; + protected boolean isValidateParamAsEmbed = false; - public abstract CleanResults scan(String html) throws ScanException; + public abstract CleanResults scan(String html) throws ScanException; - public abstract CleanResults getResults(); + public abstract CleanResults getResults(); - public AbstractAntiSamyScanner(Policy policy) { - assert policy instanceof InternalPolicy : policy.getClass(); - this.policy = (InternalPolicy) policy; - } + public AbstractAntiSamyScanner(Policy policy) { + assert policy instanceof InternalPolicy : policy.getClass(); + this.policy = (InternalPolicy) policy; + } - public AbstractAntiSamyScanner() throws PolicyException { - policy = (InternalPolicy) Policy.getInstance(); - } + public AbstractAntiSamyScanner() throws PolicyException { + policy = (InternalPolicy) Policy.getInstance(); + } - private static ResourceBundle getResourceBundle() { - try { - return 
ResourceBundle.getBundle("AntiSamy", Locale.getDefault()); - } catch (MissingResourceException mre) { - return ResourceBundle.getBundle("AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, - Constants.DEFAULT_LOCALE_LOC)); - } + private static ResourceBundle getResourceBundle() { + try { + return ResourceBundle.getBundle("AntiSamy", Locale.getDefault()); + } catch (MissingResourceException mre) { + return ResourceBundle.getBundle( + "AntiSamy", new Locale(Constants.DEFAULT_LOCALE_LANG, Constants.DEFAULT_LOCALE_LOC)); } + } - protected void addError(String errorKey, Object[] objs) { - errorMessages.add(ErrorMessageUtil.getMessage(messages, errorKey, objs)); - } - - protected OutputFormat getOutputFormat() { + protected void addError(String errorKey, Object[] objs) { + errorMessages.add(ErrorMessageUtil.getMessage(messages, errorKey, objs)); + } - OutputFormat format = new OutputFormat(); - format.setOmitXMLDeclaration(policy.isOmitXmlDeclaration()); - format.setOmitDocumentType(policy.isOmitDoctypeDeclaration()); - format.setPreserveEmptyAttributes(true); - format.setPreserveSpace(policy.isPreserveSpace()); + protected OutputFormat getOutputFormat() { - if (policy.isFormatOutput()) { - format.setLineWidth(80); - format.setIndenting(true); - format.setIndent(2); - } + OutputFormat format = new OutputFormat(); + format.setOmitXMLDeclaration(policy.isOmitXmlDeclaration()); + format.setOmitDocumentType(policy.isOmitDoctypeDeclaration()); + format.setPreserveEmptyAttributes(true); + format.setPreserveSpace(policy.isPreserveSpace()); - return format; + if (policy.isFormatOutput()) { + format.setLineWidth(80); + format.setIndenting(true); + format.setIndent(2); } - protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer(Writer w, OutputFormat format) { - return new ASHTMLSerializer(w, format, policy); - } - - protected String trim(String original, String cleaned) { - if (cleaned.endsWith("\n")) { - if (!original.endsWith("\n")) { - if 
(cleaned.endsWith("\r\n")) { - cleaned = cleaned.substring(0, cleaned.length() - 2); - } else if (cleaned.endsWith("\n")) { - cleaned = cleaned.substring(0, cleaned.length() - 1); - } - } + return format; + } + + protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer( + Writer w, OutputFormat format) { + return new ASHTMLSerializer(w, format, policy); + } + + protected String trim(String original, String cleaned) { + if (cleaned.endsWith("\n")) { + if (!original.endsWith("\n")) { + if (cleaned.endsWith("\r\n")) { + cleaned = cleaned.substring(0, cleaned.length() - 2); + } else if (cleaned.endsWith("\n")) { + cleaned = cleaned.substring(0, cleaned.length() - 1); } - - return cleaned; + } } + + return cleaned; + } } diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java index 6c25ad4c..66812ae6 100644 --- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java +++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java @@ -23,9 +23,18 @@ */ package org.owasp.validator.html.scan; +import java.io.IOException; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import net.sourceforge.htmlunit.cyberneko.parsers.DOMFragmentParser; import org.apache.batik.css.parser.ParseException; import org.apache.xerces.dom.DocumentImpl; -import net.sourceforge.htmlunit.cyberneko.parsers.DOMFragmentParser; import org.owasp.validator.css.CssScanner; import org.owasp.validator.html.CleanResults; import org.owasp.validator.html.Policy; @@ -50,747 +59,801 @@ import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXNotSupportedException; -import java.io.IOException; -import java.io.StringReader; -import java.io.StringWriter; -import 
java.util.List; -import java.util.Queue; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - /** - * This is where the magic lives. All the scanning/filtration logic resides - * here, but it should not be called directly. All scanning should be done - * through an AntiSamy.scan() method. - * + * This is where the magic lives. All the scanning/filtration logic resides here, but it should not + * be called directly. All scanning should be done through an AntiSamy.scan() method. + * * @author Arshan Dabirsiaghi */ public class AntiSamyDOMScanner extends AbstractAntiSamyScanner { - private Document document = new DocumentImpl(); - private DocumentFragment dom = document.createDocumentFragment(); - private CleanResults results = null; - private static final int maxDepth = 250; - private static final Pattern invalidXmlCharacters = - Pattern.compile("[\\u0000-\\u001F\\uD800-\\uDFFF\\uFFFE-\\uFFFF&&[^\\u0009\\u000A\\u000D]]"); - private static final Pattern conditionalDirectives = Pattern.compile("?"); - - private static final Queue cachedItems = new ConcurrentLinkedQueue(); - - static class CachedItem { - private final DOMFragmentParser parser; - private final Matcher invalidXmlCharMatcher = invalidXmlCharacters.matcher(""); - - - CachedItem() throws SAXNotSupportedException, SAXNotRecognizedException { - this.parser = getDomParser(); - } - - DOMFragmentParser getDomFragmentParser() { - return parser; - } + private Document document = new DocumentImpl(); + private DocumentFragment dom = document.createDocumentFragment(); + private CleanResults results = null; + private static final int maxDepth = 250; + private static final Pattern invalidXmlCharacters = + Pattern.compile("[\\u0000-\\u001F\\uD800-\\uDFFF\\uFFFE-\\uFFFF&&[^\\u0009\\u000A\\u000D]]"); + private static final Pattern conditionalDirectives = + Pattern.compile("?"); + + private static final Queue 
cachedItems = new ConcurrentLinkedQueue(); + + static class CachedItem { + private final DOMFragmentParser parser; + private final Matcher invalidXmlCharMatcher = invalidXmlCharacters.matcher(""); + + CachedItem() throws SAXNotSupportedException, SAXNotRecognizedException { + this.parser = getDomParser(); } - public AntiSamyDOMScanner(Policy policy) { - super(policy); + DOMFragmentParser getDomFragmentParser() { + return parser; } - - /* UnusedDeclaration TODO Investigate */ - public AntiSamyDOMScanner() throws PolicyException { - super(); + } + + public AntiSamyDOMScanner(Policy policy) { + super(policy); + } + + /* UnusedDeclaration TODO Investigate */ + public AntiSamyDOMScanner() throws PolicyException { + super(); + } + + /** + * This is where the magic lives. + * + * @param html A String whose contents we want to scan. + * @return A CleanResults object with an XMLDocumentFragment object and + * its String representation, as well as some scan statistics. + * @throws ScanException When there is a problem encountered while scanning the HTML. + */ + @Override + public CleanResults scan(String html) throws ScanException { + + if (html == null) { + throw new ScanException(new NullPointerException("Null HTML input")); } - /** - * This is where the magic lives. - * - * @param html A String whose contents we want to scan. - * @return A CleanResults object with an - * XMLDocumentFragment object and its String - * representation, as well as some scan statistics. - * @throws ScanException When there is a problem encountered - * while scanning the HTML. 
- */ - @Override - public CleanResults scan(String html) throws ScanException { - - if (html == null) { - throw new ScanException(new NullPointerException("Null HTML input")); - } + errorMessages.clear(); + int maxInputSize = policy.getMaxInputSize(); - errorMessages.clear(); - int maxInputSize = policy.getMaxInputSize(); + if (maxInputSize < html.length()) { + addError(ErrorMessageUtil.ERROR_INPUT_SIZE, new Object[] {html.length(), maxInputSize}); + throw new ScanException(errorMessages.get(0)); + } - if (maxInputSize < html.length()) { - addError(ErrorMessageUtil.ERROR_INPUT_SIZE, new Object[]{html.length(), maxInputSize}); - throw new ScanException(errorMessages.get(0)); - } + isNofollowAnchors = policy.isNofollowAnchors(); + isNoopenerAndNoreferrerAnchors = policy.isNoopenerAndNoreferrerAnchors(); + isValidateParamAsEmbed = policy.isValidateParamAsEmbed(); - isNofollowAnchors = policy.isNofollowAnchors(); - isNoopenerAndNoreferrerAnchors = policy.isNoopenerAndNoreferrerAnchors(); - isValidateParamAsEmbed = policy.isValidateParamAsEmbed(); + long startOfScan = System.currentTimeMillis(); - long startOfScan = System.currentTimeMillis(); + try { - try { + CachedItem cachedItem; + cachedItem = cachedItems.poll(); + if (cachedItem == null) { + cachedItem = new CachedItem(); + } - CachedItem cachedItem; - cachedItem = cachedItems.poll(); - if (cachedItem == null){ - cachedItem = new CachedItem(); - } + /* + * We have to replace any invalid XML characters to prevent NekoHTML + * from breaking when it gets passed encodings like %21. + */ - /* - * We have to replace any invalid XML characters to prevent NekoHTML - * from breaking when it gets passed encodings like %21. - */ + html = stripNonValidXMLCharacters(html, cachedItem.invalidXmlCharMatcher); - html = stripNonValidXMLCharacters(html, cachedItem.invalidXmlCharMatcher); + /* + * First thing we do is call the HTML cleaner ("NekoHTML") on it + * with the appropriate options. 
We choose not to omit tags due to + * the fallibility of our own listing in the ever changing world of + * W3C. + */ - /* - * First thing we do is call the HTML cleaner ("NekoHTML") on it - * with the appropriate options. We choose not to omit tags due to - * the fallibility of our own listing in the ever changing world of - * W3C. - */ + DOMFragmentParser parser = cachedItem.getDomFragmentParser(); - DOMFragmentParser parser = cachedItem.getDomFragmentParser(); + try { + parser.parse(new InputSource(new StringReader(html)), dom); + } catch (Exception e) { + throw new ScanException(e); + } - try { - parser.parse(new InputSource(new StringReader(html)), dom); - } catch (Exception e) { - throw new ScanException(e); - } + processChildren(dom, 0); - processChildren(dom, 0); + /* + * Serialize the output and then return the resulting DOM object and + * its string representation. + */ - /* - * Serialize the output and then return the resulting DOM object and - * its string representation. - */ - - final String trimmedHtml = html; - - StringWriter out = new StringWriter(); + final String trimmedHtml = html; - @SuppressWarnings("deprecation") - org.apache.xml.serialize.OutputFormat format = getOutputFormat(); + StringWriter out = new StringWriter(); - //noinspection deprecation - org.apache.xml.serialize.HTMLSerializer serializer = getHTMLSerializer(out, format); - serializer.serialize(dom); + @SuppressWarnings("deprecation") + org.apache.xml.serialize.OutputFormat format = getOutputFormat(); - /* - * Get the String out of the StringWriter and rip out the XML - * declaration if the Policy says we should. 
- */ - final String trimmed = trim( trimmedHtml, out.getBuffer().toString() ); + //noinspection deprecation + org.apache.xml.serialize.HTMLSerializer serializer = getHTMLSerializer(out, format); + serializer.serialize(dom); - Callable cleanHtml = new Callable() { - public String call() throws Exception { - return trimmed; - } - }; + /* + * Get the String out of the StringWriter and rip out the XML + * declaration if the Policy says we should. + */ + final String trimmed = trim(trimmedHtml, out.getBuffer().toString()); - /* - * Return the DOM object as well as string HTML. - */ - results = new CleanResults(startOfScan, cleanHtml, dom, errorMessages); + Callable cleanHtml = + new Callable() { + public String call() throws Exception { + return trimmed; + } + }; - cachedItems.add( cachedItem); - return results; + /* + * Return the DOM object as well as string HTML. + */ + results = new CleanResults(startOfScan, cleanHtml, dom, errorMessages); - } catch (SAXException | IOException e) { - throw new ScanException(e); - } + cachedItems.add(cachedItem); + return results; + } catch (SAXException | IOException e) { + throw new ScanException(e); } + } - static DOMFragmentParser getDomParser() - throws SAXNotRecognizedException, SAXNotSupportedException { - DOMFragmentParser parser = new DOMFragmentParser(); - parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); + static DOMFragmentParser getDomParser() + throws SAXNotRecognizedException, SAXNotSupportedException { + DOMFragmentParser parser = new DOMFragmentParser(); + parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); - parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); - parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); + parser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false); + 
parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true); - try { - parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); - } catch (SAXNotRecognizedException se) { - // this indicates that the patched nekohtml is not on the - // classpath - } - return parser; + try { + parser.setFeature("http://cyberneko.org/html/features/enforce-strict-attribute-names", true); + } catch (SAXNotRecognizedException se) { + // this indicates that the patched nekohtml is not on the + // classpath } + return parser; + } - /** - * The workhorse of the scanner. Recursively scans document elements - * according to the policy. This should be called implicitly through the - * AntiSamy.scan() method. - * - * @param node The node to validate. - */ - private void recursiveValidateTag(final Node node, int currentStackDepth) throws ScanException { - - currentStackDepth++; - - if(currentStackDepth > maxDepth) { - throw new ScanException("Too many nested tags"); - } - - if (node instanceof Comment) { - processCommentNode(node); - return; - } - - boolean isElement = node instanceof Element; - NodeList eleChildNodes = node.getChildNodes(); - if (isElement && eleChildNodes.getLength() == 0) { - if (removeDisallowedEmpty(node)){ - return; - } - } - - if (node instanceof Text && Node.CDATA_SECTION_NODE == node.getNodeType()) { - stripCData(node); - return; - } - - if (node instanceof ProcessingInstruction) { - removePI(node); - } - - if (!isElement) { - return; - } - - final Element ele = (Element) node; - final Node parentNode = ele.getParentNode(); + /** + * The workhorse of the scanner. Recursively scans document elements according to the policy. This + * should be called implicitly through the AntiSamy.scan() method. + * + * @param node The node to validate. 
+ */ + private void recursiveValidateTag(final Node node, int currentStackDepth) throws ScanException { - final String tagName = ele.getNodeName(); - final String tagNameLowerCase = tagName.toLowerCase(); - Tag tagRule = policy.getTagByLowercaseName(tagNameLowerCase); + currentStackDepth++; - /* - * If and no policy and isValidateParamAsEmbed and policy in - * place for and policy is to validate, use custom - * policy to get the tag through to the validator. - */ - Tag embedTag = policy.getEmbedTag(); - boolean masqueradingParam = isMasqueradingParam(tagRule, embedTag, tagNameLowerCase); - if (masqueradingParam){ - tagRule = Constants.BASIC_PARAM_TAG_RULE; - } - - if ((tagRule == null && policy.isEncodeUnknownTag()) || (tagRule != null && tagRule.isAction( "encode"))) { - encodeTag(currentStackDepth, ele, tagName, eleChildNodes); - } else if (tagRule == null || tagRule.isAction( Policy.ACTION_FILTER)) { - actionFilter(currentStackDepth, ele, tagName, tagRule, eleChildNodes); - } else if (tagRule.isAction( Policy.ACTION_VALIDATE)) { - actionValidate(currentStackDepth, ele, parentNode, tagName, tagNameLowerCase, tagRule, masqueradingParam, embedTag, eleChildNodes); - } else if (tagRule.isAction( Policy.ACTION_TRUNCATE)) { - actionTruncate(ele, tagName, eleChildNodes); - } else { - /* - * If we reached this that means that the tag's action is "remove", - * which means to remove the tag (including its contents). 
- */ - addError(ErrorMessageUtil.ERROR_TAG_DISALLOWED, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)}); - removeNode(ele); - } + if (currentStackDepth > maxDepth) { + throw new ScanException("Too many nested tags"); } - private boolean isMasqueradingParam(Tag tagRule, Tag embedTag, String tagNameLowerCase){ - if (tagRule == null && isValidateParamAsEmbed && "param".equals(tagNameLowerCase)) { - return embedTag != null && embedTag.isAction(Policy.ACTION_VALIDATE); - } - return false; + if (node instanceof Comment) { + processCommentNode(node); + return; } - private void encodeTag(int currentStackDepth, Element ele, String tagName, NodeList eleChildNodes) throws ScanException { - addError(ErrorMessageUtil.ERROR_TAG_ENCODED, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)}); - processChildren(eleChildNodes, currentStackDepth); - - /* - * Transform the tag to text, HTML-encode it and promote the - * children. The tag will be kept in the fragment as one or two text - * Nodes located before and after the children; representing how the - * tag used to wrap them. 
- */ - - encodeAndPromoteChildren(ele); + boolean isElement = node instanceof Element; + NodeList eleChildNodes = node.getChildNodes(); + if (isElement && eleChildNodes.getLength() == 0) { + if (removeDisallowedEmpty(node)) { + return; + } } - private void actionFilter(int currentStackDepth, Element ele, String tagName, Tag tag, NodeList eleChildNodes) throws ScanException { - if (tag == null) { - addError(ErrorMessageUtil.ERROR_TAG_NOT_IN_POLICY, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)}); - } else { - addError(ErrorMessageUtil.ERROR_TAG_FILTERED, new Object[]{HTMLEntityEncoder.htmlEntityEncode(tagName)}); - } - - processChildren(eleChildNodes, currentStackDepth); - promoteChildren(ele); - } - - private void actionValidate(int currentStackDepth, Element ele, Node parentNode, String tagName, String tagNameLowerCase, Tag tag, boolean masqueradingParam, Tag embedTag, NodeList eleChildNodes) throws ScanException { - /* - * If doing as , now is the time to convert it. - */ - String nameValue = null; - if (masqueradingParam) { - nameValue = ele.getAttribute("name"); - if (nameValue != null && !"".equals(nameValue)) { - String valueValue = ele.getAttribute("value"); - ele.setAttribute(nameValue, valueValue); - ele.removeAttribute("name"); - ele.removeAttribute("value"); - tag = embedTag; - } - } - - /* - * Check to see if it's a ", policy, AntiSamy.DOM).getCleanHTML().contains("ha.ckers.org")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("ha.ckers.org")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("ha.ckers.org")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("ha.ckers.org")); - - assertTrue(!as.scan("

  • XSS", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - assertTrue(!as.scan("
    • XSS", policy, AntiSamy.SAX).getCleanHTML().contains("javascript")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("vbscript")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("vbscript")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("iframe")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("iframe")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("javascript")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("background")); - assertTrue(!as.scan("
      ", policy, AntiSamy.SAX).getCleanHTML().contains("background")); - - assertTrue(!as.scan("
      ", policy, AntiSamy.DOM).getCleanHTML().contains("background")); - assertTrue(!as.scan("
      ", policy, AntiSamy.SAX).getCleanHTML().contains("background")); - - assertTrue(!as.scan("
      ", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - assertTrue(!as.scan("
      ", policy, AntiSamy.SAX).getCleanHTML().contains("javascript")); - - assertTrue(!as.scan("
      ", policy, AntiSamy.DOM).getCleanHTML().contains("alert")); - assertTrue(!as.scan("
      ", policy, AntiSamy.SAX).getCleanHTML().contains("alert")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("alert")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("alert")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("ript:alert")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("ript:alert")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - assertTrue(!as.scan("", policy, AntiSamy.SAX).getCleanHTML().contains("javascript")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - - CleanResults cr = as.scan("", policy, AntiSamy.SAX); - // System.out.println(cr.getErrorMessages().get(0)); - assertTrue(!cr.getCleanHTML().contains("javascript")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM).getCleanHTML().contains("", policy, AntiSamy.SAX).getCleanHTML().contains("", - policy, AntiSamy.DOM).getCleanHTML().contains("", - policy, AntiSamy.SAX).getCleanHTML().contains("\" SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM).getCleanHTML().contains("\" SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.SAX).getCleanHTML().contains("\" '' SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM).getCleanHTML().contains("\" '' SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.SAX).getCleanHTML().contains("` SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM).getCleanHTML().contains("` SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.SAX).getCleanHTML().contains("'>\" SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM).getCleanHTML().contains("'>\" SRC=\"http://ha.ckers.org/xss.js\">", policy, 
AntiSamy.SAX).getCleanHTML().contains("document.write(\"PT SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM).getCleanHTML().contains("script")); - assertTrue(!as.scan("PT SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.SAX).getCleanHTML().contains("script")); - - assertTrue(!as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("script")); + assertTrue( + !as.scan("test", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("script")); + + assertTrue( + !as.scan("<<<><", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("onload")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("onload")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("alert")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("alert")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("iframe")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("iframe")); + + assertTrue( + !as.scan( + "", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("javascript")); + assertTrue( + !as.scan( + "", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("javascript")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("background")); + assertTrue( + !as.scan("
      ", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("background")); + + assertTrue( + !as.scan("
      ", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("background")); + assertTrue( + !as.scan("
      ", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("background")); + + assertTrue( + !as.scan( + "
      ", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("javascript")); + assertTrue( + !as.scan( + "
      ", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("javascript")); + + assertTrue( + !as.scan("
      ", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("alert")); + assertTrue( + !as.scan("
      ", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("alert")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("alert")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("alert")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("ript:alert")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("ript:alert")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("javascript")); + assertTrue( + !as.scan("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("javascript")); + + assertTrue( + !as.scan("", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("", policy, AntiSamy.SAX) + .getCleanHTML() + .contains("", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("javascript")); + + CleanResults cr = + as.scan( + "", + policy, + AntiSamy.SAX); + // System.out.println(cr.getErrorMessages().get(0)); + assertTrue(!cr.getCleanHTML().contains("javascript")); + + assertTrue( + !as.scan( + "", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("\" SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("\" SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("\" '' SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("\" '' SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("` SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.DOM) + .getCleanHTML() + .contains("` SRC=\"http://ha.ckers.org/xss.js\">", policy, AntiSamy.SAX) + 
.getCleanHTML() + .contains("'>\" SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("'>\" SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("document.write(\"PT SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.DOM) + .getCleanHTML() + .contains("script")); + assertTrue( + !as.scan( + "PT SRC=\"http://ha.ckers.org/xss.js\">", + policy, + AntiSamy.SAX) + .getCleanHTML() + .contains("script")); + + assertTrue( + !as.scan(""; + as.scan(s, policy, AntiSamy.DOM); + as.scan(s, policy, AntiSamy.SAX); + } + + @Test + public void issue37() throws ScanException, PolicyException { + + String dirty = + "
      Poor Bill, couldn't make it to the Museum's story time" + + "today, he was so busy shoveling! Well, we sure missed you Bill! So since" + + "ou were busy moving snow we read books about snow. We found a clue in one" + + "book which revealed a snowplow at the end of the story - we wish it had" + + "driven to your driveway Bill. We also read a story which shared fourteen" + + "Names For Snow. We'll catch up with you next week....wonder which" + + "hat Bill will wear?
      Jane"; + + Policy mySpacePolicy = Policy.getInstance(getClass().getResource("/antisamy-myspace.xml")); + CleanResults cr = as.scan(dirty, mySpacePolicy, AntiSamy.DOM); + assertNotNull(cr.getCleanHTML()); + cr = as.scan(dirty, mySpacePolicy, AntiSamy.SAX); + assertNotNull(cr.getCleanHTML()); + + Policy ebayPolicy = Policy.getInstance(getClass().getResource("/antisamy-ebay.xml")); + cr = as.scan(dirty, ebayPolicy, AntiSamy.DOM); + assertNotNull(cr.getCleanHTML()); + cr = as.scan(dirty, mySpacePolicy, AntiSamy.SAX); + assertNotNull(cr.getCleanHTML()); + + Policy slashdotPolicy = Policy.getInstance(getClass().getResource("/antisamy-slashdot.xml")); + cr = as.scan(dirty, slashdotPolicy, AntiSamy.DOM); + assertNotNull(cr.getCleanHTML()); + cr = as.scan(dirty, slashdotPolicy, AntiSamy.SAX); + assertNotNull(cr.getCleanHTML()); + } + + @Test + public void issue38() throws ScanException, PolicyException { + + /* issue #38 - color problem/color combinations */ + String s = "Test"; + String expected = "Test"; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "
      Test 3 letter code
      "; + expected = "
      Test 3 letter code
      "; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "Test"; + expected = "Test"; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "Test"; + expected = "Test"; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "Test"; + expected = "Test"; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "
      Test
      "; + expected = "
      Test
      "; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "Test"; + expected = "Test"; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); + + s = "
      Test
      "; + expected = "
      Test
      "; + assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); + assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); /* - * Test a bunch of strings that have tweaked the XML parsing capabilities of - * NekoHTML. + * This test case was failing because of the following code from the + * batik CSS library, which throws an exception if any character + * other than a '!' follows a beginning token of '<'. The + * ParseException is now caught in the node a CssScanner.java and + * the outside AntiSamyDOMScanner.java. + * + * 0398 nextChar(); 0399 if (current != '!') { 0400 throw new + * ParseException("character", 0401 reader.getLine(), 0402 + * reader.getColumn()); */ - @Test - public void IllegalXML() throws PolicyException { - - for (String BASE64_BAD_XML_STRING : BASE64_BAD_XML_STRINGS) { - - try { - String testStr = new String(Base64.decodeBase64(BASE64_BAD_XML_STRING.getBytes())); - as.scan(testStr, policy, AntiSamy.DOM); - as.scan(testStr, policy, AntiSamy.SAX); - - } catch (ScanException ex) { - // still success! - } - } - - // This fails due to a bug in NekoHTML - // try { - // assertTrue ( - // as.scan("",policy, AntiSamy.DOM).getCleanHTML().indexOf("href") - // != -1 ); - // } catch (Exception e) { - // e.printStackTrace(); - // fail("Couldn't parse malformed HTML: " + e.getMessage()); - // } - - // This fails due to a bug in NekoHTML - // try { - // assertTrue ( - // as.scan("",policy, AntiSamy.DOM).getCleanHTML().indexOf("href") - // != -1 ); - // } catch (Exception e) { - // e.printStackTrace(); - // fail("Couldn't parse malformed HTML: " + e.getMessage()); - // } - - try { - assertTrue(as.scan(""; - CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - assertEquals(s, cr.getCleanHTML()); - } - - @Test - public void issue30() throws ScanException, PolicyException { - - String s = ""; - - as.scan(s, policy, AntiSamy.DOM); - CleanResults cr; - - /* followup - does the patch fix multiline CSS? 
*/ - String s2 = ""; - cr = as.scan(s2, policy, AntiSamy.DOM); - assertEquals("", cr.getCleanHTML()); - - /* next followup - does non-CDATA parsing still work? */ - - String s3 = "\n", cr.getCleanHTML()); - } - - @Test - public void issue31() throws ScanException, PolicyException { - - String test = "foo"; - Policy revised = policy.cloneWithDirective("onUnknownTag", "encode"); - CleanResults cr = as.scan(test, revised, AntiSamy.DOM); - String s = cr.getCleanHTML(); - assertFalse(!s.contains("<g>")); - assertFalse(!s.contains("</g>")); - s = as.scan(test, revised, AntiSamy.SAX).getCleanHTML(); - assertFalse(!s.contains("<g>")); - assertFalse(!s.contains("</g>")); - - Tag tag = policy.getTagByLowercaseName("b").mutateAction("encode"); - Policy policy1 = policy.mutateTag(tag); - - cr = as.scan(test, policy1, AntiSamy.DOM); - s = cr.getCleanHTML(); - - assertFalse(!s.contains("<b>")); - assertFalse(!s.contains("</b>")); - - cr = as.scan(test, policy1, AntiSamy.SAX); - s = cr.getCleanHTML(); - - assertFalse(!s.contains("<b>")); - assertFalse(!s.contains("</b>")); - } - - @Test - public void issue32() throws ScanException, PolicyException { - /* issue #32 - nekos problem */ - String s = ""; - as.scan(s, policy, AntiSamy.DOM); - as.scan(s, policy, AntiSamy.SAX); - } - - @Test - public void issue37() throws ScanException, PolicyException { - - String dirty = "
      Poor Bill, couldn't make it to the Museum's story time" - + "today, he was so busy shoveling! Well, we sure missed you Bill! So since" + "ou were busy moving snow we read books about snow. We found a clue in one" - + "book which revealed a snowplow at the end of the story - we wish it had" + "driven to your driveway Bill. We also read a story which shared fourteen" - + "Names For Snow. We'll catch up with you next week....wonder which" + "hat Bill will wear?
      Jane"; - - Policy mySpacePolicy = Policy.getInstance(getClass().getResource("/antisamy-myspace.xml")); - CleanResults cr = as.scan(dirty, mySpacePolicy, AntiSamy.DOM); - assertNotNull(cr.getCleanHTML()); - cr = as.scan(dirty, mySpacePolicy, AntiSamy.SAX); - assertNotNull(cr.getCleanHTML()); - - Policy ebayPolicy = Policy.getInstance(getClass().getResource("/antisamy-ebay.xml")); - cr = as.scan(dirty, ebayPolicy, AntiSamy.DOM); - assertNotNull(cr.getCleanHTML()); - cr = as.scan(dirty, mySpacePolicy, AntiSamy.SAX); - assertNotNull(cr.getCleanHTML()); - - Policy slashdotPolicy = Policy.getInstance(getClass().getResource("/antisamy-slashdot.xml")); - cr = as.scan(dirty, slashdotPolicy, AntiSamy.DOM); - assertNotNull(cr.getCleanHTML()); - cr = as.scan(dirty, slashdotPolicy, AntiSamy.SAX); - assertNotNull(cr.getCleanHTML()); - } - - @Test - public void issue38() throws ScanException, PolicyException { - - /* issue #38 - color problem/color combinations */ - String s = "Test"; - String expected = "Test"; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "
      Test 3 letter code
      "; - expected = "
      Test 3 letter code
      "; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "Test"; - expected = "Test"; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "Test"; - expected = "Test"; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "Test"; - expected = "Test"; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "
      Test
      "; - expected = "
      Test
      "; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "Test"; - expected = "Test"; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - s = "
      Test
      "; - expected = "
      Test
      "; - assertEquals(as.scan(s, policy, AntiSamy.DOM).getCleanHTML(), expected); - assertEquals(as.scan(s, policy, AntiSamy.SAX).getCleanHTML(), expected); - - /* - * This test case was failing because of the following code from the - * batik CSS library, which throws an exception if any character - * other than a '!' follows a beginning token of '<'. The - * ParseException is now caught in the node a CssScanner.java and - * the outside AntiSamyDOMScanner.java. - * - * 0398 nextChar(); 0399 if (current != '!') { 0400 throw new - * ParseException("character", 0401 reader.getLine(), 0402 - * reader.getColumn()); - */ - s = "foo@import 'x';bar"; - as.scan(s, policy, AntiSamy.DOM); - as.scan(s, policy, AntiSamy.SAX); - } - - @Test - public void issue40() throws ScanException, PolicyException { - - /* issue #40 - handling "; - Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); - - CleanResults cr = as.scan(s, revised, AntiSamy.DOM); - assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - - cr = as.scan(s, revised, AntiSamy.SAX); - assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - } - - @Test - public void issue41() throws ScanException, PolicyException { - /* issue #41 - comment handling */ - - Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); - - policy.cloneWithDirective(Policy.PRESERVE_COMMENTS, "false"); - - assertEquals("text ", as.scan("text ", revised, AntiSamy.DOM).getCleanHTML()); - assertEquals("text ", as.scan("text ", revised, AntiSamy.SAX).getCleanHTML()); - - Policy revised2 = policy.cloneWithDirective(Policy.PRESERVE_COMMENTS, "true").cloneWithDirective(Policy.PRESERVE_SPACE, "true").cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); - - /* - * These make sure the regular comments are kept alive and that - * conditional comments are ripped out. - */ - assertEquals("
      text
      ", as.scan("
      text
      ", revised2, AntiSamy.DOM).getCleanHTML()); - assertEquals("
      text
      ", as.scan("
      text
      ", revised2, AntiSamy.SAX).getCleanHTML()); - - assertEquals("
      text
      ", as.scan("
      text
      ", revised2, AntiSamy.DOM).getCleanHTML()); - assertEquals("
      text
      ", as.scan("
      text
      ", revised2, AntiSamy.SAX).getCleanHTML()); - - /* - * Check to see how nested conditional comments are handled. This is - * not very clean but the main goal is to avoid any tags. Not sure - * on encodings allowed in comments. - */ - String input = "
      text <[endif]-->
      "; - String expected = "
      text <[endif]-->
      "; - String output = as.scan(input, revised2, AntiSamy.DOM).getCleanHTML(); - assertEquals(expected, output); - - input = "
      text <[endif]-->
      "; - expected = "
      text <[endif]-->
      "; - output = as.scan(input, revised2, AntiSamy.SAX).getCleanHTML(); - - assertEquals(expected, output); - - /* - * Regular comment nested inside conditional comment. Test makes - * sure - */ - assertEquals("
      text comment <[endif]-->
      ", as.scan("
      text comment <[endif]-->
      ", revised2, AntiSamy.DOM).getCleanHTML()); - - /* - * These play with whitespace and have invalid comment syntax. - */ - assertEquals("
      text
      ", as.scan("
      text
      ", revised2, AntiSamy.DOM).getCleanHTML()); - assertEquals("
      text comment
      ", as.scan("
      text comment
      ", revised2, AntiSamy.DOM).getCleanHTML()); - assertEquals("
      text comment
      ", as.scan("
      text comment
      ", revised2, AntiSamy.DOM).getCleanHTML()); - - String attack = "[if lte 8]" + ""; - as.scan(s, policy, AntiSamy.DOM); - assertEquals(as.scan(s, policy, AntiSamy.DOM).getNumberOfErrors(), 3); - - CleanResults cr = as.scan(s, policy, AntiSamy.SAX); - - assertEquals(cr.getNumberOfErrors(), 3); - } - - @Test - public void issue51() throws ScanException, PolicyException { - /* issue #51 - offsite URLs with () are found to be invalid */ - String s = "test"; - CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - - assertEquals(cr.getNumberOfErrors(), 0); + s = "foo@import 'x';bar"; + as.scan(s, policy, AntiSamy.DOM); + as.scan(s, policy, AntiSamy.SAX); + } - cr = as.scan(s, policy, AntiSamy.SAX); - assertEquals(cr.getNumberOfErrors(), 0); - } - - @Test - public void issue56() throws ScanException, PolicyException { - /* issue #56 - unnecessary spaces */ - - String s = "Hello World!"; - String expected = "Hello World!"; - - CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - String s2 = cr.getCleanHTML(); - - assertEquals(expected, s2); - - cr = as.scan(s, policy, AntiSamy.SAX); - s2 = cr.getCleanHTML(); - - assertEquals(expected, s2); - } - - @Test - public void issue58() throws ScanException, PolicyException { - /* issue #58 - input not in list of allowed-to-be-empty tags */ - String s = "tgdan g h"; - CleanResults cr = as.scan(s, policy, AntiSamy.DOM); - assertTrue(cr.getErrorMessages().size() == 0); - - cr = as.scan(s, policy, AntiSamy.SAX); - assertTrue(cr.getErrorMessages().size() == 0); - } - - @Test - public void issue61() throws ScanException, PolicyException { - /* issue #61 - input has newline appended if ends with an accepted tag */ - String dirtyInput = "blah blah."; - Policy revised = policy.cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); - CleanResults cr = as.scan(dirtyInput, revised, AntiSamy.DOM); - assertEquals(dirtyInput, cr.getCleanHTML()); - - cr = as.scan(dirtyInput, revised, AntiSamy.SAX); - assertEquals(dirtyInput, cr.getCleanHTML()); 
- } - - @Test - public void issue69() throws ScanException, PolicyException { - - /* issue #69 - char attribute should allow single char or entity ref */ - - String s = "
      test
      "; - CleanResults crDom = as.scan(s, policy, AntiSamy.DOM); - CleanResults crSax = as.scan(s, policy, AntiSamy.SAX); - String domValue = crDom.getCleanHTML(); - String saxValue = crSax.getCleanHTML(); - assertTrue(domValue.contains("char")); - assertTrue(saxValue.contains("char")); - - s = "
      test
      "; - assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); - assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); - - s = "
      test
      "; - assertTrue(as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); - assertTrue(as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); - - s = "
      test
      "; - assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); - assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); + @Test + public void issue40() throws ScanException, PolicyException { - s = "
      test
      "; - assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); - assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); - } - - @Test - public void CDATAByPass() throws ScanException, PolicyException { - String malInput = "]]>"; - CleanResults crd = as.scan(malInput, policy, AntiSamy.DOM); - CleanResults crs = as.scan(malInput, policy, AntiSamy.SAX); - String crDom = crd.getCleanHTML(); - String crSax = crs.getCleanHTML(); - - assertTrue(crd.getErrorMessages().size() > 0); - assertTrue(crs.getErrorMessages().size() > 0); - - assertTrue(crSax.contains("<script") && !crDom.contains(" media attributes right */ - String goodInput = "hello

      world

      "; - crDom = as.scan(goodInput, policy, AntiSamy.DOM).getCleanHTML(); - crSax = as.scan(goodInput, policy, AntiSamy.SAX).getCleanHTML(); + String s = ""; + Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); - assertTrue(crSax.contains("left")); - assertTrue(crDom.contains("left")); - } - - @Test - public void stackExhaustion() throws ScanException, PolicyException { - /* - * Test Julian Cohen's stack exhaustion bug. - */ - - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < 249; i++) { - sb.append("
      "); - } - /* - * First, make sure this attack is useless against the - * SAX parser. - */ - as.scan(sb.toString(), policy, AntiSamy.SAX); - - /* - * Scan this really deep tree (depth=249, 1 less than the - * max) and make sure it doesn't blow up. - */ - - CleanResults crd = as.scan(sb.toString(), policy, AntiSamy.DOM); - - String crDom = crd.getCleanHTML(); - assertTrue(crDom.length() != 0); - /* - * Now push it over the limit to 251 and make sure we blow - * up safely. - */ - sb.append("
      "); // this makes 251 - - try { - as.scan(sb.toString(), policy, AntiSamy.DOM); - fail("DOM depth exceeded max - should've errored"); - } catch (ScanException e) { - // An error is expected. Pass - } - } - - @Test - public void issue107() throws ScanException, PolicyException { - StringBuilder sb = new StringBuilder(); - - /* - * #107 - erroneous newlines appearing? couldn't reproduce this - * error but the test seems worthy of keeping. - */ - String nl = "\n"; + CleanResults cr = as.scan(s, revised, AntiSamy.DOM); + assertTrue(cr.getCleanHTML().contains("print, projection, screen")); - String header = "

      Header

      "; - String para = "

      Paragraph

      "; - sb.append(header); - sb.append(nl); - sb.append(para); + cr = as.scan(s, revised, AntiSamy.SAX); + assertTrue(cr.getCleanHTML().contains("print, projection, screen")); + } - String html = sb.toString(); - - String crDom = as.scan(html, policy, AntiSamy.DOM).getCleanHTML(); - String crSax = as.scan(html, policy, AntiSamy.SAX).getCleanHTML(); - - /* Make sure only 1 newline appears */ - assertTrue(crDom.lastIndexOf(nl) == crDom.indexOf(nl)); - assertTrue(crSax.lastIndexOf(nl) == crSax.indexOf(nl)); - - int expectedLoc = header.length(); - int actualLoc = crSax.indexOf(nl); - assertTrue(expectedLoc == actualLoc); - - actualLoc = crDom.indexOf(nl); - // account for line separator length difference across OSes. - assertTrue(expectedLoc == actualLoc || expectedLoc == actualLoc + 1); - } + @Test + public void issue41() throws ScanException, PolicyException { + /* issue #41 - comment handling */ - @Test - public void issue112() throws ScanException, PolicyException { - TestPolicy revised = policy.cloneWithDirective(Policy.PRESERVE_COMMENTS, "true") - .cloneWithDirective(Policy.PRESERVE_SPACE, "true") - .cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); + Policy revised = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); - /* - * #112 - empty tag becomes self closing - */ + policy.cloneWithDirective(Policy.PRESERVE_COMMENTS, "false"); - String html = "text text text"; + assertEquals("text ", as.scan("text ", revised, AntiSamy.DOM).getCleanHTML()); + assertEquals("text ", as.scan("text ", revised, AntiSamy.SAX).getCleanHTML()); - String crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); - String crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); + Policy revised2 = + policy + .cloneWithDirective(Policy.PRESERVE_COMMENTS, "true") + .cloneWithDirective(Policy.PRESERVE_SPACE, "true") + .cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); - assertTrue(!crDom.contains("") && !crDom.contains("")); - assertTrue(!crSax.contains("") && 
!crSax.contains("")); + /* + * These make sure the regular comments are kept alive and that + * conditional comments are ripped out. + */ + assertEquals( + "
      text
      ", + as.scan("
      text
      ", revised2, AntiSamy.DOM).getCleanHTML()); + assertEquals( + "
      text
      ", + as.scan("
      text
      ", revised2, AntiSamy.SAX).getCleanHTML()); + + assertEquals( + "
      text
      ", + as.scan("
      text
      ", revised2, AntiSamy.DOM) + .getCleanHTML()); + assertEquals( + "
      text
      ", + as.scan("
      text
      ", revised2, AntiSamy.SAX) + .getCleanHTML()); - StringBuilder sb = new StringBuilder(); - sb.append("foobar"); - sb.append(""); + /* + * Check to see how nested conditional comments are handled. This is + * not very clean but the main goal is to avoid any tags. Not sure + * on encodings allowed in comments. + */ + String input = "
      text <[endif]-->
      "; + String expected = "
      text <[endif]-->
      "; + String output = as.scan(input, revised2, AntiSamy.DOM).getCleanHTML(); + assertEquals(expected, output); - html = sb.toString(); + input = "
      text <[endif]-->
      "; + expected = "
      text <[endif]-->
      "; + output = as.scan(input, revised2, AntiSamy.SAX).getCleanHTML(); - crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); - crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); + assertEquals(expected, output); - assertTrue(html.equals(crDom)); - assertTrue(html.equals(crSax)); - } + /* + * Regular comment nested inside conditional comment. Test makes + * sure + */ + assertEquals( + "
      text comment <[endif]-->
      ", + as.scan( + "
      text comment <[endif]-->
      ", + revised2, + AntiSamy.DOM) + .getCleanHTML()); + /* + * These play with whitespace and have invalid comment syntax. + */ + assertEquals( + "
      text
      ", + as.scan( + "
      text
      ", + revised2, + AntiSamy.DOM) + .getCleanHTML()); + assertEquals( + "
      text comment
      ", + as.scan("
      text comment
      ", revised2, AntiSamy.DOM) + .getCleanHTML()); + assertEquals( + "
      text comment
      ", + as.scan("
      text comment
      ", revised2, AntiSamy.DOM) + .getCleanHTML()); - @Test - public void nestedCdataAttacks() throws ScanException, PolicyException { + String attack = "[if lte 8]]]>>]]>"; - String crDom = as.scan(html, policy, AntiSamy.DOM).getCleanHTML(); - String crSax = as.scan(html, policy, AntiSamy.SAX).getCleanHTML(); - assertTrue(!crDom.contains("" + + ""; + as.scan(s, policy, AntiSamy.DOM); + assertEquals(as.scan(s, policy, AntiSamy.DOM).getNumberOfErrors(), 3); + + CleanResults cr = as.scan(s, policy, AntiSamy.SAX); + + assertEquals(cr.getNumberOfErrors(), 3); + } + + @Test + public void issue51() throws ScanException, PolicyException { + /* issue #51 - offsite URLs with () are found to be invalid */ + String s = + "test"; + CleanResults cr = as.scan(s, policy, AntiSamy.DOM); + + assertEquals(cr.getNumberOfErrors(), 0); + + cr = as.scan(s, policy, AntiSamy.SAX); + assertEquals(cr.getNumberOfErrors(), 0); + } + + @Test + public void issue56() throws ScanException, PolicyException { + /* issue #56 - unnecessary spaces */ + + String s = "Hello World!"; + String expected = "Hello World!"; + + CleanResults cr = as.scan(s, policy, AntiSamy.DOM); + String s2 = cr.getCleanHTML(); + + assertEquals(expected, s2); + + cr = as.scan(s, policy, AntiSamy.SAX); + s2 = cr.getCleanHTML(); + + assertEquals(expected, s2); + } + + @Test + public void issue58() throws ScanException, PolicyException { + /* issue #58 - input not in list of allowed-to-be-empty tags */ + String s = "tgdan g h"; + CleanResults cr = as.scan(s, policy, AntiSamy.DOM); + assertTrue(cr.getErrorMessages().size() == 0); + + cr = as.scan(s, policy, AntiSamy.SAX); + assertTrue(cr.getErrorMessages().size() == 0); + } + + @Test + public void issue61() throws ScanException, PolicyException { + /* issue #61 - input has newline appended if ends with an accepted tag */ + String dirtyInput = "blah blah."; + Policy revised = policy.cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); + CleanResults cr = as.scan(dirtyInput, 
revised, AntiSamy.DOM); + assertEquals(dirtyInput, cr.getCleanHTML()); + + cr = as.scan(dirtyInput, revised, AntiSamy.SAX); + assertEquals(dirtyInput, cr.getCleanHTML()); + } + + @Test + public void issue69() throws ScanException, PolicyException { + + /* issue #69 - char attribute should allow single char or entity ref */ + + String s = "
      test
      "; + CleanResults crDom = as.scan(s, policy, AntiSamy.DOM); + CleanResults crSax = as.scan(s, policy, AntiSamy.SAX); + String domValue = crDom.getCleanHTML(); + String saxValue = crSax.getCleanHTML(); + assertTrue(domValue.contains("char")); + assertTrue(saxValue.contains("char")); + + s = "
      test
      "; + assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); + assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); + + s = "
      test
      "; + assertTrue(as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); + assertTrue(as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); + + s = "
      test
      "; + assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); + assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); + + s = "
      test
      "; + assertTrue(!as.scan(s, policy, AntiSamy.DOM).getCleanHTML().contains("char")); + assertTrue(!as.scan(s, policy, AntiSamy.SAX).getCleanHTML().contains("char")); + } + + @Test + public void CDATAByPass() throws ScanException, PolicyException { + String malInput = "]]>"; + CleanResults crd = as.scan(malInput, policy, AntiSamy.DOM); + CleanResults crs = as.scan(malInput, policy, AntiSamy.SAX); + String crDom = crd.getCleanHTML(); + String crSax = crs.getCleanHTML(); + + assertTrue(crd.getErrorMessages().size() > 0); + assertTrue(crs.getErrorMessages().size() > 0); + + assertTrue(crSax.contains("<script") && !crDom.contains(""); } + /* + * First, make sure this attack is useless against the + * SAX parser. + */ + as.scan(sb.toString(), policy, AntiSamy.SAX); - private void runProfiledTest(int scanType) throws ScanException, PolicyException { - double totalDomTime; - - warmup(scanType); + /* + * Scan this really deep tree (depth=249, 1 less than the + * max) and make sure it doesn't blow up. + */ - int testReps = 9999; + CleanResults crd = as.scan(sb.toString(), policy, AntiSamy.DOM); - String html = " hey you out there on your own "; + String crDom = crd.getCleanHTML(); + assertTrue(crDom.length() != 0); + /* + * Now push it over the limit to 251 and make sure we blow + * up safely. + */ + sb.append("
      "); // this makes 251 - Double each = 0D; - int repeats = 10; - for (int i = 0; i < repeats; i++) { - totalDomTime = 0; - for (int j = 0; j < testReps; j++) { - totalDomTime += as.scan(html, policy, scanType).getScanTime(); - } - each = each + totalDomTime; - System.out.println("Total " + (scanType == AntiSamy.DOM ? "DOM" : "SAX") + " time 9999 reps short string: " + totalDomTime); - } - System.out.println("Average time: " + (each / repeats)); + try { + as.scan(sb.toString(), policy, AntiSamy.DOM); + fail("DOM depth exceeded max - should've errored"); + } catch (ScanException e) { + // An error is expected. Pass } + } - private void warmup(int scanType) throws ScanException, PolicyException { - int warmupReps = 15000; - - String html = " hey you out there on your own "; + @Test + public void issue107() throws ScanException, PolicyException { + StringBuilder sb = new StringBuilder(); - for (int j = 0; j < warmupReps; j++) { - as.scan(html, policy, scanType).getScanTime(); - } - } - - @Test - public void comparePatternSpeed() throws IOException, ScanException, PolicyException { + /* + * #107 - erroneous newlines appearing? couldn't reproduce this + * error but the test seems worthy of keeping. + */ + String nl = "\n"; - final Pattern invalidXmlCharacters = - Pattern.compile("[\\u0000-\\u001F\\uD800-\\uDFFF\\uFFFE-\\uFFFF&&[^\\u0009\\u000A\\u000D]]"); + String header = "

      Header

      "; + String para = "

      Paragraph

      "; + sb.append(header); + sb.append(nl); + sb.append(para); - int testReps = 10000; + String html = sb.toString(); - String html = " hey you out there on your own "; + String crDom = as.scan(html, policy, AntiSamy.DOM).getCleanHTML(); + String crSax = as.scan(html, policy, AntiSamy.SAX).getCleanHTML(); - String s = null; - //long start = System.currentTimeMillis(); - for (int j = 0; j < testReps; j++) { - s = invalidXmlCharacters.matcher(html).replaceAll(""); - } - //long total = System.currentTimeMillis() - start; + /* Make sure only 1 newline appears */ + assertTrue(crDom.lastIndexOf(nl) == crDom.indexOf(nl)); + assertTrue(crSax.lastIndexOf(nl) == crSax.indexOf(nl)); - //start = System.currentTimeMillis(); - Matcher matcher; - for (int j = 0; j < testReps; j++) { - matcher = invalidXmlCharacters.matcher(html); - if (matcher.matches()) { - s = matcher.replaceAll(""); - } - } - //long total2 = System.currentTimeMillis() - start; + int expectedLoc = header.length(); + int actualLoc = crSax.indexOf(nl); + assertTrue(expectedLoc == actualLoc); - assertNotNull(s); - //System.out.println("replaceAllDirect " + total); - //System.out.println("match then replace: " + total2); - } + actualLoc = crDom.indexOf(nl); + // account for line separator length difference across OSes. 
+ assertTrue(expectedLoc == actualLoc || expectedLoc == actualLoc + 1); + } - @Test - public void testOnsiteRegex() throws ScanException, PolicyException { - assertIsGoodOnsiteURL("foo"); - assertIsGoodOnsiteURL("/foo/bar"); - assertIsGoodOnsiteURL("../../di.cgi?foo&3D~"); - assertIsGoodOnsiteURL("/foo/bar/1/sdf;jsessiond=1f1f12312_123123"); - } - - void assertIsGoodOnsiteURL(String url) throws ScanException, PolicyException { - String html = as.scan("X", policy, AntiSamy.DOM).getCleanHTML(); - assertThat(html, containsString("href=\"")); - } - - @Test - public void issue10() throws ScanException, PolicyException { - assertFalse(as.scan("X", policy, AntiSamy.DOM).getCleanHTML().contains("javascript")); - assertFalse(as.scan("X", policy, AntiSamy.SAX).getCleanHTML().contains("javascript")); - } - - @Test - public void issue147() throws ScanException, PolicyException { - URL url = getClass().getResource("/antisamy-tinymce.xml"); + @Test + public void issue112() throws ScanException, PolicyException { + TestPolicy revised = + policy + .cloneWithDirective(Policy.PRESERVE_COMMENTS, "true") + .cloneWithDirective(Policy.PRESERVE_SPACE, "true") + .cloneWithDirective(Policy.FORMAT_OUTPUT, "false"); - Policy pol = Policy.getInstance(url); - as.scan("
      ", pol, AntiSamy.DOM); - } + /* + * #112 - empty tag becomes self closing + */ - @Test - public void issue75() throws ScanException, PolicyException { - URL url = getClass().getResource("/antisamy-tinymce.xml"); - Policy pol = Policy.getInstance(url); - as.scan("", pol, AntiSamy.DOM); - as.scan("", pol, AntiSamy.SAX); - } + String html = "text text text"; - @Test - public void issue144() throws ScanException, PolicyException { - String pinata = "pi\u00f1ata"; - CleanResults results = as.scan(pinata, policy, AntiSamy.DOM); - String cleanHTML = results.getCleanHTML(); - assertEquals(pinata, cleanHTML); - } + String crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); + String crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); - @Test - public void testWhitespaceNotBeingMangled() throws ScanException, PolicyException { - String test = ""; - String expected = ""; - Policy preserveSpace = policy.cloneWithDirective( Policy.PRESERVE_SPACE, "true" ); - CleanResults preserveSpaceResults = as.scan(test, preserveSpace, AntiSamy.SAX); - assertEquals(expected, preserveSpaceResults.getCleanHTML()); - } + assertTrue(!crDom.contains("") && !crDom.contains("")); + assertTrue(!crSax.contains("") && !crSax.contains("")); - @Test - public void testDataTag159() throws ScanException, PolicyException { - /* issue #159 - allow dynamic HTML5 data-* attribute */ - String good = "

      Hello World!

      "; - String bad = "

      Hello World!

      "; - String goodExpected = "

      Hello World!

      "; - String badExpected = "

      Hello World!

      "; - // test good attribute "data-" - CleanResults cr = as.scan(good, policy, AntiSamy.SAX); - String s = cr.getCleanHTML(); - assertEquals(goodExpected, s); - cr = as.scan(good, policy, AntiSamy.DOM); - s = cr.getCleanHTML(); - assertEquals(goodExpected, s); - - // test bad attribute "dat-" - cr = as.scan(bad, policy, AntiSamy.SAX); - s = cr.getCleanHTML(); - assertEquals(badExpected, s); - cr = as.scan(bad, policy, AntiSamy.DOM); - s = cr.getCleanHTML(); - assertEquals(badExpected, s); - } + StringBuilder sb = new StringBuilder(); + sb.append("foobar"); + sb.append(""); - @Test - public void testXSSInAntiSamy151() throws ScanException, PolicyException { - String test = "whatever"; - CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); + html = sb.toString(); - assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML()); - assertEquals("whatever", results_dom.getCleanHTML()); - } + crDom = as.scan(html, revised, AntiSamy.DOM).getCleanHTML(); + crSax = as.scan(html, revised, AntiSamy.SAX).getCleanHTML(); - @Test - public void testAnotherXSS() throws ScanException, PolicyException { - String test = "foo"; - CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); - CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); + assertTrue(html.equals(crDom)); + assertTrue(html.equals(crSax)); + } - assertEquals( results_sax.getCleanHTML(), results_dom.getCleanHTML()); - assertEquals("foo", results_dom.getCleanHTML()); - } + @Test + public void nestedCdataAttacks() throws ScanException, PolicyException { - @Test - public void testIssue2() throws ScanException, PolicyException { - String test = ""; - assertThat(as.scan(test, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("alert"))); - assertThat(as.scan(test, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("alert"))); - } - /* - * Mailing list user sent this in. 
Didn't work, but good test to leave in. + * Testing for nested CDATA attacks against the SAX parser. */ - @Test - public void testUnknownTags() throws ScanException, PolicyException { - String test = "<%/onmouseover=prompt(1)>"; - CleanResults saxResults = as.scan(test, policy, AntiSamy.SAX); - CleanResults domResults = as.scan(test, policy, AntiSamy.DOM); - assertThat(saxResults.getCleanHTML(), not(containsString("<%/"))); - assertThat(domResults.getCleanHTML(), not(containsString("<%/"))); - } - - @Test - public void testStreamScan() throws ScanException { - String testImgSrcURL = "whatever" + testImgSrcURL + " onmouseover=\"alert('xss')\">"); - Writer writer = new StringWriter(); - as.scan(reader, writer, policy); - String cleanHtml = writer.toString().trim(); - assertEquals("whatever" + testImgSrcURL + ">", cleanHtml); - } - - @Test - public void testGithubIssue23() throws ScanException, PolicyException { - - // Antisamy Stripping nested lists and tables - String test23 = "
      • one
      • two
      • three
        • a
        • b
      "; - // Issue claims you end up with this: - //
      • one
      • two
      • three
        • a
        • b
        - // Meaning the
      • a
      • b
      • elements were moved outside of the nested
          list they were in - - // The a.replaceAll("\\s","") is used to strip out all the whitespace in the CleanHTML so we can successfully find - // what we expect to find. - assertThat(as.scan(test23, policy, AntiSamy.DOM).getCleanHTML().replaceAll("\\s",""), containsString("
          • a
          • ")); - assertThat(as.scan(test23, policy, AntiSamy.SAX).getCleanHTML().replaceAll("\\s",""), containsString("
            • a
            • ")); - - // However, the test above can't replicate this misbehavior. - } - - // TODO: This issue is a valid enhancement request we plan to implement in the future. - // Commenting out the test case for now so test failures aren't included in a released version of AntiSamy. -/* @Test - public void testGithubIssue24() throws ScanException, PolicyException { - - // if we have onUnknownTag set to encode, it still strips out the @ and everything else after the it - // DOM Parser actually rips out the entire value even with onUnknownTag set - TestPolicy revisedPolicy = policy.cloneWithDirective("onUnknownTag", "encode"); - - String email = "name@mail.com"; - String test24 = "firstname,lastname<" + email + ">"; - assertThat(as.scan(test24, revisedPolicy, AntiSamy.SAX).getCleanHTML(), containsString(email)); - assertThat(as.scan(test24, revisedPolicy, AntiSamy.DOM).getCleanHTML(), containsString(email)); - } -*/ - @Test - public void testGithubIssue26() throws ScanException, PolicyException { - // Potential bypass (False positive) - String test26 = ""><img src=a onerror=alert(1)>"; - // Issue claims you end up with this: - // > - - assertThat(as.scan(test26, policy, AntiSamy.SAX).getCleanHTML(), not(containsString(""))); - assertThat(as.scan(test26, policy, AntiSamy.DOM).getCleanHTML(), not(containsString(""))); - - // But you actually end up with this: "><img src=a onerror=alert(1)> -- Which is as expected - } - - @Test - public void testGithubIssue27() throws ScanException, PolicyException { - // This test doesn't cause an ArrayIndexOutOfBoundsException, as reported in this issue even though it - // replicates the test as described. - String test27 = "my &test"; - assertThat(as.scan(test27, policy, AntiSamy.DOM).getCleanHTML(), containsString("test")); - assertThat(as.scan(test27, policy, AntiSamy.SAX).getCleanHTML(), containsString("test")); - } - -static final String test33 = "\n" - + "\n" - + " Test\n" - + "\n" - + "\n" - + "

              Tricky Encoding

              \n" - + "

              NOT Sanitized by AntiSamy

              \n" - + "
                \n" - + "
              1. X:x
              2. \n" - + "
              3. X:y
              4. \n" - - + "
              5. X:x
              6. \n" - + "
              7. X:y
              8. \n" - - + "
              9. X:x
              10. \n" - + "
              11. X:y
              12. \n" - - + "
              13. X:x
              14. \n" - + "
              15. X:y
              16. \n" - + "
              \n" - + "

              Tricky Encoding with Ampersand Encoding

              \n" - + "

              AntiSamy turns harmless payload into XSS by just decoding the encoded ampersands in the href attribute\n" - + "

                \n" - + "
              1. X&#x3A;x
              2. \n" - + "
              3. X&#x3A;x
              4. \n" - - + "
              5. X&#x3A;x
              6. \n" - + "
              7. X&#x3A;x
              8. \n" - - + "
              9. X&#x3A;x
              10. \n" - + "
              11. X&#x3A;x
              12. \n" - + "
              \n" - + "

              Original without ampersand encoding

              \n" - + "\n" - + ""; - - @Test - public void testGithubIssue33() throws ScanException, PolicyException { - - // Potential bypass - - // Issue claims you end up with this: - // javascript:x=alert and other similar problems (javascript:x=alert,x%281%29) but you don't. - // So issue is a false positive and has been closed. - //System.out.println(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML()); - - assertThat(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); - assertThat(as.scan(test33, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("javascript:x=alert,x%281%29"))); - } - - // TODO: This issue is a valid enhancement request. We are trying to decide whether to implement in the future. - // Commenting out the test case for now so test failures aren't included in a released version of AntiSamy. -/* - @Test - public void testGithubIssue34a() throws ScanException, PolicyException { - - // bypass stripNonValidXMLCharacters - // Issue indicates: "
              Hello\\uD83D\\uDC95
              " should be sanitized to: "
              Hello
              " - - String test34a = "
              Hello\uD83D\uDC95
              "; - assertEquals("
              Hello
              ", as.scan(test34a, policy, AntiSamy.SAX).getCleanHTML()); - assertEquals("
              Hello
              ", as.scan(test34a, policy, AntiSamy.DOM).getCleanHTML()); - } - @Test - public void testGithubIssue34b() throws ScanException, PolicyException { - - // bypass stripNonValidXMLCharacters - // Issue indicates: "
              Hello\\uD83D\\uDC95
              " should be sanitized to: "
              Hello
              " - - String test34b = "\uD888"; - assertEquals("", as.scan(test34b, policy, AntiSamy.DOM).getCleanHTML()); - assertEquals("", as.scan(test34b, policy, AntiSamy.SAX).getCleanHTML()); - } -*/ + String html = "]]>>]]>"; + String crDom = as.scan(html, policy, AntiSamy.DOM).getCleanHTML(); + String crSax = as.scan(html, policy, AntiSamy.SAX).getCleanHTML(); + assertTrue(!crDom.contains("", pol, AntiSamy.DOM); + as.scan("", pol, AntiSamy.SAX); + } + + @Test + public void issue144() throws ScanException, PolicyException { + String pinata = "pi\u00f1ata"; + CleanResults results = as.scan(pinata, policy, AntiSamy.DOM); + String cleanHTML = results.getCleanHTML(); + assertEquals(pinata, cleanHTML); + } + + @Test + public void testWhitespaceNotBeingMangled() throws ScanException, PolicyException { + String test = ""; + String expected = + ""; + Policy preserveSpace = policy.cloneWithDirective(Policy.PRESERVE_SPACE, "true"); + CleanResults preserveSpaceResults = as.scan(test, preserveSpace, AntiSamy.SAX); + assertEquals(expected, preserveSpaceResults.getCleanHTML()); + } + + @Test + public void testDataTag159() throws ScanException, PolicyException { + /* issue #159 - allow dynamic HTML5 data-* attribute */ + String good = "

              Hello World!

              "; + String bad = "

              Hello World!

              "; + String goodExpected = "

              Hello World!

              "; + String badExpected = "

              Hello World!

              "; + // test good attribute "data-" + CleanResults cr = as.scan(good, policy, AntiSamy.SAX); + String s = cr.getCleanHTML(); + assertEquals(goodExpected, s); + cr = as.scan(good, policy, AntiSamy.DOM); + s = cr.getCleanHTML(); + assertEquals(goodExpected, s); + + // test bad attribute "dat-" + cr = as.scan(bad, policy, AntiSamy.SAX); + s = cr.getCleanHTML(); + assertEquals(badExpected, s); + cr = as.scan(bad, policy, AntiSamy.DOM); + s = cr.getCleanHTML(); + assertEquals(badExpected, s); + } + + @Test + public void testXSSInAntiSamy151() throws ScanException, PolicyException { + String test = + "whatever"; + CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); + CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); + + assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML()); + assertEquals( + "whatever", + results_dom.getCleanHTML()); + } + + @Test + public void testAnotherXSS() throws ScanException, PolicyException { + String test = "foo"; + CleanResults results_sax = as.scan(test, policy, AntiSamy.SAX); + CleanResults results_dom = as.scan(test, policy, AntiSamy.DOM); + + assertEquals(results_sax.getCleanHTML(), results_dom.getCleanHTML()); + assertEquals( + "foo", results_dom.getCleanHTML()); + } + + @Test + public void testIssue2() throws ScanException, PolicyException { + String test = ""; + assertThat(as.scan(test, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("alert"))); + assertThat(as.scan(test, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("alert"))); + } + + /* + * Mailing list user sent this in. Didn't work, but good test to leave in. 
+ */ + @Test + public void testUnknownTags() throws ScanException, PolicyException { + String test = "<%/onmouseover=prompt(1)>"; + CleanResults saxResults = as.scan(test, policy, AntiSamy.SAX); + CleanResults domResults = as.scan(test, policy, AntiSamy.DOM); + assertThat(saxResults.getCleanHTML(), not(containsString("<%/"))); + assertThat(domResults.getCleanHTML(), not(containsString("<%/"))); + } + + @Test + public void testStreamScan() throws ScanException { + String testImgSrcURL = + "whatever" + testImgSrcURL + " onmouseover=\"alert('xss')\">"); + Writer writer = new StringWriter(); + as.scan(reader, writer, policy); + String cleanHtml = writer.toString().trim(); + assertEquals("whatever" + testImgSrcURL + ">", cleanHtml); + } + + @Test + public void testGithubIssue23() throws ScanException, PolicyException { + + // Antisamy Stripping nested lists and tables + String test23 = "
              • one
              • two
              • three
                • a
                • b
              "; + // Issue claims you end up with this: + //
              • one
              • two
              • three
                • a
                • b
                + // Meaning the
              • a
              • b
              • elements were moved outside of the nested
                  list they + // were in + + // The a.replaceAll("\\s","") is used to strip out all the whitespace in the CleanHTML so we + // can successfully find + // what we expect to find. + assertThat( + as.scan(test23, policy, AntiSamy.DOM).getCleanHTML().replaceAll("\\s", ""), + containsString("
                  • a
                  • ")); + assertThat( + as.scan(test23, policy, AntiSamy.SAX).getCleanHTML().replaceAll("\\s", ""), + containsString("
                    • a
                    • ")); + + // However, the test above can't replicate this misbehavior. + } + + // TODO: This issue is a valid enhancement request we plan to implement in the future. + // Commenting out the test case for now so test failures aren't included in a released version + // of AntiSamy. + /* @Test + public void testGithubIssue24() throws ScanException, PolicyException { + + // if we have onUnknownTag set to encode, it still strips out the @ and everything else after the it + // DOM Parser actually rips out the entire value even with onUnknownTag set + TestPolicy revisedPolicy = policy.cloneWithDirective("onUnknownTag", "encode"); + + String email = "name@mail.com"; + String test24 = "firstname,lastname<" + email + ">"; + assertThat(as.scan(test24, revisedPolicy, AntiSamy.SAX).getCleanHTML(), containsString(email)); + assertThat(as.scan(test24, revisedPolicy, AntiSamy.DOM).getCleanHTML(), containsString(email)); + } + */ + @Test + public void testGithubIssue26() throws ScanException, PolicyException { + // Potential bypass (False positive) + String test26 = + ""><img src=a onerror=alert(1)>"; + // Issue claims you end up with this: + // > + + assertThat( + as.scan(test26, policy, AntiSamy.SAX).getCleanHTML(), + not(containsString(""))); + assertThat( + as.scan(test26, policy, AntiSamy.DOM).getCleanHTML(), + not(containsString(""))); + + // But you actually end up with this: "><img src=a onerror=alert(1)> -- Which + // is as expected + } + + @Test + public void testGithubIssue27() throws ScanException, PolicyException { + // This test doesn't cause an ArrayIndexOutOfBoundsException, as reported in this issue even + // though it + // replicates the test as described. 
+ String test27 = "my &test"; + assertThat(as.scan(test27, policy, AntiSamy.DOM).getCleanHTML(), containsString("test")); + assertThat(as.scan(test27, policy, AntiSamy.SAX).getCleanHTML(), containsString("test")); + } + + static final String test33 = + "\n" + + "\n" + + " Test\n" + + "\n" + + "\n" + + "

                      Tricky Encoding

                      \n" + + "

                      NOT Sanitized by AntiSamy

                      \n" + + "
                        \n" + + "
                      1. X:x
                      2. \n" + + "
                      3. X:y
                      4. \n" + + "
                      5. X:x
                      6. \n" + + "
                      7. X:y
                      8. \n" + + "
                      9. X:x
                      10. \n" + + "
                      11. X:y
                      12. \n" + + "
                      13. X:x
                      14. \n" + + "
                      15. X:y
                      16. \n" + + "
                      \n" + + "

                      Tricky Encoding with Ampersand Encoding

                      \n" + + "

                      AntiSamy turns harmless payload into XSS by just decoding the encoded ampersands in the href attribute\n" + + "

                        \n" + + "
                      1. X&#x3A;x
                      2. \n" + + "
                      3. X&#x3A;x
                      4. \n" + + "
                      5. X&#x3A;x
                      6. \n" + + "
                      7. X&#x3A;x
                      8. \n" + + "
                      9. X&#x3A;x
                      10. \n" + + "
                      11. X&#x3A;x
                      12. \n" + + "
                      \n" + + "

                      Original without ampersand encoding

                      \n" + + "\n" + + ""; - static final String test40 = "\n" + @Test + public void testGithubIssue33() throws ScanException, PolicyException { + + // Potential bypass + + // Issue claims you end up with this: + // javascript:x=alert and other similar problems (javascript:x=alert,x%281%29) but + // you don't. + // So issue is a false positive and has been closed. + // System.out.println(as.scan(test33, policy, AntiSamy.SAX).getCleanHTML()); + + assertThat( + as.scan(test33, policy, AntiSamy.SAX).getCleanHTML(), + not(containsString("javascript:x=alert,x%281%29"))); + assertThat( + as.scan(test33, policy, AntiSamy.DOM).getCleanHTML(), + not(containsString("javascript:x=alert,x%281%29"))); + } + + // TODO: This issue is a valid enhancement request. We are trying to decide whether to implement + // in the future. + // Commenting out the test case for now so test failures aren't included in a released version + // of AntiSamy. + /* + @Test + public void testGithubIssue34a() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "
                      Hello\\uD83D\\uDC95
                      " should be sanitized to: "
                      Hello
                      " + + String test34a = "
                      Hello\uD83D\uDC95
                      "; + assertEquals("
                      Hello
                      ", as.scan(test34a, policy, AntiSamy.SAX).getCleanHTML()); + assertEquals("
                      Hello
                      ", as.scan(test34a, policy, AntiSamy.DOM).getCleanHTML()); + } + + @Test + public void testGithubIssue34b() throws ScanException, PolicyException { + + // bypass stripNonValidXMLCharacters + // Issue indicates: "
                      Hello\\uD83D\\uDC95
                      " should be sanitized to: "
                      Hello
                      " + + String test34b = "\uD888"; + assertEquals("", as.scan(test34b, policy, AntiSamy.DOM).getCleanHTML()); + assertEquals("", as.scan(test34b, policy, AntiSamy.SAX).getCleanHTML()); + } + */ + + static final String test40 = + "\n" + "\n" + " Test\n" + "\n" @@ -1417,312 +2019,561 @@ public void testGithubIssue34b() throws ScanException, PolicyException { + "\n" + ""; - @Test - public void testGithubIssue40() throws ScanException, PolicyException { - - // Concern is that: " - + "You must click me"; - - // Output: You must click me - - assertThat(as.scan(phishingAttempt, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/"))); - assertThat(as.scan(phishingAttempt, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/"))); - - // This ones never failed, they're just to prove a dangling markup attack on the following resulting HTML won't work. - // Less probable case (steal more tags): - final String danglingMarkup = "
                      User input: " + - ""; - - assertThat(as.scan(danglingMarkup, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/"))); - assertThat(as.scan(danglingMarkup, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/"))); - - // More probable case (steal just an attribute): - // HTML before attack: - final String danglingMarkup2 = "
                      User input: " + - ""; - - assertThat(as.scan(danglingMarkup2, policy, AntiSamy.SAX).getCleanHTML(), not(containsString("//evilactor.com/"))); - assertThat(as.scan(danglingMarkup2, policy, AntiSamy.DOM).getCleanHTML(), not(containsString("//evilactor.com/"))); - } - - @Test - public void testGithubIssue62() { - // Concern is that when a processing instruction is at the root level, node removal gets messy and Null pointer exception arises. - // More test cases are added for PI removal. - - try{ - assertThat(as.scan("|||
                      ")); - assertThat(as.scan("
                      ||
                      ")); - - assertThat(as.scan("
                      ", policy, AntiSamy.DOM) - .getCleanHTML(), not(containsString("
                      ", policy, AntiSamy.SAX) - .getCleanHTML(), not(containsString("", policy, AntiSamy.DOM).getCleanHTML(), is("")); - assertThat(as.scan("", policy, AntiSamy.SAX).getCleanHTML(), is("")); - - } catch (Exception exc) { - fail(exc.getMessage()); - } - } - - @Test - public void testGithubIssue81() throws ScanException, PolicyException { - // Concern is that "!important" is missing after processing CSS - assertThat(as.scan("

                      Some Text

                      ", policy, AntiSamy.DOM).getCleanHTML(), containsString("!important")); - assertThat(as.scan("

                      Some Text

                      ", policy, AntiSamy.SAX).getCleanHTML(), containsString("!important")); - - // Just to check scan keeps working accordingly without "!important" - assertThat(as.scan("

                      Some Text

                      ", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("!important"))); - assertThat(as.scan("

                      Some Text

                      ", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("!important"))); - } - - @Test - public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException { - // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" - // so the validations based on regexp passed and a browser would load "&:" together. - - // UPDATE: Using a new HTML parser library starts decoding entities like #00058 - assertThat(as.scan("

                      xss

                      ", policy, AntiSamy.DOM).getCleanHTML(), - not(containsString("javascript"))); - assertThat(as.scan("

                      xss

                      ", policy, AntiSamy.SAX).getCleanHTML(), - not(containsString("javascript"))); - } - - @Test - public void testGithubIssue99() throws ScanException, PolicyException { - // Test that the IANA subtags is not lost - assertThat(as.scan("

                      This paragraph is defined as British English.

                      ", policy, AntiSamy.DOM).getCleanHTML(), containsString("lang=\"en-GB\"")); - assertThat(as.scan("

                      This paragraph is defined as British English.

                      ", policy, AntiSamy.SAX).getCleanHTML(), containsString("lang=\"en-GB\"")); - } - - @Test - public void testGithubIssue101() throws ScanException, PolicyException { - // Test that margin attribute is not removed when value has too much significant figures. - // Current behavior is that decimals like 0.0001 are internally translated to 1.0E-4, this - // is reflected on regex validation and actual output. The inconsistency is due to Batik CSS. - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin")); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin")); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin")); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin")); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin")); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin")); - // When using exponential directly the "e" or "E" is internally considered as the start of - // the dimension/unit type. This creates inconsistencies that make the regex validation fail, - // also in cases like 1e4pt where "e" is considered as dimension instead of "pt". - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("margin"))); - assertThat(as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("margin"))); - } - - @Test - public void testCSSUnits() throws ScanException, PolicyException { - String input = "
                      \n" + - "\t

                      Some text.

                      \n" + - "
                      "; - CleanResults cr = as.scan(input, policy, AntiSamy.DOM); - assertThat(cr.getCleanHTML(), containsString("ex")); - assertThat(cr.getCleanHTML(), containsString("px")); - assertThat(cr.getCleanHTML(), containsString("rem")); - assertThat(cr.getCleanHTML(), containsString("vw")); - assertThat(cr.getCleanHTML(), containsString("vh")); - assertThat(cr.getCleanHTML(), not(containsString("rpc"))); - cr = as.scan(input, policy, AntiSamy.SAX); - assertThat(cr.getCleanHTML(), containsString("ex")); - assertThat(cr.getCleanHTML(), containsString("px")); - assertThat(cr.getCleanHTML(), containsString("rem")); - assertThat(cr.getCleanHTML(), containsString("vw")); - assertThat(cr.getCleanHTML(), containsString("vh")); - assertThat(cr.getCleanHTML(), not(containsString("rpc"))); - } - - @Test - public void testXSSInsideSelectOptionStyle() throws ScanException, PolicyException { - // Tests for CVE-2021-42575, XSS nested into ", policy, AntiSamy.DOM).getCleanHTML(), containsString("black")); - assertThat(as.scan("", policy, AntiSamy.SAX).getCleanHTML(), containsString("black")); - // Unsafe case - assertThat(as.scan("", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("script"))); - assertThat(as.scan("Walert(1)", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("script"))); - assertThat(as.scan("kinput/onfocus=alert(1)>", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("input"))); - assertThat(as.scan("kinput/onfocus=alert(1)>", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("input"))); - } - - @Test(timeout = 4000) - public void testMalformedPIScan() { - // Certain malformed input including a malformed processing instruction may lead the parser to an internal memory error. - // Does not matter if it is DOM or SAX scan, the problem was internally the same on HTML parser. 
- try { - as.scan("" + + "You must click me"; + + // Output: You must click + // me + + assertThat( + as.scan(phishingAttempt, policy, AntiSamy.SAX).getCleanHTML(), + not(containsString("//evilactor.com/"))); + assertThat( + as.scan(phishingAttempt, policy, AntiSamy.DOM).getCleanHTML(), + not(containsString("//evilactor.com/"))); + + // This ones never failed, they're just to prove a dangling markup attack on the following + // resulting HTML won't work. + // Less probable case (steal more tags): + final String danglingMarkup = + "
                      User input: " + + ""; + + assertThat( + as.scan(danglingMarkup, policy, AntiSamy.SAX).getCleanHTML(), + not(containsString("//evilactor.com/"))); + assertThat( + as.scan(danglingMarkup, policy, AntiSamy.DOM).getCleanHTML(), + not(containsString("//evilactor.com/"))); + + // More probable case (steal just an attribute): + // HTML before attack: + final String danglingMarkup2 = + "
                      User input: " + + ""; + + assertThat( + as.scan(danglingMarkup2, policy, AntiSamy.SAX).getCleanHTML(), + not(containsString("//evilactor.com/"))); + assertThat( + as.scan(danglingMarkup2, policy, AntiSamy.DOM).getCleanHTML(), + not(containsString("//evilactor.com/"))); + } + + @Test + public void testGithubIssue62() { + // Concern is that when a processing instruction is at the root level, node removal gets + // messy and Null pointer exception arises. + // More test cases are added for PI removal. + + try { + assertThat(as.scan("|||
                      ")); + assertThat( + as.scan("
                      ||
                      ")); + + assertThat( + as.scan( + "
                      ", + policy, + AntiSamy.DOM) + .getCleanHTML(), + not(containsString("
                      ", + policy, + AntiSamy.SAX) + .getCleanHTML(), + not(containsString("", policy, AntiSamy.DOM) + .getCleanHTML(), + is("")); + assertThat( + as.scan("", policy, AntiSamy.SAX) + .getCleanHTML(), + is("")); + + } catch (Exception exc) { + fail(exc.getMessage()); + } + } + + @Test + public void testGithubIssue81() throws ScanException, PolicyException { + // Concern is that "!important" is missing after processing CSS + assertThat( + as.scan("

                      Some Text

                      ", policy, AntiSamy.DOM) + .getCleanHTML(), + containsString("!important")); + assertThat( + as.scan("

                      Some Text

                      ", policy, AntiSamy.SAX) + .getCleanHTML(), + containsString("!important")); + + // Just to check scan keeps working accordingly without "!important" + assertThat( + as.scan("

                      Some Text

                      ", policy, AntiSamy.DOM).getCleanHTML(), + not(containsString("!important"))); + assertThat( + as.scan("

                      Some Text

                      ", policy, AntiSamy.SAX).getCleanHTML(), + not(containsString("!important"))); + } + + @Test + public void entityReferenceEncodedInHtmlAttribute() throws ScanException, PolicyException { + // Concern is that "&" is not being encoded and "#00058" was not being interpreted as ":" + // so the validations based on regexp passed and a browser would load "&:" together. + + // UPDATE: Using a new HTML parser library starts decoding entities like #00058 + assertThat( + as.scan( + "

                      xss

                      ", + policy, AntiSamy.DOM) + .getCleanHTML(), + not(containsString("javascript"))); + assertThat( + as.scan( + "

                      xss

                      ", + policy, AntiSamy.SAX) + .getCleanHTML(), + not(containsString("javascript"))); + } + + @Test + public void testGithubIssue99() throws ScanException, PolicyException { + // Test that the IANA subtags is not lost + assertThat( + as.scan( + "

                      This paragraph is defined as British English.

                      ", + policy, + AntiSamy.DOM) + .getCleanHTML(), + containsString("lang=\"en-GB\"")); + assertThat( + as.scan( + "

                      This paragraph is defined as British English.

                      ", + policy, + AntiSamy.SAX) + .getCleanHTML(), + containsString("lang=\"en-GB\"")); + } + + @Test + public void testGithubIssue101() throws ScanException, PolicyException { + // Test that margin attribute is not removed when value has too much significant figures. + // Current behavior is that decimals like 0.0001 are internally translated to 1.0E-4, this + // is reflected on regex validation and actual output. The inconsistency is due to Batik + // CSS. + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM) + .getCleanHTML(), + containsString("margin")); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX) + .getCleanHTML(), + containsString("margin")); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM) + .getCleanHTML(), + containsString("margin")); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX) + .getCleanHTML(), + containsString("margin")); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM) + .getCleanHTML(), + containsString("margin")); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX) + .getCleanHTML(), + containsString("margin")); + // When using exponential directly the "e" or "E" is internally considered as the start of + // the dimension/unit type. This creates inconsistencies that make the regex validation + // fail, + // also in cases like 1e4pt where "e" is considered as dimension instead of "pt". + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.DOM) + .getCleanHTML(), + not(containsString("margin"))); + assertThat( + as.scan("

                      Some text.

                      ", policy, AntiSamy.SAX) + .getCleanHTML(), + not(containsString("margin"))); + } + + @Test + public void testCSSUnits() throws ScanException, PolicyException { + String input = + "
                      \n" + + "\t

                      Some text.

                      \n" + + "
                      "; + CleanResults cr = as.scan(input, policy, AntiSamy.DOM); + assertThat(cr.getCleanHTML(), containsString("ex")); + assertThat(cr.getCleanHTML(), containsString("px")); + assertThat(cr.getCleanHTML(), containsString("rem")); + assertThat(cr.getCleanHTML(), containsString("vw")); + assertThat(cr.getCleanHTML(), containsString("vh")); + assertThat(cr.getCleanHTML(), not(containsString("rpc"))); + cr = as.scan(input, policy, AntiSamy.SAX); + assertThat(cr.getCleanHTML(), containsString("ex")); + assertThat(cr.getCleanHTML(), containsString("px")); + assertThat(cr.getCleanHTML(), containsString("rem")); + assertThat(cr.getCleanHTML(), containsString("vw")); + assertThat(cr.getCleanHTML(), containsString("vh")); + assertThat(cr.getCleanHTML(), not(containsString("rpc"))); + } + + @Test + public void testXSSInsideSelectOptionStyle() throws ScanException, PolicyException { + // Tests for CVE-2021-42575, XSS nested into ", + policy, + AntiSamy.DOM) + .getCleanHTML(), + containsString("black")); + assertThat( + as.scan( + "", + policy, + AntiSamy.SAX) + .getCleanHTML(), + containsString("black")); + // Unsafe case + assertThat( + as.scan( + "", + policy, + AntiSamy.DOM) + .getCleanHTML(), + not(containsString("", + policy, + AntiSamy.SAX) + .getCleanHTML(), + not(containsString("", policy, AntiSamy.DOM) + .getCleanHTML(), + not(containsString("script"))); + assertThat( + as.scan("Walert(1)", policy, AntiSamy.SAX) + .getCleanHTML(), + not(containsString("script"))); + assertThat( + as.scan("kinput/onfocus=alert(1)>", policy, AntiSamy.DOM) + .getCleanHTML(), + not(containsString("input"))); + assertThat( + as.scan("kinput/onfocus=alert(1)>", policy, AntiSamy.SAX) + .getCleanHTML(), + not(containsString("input"))); + } + + @Test(timeout = 4000) + public void testMalformedPIScan() { + // Certain malformed input including a malformed processing instruction may lead the parser + // to an internal memory error. 
+ // Does not matter if it is DOM or SAX scan, the problem was internally the same on HTML + // parser. + try { + as.scan("html
          "; + public void testSAXGoodResult() throws Exception { + // System.out.println("Policy: " + policy); - CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.SAX); - //System.out.println("SAX cleanResults: " + cleanResults.getCleanHTML()); - //System.out.println("SAX cleanResults error messages: " + cleanResults.getErrorMessages().size()); + // good + String html = "
          html
          "; - for (String msg : cleanResults.getErrorMessages()) { - System.out.println("error msg: " + msg); - } + CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.SAX); + // System.out.println("SAX cleanResults: " + cleanResults.getCleanHTML()); + // System.out.println("SAX cleanResults error messages: " + + // cleanResults.getErrorMessages().size()); - assertTrue(cleanResults.getErrorMessages().isEmpty()); - } + for (String msg : cleanResults.getErrorMessages()) { + System.out.println("error msg: " + msg); + } - public void testSAXBadResult() throws Exception { - //System.out.println("Policy: " + policy); + assertTrue(cleanResults.getErrorMessages().isEmpty()); + } - // AntiSamy should complain about the attribute value "foo" ... but it is not - String badHtml = "
          badhtml
          "; + public void testSAXBadResult() throws Exception { + // System.out.println("Policy: " + policy); - CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.SAX); + // AntiSamy should complain about the attribute value "foo" ... but it is not + String badHtml = "
          badhtml
          "; - //System.out.println("SAX cleanResults2: " + cleanResults2.getCleanHTML()); - //System.out.println("SAX cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); - /* for (String msg : cleanResults2.getErrorMessages()) { - System.out.println("error msg: " + msg); - } */ - assertTrue(cleanResults2.getErrorMessages().size() > 0); - } + CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.SAX); + + // System.out.println("SAX cleanResults2: " + cleanResults2.getCleanHTML()); + // System.out.println("SAX cleanResults2 error messages: " + + // cleanResults2.getErrorMessages().size()); + /* for (String msg : cleanResults2.getErrorMessages()) { + System.out.println("error msg: " + msg); + } */ + assertTrue(cleanResults2.getErrorMessages().size() > 0); + } - public void testDOMGoodResult() throws Exception { - //System.out.println("Policy: " + policy); + public void testDOMGoodResult() throws Exception { + // System.out.println("Policy: " + policy); - // good - String html = "
          html
          "; + // good + String html = "
          html
          "; - CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.DOM); - //System.out.println("DOM cleanResults error messages: " + cleanResults.getErrorMessages().size()); - for (String msg : cleanResults.getErrorMessages()) { - System.out.println("error msg: " + msg); - } + CleanResults cleanResults = new AntiSamy(policy).scan(html, AntiSamy.DOM); + // System.out.println("DOM cleanResults error messages: " + + // cleanResults.getErrorMessages().size()); + for (String msg : cleanResults.getErrorMessages()) { + System.out.println("error msg: " + msg); + } - assertTrue(cleanResults.getErrorMessages().isEmpty()); - } + assertTrue(cleanResults.getErrorMessages().isEmpty()); + } - public void testDOMBadResult() throws Exception { - //System.out.println("Policy: " + policy); + public void testDOMBadResult() throws Exception { + // System.out.println("Policy: " + policy); - // AntiSamy should complain about the attribute value "foo" ... but it is not - String badHtml = "
          badhtml
          "; + // AntiSamy should complain about the attribute value "foo" ... but it is not + String badHtml = "
          badhtml
          "; - CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.DOM); + CleanResults cleanResults2 = new AntiSamy(policy).scan(badHtml, AntiSamy.DOM); - //System.out.println("DOM cleanResults2 error messages: " + cleanResults2.getErrorMessages().size()); - /* for (String msg : cleanResults2.getErrorMessages()) { - System.out.println("error msg: " + msg); - } */ - assertTrue(cleanResults2.getErrorMessages().size() > 0); - } + // System.out.println("DOM cleanResults2 error messages: " + + // cleanResults2.getErrorMessages().size()); + /* for (String msg : cleanResults2.getErrorMessages()) { + System.out.println("error msg: " + msg); + } */ + assertTrue(cleanResults2.getErrorMessages().size() > 0); + } } diff --git a/src/test/java/org/owasp/validator/html/test/PolicyTest.java b/src/test/java/org/owasp/validator/html/test/PolicyTest.java index 83a1cebf..a43f828f 100644 --- a/src/test/java/org/owasp/validator/html/test/PolicyTest.java +++ b/src/test/java/org/owasp/validator/html/test/PolicyTest.java @@ -31,299 +31,339 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.MatcherAssert.assertThat; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import java.io.ByteArrayInputStream; +import java.lang.reflect.Method; +import java.net.URL; import org.junit.Test; - import org.owasp.validator.html.AntiSamy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.PolicyException; import org.owasp.validator.html.TagMatcher; import org.owasp.validator.html.scan.Constants; -import java.io.ByteArrayInputStream; -import java.lang.reflect.Method; -import java.net.URL; - /** * This class tests the Policy functionality to show that we can successfully parse the policy file. 
*/ public class PolicyTest { - private Policy policy; - - private static final String HEADER = "\n" + - "\n"; - private static final String DIRECTIVES = "\n\n"; - private static final String COMMON_ATTRIBUTES = "\n\n"; - private static final String GLOBAL_TAG_ATTRIBUTES = "\n\n"; - private static final String DYNAMIC_TAG_ATTRIBUTES = "\n\n"; - private static final String TAG_RULES = "\n"; - private static final String CSS_RULES = "\n\n"; - private static final String COMMON_REGEXPS = "\n"; - private static final String FOOTER = ""; - - // Returns a valid policy file with the specified allowedEmptyTags - private String assembleFile(String finalTagsSection) { - return HEADER + DIRECTIVES + COMMON_REGEXPS + COMMON_ATTRIBUTES + GLOBAL_TAG_ATTRIBUTES + DYNAMIC_TAG_ATTRIBUTES + TAG_RULES + CSS_RULES + - finalTagsSection + FOOTER; - } - - @Test - public void testGetAllowedEmptyTags() throws PolicyException { - String allowedEmptyTagsSection = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - "\n"; - String policyFile = assembleFile(allowedEmptyTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - TagMatcher actualTags = policy.getAllowedEmptyTags(); - - assertTrue(actualTags.matches("td")); - assertTrue(actualTags.matches("span")); - } - - @Test - public void testGetAllowedEmptyTags_emptyList() throws PolicyException { - String allowedEmptyTagsSection = "\n" + - " \n" + - " \n" + - "\n"; - String policyFile = assembleFile(allowedEmptyTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - assertEquals(0, policy.getAllowedEmptyTags().size()); - } - - @Test - public void testGetAllowedEmptyTags_emptySection() throws PolicyException { - String allowedEmptyTagsSection = "\n" + "\n"; - String policyFile = assembleFile(allowedEmptyTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - assertEquals(0, policy.getAllowedEmptyTags().size()); - } - - 
@Test - public void testGetAllowedEmptyTags_NoSection() throws PolicyException { - String allowedEmptyTagsSection = ""; - String policyFile = assembleFile(allowedEmptyTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - assertTrue(policy.getAllowedEmptyTags().size() == Constants.defaultAllowedEmptyTags.size()); + private Policy policy; + + private static final String HEADER = + "\n" + + "\n"; + private static final String DIRECTIVES = "\n\n"; + private static final String COMMON_ATTRIBUTES = "\n\n"; + private static final String GLOBAL_TAG_ATTRIBUTES = + "\n\n"; + private static final String DYNAMIC_TAG_ATTRIBUTES = + "\n\n"; + private static final String TAG_RULES = "\n"; + private static final String CSS_RULES = "\n\n"; + private static final String COMMON_REGEXPS = "\n"; + private static final String FOOTER = ""; + + // Returns a valid policy file with the specified allowedEmptyTags + private String assembleFile(String finalTagsSection) { + return HEADER + + DIRECTIVES + + COMMON_REGEXPS + + COMMON_ATTRIBUTES + + GLOBAL_TAG_ATTRIBUTES + + DYNAMIC_TAG_ATTRIBUTES + + TAG_RULES + + CSS_RULES + + finalTagsSection + + FOOTER; + } + + @Test + public void testGetAllowedEmptyTags() throws PolicyException { + String allowedEmptyTagsSection = + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + TagMatcher actualTags = policy.getAllowedEmptyTags(); + + assertTrue(actualTags.matches("td")); + assertTrue(actualTags.matches("span")); + } + + @Test + public void testGetAllowedEmptyTags_emptyList() throws PolicyException { + String allowedEmptyTagsSection = + "\n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, 
policy.getAllowedEmptyTags().size()); + } + + @Test + public void testGetAllowedEmptyTags_emptySection() throws PolicyException { + String allowedEmptyTagsSection = "\n" + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getAllowedEmptyTags().size()); + } + + @Test + public void testGetAllowedEmptyTags_NoSection() throws PolicyException { + String allowedEmptyTagsSection = ""; + String policyFile = assembleFile(allowedEmptyTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertTrue(policy.getAllowedEmptyTags().size() == Constants.defaultAllowedEmptyTags.size()); + } + + @Test + public void testGetRequireClosingTags() throws PolicyException { + String requireClosingTagsSection = + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + TagMatcher actualTags = policy.getRequiresClosingTags(); + + assertTrue(actualTags.matches("td")); + assertTrue(actualTags.matches("span")); + } + + @Test + public void testGetRequireClosingTags_emptyList() throws PolicyException { + String requireClosingTagsSection = + "\n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_emptySection() throws PolicyException { + String requireClosingTagsSection = "\n" + "\n"; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testGetRequireClosingTags_NoSection() 
throws PolicyException { + String requireClosingTagsSection = ""; + String policyFile = assembleFile(requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + + assertTrue( + policy.getRequiresClosingTags().size() == Constants.defaultRequireClosingTags.size()); + } + + @Test + public void testInvalidPolicies() { + // Starting with v1.7.0, schema validation is always enforced on policy files. + // These tests verify various schema violations are detected and flagged. + String notSupportedTagsSection = "\n" + "\n"; + String policyFile = assembleFile(notSupportedTagsSection); + try { + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + fail("No PolicyException thrown for ."); + } catch (PolicyException e) { + assertNotNull(e); } - @Test - public void testGetRequireClosingTags() throws PolicyException { - String requireClosingTagsSection = "\n" + - " \n" + - " \n" + - " \n" + - " \n" + - "\n"; - String policyFile = assembleFile(requireClosingTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - TagMatcher actualTags = policy.getRequiresClosingTags(); - - assertTrue(actualTags.matches("td")); - assertTrue(actualTags.matches("span")); + String duplicatedTagsSection = "\n" + "\n"; + policyFile = assembleFile(duplicatedTagsSection); + try { + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + fail("No PolicyException thrown when duplicated."); + } catch (PolicyException e) { + assertNotNull(e); } - @Test - public void testGetRequireClosingTags_emptyList() throws PolicyException { - String requireClosingTagsSection = "\n" + - " \n" + - " \n" + - "\n"; - String policyFile = assembleFile(requireClosingTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - assertEquals(0, policy.getRequiresClosingTags().size()); + policyFile = assembleFile("").replace("", "").replace("", ""); + 
try { + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + fail("No PolicyException thrown when missing."); + } catch (PolicyException e) { + assertNotNull(e); } - - @Test - public void testGetRequireClosingTags_emptySection() throws PolicyException { - String requireClosingTagsSection = "\n" + "\n"; - String policyFile = assembleFile(requireClosingTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - - assertEquals(0, policy.getRequiresClosingTags().size()); + } + + @Test + public void testSchemaValidationToggleWithSource() { + String notSupportedTagsSection = "\n" + "\n"; + String policyFile = assembleFile(notSupportedTagsSection); + + try { + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + fail("Not supported tag on policy, but no PolicyException occurred."); + } catch (PolicyException e) { + assertNotNull(e); } + } - @Test - public void testGetRequireClosingTags_NoSection() throws PolicyException { - String requireClosingTagsSection = ""; - String policyFile = assembleFile(requireClosingTagsSection); - - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + @Test + public void testSchemaValidationWithUrl() { + URL urlOfInvalidPolicy = getClass().getResource("/invalidPolicy.xml"); - assertTrue(policy.getRequiresClosingTags().size() == Constants.defaultRequireClosingTags.size()); + try { + policy = TestPolicy.getInstance(urlOfInvalidPolicy); + fail("PolicyException not thrown for policy w/invalid schema."); + } catch (PolicyException e) { + assertNotNull(e); } - - @Test - public void testInvalidPolicies() { - // Starting with v1.7.0, schema validation is always enforced on policy files. - // These tests verify various schema violations are detected and flagged. 
- String notSupportedTagsSection = "\n" + "\n"; - String policyFile = assembleFile(notSupportedTagsSection); - try { - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown for ."); - } catch (PolicyException e) { - assertNotNull(e); - } - - String duplicatedTagsSection = "\n" + "\n"; - policyFile = assembleFile(duplicatedTagsSection); - try { - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown when duplicated."); - } catch (PolicyException e) { - assertNotNull(e); - } - - policyFile = assembleFile("").replace("", "").replace("", ""); - try { - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("No PolicyException thrown when missing."); - } catch (PolicyException e) { - assertNotNull(e); - } + } + + @Test + public void testSchemaValidationWithInclude() { + // This policy will also include invalidPolicy.xml + URL url = getClass().getResource("/emptyPolicyWithInclude.xml"); + + try { + policy = TestPolicy.getInstance(url); + fail("PolicyException not thrown for policy w/invalid schema."); + } catch (PolicyException e) { + assertNotNull(e); } - - @Test - public void testSchemaValidationToggleWithSource() { - String notSupportedTagsSection = "\n" + "\n"; - String policyFile = assembleFile(notSupportedTagsSection); - - try { - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - fail("Not supported tag on policy, but no PolicyException occurred."); - } catch (PolicyException e) { - assertNotNull(e); - } - } - - @Test - public void testSchemaValidationWithUrl() { - URL urlOfInvalidPolicy = getClass().getResource("/invalidPolicy.xml"); - - try { - policy = TestPolicy.getInstance(urlOfInvalidPolicy); - fail("PolicyException not thrown for policy w/invalid schema."); - } catch (PolicyException e) { - assertNotNull(e); - } - } - - @Test - public void testSchemaValidationWithInclude() { - // This 
policy will also include invalidPolicy.xml - URL url = getClass().getResource("/emptyPolicyWithInclude.xml"); - - try { - policy = TestPolicy.getInstance(url); - fail("PolicyException not thrown for policy w/invalid schema."); - } catch (PolicyException e) { - assertNotNull(e); - } + } + + @Test + public void testSchemaValidationWithOptionallyDefinedTags() throws PolicyException { + String allowedEmptyTagsSection = + "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String requireClosingTagsSection = + "\n" + + " \n" + + " \n" + + " \n" + + "\n"; + String policyFile = assembleFile(allowedEmptyTagsSection + requireClosingTagsSection); + + policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); + // If it reaches this point, it passed schema validation, which is what we want. + } + + @Test + public void testGithubIssue66() { + // Concern is that LSEP characters are not being considered on .* pattern + // Note: Change was done in Policy loading, so test is located here + String tagRules = + "" + + "" + + " " + + " " + + " " + + " " + + " " + + "" + + ""; + String rawPolicy = + HEADER + + DIRECTIVES + + COMMON_REGEXPS + + COMMON_ATTRIBUTES + + GLOBAL_TAG_ATTRIBUTES + + tagRules + + CSS_RULES + + FOOTER; + + try { + policy = Policy.getInstance(new ByteArrayInputStream(rawPolicy.getBytes())); + assertThat( + new AntiSamy() + .scan("Content", policy, AntiSamy.DOM) + .getCleanHTML(), + containsString("Line 1")); + assertThat( + new AntiSamy() + .scan("Content", policy, AntiSamy.SAX) + .getCleanHTML(), + containsString("Line 1")); + } catch (Exception e) { + fail("Policy nor scan should fail:" + e.getMessage()); } - - @Test - public void testSchemaValidationWithOptionallyDefinedTags() throws PolicyException { - String allowedEmptyTagsSection = "\n" + - " \n" + - " \n" + - " \n" + - "\n"; - String requireClosingTagsSection = "\n" + - " \n" + - " \n" + - " \n" + - "\n"; - String policyFile = assembleFile(allowedEmptyTagsSection + requireClosingTagsSection); 
- - policy = Policy.getInstance(new ByteArrayInputStream(policyFile.getBytes())); - // If it reaches this point, it passed schema validation, which is what we want. - } - - @Test - public void testGithubIssue66() { - // Concern is that LSEP characters are not being considered on .* pattern - // Note: Change was done in Policy loading, so test is located here - String tagRules = "" + - "" + - " " + - " " + - " " + - " " + - " " + - "" + - ""; - String rawPolicy = HEADER + DIRECTIVES + COMMON_REGEXPS + COMMON_ATTRIBUTES + GLOBAL_TAG_ATTRIBUTES + tagRules + CSS_RULES + FOOTER; - - try { - policy = Policy.getInstance(new ByteArrayInputStream(rawPolicy.getBytes())); - assertThat(new AntiSamy().scan("Content", policy, AntiSamy.DOM).getCleanHTML(), containsString("Line 1")); - assertThat(new AntiSamy().scan("Content", policy, AntiSamy.SAX).getCleanHTML(), containsString("Line 1")); - } catch (Exception e) { - fail("Policy nor scan should fail:" + e.getMessage()); - } - } - - static void reloadSchemaValidation() throws Exception { - // Emulates the static code block used in Policy to set schema validation on/off if - // the Policy.VALIDATIONPROPERTY system property is set. If not set, it sets it to the default. - Method method = Policy.class.getDeclaredMethod("loadValidateSchemaProperty"); - method.setAccessible(true); - method.invoke(null); + } + + static void reloadSchemaValidation() throws Exception { + // Emulates the static code block used in Policy to set schema validation on/off if + // the Policy.VALIDATIONPROPERTY system property is set. If not set, it sets it to the + // default. 
+ Method method = Policy.class.getDeclaredMethod("loadValidateSchemaProperty"); + method.setAccessible(true); + method.invoke(null); + } + + @Test + public void testGithubIssue79() { + URL policyUrl; + + // Case 1: Loading policy from a URL beginning with "jar:file:" + try { + java.net.URLClassLoader child = + new java.net.URLClassLoader( + new URL[] { + Thread.currentThread() + .getContextClassLoader() + .getResource("policy-in-external-library.jar") + }, + this.getClass().getClassLoader()); + + policyUrl = + Class.forName("org.owasp.antisamy.test.Dummy", true, child) + .getClassLoader() + .getResource("policyInsideJar.xml"); + assertThat(policyUrl, notNullValue()); + + policy = Policy.getInstance(policyUrl); + assertThat(policy, notNullValue()); + } catch (Exception e) { + fail("Policy nor scan should fail:" + e.getMessage()); } - @Test - public void testGithubIssue79() { - URL policyUrl; - - // Case 1: Loading policy from a URL beginning with "jar:file:" - try { - java.net.URLClassLoader child = new java.net.URLClassLoader( - new URL[] {Thread.currentThread().getContextClassLoader().getResource("policy-in-external-library.jar")}, - this.getClass().getClassLoader() - ); - - policyUrl = Class.forName("org.owasp.antisamy.test.Dummy", true, child).getClassLoader().getResource("policyInsideJar.xml"); - assertThat(policyUrl, notNullValue()); - - policy = Policy.getInstance(policyUrl); - assertThat(policy, notNullValue()); - } catch (Exception e) { - fail("Policy nor scan should fail:" + e.getMessage()); - } - - // Case 2: Loading policy from a URL beginning with "jar:https:" - policyUrl = null; - try { - policyUrl = new URL("jar:https://somebadsite.com/foo.xml"); - policy = Policy.getInstance(policyUrl); - fail("URL creation or policy loading should have failed"); - } catch (Exception e) { - } - assertNull(policyUrl); + // Case 2: Loading policy from a URL beginning with "jar:https:" + policyUrl = null; + try { + policyUrl = new 
URL("jar:https://somebadsite.com/foo.xml"); + policy = Policy.getInstance(policyUrl); + fail("URL creation or policy loading should have failed"); + } catch (Exception e) { } + assertNull(policyUrl); + } } diff --git a/src/test/java/org/owasp/validator/html/test/TestPolicy.java b/src/test/java/org/owasp/validator/html/test/TestPolicy.java index 435175bc..62880300 100644 --- a/src/test/java/org/owasp/validator/html/test/TestPolicy.java +++ b/src/test/java/org/owasp/validator/html/test/TestPolicy.java @@ -31,69 +31,70 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; - import org.owasp.validator.html.InternalPolicy; import org.owasp.validator.html.Policy; import org.owasp.validator.html.PolicyException; import org.owasp.validator.html.model.Property; import org.owasp.validator.html.model.Tag; -/** - * @author Kristian Rosenvold - */ +/** @author Kristian Rosenvold */ public class TestPolicy extends InternalPolicy { - protected TestPolicy(Policy.ParseContext parseContext) { - super(parseContext); - } + protected TestPolicy(Policy.ParseContext parseContext) { + super(parseContext); + } - protected TestPolicy(Policy old, Map directives, Map tagRules, Map cssRules) { - super(old, directives, tagRules, cssRules); - } + protected TestPolicy( + Policy old, + Map directives, + Map tagRules, + Map cssRules) { + super(old, directives, tagRules, cssRules); + } - public static TestPolicy getInstance() throws PolicyException { - return getInstance(Policy.class.getClassLoader().getResource(DEFAULT_POLICY_URI)); - } + public static TestPolicy getInstance() throws PolicyException { + return getInstance(Policy.class.getClassLoader().getResource(DEFAULT_POLICY_URI)); + } - public static TestPolicy getInstance(String filename) throws PolicyException { - File file = new File(filename); - return getInstance(file); - } + public static TestPolicy getInstance(String filename) throws PolicyException { + File file = new File(filename); + return 
getInstance(file); + } - public static TestPolicy getInstance(File file) throws PolicyException { - try { - URI uri = file.toURI(); - return getInstance(uri.toURL()); - } catch (IOException e) { - throw new PolicyException(e); - } + public static TestPolicy getInstance(File file) throws PolicyException { + try { + URI uri = file.toURI(); + return getInstance(uri.toURL()); + } catch (IOException e) { + throw new PolicyException(e); } + } - public static TestPolicy getInstance(URL url) throws PolicyException { - return new TestPolicy(getParseContext(getTopLevelElement(url), url)); - } + public static TestPolicy getInstance(URL url) throws PolicyException { + return new TestPolicy(getParseContext(getTopLevelElement(url), url)); + } - public TestPolicy cloneWithDirective(String name, String value) { - Map directives = new HashMap(this.directives); - directives.put(name, value); - return new TestPolicy(this, Collections.unmodifiableMap(directives), tagRules, cssRules); - } + public TestPolicy cloneWithDirective(String name, String value) { + Map directives = new HashMap(this.directives); + directives.put(name, value); + return new TestPolicy(this, Collections.unmodifiableMap(directives), tagRules, cssRules); + } - public TestPolicy addTagRule(Tag tag) { - Map newTagRules = new HashMap(tagRules); - newTagRules.put(tag.getName().toLowerCase(), tag); - return new TestPolicy(this, this.directives, newTagRules, cssRules); - } + public TestPolicy addTagRule(Tag tag) { + Map newTagRules = new HashMap(tagRules); + newTagRules.put(tag.getName().toLowerCase(), tag); + return new TestPolicy(this, this.directives, newTagRules, cssRules); + } - public TestPolicy mutateTag(Tag tag) { - Map newRules = new HashMap(this.tagRules); - newRules.put( tag.getName().toLowerCase(), tag); - return new TestPolicy(this, this.directives, newRules, cssRules); - } + public TestPolicy mutateTag(Tag tag) { + Map newRules = new HashMap(this.tagRules); + newRules.put(tag.getName().toLowerCase(), tag); + 
return new TestPolicy(this, this.directives, newRules, cssRules); + } - public TestPolicy addCssProperty(Property property) { - Map newCssRules = new HashMap(cssRules); - newCssRules.put(property.getName().toLowerCase(), property); - return new TestPolicy(this, this.directives, tagRules, newCssRules); - } + public TestPolicy addCssProperty(Property property) { + Map newCssRules = new HashMap(cssRules); + newCssRules.put(property.getName().toLowerCase(), property); + return new TestPolicy(this, this.directives, tagRules, newCssRules); + } } From f7550f987a769e7deee3f70c7e7d65e9dbb8281f Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Sat, 9 Jul 2022 18:34:25 -0400 Subject: [PATCH 34/36] Update the README to clearly describe the deprecated things that were removed and how to change their code to use the new APIs. --- README.md | 41 +++++++++---------- .../validator/html/scan/ASHTMLSerializer.java | 27 ++++++------ 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 1bc2769d..77f37da2 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,24 @@ # AntiSamy -A library for performing fast, configurable cleansing of HTML coming from untrusted sources. Supports Java 7+. +A library for performing fast, configurable cleansing of HTML coming from untrusted sources. Supports Java 8+. Another way of saying that could be: It's an API that helps you make sure that clients don't supply malicious cargo code in the HTML they supply for their profile, comments, etc., that get persisted on the server. The term "malicious code" in regards to web applications usually mean "JavaScript." Mostly, Cascading Stylesheets are only considered malicious when they invoke JavaScript. However, there are many situations where "normal" HTML and CSS can be used in a malicious manner. +## IMPORTANT! - API breaking changes in 1.7.0 + +Throughout the development of the 1.6.x series, we have identified and deprecated a number of features and APIs. 
All of these deprecated items have been removed in the 1.7.0 release. These changes were all tracked in ticket: https://github.com/nahsra/antisamy/issues/195. Each of the changes is described below: + +CssHandler had 2 constructors which dropped the LinkedList embeddedStyleSheets parameter. Both constructors now create an empty internal LinkedList and the method getImportedStylesheetsURIList() can be used to get a reference to it, if needed. This is rarely used so is unlikely to affect most users of AntiSamy. Normally, an empty list was passed in as this parameter value and that list was never used again. + + * The CssHandler(Policy, LinkedList, List, ResourceBundle) was dropped + * It was replaced with: CssHandler(Policy, List, ResourceBundle) + * The CssHandler(Policy, LinkedList, List, String, ResourceBundle) was dropped + * It was replaced with: CssHandler(Policy, List, ResourceBundle, String). NOTE: The order of the last 2 parameters to this method was reversed. + + * Support for XHTML was dropped. AntiSamy now only supports HTML. As we believe this was a rarely used feature, we don't expect this to affect many AntiSamy users. + * XML Schema validation is now required on AntiSamy policy files and cannot be disabled. You must make your policy file schema compliant in order to use it with AntiSamy. + * The policy directive 'noopenerAndNoreferrerAnchors' is now ON by default. If it is disabled, AntiSamy issues a nag, encouraging you to enable it. + ## How to Use ### 1. Import the dependency @@ -38,25 +53,7 @@ MySpace was, at the time this project was born, the most popular social networki I don’t know of a possible use case for this policy file. If you wanted to allow every single valid HTML and CSS element (but without JavaScript or blatant CSS-related phishing attacks), you can use this policy file. Not even MySpace was this crazy.
However, it does serve as a good reference because it contains base rules for every element, so you can use it as a knowledge base when using tailoring the other policy files. -### NOTE: Schema validation behavior change starting with AntiSamy 1.6.0 - -While working on some improvements to AntiSamy's XML Schema Definition (XSD) for AntiSamy policy files, we noticed that AntiSamy was NOT actually enforcing the XSD. So, we've CHANGED the default behavior starting with AntiSamy 1.6.0 to enforce the schema, and not continue if the AntiSamy policy is invalid. However ... - -we recognize that it might not be possible for developers to fix their AntiSamy policies right away if they are non-compliant, and yet still want to upgrade AntiSamy to pick up any security improvements, feature enhancements, and bug fixes. As such, we've provided two ways to (temporarily!) disable schema validation: - -1) Set the Java System property: owasp.validator.validateschema to false. This can be done at the command line (e.g., -Dowasp.validator.validateschema=false) or via the Java System properties file. Neither requires a code change. - -2) Change the code using AntiSamy to invoke: Policy.setSchemaValidation(false) before loading the AntiSamy policy. This is a static call so once disabled, it is disabled for all new Policy instances. - -To encourage AntiSamy users to only use XSD compliant policies, AntiSamy will always log some type of warning when schema validation is disabled. It will either WARN that the policy is non-compliant so it can be fixed, or it will WARN that the policy is compliant, but schema validation is OFF, so validation should be turned back on (i.e., stop disabling it). We also added INFO level logging when AntiSamy schema's are loaded and validated. 
- -### Disabling schema validation is deprecated immediately, and will go away in AntiSamy 1.7+ - -The ability to disable the new schema validation feature is intended to be temporary, to smooth the transition to properly valid AntiSamy policy files. We plan to drop this feature in the next major release. We estimate that this will be some time mid-late 2022, so not any time soon. The idea is to give dev teams using AntiSamy directly, or through other libraries like ESAPI, plenty of time to get their policy files schema compliant before schema validation becomes required. - -### Logging: The logging introduced in 1.6.0 accidentally used log4j, while declaring slf4j as the logging API. - -This was quickly fixed in 1.6.1 to use slf4j APIs only. AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. +### Logging: AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. WARNING: AntiSamy's use of slf4j-simple, without any configuration file, logs messages in a buffered manner to standard output. As such, some or all of these log messages may get lost if an Exception, such as a PolicyException, is thrown. This can likely be rectified by configuring slf4j-simple to log to standard error instead, or by using an alternate slf4j logger that does so. @@ -103,10 +100,10 @@ The `CleanResults` object provides a lot of useful stuff. * `getCleanHTML()` - the clean, safe HTML output * `getCleanXMLDocumentFragment()` - the clean, safe `XMLDocumentFragment` which is reflected in `getCleanHTML()` * `getScanTime()` - returns the scan time in seconds - + __Important Note__: There has been much confusion about the `getErrorMessages()` method.
The `getErrorMessages()` method does not subtly answer the question "is this safe input?" in the affirmative if it returns an empty list. You must always use the sanitized input and there is no way to be sure the input passed in had no attacks. -The serialization and deserialization process that is critical to the effectiveness of the sanitizer is purposefully lossy and will filter out attacks via a number of attack vectors. Unfortunately, one of the tradeoffs of this strategy is that we don't always know in retrospect that an attack was seen. Thus, the `getErrorMessages()` API is there to help users understand whether their well-intentioned input meets the requirements of the system, not help a developer detect if an attack was present. +The serialization and deserialization process that is critical to the effectiveness of the sanitizer is purposefully lossy and will filter out attacks via a number of attack vectors. Unfortunately, one of the tradeoffs of this strategy is that we don't always know in retrospect that an attack was seen. Thus, the `getErrorMessages()` API is there to help users understand whether their well-intentioned input meets the requirements of the system, not help a developer detect if an attack was present. ## Other Documentation diff --git a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java index a7ba50ee..d1263d5a 100644 --- a/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java +++ b/src/main/java/org/owasp/validator/html/scan/ASHTMLSerializer.java @@ -31,18 +31,15 @@ public void endElementIO(String namespaceURI, String localName, String rawName) ElementState state; - // Works much like content() with additions for closing - // an element. Note the different checks for the closed - // element's state and the parent element's state. + // Works much like content() with additions for closing an element.
Note the different checks + // for the closed element's state and the parent element's state. _printer.unindent(); state = getElementState(); if (state.empty) _printer.printText('>'); - // This element is not empty and that last content was - // another element, so print a line break before that - // last element and this element's closing tag. - // [keith] Provided this is not an anchor. - // HTML: some elements do not print closing tag (e.g. LI) + // This element is not empty and that last content was another element, so print a line break + // before that last element and this element's closing tag. [keith] Provided this is not an + // anchor. HTML: some elements do not print closing tag (e.g. LI) if (rawName == null || !HTMLdtd.isOnlyOpening(rawName) || HTMLdtd.isOptionalClosing(rawName)) { if (_indenting && !state.preserveSpace && state.afterElement) _printer.breakLine(); // Must leave CData section first (Illegal in HTML, but still) @@ -52,8 +49,8 @@ public void endElementIO(String namespaceURI, String localName, String rawName) _printer.printText('>'); } - // Leave the element state and update that of the parent - // (if we're not root) to not empty and after element. + // Leave the element state and update that of the parent (if we're not root) to not empty and + // after element. state = leaveElementState(); // Temporary hack to prevent line breaks inside A/TD if (rawName == null || (!rawName.equalsIgnoreCase("A") && !rawName.equalsIgnoreCase("TD"))) @@ -63,11 +60,11 @@ public void endElementIO(String namespaceURI, String localName, String rawName) } /* - The override is to use printEscaped() which already escapes entity references - and writes them in the final serialized string. As escapeURI() is called like - "printer.printText(escapeURI(value))", if the URI is returned here it would - be double-printed and that is why the return value is an empty string. 
- */ + * The override is to use printEscaped() which already escapes entity references + * and writes them in the final serialized string. As escapeURI() is called like + * "printer.printText(escapeURI(value))", if the URI is returned here it would + * be double-printed and that is why the return value is an empty string. + */ @Override protected String escapeURI(String uri) { try { From 842e2655ab1863440e16c18ff634265e85bc9829 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Sat, 9 Jul 2022 18:39:27 -0400 Subject: [PATCH 35/36] Fix README formatting a bit. --- README.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 77f37da2..15e91f7e 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,10 @@ Throughout the development of the 1.6.x series, we have identified and deprecate CssHandler had 2 constructors which dropped the LinkedList embeddedStyleSheets parameter. Both constructors now create an empty internal LinkedList and the method getImportedStylesheetsURIList() can be used to get a reference to it, if needed. This is rarely used so is unlikely to affect most users of AntiSamy. Normally, an empty list was passed in as this parameter value and that list was never used again. - * The CssHandler(Policy, LinkedList, List, ResourceBundle) was dropped - * It was replaced with: CssHandler(Policy, List, ResourceBundle) - * The CssHandler(Policy, LinkedList, List, String, ResourceBundle) was dropped - * It was replaced with: CssHandler(Policy, List, ResourceBundle, String). NOTE: The order of the last 2 parameters to this method was reversed. + * The CssHandler(Policy, LinkedList\<URI\>, List\<String\>, ResourceBundle) was dropped + * It was replaced with: CssHandler(Policy, List\<String\>, ResourceBundle) + * The CssHandler(Policy, LinkedList\<URI\>, List\<String\>, String, ResourceBundle) was dropped + * It was replaced with: CssHandler(Policy, List\<String\>, ResourceBundle, String). NOTE: The order of the last 2 parameters to this method was reversed.
* Support for XHTML was dropped. AntiSamy now only supports HTML. As we believe this was a rarely used feature, we don't expect this to affect many AntiSamy users. * XML Schema validation is now required on AntiSamy policy files and cannot be disabled. You must make your policy file schema compliant in order to use it with AntiSamy. @@ -53,7 +53,8 @@ MySpace was, at the time this project was born, the most popular social networki I don’t know of a possible use case for this policy file. If you wanted to allow every single valid HTML and CSS element (but without JavaScript or blatant CSS-related phishing attacks), you can use this policy file. Not even MySpace was this crazy. However, it does serve as a good reference because it contains base rules for every element, so you can use it as a knowledge base when tailoring the other policy files. -### Logging: AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. +### Logging +AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. WARNING: AntiSamy's use of slf4j-simple, without any configuration file, logs messages in a buffered manner to standard output. As such, some or all of these log messages may get lost if an Exception, such as a PolicyException, is thrown. This can likely be rectified by configuring slf4j-simple to log to standard error instead, or by using an alternate slf4j logger that does so. @@ -85,14 +86,11 @@ There are a few ways to create a `Policy` object.
The `getInstance()` method can AntiSamy as = new AntiSamy(); CleanResults cr = as.scan(dirtyInput, policyFilePath); ``` - Finally, policy files can also be referenced by `File` objects directly in the second parameter: - ``` AntiSamy as = new AntiSamy(); CleanResults cr = as.scan(dirtyInput, new File(policyFilePath)); ``` - ### 5. Analyzing CleanResults The `CleanResults` object provides a lot of useful stuff. @@ -127,6 +125,5 @@ $ git clone https://github.com/nahsra/antisamy $ cd antisamy $ mvn package ``` - ## License Released under the [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause) license as specified here: [LICENSE](https://github.com/nahsra/antisamy/blob/main/LICENSE). From 8ec86945713cc692a272127dacbacbfd928ed9c8 Mon Sep 17 00:00:00 2001 From: Dave Wichers Date: Tue, 12 Jul 2022 14:21:01 -0400 Subject: [PATCH 36/36] Minor tweaks to README and upgrades to pom to match changes to main branch. --- README.md | 6 +++--- pom.xml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 15e91f7e..f4f3d375 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Another way of saying that could be: It's an API that helps you make sure that c Throughout the development of the 1.6.x series, we have identified and deprecated a number of features and APIs. All of these deprecated items have been removed in the 1.7.0 release. These changes were all tracked in ticket: https://github.com/nahsra/antisamy/issues/195. Each of the changes is described below: -CssHandler had 2 constructors which dropped the LinkedList embeddedStyleSheets parameter. Both constructors now create an empty internal LinkedList and the method getImportedStylesheetsURIList() can be used to get a reference to it, if needed. This is rarely used so is unlikely to affect most users of AntiSamy. Normally, an empty list was passed in as this parameter value and that list was never used again.
+CssHandler had 2 constructors which dropped the LinkedList embeddedStyleSheets parameter. Both constructors now create an empty internal LinkedList and the method getImportedStylesheetsURIList() can be used to get a reference to it, if needed. This feature is rarely used, and in fact direct invocation of these constructors is also rare, so this change is unlikely to affect most users of AntiSamy. When used, normally an empty list is passed in as this parameter value and that list is never used again. * The CssHandler(Policy, LinkedList\<URI\>, List\<String\>, ResourceBundle) was dropped * It was replaced with: CssHandler(Policy, List\<String\>, ResourceBundle) @@ -43,7 +43,7 @@ Accordingly, we’ve built a policy file that allows fairly similar functionalit 2) antisamy-ebay.xml -eBay is the most popular online auction site in the universe, as far as I can tell. It is a public site so anyone is allowed to post listings with rich HTML content. It’s not surprising, given the attractiveness of eBay as a target, that it has been subject to a few complex XSS attacks. Listings are allowed to contain much more rich content than, say, Slashdot -- so its attack surface is considerably larger. +eBay is the most popular online auction site in the universe, as far as we can tell. It is a public site so anyone is allowed to post listings with rich HTML content. It’s not surprising, given the attractiveness of eBay as a target, that it has been subject to a few complex XSS attacks. Listings are allowed to contain much more rich content than, say, Slashdot -- so its attack surface is considerably larger. 3) antisamy-myspace.xml @@ -51,7 +51,7 @@ MySpace was, at the time this project was born, the most popular social networki 4) antisamy-anythinggoes.xml -I don’t know of a possible use case for this policy file. If you wanted to allow every single valid HTML and CSS element (but without JavaScript or blatant CSS-related phishing attacks), you can use this policy file.
Not even MySpace was this crazy. However, it does serve as a good reference because it contains base rules for every element, so you can use it as a knowledge base when tailoring the other policy files. +We don’t know of a possible use case for this policy file. If you wanted to allow every single valid HTML and CSS element (but without JavaScript or blatant CSS-related phishing attacks), you can use this policy file. Not even MySpace was this crazy. However, it does serve as a good reference because it contains base rules for every element, so you can use it as a knowledge base when tailoring the other policy files. ### Logging AntiSamy now includes the slf4j-simple library for its logging, but AntiSamy users can import and use an alternate slf4j compatible logging library if they prefer. They can also then exclude slf4j-simple if they want to. diff --git a/pom.xml b/pom.xml index 2e2734b6..72996edf 100644 --- a/pom.xml +++ b/pom.xml @@ -66,7 +66,7 @@ net.sourceforge.htmlunit neko-htmlunit - 2.62.0 + 2.63.0 org.apache.httpcomponents.client5 @@ -83,7 +83,7 @@ org.apache.httpcomponents.core5 httpcore5 - 5.1.3 + 5.1.4 org.apache.xmlgraphics @@ -252,7 +252,7 @@ org.codehaus.mojo extra-enforcer-rules - 1.5.1 + 1.6.0 org.codehaus.mojo