Skip to content

Commit 7cbbefc

Browse files
authored
Merge pull request #102 from spassarop/1.6.5
- Update length regexes on example policies. - The tests have comments explaining the bug (#101) and a single case that passes but involves a latent bug.
2 parents 2186c6a + f56025b commit 7cbbefc

File tree

5 files changed

+26
-8
lines changed

5 files changed

+26
-8
lines changed

src/main/resources/antisamy-anythinggoes.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ http://www.w3.org/TR/html401/struct/global.html
102102
<regexp name="angle" value="(-|\+)?([0-9]+(\.[0-9]+)?)(deg|grads|rad)"/>
103103
<regexp name="time" value="([0-9]+(\.[0-9]+)?)(ms|s)"/>
104104
<regexp name="frequency" value="([0-9]+(\.[0-9]+)?)(hz|khz)"/>
105-
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
106-
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
105+
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
106+
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
107107
<regexp name="percentage" value="(-|\+)?([0-9]+(\.[0-9]+)?)%"/>
108108
<regexp name="positivePercentage" value="(\+)?([0-9]+(\.[0-9]+)?)%"/>
109109

src/main/resources/antisamy-ebay.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ http://www.w3.org/TR/html401/struct/global.html
100100
<regexp name="angle" value="(-|\+)?([0-9]+(\.[0-9]+)?)(deg|grads|rad)"/>
101101
<regexp name="time" value="([0-9]+(\.[0-9]+)?)(ms|s)"/>
102102
<regexp name="frequency" value="([0-9]+(\.[0-9]+)?)(hz|khz)"/>
103-
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
104-
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
103+
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
104+
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
105105
<regexp name="percentage" value="(-|\+)?([0-9]+(\.[0-9]+)?)%"/>
106106
<regexp name="positivePercentage" value="(\+)?([0-9]+(\.[0-9]+)?)%"/>
107107

src/main/resources/antisamy-myspace.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,8 @@ http://www.w3.org/TR/html401/struct/global.html
102102
<regexp name="angle" value="(-|\+)?([0-9]+(\.[0-9]+)?)(deg|grads|rad)"/>
103103
<regexp name="time" value="([0-9]+(\.[0-9]+)?)(ms|s)"/>
104104
<regexp name="frequency" value="([0-9]+(\.[0-9]+)?)(hz|khz)"/>
105-
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
106-
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
105+
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
106+
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
107107
<regexp name="percentage" value="(-|\+)?([0-9]+(\.[0-9]+)?)%"/>
108108
<regexp name="positivePercentage" value="(\+)?([0-9]+(\.[0-9]+)?)%"/>
109109

src/main/resources/antisamy.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ http://www.w3.org/TR/html401/struct/global.html
107107
<regexp name="angle" value="(-|\+)?([0-9]+(\.[0-9]+)?)(deg|grads|rad)"/>
108108
<regexp name="time" value="([0-9]+(\.[0-9]+)?)(ms|s)"/>
109109
<regexp name="frequency" value="([0-9]+(\.[0-9]+)?)(hz|khz)"/>
110-
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
111-
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
110+
<regexp name="length" value="((-|\+)?0|(-|\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
111+
<regexp name="positiveLength" value="((\+)?0|(\+)?([0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?)(em|ex|px|in|cm|mm|pt|pc))"/>
112112
<regexp name="percentage" value="(-|\+)?([0-9]+(\.[0-9]+)?)%"/>
113113
<regexp name="positivePercentage" value="(\+)?([0-9]+(\.[0-9]+)?)%"/>
114114

src/test/java/org/owasp/validator/html/test/AntiSamyTest.java

+18
Original file line numberDiff line numberDiff line change
@@ -1509,5 +1509,23 @@ public void testGithubIssue99() throws ScanException, PolicyException {
15091509
assertThat(as.scan("<p lang=\"en-GB\">This paragraph is defined as British English.</p>", policy, AntiSamy.DOM).getCleanHTML(), containsString("lang=\"en-GB\""));
15101510
assertThat(as.scan("<p lang=\"en-GB\">This paragraph is defined as British English.</p>", policy, AntiSamy.SAX).getCleanHTML(), containsString("lang=\"en-GB\""));
15111511
}
1512+
1513+
@Test
1514+
public void testGithubIssue101() throws ScanException, PolicyException {
1515+
// Test that the margin attribute is not removed when the value has too many significant figures.
1516+
// Current behavior is that decimals like 0.0001 are internally translated to 1.0E-4; this
1517+
// is reflected in regex validation and the actual output. The inconsistency is due to Batik CSS.
1518+
assertThat(as.scan("<p style=\"margin: 0.0001pt;\">Some text.</p>", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin"));
1519+
assertThat(as.scan("<p style=\"margin: 0.0001pt;\">Some text.</p>", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin"));
1520+
assertThat(as.scan("<p style=\"margin: 10000000pt;\">Some text.</p>", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin"));
1521+
assertThat(as.scan("<p style=\"margin: 10000000pt;\">Some text.</p>", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin"));
1522+
assertThat(as.scan("<p style=\"margin: 1.0E-4pt;\">Some text.</p>", policy, AntiSamy.DOM).getCleanHTML(), containsString("margin"));
1523+
assertThat(as.scan("<p style=\"margin: 1.0E-4pt;\">Some text.</p>", policy, AntiSamy.SAX).getCleanHTML(), containsString("margin"));
1524+
// When exponential notation is used directly, the "e" or "E" is internally treated as the start of
1525+
// the dimension/unit type. This creates inconsistencies that make the regex validation fail,
1526+
// also in cases like 1e4pt, where "e" is considered the dimension instead of "pt".
1527+
assertThat(as.scan("<p style=\"margin: 1.0E+4pt;\">Some text.</p>", policy, AntiSamy.DOM).getCleanHTML(), not(containsString("margin")));
1528+
assertThat(as.scan("<p style=\"margin: 1.0E+4pt;\">Some text.</p>", policy, AntiSamy.SAX).getCleanHTML(), not(containsString("margin")));
1529+
}
15121530
}
15131531

0 commit comments

Comments
 (0)