Skip to content

Commit

Permalink
Warn about Multiple Child Elements with Same Name
Browse files Browse the repository at this point in the history
- currently when we have multiple child elements in a sequence with the same name, daffodil allows it, while choice branches return a UPA error and unordered sequences return an SDE. We want to generate a warning when this happens in an ordered sequence so we refactor code to warn if the local names are the same. This has the effect of warning whether namespaces are supported or unsupported
- we group using elementChildren instead of groupMembers since we want to consider all children since regardless of the model groups, their immediate children end up as siblings in the infoset and can cause potential ambiguities for some infoset types
- add test to check we get UPA error with choices
- add tests to exercise new warning (same/different namespaces, but same local name)

DAFFODIL-2736
  • Loading branch information
olabusayoT committed Oct 11, 2024
1 parent e02ae7f commit 7c4a219
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ abstract class SequenceGroupTermBase(xml: Node, lexicalParent: SchemaComponent,

requiredEvaluationsIfActivated(checkIfValidUnorderedSequence)
requiredEvaluationsIfActivated(checkIfNonEmptyAndDiscrimsOrAsserts)
requiredEvaluationsIfActivated(checkIfMultipleChildrenWithSameName)

protected def apparentXMLChildren: Seq[Node]

Expand Down Expand Up @@ -181,7 +182,7 @@ abstract class SequenceGroupTermBase(xml: Node, lexicalParent: SchemaComponent,
if (!isOrdered) {
checkMembersAreAllElementOrElementRef
checkMembersHaveValidOccursCountKind
checkMembersHaveUniqueNamesInNamespaces
checkUnorderedSequenceMembersHaveUniqueNamesInNamespaces
}
}

Expand Down Expand Up @@ -215,20 +216,37 @@ abstract class SequenceGroupTermBase(xml: Node, lexicalParent: SchemaComponent,
}
}

private lazy val checkMembersHaveUniqueNamesInNamespaces: Unit = {
val childrenGroupedByQName = groupMembers.groupBy { gm =>
// previous checks should ensure that all group members are either local
// elements or element references
Assert.invariant(gm.isInstanceOf[ElementBase])
gm.asInstanceOf[ElementBase].namedQName
}
childrenGroupedByQName.foreach { case (qname, children) =>
if (children.length > 1) {
private lazy val checkUnorderedSequenceMembersHaveUniqueNamesInNamespaces: Unit = {
// previous checks should ensure that all group members for unordered sequences are either local
// elements or element references
groupChildrenBy(_.namedQName)
.filter(_._2.length > 1)
.values
.foreach { children =>
children.head.SDE(
"Two or more members of an unordered sequence have the same name and the same namespace"
)
}
}
}

private def groupChildrenBy[T](func: (ElementBase) => T): Map[T, Seq[ElementBase]] = {
val childrenGrouped = elementChildren.groupBy { gm => func(gm) }
childrenGrouped
}

private lazy val checkIfMultipleChildrenWithSameName: Unit = {
groupChildrenBy(_.name)
.filter(_._2.length > 1)
.values
.foreach { children =>
children.head.SDW(
WarnID.MultipleChildElementsWithSameName,
"Two or more members of the sequence have the same name. " +
"Since not all infosets support namespaces or siblings with the same name, " +
"this could cause issues. QNames are: %s",
children.map(c => c.namedQName).mkString(", ")
)
}
}

final lazy val isOrdered: Boolean = this.sequenceKind match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,7 @@
<xs:enumeration value="invalidAnnotationPoint" />
<xs:enumeration value="layerCompileWarning" />
<xs:enumeration value="multipleChoiceBranches" />
<xs:enumeration value="multipleChildElementsWithSameName" />
<xs:enumeration value="namespaceDifferencesOnly" />
<xs:enumeration value="noEmptyDefault" />
<xs:enumeration value="nonExpressionPropertyValueLooksLikeExpression" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1536,4 +1536,115 @@
</tdml:infoset>
</tdml:parserTestCase>

<tdml:defineSchema name="multipleElemSameName" xmlns:u="USMTF" elementFormDefault="unqualified">

<xs:include schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
<xs:import namespace="USMTF" schemaLocation="/org/apache/daffodil/section14/sequence_groups/sequenceWithComplexType.dfdl.xsd"/>
<dfdl:format ref="ex:GeneralFormat" lengthKind="explicit" occursCountKind="implicit"/>

<xs:element name="multipleElemSameName" dfdl:lengthKind="implicit">
<xs:complexType>
<xs:sequence>
<xs:element name="A" type="xs:string" dfdl:length="1" />
<xs:element name="AmbigElt" type="xs:string" dfdl:length="1" />
<xs:element name="B" type="xs:string" dfdl:length="1" />
<xs:element name="AmbigElt" type="xs:string" dfdl:length="1" />
</xs:sequence>
</xs:complexType>
</xs:element>

<xs:element name="multipleElemSameNameDifferentNamespaces" dfdl:lengthKind="implicit">
<xs:complexType>
<xs:sequence>
<xs:element name="A" type="xs:string" dfdl:length="1" />
<xs:element name="e1" dfdl:terminator="/" dfdl:lengthKind="delimited">
<xs:complexType>
<xs:sequence dfdl:initiator="-" dfdl:separator=";" dfdl:separatorPosition="prefix">
<xs:element name="e2" type="u:ct1" minOccurs="1" maxOccurs="unbounded" dfdl:lengthKind="delimited" />
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="B" type="xs:string" dfdl:length="1" />
<xs:element ref="u:e1" dfdl:lengthKind="delimited"/>
</xs:sequence>
</xs:complexType>
</xs:element>

</tdml:defineSchema>


<tdml:parserTestCase name="multipleElemSameName"
root="multipleElemSameName"
model="multipleElemSameName"
ignoreUnexpectedWarnings="false"
description="TDML runner verifies warnings and infoset.">

<tdml:document><![CDATA[1234]]></tdml:document>

<tdml:infoset>
<tdml:dfdlInfoset>
<multipleElemSameName>
<A>1</A>
<AmbigElt>2</AmbigElt>
<B>3</B>
<AmbigElt>4</AmbigElt>
</multipleElemSameName>
</tdml:dfdlInfoset>
</tdml:infoset>

<tdml:warnings>
<tdml:warning>Schema Definition Warning</tdml:warning>
<tdml:warning>same name</tdml:warning>
<tdml:warning>AmbigElt</tdml:warning>
</tdml:warnings>
</tdml:parserTestCase>

<tdml:parserTestCase name="multipleElemSameNameDifferentNamespaces"
root="multipleElemSameNameDifferentNamespaces"
model="multipleElemSameName"
ignoreUnexpectedWarnings="false"
description="TDML runner verifies warnings and infoset.">

<tdml:document><![CDATA[1-;2;3;4/5-;6;7;8/]]></tdml:document>

<tdml:infoset>
<tdml:dfdlInfoset>
<multipleElemSameNameDifferentNamespaces>
<A>1</A>
<e1>
<e2>
<String1>2</String1>
</e2>
<e2>
<String1>3</String1>
</e2>
<e2>
<String1>4</String1>
</e2>
</e1>
<B>5</B>
<e1>
<e2>
<String1>6</String1>
</e2>
<e2>
<String1>7</String1>
</e2>
<e2>
<String1>8</String1>
</e2>
</e1>
</multipleElemSameNameDifferentNamespaces>
</tdml:dfdlInfoset>
</tdml:infoset>

<tdml:warnings>
<tdml:warning>Schema Definition Warning</tdml:warning>
<tdml:warning>same name</tdml:warning>
<tdml:warning>{}e1</tdml:warning>
<tdml:warning>u:e1</tdml:warning>
</tdml:warnings>
</tdml:parserTestCase>


</tdml:testSuite>
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<tdml:testSuite
suiteName="choice1765"
description="Tests for choice construct. Bug DFDL-1765."
xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:ex="http://example.com"
xmlns:fn="http://www.w3.org/2005/xpath-functions">

<tdml:defineSchema name="ambiguousChoice" elementFormDefault="unqualified">

<xs:include schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
<dfdl:format ref="ex:GeneralFormat" lengthKind="explicit" occursCountKind="implicit"/>


<xs:element name="ambiguousChoice" dfdl:lengthKind="implicit">
<xs:complexType>
<xs:sequence>
<xs:element name="key" type="xs:string" dfdl:length="1"/>
<xs:choice dfdl:choiceDispatchKey="{key}">
<xs:element name="A" type="xs:string" dfdl:length="1" dfdl:choiceBranchKey="A" />
<xs:element name="AmbigElt" type="xs:string" dfdl:length="1" dfdl:choiceBranchKey="B" />
<xs:element name="B" type="xs:string" dfdl:length="1" dfdl:choiceBranchKey="C" />
<xs:element name="AmbigElt" type="xs:string" dfdl:length="1" dfdl:choiceBranchKey="D" />
</xs:choice>
</xs:sequence>
</xs:complexType>
</xs:element>


</tdml:defineSchema>

<tdml:parserTestCase name="choiceAmbiguousUPA" root="ambiguousChoice" model="ambiguousChoice"
description="verify UPA error in choice"
ignoreUnexpectedWarnings="false">

<tdml:document><![CDATA[B2]]></tdml:document>

<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>AmbigElt</tdml:error>
<tdml:error>Unique Particle Attribution</tdml:error>
</tdml:errors>
</tdml:parserTestCase>


</tdml:testSuite>
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,12 @@ class TestSequenceGroups {
// @Test def test_delimiterScanning_03() { runner_01.runOneTest("delimiterScanning_03") }

@Test def test_hiddenGroupIVC(): Unit = { runner_02.runOneTest("hiddenGroupIVC") }

// DAFFODIL-2736
@Test def test_multipleElemSameName() = {
runner_02.runOneTest("multipleElemSameName")
}
@Test def test_multipleElemSameNameDifferentNamespaces() = {
runner_02.runOneTest("multipleElemSameNameDifferentNamespaces")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ object TestChoice2 {
val runner = Runner(testDir, "choice1765.tdml")
val runner1773 = Runner(testDir, "choice1773.tdml")
val runner2162 = Runner(testDir, "choice2162.tdml")
val runner2736 = Runner(testDir, "choice2736.tdml")

@AfterClass def shutDown(): Unit = {
runner.reset
runner1773.reset
runner2162.reset
runner2736.reset
}
}

Expand Down Expand Up @@ -63,4 +65,8 @@ class TestChoice2 {
runner2162.runOneTest("choiceArrayDirectDispatch1")
}

// DAFFODIL-2736
@Test def test_choiceAmbiguousUPA() = {
runner2736.runOneTest("choiceAmbiguousUPA")
}
}

0 comments on commit 7c4a219

Please sign in to comment.