-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Bug with valueLength being overwritten after Trim #1338
base: main
Are you sure you want to change the base?
Changes from all commits
15ba745
bc68935
78050c8
ccdc433
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,7 +72,9 @@ trait ElementBaseGrammarMixin | |
} | ||
} | ||
|
||
protected lazy val isDelimitedPrefixedPattern = { | ||
lazy val isPrefixed: Boolean = lengthKind == LengthKind.Prefixed | ||
|
||
protected lazy val isDelimitedPrefixedPattern: Boolean = { | ||
import LengthKind._ | ||
lengthKind match { | ||
case Delimited => | ||
|
@@ -474,12 +476,16 @@ trait ElementBaseGrammarMixin | |
lazy val leftPadding = leftPaddingArg | ||
lazy val rightPadFill = rightPadFillArg | ||
lazy val body = bodyArg | ||
CaptureContentLengthStart(this) ~ | ||
leftPadding ~ | ||
CaptureValueLengthStart(this) ~ | ||
body ~ | ||
CaptureValueLengthEnd(this) ~ | ||
rightPadFill ~ | ||
specifiedLength( | ||
CaptureContentLengthStart(this) ~ | ||
leftPadding ~ | ||
CaptureValueLengthStart(this) ~ | ||
body ~ | ||
CaptureValueLengthEnd(this) ~ | ||
rightPadFill | ||
) ~ | ||
// CaptureContentLengthEnd must be outside the specified length so it can | ||
// do any skipping of bits it needs to before capturing the end of content length | ||
CaptureContentLengthEnd(this) | ||
} | ||
|
||
|
@@ -623,14 +629,14 @@ trait ElementBaseGrammarMixin | |
|
||
private lazy val stringPrim = { | ||
lengthKind match { | ||
case LengthKind.Explicit => specifiedLength(StringOfSpecifiedLength(this)) | ||
case LengthKind.Prefixed => specifiedLength(StringOfSpecifiedLength(this)) | ||
case LengthKind.Explicit => StringOfSpecifiedLength(this) | ||
case LengthKind.Prefixed => StringOfSpecifiedLength(this) | ||
case LengthKind.Delimited => stringDelimitedEndOfData | ||
case LengthKind.Pattern => specifiedLength(StringOfSpecifiedLength(this)) | ||
case LengthKind.Pattern => StringOfSpecifiedLength(this) | ||
case LengthKind.Implicit => { | ||
val pt = this.simpleType.primType | ||
Assert.invariant(pt == PrimType.String) | ||
specifiedLength(StringOfSpecifiedLength(this)) | ||
StringOfSpecifiedLength(this) | ||
} | ||
case LengthKind.EndOfParent if isComplexType => | ||
notYetImplemented("lengthKind='endOfParent' for complex type") | ||
|
@@ -645,11 +651,11 @@ trait ElementBaseGrammarMixin | |
} | ||
|
||
private lazy val hexBinaryLengthPattern = prod("hexBinaryLengthPattern") { | ||
new SpecifiedLengthPattern(this, new HexBinaryEndOfBitLimit(this)) | ||
new HexBinaryEndOfBitLimit(this) | ||
} | ||
|
||
private lazy val hexBinaryLengthPrefixed = prod("hexBinaryLengthPrefixed") { | ||
new HexBinaryLengthPrefixed(this) | ||
new HexBinaryEndOfBitLimit(this) | ||
} | ||
|
||
private lazy val hexBinaryValue = prod("hexBinaryValue") { | ||
|
@@ -1226,7 +1232,7 @@ trait ElementBaseGrammarMixin | |
} | ||
|
||
private lazy val nilLitSimple = prod("nilLitSimple", isSimpleType) { | ||
captureLengthRegions(leftPadding, specifiedLength(nilLitContent), rightPadding ~ rightFill) | ||
captureLengthRegions(leftPadding, nilLitContent, rightPadding ~ rightFill) | ||
} | ||
|
||
private lazy val nilLitComplex = prod("nilLitComplex", isComplexType) { | ||
|
@@ -1329,7 +1335,10 @@ trait ElementBaseGrammarMixin | |
* as well, by not enclosing the body in a specified length enforcer. | ||
*/ | ||
private def specifiedLength(bodyArg: => Gram) = { | ||
lazy val body = bodyArg | ||
// we need this to evaluate before we wrap in specified length parser, | ||
// so it can do any internal checks for example blobValue's check for | ||
// non-explicit lengthKind | ||
val body = bodyArg | ||
lazy val bitsMultiplier = lengthUnits match { | ||
case LengthUnits.Bits => 1 | ||
case LengthUnits.Bytes => 8 | ||
|
@@ -1341,7 +1350,13 @@ trait ElementBaseGrammarMixin | |
case LengthKind.Delimited => body | ||
case LengthKind.Pattern => new SpecifiedLengthPattern(this, body) | ||
case LengthKind.Explicit if bitsMultiplier != 0 => | ||
new SpecifiedLengthExplicit(this, body, bitsMultiplier) | ||
if (isSimpleType && primType == PrimType.HexBinary) { | ||
// hexBinary has some checks that need to be done that SpecifiedLengthExplicit | ||
// gets in the way of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think instead of this comment, we can say HexBinary has it's own HexBinarySpecifiedLength parser that handles calculating the length, so we do not need the SpecifiedLengthExplicit parser? In fact, do we need to exclude a number of other primitive types that do their own explicit length handling? Looking at the current code base, I think maybe only simple types that are strings and complex types use the SpecifiedLengthExplicit parser? I think all other primitives implement their own specified length handling? So maybe this wants to be if (isComplexType || primType == PrimType.String) {
SpecifiedLengthExplicit(...)
} else {
// non-string simple types have their own custom parsers/unparsers for handling explicit lengths
body
} In fact, I wonder if we eventually want to refactor all of this to completely get rid of all the custom explicit/implicit length parsers? We just have various SpecifiedLength parser that sets a bit limit (based on a pattern, a prefix length, evaluaating a length expression etc) and then we just have a single parser that just reads all bit up until that current bit limit. Separation of concerns kind of thing. It would get rid of this condiation and all these BinaryIntegerKnownLength/RuntimeLength/PrefixLength/etc parsers. There's just a single BinaryNumberParser, and it just gets the length from the bitLimit. Maybe that generality would take performance hit? I'm also not exactly sure how that would work with unparsing--the SpecifiedLengthUnparser would need to somehow pass the calculated length to the child unparser, I guess it could still use bitLimit since that is a thing in UState? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mbeckerle , any thoughts on refactoring the code to have various specified length parser as described above, and any idea on how that would work with unparsing? |
||
body | ||
} else { | ||
new SpecifiedLengthExplicit(this, body, bitsMultiplier) | ||
} | ||
case LengthKind.Explicit => { | ||
Assert.invariant(!knownEncodingIsFixedWidth) | ||
Assert.invariant(lengthUnits eq LengthUnits.Characters) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Below this we have cases for implicit lengths. Do we need to do anything special for non-string simple types? I think those primitives have custom parsers that handle the implict length logic and don't need a SpecifiedLengthImplicit gramar? My concern is we could be adding that grammar and it would do something like set a bit limit, but the child paser that actually parsrers a the thing would just use it's own calculate and wouldn't need the bit limit, so we are just wasting effort. |
||
|
@@ -1366,14 +1381,15 @@ trait ElementBaseGrammarMixin | |
} | ||
case LengthKind.Implicit if isSimpleType && primType == PrimType.String => | ||
new SpecifiedLengthImplicitCharacters(this, body, this.maxLength.longValue) | ||
|
||
case LengthKind.Implicit if isSimpleType && primType == PrimType.HexBinary => | ||
new SpecifiedLengthImplicit(this, body, this.maxLength.longValue * bitsMultiplier) | ||
case LengthKind.Implicit | ||
if isSimpleType && impliedRepresentation == Representation.Binary => | ||
if isSimpleType && | ||
impliedRepresentation == Representation.Binary && | ||
primType != PrimType.HexBinary => | ||
new SpecifiedLengthImplicit(this, body, implicitBinaryLengthInBits) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This condition feels like it doesn't exclude enough things. For example, this matches all binary types except hex binary. But, for example, integers with length kind implicit use something like I'm wonder if it's just string types that really make use of specified length stuff? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So It look like nillable binary types need SpecifiedLengthImplicit., they are the only tests failing when I comment out that chunk of code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that means we only need the SpecifiedLengthImplicit when we are trying to parse the nillable part of a number. For example, the way things currently work is we have something like a I wonder if captureLengthRegion needs a new paramater (e.g. |
||
case LengthKind.Implicit if isComplexType => | ||
case LengthKind.Implicit => | ||
body // for complex types, implicit means "roll up from the bottom" | ||
// for simple types, the primitives have custom parsers that handle implicit length logic | ||
// and don't use the limit provided by the SpecifiedLengthImplicit parser | ||
olabusayoT marked this conversation as resolved.
Show resolved
Hide resolved
|
||
case LengthKind.EndOfParent if isComplexType => | ||
notYetImplemented("lengthKind='endOfParent' for complex type") | ||
case LengthKind.EndOfParent => | ||
|
@@ -1403,7 +1419,7 @@ trait ElementBaseGrammarMixin | |
private lazy val sharedComplexContentRegion: Gram = | ||
schemaSet.sharedComplexContentFactory.getShared( | ||
shareKey, | ||
captureLengthRegions(EmptyGram, specifiedLength(complexContent), elementUnused) ~ | ||
captureLengthRegions(EmptyGram, complexContent, elementUnused) ~ | ||
terminatorRegion | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, maybe my suggestion was wrong and the primitive still wants to be something like HexBInaryLengthPrefixed so that the grammar is obvious and things like code generators/etc can use the obvious grammar names? The parsers generated don't necessarily have to match the names.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm...do you mean the suggestion to remove HexBinaryLengthPrefixed or the suggestion to rename all PrefixedLength parsers/unparser to BitLimitLength/MinimumLength?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry that wasn't clear. I'm suggesting that we should keep the old grammar name
HexBinaryLengthPrefixed
so this change isn't needed. This way the grammar is a more clear for things like the code generator that examine it. But it's still fine to call the parsersBitLimitLength/MinimumLength
etc from that grammar.