Skip to content

Commit

Permalink
add XMLEscapeCharsDecoder
Browse files Browse the repository at this point in the history
  • Loading branch information
clbanning committed Dec 14, 2020
1 parent 8ff6af5 commit 034e655
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
47 changes: 43 additions & 4 deletions escapechars.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ var xmlEscapeChars bool
// XMLEscapeChars(true) forces escaping invalid characters in attribute and element values.
// NOTE: this is brute force with NO interrogation of '&' being escaped already; if it is
// then '&amp;' will be re-escaped as '&amp;amp;'.
//
//
/*
The values are:
" "
Expand All @@ -22,11 +22,26 @@ var xmlEscapeChars bool
> >
& &
*/
func XMLEscapeChars(b bool) {
xmlEscapeChars = b
//
// Note: if XMLEscapeCharsDecoder(true) has been called - or its default 'false' value
// has been toggled to 'true' - then XMLEscapeChars(true) is ignored. If XMLEscapeChars(true)
// was called before XMLEscapeCharsDecoder(true), then XMLEscapeCharsDecoder turns escape
// encoding back off for mv.Xml, etc. - as if XMLEscapeChars(false) had been called - to
// prevent double escaping of ampersands, '&'.
func XMLEscapeChars(b ...bool) {
	// With no argument the current setting is toggled; otherwise the first
	// argument is the requested setting.
	want := !xmlEscapeChars
	if len(b) > 0 {
		want = b[0]
	}
	// Encoder-side escaping stays off while decoder-side escaping is enabled,
	// so values that were escaped during decoding are not escaped a second time.
	xmlEscapeChars = want && !xmlEscapeCharsDecoder
}

// Scan for '&' first, since 's' may contain "&amp;" that is parsed to "&amp;amp;"
// Scan for '&' first, since 's' may contain "&amp;" that is parsed to "&amp;amp;"
// - or "&lt;" that is parsed to "&amp;lt;".
var escapechars = [][2][]byte{
{[]byte(`&`), []byte(`&`)},
Expand All @@ -52,3 +67,27 @@ func escapeChars(s string) string {
return string(b)
}

// per issue #84, escape CharData values from xml.Decoder

var xmlEscapeCharsDecoder bool

// XMLEscapeCharsDecoder controls whether XML characters are escaped in the
// values produced while decoding, such as the xml.CharData values returned by
// Decoder.Token. When it is enabled the internal Map values hold escaped
// values, so XMLEscapeChars does not need to be set for proper encoding.
//
// By default the setting is 'false' and the Map values hold the non-escaped
// values returned by Decoder.Token. XMLEscapeCharsDecoder(true) turns escaping
// on and XMLEscapeCharsDecoder(false) turns it off; called with no argument,
// XMLEscapeCharsDecoder() toggles the current setting.
//
// Note: whenever decoder escaping ends up enabled, encoder escaping
// (XMLEscapeChars) is switched off to prevent re-escaping the values when
// encoding with mv.Xml, etc.
func XMLEscapeCharsDecoder(b ...bool) {
	switch len(b) {
	case 0:
		// No argument: flip the current setting.
		xmlEscapeCharsDecoder = !xmlEscapeCharsDecoder
	default:
		// Only the first argument is consulted.
		xmlEscapeCharsDecoder = b[0]
	}
	if xmlEscapeCharsDecoder {
		// Decoded values are already escaped; do not escape them again on encode.
		xmlEscapeChars = false
	}
}
6 changes: 6 additions & 0 deletions xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@ func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[stri
if lowerCase {
key = strings.ToLower(key)
}
if xmlEscapeCharsDecoder { // per issue#84
v.Value = escapeChars(v.Value)
}
na[key] = cast(v.Value, r, key)
}
}
Expand Down Expand Up @@ -478,6 +481,9 @@ func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[stri
case xml.CharData:
// clean up possible noise
tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
if xmlEscapeCharsDecoder { // issue#84
tt = escapeChars(tt)
}
if len(tt) > 0 {
if len(na) > 0 || decodeSimpleValuesAsMap {
na["#text"] = cast(tt, r, "#text")
Expand Down
20 changes: 13 additions & 7 deletions xmlseq.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ var NO_ROOT = NoRoot // maintain backwards compatibility
// 1. Keys in the MapSeq value that are parsed from a <name space prefix>:<local name> tag preserve the
// "<prefix>:" notation rather than stripping it as with NewMapXml().
// 2. Attribute keys for name space prefix declarations preserve "xmlns:<prefix>" notation.
//
//
// ERRORS:
// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
// "#directive" or "#procinst" key.
func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
var r bool
Expand All @@ -96,9 +96,9 @@ func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) {
// 2. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
// re-encode the message in its original structure.
// 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
//
//
// ERRORS:
// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment",
// "#directive" or "#procinst" key.
func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
var r bool
Expand Down Expand Up @@ -131,9 +131,9 @@ func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) {
// 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to
// re-encode the message in its original structure.
// 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case.
//
//
// ERRORS:
// 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment",
// 1. If a NoRoot error, "no root key," is returned, check if the initial map key is a "#comment",
// "#directive" or "#procinst" key.
func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) {
var r bool
Expand Down Expand Up @@ -208,6 +208,9 @@ func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[s
if snakeCaseKeys {
v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1)
}
if xmlEscapeCharsDecoder { // per issue#84
v.Value = escapeChars(v.Value)
}
if len(v.Name.Space) > 0 {
aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i}
} else {
Expand Down Expand Up @@ -339,6 +342,9 @@ func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[s
case xml.CharData:
// clean up possible noise
tt := strings.Trim(string(t.(xml.CharData)), trimRunes)
if xmlEscapeCharsDecoder { // issue#84
tt = escapeChars(tt)
}
if skey == "" {
// per Adrian (http://www.adrianlungu.com/) catch stray text
// in decoder stream -
Expand Down Expand Up @@ -834,7 +840,7 @@ func (e elemListSeq) Less(i, j int) bool {
// =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio

// BeautifyXml (re)formats an XML doc similar to Map.XmlIndent().
// It preserves comments, directives and process instructions,
// It preserves comments, directives and process instructions,
func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) {
x, err := NewMapXmlSeq(b)
if err != nil {
Expand Down

0 comments on commit 034e655

Please sign in to comment.