diff --git a/escapechars.go b/escapechars.go index bee0442..eeb3d25 100644 --- a/escapechars.go +++ b/escapechars.go @@ -13,7 +13,7 @@ var xmlEscapeChars bool // XMLEscapeChars(true) forces escaping invalid characters in attribute and element values. // NOTE: this is brute force with NO interrogation of '&' being escaped already; if it is // then '&' will be re-escaped as '&amp;'. -// +// /* The values are: " " @@ -22,11 +22,26 @@ var xmlEscapeChars bool > > & & */ -func XMLEscapeChars(b bool) { - xmlEscapeChars = b +// +// Note: if XMLEscapeCharsDecoder(true) has been called - or the default, 'false,' value +// has been toggled to 'true' - then XMLEscapeChars(true) is ignored. If XMLEscapeChars(true) +// has already been called before XMLEscapeCharsDecoder(true), XMLEscapeChars(false) is called +// to turn off escape encoding on mv.Xml, etc., to prevent double escaping ampersands, '&'. +func XMLEscapeChars(b ...bool) { + var bb bool + if len(b) == 0 { + bb = !xmlEscapeChars + } else { + bb = b[0] + } + if bb == true && xmlEscapeCharsDecoder == false { + xmlEscapeChars = true + } else { + xmlEscapeChars = false + } } -// Scan for '&' first, since 's' may contain "&" that is parsed to "&amp;" +// Scan for '&' first, since 's' may contain "&" that is parsed to "&amp;" // - or "<" that is parsed to "&lt;". var escapechars = [][2][]byte{ {[]byte(`&`), []byte(`&`)}, @@ -52,3 +67,27 @@ func escapeChars(s string) string { return string(b) } +// per issue #84, escape CharData values from xml.Decoder + +var xmlEscapeCharsDecoder bool + +// XMLEscapeCharsDecoder(b ...bool) escapes XML characters in xml.CharData values +// returned by Decoder.Token. Thus, the internal Map values will contain escaped +// values, and you do not need to set XMLEscapeChars for proper encoding. +// +// By default, the Map values have the non-escaped values returned by Decoder.Token. +// XMLEscapeCharsDecoder(true) - or, XMLEscapeCharsDecoder() - will toggle escape +// encoding 'on.'
+// +// Note: if XMLEscapeCharsDecoder(true) is called then XMLEscapeChars(false) is +// called to prevent re-escaping the values on encoding using mv.Xml, etc. +func XMLEscapeCharsDecoder(b ...bool) { + if len(b) == 0 { + xmlEscapeCharsDecoder = !xmlEscapeCharsDecoder + } else { + xmlEscapeCharsDecoder = b[0] + } + if xmlEscapeCharsDecoder == true && xmlEscapeChars == true { + xmlEscapeChars = false + } +} diff --git a/xml.go b/xml.go index 39700e3..a2dbbf1 100644 --- a/xml.go +++ b/xml.go @@ -372,6 +372,9 @@ func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[stri if lowerCase { key = strings.ToLower(key) } + if xmlEscapeCharsDecoder { // per issue#84 + v.Value = escapeChars(v.Value) + } na[key] = cast(v.Value, r, key) } } @@ -478,6 +481,9 @@ func xmlToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[stri case xml.CharData: // clean up possible noise tt := strings.Trim(string(t.(xml.CharData)), trimRunes) + if xmlEscapeCharsDecoder { // issue#84 + tt = escapeChars(tt) + } if len(tt) > 0 { if len(na) > 0 || decodeSimpleValuesAsMap { na["#text"] = cast(tt, r, "#text") diff --git a/xmlseq.go b/xmlseq.go index ae8e78e..559b028 100644 --- a/xmlseq.go +++ b/xmlseq.go @@ -77,9 +77,9 @@ var NO_ROOT = NoRoot // maintain backwards compatibility // 1. Keys in the MapSeq value that are parsed from a : tag preserve the // ":" notation rather than stripping it as with NewMapXml(). // 2. Attribute keys for name space prefix declarations preserve "xmlns:" notation. -// +// // ERRORS: -// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment", +// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment", // "#directive" or #procinst" key. func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) { var r bool @@ -96,9 +96,9 @@ func NewMapXmlSeq(xmlVal []byte, cast ...bool) (MapSeq, error) { // 2.
CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to // re-encode the message in its original structure. // 3. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. -// +// // ERRORS: -// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment", +// 1. If a NoRoot error, "no root key," is returned, check the initial map key for a "#comment", // "#directive" or #procinst" key. func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) { var r bool @@ -131,9 +131,9 @@ func NewMapXmlSeqReader(xmlReader io.Reader, cast ...bool) (MapSeq, error) { // 4. CoerceKeysToLower() is NOT recognized, since the intent here is to eventually call m.XmlSeq() to // re-encode the message in its original structure. // 5. If CoerceKeysToSnakeCase() has been called, then all key values will be converted to snake case. -// +// // ERRORS: -// 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment", +// 1. If a NoRoot error, "no root key," is returned, check if the initial map key is "#comment", // "#directive" or #procinst" key. 
func NewMapXmlSeqReaderRaw(xmlReader io.Reader, cast ...bool) (MapSeq, []byte, error) { var r bool @@ -208,6 +208,9 @@ func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[s if snakeCaseKeys { v.Name.Local = strings.Replace(v.Name.Local, "-", "_", -1) } + if xmlEscapeCharsDecoder { // per issue#84 + v.Value = escapeChars(v.Value) + } if len(v.Name.Space) > 0 { aa[v.Name.Space+`:`+v.Name.Local] = map[string]interface{}{"#text": cast(v.Value, r, ""), "#seq": i} } else { @@ -339,6 +342,9 @@ func xmlSeqToMapParser(skey string, a []xml.Attr, p *xml.Decoder, r bool) (map[s case xml.CharData: // clean up possible noise tt := strings.Trim(string(t.(xml.CharData)), trimRunes) + if xmlEscapeCharsDecoder { // issue#84 + tt = escapeChars(tt) + } if skey == "" { // per Adrian (http://www.adrianlungu.com/) catch stray text // in decoder stream - @@ -834,7 +840,7 @@ func (e elemListSeq) Less(i, j int) bool { // =============== https://groups.google.com/forum/#!topic/golang-nuts/lHPOHD-8qio // BeautifyXml (re)formats an XML doc similar to Map.XmlIndent(). -// It preserves comments, directives and process instructions, +// It preserves comments, directives and process instructions, func BeautifyXml(b []byte, prefix, indent string) ([]byte, error) { x, err := NewMapXmlSeq(b) if err != nil {