// namedEntityReplacer rewrites decimal numeric character references
// (e.g. "&#34;") to their named entity form (e.g. "&quot;").
//
// It is built once at package initialisation so that callers do not pay for
// rebuilding the table on every call, and so that the whole input is rewritten
// in a single pass instead of one full scan per table entry. A
// *strings.Replacer is safe for concurrent use by multiple goroutines.
var namedEntityReplacer = strings.NewReplacer(
	// Predefined XML entities.
	"&#34;", "&quot;", // double quote
	"&#39;", "&apos;", // single quote
	"&#60;", "&lt;", // less than
	"&#62;", "&gt;", // greater than
	"&#38;", "&amp;", // ampersand

	// Common symbols.
	"&#160;", "&nbsp;", // non-breaking space
	"&#169;", "&copy;", // copyright
	"&#174;", "&reg;", // registered trademark
	"&#8364;", "&euro;", // euro
	"&#163;", "&pound;", // pound
	"&#165;", "&yen;", // yen
	"&#162;", "&cent;", // cent
	"&#167;", "&sect;", // section
	"&#8482;", "&trade;", // trademark
	"&#9824;", "&spades;", // spade
	"&#9827;", "&clubs;", // club
	"&#9829;", "&hearts;", // heart
	"&#9830;", "&diams;", // diamond

	// Special characters.
	"&#161;", "&iexcl;", // inverted exclamation
	"&#191;", "&iquest;", // inverted question
	"&#171;", "&laquo;", // left angle quotes
	"&#187;", "&raquo;", // right angle quotes
	"&#183;", "&middot;", // middle dot
	"&#8226;", "&bull;", // bullet
	"&#8230;", "&hellip;", // horizontal ellipsis
	"&#8242;", "&prime;", // prime
	"&#8243;", "&Prime;", // double prime
	"&#8254;", "&oline;", // overline
	"&#8260;", "&frasl;", // fraction slash

	// Math symbols.
	"&#177;", "&plusmn;", // plus-minus
	"&#215;", "&times;", // multiplication
	"&#247;", "&divide;", // division
	"&#8734;", "&infin;", // infinity
	"&#8776;", "&asymp;", // almost equal
	"&#8800;", "&ne;", // not equal
	"&#8804;", "&le;", // less than or equal
	"&#8805;", "&ge;", // greater than or equal
	"&#8721;", "&sum;", // summation
	"&#8730;", "&radic;", // square root
	"&#8747;", "&int;", // integral

	// Accented characters.
	"&#192;", "&Agrave;", // A grave
	"&#193;", "&Aacute;", // A acute
	"&#194;", "&Acirc;", // A circumflex
	"&#195;", "&Atilde;", // A tilde
	"&#196;", "&Auml;", // A umlaut
	"&#197;", "&Aring;", // A ring
	"&#198;", "&AElig;", // AE ligature
	"&#199;", "&Ccedil;", // C cedilla
	"&#200;", "&Egrave;", // E grave
	"&#201;", "&Eacute;", // E acute
	"&#202;", "&Ecirc;", // E circumflex
	"&#203;", "&Euml;", // E umlaut
	"&#204;", "&Igrave;", // I grave
	"&#205;", "&Iacute;", // I acute
	"&#206;", "&Icirc;", // I circumflex
	"&#207;", "&Iuml;", // I umlaut
	"&#208;", "&ETH;", // Eth
	"&#209;", "&Ntilde;", // N tilde
	"&#210;", "&Ograve;", // O grave
	"&#211;", "&Oacute;", // O acute
	"&#212;", "&Ocirc;", // O circumflex
	"&#213;", "&Otilde;", // O tilde
	"&#214;", "&Ouml;", // O umlaut
	"&#216;", "&Oslash;", // O slash
	"&#217;", "&Ugrave;", // U grave
	"&#218;", "&Uacute;", // U acute
	"&#219;", "&Ucirc;", // U circumflex
	"&#220;", "&Uuml;", // U umlaut
	"&#221;", "&Yacute;", // Y acute
	"&#222;", "&THORN;", // Thorn
	"&#223;", "&szlig;", // Sharp s
	"&#224;", "&agrave;", // a grave
	"&#225;", "&aacute;", // a acute
	"&#226;", "&acirc;", // a circumflex
	"&#227;", "&atilde;", // a tilde
	"&#228;", "&auml;", // a umlaut
	"&#229;", "&aring;", // a ring
	"&#230;", "&aelig;", // ae ligature
	"&#231;", "&ccedil;", // c cedilla
	"&#232;", "&egrave;", // e grave
	"&#233;", "&eacute;", // e acute
	"&#234;", "&ecirc;", // e circumflex
	"&#235;", "&euml;", // e umlaut
	"&#236;", "&igrave;", // i grave
	"&#237;", "&iacute;", // i acute
	"&#238;", "&icirc;", // i circumflex
	"&#239;", "&iuml;", // i umlaut
	"&#240;", "&eth;", // eth
	"&#241;", "&ntilde;", // n tilde
	"&#242;", "&ograve;", // o grave
	"&#243;", "&oacute;", // o acute
	"&#244;", "&ocirc;", // o circumflex
	"&#245;", "&otilde;", // o tilde
	"&#246;", "&ouml;", // o umlaut
	"&#248;", "&oslash;", // o slash
	"&#249;", "&ugrave;", // u grave
	"&#250;", "&uacute;", // u acute
	"&#251;", "&ucirc;", // u circumflex
	"&#252;", "&uuml;", // u umlaut
	"&#253;", "&yacute;", // y acute
	"&#254;", "&thorn;", // thorn
	"&#255;", "&yuml;", // y umlaut
)

// ConvertToNamedEntities replaces numeric XML entities with their named
// counterparts.
//
// Only the exact decimal references listed in namedEntityReplacer are
// rewritten. Hexadecimal references (such as "&#x22;"), zero-padded references
// and any code point not in the table are returned unchanged. The input is
// scanned once, left to right, and text produced by a replacement is never
// rescanned, so "&#38;#34;" becomes "&amp;#34;" rather than "&quot;".
//
// NOTE(review): XML itself predefines only quot, apos, lt, gt and amp. The
// remaining names (nbsp, copy, euro, ...) are HTML names; a document that uses
// them is only well-formed for consumers whose DTD declares them. The table is
// kept as-is for backward compatibility - confirm that downstream parsers
// accept these names before relying on them.
func ConvertToNamedEntities(xml string) string {
	return namedEntityReplacer.Replace(xml)
}
b/processor/xml_test.go index 65ef8a8..dddaaf6 100644 --- a/processor/xml_test.go +++ b/processor/xml_test.go @@ -17,12 +17,7 @@ func normalizeXMLWhitespace(s string) string { s = re.ReplaceAllString(strings.TrimSpace(s), " ") // Normalize XML entities for comparison - s = strings.ReplaceAll(s, "'", "'") - s = strings.ReplaceAll(s, """, """) - s = strings.ReplaceAll(s, """, "\"") - s = strings.ReplaceAll(s, "<", "<") - s = strings.ReplaceAll(s, ">", ">") - s = strings.ReplaceAll(s, "&", "&") + s = ConvertToNamedEntities(s) return s } @@ -52,7 +47,7 @@ func TestXMLProcessor_Process_NodeValues(t *testing.T) { Gambardella, Matthew - XML Developer's Guide + XML Developer's Guide Computer 89.9 2000-10-01 @@ -1015,9 +1010,9 @@ func TestXMLProcessor_Process_ElementReordering(t *testing.T) { luaExpr := ` -- With table approach, we can reorder elements by redefining the table -- Store the values - local artist = v.artist - local title = v.title - local year = v.year + local artist = v.attr.artist + local title = v.attr.title + local year = v.attr.year -- Clear the table for k in pairs(v) do @@ -1025,9 +1020,9 @@ func TestXMLProcessor_Process_ElementReordering(t *testing.T) { end -- Add elements in the desired order - v.title = title - v.artist = artist - v.year = year + v.attr.title = title + v.attr.artist = artist + v.attr.year = year ` result, modCount, matchCount, err := p.ProcessContent(content, "//song", luaExpr) @@ -1178,13 +1173,13 @@ func TestXMLProcessor_Process_DynamicXPath(t *testing.T) { expected := ` - - - + + + - - + + ` @@ -1192,7 +1187,7 @@ func TestXMLProcessor_Process_DynamicXPath(t *testing.T) { p := &XMLProcessor{} // Double all timeout values in the configuration - result, modCount, matchCount, err := p.ProcessContent(content, "//setting[@name='timeout']/@value", "v = v * 2") + result, modCount, matchCount, err := p.ProcessContent(content, "//setting[@name='timeout']/@value", "v.value = v.value * 2") if err != nil { t.Fatalf("Error processing content: 
%v", err)