package processor

import (
	"fmt"
	"log"
	"strings"

	"github.com/antchfx/xmlquery"
	lua "github.com/yuin/gopher-lua"
	"modify/processor/xpath"
)

// XMLProcessor implements the Processor interface for XML documents.
type XMLProcessor struct{}

// ProcessContent parses content as XML, selects the nodes matching the XPath
// expression in path, runs luaExpr against each of them and serializes the
// (possibly modified) document back to a string.
//
// It returns the resulting document, the number of nodes the Lua code marked
// as modified, and the number of nodes matched by path. On any error the
// original content is returned unchanged together with the error. When no
// node matches, the original content is returned with zero counts.
func (p *XMLProcessor) ProcessContent(content string, path string, luaExpr string) (string, int, int, error) {
	// encoding/xml needs a predefined struct; the documents handled here have
	// an unknown, dynamic shape, so a generic node tree is used instead.
	doc, err := xmlquery.Parse(strings.NewReader(content))
	if err != nil {
		return content, 0, 0, fmt.Errorf("error parsing XML: %w", err)
	}

	// Find nodes matching the XPath pattern.
	nodes, err := xpath.Get(doc, path)
	if err != nil {
		return content, 0, 0, fmt.Errorf("error executing XPath: %w", err)
	}

	matchCount := len(nodes)
	if matchCount == 0 {
		return content, 0, 0, nil
	}

	// Apply modifications to each node, using a fresh Lua state per node.
	modCount := 0
	for _, node := range nodes {
		L, err := NewLuaState()
		if err != nil {
			return content, 0, 0, fmt.Errorf("error creating Lua state: %w", err)
		}

		// applyLuaToNode takes ownership of L and closes it before returning,
		// so Lua states do not pile up until the end of ProcessContent.
		modified, err := p.applyLuaToNode(L, doc, path, node, luaExpr)
		if err != nil {
			return content, modCount, matchCount, err
		}
		if modified {
			modCount++
		}
	}

	// Serialize the modified XML document to a string.
	if doc.FirstChild != nil && doc.FirstChild.Type == xmlquery.DeclarationNode {
		// Emit the XML declaration first, then detach it from the tree so it
		// is not included again when the rest of the document is serialized.
		declaration := doc.FirstChild.OutputXML(true)
		doc.FirstChild = doc.FirstChild.NextSibling
		return ConvertToNamedEntities(declaration + doc.OutputXML(true)), modCount, matchCount, nil
	}

	// Convert numeric entities to named entities for better readability.
	return ConvertToNamedEntities(doc.OutputXML(true)), modCount, matchCount, nil
}

// applyLuaToNode runs luaExpr against a single matched node using the Lua
// state L and applies the result to the document. L is closed before the
// function returns. The boolean result reports whether the Lua code set the
// global "modified" to true.
func (p *XMLProcessor) applyLuaToNode(L *lua.LState, doc *xmlquery.Node, path string, node *xmlquery.Node, luaExpr string) (bool, error) {
	defer L.Close()

	if err := p.ToLua(L, node); err != nil {
		return false, fmt.Errorf("error converting to Lua: %w", err)
	}

	if err := L.DoString(BuildLuaScript(luaExpr)); err != nil {
		return false, fmt.Errorf("error executing Lua: %w", err)
	}

	result, err := p.FromLua(L)
	if err != nil {
		return false, fmt.Errorf("error getting result from Lua: %w", err)
	}
	log.Printf("%#v", result)

	// The script signals a change through the global "modified"; anything
	// whose string form is not exactly "true" counts as unmodified.
	if L.GetGlobal("modified").String() != "true" {
		log.Printf("No changes made to node at path: %s", node.Data)
		return false, nil
	}

	// Apply the modification based on the shape of the result.
	switch updated := result.(type) {
	case string:
		// A plain string replaces the node value directly.
		// NOTE(review): this writes through the XPath expression rather than
		// the current node, so it presumably affects every match of path —
		// confirm against xpath.Set.
		xpath.Set(doc, path, updated)
	case map[string]interface{}:
		// A map carries more complex updates (value and/or attributes).
		updateNodeFromMap(node, updated)
	}
	return true, nil
}

// ToLua exposes an XML node to Lua as the global table "v" with the fields
// "type", "name" and "value", plus an "attr" table when the node has
// attributes. data must be a *xmlquery.Node; any other type is an error.
func (p *XMLProcessor) ToLua(L *lua.LState, data interface{}) error {
	node, ok := data.(*xmlquery.Node)
	if !ok {
		return fmt.Errorf("expected xmlquery.Node, got %T", data)
	}

	// Create a simple table with the essential data.
	table := L.NewTable()
	L.SetField(table, "type", lua.LString(nodeTypeToString(node.Type)))
	L.SetField(table, "name", lua.LString(node.Data))
	L.SetField(table, "value", lua.LString(node.InnerText()))

	// Add attributes, keyed by local name, if there are any.
	if len(node.Attr) > 0 {
		attrs := L.NewTable()
		for _, attr := range node.Attr {
			L.SetField(attrs, attr.Name.Local, lua.LString(attr.Value))
		}
		L.SetField(table, "attr", attrs)
	}

	L.SetGlobal("v", table)
	return nil
}

// FromLua reads the global "v" back from Lua. A Lua table is returned as a
// map[string]interface{}; every other value is returned as its string form.
// The returned error is always nil.
func (p *XMLProcessor) FromLua(L *lua.LState) (interface{}, error) {
	luaValue := L.GetGlobal("v")

	// Tables carry attributes and more complex updates.
	if luaValue.Type() == lua.LTTable {
		return luaTableToMap(L, luaValue.(*lua.LTable)), nil
	}

	// Strings and all remaining value types are returned as strings.
	return luaValue.String(), nil
}

// luaTableToMap converts a Lua table to a Go map. Only string keys are kept;
// nested tables are converted recursively and all other values are stored as
// their string form.
func luaTableToMap(L *lua.LState, table *lua.LTable) map[string]interface{} {
	result := make(map[string]interface{})

	table.ForEach(func(k, v lua.LValue) {
		if k.Type() != lua.LTString {
			return
		}
		key := k.String()
		if v.Type() == lua.LTTable {
			result[key] = luaTableToMap(L, v.(*lua.LTable))
		} else {
			result[key] = v.String()
		}
	})

	return result
}

// nodeTypeToString maps a node type to the name exposed to Lua scripts:
// "element", "text", "attribute", or "other" for everything else.
func nodeTypeToString(nodeType xmlquery.NodeType) string {
	switch nodeType {
	case xmlquery.ElementNode:
		return "element"
	case xmlquery.TextNode:
		return "text"
	case xmlquery.AttributeNode:
		return "attribute"
	default:
		return "other"
	}
}

// updateNodeFromMap applies the "value" and "attr" entries of data to node.
//
// A string "value" replaces the first text child of an element (creating one
// at the front of the children if none exists), the data of a text node, or
// the matching attribute on the parent of an attribute node. An "attr" map is
// only applied to element nodes: string entries update an attribute with the
// same local name or are appended as new attributes. Non-string entries are
// ignored.
func updateNodeFromMap(node *xmlquery.Node, data map[string]interface{}) {
	// Update node value if present.
	if value, ok := data["value"]; ok {
		if strValue, ok := value.(string); ok {
			switch node.Type {
			case xmlquery.ElementNode:
				// Find the first text child if it exists.
				var textNode *xmlquery.Node
				for child := node.FirstChild; child != nil; child = child.NextSibling {
					if child.Type == xmlquery.TextNode {
						textNode = child
						break
					}
				}

				if textNode != nil {
					// Update the existing text node.
					textNode.Data = strValue
				} else {
					// Create a new text node and link it in as first child.
					newText := &xmlquery.Node{
						Type:   xmlquery.TextNode,
						Data:   strValue,
						Parent: node,
					}
					if node.FirstChild != nil {
						newText.NextSibling = node.FirstChild
						node.FirstChild.PrevSibling = newText
						node.FirstChild = newText
					} else {
						node.FirstChild = newText
						node.LastChild = newText
					}
				}
			case xmlquery.TextNode:
				// Directly update the text node.
				node.Data = strValue
			case xmlquery.AttributeNode:
				// Update the attribute on the parent whose local name matches.
				if node.Parent != nil {
					for i, attr := range node.Parent.Attr {
						if attr.Name.Local == node.Data {
							node.Parent.Attr[i].Value = strValue
							break
						}
					}
				}
			}
		}
	}

	// Update attributes if present.
	if attrs, ok := data["attr"].(map[string]interface{}); ok && node.Type == xmlquery.ElementNode {
		for name, value := range attrs {
			strValue, ok := value.(string)
			if !ok {
				continue
			}

			// Look for an existing attribute with this local name.
			found := false
			for i, attr := range node.Attr {
				if attr.Name.Local == name {
					node.Attr[i].Value = strValue
					found = true
					break
				}
			}

			// Add a new attribute if none was found.
			if !found {
				node.Attr = append(node.Attr, xmlquery.Attr{
					Name: struct {
						Space, Local string
					}{Local: name},
					Value: strValue,
				})
			}
		}
	}
}

// nodeTypeName returns a string representation of a node type: "element",
// "text", "attribute", "comment", "declaration", or "unknown".
func nodeTypeName(nodeType xmlquery.NodeType) string {
	switch nodeType {
	case xmlquery.ElementNode:
		return "element"
	case xmlquery.TextNode:
		return "text"
	case xmlquery.AttributeNode:
		return "attribute"
	case xmlquery.CommentNode:
		return "comment"
	case xmlquery.DeclarationNode:
		return "declaration"
	default:
		return "unknown"
	}
}

// namedEntityReplacer rewrites decimal numeric character references to their
// named counterparts in a single pass. It is built once at package
// initialization; a strings.Replacer is safe for concurrent use.
//
// NOTE(review): only quot, apos, lt, gt and amp are predefined by XML itself;
// the remaining names are HTML entities and need a DTD to be valid in XML.
var namedEntityReplacer = strings.NewReplacer(
	// Basic XML entities
	"&#34;", "&quot;", // double quote
	"&#39;", "&apos;", // single quote
	"&#60;", "&lt;", // less than
	"&#62;", "&gt;", // greater than
	"&#38;", "&amp;", // ampersand

	// Common symbols
	"&#160;", "&nbsp;", // non-breaking space
	"&#169;", "&copy;", // copyright
	"&#174;", "&reg;", // registered trademark
	"&#8364;", "&euro;", // euro
	"&#163;", "&pound;", // pound
	"&#165;", "&yen;", // yen
	"&#162;", "&cent;", // cent
	"&#167;", "&sect;", // section
	"&#8482;", "&trade;", // trademark
	"&#9824;", "&spades;", // spade
	"&#9827;", "&clubs;", // club
	"&#9829;", "&hearts;", // heart
	"&#9830;", "&diams;", // diamond

	// Special characters
	"&#161;", "&iexcl;", // inverted exclamation
	"&#191;", "&iquest;", // inverted question
	"&#171;", "&laquo;", // left angle quotes
	"&#187;", "&raquo;", // right angle quotes
	"&#183;", "&middot;", // middle dot
	"&#8226;", "&bull;", // bullet
	"&#8230;", "&hellip;", // horizontal ellipsis
	"&#8242;", "&prime;", // prime
	"&#8243;", "&Prime;", // double prime
	"&#8254;", "&oline;", // overline
	"&#8260;", "&frasl;", // fraction slash

	// Math symbols
	"&#177;", "&plusmn;", // plus-minus
	"&#215;", "&times;", // multiplication
	"&#247;", "&divide;", // division
	"&#8734;", "&infin;", // infinity
	"&#8776;", "&asymp;", // almost equal
	"&#8800;", "&ne;", // not equal
	"&#8804;", "&le;", // less than or equal
	"&#8805;", "&ge;", // greater than or equal
	"&#8721;", "&sum;", // summation
	"&#8730;", "&radic;", // square root
	"&#8747;", "&int;", // integral

	// Accented characters
	"&#192;", "&Agrave;", // A grave
	"&#193;", "&Aacute;", // A acute
	"&#194;", "&Acirc;", // A circumflex
	"&#195;", "&Atilde;", // A tilde
	"&#196;", "&Auml;", // A umlaut
	"&#197;", "&Aring;", // A ring
	"&#198;", "&AElig;", // AE ligature
	"&#199;", "&Ccedil;", // C cedilla
	"&#200;", "&Egrave;", // E grave
	"&#201;", "&Eacute;", // E acute
	"&#202;", "&Ecirc;", // E circumflex
	"&#203;", "&Euml;", // E umlaut
	"&#204;", "&Igrave;", // I grave
	"&#205;", "&Iacute;", // I acute
	"&#206;", "&Icirc;", // I circumflex
	"&#207;", "&Iuml;", // I umlaut
	"&#208;", "&ETH;", // Eth
	"&#209;", "&Ntilde;", // N tilde
	"&#210;", "&Ograve;", // O grave
	"&#211;", "&Oacute;", // O acute
	"&#212;", "&Ocirc;", // O circumflex
	"&#213;", "&Otilde;", // O tilde
	"&#214;", "&Ouml;", // O umlaut
	"&#216;", "&Oslash;", // O slash
	"&#217;", "&Ugrave;", // U grave
	"&#218;", "&Uacute;", // U acute
	"&#219;", "&Ucirc;", // U circumflex
	"&#220;", "&Uuml;", // U umlaut
	"&#221;", "&Yacute;", // Y acute
	"&#222;", "&THORN;", // Thorn
	"&#223;", "&szlig;", // Sharp s
	"&#224;", "&agrave;", // a grave
	"&#225;", "&aacute;", // a acute
	"&#226;", "&acirc;", // a circumflex
	"&#227;", "&atilde;", // a tilde
	"&#228;", "&auml;", // a umlaut
	"&#229;", "&aring;", // a ring
	"&#230;", "&aelig;", // ae ligature
	"&#231;", "&ccedil;", // c cedilla
	"&#232;", "&egrave;", // e grave
	"&#233;", "&eacute;", // e acute
	"&#234;", "&ecirc;", // e circumflex
	"&#235;", "&euml;", // e umlaut
	"&#236;", "&igrave;", // i grave
	"&#237;", "&iacute;", // i acute
	"&#238;", "&icirc;", // i circumflex
	"&#239;", "&iuml;", // i umlaut
	"&#240;", "&eth;", // eth
	"&#241;", "&ntilde;", // n tilde
	"&#242;", "&ograve;", // o grave
	"&#243;", "&oacute;", // o acute
	"&#244;", "&ocirc;", // o circumflex
	"&#245;", "&otilde;", // o tilde
	"&#246;", "&ouml;", // o umlaut
	"&#248;", "&oslash;", // o slash
	"&#249;", "&ugrave;", // u grave
	"&#250;", "&uacute;", // u acute
	"&#251;", "&ucirc;", // u circumflex
	"&#252;", "&uuml;", // u umlaut
	"&#253;", "&yacute;", // y acute
	"&#254;", "&thorn;", // thorn
	"&#255;", "&yuml;", // y umlaut
)

// ConvertToNamedEntities replaces numeric XML entities with their named
// counterparts (for example "&#34;" becomes "&quot;"). Text that contains
// none of the known numeric entities is returned unchanged.
func ConvertToNamedEntities(xml string) string {
	return namedEntityReplacer.Replace(xml)
}