405 lines
12 KiB
Go
405 lines
12 KiB
Go
package processor
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"modify/processor/xpath"
|
|
"strings"
|
|
|
|
"github.com/antchfx/xmlquery"
|
|
lua "github.com/yuin/gopher-lua"
|
|
)
|
|
|
|
// XMLProcessor is the Processor implementation for XML documents.
// It carries no state of its own; all work happens in its methods.
type XMLProcessor struct{}
|
|
|
|
// ProcessContent implements the Processor interface for XMLProcessor
|
|
func (p *XMLProcessor) ProcessContent(content string, path string, luaExpr string) (string, int, int, error) {
|
|
// Parse XML document
|
|
// We can't really use encoding/xml here because it requires a pre defined struct
|
|
// And we HAVE TO parse dynamic unknown XML
|
|
doc, err := xmlquery.Parse(strings.NewReader(content))
|
|
if err != nil {
|
|
return content, 0, 0, fmt.Errorf("error parsing XML: %v", err)
|
|
}
|
|
|
|
// Find nodes matching the XPath pattern
|
|
nodes, err := xpath.Get(doc, path)
|
|
if err != nil {
|
|
return content, 0, 0, fmt.Errorf("error executing XPath: %v", err)
|
|
}
|
|
|
|
matchCount := len(nodes)
|
|
if matchCount == 0 {
|
|
return content, 0, 0, nil
|
|
}
|
|
|
|
// Apply modifications to each node
|
|
modCount := 0
|
|
for _, node := range nodes {
|
|
L, err := NewLuaState()
|
|
if err != nil {
|
|
return content, 0, 0, fmt.Errorf("error creating Lua state: %v", err)
|
|
}
|
|
defer L.Close()
|
|
|
|
table, err := p.ToLua(L, node)
|
|
if err != nil {
|
|
return content, modCount, matchCount, fmt.Errorf("error converting to Lua: %v", err)
|
|
}
|
|
L.SetGlobal("v", table)
|
|
|
|
err = L.DoString(BuildLuaScript(luaExpr))
|
|
if err != nil {
|
|
return content, modCount, matchCount, fmt.Errorf("error executing Lua: %v", err)
|
|
}
|
|
|
|
result, err := p.FromLua(L)
|
|
if err != nil {
|
|
return content, modCount, matchCount, fmt.Errorf("error getting result from Lua: %v", err)
|
|
}
|
|
log.Printf("%#v", result)
|
|
|
|
modified := false
|
|
modified = L.GetGlobal("modified").String() == "true"
|
|
if !modified {
|
|
log.Printf("No changes made to node at path: %s", node.Data)
|
|
continue
|
|
}
|
|
|
|
// Apply modification based on the result
|
|
if updatedValue, ok := result.(string); ok {
|
|
// If the result is a simple string, update the node value directly
|
|
xpath.Set(doc, path, updatedValue)
|
|
} else if nodeData, ok := result.(map[string]interface{}); ok {
|
|
// If the result is a map, apply more complex updates
|
|
updateNodeFromMap(node, nodeData)
|
|
}
|
|
|
|
modCount++
|
|
}
|
|
|
|
// Serialize the modified XML document to string
|
|
if doc.FirstChild != nil && doc.FirstChild.Type == xmlquery.DeclarationNode {
|
|
// If we have an XML declaration, start with it
|
|
declaration := doc.FirstChild.OutputXML(true)
|
|
// Remove the firstChild (declaration) before serializing the rest of the document
|
|
doc.FirstChild = doc.FirstChild.NextSibling
|
|
return ConvertToNamedEntities(declaration + doc.OutputXML(true)), modCount, matchCount, nil
|
|
}
|
|
|
|
// Convert numeric entities to named entities for better readability
|
|
return ConvertToNamedEntities(doc.OutputXML(true)), modCount, matchCount, nil
|
|
}
|
|
|
|
// ToLua converts XML node values to Lua variables
|
|
func (p *XMLProcessor) ToLua(L *lua.LState, data interface{}) (lua.LValue, error) {
|
|
// Check if data is an xmlquery.Node
|
|
node, ok := data.(*xmlquery.Node)
|
|
if !ok {
|
|
return nil, fmt.Errorf("expected xmlquery.Node, got %T", data)
|
|
}
|
|
|
|
// Create a simple table with essential data
|
|
table := L.NewTable()
|
|
|
|
// For element nodes, just provide basic info
|
|
L.SetField(table, "type", lua.LString(nodeTypeToString(node.Type)))
|
|
L.SetField(table, "name", lua.LString(node.Data))
|
|
L.SetField(table, "value", lua.LString(node.InnerText()))
|
|
|
|
// Add children if any
|
|
children := L.NewTable()
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
childTable, err := p.ToLua(L, child)
|
|
if err == nil {
|
|
children.Append(childTable)
|
|
}
|
|
}
|
|
L.SetField(table, "children", children)
|
|
|
|
attrs := L.NewTable()
|
|
if len(node.Attr) > 0 {
|
|
for _, attr := range node.Attr {
|
|
L.SetField(attrs, attr.Name.Local, lua.LString(attr.Value))
|
|
}
|
|
}
|
|
L.SetField(table, "attr", attrs)
|
|
|
|
return table, nil
|
|
}
|
|
|
|
// FromLua gets modified values from Lua
|
|
func (p *XMLProcessor) FromLua(L *lua.LState) (interface{}, error) {
|
|
luaValue := L.GetGlobal("v")
|
|
|
|
// Handle string values directly
|
|
if luaValue.Type() == lua.LTString {
|
|
return luaValue.String(), nil
|
|
}
|
|
|
|
// Handle tables (for attributes and more complex updates)
|
|
if luaValue.Type() == lua.LTTable {
|
|
return luaTableToMap(L, luaValue.(*lua.LTable)), nil
|
|
}
|
|
|
|
return luaValue.String(), nil
|
|
}
|
|
|
|
// Simple helper to convert a Lua table to a Go map
|
|
func luaTableToMap(L *lua.LState, table *lua.LTable) map[string]interface{} {
|
|
result := make(map[string]interface{})
|
|
|
|
table.ForEach(func(k, v lua.LValue) {
|
|
if k.Type() == lua.LTString {
|
|
key := k.String()
|
|
|
|
if v.Type() == lua.LTTable {
|
|
result[key] = luaTableToMap(L, v.(*lua.LTable))
|
|
} else {
|
|
result[key] = v.String()
|
|
}
|
|
}
|
|
})
|
|
|
|
return result
|
|
}
|
|
|
|
// Simple helper to convert node type to string
|
|
func nodeTypeToString(nodeType xmlquery.NodeType) string {
|
|
switch nodeType {
|
|
case xmlquery.ElementNode:
|
|
return "element"
|
|
case xmlquery.TextNode:
|
|
return "text"
|
|
case xmlquery.AttributeNode:
|
|
return "attribute"
|
|
default:
|
|
return "other"
|
|
}
|
|
}
|
|
|
|
// Helper function to update an XML node from a map
|
|
func updateNodeFromMap(node *xmlquery.Node, data map[string]interface{}) {
|
|
// Update node value if present
|
|
if value, ok := data["value"]; ok {
|
|
if strValue, ok := value.(string); ok {
|
|
// For element nodes, replace text content
|
|
if node.Type == xmlquery.ElementNode {
|
|
// Find the first text child if it exists
|
|
var textNode *xmlquery.Node
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
if child.Type == xmlquery.TextNode {
|
|
textNode = child
|
|
break
|
|
}
|
|
}
|
|
|
|
if textNode != nil {
|
|
// Update existing text node
|
|
textNode.Data = strValue
|
|
} else {
|
|
// Create new text node
|
|
newText := &xmlquery.Node{
|
|
Type: xmlquery.TextNode,
|
|
Data: strValue,
|
|
Parent: node,
|
|
}
|
|
|
|
// Insert at beginning of children
|
|
if node.FirstChild != nil {
|
|
newText.NextSibling = node.FirstChild
|
|
node.FirstChild.PrevSibling = newText
|
|
node.FirstChild = newText
|
|
} else {
|
|
node.FirstChild = newText
|
|
node.LastChild = newText
|
|
}
|
|
}
|
|
} else if node.Type == xmlquery.TextNode {
|
|
// Directly update text node
|
|
node.Data = strValue
|
|
} else if node.Type == xmlquery.AttributeNode {
|
|
// Update attribute value
|
|
if node.Parent != nil {
|
|
for i, attr := range node.Parent.Attr {
|
|
if attr.Name.Local == node.Data {
|
|
node.Parent.Attr[i].Value = strValue
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update attributes if present
|
|
if attrs, ok := data["attr"].(map[string]interface{}); ok && node.Type == xmlquery.ElementNode {
|
|
for name, value := range attrs {
|
|
if strValue, ok := value.(string); ok {
|
|
// Look for existing attribute
|
|
found := false
|
|
for i, attr := range node.Attr {
|
|
if attr.Name.Local == name {
|
|
node.Attr[i].Value = strValue
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
|
|
// Add new attribute if not found
|
|
if !found {
|
|
node.Attr = append(node.Attr, xmlquery.Attr{
|
|
Name: struct {
|
|
Space, Local string
|
|
}{Local: name},
|
|
Value: strValue,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Helper function to get a string representation of node type
|
|
func nodeTypeName(nodeType xmlquery.NodeType) string {
|
|
switch nodeType {
|
|
case xmlquery.ElementNode:
|
|
return "element"
|
|
case xmlquery.TextNode:
|
|
return "text"
|
|
case xmlquery.AttributeNode:
|
|
return "attribute"
|
|
case xmlquery.CommentNode:
|
|
return "comment"
|
|
case xmlquery.DeclarationNode:
|
|
return "declaration"
|
|
default:
|
|
return "unknown"
|
|
}
|
|
}
|
|
|
|
// namedEntityReplacer rewrites the decimal character references listed below
// to their named entity equivalents in a single pass over the input. It is
// built once because the table never changes.
//
// NOTE(review): only the decimal forms listed here are matched; hexadecimal
// references such as "&#xA9;" are left as they are. Apart from the five basic
// XML entities, the names below are HTML entity names, which an XML parser
// only accepts when a DTD declares them; confirm consumers expect that.
var namedEntityReplacer = strings.NewReplacer(
	// Basic XML entities
	"&#34;", "&quot;", // double quote
	"&#39;", "&apos;", // single quote
	"&#60;", "&lt;",   // less than
	"&#62;", "&gt;",   // greater than
	"&#38;", "&amp;",  // ampersand

	// Common symbols
	"&#160;", "&nbsp;",    // non-breaking space
	"&#169;", "&copy;",    // copyright
	"&#174;", "&reg;",     // registered trademark
	"&#8364;", "&euro;",   // euro
	"&#163;", "&pound;",   // pound
	"&#165;", "&yen;",     // yen
	"&#162;", "&cent;",    // cent
	"&#167;", "&sect;",    // section
	"&#8482;", "&trade;",  // trademark
	"&#9824;", "&spades;", // spade
	"&#9827;", "&clubs;",  // club
	"&#9829;", "&hearts;", // heart
	"&#9830;", "&diams;",  // diamond

	// Special characters
	"&#161;", "&iexcl;",   // inverted exclamation
	"&#191;", "&iquest;",  // inverted question
	"&#171;", "&laquo;",   // left angle quotes
	"&#187;", "&raquo;",   // right angle quotes
	"&#183;", "&middot;",  // middle dot
	"&#8226;", "&bull;",   // bullet
	"&#8230;", "&hellip;", // horizontal ellipsis
	"&#8242;", "&prime;",  // prime
	"&#8243;", "&Prime;",  // double prime
	"&#8254;", "&oline;",  // overline
	"&#8260;", "&frasl;",  // fraction slash

	// Math symbols
	"&#177;", "&plusmn;", // plus-minus
	"&#215;", "&times;",  // multiplication
	"&#247;", "&divide;", // division
	"&#8734;", "&infin;", // infinity
	"&#8776;", "&asymp;", // almost equal
	"&#8800;", "&ne;",    // not equal
	"&#8804;", "&le;",    // less than or equal
	"&#8805;", "&ge;",    // greater than or equal
	"&#8721;", "&sum;",   // summation
	"&#8730;", "&radic;", // square root
	"&#8747;", "&int;",   // integral

	// Accented characters
	"&#192;", "&Agrave;", // A grave
	"&#193;", "&Aacute;", // A acute
	"&#194;", "&Acirc;",  // A circumflex
	"&#195;", "&Atilde;", // A tilde
	"&#196;", "&Auml;",   // A umlaut
	"&#197;", "&Aring;",  // A ring
	"&#198;", "&AElig;",  // AE ligature
	"&#199;", "&Ccedil;", // C cedilla
	"&#200;", "&Egrave;", // E grave
	"&#201;", "&Eacute;", // E acute
	"&#202;", "&Ecirc;",  // E circumflex
	"&#203;", "&Euml;",   // E umlaut
	"&#204;", "&Igrave;", // I grave
	"&#205;", "&Iacute;", // I acute
	"&#206;", "&Icirc;",  // I circumflex
	"&#207;", "&Iuml;",   // I umlaut
	"&#208;", "&ETH;",    // Eth
	"&#209;", "&Ntilde;", // N tilde
	"&#210;", "&Ograve;", // O grave
	"&#211;", "&Oacute;", // O acute
	"&#212;", "&Ocirc;",  // O circumflex
	"&#213;", "&Otilde;", // O tilde
	"&#214;", "&Ouml;",   // O umlaut
	"&#216;", "&Oslash;", // O slash
	"&#217;", "&Ugrave;", // U grave
	"&#218;", "&Uacute;", // U acute
	"&#219;", "&Ucirc;",  // U circumflex
	"&#220;", "&Uuml;",   // U umlaut
	"&#221;", "&Yacute;", // Y acute
	"&#222;", "&THORN;",  // Thorn
	"&#223;", "&szlig;",  // Sharp s
	"&#224;", "&agrave;", // a grave
	"&#225;", "&aacute;", // a acute
	"&#226;", "&acirc;",  // a circumflex
	"&#227;", "&atilde;", // a tilde
	"&#228;", "&auml;",   // a umlaut
	"&#229;", "&aring;",  // a ring
	"&#230;", "&aelig;",  // ae ligature
	"&#231;", "&ccedil;", // c cedilla
	"&#232;", "&egrave;", // e grave
	"&#233;", "&eacute;", // e acute
	"&#234;", "&ecirc;",  // e circumflex
	"&#235;", "&euml;",   // e umlaut
	"&#236;", "&igrave;", // i grave
	"&#237;", "&iacute;", // i acute
	"&#238;", "&icirc;",  // i circumflex
	"&#239;", "&iuml;",   // i umlaut
	"&#240;", "&eth;",    // eth
	"&#241;", "&ntilde;", // n tilde
	"&#242;", "&ograve;", // o grave
	"&#243;", "&oacute;", // o acute
	"&#244;", "&ocirc;",  // o circumflex
	"&#245;", "&otilde;", // o tilde
	"&#246;", "&ouml;",   // o umlaut
	"&#248;", "&oslash;", // o slash
	"&#249;", "&ugrave;", // u grave
	"&#250;", "&uacute;", // u acute
	"&#251;", "&ucirc;",  // u circumflex
	"&#252;", "&uuml;",   // u umlaut
	"&#253;", "&yacute;", // y acute
	"&#254;", "&thorn;",  // thorn
	"&#255;", "&yuml;",   // y umlaut
)

// ConvertToNamedEntities replaces numeric XML entities with their named
// counterparts, for example "&#34;" becomes "&quot;" and "&#169;" becomes
// "&copy;".
//
// Only the references in the table above are rewritten; any other text,
// including numeric references that are not listed, is returned unchanged.
func ConvertToNamedEntities(xml string) string {
	return namedEntityReplacer.Replace(xml)
}
|