Files
BigChef/processor/xml.go

396 lines
11 KiB
Go

package processor
import (
"fmt"
"log"
"modify/processor/xpath"
"strings"
"github.com/antchfx/xmlquery"
lua "github.com/yuin/gopher-lua"
)
// XMLProcessor implements the Processor interface for XML documents.
// It declares no fields, so the zero value is ready to use.
type XMLProcessor struct{}
// ProcessContent implements the Processor interface for XMLProcessor.
//
// It parses content as XML, selects the nodes matching the XPath expression
// path, and runs luaExpr (wrapped by BuildLuaScript) once per matched node in
// its own Lua state. A node is updated only when the script leaves the global
// "modified" with the string form "true".
//
// It returns the serialized document, the number of nodes modified, the
// number of nodes matched, and an error. When nothing matches, or on any
// error, the original content is returned unchanged; errors raised while
// processing nodes also report the counts reached so far.
func (p *XMLProcessor) ProcessContent(content string, path string, luaExpr string) (string, int, int, error) {
	// encoding/xml requires a predefined struct; xmlquery can parse
	// arbitrary, unknown XML into a node tree.
	doc, err := xmlquery.Parse(strings.NewReader(content))
	if err != nil {
		return content, 0, 0, fmt.Errorf("error parsing XML: %w", err)
	}

	// Find nodes matching the XPath pattern.
	nodes, err := xpath.Get(doc, path)
	if err != nil {
		return content, 0, 0, fmt.Errorf("error executing XPath: %w", err)
	}
	matchCount := len(nodes)
	if matchCount == 0 {
		return content, 0, 0, nil
	}

	// Apply modifications to each node.
	modCount := 0
	for _, node := range nodes {
		modified, err := p.processNode(doc, node, path, luaExpr)
		if err != nil {
			return content, modCount, matchCount, err
		}
		if modified {
			modCount++
		}
	}

	// Serialize the modified document. When it starts with an XML
	// declaration, that node is rendered on its own and then unlinked from
	// the document before the remainder is rendered.
	if first := doc.FirstChild; first != nil && first.Type == xmlquery.DeclarationNode {
		declaration := first.OutputXML(true)
		doc.FirstChild = first.NextSibling
		return ConvertToNamedEntities(declaration + doc.OutputXML(true)), modCount, matchCount, nil
	}
	return ConvertToNamedEntities(doc.OutputXML(true)), modCount, matchCount, nil
}

// processNode runs luaExpr against a single matched node and applies the
// result. It reports whether the node was modified.
//
// Each call owns its Lua state and closes it on return, so states do not
// accumulate while ProcessContent iterates over many matches.
func (p *XMLProcessor) processNode(doc, node *xmlquery.Node, path, luaExpr string) (bool, error) {
	L, err := NewLuaState()
	if err != nil {
		return false, fmt.Errorf("error creating Lua state: %w", err)
	}
	defer L.Close()

	if err := p.ToLua(L, node); err != nil {
		return false, fmt.Errorf("error converting to Lua: %w", err)
	}
	if err := L.DoString(BuildLuaScript(luaExpr)); err != nil {
		return false, fmt.Errorf("error executing Lua: %w", err)
	}
	result, err := p.FromLua(L)
	if err != nil {
		return false, fmt.Errorf("error getting result from Lua: %w", err)
	}
	log.Printf("%#v", result)

	// The script signals a change through the global "modified".
	if L.GetGlobal("modified").String() != "true" {
		log.Printf("No changes made to node at path: %s", node.Data)
		return false, nil
	}

	switch updated := result.(type) {
	case string:
		// The script replaced v with a plain string: write it via the XPath.
		// NOTE(review): this addresses the document by path rather than the
		// current node, so with several matches it may touch nodes other
		// than this one - confirm against xpath.Set's semantics.
		xpath.Set(doc, path, updated)
	case map[string]interface{}:
		// The script returned a table: apply value/attribute updates.
		updateNodeFromMap(node, updated)
	}
	return true, nil
}
// ToLua publishes an XML node to the Lua state as the global table "v".
//
// data must be a *xmlquery.Node; any other type yields an error. The table
// carries "type" (see nodeTypeToString), "name" (the node's Data), "value"
// (the node's InnerText) and, when the node has attributes, an "attr" table
// keyed by each attribute's local name.
func (p *XMLProcessor) ToLua(L *lua.LState, data interface{}) error {
	node, isNode := data.(*xmlquery.Node)
	if !isNode {
		return fmt.Errorf("expected xmlquery.Node, got %T", data)
	}

	nodeTable := L.NewTable()
	L.SetField(nodeTable, "type", lua.LString(nodeTypeToString(node.Type)))
	L.SetField(nodeTable, "name", lua.LString(node.Data))
	L.SetField(nodeTable, "value", lua.LString(node.InnerText()))

	// The "attr" field is only present when there is at least one attribute.
	if len(node.Attr) != 0 {
		attrTable := L.NewTable()
		for i := range node.Attr {
			attribute := node.Attr[i]
			L.SetField(attrTable, attribute.Name.Local, lua.LString(attribute.Value))
		}
		L.SetField(nodeTable, "attr", attrTable)
	}

	L.SetGlobal("v", nodeTable)
	return nil
}
// FromLua reads the global "v" back from the Lua state.
//
// A table is converted with luaTableToMap and returned as a
// map[string]interface{}; every other value (strings included) is returned
// as its String() form. The error result is always nil.
func (p *XMLProcessor) FromLua(L *lua.LState) (interface{}, error) {
	current := L.GetGlobal("v")
	switch current.Type() {
	case lua.LTTable:
		// Tables carry attribute and other structured updates.
		return luaTableToMap(L, current.(*lua.LTable)), nil
	default:
		return current.String(), nil
	}
}
// luaTableToMap converts a Lua table into a Go map.
//
// Only entries with string keys are kept. Nested tables are converted
// recursively; every other value is stored as its String() form.
func luaTableToMap(L *lua.LState, table *lua.LTable) map[string]interface{} {
	converted := make(map[string]interface{})
	table.ForEach(func(key, val lua.LValue) {
		// Skip non-string keys (for example numeric indices).
		if key.Type() != lua.LTString {
			return
		}
		name := key.String()
		switch val.Type() {
		case lua.LTTable:
			converted[name] = luaTableToMap(L, val.(*lua.LTable))
		default:
			converted[name] = val.String()
		}
	})
	return converted
}
// nodeTypeToString maps an xmlquery node type to the label exposed to Lua:
// "element", "text" or "attribute", and "other" for every remaining type.
func nodeTypeToString(nodeType xmlquery.NodeType) string {
	if nodeType == xmlquery.ElementNode {
		return "element"
	}
	if nodeType == xmlquery.TextNode {
		return "text"
	}
	if nodeType == xmlquery.AttributeNode {
		return "attribute"
	}
	return "other"
}
// updateNodeFromMap applies the updates described by data to node.
//
// Recognized keys:
//   - "value" (string): new text for the node, applied by setNodeText. It is
//     skipped when it equals the node's current InnerText. The "value" handed
//     to Lua is InnerText, so an untouched value would otherwise overwrite
//     the first text child of an element that also has child elements with
//     the concatenated text of all its descendants.
//   - "attr" (map of string values): attributes to set or add; only applied
//     to element nodes. Non-string entries are ignored.
//
// NOTE: attributes that do not exist yet are appended in map iteration
// order, which Go does not define, so their relative order may vary.
func updateNodeFromMap(node *xmlquery.Node, data map[string]interface{}) {
	if text, ok := data["value"].(string); ok && text != node.InnerText() {
		setNodeText(node, text)
	}

	attrs, ok := data["attr"].(map[string]interface{})
	if !ok || node.Type != xmlquery.ElementNode {
		return
	}
	for name, raw := range attrs {
		if value, isString := raw.(string); isString {
			setNodeAttr(node, name, value)
		}
	}
}

// setNodeText stores text as the textual value of node according to its type.
func setNodeText(node *xmlquery.Node, text string) {
	switch node.Type {
	case xmlquery.ElementNode:
		// Reuse the first existing text child when there is one.
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			if child.Type == xmlquery.TextNode {
				child.Data = text
				return
			}
		}
		// Otherwise link a new text node in as the first child.
		created := &xmlquery.Node{
			Type:   xmlquery.TextNode,
			Data:   text,
			Parent: node,
		}
		if first := node.FirstChild; first != nil {
			created.NextSibling = first
			first.PrevSibling = created
		} else {
			node.LastChild = created
		}
		node.FirstChild = created
	case xmlquery.TextNode:
		node.Data = text
	case xmlquery.AttributeNode:
		// The value lives in the parent's attribute list; node.Data is the
		// attribute's local name.
		if node.Parent == nil {
			return
		}
		for i := range node.Parent.Attr {
			if node.Parent.Attr[i].Name.Local == node.Data {
				node.Parent.Attr[i].Value = text
				return
			}
		}
	}
}

// setNodeAttr sets the attribute with the given local name on node,
// appending it when no attribute with that name exists yet.
func setNodeAttr(node *xmlquery.Node, name, value string) {
	for i := range node.Attr {
		if node.Attr[i].Name.Local == name {
			node.Attr[i].Value = value
			return
		}
	}
	node.Attr = append(node.Attr, xmlquery.Attr{
		Name: struct {
			Space, Local string
		}{Local: name},
		Value: value,
	})
}
// nodeTypeName returns a human-readable name for an xmlquery node type:
// "element", "text", "attribute", "comment" or "declaration", and "unknown"
// for anything else.
func nodeTypeName(nodeType xmlquery.NodeType) string {
	name := "unknown"
	switch nodeType {
	case xmlquery.ElementNode:
		name = "element"
	case xmlquery.TextNode:
		name = "text"
	case xmlquery.AttributeNode:
		name = "attribute"
	case xmlquery.CommentNode:
		name = "comment"
	case xmlquery.DeclarationNode:
		name = "declaration"
	}
	return name
}
// namedEntityReplacer rewrites decimal numeric character references
// (for example "&#34;") into the matching named entity ("&quot;").
//
// NOTE(review): the references are written in decimal form, derived from the
// code point of the character each entry describes. Confirm that this is the
// form the serializer emits; hexadecimal references are not covered.
var namedEntityReplacer = strings.NewReplacer(
	// Basic XML entities
	"&#34;", "&quot;", // double quote
	"&#39;", "&apos;", // single quote
	"&#60;", "&lt;", // less than
	"&#62;", "&gt;", // greater than
	"&#38;", "&amp;", // ampersand
	// Common symbols
	"&#160;", "&nbsp;", // non-breaking space
	"&#169;", "&copy;", // copyright
	"&#174;", "&reg;", // registered trademark
	"&#8364;", "&euro;", // euro
	"&#163;", "&pound;", // pound
	"&#165;", "&yen;", // yen
	"&#162;", "&cent;", // cent
	"&#167;", "&sect;", // section
	"&#8482;", "&trade;", // trademark
	"&#9824;", "&spades;", // spade
	"&#9827;", "&clubs;", // club
	"&#9829;", "&hearts;", // heart
	"&#9830;", "&diams;", // diamond
	// Special characters
	"&#161;", "&iexcl;", // inverted exclamation
	"&#191;", "&iquest;", // inverted question
	"&#171;", "&laquo;", // left angle quotes
	"&#187;", "&raquo;", // right angle quotes
	"&#183;", "&middot;", // middle dot
	"&#8226;", "&bull;", // bullet
	"&#8230;", "&hellip;", // horizontal ellipsis
	"&#8242;", "&prime;", // prime
	"&#8243;", "&Prime;", // double prime
	"&#8254;", "&oline;", // overline
	"&#8260;", "&frasl;", // fraction slash
	// Math symbols
	"&#177;", "&plusmn;", // plus-minus
	"&#215;", "&times;", // multiplication
	"&#247;", "&divide;", // division
	"&#8734;", "&infin;", // infinity
	"&#8776;", "&asymp;", // almost equal
	"&#8800;", "&ne;", // not equal
	"&#8804;", "&le;", // less than or equal
	"&#8805;", "&ge;", // greater than or equal
	"&#8721;", "&sum;", // summation
	"&#8730;", "&radic;", // square root
	"&#8747;", "&int;", // integral
	// Accented characters
	"&#192;", "&Agrave;", // A grave
	"&#193;", "&Aacute;", // A acute
	"&#194;", "&Acirc;", // A circumflex
	"&#195;", "&Atilde;", // A tilde
	"&#196;", "&Auml;", // A umlaut
	"&#197;", "&Aring;", // A ring
	"&#198;", "&AElig;", // AE ligature
	"&#199;", "&Ccedil;", // C cedilla
	"&#200;", "&Egrave;", // E grave
	"&#201;", "&Eacute;", // E acute
	"&#202;", "&Ecirc;", // E circumflex
	"&#203;", "&Euml;", // E umlaut
	"&#204;", "&Igrave;", // I grave
	"&#205;", "&Iacute;", // I acute
	"&#206;", "&Icirc;", // I circumflex
	"&#207;", "&Iuml;", // I umlaut
	"&#208;", "&ETH;", // Eth
	"&#209;", "&Ntilde;", // N tilde
	"&#210;", "&Ograve;", // O grave
	"&#211;", "&Oacute;", // O acute
	"&#212;", "&Ocirc;", // O circumflex
	"&#213;", "&Otilde;", // O tilde
	"&#214;", "&Ouml;", // O umlaut
	"&#216;", "&Oslash;", // O slash
	"&#217;", "&Ugrave;", // U grave
	"&#218;", "&Uacute;", // U acute
	"&#219;", "&Ucirc;", // U circumflex
	"&#220;", "&Uuml;", // U umlaut
	"&#221;", "&Yacute;", // Y acute
	"&#222;", "&THORN;", // Thorn
	"&#223;", "&szlig;", // Sharp s
	"&#224;", "&agrave;", // a grave
	"&#225;", "&aacute;", // a acute
	"&#226;", "&acirc;", // a circumflex
	"&#227;", "&atilde;", // a tilde
	"&#228;", "&auml;", // a umlaut
	"&#229;", "&aring;", // a ring
	"&#230;", "&aelig;", // ae ligature
	"&#231;", "&ccedil;", // c cedilla
	"&#232;", "&egrave;", // e grave
	"&#233;", "&eacute;", // e acute
	"&#234;", "&ecirc;", // e circumflex
	"&#235;", "&euml;", // e umlaut
	"&#236;", "&igrave;", // i grave
	"&#237;", "&iacute;", // i acute
	"&#238;", "&icirc;", // i circumflex
	"&#239;", "&iuml;", // i umlaut
	"&#240;", "&eth;", // eth
	"&#241;", "&ntilde;", // n tilde
	"&#242;", "&ograve;", // o grave
	"&#243;", "&oacute;", // o acute
	"&#244;", "&ocirc;", // o circumflex
	"&#245;", "&otilde;", // o tilde
	"&#246;", "&ouml;", // o umlaut
	"&#248;", "&oslash;", // o slash
	"&#249;", "&ugrave;", // u grave
	"&#250;", "&uacute;", // u acute
	"&#251;", "&ucirc;", // u circumflex
	"&#252;", "&uuml;", // u umlaut
	"&#253;", "&yacute;", // y acute
	"&#254;", "&thorn;", // thorn
	"&#255;", "&yuml;", // y umlaut
)

// ConvertToNamedEntities replaces numeric XML entities with their named
// counterparts and returns the rewritten text. Text without any listed
// numeric reference is returned unchanged.
//
// The replacement is done in a single left-to-right pass, so the result does
// not depend on map iteration order and replaced text is never rescanned.
//
// NOTE: XML itself predefines only quot, apos, lt, gt and amp. The other
// names come from the HTML entity set and need a DTD to be understood by a
// strict XML parser.
func ConvertToNamedEntities(xml string) string {
	return namedEntityReplacer.Replace(xml)
}