Files
BigChef/processor/xml.go

534 lines
14 KiB
Go

package processor
import (
"cook/utils"
"encoding/xml"
"fmt"
"io"
"sort"
"strconv"
"strings"
logger "git.site.quack-lab.dev/dave/cylogger"
lua "github.com/yuin/gopher-lua"
)
// xmlLogger is the logger shared by the XML processor; every message it emits
// carries the "processor/xml" prefix.
var xmlLogger = logger.Default.WithPrefix("processor/xml")
// XMLElement is one element of a parsed XML document together with the byte
// offsets that allow it to be edited in place in the original source text.
type XMLElement struct {
	// Tag is the element's local name (without any namespace prefix).
	Tag string
	// Attributes maps an attribute's local name to its value and value offsets.
	Attributes map[string]XMLAttribute
	// Text is the element's whitespace-trimmed character data, or "" when the
	// parser recorded none.
	Text string
	// Children lists the direct child elements in the order they were parsed.
	Children []*XMLElement
	// StartPos is the offset at which the parser located the opening tag;
	// EndPos is the decoder offset right after the element's end token.
	StartPos, EndPos int64
	// TextStart and TextEnd delimit the trimmed text inside the source; both
	// stay zero when no text position was recorded.
	TextStart, TextEnd int64
}
// XMLAttribute is an attribute value plus the byte range that value occupies
// in the source text.
type XMLAttribute struct {
	// Value is the attribute value as reported by the XML decoder.
	Value string
	// ValueStart is the offset of the first byte of the value (just after the
	// opening quote); ValueEnd is the offset one past its last byte.
	ValueStart, ValueEnd int64
}
// parseXMLWithPositions parses content into a tree of XMLElement values while
// recording the byte offsets of every element, attribute value and text node.
//
// Offsets are taken from the decoder itself: its input offset before a token
// is read is where that token starts, and the offset afterwards is where it
// ends. This keeps positions correct for namespace-prefixed names and when
// comments or processing instructions sit between tags and text.
//
// It returns the root element, or nil with a nil error when content holds no
// element at all. A decoder failure is returned wrapped in an error.
//
// Limitations: when an element has several text nodes only the last one is
// kept, and attributes written with whitespace around '=' are not recorded.
func parseXMLWithPositions(content string) (*XMLElement, error) {
	decoder := xml.NewDecoder(strings.NewReader(content))
	var root *XMLElement
	var stack []*XMLElement

	for {
		tokenStart := decoder.InputOffset()
		token, err := decoder.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to parse XML: %w", err)
		}
		tokenEnd := decoder.InputOffset()

		switch t := token.(type) {
		case xml.StartElement:
			element := &XMLElement{
				Tag:        t.Name.Local,
				Attributes: make(map[string]XMLAttribute),
				StartPos:   tokenStart,
				Children:   []*XMLElement{},
			}

			if len(t.Attr) > 0 {
				// The decoder reports attributes in document order, so the
				// i-th decoded attribute corresponds to the i-th raw one.
				spans := scanXMLAttrSpans(content[tokenStart:tokenEnd])
				for i, attr := range t.Attr {
					if i >= len(spans) {
						break
					}
					span := spans[i]
					sameName := span.name == attr.Name.Local ||
						strings.HasSuffix(span.name, ":"+attr.Name.Local)
					// Only the compact name=<quote> form is recorded: the
					// attribute's own start is later derived from ValueStart
					// under that assumption.
					if !sameName || !span.compact {
						continue
					}
					element.Attributes[attr.Name.Local] = XMLAttribute{
						Value:      attr.Value,
						ValueStart: tokenStart + int64(span.valueStart),
						ValueEnd:   tokenStart + int64(span.valueEnd),
					}
				}
			}

			if len(stack) > 0 {
				parent := stack[len(stack)-1]
				parent.Children = append(parent.Children, element)
			} else {
				root = element
			}
			stack = append(stack, element)

		case xml.CharData:
			text := strings.TrimSpace(string(t))
			if len(stack) == 0 || text == "" {
				continue
			}
			current := stack[len(stack)-1]
			current.Text = text

			raw := content[tokenStart:tokenEnd]
			if idx := strings.Index(raw, text); idx >= 0 {
				current.TextStart = tokenStart + int64(idx)
				current.TextEnd = current.TextStart + int64(len(text))
			} else if rawTrimmed := strings.TrimSpace(raw); rawTrimmed != "" {
				// The decoded text differs from the raw bytes (entities,
				// CRLF normalisation), so fall back to the trimmed raw span.
				// The trimmed text starts with a non-space byte, hence its
				// first occurrence in raw is exactly where it begins.
				current.TextStart = tokenStart + int64(strings.Index(raw, rawTrimmed))
				current.TextEnd = current.TextStart + int64(len(rawTrimmed))
			}

		case xml.EndElement:
			if len(stack) > 0 {
				current := stack[len(stack)-1]
				current.EndPos = tokenEnd
				stack = stack[:len(stack)-1]
			}
		}
	}

	return root, nil
}

// xmlAttrSpan is the raw location of one attribute inside an opening tag.
// Offsets are relative to the start of the tag text.
type xmlAttrSpan struct {
	name                 string // attribute name as written, prefix included
	valueStart, valueEnd int    // bounds of the value, quotes excluded
	compact              bool   // true when written as name=<quote> with no spaces
}

// scanXMLAttrSpans walks the text of one opening tag ("<name a="1" ...>") and
// returns the span of every attribute in document order. Scanning stops at the
// first construct it does not understand, so the returned spans always line up
// with the leading attributes of the tag.
func scanXMLAttrSpans(tag string) []xmlAttrSpan {
	isSpace := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	isNameEnd := func(c byte) bool {
		return isSpace(c) || c == '=' || c == '>' || c == '/'
	}

	var spans []xmlAttrSpan
	n := len(tag)
	i := 0

	// Skip "<" and the element name.
	for i < n && !isNameEnd(tag[i]) {
		i++
	}

	for i < n {
		for i < n && isSpace(tag[i]) {
			i++
		}
		if i >= n || tag[i] == '>' || tag[i] == '/' {
			break
		}

		nameStart := i
		for i < n && !isNameEnd(tag[i]) {
			i++
		}
		nameEnd := i

		for i < n && isSpace(tag[i]) {
			i++
		}
		if i >= n || tag[i] != '=' {
			break
		}
		compact := i == nameEnd
		i++ // consume '='

		afterEq := i
		for i < n && isSpace(tag[i]) {
			i++
		}
		compact = compact && i == afterEq
		if i >= n || (tag[i] != '"' && tag[i] != '\'') {
			break
		}
		quote := tag[i]
		i++

		closing := strings.IndexByte(tag[i:], quote)
		if closing < 0 {
			break
		}
		spans = append(spans, xmlAttrSpan{
			name:       tag[nameStart:nameEnd],
			valueStart: i,
			valueEnd:   i + closing,
			compact:    compact,
		})
		i += closing + 1
	}

	return spans
}
// XMLChange describes one difference found between the original and the
// modified XML tree, expressed against byte offsets of the original source.
type XMLChange struct {
	// Type is one of "text", "attribute", "add_attribute", "remove_attribute",
	// "add_element" or "remove_element".
	Type string
	// Path identifies the affected node, e.g. "/item[0]/@name".
	Path string
	// OldValue and NewValue hold the previous and the new text or attribute
	// value where the change type has one.
	OldValue, NewValue string
	// StartPos and EndPos delimit the affected source range; pure insertions
	// only set StartPos.
	StartPos, EndPos int64
	// InsertText is the text to insert for "add_attribute" and "add_element".
	InsertText string
}
// findXMLChanges compares modified against original and returns the edits that
// turn the original source into the modified tree. All positions come from
// original; path is the location prefix used to build each change's Path.
//
// Children are paired per tag name by their index among same-named siblings.
// Results are produced in a deterministic order: text, changed and removed
// attributes (sorted by name), added attributes (sorted by name), then
// children grouped by tag in order of first appearance.
//
// It returns nil when either tree is nil.
//
// NOTE: insert positions are derived from the local tag name and assume an
// unprefixed "<tag ...>...</tag>" element; prefixed or self-closing parents
// yield offsets that do not point at the intended place.
func findXMLChanges(original, modified *XMLElement, path string) []XMLChange {
	if original == nil || modified == nil {
		return nil
	}

	var changes []XMLChange

	// Text content.
	if original.Text != modified.Text {
		changes = append(changes, XMLChange{
			Type:     "text",
			Path:     path,
			OldValue: original.Text,
			NewValue: modified.Text,
			StartPos: original.TextStart,
			EndPos:   original.TextEnd,
		})
	}

	// Map iteration order is random; sort names so output is reproducible.
	sortedNames := func(attrs map[string]XMLAttribute) []string {
		names := make([]string, 0, len(attrs))
		for name := range attrs {
			names = append(names, name)
		}
		sort.Strings(names)
		return names
	}

	// Changed and removed attributes.
	for _, attrName := range sortedNames(original.Attributes) {
		origAttr := original.Attributes[attrName]
		modAttr, exists := modified.Attributes[attrName]
		switch {
		case !exists:
			changes = append(changes, XMLChange{
				Type:     "remove_attribute",
				Path:     path + "/@" + attrName,
				OldValue: origAttr.Value,
				StartPos: origAttr.ValueStart - int64(len(attrName)+2), // include the name, '=' and opening quote
				EndPos:   origAttr.ValueEnd + 1,                        // include the closing quote
			})
		case origAttr.Value != modAttr.Value:
			changes = append(changes, XMLChange{
				Type:     "attribute",
				Path:     path + "/@" + attrName,
				OldValue: origAttr.Value,
				NewValue: modAttr.Value,
				StartPos: origAttr.ValueStart,
				EndPos:   origAttr.ValueEnd,
			})
		}
	}

	// Added attributes go right after "<tag", never before the '<'.
	attrInsertPos := original.StartPos + int64(len(original.Tag)) + 1
	for _, attrName := range sortedNames(modified.Attributes) {
		if _, exists := original.Attributes[attrName]; exists {
			continue
		}
		changes = append(changes, XMLChange{
			Type:       "add_attribute",
			Path:       path + "/@" + attrName,
			NewValue:   modified.Attributes[attrName].Value,
			StartPos:   attrInsertPos,
			InsertText: fmt.Sprintf(` %s="%s"`, attrName, modified.Attributes[attrName].Value),
		})
	}

	// Group children by tag, remembering the order in which tags first appear
	// (original first, then tags that only exist in modified).
	origByTag := make(map[string][]*XMLElement)
	modByTag := make(map[string][]*XMLElement)
	var tagOrder []string
	for _, child := range original.Children {
		if _, seen := origByTag[child.Tag]; !seen {
			tagOrder = append(tagOrder, child.Tag)
		}
		origByTag[child.Tag] = append(origByTag[child.Tag], child)
	}
	for _, child := range modified.Children {
		_, inOrig := origByTag[child.Tag]
		_, inMod := modByTag[child.Tag]
		if !inOrig && !inMod {
			tagOrder = append(tagOrder, child.Tag)
		}
		modByTag[child.Tag] = append(modByTag[child.Tag], child)
	}

	// New elements are inserted just before the parent's closing "</tag>".
	elemInsertPos := original.EndPos - int64(len(original.Tag)+3)

	for _, tag := range tagOrder {
		origChildren := origByTag[tag]
		modChildren := modByTag[tag]

		shared := len(origChildren)
		if len(modChildren) < shared {
			shared = len(modChildren)
		}

		// Present on both sides: compare recursively.
		for i := 0; i < shared; i++ {
			childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i)
			changes = append(changes, findXMLChanges(origChildren[i], modChildren[i], childPath)...)
		}

		// Only in original: removed.
		for i := shared; i < len(origChildren); i++ {
			changes = append(changes, XMLChange{
				Type:     "remove_element",
				Path:     fmt.Sprintf("%s/%s[%d]", path, tag, i),
				StartPos: origChildren[i].StartPos,
				EndPos:   origChildren[i].EndPos,
			})
		}

		// Only in modified: added. This also covers tags that the original
		// element does not contain at all.
		for i := shared; i < len(modChildren); i++ {
			changes = append(changes, XMLChange{
				Type:       "add_element",
				Path:       fmt.Sprintf("%s/%s[%d]", path, tag, i),
				InsertText: serializeXMLElement(modChildren[i], " "),
				StartPos:   elemInsertPos,
			})
		}
	}

	return changes
}
// serializeXMLElement renders elem and its descendants as XML text.
//
// indent is written before the opening tag; each nesting level adds one more
// space. Attributes are emitted sorted by name so the output is stable. An
// element with neither text nor children is written in self-closing form.
// Attribute values and text are XML-escaped, so characters such as '&', '<'
// and '"' cannot produce malformed markup. A nil elem yields "".
func serializeXMLElement(elem *XMLElement, indent string) string {
	if elem == nil {
		return ""
	}

	var sb strings.Builder
	sb.WriteString(indent)
	sb.WriteString("<")
	sb.WriteString(elem.Tag)

	attrNames := make([]string, 0, len(elem.Attributes))
	for name := range elem.Attributes {
		attrNames = append(attrNames, name)
	}
	sort.Strings(attrNames)
	for _, name := range attrNames {
		sb.WriteString(" ")
		sb.WriteString(name)
		sb.WriteString(`="`)
		// strings.Builder never returns a write error, so the result of
		// EscapeText can be ignored safely.
		_ = xml.EscapeText(&sb, []byte(elem.Attributes[name].Value))
		sb.WriteString(`"`)
	}

	if elem.Text == "" && len(elem.Children) == 0 {
		sb.WriteString(" />")
		return sb.String()
	}

	sb.WriteString(">")
	if elem.Text != "" {
		_ = xml.EscapeText(&sb, []byte(elem.Text))
	}

	if len(elem.Children) > 0 {
		sb.WriteString("\n")
		for _, child := range elem.Children {
			sb.WriteString(serializeXMLElement(child, indent+" "))
			sb.WriteString("\n")
		}
		// Put the closing tag back at this element's own indentation.
		sb.WriteString(indent)
	}

	sb.WriteString("</")
	sb.WriteString(elem.Tag)
	sb.WriteString(">")
	return sb.String()
}
// applyXMLChanges translates each XMLChange into the ReplaceCommand that
// performs it on the source text. Replacements cover StartPos..EndPos,
// insertions use an empty range at StartPos, removals replace their range with
// nothing. Changes of an unknown type are skipped. The result is nil when no
// command was produced.
func applyXMLChanges(changes []XMLChange) []utils.ReplaceCommand {
	var commands []utils.ReplaceCommand

	for idx := range changes {
		ch := changes[idx]
		from := int(ch.StartPos)

		var cmd utils.ReplaceCommand
		switch ch.Type {
		case "text", "attribute":
			// Overwrite the old value with the new one.
			cmd = utils.ReplaceCommand{From: from, To: int(ch.EndPos), With: ch.NewValue}
		case "add_attribute":
			// Zero-width range: pure insertion.
			cmd = utils.ReplaceCommand{From: from, To: from, With: ch.InsertText}
		case "add_element":
			// New elements start on their own line.
			cmd = utils.ReplaceCommand{From: from, To: from, With: "\n" + ch.InsertText}
		case "remove_attribute", "remove_element":
			cmd = utils.ReplaceCommand{From: from, To: int(ch.EndPos), With: ""}
		default:
			continue
		}
		commands = append(commands, cmd)
	}

	return commands
}
// deepCopyXMLElement returns an independent copy of elem and of its whole
// subtree; nil is returned for a nil input. The copy always has a non-nil
// attribute map and a non-nil child slice.
func deepCopyXMLElement(elem *XMLElement) *XMLElement {
	if elem == nil {
		return nil
	}

	// Start from a shallow copy for the scalar fields, then replace the two
	// reference-typed fields with fresh containers.
	clone := *elem

	clone.Attributes = make(map[string]XMLAttribute)
	for name, attr := range elem.Attributes {
		clone.Attributes[name] = attr
	}

	clone.Children = make([]*XMLElement, len(elem.Children))
	for idx := range elem.Children {
		clone.Children[idx] = deepCopyXMLElement(elem.Children[idx])
	}

	return &clone
}
// parseNumeric interprets s as a 64-bit floating point number. It returns the
// value and true on success, or 0 and false when strconv.ParseFloat reports
// any error.
func parseNumeric(s string) (float64, bool) {
	value, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, false
	}
	return value, true
}
// formatNumeric renders f as a decimal string. Values that are whole numbers
// within the int64 range are printed without a fractional part; everything
// else (fractions, values beyond int64, NaN and infinities) is printed with
// the shortest representation that round-trips.
func formatNumeric(f float64) string {
	// Converting a float outside the int64 range to int64 gives an
	// implementation-dependent result, so check the range first. NaN fails
	// both comparisons and falls through to FormatFloat.
	const int64Limit = 9223372036854775808.0 // 2^63
	if f >= -int64Limit && f < int64Limit {
		if whole := int64(f); float64(whole) == f {
			return strconv.FormatInt(whole, 10)
		}
	}
	return strconv.FormatFloat(f, 'f', -1, 64)
}
// ProcessXML runs the Lua code of command against the XML document in content
// and returns the replace commands that apply the resulting modifications to
// the original text without rewriting untouched parts.
//
// The parsed document is exposed to Lua as the global table "root" and the
// file name as the global "file". The script signals that it changed something
// by leaving the global "modified" set to true; otherwise, or when no
// difference is detected, ProcessXML returns nil, nil.
//
// An error is returned when the XML cannot be parsed or contains no element,
// when the Lua state cannot be created, when the script fails, or when "root"
// is no longer a table after the script ran.
func ProcessXML(content string, command utils.ModifyCommand, filename string) ([]utils.ReplaceCommand, error) {
	processXMLLogger := xmlLogger.WithPrefix("ProcessXML").WithField("commandName", command.Name).WithField("file", filename)
	processXMLLogger.Debug("Starting XML processing for file")

	// Parse XML with position tracking.
	originalElem, err := parseXMLWithPositions(content)
	if err != nil {
		processXMLLogger.Error("Failed to parse XML: %v", err)
		return nil, fmt.Errorf("failed to parse XML: %w", err)
	}
	if originalElem == nil {
		// The parser returns a nil root without error for input that holds
		// no element (empty, whitespace or comment-only). Stop here instead
		// of dereferencing it below.
		processXMLLogger.Error("Failed to parse XML: no root element found")
		return nil, fmt.Errorf("failed to parse XML: no root element found")
	}
	processXMLLogger.Debug("Successfully parsed XML content")

	// Create Lua state.
	L, err := NewLuaState()
	if err != nil {
		processXMLLogger.Error("Error creating Lua state: %v", err)
		return nil, fmt.Errorf("error creating Lua state: %w", err)
	}
	defer L.Close()

	// Set filename global.
	L.SetGlobal("file", lua.LString(filename))

	// Work on a copy so the original tree keeps the source positions and
	// values needed for diffing.
	modifiedElem := deepCopyXMLElement(originalElem)

	// Convert to Lua table and set as global.
	luaTable := xmlElementToLuaTable(L, modifiedElem)
	L.SetGlobal("root", luaTable)
	processXMLLogger.Debug("Set XML data as Lua global 'root'")

	// Build and execute Lua script.
	luaExpr := BuildJSONLuaScript(command.Lua) // Reuse JSON script builder
	processXMLLogger.Debug("Built Lua script from expression: %q", command.Lua)
	if err := L.DoString(luaExpr); err != nil {
		processXMLLogger.Error("Lua script execution failed: %v\nScript: %s", err, luaExpr)
		return nil, fmt.Errorf("lua script execution failed: %w", err)
	}
	processXMLLogger.Debug("Lua script executed successfully")

	// Check if modification flag is set.
	modifiedVal := L.GetGlobal("modified")
	if modifiedVal.Type() != lua.LTBool || !lua.LVAsBool(modifiedVal) {
		processXMLLogger.Debug("Skipping - no modifications indicated by Lua script")
		return nil, nil
	}

	// Get the modified data back from Lua.
	modifiedTable := L.GetGlobal("root")
	if modifiedTable.Type() != lua.LTTable {
		processXMLLogger.Error("Expected 'root' to be a table after Lua processing")
		return nil, fmt.Errorf("expected 'root' to be a table after Lua processing")
	}

	// Apply Lua modifications back to XMLElement.
	luaTableToXMLElement(L, modifiedTable.(*lua.LTable), modifiedElem)

	// Find changes between original and modified.
	changes := findXMLChanges(originalElem, modifiedElem, "")
	processXMLLogger.Debug("Found %d changes", len(changes))
	if len(changes) == 0 {
		return nil, nil
	}

	// Generate surgical replace commands.
	commands := applyXMLChanges(changes)
	processXMLLogger.Debug("Generated %d replace commands", len(commands))
	return commands, nil
}
// xmlElementToLuaTable builds the Lua view of elem: a table whose "_tag" field
// holds the tag name, whose "_attr" field (only present when the element has
// attributes) maps attribute names to string values, and whose "_children"
// field (only present when the element has children) is a 1-based array of
// child tables built the same way.
func xmlElementToLuaTable(L *lua.LState, elem *XMLElement) *lua.LTable {
	node := L.CreateTable(0, 4)
	node.RawSetString("_tag", lua.LString(elem.Tag))

	if attrCount := len(elem.Attributes); attrCount > 0 {
		attrTable := L.CreateTable(0, attrCount)
		for key, attribute := range elem.Attributes {
			attrTable.RawSetString(key, lua.LString(attribute.Value))
		}
		node.RawSetString("_attr", attrTable)
	}

	if childCount := len(elem.Children); childCount > 0 {
		childTable := L.CreateTable(childCount, 0)
		for idx := range elem.Children {
			// Lua arrays are 1-based.
			childTable.RawSetInt(idx+1, xmlElementToLuaTable(L, elem.Children[idx]))
		}
		node.RawSetString("_children", childTable)
	}

	return node
}
// luaTableToXMLElement copies the state of a Lua table produced by
// xmlElementToLuaTable (and possibly edited by a script) back into elem.
//
// When "_attr" is a table, elem's attributes are rebuilt from it. String
// values are taken as they are; numbers and booleans are converted to their
// textual form, so a script doing arithmetic on an attribute does not make the
// attribute disappear. Entries with other key or value types are dropped.
//
// When "_children" is a table, its array part is walked from index 1 up to the
// first nil. The i-th table updates the i-th existing child; children without
// a corresponding entry are dropped. Entries beyond the existing child count
// and non-table entries are ignored, so new elements cannot be added this way.
//
// A missing "_attr" or "_children" field leaves that part of elem unchanged.
// L is not used by this function.
func luaTableToXMLElement(L *lua.LState, table *lua.LTable, elem *XMLElement) {
	// Update attributes.
	if attrVal := table.RawGetString("_attr"); attrVal.Type() == lua.LTTable {
		attrTable := attrVal.(*lua.LTable)

		// Clear and rebuild attributes.
		elem.Attributes = make(map[string]XMLAttribute)
		attrTable.ForEach(func(key lua.LValue, value lua.LValue) {
			name, ok := key.(lua.LString)
			if !ok {
				return
			}
			switch v := value.(type) {
			case lua.LString:
				elem.Attributes[string(name)] = XMLAttribute{Value: string(v)}
			case lua.LNumber:
				// Lua arithmetic yields numbers even when the operand was a
				// numeric string; write them back as plain decimal text.
				elem.Attributes[string(name)] = XMLAttribute{Value: formatNumeric(float64(v))}
			case lua.LBool:
				elem.Attributes[string(name)] = XMLAttribute{Value: v.String()}
			}
		})
	}

	// Update children.
	if childrenVal := table.RawGetString("_children"); childrenVal.Type() == lua.LTTable {
		childrenTable := childrenVal.(*lua.LTable)
		newChildren := []*XMLElement{}

		// Iterate over array indices until the first hole.
		for i := 1; ; i++ {
			childVal := childrenTable.RawGetInt(i)
			if childVal.Type() == lua.LTNil {
				break
			}
			childTable, isTable := childVal.(*lua.LTable)
			if !isTable || i-1 >= len(elem.Children) {
				continue
			}
			// Update the existing child in place and keep it.
			luaTableToXMLElement(L, childTable, elem.Children[i-1])
			newChildren = append(newChildren, elem.Children[i-1])
		}

		elem.Children = newChildren
	}
}