package processor import ( "cook/utils" "encoding/xml" "fmt" "io" "sort" "strconv" "strings" logger "git.site.quack-lab.dev/dave/cylogger" ) var xmlLogger = logger.Default.WithPrefix("processor/xml") // XMLElement represents a parsed XML element with position tracking type XMLElement struct { Tag string Attributes map[string]XMLAttribute Text string Children []*XMLElement StartPos int64 EndPos int64 TextStart int64 TextEnd int64 } // XMLAttribute represents an attribute with its position in the source type XMLAttribute struct { Value string ValueStart int64 ValueEnd int64 } // parseXMLWithPositions parses XML while tracking byte positions of all elements and attributes func parseXMLWithPositions(content string) (*XMLElement, error) { decoder := xml.NewDecoder(strings.NewReader(content)) var root *XMLElement var stack []*XMLElement var lastPos int64 for { token, err := decoder.Token() if err == io.EOF { break } if err != nil { return nil, fmt.Errorf("failed to parse XML: %v", err) } offset := decoder.InputOffset() switch t := token.(type) { case xml.StartElement: // Find the actual start position of this element by searching for " 0 { tagEnd := offset tagSection := content[startPos:tagEnd] for _, attr := range t.Attr { // Find attribute in the tag section: attrname="value" attrPattern := attr.Name.Local + `="` attrIdx := strings.Index(tagSection, attrPattern) if attrIdx >= 0 { valueStart := startPos + int64(attrIdx) + int64(len(attrPattern)) valueEnd := valueStart + int64(len(attr.Value)) element.Attributes[attr.Name.Local] = XMLAttribute{ Value: attr.Value, ValueStart: valueStart, ValueEnd: valueEnd, } } } } if len(stack) > 0 { parent := stack[len(stack)-1] parent.Children = append(parent.Children, element) } else { root = element } stack = append(stack, element) lastPos = offset case xml.CharData: rawText := string(t) text := strings.TrimSpace(rawText) if len(stack) > 0 && text != "" { current := stack[len(stack)-1] current.Text = text // The text content is 
between lastPos (after >) and offset (before = 0 { current.TextStart = lastPos + int64(trimmedStart) current.TextEnd = current.TextStart + int64(len(text)) } } lastPos = offset case xml.EndElement: if len(stack) > 0 { current := stack[len(stack)-1] current.EndPos = offset stack = stack[:len(stack)-1] } lastPos = offset } } return root, nil } // xmlElementToMap converts XMLElement to a map for comparison func xmlElementToMap(elem *XMLElement) map[string]interface{} { result := make(map[string]interface{}) result["_tag"] = elem.Tag if len(elem.Attributes) > 0 { attrs := make(map[string]interface{}) for k, v := range elem.Attributes { attrs[k] = v.Value } result["_attr"] = attrs } if elem.Text != "" { result["_text"] = elem.Text } if len(elem.Children) > 0 { children := make([]interface{}, len(elem.Children)) for i, child := range elem.Children { children[i] = xmlElementToMap(child) } result["_children"] = children } return result } // XMLChange represents a detected difference between original and modified XML structures type XMLChange struct { Type string // "text", "attribute", "add_element", "remove_element" Path string OldValue string NewValue string StartPos int64 EndPos int64 InsertText string } func findXMLChanges(original, modified *XMLElement, path string) []XMLChange { var changes []XMLChange // Check text content changes if original.Text != modified.Text { changes = append(changes, XMLChange{ Type: "text", Path: path, OldValue: original.Text, NewValue: modified.Text, StartPos: original.TextStart, EndPos: original.TextEnd, }) } // Check attribute changes for attrName, origAttr := range original.Attributes { if modAttr, exists := modified.Attributes[attrName]; exists { if origAttr.Value != modAttr.Value { changes = append(changes, XMLChange{ Type: "attribute", Path: path + "/@" + attrName, OldValue: origAttr.Value, NewValue: modAttr.Value, StartPos: origAttr.ValueStart, EndPos: origAttr.ValueEnd, }) } } else { // Attribute removed changes = append(changes, 
XMLChange{ Type: "remove_attribute", Path: path + "/@" + attrName, OldValue: origAttr.Value, StartPos: origAttr.ValueStart - int64(len(attrName)+2), // Include attr=" part EndPos: origAttr.ValueEnd + 1, // Include closing " }) } } // Check for added attributes for attrName, modAttr := range modified.Attributes { if _, exists := original.Attributes[attrName]; !exists { changes = append(changes, XMLChange{ Type: "add_attribute", Path: path + "/@" + attrName, NewValue: modAttr.Value, StartPos: original.StartPos, // Will be adjusted to insert after tag name InsertText: fmt.Sprintf(` %s="%s"`, attrName, modAttr.Value), }) } } // Check children recursively origChildMap := make(map[string][]*XMLElement) for _, child := range original.Children { origChildMap[child.Tag] = append(origChildMap[child.Tag], child) } modChildMap := make(map[string][]*XMLElement) for _, child := range modified.Children { modChildMap[child.Tag] = append(modChildMap[child.Tag], child) } // Compare children by tag name processedTags := make(map[string]bool) for tag, origChildren := range origChildMap { processedTags[tag] = true modChildren := modChildMap[tag] // Match children by index maxLen := len(origChildren) if len(modChildren) > maxLen { maxLen = len(modChildren) } for i := 0; i < maxLen; i++ { childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i) if i < len(origChildren) && i < len(modChildren) { // Both exist, compare recursively childChanges := findXMLChanges(origChildren[i], modChildren[i], childPath) changes = append(changes, childChanges...) 
} else if i < len(origChildren) { // Child removed changes = append(changes, XMLChange{ Type: "remove_element", Path: childPath, StartPos: origChildren[i].StartPos, EndPos: origChildren[i].EndPos, }) } } // Handle added children if len(modChildren) > len(origChildren) { for i := len(origChildren); i < len(modChildren); i++ { childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i) // Generate XML text for the new element xmlText := serializeXMLElement(modChildren[i], " ") changes = append(changes, XMLChange{ Type: "add_element", Path: childPath, InsertText: xmlText, StartPos: original.EndPos - int64(len(original.Tag)+3), // Before closing tag }) } } } // Handle completely new tag types for tag, modChildren := range modChildMap { if !processedTags[tag] { for i, child := range modChildren { childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i) xmlText := serializeXMLElement(child, " ") changes = append(changes, XMLChange{ Type: "add_element", Path: childPath, InsertText: xmlText, StartPos: original.EndPos - int64(len(original.Tag)+3), }) } } } return changes } // serializeXMLElement converts an XMLElement back to XML text func serializeXMLElement(elem *XMLElement, indent string) string { var sb strings.Builder sb.WriteString(indent) sb.WriteString("<") sb.WriteString(elem.Tag) // Write attributes attrNames := make([]string, 0, len(elem.Attributes)) for name := range elem.Attributes { attrNames = append(attrNames, name) } sort.Strings(attrNames) for _, name := range attrNames { attr := elem.Attributes[name] sb.WriteString(fmt.Sprintf(` %s="%s"`, name, attr.Value)) } if elem.Text == "" && len(elem.Children) == 0 { sb.WriteString(" />") return sb.String() } sb.WriteString(">") if elem.Text != "" { sb.WriteString(elem.Text) } if len(elem.Children) > 0 { sb.WriteString("\n") for _, child := range elem.Children { sb.WriteString(serializeXMLElement(child, indent+" ")) sb.WriteString("\n") } sb.WriteString(indent) } sb.WriteString("") return sb.String() } // applyXMLChanges 
generates ReplaceCommands from detected XML changes func applyXMLChanges(changes []XMLChange) []utils.ReplaceCommand { var commands []utils.ReplaceCommand for _, change := range changes { switch change.Type { case "text": commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.EndPos), With: change.NewValue, }) case "attribute": commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.EndPos), With: change.NewValue, }) case "add_attribute": // Insert after tag name, before > or /> commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.StartPos), With: change.InsertText, }) case "remove_attribute": commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.EndPos), With: "", }) case "add_element": commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.StartPos), With: "\n" + change.InsertText, }) case "remove_element": commands = append(commands, utils.ReplaceCommand{ From: int(change.StartPos), To: int(change.EndPos), With: "", }) } } return commands } // modifyXMLElement applies modifications to an XMLElement based on a modification function func modifyXMLElement(elem *XMLElement, modifyFunc func(*XMLElement)) *XMLElement { // Deep copy the element copied := deepCopyXMLElement(elem) modifyFunc(copied) return copied } // deepCopyXMLElement creates a deep copy of an XMLElement func deepCopyXMLElement(elem *XMLElement) *XMLElement { if elem == nil { return nil } copied := &XMLElement{ Tag: elem.Tag, Text: elem.Text, StartPos: elem.StartPos, EndPos: elem.EndPos, TextStart: elem.TextStart, TextEnd: elem.TextEnd, Attributes: make(map[string]XMLAttribute), Children: make([]*XMLElement, len(elem.Children)), } for k, v := range elem.Attributes { copied.Attributes[k] = v } for i, child := range elem.Children { copied.Children[i] = deepCopyXMLElement(child) } return copied } // Helper function 
// to parse numeric values. parseNumeric converts s with strconv.ParseFloat
// (64-bit) and reports whether the conversion succeeded; on failure it
// returns (0, false).
func parseNumeric(s string) (float64, bool) {
	f, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, false
	}
	return f, true
}

// formatNumeric formats f for output. Values that are whole numbers and fit
// in an int64 are written as plain integers ("3", not "3.0"); everything else
// is written in 'f' notation with the fewest digits that represent f.
func formatNumeric(f float64) string {
	// 2^63: the first float64 above the int64 range. Converting a float
	// outside [-2^63, 2^63) to int64 gives an implementation-dependent
	// result, so the range is checked before the round-trip comparison.
	const int64Limit = 9223372036854775808.0

	if f >= -int64Limit && f < int64Limit && f == float64(int64(f)) {
		return strconv.FormatInt(int64(f), 10)
	}
	return strconv.FormatFloat(f, 'f', -1, 64)
}