Hallucinate up an xml parser implementation
Who knows if this will work...
This commit is contained in:
447
processor/xml.go
Normal file
447
processor/xml.go
Normal file
@@ -0,0 +1,447 @@
|
||||
package processor
|
||||
|
||||
import (
|
||||
"cook/utils"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
logger "git.site.quack-lab.dev/dave/cylogger"
|
||||
)
|
||||
|
||||
// xmlLogger is the logger for this file; it is derived from the default
// cylogger instance with the "processor/xml" prefix.
var xmlLogger = logger.Default.WithPrefix("processor/xml")
|
||||
|
||||
// XMLElement represents a parsed XML element with position tracking
|
||||
type XMLElement struct {
|
||||
Tag string
|
||||
Attributes map[string]XMLAttribute
|
||||
Text string
|
||||
Children []*XMLElement
|
||||
StartPos int64
|
||||
EndPos int64
|
||||
TextStart int64
|
||||
TextEnd int64
|
||||
}
|
||||
|
||||
// XMLAttribute is a single attribute value plus the location of that value in
// the source text.
type XMLAttribute struct {
	// Value is the attribute value as reported by the XML decoder.
	Value string
	// ValueStart and ValueEnd delimit the value in the source as a half-open
	// byte range (the surrounding quotes are not part of the range).
	ValueStart, ValueEnd int64
}
|
||||
|
||||
// parseXMLWithPositions parses XML while tracking byte positions of all elements and attributes
|
||||
func parseXMLWithPositions(content string) (*XMLElement, error) {
|
||||
decoder := xml.NewDecoder(strings.NewReader(content))
|
||||
var root *XMLElement
|
||||
var stack []*XMLElement
|
||||
var lastPos int64
|
||||
|
||||
for {
|
||||
token, err := decoder.Token()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse XML: %v", err)
|
||||
}
|
||||
|
||||
offset := decoder.InputOffset()
|
||||
|
||||
switch t := token.(type) {
|
||||
case xml.StartElement:
|
||||
// Find the actual start position of this element by searching for "<tagname"
|
||||
tagSearchPattern := "<" + t.Name.Local
|
||||
startPos := int64(strings.LastIndex(content[:offset], tagSearchPattern))
|
||||
|
||||
element := &XMLElement{
|
||||
Tag: t.Name.Local,
|
||||
Attributes: make(map[string]XMLAttribute),
|
||||
StartPos: startPos,
|
||||
Children: []*XMLElement{},
|
||||
}
|
||||
|
||||
// Parse attributes - search within the tag boundaries
|
||||
if len(t.Attr) > 0 {
|
||||
tagEnd := offset
|
||||
tagSection := content[startPos:tagEnd]
|
||||
|
||||
for _, attr := range t.Attr {
|
||||
// Find attribute in the tag section: attrname="value"
|
||||
attrPattern := attr.Name.Local + `="`
|
||||
attrIdx := strings.Index(tagSection, attrPattern)
|
||||
if attrIdx >= 0 {
|
||||
valueStart := startPos + int64(attrIdx) + int64(len(attrPattern))
|
||||
valueEnd := valueStart + int64(len(attr.Value))
|
||||
element.Attributes[attr.Name.Local] = XMLAttribute{
|
||||
Value: attr.Value,
|
||||
ValueStart: valueStart,
|
||||
ValueEnd: valueEnd,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(stack) > 0 {
|
||||
parent := stack[len(stack)-1]
|
||||
parent.Children = append(parent.Children, element)
|
||||
} else {
|
||||
root = element
|
||||
}
|
||||
|
||||
stack = append(stack, element)
|
||||
lastPos = offset
|
||||
|
||||
case xml.CharData:
|
||||
rawText := string(t)
|
||||
text := strings.TrimSpace(rawText)
|
||||
if len(stack) > 0 && text != "" {
|
||||
current := stack[len(stack)-1]
|
||||
current.Text = text
|
||||
|
||||
// The text content is between lastPos (after >) and offset (before </)
|
||||
// Search for the trimmed text within the raw content
|
||||
textInContent := content[lastPos:offset]
|
||||
trimmedStart := strings.Index(textInContent, text)
|
||||
if trimmedStart >= 0 {
|
||||
current.TextStart = lastPos + int64(trimmedStart)
|
||||
current.TextEnd = current.TextStart + int64(len(text))
|
||||
}
|
||||
}
|
||||
lastPos = offset
|
||||
|
||||
case xml.EndElement:
|
||||
if len(stack) > 0 {
|
||||
current := stack[len(stack)-1]
|
||||
current.EndPos = offset
|
||||
stack = stack[:len(stack)-1]
|
||||
}
|
||||
lastPos = offset
|
||||
}
|
||||
}
|
||||
|
||||
return root, nil
|
||||
}
|
||||
|
||||
// xmlElementToMap converts XMLElement to a map for comparison
|
||||
func xmlElementToMap(elem *XMLElement) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
result["_tag"] = elem.Tag
|
||||
|
||||
if len(elem.Attributes) > 0 {
|
||||
attrs := make(map[string]interface{})
|
||||
for k, v := range elem.Attributes {
|
||||
attrs[k] = v.Value
|
||||
}
|
||||
result["_attr"] = attrs
|
||||
}
|
||||
|
||||
if elem.Text != "" {
|
||||
result["_text"] = elem.Text
|
||||
}
|
||||
|
||||
if len(elem.Children) > 0 {
|
||||
children := make([]interface{}, len(elem.Children))
|
||||
for i, child := range elem.Children {
|
||||
children[i] = xmlElementToMap(child)
|
||||
}
|
||||
result["_children"] = children
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// XMLChange describes one difference between an original and a modified XML
// tree, expressed as an edit against the original source text.
type XMLChange struct {
	// Type is the kind of change: "text", "attribute", "add_attribute",
	// "remove_attribute", "add_element" or "remove_element".
	Type string
	// Path identifies the affected node, e.g. "/item[0]/@id".
	Path string
	// OldValue and NewValue hold the previous and the new text or attribute
	// value where the change type has one.
	OldValue, NewValue string
	// StartPos and EndPos delimit the affected byte range in the original
	// source; insertions only use StartPos.
	StartPos, EndPos int64
	// InsertText is the XML text to insert for "add_attribute" and
	// "add_element" changes.
	InsertText string
}
|
||||
|
||||
func findXMLChanges(original, modified *XMLElement, path string) []XMLChange {
|
||||
var changes []XMLChange
|
||||
|
||||
// Check text content changes
|
||||
if original.Text != modified.Text {
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "text",
|
||||
Path: path,
|
||||
OldValue: original.Text,
|
||||
NewValue: modified.Text,
|
||||
StartPos: original.TextStart,
|
||||
EndPos: original.TextEnd,
|
||||
})
|
||||
}
|
||||
|
||||
// Check attribute changes
|
||||
for attrName, origAttr := range original.Attributes {
|
||||
if modAttr, exists := modified.Attributes[attrName]; exists {
|
||||
if origAttr.Value != modAttr.Value {
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "attribute",
|
||||
Path: path + "/@" + attrName,
|
||||
OldValue: origAttr.Value,
|
||||
NewValue: modAttr.Value,
|
||||
StartPos: origAttr.ValueStart,
|
||||
EndPos: origAttr.ValueEnd,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
// Attribute removed
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "remove_attribute",
|
||||
Path: path + "/@" + attrName,
|
||||
OldValue: origAttr.Value,
|
||||
StartPos: origAttr.ValueStart - int64(len(attrName)+2), // Include attr=" part
|
||||
EndPos: origAttr.ValueEnd + 1, // Include closing "
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Check for added attributes
|
||||
for attrName, modAttr := range modified.Attributes {
|
||||
if _, exists := original.Attributes[attrName]; !exists {
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "add_attribute",
|
||||
Path: path + "/@" + attrName,
|
||||
NewValue: modAttr.Value,
|
||||
StartPos: original.StartPos, // Will be adjusted to insert after tag name
|
||||
InsertText: fmt.Sprintf(` %s="%s"`, attrName, modAttr.Value),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Check children recursively
|
||||
origChildMap := make(map[string][]*XMLElement)
|
||||
for _, child := range original.Children {
|
||||
origChildMap[child.Tag] = append(origChildMap[child.Tag], child)
|
||||
}
|
||||
|
||||
modChildMap := make(map[string][]*XMLElement)
|
||||
for _, child := range modified.Children {
|
||||
modChildMap[child.Tag] = append(modChildMap[child.Tag], child)
|
||||
}
|
||||
|
||||
// Compare children by tag name
|
||||
processedTags := make(map[string]bool)
|
||||
|
||||
for tag, origChildren := range origChildMap {
|
||||
processedTags[tag] = true
|
||||
modChildren := modChildMap[tag]
|
||||
|
||||
// Match children by index
|
||||
maxLen := len(origChildren)
|
||||
if len(modChildren) > maxLen {
|
||||
maxLen = len(modChildren)
|
||||
}
|
||||
|
||||
for i := 0; i < maxLen; i++ {
|
||||
childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i)
|
||||
if i < len(origChildren) && i < len(modChildren) {
|
||||
// Both exist, compare recursively
|
||||
childChanges := findXMLChanges(origChildren[i], modChildren[i], childPath)
|
||||
changes = append(changes, childChanges...)
|
||||
} else if i < len(origChildren) {
|
||||
// Child removed
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "remove_element",
|
||||
Path: childPath,
|
||||
StartPos: origChildren[i].StartPos,
|
||||
EndPos: origChildren[i].EndPos,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Handle added children
|
||||
if len(modChildren) > len(origChildren) {
|
||||
for i := len(origChildren); i < len(modChildren); i++ {
|
||||
childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i)
|
||||
// Generate XML text for the new element
|
||||
xmlText := serializeXMLElement(modChildren[i], " ")
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "add_element",
|
||||
Path: childPath,
|
||||
InsertText: xmlText,
|
||||
StartPos: original.EndPos - int64(len(original.Tag)+3), // Before closing tag
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle completely new tag types
|
||||
for tag, modChildren := range modChildMap {
|
||||
if !processedTags[tag] {
|
||||
for i, child := range modChildren {
|
||||
childPath := fmt.Sprintf("%s/%s[%d]", path, tag, i)
|
||||
xmlText := serializeXMLElement(child, " ")
|
||||
changes = append(changes, XMLChange{
|
||||
Type: "add_element",
|
||||
Path: childPath,
|
||||
InsertText: xmlText,
|
||||
StartPos: original.EndPos - int64(len(original.Tag)+3),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return changes
|
||||
}
|
||||
|
||||
// serializeXMLElement converts an XMLElement back to XML text
|
||||
func serializeXMLElement(elem *XMLElement, indent string) string {
|
||||
var sb strings.Builder
|
||||
sb.WriteString(indent)
|
||||
sb.WriteString("<")
|
||||
sb.WriteString(elem.Tag)
|
||||
|
||||
// Write attributes
|
||||
attrNames := make([]string, 0, len(elem.Attributes))
|
||||
for name := range elem.Attributes {
|
||||
attrNames = append(attrNames, name)
|
||||
}
|
||||
sort.Strings(attrNames)
|
||||
|
||||
for _, name := range attrNames {
|
||||
attr := elem.Attributes[name]
|
||||
sb.WriteString(fmt.Sprintf(` %s="%s"`, name, attr.Value))
|
||||
}
|
||||
|
||||
if elem.Text == "" && len(elem.Children) == 0 {
|
||||
sb.WriteString(" />")
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
sb.WriteString(">")
|
||||
|
||||
if elem.Text != "" {
|
||||
sb.WriteString(elem.Text)
|
||||
}
|
||||
|
||||
if len(elem.Children) > 0 {
|
||||
sb.WriteString("\n")
|
||||
for _, child := range elem.Children {
|
||||
sb.WriteString(serializeXMLElement(child, indent+" "))
|
||||
sb.WriteString("\n")
|
||||
}
|
||||
sb.WriteString(indent)
|
||||
}
|
||||
|
||||
sb.WriteString("</")
|
||||
sb.WriteString(elem.Tag)
|
||||
sb.WriteString(">")
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// applyXMLChanges generates ReplaceCommands from detected XML changes
|
||||
func applyXMLChanges(changes []XMLChange) []utils.ReplaceCommand {
|
||||
var commands []utils.ReplaceCommand
|
||||
|
||||
for _, change := range changes {
|
||||
switch change.Type {
|
||||
case "text":
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.EndPos),
|
||||
With: change.NewValue,
|
||||
})
|
||||
|
||||
case "attribute":
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.EndPos),
|
||||
With: change.NewValue,
|
||||
})
|
||||
|
||||
case "add_attribute":
|
||||
// Insert after tag name, before > or />
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.StartPos),
|
||||
With: change.InsertText,
|
||||
})
|
||||
|
||||
case "remove_attribute":
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.EndPos),
|
||||
With: "",
|
||||
})
|
||||
|
||||
case "add_element":
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.StartPos),
|
||||
With: "\n" + change.InsertText,
|
||||
})
|
||||
|
||||
case "remove_element":
|
||||
commands = append(commands, utils.ReplaceCommand{
|
||||
From: int(change.StartPos),
|
||||
To: int(change.EndPos),
|
||||
With: "",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return commands
|
||||
}
|
||||
|
||||
// modifyXMLElement applies modifications to an XMLElement based on a modification function
|
||||
func modifyXMLElement(elem *XMLElement, modifyFunc func(*XMLElement)) *XMLElement {
|
||||
// Deep copy the element
|
||||
copied := deepCopyXMLElement(elem)
|
||||
modifyFunc(copied)
|
||||
return copied
|
||||
}
|
||||
|
||||
// deepCopyXMLElement creates a deep copy of an XMLElement
|
||||
func deepCopyXMLElement(elem *XMLElement) *XMLElement {
|
||||
if elem == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
copied := &XMLElement{
|
||||
Tag: elem.Tag,
|
||||
Text: elem.Text,
|
||||
StartPos: elem.StartPos,
|
||||
EndPos: elem.EndPos,
|
||||
TextStart: elem.TextStart,
|
||||
TextEnd: elem.TextEnd,
|
||||
Attributes: make(map[string]XMLAttribute),
|
||||
Children: make([]*XMLElement, len(elem.Children)),
|
||||
}
|
||||
|
||||
for k, v := range elem.Attributes {
|
||||
copied.Attributes[k] = v
|
||||
}
|
||||
|
||||
for i, child := range elem.Children {
|
||||
copied.Children[i] = deepCopyXMLElement(child)
|
||||
}
|
||||
|
||||
return copied
|
||||
}
|
||||
|
||||
// parseNumeric interprets s as a 64-bit floating-point number using
// strconv.ParseFloat. It returns the value and true on success, or 0 and
// false when s cannot be parsed (including out-of-range values).
func parseNumeric(s string) (float64, bool) {
	value, err := strconv.ParseFloat(s, 64)
	if err != nil {
		return 0, false
	}
	return value, true
}
|
||||
|
||||
// formatNumeric renders f as a decimal string. Values that are whole numbers
// within the int64 range are printed without a fractional part; everything
// else is printed with the shortest representation that round-trips
// (strconv.FormatFloat with format 'f' and precision -1).
func formatNumeric(f float64) string {
	// 2^63 is exactly representable as a float64. Converting a float outside
	// [-2^63, 2^63) to int64 gives an implementation-dependent result, so the
	// range is checked before the conversion is attempted. NaN fails both
	// comparisons and falls through to FormatFloat.
	const int64Limit = 1 << 63
	if f >= -int64Limit && f < int64Limit && f == float64(int64(f)) {
		return strconv.FormatInt(int64(f), 10)
	}
	return strconv.FormatFloat(f, 'f', -1, 64)
}
|
||||
Reference in New Issue
Block a user