Refactor everything to processors and implement json and xml processors such as they are

This commit is contained in:
2025-03-24 15:45:04 +01:00
parent 84e0a8bed6
commit 17bb3d4f71
9 changed files with 3109 additions and 1477 deletions

505
main.go
View File

@@ -6,14 +6,13 @@ import (
"io"
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"github.com/bmatcuk/doublestar/v4"
lua "github.com/yuin/gopher-lua"
"modify/processor"
)
var Error *log.Logger
@@ -21,24 +20,24 @@ var Warning *log.Logger
var Info *log.Logger
var Success *log.Logger
// ModificationRecord describes one value that was rewritten in a file.
//
// File is the name of the file the value was found in, OldValue and NewValue
// hold the text before and after the change, Operation is the expression that
// produced the change, and Context is a short excerpt of the surrounding text
// used when reporting.
type ModificationRecord struct {
	File               string
	OldValue, NewValue string
	Operation, Context string
}
// GlobalStats tracks all modifications across files
type GlobalStats struct {
TotalMatches int
TotalModifications int
Modifications []ModificationRecord
Modifications []processor.ModificationRecord
ProcessedFiles int
FailedFiles int
}
// FileMode selects the way a file is interpreted and processed.
type FileMode string

// Supported processing modes.
const (
	// ModeRegex is the default mode and uses a regular expression.
	ModeRegex = FileMode("regex")
	// ModeXML is the XML mode and uses XPath.
	ModeXML = FileMode("xml")
	// ModeJSON is the JSON mode and uses JSONPath.
	ModeJSON = FileMode("json")
)
var stats GlobalStats
func init() {
@@ -65,19 +64,35 @@ func init() {
// Initialize global stats
stats = GlobalStats{
Modifications: make([]ModificationRecord, 0),
Modifications: make([]processor.ModificationRecord, 0),
}
}
func main() {
// Define flags
fileModeFlag := flag.String("mode", "regex", "Processing mode: regex, xml, json")
xpathFlag := flag.String("xpath", "", "XPath expression (for XML mode)")
jsonpathFlag := flag.String("jsonpath", "", "JSONPath expression (for JSON mode)")
verboseFlag := flag.Bool("verbose", false, "Enable verbose output")
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s <regex_with_capture_groups> <lua_expression> <...files_or_globs>\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Usage: %s [options] <pattern> <lua_expression> <...files_or_globs>\n", os.Args[0])
fmt.Fprintf(os.Stderr, "\nOptions:\n")
fmt.Fprintf(os.Stderr, " -mode string\n")
fmt.Fprintf(os.Stderr, " Processing mode: regex, xml, json (default \"regex\")\n")
fmt.Fprintf(os.Stderr, " -xpath string\n")
fmt.Fprintf(os.Stderr, " XPath expression (for XML mode)\n")
fmt.Fprintf(os.Stderr, " -jsonpath string\n")
fmt.Fprintf(os.Stderr, " JSONPath expression (for JSON mode)\n")
fmt.Fprintf(os.Stderr, " -verbose\n")
fmt.Fprintf(os.Stderr, " Enable verbose output\n")
fmt.Fprintf(os.Stderr, "\nExamples:\n")
fmt.Fprintf(os.Stderr, " %s \"<value>(\\d+)</value>\" \"*1.5\" data.xml\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s \"<value>(\\d+)</value>\" \"*1.5\" \"*.xml\"\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s \"<value>(\\d+)</value>,(\\d+)\" \"v1 * 1.5 * v2\" data.xml\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s \"<value>(\\d+)</value>\" \"=0\" data.xml\n", os.Args[0])
fmt.Fprintf(os.Stderr, " Regex mode (default):\n")
fmt.Fprintf(os.Stderr, " %s \"<value>(\\d+)</value>\" \"*1.5\" data.xml\n", os.Args[0])
fmt.Fprintf(os.Stderr, " XML mode:\n")
fmt.Fprintf(os.Stderr, " %s -mode=xml -xpath=\"//value\" \"*1.5\" data.xml\n", os.Args[0])
fmt.Fprintf(os.Stderr, " JSON mode:\n")
fmt.Fprintf(os.Stderr, " %s -mode=json -jsonpath=\"$.items[*].value\" \"*1.5\" data.json\n", os.Args[0])
fmt.Fprintf(os.Stderr, "\nNote: v1, v2, etc. are used to refer to capture groups as numbers.\n")
fmt.Fprintf(os.Stderr, " s1, s2, etc. are used to refer to capture groups as strings.\n")
fmt.Fprintf(os.Stderr, " Helper functions: num(str) converts string to number, str(num) converts number to string\n")
@@ -88,34 +103,127 @@ func main() {
}
flag.Parse()
// Set up verbose mode
if !*verboseFlag {
// If not verbose, suppress Info level logs
Info.SetOutput(io.Discard)
}
args := flag.Args()
if len(args) < 3 {
Error.Println("Insufficient arguments - need regex pattern, lua expression, and at least one file or glob pattern")
requiredArgCount := 3 // Default for regex mode
// XML/JSON modes need one fewer positional argument
if *fileModeFlag == "xml" || *fileModeFlag == "json" {
requiredArgCount = 2
}
if len(args) < requiredArgCount {
Error.Printf("%s mode requires %d arguments minimum", *fileModeFlag, requiredArgCount)
flag.Usage()
return
}
regexPattern := args[0]
luaExpr := args[1]
filePatterns := args[2:]
// Expand file patterns with glob support
files, err := expandFilePatterns(filePatterns)
if err != nil {
Error.Printf("Error expanding file patterns: %v", err)
// Validate mode-specific parameters
if *fileModeFlag == "xml" && *xpathFlag == "" {
Error.Printf("XML mode requires an XPath expression with -xpath flag")
return
}
if *fileModeFlag == "json" && *jsonpathFlag == "" {
Error.Printf("JSON mode requires a JSONPath expression with -jsonpath flag")
return
}
if len(files) == 0 {
Error.Println("No files found matching the specified patterns")
return
// Get the appropriate pattern and expression based on mode
var regexPattern string
var luaExpr string
var filePatterns []string
// In regex mode, we need both pattern arguments
// In XML/JSON modes, we only need the lua expression from args
if *fileModeFlag == "regex" {
regexPattern = args[0]
luaExpr = args[1]
filePatterns = args[2:]
// Process files with regex mode
processFilesWithRegex(regexPattern, luaExpr, filePatterns)
} else {
// XML/JSON modes
luaExpr = args[0]
filePatterns = args[1:]
// Prepare the Lua expression
originalLuaExpr := luaExpr
luaExpr = processor.BuildLuaScript(luaExpr)
if originalLuaExpr != luaExpr {
Info.Printf("Transformed Lua expression from '%s' to '%s'", originalLuaExpr, luaExpr)
}
// Expand file patterns with glob support
files, err := expandFilePatterns(filePatterns)
if err != nil {
Error.Printf("Error expanding file patterns: %v", err)
return
}
if len(files) == 0 {
Error.Printf("No files found matching the specified patterns")
return
}
// Create the processor based on mode
var proc processor.Processor
if *fileModeFlag == "xml" {
Info.Printf("Starting XML modifier with XPath '%s', expression '%s' on %d files",
*xpathFlag, luaExpr, len(files))
proc = processor.NewXMLProcessor(Info)
} else {
Info.Printf("Starting JSON modifier with JSONPath '%s', expression '%s' on %d files",
*jsonpathFlag, luaExpr, len(files))
proc = processor.NewJSONProcessor(Info)
}
var wg sync.WaitGroup
// Process each file
for _, file := range files {
wg.Add(1)
go func(file string) {
defer wg.Done()
Info.Printf("🔄 Processing file: %s", file)
// Pass the appropriate path expression as the pattern
var pattern string
if *fileModeFlag == "xml" {
pattern = *xpathFlag
} else {
pattern = *jsonpathFlag
}
modCount, matchCount, err := proc.Process(file, pattern, luaExpr, originalLuaExpr)
if err != nil {
Error.Printf("❌ Failed to process file %s: %v", file, err)
stats.FailedFiles++
} else {
Info.Printf("✅ Successfully processed file: %s", file)
stats.ProcessedFiles++
stats.TotalMatches += matchCount
stats.TotalModifications += modCount
}
}(file)
}
wg.Wait()
}
Info.Printf("Starting modifier with pattern '%s', expression '%s' on %d files", regexPattern, luaExpr, len(files))
// Print summary of all modifications
printSummary(luaExpr)
}
// processFilesWithRegex handles regex mode pattern processing for multiple files
func processFilesWithRegex(regexPattern string, luaExpr string, filePatterns []string) {
// Prepare the Lua expression
originalLuaExpr := luaExpr
luaExpr = buildLuaScript(luaExpr)
luaExpr = processor.BuildLuaScript(luaExpr)
if originalLuaExpr != luaExpr {
Info.Printf("Transformed Lua expression from '%s' to '%s'", originalLuaExpr, luaExpr)
}
@@ -146,6 +254,24 @@ func main() {
return
}
// Expand file patterns with glob support
files, err := expandFilePatterns(filePatterns)
if err != nil {
Error.Printf("Error expanding file patterns: %v", err)
return
}
if len(files) == 0 {
Error.Printf("No files found matching the specified patterns")
return
}
Info.Printf("Starting regex modifier with pattern '%s', expression '%s' on %d files",
regexPattern, luaExpr, len(files))
// Create the regex processor
proc := processor.NewRegexProcessor(pattern, Info)
var wg sync.WaitGroup
// Process each file
for _, file := range files {
@@ -153,20 +279,19 @@ func main() {
go func(file string) {
defer wg.Done()
Info.Printf("🔄 Processing file: %s", file)
err := processFile(file, pattern, luaExpr, originalLuaExpr)
modCount, matchCount, err := proc.Process(file, regexPattern, luaExpr, originalLuaExpr)
if err != nil {
Error.Printf("❌ Failed to process file %s: %v", file, err)
stats.FailedFiles++
} else {
Info.Printf("✅ Successfully processed file: %s", file)
stats.ProcessedFiles++
stats.TotalMatches += matchCount
stats.TotalModifications += modCount
}
}(file)
}
wg.Wait()
// Print summary of all modifications
printSummary(originalLuaExpr)
}
// printSummary outputs a formatted summary of all modifications made
@@ -180,7 +305,7 @@ func printSummary(operation string) {
stats.TotalModifications, stats.ProcessedFiles, stats.ProcessedFiles+stats.FailedFiles, operation)
// Group modifications by file for better readability
fileGroups := make(map[string][]ModificationRecord)
fileGroups := make(map[string][]processor.ModificationRecord)
for _, mod := range stats.Modifications {
fileGroups[mod.File] = append(fileGroups[mod.File], mod)
}
@@ -212,292 +337,6 @@ func printSummary(operation string) {
}
}
// shorthandIndexPattern matches the v[N] / s[N] shorthand so it can be
// rewritten to the plain vN / sN variable names.
var shorthandIndexPattern = regexp.MustCompile(`([vs])\[(\d+)\]`)

// buildLuaScript creates a complete Lua script from the expression.
//
// The following rewrites are applied, in order:
//   - an expression starting with an arithmetic operator (* / + - ^ %) is
//     prefixed with "v1 = v1", so "*1.5" becomes "v1 = v1*1.5";
//   - an expression starting with "=" is prefixed with "v1 ", so "=0"
//     becomes "v1 =0";
//   - an expression that contains no "=" at all is turned into an assignment
//     to v1;
//   - the shorthand v[N] / s[N] is replaced with vN / sN for every index N
//     (not only 1 and 2).
//
// When any rewrite changed the expression, the transformation is reported on
// the Info logger. The resulting script is returned.
func buildLuaScript(luaExpr string) string {
	original := luaExpr

	switch {
	case luaExpr != "" && strings.IndexByte("*/+-^%", luaExpr[0]) >= 0:
		// Leading operator: apply it to the first capture group.
		luaExpr = "v1 = v1" + luaExpr
	case strings.HasPrefix(luaExpr, "="):
		// Direct assignment with the = operator.
		luaExpr = "v1 " + luaExpr
	}

	// A bare expression becomes the new value of v1.
	if !strings.Contains(luaExpr, "=") {
		luaExpr = "v1 = " + luaExpr
	}

	luaExpr = shorthandIndexPattern.ReplaceAllString(luaExpr, "${1}${2}")

	// Every rewrite above changes the text, so a plain comparison tells us
	// whether anything was transformed.
	if luaExpr != original {
		Info.Printf("Transformed Lua expression: '%s' → '%s'", original, luaExpr)
	}

	return luaExpr
}
// processFile reads filename, runs process over its content with the given
// pattern and Lua expression, and writes the result back to the same file
// when at least one modification was made.
//
// filename may be relative or absolute. originalExpr is passed through to
// process together with filename. The match and modification counts returned
// by process are added to the global stats.
//
// It returns an error when the file cannot be read, when process fails, or
// when the modified content cannot be written. A file without modifications
// is left untouched and nil is returned.
//
// NOTE(review): stats is package-level state updated here without locking,
// and the caller visible in this file runs processFile from goroutines —
// confirm whether synchronization is required.
func processFile(filename string, pattern *regexp.Regexp, luaExpr string, originalExpr string) error {
	// filepath.Clean yields the same result as filepath.Join(".", filename) for
	// relative paths, but keeps absolute paths absolute instead of silently
	// turning "/dir/file" into "dir/file".
	fullPath := filepath.Clean(filename)

	// Read file content
	content, err := os.ReadFile(fullPath)
	if err != nil {
		Error.Printf("Cannot read file %s: %v", fullPath, err)
		return fmt.Errorf("error reading file: %w", err)
	}

	fileContent := string(content)
	Info.Printf("File %s loaded: %d bytes", fullPath, len(content))

	// Process the content
	result, modificationCount, matchCount, err := process(fileContent, pattern, luaExpr, filename, originalExpr)
	if err != nil {
		Error.Printf("Processing failed for %s: %v", fullPath, err)
		return err
	}

	// Update global stats
	stats.TotalMatches += matchCount
	stats.TotalModifications += modificationCount

	if modificationCount == 0 {
		Warning.Printf("No modifications made to %s - pattern didn't match any content", fullPath)
		return nil
	}

	// Write the modified content back
	if err := os.WriteFile(fullPath, []byte(result), 0644); err != nil {
		Error.Printf("Failed to save changes to %s: %v", fullPath, err)
		return fmt.Errorf("error writing file: %w", err)
	}

	Info.Printf("Made %d modifications to %s and saved (%d bytes)",
		modificationCount, fullPath, len(result))
	return nil
}
// maxCaptureGroups bounds how many capture groups are read back from Lua after
// the user expression ran (v1..v12 / s1..s12).
const maxCaptureGroups = 12

// captureEdit is one pending replacement of a capture group, addressed by byte
// offsets into the input text.
type captureEdit struct {
	group  int    // 1-based capture group number
	start  int    // byte offset where the capture begins
	end    int    // byte offset just past the capture
	oldVal string // captured text
	newVal string // text that replaces it
}

// process runs luaExpr once for every match of pattern in data and rewrites
// the capture groups whose Lua variables changed.
//
// For each match the capture groups are exposed to Lua as the globals s1, s2,
// ... (raw strings) and v1, v2, ... (numbers when the capture parses as a
// float, otherwise strings). After the expression ran, a changed sN takes
// priority over a changed vN. New values are written at the byte position of
// the capture group itself, so a value that also occurs earlier in the match
// is not replaced by accident, and empty captures receive their value at the
// position where they matched. Matches are evaluated against the whole input,
// so anchors and word boundaries see their real surroundings.
//
// filename and originalExpr are only used for the records appended to the
// global stats.
//
// It returns the rewritten text, the number of matches that were modified,
// the total number of matches, and an error when the Lua state cannot be
// initialised. A Lua error for a single match is logged and leaves that match
// unchanged.
//
// NOTE(review): stats.Modifications is appended to without locking; confirm
// whether callers run process concurrently.
func process(data string, pattern *regexp.Regexp, luaExpr string, filename string, originalExpr string) (string, int, int, error) {
	L := lua.NewState()
	defer L.Close()

	if err := prepareLuaState(L); err != nil {
		return data, 0, 0, err
	}

	modificationCount := 0
	matchCount := 0

	var out strings.Builder
	out.Grow(len(data))
	cursor := 0

	for _, loc := range pattern.FindAllStringSubmatchIndex(data, -1) {
		matchCount++
		matchStart, matchEnd := loc[0], loc[1]
		match := data[matchStart:matchEnd]

		// Copy the text between the previous match and this one verbatim.
		out.WriteString(data[cursor:matchStart])
		cursor = matchEnd

		if len(loc) <= 2 {
			// No capture groups, keep the match unchanged.
			Warning.Printf("Match found but no capture groups: %s", limitString(match, 50))
			out.WriteString(match)
			continue
		}
		Info.Printf("Match found: %s", limitString(match, 50))

		edits, err := evaluateCaptures(L, data, loc, luaExpr)
		if err != nil {
			Error.Printf("Lua execution failed for match '%s': %v", limitString(match, 50), err)
			out.WriteString(match) // keep unchanged on error
			continue
		}

		rewritten, applied := applyCaptureEdits(data, matchStart, matchEnd, edits)
		out.WriteString(rewritten)
		if len(applied) == 0 {
			continue
		}

		for _, e := range applied {
			// Extract a bit of context around the capture for reporting.
			contextStart := max(0, e.start-matchStart-10)
			contextLength := min(30, len(match)-contextStart)
			contextStr := "..." + match[contextStart:contextStart+contextLength] + "..."

			Info.Printf("Modified value [%d]: '%s' → '%s'", e.group, limitString(e.oldVal, 30), limitString(e.newVal, 30))

			stats.Modifications = append(stats.Modifications, ModificationRecord{
				File:      filename,
				OldValue:  e.oldVal,
				NewValue:  e.newVal,
				Operation: originalExpr,
				Context:   fmt.Sprintf("(in %s)", limitString(contextStr, 30)),
			})
		}
		modificationCount++
	}
	out.WriteString(data[cursor:])

	return out.String(), modificationCount, matchCount, nil
}

// prepareLuaState loads the math library and the helper functions that user
// expressions may call.
func prepareLuaState(L *lua.LState) error {
	L.Push(L.GetGlobal("require"))
	L.Push(lua.LString("math"))
	if err := L.PCall(1, 1, nil); err != nil {
		Error.Printf("Failed to load Lua math library: %v", err)
		return fmt.Errorf("error loading Lua math library: %w", err)
	}

	helperScript := `
-- Custom Lua helpers for math operations
function min(a, b) return math.min(a, b) end
function max(a, b) return math.max(a, b) end
function round(x) return math.floor(x + 0.5) end
function floor(x) return math.floor(x) end
function ceil(x) return math.ceil(x) end
-- String to number conversion helper
function num(str)
return tonumber(str) or 0
end
-- Number to string conversion
function str(num)
return tostring(num)
end
-- Check if string is numeric
function is_number(str)
return tonumber(str) ~= nil
end
`
	if err := L.DoString(helperScript); err != nil {
		Error.Printf("Failed to load Lua helper functions: %v", err)
		return fmt.Errorf("error loading helper functions: %w", err)
	}
	return nil
}

// evaluateCaptures publishes the capture groups of one match (described by
// the submatch index slice loc) as Lua globals, runs luaExpr, and returns the
// replacements requested by the expression.
func evaluateCaptures(L *lua.LState, data string, loc []int, luaExpr string) ([]captureEdit, error) {
	groups := len(loc)/2 - 1
	oldVals := make([]string, groups)
	oldNums := make([]float64, groups)
	numeric := make([]bool, groups)

	for g := 1; g <= groups; g++ {
		capture := ""
		if start := loc[2*g]; start >= 0 {
			capture = data[start:loc[2*g+1]]
		}
		oldVals[g-1] = capture

		// Raw string value with the s prefix.
		L.SetGlobal(fmt.Sprintf("s%d", g), lua.LString(capture))

		// Numeric value with the v prefix when possible, the string otherwise.
		if f, err := strconv.ParseFloat(capture, 64); err == nil {
			oldNums[g-1], numeric[g-1] = f, true
			L.SetGlobal(fmt.Sprintf("v%d", g), lua.LNumber(f))
		} else {
			L.SetGlobal(fmt.Sprintf("v%d", g), lua.LString(capture))
		}
	}

	if err := L.DoString(luaExpr); err != nil {
		return nil, err
	}

	var edits []captureEdit
	for g := 1; g <= groups && g <= maxCaptureGroups; g++ {
		newVal, changed := readCaptureValue(L, g, oldVals[g-1], oldNums[g-1], numeric[g-1])
		if !changed {
			continue
		}
		start, end := loc[2*g], loc[2*g+1]
		if start < 0 {
			// The group took no part in the match, so there is no position
			// at which the new value could be written.
			Warning.Printf("Capture group %d did not participate in the match; new value ignored", g)
			continue
		}
		edits = append(edits, captureEdit{group: g, start: start, end: end, oldVal: oldVals[g-1], newVal: newVal})
	}
	return edits, nil
}

// readCaptureValue returns the value Lua holds for capture group `group` and
// whether it differs from the captured text. A changed sN wins over vN. A
// numeric vN that still equals the parsed capture counts as unchanged, so an
// untouched "1.50" is not rewritten to "1.5".
func readCaptureValue(L *lua.LState, group int, oldVal string, oldNum float64, wasNumeric bool) (string, bool) {
	// First priority: the string variable.
	if s, ok := L.GetGlobal(fmt.Sprintf("s%d", group)).(lua.LString); ok && string(s) != oldVal {
		return string(s), true
	}

	// Second priority: the v variable.
	vVal := L.GetGlobal(fmt.Sprintf("v%d", group))
	if vVal == lua.LNil {
		return "", false
	}

	var candidate string
	switch v := vVal.(type) {
	case lua.LNumber:
		if wasNumeric && float64(v) == oldNum {
			return oldVal, false
		}
		candidate = strconv.FormatFloat(float64(v), 'f', -1, 64)
	case lua.LString:
		candidate = string(v)
	default:
		candidate = fmt.Sprintf("%v", v)
	}
	return candidate, candidate != oldVal
}

// applyCaptureEdits rebuilds data[matchStart:matchEnd] with every edit written
// at its own byte range. Edits are applied from left to right; an edit that
// overlaps one already applied (nested capture groups) is skipped. It returns
// the rewritten match and the edits that were applied.
func applyCaptureEdits(data string, matchStart, matchEnd int, edits []captureEdit) (string, []captureEdit) {
	if len(edits) == 0 {
		return data[matchStart:matchEnd], nil
	}

	// Order by position, then by group number. The slice holds at most
	// maxCaptureGroups entries, so a plain insertion sort is sufficient.
	ordered := make([]captureEdit, len(edits))
	copy(ordered, edits)
	for i := 1; i < len(ordered); i++ {
		for j := i; j > 0; j-- {
			a, b := ordered[j], ordered[j-1]
			if a.start > b.start || (a.start == b.start && a.group > b.group) {
				break
			}
			ordered[j], ordered[j-1] = b, a
		}
	}

	var rewritten strings.Builder
	applied := make([]captureEdit, 0, len(ordered))
	cursor := matchStart
	for _, e := range ordered {
		if e.start < cursor {
			Warning.Printf("Capture group %d overlaps a group that was already replaced; new value ignored", e.group)
			continue
		}
		rewritten.WriteString(data[cursor:e.start])
		rewritten.WriteString(e.newVal)
		cursor = e.end
		applied = append(applied, e)
	}
	rewritten.WriteString(data[cursor:matchEnd])

	return rewritten.String(), applied
}
// limitString truncates a string to maxLen and adds "..." if truncated
func limitString(s string, maxLen int) string {
s = strings.ReplaceAll(s, "\n", "\\n")
@@ -507,22 +346,6 @@ func limitString(s string, maxLen int) string {
return s[:maxLen-3] + "..."
}
// max reports the larger of the two integers a and b.
func max(a, b int) int {
	larger := b
	if a > larger {
		larger = a
	}
	return larger
}
// min reports the smaller of the two integers a and b.
func min(a, b int) int {
	smaller := b
	if a < smaller {
		smaller = a
	}
	return smaller
}
func expandFilePatterns(patterns []string) ([]string, error) {
var files []string
filesMap := make(map[string]bool)