From d3a1f1bd960b7be4c02198c29e81f5c1cf76a8cd Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Wed, 26 Mar 2025 22:07:46 +0100 Subject: [PATCH] Rework regex grouping to avoid changing the same area twice --- processor/regex.go | 260 ++++++++++++++++++++------------------------- 1 file changed, 118 insertions(+), 142 deletions(-) diff --git a/processor/regex.go b/processor/regex.go index a75f27c..3f52bba 100644 --- a/processor/regex.go +++ b/processor/regex.go @@ -16,106 +16,88 @@ type RegexProcessor struct{} // ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings) func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error { - captures, ok := data.([]string) + captureGroups, ok := data.([]*CaptureGroup) if !ok { - return fmt.Errorf("expected []string for captures, got %T", data) + return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data) } - // Set variables for each capture group, starting from v1/s1 for the first capture - for i := 0; i < len(captures); i++ { - // Set string version (always available as s1, s2, etc.) - L.SetGlobal(fmt.Sprintf("s%d", i+1), lua.LString(captures[i])) + groupindex := 0 + for _, capture := range captureGroups { + if capture.Name == "" { + // We don't want to change the name of the capture group + // Even if it's empty + tempName := fmt.Sprintf("%d", groupindex+1) + groupindex++ - // Try to convert to number and set v1, v2, etc. - if val, err := strconv.ParseFloat(captures[i], 64); err == nil { - L.SetGlobal(fmt.Sprintf("v%d", i+1), lua.LNumber(val)) + L.SetGlobal("s"+tempName, lua.LString(capture.Value)) + + val, err := strconv.ParseFloat(capture.Value, 64) + if err == nil { + L.SetGlobal("v"+tempName, lua.LNumber(val)) + } + } else { + val, err := strconv.ParseFloat(capture.Value, 64) + if err == nil { + L.SetGlobal(capture.Name, lua.LNumber(val)) + } else { + L.SetGlobal(capture.Name, lua.LString(capture.Value)) + } } } return nil } -// FromLua implements the Processor interface for RegexProcessor func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) { - // Get the modified values after Lua execution - modifications := make(map[int]string) + // Stub to satisfy interface + return nil, nil +} - // Check for modifications to v1-v12 and s1-s12 - for i := 0; i < 12; i++ { - // Check both v and s variables to see if any were modified - vVarName := fmt.Sprintf("v%d", i+1) - sVarName := fmt.Sprintf("s%d", i+1) +// FromLua implements the Processor interface for RegexProcessor +func (p *RegexProcessor) FromLuaCustom(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) { + captureIndex := 0 + for _, capture := range captureGroups { + if capture.Name == "" { + capture.Name = fmt.Sprintf("%d", captureIndex+1) - vLuaVal := L.GetGlobal(vVarName) - sLuaVal := L.GetGlobal(sVarName) + vVarName := fmt.Sprintf("v%s", capture.Name) + sVarName := fmt.Sprintf("s%s", capture.Name) + captureIndex++ - // If our value is a number then it's very likely we want it to be a number - // And not a string - // If we do want it to be a string we will cast it into a string in lua - // wait that wouldn't work... Casting v to a string would not load it here - if vLuaVal.Type() == lua.LTNumber { - modifications[i] = vLuaVal.String() - continue + vLuaVal := L.GetGlobal(vVarName) + sLuaVal := L.GetGlobal(sVarName) + + if sLuaVal.Type() == lua.LTString { + capture.Updated = sLuaVal.String() + } + // Numbers have priority + if vLuaVal.Type() == lua.LTNumber { + capture.Updated = vLuaVal.String() + } + } else { + // Easy shit + capture.Updated = L.GetGlobal(capture.Name).String() } - if sLuaVal.Type() == lua.LTString { - modifications[i] = sLuaVal.String() - continue - } - } - return modifications, nil + return captureGroups, nil } -type NamedCapture struct { - Name string - Value string - Range [2]int +type CaptureGroup struct { + Name string + Value string + Updated string + Range [2]int } type ReplaceCommand struct { - From int - To int - With string + From int + To int + With string } // ProcessContent applies regex replacement with Lua processing func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) { - // Handle special pattern modifications - if !strings.HasPrefix(pattern, "(?s)") { - pattern = "(?s)" + pattern - log.Printf("Pattern modified to include (?s): %s", pattern) - } - - // The order of these replaces is important - // This one handles !num-s inside of named capture groups - // If it were not here our !num in a named capture group would - // Expand to another capture group in the capture group - // We really only want one (our named) capture group - namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`) - pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string { - parts := namedGroupNum.FindStringSubmatch(match) - if len(parts) != 3 { - return match - } - replacement := `-?\d*\.?\d+` - return parts[1] + replacement - }) - pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`) - pattern = strings.ReplaceAll(pattern, "!any", `.*?`) - repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`) - // !rep(pattern, count) repeats the pattern n times - // Inserting !any between each repetition - pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string { - parts := repPattern.FindStringSubmatch(match) - if len(parts) != 3 { - return match - } - repeatedPattern := parts[1] - count := parts[2] - repetitions, _ := strconv.Atoi(count) - return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern - }) - + pattern = ResolveRegexPlaceholders(pattern) compiledPattern, err := regexp.Compile(pattern) if err != nil { log.Printf("Error compiling pattern: %v", err) @@ -181,120 +163,75 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr } } - captures := make([]string, 0, len(groups)/2) - for j := 0; j < len(groups); j += 2 { - if groups[j] == -1 || groups[j+1] == -1 { - continue - } - captures = append(captures, content[groups[j]:groups[j+1]]) - } - log.Printf("Captured groups: %v", captures) - // We have to use array to preserve order // Very important for the reconstruction step // Because we must overwrite the values in reverse order // See comments a few dozen lines above for more details - namedCaptures := make([]NamedCapture, 0, len(groups)/2) + captureGroups := make([]*CaptureGroup, 0, len(groups)/2) groupNames := compiledPattern.SubexpNames()[1:] for i, name := range groupNames { - if name == "" { + // if name == "" { + // continue + // } + start := groups[i*2] + end := groups[i*2+1] + if start == -1 || end == -1 { continue } - if groups[i*2] == -1 || groups[i*2+1] == -1 { - continue - } - namedCaptures = append(namedCaptures, NamedCapture{ + + captureGroups = append(captureGroups, &CaptureGroup{ Name: name, - Value: captures[i], - Range: [2]int{groups[i*2], groups[i*2+1]}, + Value: content[start:end], + Range: [2]int{start, end}, }) } - log.Printf("Named captures: %v", namedCaptures) + log.Printf("Capture groups: %v", captureGroups) - if err := p.ToLua(L, captures); err != nil { + if err := p.ToLua(L, captureGroups); err != nil { log.Printf("Error setting Lua variables: %v", err) continue } log.Println("Lua variables set successfully") - for _, capture := range namedCaptures { - if capture.Name == "" { - continue - } - if val, err := strconv.ParseFloat(capture.Value, 64); err == nil { - L.SetGlobal(capture.Name, lua.LNumber(val)) - } else { - L.SetGlobal(capture.Name, lua.LString(capture.Value)) - } - } - if err := L.DoString(luaExpr); err != nil { - log.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err) + log.Printf("Error executing Lua code %s for groups %+v: %v", luaExpr, captureGroups, err) continue } log.Println("Lua code executed successfully") // Get modifications from Lua - modResult, err := p.FromLua(L) + captureGroups, err = p.FromLuaCustom(L, captureGroups) if err != nil { log.Printf("Error getting modifications: %v", err) continue } - // Apply modifications to the matched text - modsMap, ok := modResult.(map[int]string) - if !ok || len(modsMap) == 0 { - log.Println("No modifications to apply") - continue - } - replacement := "" replacementVar := L.GetGlobal("replacement") if replacementVar.Type() != lua.LTNil { replacement = replacementVar.String() } if replacement == "" { - commands := make([]ReplaceCommand, 0, len(modsMap)) + commands := make([]ReplaceCommand, 0, len(captureGroups)) // Apply the modifications to the original match replacement = match - for i := len(modsMap) - 1; i >= 0; i-- { - newVal := modsMap[i] - log.Printf("Applying modification: %s", newVal) + for _, capture := range captureGroups { + log.Printf("Applying modification: %s", capture.Updated) // Indices of the group are relative to content // To relate them to match we have to subtract the match start index - groupStart := groups[i*2] - matchIndices[0] - groupEnd := groups[i*2+1] - matchIndices[0] // replacement = replacement[:groupStart] + newVal + replacement[groupEnd:] - log.Printf("%#v", groupStart) - log.Printf("%#v", groupEnd) - // commands = append(commands, ReplaceCommand{ - // From: groupStart, - // To: groupEnd, - // With: newVal, - // }) - } - - for i := len(namedCaptures) - 1; i >= 0; i-- { - capture := namedCaptures[i] - if capture.Name == "" { - continue - } - groupStart := capture.Range[0] - matchIndices[0] - groupEnd := capture.Range[1] - matchIndices[0] - luaValue := L.GetGlobal(capture.Name).String() - // replacement = replacement[:groupStart] + luaValue + replacement[groupEnd:] commands = append(commands, ReplaceCommand{ - From: groupStart, - To: groupEnd, - With: luaValue, + From: capture.Range[0] - matchIndices[0], + To: capture.Range[1] - matchIndices[0], + With: capture.Updated, }) } sort.Slice(commands, func(i, j int) bool { return commands[i].From > commands[j].From }) - + for _, command := range commands { replacement = replacement[:command.From] + command.With + replacement[command.To:] } @@ -307,3 +244,42 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr log.Printf("Process completed with %d modifications", modificationCount) return result, modificationCount, len(indices), nil } + +// The order of these replaces is important +// This one handles !num-s inside of named capture groups +// If it were not here our !num in a named capture group would +// Expand to another capture group in the capture group +// We really only want one (our named) capture group +func ResolveRegexPlaceholders(pattern string) string { + // Handle special pattern modifications + if !strings.HasPrefix(pattern, "(?s)") { + pattern = "(?s)" + pattern + log.Printf("Pattern modified to include (?s): %s", pattern) + } + + namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`) + pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string { + parts := namedGroupNum.FindStringSubmatch(match) + if len(parts) != 3 { + return match + } + replacement := `-?\d*\.?\d+` + return parts[1] + replacement + }) + pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`) + pattern = strings.ReplaceAll(pattern, "!any", `.*?`) + repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`) + // !rep(pattern, count) repeats the pattern n times + // Inserting !any between each repetition + pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string { + parts := repPattern.FindStringSubmatch(match) + if len(parts) != 3 { + return match + } + repeatedPattern := parts[1] + count := parts[2] + repetitions, _ := strconv.Atoi(count) + return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern + }) + return pattern +}