package processor

import (
	"fmt"
	"regexp"
	"sort"
	"strconv"
	"strings"

	lua "github.com/yuin/gopher-lua"

	"modify/logger"
	"modify/utils"
)

// numberPattern is the regex fragment that the "!num" placeholder expands to.
const numberPattern = `-?\d*\.?\d+`

var (
	// namedGroupNumRe matches a "!num" placeholder that directly follows a
	// named-group header. Both spellings accepted by Go's regexp syntax are
	// recognised: "?<name>" and "?P<name>".
	namedGroupNumRe = regexp.MustCompile(`(?:(\?P?<[^>]+>)(!num))`)

	// repPlaceholderRe matches the "!rep(pattern, count)" placeholder.
	repPlaceholderRe = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
)

// CaptureGroup describes one regex capture group of a single match.
type CaptureGroup struct {
	// Name is the group name; it is empty for unnamed groups until fromLua
	// assigns them their 1-based ordinal.
	Name string
	// Value is the text captured from the original content.
	Value string
	// Updated is the value read back from Lua after the script ran.
	Updated string
	// Range holds the start and end byte offsets of the group, relative to
	// the whole content (not to the match).
	Range [2]int
}

// ProcessRegex applies the regex in command.Pattern to content and runs the
// Lua script built from command.LuaExpr once per match.
//
// A match is rewritten only when the script leaves the global "modified" set
// to true. If the script also sets the global "replacement", the whole match
// is replaced by it; otherwise the updated capture-group values are spliced
// back into the match.
//
// It returns the resulting content, the number of matches that were modified,
// the total number of matches, and an error if the pattern does not compile
// or a Lua state cannot be created.
func ProcessRegex(content string, command utils.ModifyCommand) (string, int, int, error) {
	// Resolving and compiling per call is redundant for a repeated pattern,
	// but it is cheap enough not to matter.
	pattern := resolveRegexPlaceholders(command.Pattern)
	logger.Debug("Compiling regex pattern: %s", pattern)

	compiledPattern, err := regexp.Compile(pattern)
	if err != nil {
		logger.Error("Error compiling pattern: %v", err)
		return "", 0, 0, fmt.Errorf("error compiling pattern: %w", err)
	}
	logger.Debug("Compiled pattern successfully: %s", pattern)

	// command is passed by value, so the caller's command is never modified.
	previous := command.LuaExpr
	luaExpr := BuildLuaScript(command.LuaExpr)
	logger.Debug("Transformed Lua expression: %q → %q", previous, luaExpr)

	modificationCount := 0
	result := content
	indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
	groupNames := compiledPattern.SubexpNames()[1:]
	logger.Debug("Found %d matches in content of length %d", len(indices), len(content))

	// Walk the matches backwards: a replacement may be longer or shorter than
	// the text it replaces, which shifts every offset after it. Going from the
	// last match to the first only invalidates offsets that were already
	// handled, so the remaining match indices stay valid for result.
	for i := len(indices) - 1; i >= 0; i-- {
		logger.Debug("Processing match %d of %d", i+1, len(indices))
		matchIndices := indices[i]

		replacement, modified, err := processRegexMatch(content, pattern, luaExpr, groupNames, matchIndices, i+1)
		if err != nil {
			return "", 0, 0, err
		}
		if !modified {
			continue
		}

		modificationCount++
		// Splice by index rather than by string replacement so that empty
		// matches (start == end) are handled as a pure insertion.
		result = result[:matchIndices[0]] + replacement + result[matchIndices[1]:]
		logger.Debug("Match #%d processed, running modification count: %d", i+1, modificationCount)
	}

	logger.Info("Regex processing complete: %d modifications from %d matches", modificationCount, len(indices))
	return result, modificationCount, len(indices), nil
}

// processRegexMatch runs luaExpr against a single match and reports the text
// that should replace it.
//
// The boolean result is false when the match must be left untouched (no
// capture groups, a Lua failure, or the script did not set "modified").
// A non-nil error is returned only when the Lua state cannot be created.
// The Lua state lives for exactly one call, so it is closed as soon as the
// match has been handled instead of piling up until all matches are done.
func processRegexMatch(content, pattern, luaExpr string, groupNames []string, matchIndices []int, matchNumber int) (string, bool, error) {
	L, err := NewLuaState()
	if err != nil {
		logger.Error("Error creating Lua state: %v", err)
		return "", false, fmt.Errorf("error creating Lua state: %w", err)
	}
	defer L.Close()
	logger.Trace("Lua state created successfully for match %d", matchNumber)

	logger.Trace("Match indices: %v (match position %d-%d)", matchIndices, matchIndices[0], matchIndices[1])

	match := content[matchIndices[0]:matchIndices[1]]
	matchPreview := regexMatchPreview(match)
	logger.Trace("Matched content: %q (length: %d)", matchPreview, len(match))

	// Everything after the first pair is capture-group offsets, two per group.
	groups := matchIndices[2:]
	if len(groups) == 0 {
		logger.Warning("No capture groups found for match %q and regex %q", matchPreview, pattern)
		return "", false, nil
	}
	if len(groups)%2 == 1 {
		logger.Warning("Invalid number of group indices (%d), should be even: %v", len(groups), groups)
		return "", false, nil
	}

	captureGroups := collectCaptureGroups(content, groupNames, groups, matchIndices)
	captureGroups = deduplicateGroups(captureGroups)

	if err := toLua(L, captureGroups); err != nil {
		logger.Error("Failed to set Lua variables: %v", err)
		return "", false, nil
	}
	logger.Trace("Set %d capture groups as Lua variables", len(captureGroups))

	if err := L.DoString(luaExpr); err != nil {
		logger.Error("Lua script execution failed: %v\nScript: %s\nCapture Groups: %+v", err, luaExpr, captureGroups)
		return "", false, nil
	}
	logger.Trace("Lua script executed successfully")

	captureGroups, err = fromLua(L, captureGroups)
	if err != nil {
		logger.Error("Failed to retrieve modifications from Lua: %v", err)
		return "", false, nil
	}
	logger.Trace("Retrieved updated values from Lua")

	replacement := ""
	replacementVar := L.GetGlobal("replacement")
	hasReplacement := replacementVar.Type() != lua.LTNil
	if hasReplacement {
		replacement = replacementVar.String()
		logger.Debug("Using global replacement: %q", replacement)
	}

	// Only matches explicitly flagged by the script are rewritten.
	modifiedVal := L.GetGlobal("modified")
	if modifiedVal.Type() != lua.LTBool || !lua.LVAsBool(modifiedVal) {
		logger.Debug("Skipping match - no modifications made by Lua script")
		return "", false, nil
	}

	// Without an explicit replacement the script communicated its changes
	// through the capture-group variables, so rebuild the match from them.
	// An explicitly set replacement (even an empty one) always wins.
	if !hasReplacement {
		replacement = applyCaptureUpdates(match, matchIndices[0], captureGroups)
	}

	logger.Debug("Replacing match %q with %q", matchPreview, regexMatchPreview(replacement))
	return replacement, true, nil
}

// collectCaptureGroups converts the flat start/end offsets in groups into
// CaptureGroup values, in group order, skipping groups that did not
// participate in the match (offset -1).
func collectCaptureGroups(content string, groupNames []string, groups []int, matchIndices []int) []*CaptureGroup {
	validGroups := 0
	for j := 0; j < len(groups); j += 2 {
		if groups[j] != -1 && groups[j+1] != -1 {
			validGroups++
		}
	}
	logger.Debug("Found %d valid capture groups in match", validGroups)

	for _, index := range groups {
		if index == -1 {
			logger.Warning("Negative index encountered in match indices %v. This may indicate an issue with the regex pattern or an empty/optional capture group.", matchIndices)
		}
	}

	// A slice (not a map) keeps the groups in their declaration order.
	captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
	for i, name := range groupNames {
		start := groups[i*2]
		end := groups[i*2+1]
		if start == -1 || end == -1 {
			continue
		}

		value := content[start:end]
		captureGroups = append(captureGroups, &CaptureGroup{
			Name:  name,
			Value: value,
			Range: [2]int{start, end},
		})

		if name != "" {
			logger.Trace("Capture group '%s': %q (pos %d-%d)", name, value, start, end)
		} else {
			logger.Trace("Capture group #%d: %q (pos %d-%d)", i+1, value, start, end)
		}
	}
	return captureGroups
}

// applyCaptureUpdates returns match with every capture group whose Updated
// value differs from its original Value replaced by that updated value.
// matchStart is the offset of match inside the content; group ranges are
// relative to the content and are shifted by it.
func applyCaptureUpdates(match string, matchStart int, captureGroups []*CaptureGroup) string {
	changed := make([]*CaptureGroup, 0, len(captureGroups))
	for _, capture := range captureGroups {
		if capture.Value == capture.Updated {
			logger.Trace("Capture group unchanged: %s", capture.Value)
			continue
		}
		logger.Debug("Modifying group %s: %q → %q", capture.Name, capture.Value, capture.Updated)
		changed = append(changed, capture)
	}
	logger.Debug("%d of %d capture groups were modified", len(changed), len(captureGroups))

	if len(changed) == 0 {
		return match
	}

	// Group order is not guaranteed to be position order (alternations and
	// repetitions can reorder them), so sort by position before stitching.
	sort.SliceStable(changed, func(a, b int) bool {
		if changed[a].Range[0] != changed[b].Range[0] {
			return changed[a].Range[0] < changed[b].Range[0]
		}
		return changed[a].Range[1] < changed[b].Range[1]
	})

	var b strings.Builder
	cursor := 0
	for _, capture := range changed {
		from := capture.Range[0] - matchStart
		to := capture.Range[1] - matchStart
		// Defensive: never slice outside the match or backwards.
		if from < cursor || to < from || to > len(match) {
			logger.Warning("Skipping capture group %s with unusable range %v", capture.Name, capture.Range)
			continue
		}
		b.WriteString(match[cursor:from])
		b.WriteString(capture.Updated)
		cursor = to
	}
	b.WriteString(match[cursor:])
	return b.String()
}

// regexMatchPreview shortens s for logging: strings longer than 50 bytes are
// cut to their first 47 bytes followed by "...".
func regexMatchPreview(s string) string {
	if len(s) > 50 {
		return s[:47] + "..."
	}
	return s
}

// deduplicateGroups drops every capture group whose range overlaps a group
// that was kept earlier, so the remaining groups can be replaced
// independently. Earlier groups win over later ones.
func deduplicateGroups(captureGroups []*CaptureGroup) []*CaptureGroup {
	deduplicatedGroups := make([]*CaptureGroup, 0, len(captureGroups))
	for _, group := range captureGroups {
		overlaps := false
		logger.Debug("Checking capture group: %s with range %v", group.Name, group.Range)
		for _, existingGroup := range deduplicatedGroups {
			logger.Debug("Comparing with existing group: %s with range %v", existingGroup.Name, existingGroup.Range)
			if group.Range[0] < existingGroup.Range[1] && group.Range[1] > existingGroup.Range[0] {
				overlaps = true
				logger.Warning("Detected overlap between capture group '%s' and existing group '%s' in range %v-%v and %v-%v", group.Name, existingGroup.Name, group.Range[0], group.Range[1], existingGroup.Range[0], existingGroup.Range[1])
				break
			}
		}
		if overlaps {
			// Processing can continue without the overlapping group.
			logger.Error("Overlapping capture group: %s", group.Name)
			continue
		}
		logger.Debug("No overlap detected for capture group: %s. Adding to deduplicated groups.", group.Name)
		deduplicatedGroups = append(deduplicatedGroups, group)
	}
	return deduplicatedGroups
}

// resolveRegexPlaceholders expands the custom placeholders in pattern into
// plain regex syntax and returns the result:
//
//   - "(?s)" is prepended unless the pattern already starts with it
//   - "!num" becomes a capture group for an optionally quoted number
//   - "!any" becomes a lazy ".*?"
//   - "!rep(p, n)" becomes p repeated n times, separated by ".*?"
//
// The order of the replacements matters: "!num" directly inside a named
// group is expanded first and without its own parentheses, otherwise the
// named group would end up containing a second, nested capture group.
func resolveRegexPlaceholders(pattern string) string {
	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
		// Use fmt.Printf for test compatibility
		fmt.Printf("Pattern modified to include (?s): %s\n", pattern)
	}

	pattern = namedGroupNumRe.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := namedGroupNumRe.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		return parts[1] + numberPattern
	})
	pattern = strings.ReplaceAll(pattern, "!num", `"?(`+numberPattern+`)"?`)
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	pattern = repPlaceholderRe.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := repPlaceholderRe.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repeatedPattern := parts[1]
		repetitions, err := strconv.Atoi(parts[2])
		if err != nil {
			// The count does not fit an int; leave the placeholder as it is
			// instead of attempting an enormous expansion.
			return match
		}
		if repetitions <= 0 {
			// Zero repetitions expand to nothing. strings.Repeat panics on a
			// negative count, which is what repetitions-1 would be here.
			return ""
		}
		return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern
	})

	return pattern
}

// toLua exposes the capture groups to the Lua state as globals.
//
// Unnamed groups are numbered from 1 in slice order: the raw text is stored
// in s1, s2, ... and, when the text parses as a float, the number is also
// stored in v1, v2, ... . Named groups are stored under their own name, as a
// number when the text parses as a float and as a string otherwise.
//
// data must be a []*CaptureGroup; any other type yields an error.
func toLua(L *lua.LState, data interface{}) error {
	captureGroups, ok := data.([]*CaptureGroup)
	if !ok {
		return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
	}

	groupIndex := 0
	for _, capture := range captureGroups {
		val, err := strconv.ParseFloat(capture.Value, 64)
		isNumber := err == nil

		if capture.Name == "" {
			// The group's Name is deliberately left empty here; only the
			// Lua variable gets an ordinal.
			groupIndex++
			ordinal := strconv.Itoa(groupIndex)
			L.SetGlobal("s"+ordinal, lua.LString(capture.Value))
			if isNumber {
				L.SetGlobal("v"+ordinal, lua.LNumber(val))
			}
			continue
		}

		if isNumber {
			L.SetGlobal(capture.Name, lua.LNumber(val))
		} else {
			L.SetGlobal(capture.Name, lua.LString(capture.Value))
		}
	}

	return nil
}

// fromLua reads the capture-group globals back from the Lua state and stores
// them in each group's Updated field.
//
// Unnamed groups are given their 1-based ordinal as Name and are read from
// s<n> and v<n>; when v<n> holds a number it takes priority over s<n>. Named
// groups are read from the global of the same name. The returned slice is the
// same slice that was passed in, and the error is always nil.
func fromLua(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
	captureIndex := 0
	for _, capture := range captureGroups {
		if capture.Name != "" {
			capture.Updated = L.GetGlobal(capture.Name).String()
			continue
		}

		captureIndex++
		capture.Name = strconv.Itoa(captureIndex)

		sLuaVal := L.GetGlobal("s" + capture.Name)
		if sLuaVal.Type() == lua.LTString {
			capture.Updated = sLuaVal.String()
		}
		// Numbers have priority over the string variant.
		vLuaVal := L.GetGlobal("v" + capture.Name)
		if vLuaVal.Type() == lua.LTNumber {
			capture.Updated = vLuaVal.String()
		}
	}

	return captureGroups, nil
}