package processor import ( "cook/utils" "fmt" "regexp" "strconv" "strings" "time" logger "git.site.quack-lab.dev/dave/cylogger" lua "github.com/yuin/gopher-lua" ) // regexLogger is a scoped logger for the processor/regex package. var regexLogger = logger.Default.WithPrefix("processor/regex") type CaptureGroup struct { Name string Value string Updated string Range [2]int } // ProcessContent applies regex replacement with Lua processing // The filename here exists ONLY so we can pass it to the lua environment // It's not used for anything else func ProcessRegex(content string, command utils.ModifyCommand, filename string) ([]utils.ReplaceCommand, error) { processRegexLogger := regexLogger.WithPrefix("ProcessRegex").WithField("commandName", command.Name).WithField("file", filename) processRegexLogger.Debug("Starting regex processing for file") processRegexLogger.Trace("Initial file content length: %d", len(content)) processRegexLogger.Trace("Command details: %+v", command) var commands []utils.ReplaceCommand // Start timing the regex processing startTime := time.Now() // We don't HAVE to do this multiple times for a pattern // But it's quick enough for us to not care pattern := resolveRegexPlaceholders(command.Regex) processRegexLogger.Debug("Resolved regex placeholders. Pattern: %s", pattern) // I'm not too happy about having to trim regex, we could have meaningful whitespace or newlines // But it's a compromise that allows us to use | in yaml // Otherwise we would have to escape every god damn pair of quotation marks // And a bunch of other shit pattern = strings.TrimSpace(pattern) processRegexLogger.Debug("Trimmed regex pattern: %s", pattern) patternCompileStart := time.Now() compiledPattern, err := regexp.Compile(pattern) if err != nil { processRegexLogger.Error("Error compiling pattern %q: %v", pattern, err) return commands, fmt.Errorf("error compiling pattern: %v", err) } processRegexLogger.Debug("Compiled pattern successfully in %v", time.Since(patternCompileStart)) // Same here, it's just string concatenation, it won't kill us // More important is that we don't fuck up the command // But we shouldn't be able to since it's passed by value previousLuaExpr := command.Lua luaExpr := BuildLuaScript(command.Lua) processRegexLogger.Debug("Transformed Lua expression: %q → %q", previousLuaExpr, luaExpr) processRegexLogger.Trace("Full Lua script: %q", utils.LimitString(luaExpr, 200)) // Process all regex matches matchFindStart := time.Now() indices := compiledPattern.FindAllStringSubmatchIndex(content, -1) matchFindDuration := time.Since(matchFindStart) processRegexLogger.Debug("Found %d matches in content of length %d (search took %v)", len(indices), len(content), matchFindDuration) processRegexLogger.Trace("Match indices: %v", indices) // Log pattern complexity metrics patternComplexity := estimatePatternComplexity(pattern) processRegexLogger.Debug("Pattern complexity estimate: %d", patternComplexity) if len(indices) == 0 { processRegexLogger.Warning("No matches found for regex: %q", pattern) processRegexLogger.Debug("Total regex processing time: %v", time.Since(startTime)) return commands, nil } // We walk backwards because we're replacing something with something else that might be longer // And in the case it is longer than the original all indicces past that change will be fucked up // By going backwards we fuck up all the indices to the end of the file that we don't care about // Because there either aren't any (last match) or they're already modified (subsequent matches) for i, matchIndices := range indices { matchLogger := processRegexLogger.WithField("matchNum", i+1) matchLogger.Debug("Processing match %d of %d", i+1, len(indices)) matchLogger.Trace("Match indices: %v (match position %d-%d)", matchIndices, matchIndices[0], matchIndices[1]) L, err := NewLuaState() if err != nil { matchLogger.Error("Error creating Lua state: %v", err) return commands, fmt.Errorf("error creating Lua state: %v", err) } L.SetGlobal("file", lua.LString(filename)) // Hmm... Maybe we don't want to defer this.. // Maybe we want to close them every iteration // We'll leave it as is for now defer L.Close() matchLogger.Trace("Lua state created successfully for match %d", i+1) // Why we're doing this whole song and dance of indices is to properly handle empty matches // Plus it's a little cleaner to surgically replace our matches // If we were to use string.replace and encountered an empty match there'd be nothing to replace // But using indices an empty match would have its starting and ending indices be the same // So when we're cutting open the array we say 0:7 + modified + 7:end // As if concatenating in the middle of the array // Plus it supports lookarounds matchContent := content[matchIndices[0]:matchIndices[1]] matchPreview := utils.LimitString(matchContent, 50) matchLogger.Trace("Matched content: %q (length: %d)", matchPreview, len(matchContent)) groups := matchIndices[2:] if len(groups) <= 0 { matchLogger.Warning("No capture groups found for match %q and regex %q", matchPreview, pattern) continue } if len(groups)%2 == 1 { matchLogger.Warning("Invalid number of group indices (%d), should be even: %v", len(groups), groups) continue } // Count how many valid groups we have validGroups := 0 for j := 0; j < len(groups); j += 2 { if groups[j] != -1 && groups[j+1] != -1 { validGroups++ } } matchLogger.Debug("Found %d valid capture groups in match", validGroups) for _, index := range groups { if index == -1 { matchLogger.Warning("Negative index encountered in match indices %v. This may indicate an issue with the regex pattern or an empty/optional capture group.", matchIndices) continue } } // We have to use array to preserve order // Very important for the reconstruction step // Because we must overwrite the values in reverse order // See comments a few dozen lines above for more details captureGroups := make([]*CaptureGroup, 0, len(groups)/2) groupNames := compiledPattern.SubexpNames()[1:] for i, name := range groupNames { start := groups[i*2] end := groups[i*2+1] if start == -1 || end == -1 { matchLogger.Debug("Skipping empty or unmatched capture group #%d (name: %q)", i+1, name) continue } value := content[start:end] captureGroups = append(captureGroups, &CaptureGroup{ Name: name, Value: value, Range: [2]int{start, end}, }) // Include name info in log if available if name != "" { matchLogger.Trace("Capture group '%s': %q (pos %d-%d)", name, value, start, end) } else { matchLogger.Trace("Capture group #%d: %q (pos %d-%d)", i+1, value, start, end) } } // Use the DeduplicateGroups flag to control whether to deduplicate capture groups if !command.NoDedup { matchLogger.Debug("Deduplicating capture groups as specified in command settings") captureGroups = deduplicateGroups(captureGroups) matchLogger.Trace("Capture groups after deduplication: %v", captureGroups) } else { matchLogger.Debug("Skipping deduplication of capture groups (NoDedup is true)") } if err := toLua(L, captureGroups); err != nil { matchLogger.Error("Failed to set Lua variables for capture groups: %v", err) continue } matchLogger.Debug("Set %d capture groups as Lua variables", len(captureGroups)) matchLogger.Trace("Lua globals set for capture groups") if err := L.DoString(luaExpr); err != nil { matchLogger.Error("Lua script execution failed: %v\nScript: %s\nCapture Groups: %+v", err, utils.LimitString(luaExpr, 200), captureGroups) continue } matchLogger.Debug("Lua script executed successfully") // Get modifications from Lua updatedCaptureGroups, err := fromLua(L, captureGroups) if err != nil { matchLogger.Error("Failed to retrieve modifications from Lua: %v", err) continue } matchLogger.Debug("Retrieved updated values from Lua") matchLogger.Trace("Updated capture groups from Lua: %v", updatedCaptureGroups) replacement := "" replacementVar := L.GetGlobal("replacement") if replacementVar.Type() != lua.LTNil { replacement = replacementVar.String() matchLogger.Debug("Using global replacement variable from Lua: %q", replacement) } // Check if modification flag is set modifiedVal := L.GetGlobal("modified") if modifiedVal.Type() != lua.LTBool || !lua.LVAsBool(modifiedVal) { matchLogger.Debug("Skipping match - no modifications indicated by Lua script") continue } if replacement == "" { // Apply the modifications to the original match replacement = matchContent // Count groups that were actually modified modifiedGroupsCount := 0 for _, capture := range updatedCaptureGroups { if capture.Value != capture.Updated { modifiedGroupsCount++ } } matchLogger.Info("%d of %d capture groups identified for modification", modifiedGroupsCount, len(updatedCaptureGroups)) for _, capture := range updatedCaptureGroups { if capture.Value == capture.Updated { matchLogger.Debug("Capture group unchanged: %s", utils.LimitString(capture.Value, 50)) continue } // Log what changed with context matchLogger.Debug("Capture group %q scheduled for modification: %q → %q", capture.Name, utils.LimitString(capture.Value, 50), utils.LimitString(capture.Updated, 50)) // Indices of the group are relative to content // To relate them to match we have to subtract the match start index // replacement = replacement[:groupStart] + newVal + replacement[groupEnd:] commands = append(commands, utils.ReplaceCommand{ From: capture.Range[0], To: capture.Range[1], With: capture.Updated, }) matchLogger.Trace("Added replacement command: %+v", commands[len(commands)-1]) } } else { matchLogger.Debug("Using full replacement string from Lua: %q", utils.LimitString(replacement, 50)) commands = append(commands, utils.ReplaceCommand{ From: matchIndices[0], To: matchIndices[1], With: replacement, }) matchLogger.Trace("Added full replacement command: %+v", commands[len(commands)-1]) } } processRegexLogger.Debug("Total regex processing time: %v", time.Since(startTime)) processRegexLogger.Debug("Generated %d total modifications", len(commands)) return commands, nil } func deduplicateGroups(captureGroups []*CaptureGroup) []*CaptureGroup { deduplicateGroupsLogger := regexLogger.WithPrefix("deduplicateGroups") deduplicateGroupsLogger.Debug("Starting deduplication of capture groups") deduplicateGroupsLogger.Trace("Input capture groups: %v", captureGroups) // Preserve input order and drop any group that overlaps with an already accepted group accepted := make([]*CaptureGroup, 0, len(captureGroups)) for _, group := range captureGroups { groupLogger := deduplicateGroupsLogger.WithField("groupName", group.Name).WithField("groupRange", group.Range) groupLogger.Debug("Processing capture group") overlaps := false for _, kept := range accepted { // Overlap if start < keptEnd and end > keptStart (adjacent is allowed) if group.Range[0] < kept.Range[1] && group.Range[1] > kept.Range[0] { overlaps = true break } } if overlaps { groupLogger.Warning("Overlapping capture group detected and skipped.") continue } groupLogger.Debug("Capture group does not overlap with previously accepted groups. Adding.") accepted = append(accepted, group) } deduplicateGroupsLogger.Debug("Finished deduplication. Original %d groups, %d deduplicated.", len(captureGroups), len(accepted)) deduplicateGroupsLogger.Trace("Deduplicated groups: %v", accepted) return accepted } // The order of these replaces is important // This one handles !num-s inside of named capture groups // If it were not here our !num in a named capture group would // Expand to another capture group in the capture group // We really only want one (our named) capture group func resolveRegexPlaceholders(pattern string) string { resolveLogger := regexLogger.WithPrefix("resolveRegexPlaceholders").WithField("originalPattern", utils.LimitString(pattern, 100)) resolveLogger.Debug("Resolving regex placeholders in pattern") // Handle special pattern modifications if !strings.HasPrefix(pattern, "(?s)") { pattern = "(?s)" + pattern resolveLogger.Debug("Prepended '(?s)' to pattern for single-line mode") } namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`) pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string { funcLogger := resolveLogger.WithPrefix("namedGroupNumReplace").WithField("match", utils.LimitString(match, 50)) funcLogger.Debug("Processing named group !num placeholder") parts := namedGroupNum.FindStringSubmatch(match) if len(parts) != 3 { funcLogger.Warning("Unexpected number of submatches for namedGroupNum: %d. Returning original match.", len(parts)) return match } replacement := `-?\d*\.?\d+` funcLogger.Trace("Replacing !num in named group with: %q", replacement) return parts[1] + replacement }) resolveLogger.Debug("Handled named group !num placeholders") pattern = strings.ReplaceAll(pattern, "!num", `(-?\d*\.?\d+)`) resolveLogger.Debug("Replaced !num with numeric capture group") pattern = strings.ReplaceAll(pattern, "!any", `.*?`) resolveLogger.Debug("Replaced !any with non-greedy wildcard") repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`) // !rep(pattern, count) repeats the pattern n times // Inserting !any between each repetition pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string { funcLogger := resolveLogger.WithPrefix("repPatternReplace").WithField("match", utils.LimitString(match, 50)) funcLogger.Debug("Processing !rep placeholder") parts := repPattern.FindStringSubmatch(match) if len(parts) != 3 { funcLogger.Warning("Unexpected number of submatches for repPattern: %d. Returning original match.", len(parts)) return match } repeatedPattern := parts[1] countStr := parts[2] repetitions, err := strconv.Atoi(countStr) if err != nil { funcLogger.Error("Failed to parse repetition count %q: %v. Returning original match.", countStr, err) return match } var finalReplacement string if repetitions > 0 { finalReplacement = strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern } else { finalReplacement = "" } funcLogger.Trace("Replaced !rep with %d repetitions of %q: %q", repetitions, utils.LimitString(repeatedPattern, 30), utils.LimitString(finalReplacement, 100)) return finalReplacement }) resolveLogger.Debug("Handled !rep placeholders") resolveLogger.Debug("Finished resolving regex placeholders") resolveLogger.Trace("Final resolved pattern: %q", utils.LimitString(pattern, 100)) return pattern } // ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings) func toLua(L *lua.LState, data interface{}) error { toLuaLogger := regexLogger.WithPrefix("toLua") toLuaLogger.Debug("Setting capture groups as Lua variables") captureGroups, ok := data.([]*CaptureGroup) if !ok { toLuaLogger.Error("Invalid data type for toLua. Expected []*CaptureGroup, got %T", data) return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data) } toLuaLogger.Trace("Input capture groups: %v", captureGroups) groupindex := 0 for _, capture := range captureGroups { groupLogger := toLuaLogger.WithField("captureGroup", capture.Name).WithField("value", utils.LimitString(capture.Value, 50)) groupLogger.Debug("Processing capture group for Lua") if capture.Name == "" { // We don't want to change the name of the capture group // Even if it's empty tempName := fmt.Sprintf("%d", groupindex+1) groupindex++ groupLogger.Debug("Unnamed capture group, assigning temporary name: %q", tempName) L.SetGlobal("s"+tempName, lua.LString(capture.Value)) groupLogger.Trace("Set Lua global s%s = %q", tempName, capture.Value) val, err := strconv.ParseFloat(capture.Value, 64) if err == nil { L.SetGlobal("v"+tempName, lua.LNumber(val)) groupLogger.Trace("Set Lua global v%s = %f", tempName, val) } else { groupLogger.Trace("Value %q is not numeric, skipping v%s assignment", capture.Value, tempName) } } else { val, err := strconv.ParseFloat(capture.Value, 64) if err == nil { L.SetGlobal(capture.Name, lua.LNumber(val)) groupLogger.Trace("Set Lua global %s = %f (numeric)", capture.Name, val) } else { L.SetGlobal(capture.Name, lua.LString(capture.Value)) groupLogger.Trace("Set Lua global %s = %q (string)", capture.Name, capture.Value) } } } toLuaLogger.Debug("Finished setting capture groups as Lua variables") return nil } // FromLua implements the Processor interface for RegexProcessor func fromLua(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) { fromLuaLogger := regexLogger.WithPrefix("fromLua") fromLuaLogger.Debug("Retrieving modifications from Lua for capture groups") fromLuaLogger.Trace("Initial capture groups: %v", captureGroups) captureIndex := 0 for _, capture := range captureGroups { groupLogger := fromLuaLogger.WithField("originalCaptureName", capture.Name).WithField("originalValue", utils.LimitString(capture.Value, 50)) groupLogger.Debug("Processing capture group to retrieve updated value") if capture.Name == "" { // This case means it was an unnamed capture group originally. // We need to reconstruct the original temporary name to fetch its updated value. // The name will be set to an integer if it was empty, then incremented. // So, we use the captureIndex to get the correct 'vX' and 'sX' variables. tempName := fmt.Sprintf("%d", captureIndex+1) groupLogger.Debug("Retrieving updated value for unnamed group (temp name: %q)", tempName) vVarName := fmt.Sprintf("v%s", tempName) sVarName := fmt.Sprintf("s%s", tempName) captureIndex++ vLuaVal := L.GetGlobal(vVarName) sLuaVal := L.GetGlobal(sVarName) groupLogger.Trace("Lua values for unnamed group: v=%v, s=%v", vLuaVal, sLuaVal) if sLuaVal.Type() == lua.LTString { capture.Updated = sLuaVal.String() groupLogger.Trace("Updated value from s%s (string): %q", tempName, capture.Updated) } // Numbers have priority if vLuaVal.Type() == lua.LTNumber { capture.Updated = vLuaVal.String() groupLogger.Trace("Updated value from v%s (numeric): %q", tempName, capture.Updated) } } else { // Easy shit, directly use the named capture group updatedValue := L.GetGlobal(capture.Name) if updatedValue.Type() != lua.LTNil { capture.Updated = updatedValue.String() groupLogger.Trace("Updated value for named group %q: %q", capture.Name, capture.Updated) } else { groupLogger.Debug("Named capture group %q not found in Lua globals or is nil. Keeping original value.", capture.Name) capture.Updated = capture.Value // Keep original if not found or nil } } groupLogger.Debug("Finished processing capture group. Original: %q, Updated: %q", utils.LimitString(capture.Value, 50), utils.LimitString(capture.Updated, 50)) } fromLuaLogger.Debug("Finished retrieving modifications from Lua") fromLuaLogger.Trace("Final updated capture groups: %v", captureGroups) return captureGroups, nil } // estimatePatternComplexity gives a rough estimate of regex pattern complexity // This can help identify potentially problematic patterns func estimatePatternComplexity(pattern string) int { estimateComplexityLogger := regexLogger.WithPrefix("estimatePatternComplexity").WithField("pattern", utils.LimitString(pattern, 100)) estimateComplexityLogger.Debug("Estimating regex pattern complexity") complexity := len(pattern) // Add complexity for potentially expensive operations complexity += strings.Count(pattern, ".*") * 10 // Greedy wildcard complexity += strings.Count(pattern, ".*?") * 5 // Non-greedy wildcard complexity += strings.Count(pattern, "[^") * 3 // Negated character class complexity += strings.Count(pattern, "\\b") * 2 // Word boundary complexity += strings.Count(pattern, "(") * 2 // Capture groups complexity += strings.Count(pattern, "(?:") * 1 // Non-capture groups complexity += strings.Count(pattern, "\\1") * 3 // Backreferences complexity += strings.Count(pattern, "{") * 2 // Counted repetition estimateComplexityLogger.Debug("Estimated pattern complexity: %d", complexity) return complexity }