388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package processor
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"regexp"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
	"time"
 | 
						|
 | 
						|
	lua "github.com/yuin/gopher-lua"
 | 
						|
 | 
						|
	"modify/logger"
 | 
						|
	"modify/utils"
 | 
						|
)
 | 
						|
 | 
						|
// CaptureGroup describes one capture group of a single regex match, together
// with the value a Lua script produced for it.
type CaptureGroup struct {
	// Name is the group's name in the pattern; it is empty for unnamed
	// groups until fromLua fills in a 1-based ordinal.
	// Value is the text the group captured from the content.
	// Updated is the text read back from the Lua state after the script ran.
	Name, Value, Updated string

	// Range holds the start and end byte offsets of the group within the
	// content, as reported by the regexp package (end is exclusive).
	Range [2]int
}
 | 
						|
 | 
						|
// ProcessContent applies regex replacement with Lua processing
 | 
						|
// The filename here exists ONLY so we can pass it to the lua environment
 | 
						|
// It's not used for anything else
 | 
						|
func ProcessRegex(content string, command utils.ModifyCommand, filename string) ([]utils.ReplaceCommand, error) {
 | 
						|
	var commands []utils.ReplaceCommand
 | 
						|
	logger.Trace("Processing regex: %q", command.Regex)
 | 
						|
 | 
						|
	// Start timing the regex processing
 | 
						|
	startTime := time.Now()
 | 
						|
 | 
						|
	// We don't HAVE to do this multiple times for a pattern
 | 
						|
	// But it's quick enough for us to not care
 | 
						|
	pattern := resolveRegexPlaceholders(command.Regex)
 | 
						|
	logger.Debug("Compiling regex pattern: %s", pattern)
 | 
						|
 | 
						|
	patternCompileStart := time.Now()
 | 
						|
	compiledPattern, err := regexp.Compile(pattern)
 | 
						|
	if err != nil {
 | 
						|
		logger.Error("Error compiling pattern: %v", err)
 | 
						|
		return commands, fmt.Errorf("error compiling pattern: %v", err)
 | 
						|
	}
 | 
						|
	logger.Debug("Compiled pattern successfully in %v: %s", time.Since(patternCompileStart), pattern)
 | 
						|
 | 
						|
	// Same here, it's just string concatenation, it won't kill us
 | 
						|
	// More important is that we don't fuck up the command
 | 
						|
	// But we shouldn't be able to since it's passed by value
 | 
						|
	previous := command.Lua
 | 
						|
	luaExpr := BuildLuaScript(command.Lua)
 | 
						|
	logger.Debug("Transformed Lua expression: %q → %q", previous, luaExpr)
 | 
						|
 | 
						|
	// Process all regex matches
 | 
						|
	matchFindStart := time.Now()
 | 
						|
	indices := compiledPattern.FindAllStringSubmatchIndex(content, -1)
 | 
						|
	matchFindDuration := time.Since(matchFindStart)
 | 
						|
 | 
						|
	logger.Debug("Found %d matches in content of length %d (search took %v)",
 | 
						|
		len(indices), len(content), matchFindDuration)
 | 
						|
 | 
						|
	// Log pattern complexity metrics
 | 
						|
	patternComplexity := estimatePatternComplexity(pattern)
 | 
						|
	logger.Debug("Pattern complexity estimate: %d", patternComplexity)
 | 
						|
 | 
						|
	if len(indices) == 0 {
 | 
						|
		logger.Warning("No matches found for regex: %q", pattern)
 | 
						|
		logger.Debug("Total regex processing time: %v", time.Since(startTime))
 | 
						|
		return commands, nil
 | 
						|
	}
 | 
						|
 | 
						|
	// We walk backwards because we're replacing something with something else that might be longer
 | 
						|
	// And in the case it is longer than the original all indicces past that change will be fucked up
 | 
						|
	// By going backwards we fuck up all the indices to the end of the file that we don't care about
 | 
						|
	// Because there either aren't any (last match) or they're already modified (subsequent matches)
 | 
						|
	for i, matchIndices := range indices {
 | 
						|
		logger.Debug("Processing match %d of %d", i+1, len(indices))
 | 
						|
		logger.Trace("Match indices: %v (match position %d-%d)", matchIndices, matchIndices[0], matchIndices[1])
 | 
						|
 | 
						|
		L, err := NewLuaState()
 | 
						|
		if err != nil {
 | 
						|
			logger.Error("Error creating Lua state: %v", err)
 | 
						|
			return commands, fmt.Errorf("error creating Lua state: %v", err)
 | 
						|
		}
 | 
						|
		L.SetGlobal("file", lua.LString(filename))
 | 
						|
		// Hmm... Maybe we don't want to defer this..
 | 
						|
		// Maybe we want to close them every iteration
 | 
						|
		// We'll leave it as is for now
 | 
						|
		defer L.Close()
 | 
						|
		logger.Trace("Lua state created successfully for match %d", i+1)
 | 
						|
 | 
						|
		// Why we're doing this whole song and dance of indices is to properly handle empty matches
 | 
						|
		// Plus it's a little cleaner to surgically replace our matches
 | 
						|
		// If we were to use string.replace and encountered an empty match there'd be nothing to replace
 | 
						|
		// But using indices an empty match would have its starting and ending indices be the same
 | 
						|
		// So when we're cutting open the array we say 0:7 + modified + 7:end
 | 
						|
		// As if concatenating in the middle of the array
 | 
						|
		// Plus it supports lookarounds
 | 
						|
		match := content[matchIndices[0]:matchIndices[1]]
 | 
						|
		matchPreview := match
 | 
						|
		if len(match) > 50 {
 | 
						|
			matchPreview = match[:47] + "..."
 | 
						|
		}
 | 
						|
		logger.Trace("Matched content: %q (length: %d)", matchPreview, len(match))
 | 
						|
 | 
						|
		groups := matchIndices[2:]
 | 
						|
		if len(groups) <= 0 {
 | 
						|
			logger.Warning("No capture groups found for match %q and regex %q", matchPreview, pattern)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if len(groups)%2 == 1 {
 | 
						|
			logger.Warning("Invalid number of group indices (%d), should be even: %v", len(groups), groups)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// Count how many valid groups we have
 | 
						|
		validGroups := 0
 | 
						|
		for j := 0; j < len(groups); j += 2 {
 | 
						|
			if groups[j] != -1 && groups[j+1] != -1 {
 | 
						|
				validGroups++
 | 
						|
			}
 | 
						|
		}
 | 
						|
		logger.Debug("Found %d valid capture groups in match", validGroups)
 | 
						|
 | 
						|
		for _, index := range groups {
 | 
						|
			if index == -1 {
 | 
						|
				logger.Warning("Negative index encountered in match indices %v. This may indicate an issue with the regex pattern or an empty/optional capture group.", matchIndices)
 | 
						|
				continue
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// We have to use array to preserve order
 | 
						|
		// Very important for the reconstruction step
 | 
						|
		// Because we must overwrite the values in reverse order
 | 
						|
		// See comments a few dozen lines above for more details
 | 
						|
		captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
 | 
						|
		groupNames := compiledPattern.SubexpNames()[1:]
 | 
						|
		for i, name := range groupNames {
 | 
						|
			start := groups[i*2]
 | 
						|
			end := groups[i*2+1]
 | 
						|
			if start == -1 || end == -1 {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
 | 
						|
			value := content[start:end]
 | 
						|
			captureGroups = append(captureGroups, &CaptureGroup{
 | 
						|
				Name:  name,
 | 
						|
				Value: value,
 | 
						|
				Range: [2]int{start, end},
 | 
						|
			})
 | 
						|
 | 
						|
			// Include name info in log if available
 | 
						|
			if name != "" {
 | 
						|
				logger.Trace("Capture group '%s': %q (pos %d-%d)", name, value, start, end)
 | 
						|
			} else {
 | 
						|
				logger.Trace("Capture group #%d: %q (pos %d-%d)", i+1, value, start, end)
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// Use the DeduplicateGroups flag to control whether to deduplicate capture groups
 | 
						|
		if !command.NoDedup {
 | 
						|
			logger.Debug("Deduplicating capture groups as specified in command settings")
 | 
						|
			captureGroups = deduplicateGroups(captureGroups)
 | 
						|
		}
 | 
						|
 | 
						|
		if err := toLua(L, captureGroups); err != nil {
 | 
						|
			logger.Error("Failed to set Lua variables: %v", err)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		logger.Trace("Set %d capture groups as Lua variables", len(captureGroups))
 | 
						|
 | 
						|
		if err := L.DoString(luaExpr); err != nil {
 | 
						|
			logger.Error("Lua script execution failed: %v\nScript: %s\nCapture Groups: %+v",
 | 
						|
				err, luaExpr, captureGroups)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		logger.Trace("Lua script executed successfully")
 | 
						|
 | 
						|
		// Get modifications from Lua
 | 
						|
		captureGroups, err = fromLua(L, captureGroups)
 | 
						|
		if err != nil {
 | 
						|
			logger.Error("Failed to retrieve modifications from Lua: %v", err)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		logger.Trace("Retrieved updated values from Lua")
 | 
						|
 | 
						|
		replacement := ""
 | 
						|
		replacementVar := L.GetGlobal("replacement")
 | 
						|
		if replacementVar.Type() != lua.LTNil {
 | 
						|
			replacement = replacementVar.String()
 | 
						|
			logger.Debug("Using global replacement: %q", replacement)
 | 
						|
		}
 | 
						|
 | 
						|
		// Check if modification flag is set
 | 
						|
		modifiedVal := L.GetGlobal("modified")
 | 
						|
		if modifiedVal.Type() != lua.LTBool || !lua.LVAsBool(modifiedVal) {
 | 
						|
			logger.Debug("Skipping match - no modifications made by Lua script")
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if replacement == "" {
 | 
						|
			// Apply the modifications to the original match
 | 
						|
			replacement = match
 | 
						|
 | 
						|
			// Count groups that were actually modified
 | 
						|
			modifiedGroups := 0
 | 
						|
			for _, capture := range captureGroups {
 | 
						|
				if capture.Value != capture.Updated {
 | 
						|
					modifiedGroups++
 | 
						|
				}
 | 
						|
			}
 | 
						|
			logger.Info("%d of %d capture groups identified for modification", modifiedGroups, len(captureGroups))
 | 
						|
 | 
						|
			for _, capture := range captureGroups {
 | 
						|
				if capture.Value == capture.Updated {
 | 
						|
					logger.Info("Capture group unchanged: %s", LimitString(capture.Value, 50))
 | 
						|
					continue
 | 
						|
				}
 | 
						|
 | 
						|
				// Log what changed with context
 | 
						|
				logger.Debug("Capture group %s scheduled for modification: %q → %q",
 | 
						|
					capture.Name, capture.Value, capture.Updated)
 | 
						|
 | 
						|
				// Indices of the group are relative to content
 | 
						|
				// To relate them to match we have to subtract the match start index
 | 
						|
				// replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
 | 
						|
				commands = append(commands, utils.ReplaceCommand{
 | 
						|
					From: capture.Range[0],
 | 
						|
					To:   capture.Range[1],
 | 
						|
					With: capture.Updated,
 | 
						|
				})
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			commands = append(commands, utils.ReplaceCommand{
 | 
						|
				From: matchIndices[0],
 | 
						|
				To:   matchIndices[1],
 | 
						|
				With: replacement,
 | 
						|
			})
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	logger.Debug("Total regex processing time: %v", time.Since(startTime))
 | 
						|
	return commands, nil
 | 
						|
}
 | 
						|
 | 
						|
func deduplicateGroups(captureGroups []*CaptureGroup) []*CaptureGroup {
 | 
						|
	deduplicatedGroups := make([]*CaptureGroup, 0)
 | 
						|
	for _, group := range captureGroups {
 | 
						|
		overlaps := false
 | 
						|
		logger.Debug("Checking capture group: %s with range %v", group.Name, group.Range)
 | 
						|
		for _, existingGroup := range deduplicatedGroups {
 | 
						|
			logger.Debug("Comparing with existing group: %s with range %v", existingGroup.Name, existingGroup.Range)
 | 
						|
			if group.Range[0] < existingGroup.Range[1] && group.Range[1] > existingGroup.Range[0] {
 | 
						|
				overlaps = true
 | 
						|
				logger.Warning("Detected overlap between capture group '%s' and existing group '%s' in range %v-%v and %v-%v", group.Name, existingGroup.Name, group.Range[0], group.Range[1], existingGroup.Range[0], existingGroup.Range[1])
 | 
						|
				break
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if overlaps {
 | 
						|
			// We CAN just continue despite this fuckup
 | 
						|
			logger.Warning("Overlapping capture group: %s", group.Name)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		logger.Debug("No overlap detected for capture group: %s. Adding to deduplicated groups.", group.Name)
 | 
						|
		deduplicatedGroups = append(deduplicatedGroups, group)
 | 
						|
	}
 | 
						|
	return deduplicatedGroups
 | 
						|
}
 | 
						|
 | 
						|
// namedNumPlaceholderRe matches the opener of a named group immediately
// followed by !num, e.g. `?<amount>!num`.
var namedNumPlaceholderRe = regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)

// repPlaceholderRe matches `!rep(pattern, count)`; pattern may not contain a
// comma and count is a run of decimal digits.
var repPlaceholderRe = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)

// resolveRegexPlaceholders expands the shorthand placeholders of a pattern
// into plain regex syntax and returns the result:
//
//   - "(?s)" is prepended unless the pattern already starts with it;
//   - "!num" becomes a capture group matching a number, `(-?\d*\.?\d+)`;
//   - "!any" becomes the lazy wildcard `.*?`;
//   - "!rep(p, n)" becomes p repeated n times with `.*?` between repetitions
//     (n == 0 yields nothing; a count too large for an int is left untouched).
//
// The order of these replacements is important. A !num that directly follows
// a named-group opener is expanded WITHOUT its own parentheses first;
// otherwise it would turn into a second capture group nested inside the named
// one, and we really only want the one (named) group.
func resolveRegexPlaceholders(pattern string) string {
	const numberPattern = `-?\d*\.?\d+`

	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
	}

	// Keep the named-group opener ($1) and drop in the bare number pattern.
	pattern = namedNumPlaceholderRe.ReplaceAllString(pattern, "${1}"+numberPattern)
	pattern = strings.ReplaceAll(pattern, "!num", "("+numberPattern+")")
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	// !rep runs last, so placeholders inside its pattern argument have
	// already been expanded by the time it is repeated.
	pattern = repPlaceholderRe.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := repPlaceholderRe.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repeatedPattern := parts[1]

		repetitions, err := strconv.Atoi(parts[2])
		if err != nil {
			// The count is all digits, so this can only be an overflow.
			// Leave the placeholder as written instead of panicking.
			return match
		}
		if repetitions == 0 {
			// strings.Repeat panics on a negative count; zero repetitions
			// simply contribute nothing.
			return ""
		}
		return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern
	})

	return pattern
}
 | 
						|
 | 
						|
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
 | 
						|
func toLua(L *lua.LState, data interface{}) error {
 | 
						|
	captureGroups, ok := data.([]*CaptureGroup)
 | 
						|
	if !ok {
 | 
						|
		return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
 | 
						|
	}
 | 
						|
 | 
						|
	groupindex := 0
 | 
						|
	for _, capture := range captureGroups {
 | 
						|
		if capture.Name == "" {
 | 
						|
			// We don't want to change the name of the capture group
 | 
						|
			// Even if it's empty
 | 
						|
			tempName := fmt.Sprintf("%d", groupindex+1)
 | 
						|
			groupindex++
 | 
						|
 | 
						|
			L.SetGlobal("s"+tempName, lua.LString(capture.Value))
 | 
						|
 | 
						|
			val, err := strconv.ParseFloat(capture.Value, 64)
 | 
						|
			if err == nil {
 | 
						|
				L.SetGlobal("v"+tempName, lua.LNumber(val))
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			val, err := strconv.ParseFloat(capture.Value, 64)
 | 
						|
			if err == nil {
 | 
						|
				L.SetGlobal(capture.Name, lua.LNumber(val))
 | 
						|
			} else {
 | 
						|
				L.SetGlobal(capture.Name, lua.LString(capture.Value))
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// FromLua implements the Processor interface for RegexProcessor
 | 
						|
func fromLua(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
 | 
						|
	captureIndex := 0
 | 
						|
	for _, capture := range captureGroups {
 | 
						|
		if capture.Name == "" {
 | 
						|
			capture.Name = fmt.Sprintf("%d", captureIndex+1)
 | 
						|
 | 
						|
			vVarName := fmt.Sprintf("v%s", capture.Name)
 | 
						|
			sVarName := fmt.Sprintf("s%s", capture.Name)
 | 
						|
			captureIndex++
 | 
						|
 | 
						|
			vLuaVal := L.GetGlobal(vVarName)
 | 
						|
			sLuaVal := L.GetGlobal(sVarName)
 | 
						|
 | 
						|
			if sLuaVal.Type() == lua.LTString {
 | 
						|
				capture.Updated = sLuaVal.String()
 | 
						|
			}
 | 
						|
			// Numbers have priority
 | 
						|
			if vLuaVal.Type() == lua.LTNumber {
 | 
						|
				capture.Updated = vLuaVal.String()
 | 
						|
			}
 | 
						|
		} else {
 | 
						|
			// Easy shit
 | 
						|
			capture.Updated = L.GetGlobal(capture.Name).String()
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return captureGroups, nil
 | 
						|
}
 | 
						|
 | 
						|
// estimatePatternComplexity gives a rough, purely textual estimate of how
// expensive a regex pattern may be, to help spot potentially problematic
// patterns. The score is the pattern length plus a weight for every
// occurrence of a construct that tends to be costly.
//
// Each construct is counted under exactly one weight: a lazy wildcard `.*?`
// is not also counted as a greedy `.*`, and a non-capturing group `(?:` is
// not also counted as a generic group. The scan is literal, so escaped
// characters such as `\(` or `\{` still count.
func estimatePatternComplexity(pattern string) int {
	lazyWildcards := strings.Count(pattern, ".*?")
	// Every ".*?" starts with ".*", so subtract them to get the greedy ones.
	greedyWildcards := strings.Count(pattern, ".*") - lazyWildcards

	nonCaptureGroups := strings.Count(pattern, "(?:")
	// Every "(?:" starts with "(", so subtract them to get the other groups.
	otherGroups := strings.Count(pattern, "(") - nonCaptureGroups

	complexity := len(pattern)

	// Add complexity for potentially expensive operations
	complexity += greedyWildcards * 10              // Greedy wildcard
	complexity += lazyWildcards * 5                 // Non-greedy wildcard
	complexity += strings.Count(pattern, "[^") * 3  // Negated character class
	complexity += strings.Count(pattern, "\\b") * 2 // Word boundary
	complexity += otherGroups * 2                   // Capture groups
	complexity += nonCaptureGroups * 1              // Non-capture groups
	complexity += strings.Count(pattern, "\\1") * 3 // Backreferences
	complexity += strings.Count(pattern, "{") * 2   // Counted repetition

	return complexity
}
 |