Rework regex grouping to avoid changing the same area twice
This commit is contained in:
@@ -16,106 +16,88 @@ type RegexProcessor struct{}
|
|||||||
|
|
||||||
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
|
// ToLua sets capture groups as Lua variables (v1, v2, etc. for numeric values and s1, s2, etc. for strings)
|
||||||
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
|
func (p *RegexProcessor) ToLua(L *lua.LState, data interface{}) error {
|
||||||
captures, ok := data.([]string)
|
captureGroups, ok := data.([]*CaptureGroup)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("expected []string for captures, got %T", data)
|
return fmt.Errorf("expected []*CaptureGroup for captures, got %T", data)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set variables for each capture group, starting from v1/s1 for the first capture
|
groupindex := 0
|
||||||
for i := 0; i < len(captures); i++ {
|
for _, capture := range captureGroups {
|
||||||
// Set string version (always available as s1, s2, etc.)
|
if capture.Name == "" {
|
||||||
L.SetGlobal(fmt.Sprintf("s%d", i+1), lua.LString(captures[i]))
|
// We don't want to change the name of the capture group
|
||||||
|
// Even if it's empty
|
||||||
|
tempName := fmt.Sprintf("%d", groupindex+1)
|
||||||
|
groupindex++
|
||||||
|
|
||||||
// Try to convert to number and set v1, v2, etc.
|
L.SetGlobal("s"+tempName, lua.LString(capture.Value))
|
||||||
if val, err := strconv.ParseFloat(captures[i], 64); err == nil {
|
|
||||||
L.SetGlobal(fmt.Sprintf("v%d", i+1), lua.LNumber(val))
|
val, err := strconv.ParseFloat(capture.Value, 64)
|
||||||
|
if err == nil {
|
||||||
|
L.SetGlobal("v"+tempName, lua.LNumber(val))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
val, err := strconv.ParseFloat(capture.Value, 64)
|
||||||
|
if err == nil {
|
||||||
|
L.SetGlobal(capture.Name, lua.LNumber(val))
|
||||||
|
} else {
|
||||||
|
L.SetGlobal(capture.Name, lua.LString(capture.Value))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// FromLua implements the Processor interface for RegexProcessor
|
|
||||||
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
|
func (p *RegexProcessor) FromLua(L *lua.LState) (interface{}, error) {
|
||||||
// Get the modified values after Lua execution
|
// Stub to satisfy interface
|
||||||
modifications := make(map[int]string)
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Check for modifications to v1-v12 and s1-s12
|
// FromLuaCustom reads the Lua globals back into each capture group's Updated field
|
||||||
for i := 0; i < 12; i++ {
|
func (p *RegexProcessor) FromLuaCustom(L *lua.LState, captureGroups []*CaptureGroup) ([]*CaptureGroup, error) {
|
||||||
// Check both v and s variables to see if any were modified
|
captureIndex := 0
|
||||||
vVarName := fmt.Sprintf("v%d", i+1)
|
for _, capture := range captureGroups {
|
||||||
sVarName := fmt.Sprintf("s%d", i+1)
|
if capture.Name == "" {
|
||||||
|
capture.Name = fmt.Sprintf("%d", captureIndex+1)
|
||||||
|
|
||||||
vLuaVal := L.GetGlobal(vVarName)
|
vVarName := fmt.Sprintf("v%s", capture.Name)
|
||||||
sLuaVal := L.GetGlobal(sVarName)
|
sVarName := fmt.Sprintf("s%s", capture.Name)
|
||||||
|
captureIndex++
|
||||||
|
|
||||||
// If our value is a number then it's very likely we want it to be a number
|
vLuaVal := L.GetGlobal(vVarName)
|
||||||
// And not a string
|
sLuaVal := L.GetGlobal(sVarName)
|
||||||
// If we do want it to be a string we will cast it into a string in lua
|
|
||||||
// wait that wouldn't work... Casting v to a string would not load it here
|
if sLuaVal.Type() == lua.LTString {
|
||||||
if vLuaVal.Type() == lua.LTNumber {
|
capture.Updated = sLuaVal.String()
|
||||||
modifications[i] = vLuaVal.String()
|
}
|
||||||
continue
|
// Numbers have priority
|
||||||
|
if vLuaVal.Type() == lua.LTNumber {
|
||||||
|
capture.Updated = vLuaVal.String()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Named groups are read back from the Lua global of the same name
|
||||||
|
capture.Updated = L.GetGlobal(capture.Name).String()
|
||||||
}
|
}
|
||||||
if sLuaVal.Type() == lua.LTString {
|
|
||||||
modifications[i] = sLuaVal.String()
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return modifications, nil
|
return captureGroups, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type NamedCapture struct {
|
type CaptureGroup struct {
|
||||||
Name string
|
Name string
|
||||||
Value string
|
Value string
|
||||||
Range [2]int
|
Updated string
|
||||||
|
Range [2]int
|
||||||
}
|
}
|
||||||
type ReplaceCommand struct {
|
type ReplaceCommand struct {
|
||||||
From int
|
From int
|
||||||
To int
|
To int
|
||||||
With string
|
With string
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProcessContent applies regex replacement with Lua processing
|
// ProcessContent applies regex replacement with Lua processing
|
||||||
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
|
func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr string) (string, int, int, error) {
|
||||||
// Handle special pattern modifications
|
pattern = ResolveRegexPlaceholders(pattern)
|
||||||
if !strings.HasPrefix(pattern, "(?s)") {
|
|
||||||
pattern = "(?s)" + pattern
|
|
||||||
log.Printf("Pattern modified to include (?s): %s", pattern)
|
|
||||||
}
|
|
||||||
|
|
||||||
// The order of these replaces is important
|
|
||||||
// This one handles !num-s inside of named capture groups
|
|
||||||
// If it were not here our !num in a named capture group would
|
|
||||||
// Expand to another capture group in the capture group
|
|
||||||
// We really only want one (our named) capture group
|
|
||||||
namedGroupNum := regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)
|
|
||||||
pattern = namedGroupNum.ReplaceAllStringFunc(pattern, func(match string) string {
|
|
||||||
parts := namedGroupNum.FindStringSubmatch(match)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
replacement := `-?\d*\.?\d+`
|
|
||||||
return parts[1] + replacement
|
|
||||||
})
|
|
||||||
pattern = strings.ReplaceAll(pattern, "!num", `"?(-?\d*\.?\d+)"?`)
|
|
||||||
pattern = strings.ReplaceAll(pattern, "!any", `.*?`)
|
|
||||||
repPattern := regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
|
|
||||||
// !rep(pattern, count) repeats the pattern n times
|
|
||||||
// Inserting !any between each repetition
|
|
||||||
pattern = repPattern.ReplaceAllStringFunc(pattern, func(match string) string {
|
|
||||||
parts := repPattern.FindStringSubmatch(match)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
repeatedPattern := parts[1]
|
|
||||||
count := parts[2]
|
|
||||||
repetitions, _ := strconv.Atoi(count)
|
|
||||||
return strings.Repeat(repeatedPattern+".*?", repetitions-1) + repeatedPattern
|
|
||||||
})
|
|
||||||
|
|
||||||
compiledPattern, err := regexp.Compile(pattern)
|
compiledPattern, err := regexp.Compile(pattern)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Error compiling pattern: %v", err)
|
log.Printf("Error compiling pattern: %v", err)
|
||||||
@@ -181,120 +163,75 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
captures := make([]string, 0, len(groups)/2)
|
|
||||||
for j := 0; j < len(groups); j += 2 {
|
|
||||||
if groups[j] == -1 || groups[j+1] == -1 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
captures = append(captures, content[groups[j]:groups[j+1]])
|
|
||||||
}
|
|
||||||
log.Printf("Captured groups: %v", captures)
|
|
||||||
|
|
||||||
// We have to use array to preserve order
|
// We have to use array to preserve order
|
||||||
// Very important for the reconstruction step
|
// Very important for the reconstruction step
|
||||||
// Because we must overwrite the values in reverse order
|
// Because we must overwrite the values in reverse order
|
||||||
// See comments a few dozen lines above for more details
|
// See comments a few dozen lines above for more details
|
||||||
namedCaptures := make([]NamedCapture, 0, len(groups)/2)
|
captureGroups := make([]*CaptureGroup, 0, len(groups)/2)
|
||||||
groupNames := compiledPattern.SubexpNames()[1:]
|
groupNames := compiledPattern.SubexpNames()[1:]
|
||||||
for i, name := range groupNames {
|
for i, name := range groupNames {
|
||||||
if name == "" {
|
// if name == "" {
|
||||||
|
// continue
|
||||||
|
// }
|
||||||
|
start := groups[i*2]
|
||||||
|
end := groups[i*2+1]
|
||||||
|
if start == -1 || end == -1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if groups[i*2] == -1 || groups[i*2+1] == -1 {
|
|
||||||
continue
|
captureGroups = append(captureGroups, &CaptureGroup{
|
||||||
}
|
|
||||||
namedCaptures = append(namedCaptures, NamedCapture{
|
|
||||||
Name: name,
|
Name: name,
|
||||||
Value: captures[i],
|
Value: content[start:end],
|
||||||
Range: [2]int{groups[i*2], groups[i*2+1]},
|
Range: [2]int{start, end},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Named captures: %v", namedCaptures)
|
log.Printf("Capture groups: %v", captureGroups)
|
||||||
|
|
||||||
if err := p.ToLua(L, captures); err != nil {
|
if err := p.ToLua(L, captureGroups); err != nil {
|
||||||
log.Printf("Error setting Lua variables: %v", err)
|
log.Printf("Error setting Lua variables: %v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Println("Lua variables set successfully")
|
log.Println("Lua variables set successfully")
|
||||||
|
|
||||||
for _, capture := range namedCaptures {
|
|
||||||
if capture.Name == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if val, err := strconv.ParseFloat(capture.Value, 64); err == nil {
|
|
||||||
L.SetGlobal(capture.Name, lua.LNumber(val))
|
|
||||||
} else {
|
|
||||||
L.SetGlobal(capture.Name, lua.LString(capture.Value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := L.DoString(luaExpr); err != nil {
|
if err := L.DoString(luaExpr); err != nil {
|
||||||
log.Printf("Error executing Lua code %s for group %s: %v", luaExpr, captures, err)
|
log.Printf("Error executing Lua code %s for groups %+v: %v", luaExpr, captureGroups, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Println("Lua code executed successfully")
|
log.Println("Lua code executed successfully")
|
||||||
|
|
||||||
// Get modifications from Lua
|
// Get modifications from Lua
|
||||||
modResult, err := p.FromLua(L)
|
captureGroups, err = p.FromLuaCustom(L, captureGroups)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("Error getting modifications: %v", err)
|
log.Printf("Error getting modifications: %v", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply modifications to the matched text
|
|
||||||
modsMap, ok := modResult.(map[int]string)
|
|
||||||
if !ok || len(modsMap) == 0 {
|
|
||||||
log.Println("No modifications to apply")
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
replacement := ""
|
replacement := ""
|
||||||
replacementVar := L.GetGlobal("replacement")
|
replacementVar := L.GetGlobal("replacement")
|
||||||
if replacementVar.Type() != lua.LTNil {
|
if replacementVar.Type() != lua.LTNil {
|
||||||
replacement = replacementVar.String()
|
replacement = replacementVar.String()
|
||||||
}
|
}
|
||||||
if replacement == "" {
|
if replacement == "" {
|
||||||
commands := make([]ReplaceCommand, 0, len(modsMap))
|
commands := make([]ReplaceCommand, 0, len(captureGroups))
|
||||||
// Apply the modifications to the original match
|
// Apply the modifications to the original match
|
||||||
replacement = match
|
replacement = match
|
||||||
for i := len(modsMap) - 1; i >= 0; i-- {
|
for _, capture := range captureGroups {
|
||||||
newVal := modsMap[i]
|
log.Printf("Applying modification: %s", capture.Updated)
|
||||||
log.Printf("Applying modification: %s", newVal)
|
|
||||||
// Indices of the group are relative to content
|
// Indices of the group are relative to content
|
||||||
// To relate them to match we have to subtract the match start index
|
// To relate them to match we have to subtract the match start index
|
||||||
groupStart := groups[i*2] - matchIndices[0]
|
|
||||||
groupEnd := groups[i*2+1] - matchIndices[0]
|
|
||||||
// replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
|
// replacement = replacement[:groupStart] + newVal + replacement[groupEnd:]
|
||||||
log.Printf("%#v", groupStart)
|
|
||||||
log.Printf("%#v", groupEnd)
|
|
||||||
// commands = append(commands, ReplaceCommand{
|
|
||||||
// From: groupStart,
|
|
||||||
// To: groupEnd,
|
|
||||||
// With: newVal,
|
|
||||||
// })
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := len(namedCaptures) - 1; i >= 0; i-- {
|
|
||||||
capture := namedCaptures[i]
|
|
||||||
if capture.Name == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
groupStart := capture.Range[0] - matchIndices[0]
|
|
||||||
groupEnd := capture.Range[1] - matchIndices[0]
|
|
||||||
luaValue := L.GetGlobal(capture.Name).String()
|
|
||||||
// replacement = replacement[:groupStart] + luaValue + replacement[groupEnd:]
|
|
||||||
commands = append(commands, ReplaceCommand{
|
commands = append(commands, ReplaceCommand{
|
||||||
From: groupStart,
|
From: capture.Range[0] - matchIndices[0],
|
||||||
To: groupEnd,
|
To: capture.Range[1] - matchIndices[0],
|
||||||
With: luaValue,
|
With: capture.Updated,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
sort.Slice(commands, func(i, j int) bool {
|
sort.Slice(commands, func(i, j int) bool {
|
||||||
return commands[i].From > commands[j].From
|
return commands[i].From > commands[j].From
|
||||||
})
|
})
|
||||||
|
|
||||||
for _, command := range commands {
|
for _, command := range commands {
|
||||||
replacement = replacement[:command.From] + command.With + replacement[command.To:]
|
replacement = replacement[:command.From] + command.With + replacement[command.To:]
|
||||||
}
|
}
|
||||||
@@ -307,3 +244,42 @@ func (p *RegexProcessor) ProcessContent(content string, pattern string, luaExpr
|
|||||||
log.Printf("Process completed with %d modifications", modificationCount)
|
log.Printf("Process completed with %d modifications", modificationCount)
|
||||||
return result, modificationCount, len(indices), nil
|
return result, modificationCount, len(indices), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// numberPattern matches an optionally signed integer or decimal number.
const numberPattern = `-?\d*\.?\d+`

var (
	// namedGroupNumPattern finds a !num placeholder that directly follows the
	// "?<name>" opening of a named capture group.
	namedGroupNumPattern = regexp.MustCompile(`(?:(\?<[^>]+>)(!num))`)
	// repPlaceholderPattern finds !rep(pattern, count) placeholders.
	repPlaceholderPattern = regexp.MustCompile(`!rep\(([^,]+),\s*(\d+)\)`)
)

// ResolveRegexPlaceholders expands the custom placeholders in pattern into
// plain regular-expression syntax and returns the resulting pattern.
//
// The following rewrites are applied, in this order:
//   - "(?s)" is prepended unless the pattern already starts with it.
//   - "!num" directly after "?<name>" becomes a bare number pattern.
//   - any remaining "!num" becomes an optionally quoted, captured number.
//   - "!any" becomes ".*?".
//   - "!rep(p, n)" becomes p repeated n times with ".*?" between repetitions.
//
// A "!rep" with a count of 0 expands to the empty string, and a "!rep" whose
// count does not fit in an int is left untouched.
func ResolveRegexPlaceholders(pattern string) string {
	if !strings.HasPrefix(pattern, "(?s)") {
		pattern = "(?s)" + pattern
		log.Printf("Pattern modified to include (?s): %s", pattern)
	}

	// The order of these replaces is important.
	// The named-group rewrite must run first: otherwise the generic !num
	// rewrite below would expand to another capture group inside the named
	// group, and we only want one (the named) capture group there.
	pattern = namedGroupNumPattern.ReplaceAllString(pattern, "${1}"+numberPattern)
	pattern = strings.ReplaceAll(pattern, "!num", `"?(`+numberPattern+`)"?`)
	pattern = strings.ReplaceAll(pattern, "!any", `.*?`)

	return repPlaceholderPattern.ReplaceAllStringFunc(pattern, func(match string) string {
		parts := repPlaceholderPattern.FindStringSubmatch(match)
		if len(parts) != 3 {
			return match
		}
		repetitions, err := strconv.Atoi(parts[2])
		if err != nil {
			// The count is all digits, so the only failure is overflow;
			// leave the placeholder as written instead of guessing.
			return match
		}
		if repetitions == 0 {
			// strings.Repeat panics on a negative count, so repetitions-1
			// must never be reached with zero.
			return ""
		}
		return strings.Repeat(parts[1]+".*?", repetitions-1) + parts[1]
	})
}
|
||||||
|
Reference in New Issue
Block a user