Make recursive sync file discovery much faster

By reading directories in parallel goroutines instead of walking them sequentially with filepath.WalkDir
This commit is contained in:
2024-07-01 18:28:06 +02:00
parent dcd17c21a4
commit 79794e59fe
2 changed files with 53 additions and 16 deletions

View File

@@ -28,6 +28,7 @@ const DefaultColor = White
const PathColor = Green const PathColor = Green
var DirRegex, _ = regexp.Compile(`^(.+?)[/\\]sync$`) var DirRegex, _ = regexp.Compile(`^(.+?)[/\\]sync$`)
var FileRegex, _ = regexp.Compile(`^sync$`)
var programName = os.Args[0] var programName = os.Args[0]
func main() { func main() {
@@ -131,6 +132,7 @@ func ReadFromFilesRecursively(input string, output chan LinkInstruction, status
wg.Add(1) wg.Add(1)
go func() { go func() {
defer wg.Done() defer wg.Done()
log.Println(file)
file = NormalizePath(file) file = NormalizePath(file)
log.Printf("Processing file: %s%s%s", PathColor, file, DefaultColor) log.Printf("Processing file: %s%s%s", PathColor, file, DefaultColor)

57
util.go
View File

@@ -2,10 +2,12 @@ package main
import ( import (
"fmt" "fmt"
"io/fs" "log"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"sync"
"sync/atomic"
"time" "time"
) )
@@ -67,29 +69,62 @@ func GetSyncFilesRecursively(input string, output chan string, status chan error
defer close(status) defer close(status)
var filesProcessed int32 var filesProcessed int32
var foldersProcessed int32
progressTicker := time.NewTicker(200 * time.Millisecond) progressTicker := time.NewTicker(200 * time.Millisecond)
defer progressTicker.Stop() defer progressTicker.Stop()
go func() { go func() {
for { for {
fmt.Printf("\rFiles processed: %d; Folders processed: %d;", filesProcessed, foldersProcessed)
<-progressTicker.C <-progressTicker.C
fmt.Printf("\rFiles processed: %d", filesProcessed)
} }
}() }()
err := filepath.WalkDir(input, func(path string, file fs.DirEntry, err error) error { var wg sync.WaitGroup
wg.Add(1)
var initial sync.Once
directories := make(chan string, 10000)
workerPool := make(chan struct{}, 10000)
directories <- input
log.Printf("%+v", len(workerPool))
go func() {
for directory := range directories {
workerPool <- struct{}{}
wg.Add(1)
go func(directory string) {
atomic.AddInt32(&foldersProcessed, 1)
defer wg.Done()
defer func() { <-workerPool }()
// log.Printf("Reading directory %s", directory)
files, err := os.ReadDir(directory)
if err != nil { if err != nil {
return err log.Printf("Error reading directory %s: %+v", directory, err)
return
} }
// Effectively only find files named "sync" (with no extension!!) for _, file := range files {
if !file.IsDir() && DirRegex.MatchString(path) { // log.Printf("Processing file %s", file.Name())
output <- path if file.IsDir() {
directories <- filepath.Join(directory, file.Name())
} else {
// log.Println(file.Name(), DirRegex.MatchString(file.Name()))
if FileRegex.MatchString(file.Name()) {
log.Printf("Writing")
output <- filepath.Join(directory, file.Name())
} }
filesProcessed++ atomic.AddInt32(&filesProcessed, 1)
}
}
// log.Printf("Done reading directory %s", directory)
return nil initial.Do(func() {
wg.Done()
}) })
if err != nil { }(directory)
status <- err
} }
}()
wg.Wait()
log.Printf("Done")
} }