Maybe fix the directory crawler
This commit is contained in:
130
util.go
130
util.go
@@ -79,76 +79,104 @@ func GetSyncFilesRecursively(input string, output chan string, status chan error
|
|||||||
|
|
||||||
var filesProcessed int32
|
var filesProcessed int32
|
||||||
var foldersProcessed int32
|
var foldersProcessed int32
|
||||||
|
var activeWorkers int32
|
||||||
|
|
||||||
progressTicker := time.NewTicker(200 * time.Millisecond)
|
progressTicker := time.NewTicker(200 * time.Millisecond)
|
||||||
defer progressTicker.Stop()
|
defer progressTicker.Stop()
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
done := make(chan struct{})
|
||||||
var initial sync.Once
|
defer close(done)
|
||||||
var done bool
|
|
||||||
wg.Add(1)
|
|
||||||
directories := make(chan string, 100000)
|
directories := make(chan string, 100000)
|
||||||
workerPool := make(chan struct{}, 4000)
|
workerPool := make(chan struct{}, 4000)
|
||||||
directories <- input
|
directories <- input
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for {
|
for {
|
||||||
fmt.Printf("\rFiles processed: %d; Folders processed: %d; Workers: %d; Directory Stack Size: %d;", atomic.LoadInt32((&filesProcessed)), atomic.LoadInt32(&foldersProcessed), len(workerPool), len(directories))
|
select {
|
||||||
<-progressTicker.C
|
case <-progressTicker.C:
|
||||||
|
dirCount := len(directories)
|
||||||
|
workers := atomic.LoadInt32(&activeWorkers)
|
||||||
|
fmt.Printf("\rFiles processed: %d; Folders processed: %d; Active workers: %d; Directory queue: %d",
|
||||||
|
atomic.LoadInt32(&filesProcessed),
|
||||||
|
atomic.LoadInt32(&foldersProcessed),
|
||||||
|
workers,
|
||||||
|
dirCount)
|
||||||
|
case <-done:
|
||||||
|
// Final progress update
|
||||||
|
fmt.Printf("\rFiles processed: %d; Folders processed: %d; Completed successfully\n",
|
||||||
|
atomic.LoadInt32(&filesProcessed),
|
||||||
|
atomic.LoadInt32(&foldersProcessed))
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// log.Printf("%+v", len(workerPool))
|
allDone := make(chan struct{})
|
||||||
go func() {
|
|
||||||
for directory := range directories {
|
|
||||||
workerPool <- struct{}{}
|
|
||||||
wg.Add(1)
|
|
||||||
go func(directory string) {
|
|
||||||
atomic.AddInt32(&foldersProcessed, 1)
|
|
||||||
defer wg.Done()
|
|
||||||
defer func() { <-workerPool }()
|
|
||||||
|
|
||||||
files, err := os.ReadDir(directory)
|
go func() {
|
||||||
if err != nil {
|
var wg sync.WaitGroup
|
||||||
log.Printf("Error reading directory %s: %+v", directory, err)
|
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
|
||||||
|
close(allDone)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case directory, ok := <-directories:
|
||||||
|
if !ok {
|
||||||
|
wg.Wait()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, file := range files {
|
atomic.AddInt32(&activeWorkers, 1)
|
||||||
// log.Printf("Processing file %s", file.Name())
|
|
||||||
if file.IsDir() {
|
go func(dir string) {
|
||||||
directories <- filepath.Join(directory, file.Name())
|
workerPool <- struct{}{}
|
||||||
} else {
|
|
||||||
// log.Println(file.Name(), DirRegex.MatchString(file.Name()))
|
atomic.AddInt32(&foldersProcessed, 1)
|
||||||
if FileRegex.MatchString(file.Name()) || IsYAMLSyncFile(file.Name()) {
|
processDirectory(dir, directories, output, &filesProcessed)
|
||||||
// log.Printf("Writing")
|
|
||||||
output <- filepath.Join(directory, file.Name())
|
<-workerPool
|
||||||
}
|
atomic.AddInt32(&activeWorkers, -1)
|
||||||
atomic.AddInt32(&filesProcessed, 1)
|
}(directory)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// log.Printf("Done reading directory %s", directory)
|
|
||||||
done = len(directories) == 0
|
|
||||||
if done {
|
|
||||||
initial.Do(func() {
|
|
||||||
wg.Done()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}(directory)
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// This actually does not go through ALL files sadly...
|
<-allDone
|
||||||
// It so happens (very often) that we manage to quit between one iteration ending
|
|
||||||
// And another beginning
|
log.Printf("Files processed: %d; Folders processed: %d",
|
||||||
// In such a state workgroup is decreased and, before it has a chance to increase, we are done
|
atomic.LoadInt32(&filesProcessed),
|
||||||
// What I should do here is only terminate if directories is empty
|
atomic.LoadInt32(&foldersProcessed))
|
||||||
// ...but how do I do that?
|
}
|
||||||
// I might be wrong... Fuck knows...
|
|
||||||
// It also sometimes happens that wg.Wait triggers after wg.Done on line 97 but before the next (what would be!) wg.Add on line 94
|
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) {
|
||||||
// This happens much more often with a small number of workers
|
files, err := os.ReadDir(directory)
|
||||||
// Such is the nature of race conditions...
|
if err != nil {
|
||||||
wg.Wait()
|
log.Printf("Error reading directory %s: %+v", directory, err)
|
||||||
log.Printf("Files processed: %d; Folders processed: %d", filesProcessed, foldersProcessed)
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
if file.IsDir() {
|
||||||
|
directories <- filepath.Join(directory, file.Name())
|
||||||
|
} else {
|
||||||
|
if FileRegex.MatchString(file.Name()) || IsYAMLSyncFile(file.Name()) {
|
||||||
|
output <- filepath.Join(directory, file.Name())
|
||||||
|
}
|
||||||
|
atomic.AddInt32(filesProcessed, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func IsYAMLSyncFile(filename string) bool {
|
func IsYAMLSyncFile(filename string) bool {
|
||||||
|
Reference in New Issue
Block a user