diff --git a/util.go b/util.go index 2fcba80..6a456fa 100644 --- a/util.go +++ b/util.go @@ -79,76 +79,104 @@ func GetSyncFilesRecursively(input string, output chan string, status chan error var filesProcessed int32 var foldersProcessed int32 + var activeWorkers int32 + progressTicker := time.NewTicker(200 * time.Millisecond) defer progressTicker.Stop() - var wg sync.WaitGroup - var initial sync.Once - var done bool - wg.Add(1) + done := make(chan struct{}) + defer close(done) + directories := make(chan string, 100000) workerPool := make(chan struct{}, 4000) directories <- input go func() { for { - fmt.Printf("\rFiles processed: %d; Folders processed: %d; Workers: %d; Directory Stack Size: %d;", atomic.LoadInt32((&filesProcessed)), atomic.LoadInt32(&foldersProcessed), len(workerPool), len(directories)) - <-progressTicker.C + select { + case <-progressTicker.C: + dirCount := len(directories) + workers := atomic.LoadInt32(&activeWorkers) + fmt.Printf("\rFiles processed: %d; Folders processed: %d; Active workers: %d; Directory queue: %d", + atomic.LoadInt32(&filesProcessed), + atomic.LoadInt32(&foldersProcessed), + workers, + dirCount) + case <-done: + // Final progress update + fmt.Printf("\rFiles processed: %d; Folders processed: %d; Completed successfully\n", + atomic.LoadInt32(&filesProcessed), + atomic.LoadInt32(&foldersProcessed)) + return + } } }() - // log.Printf("%+v", len(workerPool)) - go func() { - for directory := range directories { - workerPool <- struct{}{} - wg.Add(1) - go func(directory string) { - atomic.AddInt32(&foldersProcessed, 1) - defer wg.Done() - defer func() { <-workerPool }() + allDone := make(chan struct{}) - files, err := os.ReadDir(directory) - if err != nil { - log.Printf("Error reading directory %s: %+v", directory, err) + go func() { + var wg sync.WaitGroup + + go func() { + for { + if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 { + time.Sleep(10 * time.Millisecond) + if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 { + close(allDone) + return + } + } + time.Sleep(50 * time.Millisecond) + } + }() + + for { + select { + case directory, ok := <-directories: + if !ok { + wg.Wait() return } - for _, file := range files { - // log.Printf("Processing file %s", file.Name()) - if file.IsDir() { - directories <- filepath.Join(directory, file.Name()) - } else { - // log.Println(file.Name(), DirRegex.MatchString(file.Name())) - if FileRegex.MatchString(file.Name()) || IsYAMLSyncFile(file.Name()) { - // log.Printf("Writing") - output <- filepath.Join(directory, file.Name()) - } - atomic.AddInt32(&filesProcessed, 1) - } - } - // log.Printf("Done reading directory %s", directory) - done = len(directories) == 0 - if done { - initial.Do(func() { - wg.Done() - }) - } - }(directory) + atomic.AddInt32(&activeWorkers, 1) + + go func(dir string) { + workerPool <- struct{}{} + + atomic.AddInt32(&foldersProcessed, 1) + processDirectory(dir, directories, output, &filesProcessed) + + <-workerPool + atomic.AddInt32(&activeWorkers, -1) + }(directory) + } } }() - // This actually does not go through ALL files sadly... - // It so happens (very often) that we manage to quit between one iteration ending - // And another beginning - // In such a state workgroup is decreased and, before it has a chance to increase, we are done - // What I should do here is only terminate if directories is empty - // ...but how do I do that? - // I might be wrong... Fuck knows... - // It also sometimes happens that wg.Wait triggers after wg.Done on line 97 but before the next (what would be!) wg.Add on line 94 - // This happens much more often with a small number of workers - // Such is the nature of race conditions... - wg.Wait() - log.Printf("Files processed: %d; Folders processed: %d", filesProcessed, foldersProcessed) + <-allDone + + log.Printf("Files processed: %d; Folders processed: %d", + atomic.LoadInt32(&filesProcessed), + atomic.LoadInt32(&foldersProcessed)) +} + +func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) { + files, err := os.ReadDir(directory) + if err != nil { + log.Printf("Error reading directory %s: %+v", directory, err) + return + } + + for _, file := range files { + if file.IsDir() { + directories <- filepath.Join(directory, file.Name()) + } else { + if FileRegex.MatchString(file.Name()) || IsYAMLSyncFile(file.Name()) { + output <- filepath.Join(directory, file.Name()) + } + atomic.AddInt32(filesProcessed, 1) + } + } } func IsYAMLSyncFile(filename string) bool {