// Command main walks a directory tree, sniffs the first line of every regular
// file, and reports the file extensions whose files look predominantly binary.
package main

import (
	"bufio"
	"errors"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"sync/atomic"
	"time"
)

const (
	// maxDirWorkers caps the number of directories read concurrently. Every
	// os.ReadDir call holds a file descriptor, so the cap keeps the walker
	// below typical per-process descriptor limits.
	maxDirWorkers = 64

	// maxFileWorkers caps the number of files opened concurrently while
	// classifying them, for the same reason.
	maxFileWorkers = 64

	// sniffLimit is the maximum number of bytes of a file's first line that
	// is inspected when deciding whether the file is text or binary.
	sniffLimit = 64 * 1024
)

// Package-level loggers, configured in init.
var (
	// Error writes highlighted error messages to stderr.
	Error *log.Logger
	// Warning writes highlighted warning messages to stdout.
	Warning *log.Logger
)

// init routes the default logger to stdout and builds the colored Error and
// Warning loggers.
func init() {
	log.SetFlags(log.Lmicroseconds | log.Lshortfile)
	log.SetOutput(os.Stdout)
	Error = log.New(os.Stderr, fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m"), log.Lmicroseconds|log.Lshortfile)
	Warning = log.New(os.Stdout, fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m"), log.Lmicroseconds|log.Lshortfile)
}

// ExtData accumulates, for one file extension, how many files were classified
// as binary and how many as text.
type ExtData struct {
	ext         string
	binaryCount int
	textCount   int
}

// fileKind is the classification result for a single file.
type fileKind struct {
	ext    string
	binary bool
}

// main scans the directory given as the first positional argument (default
// ".") and logs every extension whose binary count exceeds twice its text
// count.
func main() {
	flag.Parse()
	dir := flag.Arg(0)
	if dir == "" {
		dir = "."
	}
	dir = NormalizePath(dir)
	log.Printf("Scanning directory: %s", dir)

	files := make(chan string, 10000)
	status := make(chan error)
	go GetSyncFilesRecursively(dir, files, status)

	for _, extData := range countByExtension(files, maxFileWorkers) {
		if extData.binaryCount > extData.textCount*2 {
			log.Printf("Extension: %s, Binary Count: %d, Text Count: %d", extData.ext, extData.binaryCount, extData.textCount)
		}
	}
}

// countByExtension classifies every path received on files using a fixed pool
// of workers and returns the per-extension tallies once files is closed and
// drained. A workers value below 1 is treated as 1.
//
// Only the calling goroutine touches the returned map and its ExtData values;
// workers hand results over a channel, so the counters need no locking.
func countByExtension(files <-chan string, workers int) map[string]*ExtData {
	if workers < 1 {
		workers = 1
	}

	results := make(chan fileKind, workers)
	var wg sync.WaitGroup
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for file := range files {
				binary, ok := classifyFile(file)
				if !ok {
					continue
				}
				results <- fileKind{ext: filepath.Ext(file), binary: binary}
			}
		}()
	}
	// Close results only after every worker has sent its last value.
	go func() {
		wg.Wait()
		close(results)
	}()

	counts := make(map[string]*ExtData)
	for r := range results {
		extData, found := counts[r.ext]
		if !found {
			extData = &ExtData{ext: r.ext}
			counts[r.ext] = extData
		}
		if r.binary {
			extData.binaryCount++
		} else {
			extData.textCount++
		}
	}
	return counts
}

// classifyFile reports whether the first line of the file at path looks
// binary. ok is false when the file is skipped: it is not a regular file, it
// is empty, or it could not be opened or read (those failures are logged
// here).
func classifyFile(path string) (binary, ok bool) {
	// Skip directories, FIFOs, sockets and devices: opening a FIFO can block
	// indefinitely and the others have no meaningful first line. A failing
	// Stat is left for os.Open below to report.
	if info, err := os.Stat(path); err == nil && !info.Mode().IsRegular() {
		return false, false
	}

	f, err := os.Open(path)
	if err != nil {
		log.Printf("Error opening file %s: %v", path, err)
		return false, false
	}
	defer f.Close()

	// ReadSlice returns whatever is buffered when the line is longer than
	// sniffLimit (bufio.ErrBufferFull) or ends at EOF without a newline, so
	// long single-line files are still classified from their prefix.
	line, err := bufio.NewReaderSize(f, sniffLimit).ReadSlice('\n')
	switch {
	case err == nil, errors.Is(err, bufio.ErrBufferFull):
		// A complete line or a sniffLimit-sized prefix is available.
	case errors.Is(err, io.EOF):
		if len(line) == 0 {
			return false, false // empty file: nothing to classify
		}
	default:
		log.Printf("Error reading line from file %s: %v", path, err)
		return false, false
	}
	return IsStringBinary(string(line)), true
}

// IsStringBinary reports whether s contains a character that is not expected
// in plain ASCII text. Tab, line feed and carriage return are accepted; any
// other control character, DEL, and every non-ASCII rune (including bytes
// that are not valid UTF-8) make the string count as binary.
func IsStringBinary(s string) bool {
	for _, c := range s {
		switch {
		case c == '\t', c == '\n', c == '\r':
			// Ordinary whitespace found in text files.
		case c < ' ', c > '~':
			return true
		}
	}
	return false
}

// GetSyncFilesRecursively walks the tree rooted at input and sends the path of
// every non-directory entry on output. It blocks until the whole tree has been
// visited, printing a progress line to stdout every 200ms, and closes both
// status and output before returning.
//
// Nothing is ever sent on status; it is only closed. Directories that cannot
// be read are logged by processDirectory and skipped.
func GetSyncFilesRecursively(input string, output chan string, status chan error) {
	defer close(output)
	defer close(status)

	var filesProcessed, foldersProcessed, activeWorkers int32

	directories := make(chan string, 100000)
	workerPool := make(chan struct{}, maxDirWorkers) // semaphore for os.ReadDir
	finished := make(chan struct{})

	stopProgress := make(chan struct{})
	progressStopped := make(chan struct{})
	go func() {
		defer close(progressStopped)
		ticker := time.NewTicker(200 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				dirCount := len(directories)
				workers := atomic.LoadInt32(&activeWorkers)
				fmt.Printf("\rFiles processed: %8d; Folders processed: %8d; Active workers: %8d; Directory queue: %8d", atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed), workers, dirCount)
			case <-stopProgress:
				// Final progress update
				fmt.Printf("\nFiles processed: %8d; Folders processed: %8d; Completed successfully\n", atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed))
				return
			}
		}
	}()

	directories <- input

	// This loop is the only receiver on directories and the only place that
	// tracks running workers. A worker signals finished only after all of its
	// sends to directories have completed, so once running drops to zero with
	// an empty queue no further work can appear.
	running := 0
	for done := false; !done; {
		select {
		case directory := <-directories:
			running++
			atomic.AddInt32(&activeWorkers, 1)
			// The semaphore is acquired inside the goroutine so this loop
			// never blocks and can always drain directories; blocking here
			// could deadlock against workers waiting to enqueue.
			go func(dir string) {
				workerPool <- struct{}{}
				atomic.AddInt32(&foldersProcessed, 1)
				processDirectory(dir, directories, output, &filesProcessed)
				<-workerPool
				atomic.AddInt32(&activeWorkers, -1)
				finished <- struct{}{}
			}(directory)
		case <-finished:
			running--
			done = running == 0 && len(directories) == 0
		}
	}

	// Wait for the reporter so its final line is printed before the summary
	// below and before the caller observes output being closed.
	close(stopProgress)
	<-progressStopped

	log.Printf("Files processed: %d; Folders processed: %d", atomic.LoadInt32(&filesProcessed), atomic.LoadInt32(&foldersProcessed))
}

// processDirectory reads one directory, sending each subdirectory path on
// directories and every other entry's path on output. filesProcessed is
// incremented atomically after each path sent on output. A directory that
// cannot be read is logged and skipped.
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) {
	entries, err := os.ReadDir(directory)
	if err != nil {
		log.Printf("Error reading directory %s: %+v", directory, err)
		return
	}
	for _, entry := range entries {
		path := filepath.Join(directory, entry.Name())
		if entry.IsDir() {
			directories <- path
			continue
		}
		output <- path
		atomic.AddInt32(filesProcessed, 1)
	}
}

// NormalizePath removes double quotes from input, cleans the result with
// filepath.Clean and converts separators to forward slashes. Quotes are
// removed before cleaning so that a quoted path with a trailing separator
// (for example "dir/" including the quotes) is still fully cleaned.
func NormalizePath(input string) string {
	input = strings.ReplaceAll(input, "\"", "")
	input = filepath.Clean(input)
	return filepath.ToSlash(input)
}