253 lines
5.9 KiB
Go
253 lines
5.9 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"flag"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// Package-level loggers for highlighted diagnostics. Both are nil until init
// has run.
var (
	// Error writes to standard error with a highlighted "ERROR:" prefix.
	Error *log.Logger
	// Warning writes to standard output with a highlighted "Warning:" prefix.
	Warning *log.Logger
)

// init configures the standard logger and builds the Error and Warning
// loggers. All three share the same flags: microsecond timestamps plus the
// short file name and line of the call site.
func init() {
	const (
		flags = log.Lmicroseconds | log.Lshortfile

		// ANSI escape sequences: SGR 101 and SGR 93 switch the highlight on,
		// SGR 0 resets it so only the label itself is coloured.
		reset         = "\033[0m"
		errorPrefix   = "\033[0;101m" + "ERROR:" + reset + " "
		warningPrefix = "\033[0;93m" + "Warning:" + reset + " "
	)

	// The standard logger reports to stdout rather than its default stderr.
	log.SetOutput(os.Stdout)
	log.SetFlags(flags)

	Warning = log.New(os.Stdout, warningPrefix, flags)
	Error = log.New(os.Stderr, errorPrefix, flags)
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
dir := flag.Arg(0)
|
|
if dir == "" {
|
|
dir = "."
|
|
}
|
|
dir = NormalizePath(dir)
|
|
log.Printf("Scanning directory: %s", dir)
|
|
|
|
files := make(chan string, 10000)
|
|
status := make(chan error)
|
|
|
|
go GetSyncFilesRecursively(dir, files, status)
|
|
|
|
extensionTypeCount := sync.Map{}
|
|
wg := sync.WaitGroup{}
|
|
for file := range files {
|
|
wg.Add(1)
|
|
go func(file string) {
|
|
defer wg.Done()
|
|
f, err := os.Open(file)
|
|
if err != nil {
|
|
log.Printf("Error opening file %s: %v", file, err)
|
|
return
|
|
}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
if scanner.Scan() {
|
|
ext := filepath.Ext(file)
|
|
key := ext
|
|
if IsStringBinary(scanner.Text()) {
|
|
key += " (binary)"
|
|
count, _ := extensionTypeCount.LoadOrStore(key, 0)
|
|
extensionTypeCount.Store(key, count.(int)+1)
|
|
//log.Printf("Binary file: %s (%s)", file, ext)
|
|
} else {
|
|
key += " (text)"
|
|
count, _ := extensionTypeCount.LoadOrStore(key, 0)
|
|
extensionTypeCount.Store(key, count.(int)+1)
|
|
//log.Printf("Text file: %s (%s)", file, ext)
|
|
}
|
|
} else if err := scanner.Err(); err != nil {
|
|
log.Printf("Error reading line from file %s: %v", file, err)
|
|
}
|
|
|
|
f.Close()
|
|
}(file)
|
|
}
|
|
wg.Wait()
|
|
|
|
// Create a map to store raw extension names and their binary vs text counts
|
|
extensionBinaryTextCount := make(map[string][2]int)
|
|
|
|
// Collect all raw extensions and their counts
|
|
extensionTypeCount.Range(func(key, value any) bool {
|
|
keyStr := key.(string)
|
|
count := value.(int)
|
|
|
|
// Check if it's a text file (has " (text)" suffix)
|
|
if strings.HasSuffix(keyStr, " (text)") {
|
|
baseExt := strings.TrimSuffix(keyStr, " (text)")
|
|
counts, exists := extensionBinaryTextCount[baseExt]
|
|
if !exists {
|
|
counts = [2]int{0, 0}
|
|
}
|
|
counts[1] = count // index 1 for text count
|
|
extensionBinaryTextCount[baseExt] = counts
|
|
} else {
|
|
// Binary file
|
|
counts, exists := extensionBinaryTextCount[keyStr]
|
|
if !exists {
|
|
counts = [2]int{0, 0}
|
|
}
|
|
counts[0] = count // index 0 for binary count
|
|
extensionBinaryTextCount[keyStr] = counts
|
|
}
|
|
return true
|
|
})
|
|
|
|
// Get all extensions that have more binary occurrences than text
|
|
var binaryDominantExts []string
|
|
for ext, counts := range extensionBinaryTextCount {
|
|
binaryCount := counts[0]
|
|
textCount := counts[1]
|
|
|
|
if binaryCount > textCount {
|
|
binaryDominantExts = append(binaryDominantExts, ext)
|
|
}
|
|
}
|
|
|
|
// Sort the extensions
|
|
sort.Strings(binaryDominantExts)
|
|
|
|
// Print only the extensions that are more likely to be binary
|
|
fmt.Println("Extensions that are predominantly binary:")
|
|
for _, ext := range binaryDominantExts {
|
|
counts := extensionBinaryTextCount[ext]
|
|
fmt.Printf("Extension: %s, Binary Count: %d, Text Count: %d\n",
|
|
ext, counts[0], counts[1])
|
|
}
|
|
}
|
|
|
|
// IsStringBinary reports whether s looks like binary data rather than text.
//
// A string is considered binary when it contains an ASCII control character
// other than tab, line feed, carriage return or form feed, the DEL character,
// or bytes that do not decode as UTF-8. Printable ASCII and valid non-ASCII
// UTF-8 text (including a leading byte-order mark) are treated as text. The
// empty string is not binary.
func IsStringBinary(s string) bool {
	for _, c := range s {
		switch {
		case c == '\t', c == '\n', c == '\r', c == '\f':
			// Whitespace controls that routinely appear in text files.
		case c < ' ', c == 0x7f:
			return true
		case c == '\uFFFD':
			// Ranging over a string yields U+FFFD for every byte that is not
			// valid UTF-8. A replacement character that was genuinely encoded
			// in the input is flagged as well, since it marks damaged text.
			return true
		}
	}
	return false
}
|
|
|
|
func GetSyncFilesRecursively(input string, output chan string, status chan error) {
|
|
defer close(output)
|
|
defer close(status)
|
|
|
|
var filesProcessed int32
|
|
var foldersProcessed int32
|
|
var activeWorkers int32
|
|
|
|
progressTicker := time.NewTicker(200 * time.Millisecond)
|
|
defer progressTicker.Stop()
|
|
|
|
done := make(chan struct{})
|
|
defer close(done)
|
|
|
|
directories := make(chan string, 100000)
|
|
workerPool := make(chan struct{}, 4000)
|
|
directories <- input
|
|
|
|
go func() {
|
|
for {
|
|
select {
|
|
case <-progressTicker.C:
|
|
dirCount := len(directories)
|
|
workers := atomic.LoadInt32(&activeWorkers)
|
|
fmt.Printf("\rFiles processed: %8d; Folders processed: %8d; Active workers: %8d; Directory queue: %8d",
|
|
atomic.LoadInt32(&filesProcessed),
|
|
atomic.LoadInt32(&foldersProcessed),
|
|
workers,
|
|
dirCount)
|
|
case <-done:
|
|
// Final progress update
|
|
fmt.Printf("\nFiles processed: %8d; Folders processed: %8d; Completed successfully\n",
|
|
atomic.LoadInt32(&filesProcessed),
|
|
atomic.LoadInt32(&foldersProcessed))
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
allDone := make(chan struct{})
|
|
|
|
go func() {
|
|
var wg sync.WaitGroup
|
|
|
|
go func() {
|
|
for {
|
|
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
|
|
time.Sleep(10 * time.Millisecond)
|
|
if atomic.LoadInt32(&activeWorkers) == 0 && len(directories) == 0 {
|
|
close(allDone)
|
|
return
|
|
}
|
|
}
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
}()
|
|
|
|
for {
|
|
select {
|
|
case directory, ok := <-directories:
|
|
if !ok {
|
|
wg.Wait()
|
|
return
|
|
}
|
|
|
|
atomic.AddInt32(&activeWorkers, 1)
|
|
|
|
go func(dir string) {
|
|
workerPool <- struct{}{}
|
|
|
|
atomic.AddInt32(&foldersProcessed, 1)
|
|
processDirectory(dir, directories, output, &filesProcessed)
|
|
|
|
<-workerPool
|
|
atomic.AddInt32(&activeWorkers, -1)
|
|
}(directory)
|
|
}
|
|
}
|
|
}()
|
|
|
|
<-allDone
|
|
|
|
log.Printf("Files processed: %d; Folders processed: %d",
|
|
atomic.LoadInt32(&filesProcessed),
|
|
atomic.LoadInt32(&foldersProcessed))
|
|
}
|
|
|
|
// processDirectory reads a single directory and dispatches its entries.
//
// Sub-directories are sent to directories for later traversal. Regular files,
// and symbolic links that resolve to regular files, are sent to output, and
// *filesProcessed is atomically incremented for each one. Everything else is
// skipped: symbolic links to directories (following them could loop),
// dangling links, and special files such as FIFOs, sockets and devices, which
// cannot be read like ordinary files and may block whoever opens them.
//
// A directory that cannot be read is logged and skipped. Both sends block
// while the receiving channel is full.
func processDirectory(directory string, directories chan<- string, output chan<- string, filesProcessed *int32) {
	entries, err := os.ReadDir(directory)
	if err != nil {
		log.Printf("Error reading directory %s: %+v", directory, err)
		return
	}

	for _, entry := range entries {
		full := filepath.Join(directory, entry.Name())
		mode := entry.Type()

		switch {
		case mode.IsDir():
			directories <- full
			continue
		case mode&os.ModeSymlink != 0:
			// Type describes the link itself; Stat follows it to the target.
			info, err := os.Stat(full)
			if err != nil {
				log.Printf("Error resolving symlink %s: %v", full, err)
				continue
			}
			if !info.Mode().IsRegular() {
				continue
			}
		case !mode.IsRegular():
			continue
		}

		output <- full
		atomic.AddInt32(filesProcessed, 1)
	}
}
|
|
|
|
// NormalizePath returns input with every double-quote character removed, then
// lexically cleaned with filepath.Clean and converted to forward slashes.
//
// Quotes are stripped before cleaning so that a path which arrives wrapped in
// quotes, or with a stray quote left by shell escaping, is cleaned like any
// other. An input that is empty once the quotes are gone yields ".".
func NormalizePath(input string) string {
	input = strings.ReplaceAll(input, "\"", "")
	input = filepath.Clean(input)
	return filepath.ToSlash(input)
}
|