package main

import (
	"flag"
	"image/jpeg"
	"os"
	"path/filepath"
	"sync"

	logger "git.site.quack-lab.dev/dave/cylogger"
	"github.com/bmatcuk/doublestar/v4"
	"github.com/corona10/goimagehash"
)

// main expands the glob patterns given as positional arguments into file
// paths, computes an 8x8 extended perception hash for every file that decodes
// as a JPEG, and logs, for each file, the other files whose hash distance is
// at or below the -thresh flag (default 10).
func main() {
	thresh := flag.Int("thresh", 10, "Threshold for distance")
	flag.Parse()
	logger.InitFlag()

	hashes := &sync.Map{}
	logger.Info("Starting")
	logger.Info("Threshold: %v", *thresh)
	logger.Info("Patterns: %d", len(flag.Args()))

	// Expand every pattern. Each pattern is split into a base directory and a
	// pattern relative to it, because the glob runs against os.DirFS(base) and
	// returns matches relative to that base.
	files := make([]string, 0)
	for _, pattern := range flag.Args() {
		base, pattern := doublestar.SplitPattern(pattern)
		logger.Debug("Globbing %q from %q", pattern, base)
		matches, err := doublestar.Glob(os.DirFS(base), pattern)
		if err != nil {
			logger.Error("Failed to glob pattern: %v", err)
			continue
		}
		logger.Debug("Glob %q in %q got %d matches", pattern, base, len(matches))
		for _, match := range matches {
			// Re-attach the base so the path can be opened from the
			// current working directory.
			match = filepath.Join(base, match)
			logger.Trace("Adding %q", match)
			files = append(files, match)
		}
	}
	logger.Info("Patterns expanded to %d files", len(files))

	// Hash each expanded file in its own goroutine. Iterate over the expanded
	// file list, not flag.Args(): the raw arguments are glob patterns, and
	// opening them directly would discard the expansion done above.
	wg := sync.WaitGroup{}
	for _, file := range files {
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			log := logger.Default.WithPrefix(file)

			imgfile, err := os.Open(file)
			if err != nil {
				log.Error("Failed to open file: %v", err)
				return
			}
			defer imgfile.Close()

			img, err := jpeg.Decode(imgfile)
			if err != nil {
				log.Error("Failed to decode image: %v", err)
				return
			}

			hash, err := goimagehash.ExtPerceptionHash(img, 8, 8)
			if err != nil {
				log.Error("Failed to calculate hash: %v", err)
				return
			}
			log.Debug("Hashed: %v", hash)
			hashes.Store(file, hash)
		}(file)
	}

	groupedImages := make(map[string][]string)
	// All hashes must be stored before the pairwise comparison starts.
	wg.Wait()

	// Compare every file against every other file. The nested Range visits
	// each ordered pair, so a match between a and b is recorded under both
	// a's key and b's key.
	hashes.Range(func(key, value interface{}) bool {
		filea := key.(string)
		hasha := value.(*goimagehash.ExtImageHash)
		hashes.Range(func(key, value interface{}) bool {
			fileb := key.(string)
			hashb := value.(*goimagehash.ExtImageHash)
			if filea == fileb {
				// Skip comparing a file with itself.
				return true
			}
			distance, err := hasha.Distance(hashb)
			if err != nil {
				logger.Error("Failed to calculate distance: %v", err)
				return true
			}
			logger.Debug("Distance between %v and %v: %v", filea, fileb, distance)
			if distance <= *thresh {
				groupedImages[filea] = append(groupedImages[filea], fileb)
			}
			return true
		})
		return true
	})

	for file, files := range groupedImages {
		logger.Info("Grouped %v with %v", file, files)
	}
	logger.Info("Done")
}