Files
ImageDeduplicator/main.go
2025-05-23 13:08:45 +02:00

79 lines
1.8 KiB
Go

package main
import (
	"flag"
	"image/jpeg"
	"os"
	"runtime"
	"sort"
	"sync"

	logger "git.site.quack-lab.dev/dave/cylogger"
	"github.com/corona10/goimagehash"
)
// main computes a perceptual hash for every JPEG named as a positional
// argument and logs, for each file, the other files whose hash distance is at
// most the -thresh flag.
//
// Files that cannot be opened, decoded or hashed are logged and left out of
// the comparison. A path given more than once is only compared once because
// hashes are keyed by path.
func main() {
	thresh := flag.Int("thresh", 10, "Threshold for distance")
	flag.Parse()
	// NOTE(review): call order kept as-is; confirm that cylogger's InitFlag is
	// intended to run after flag.Parse.
	logger.InitFlag()

	files := flag.Args()
	logger.Info("Starting")
	logger.Info("Threshold: %v", *thresh)
	logger.Info("Files: %d", len(files))

	// hashes maps a file path to its hash; mu guards it while workers run.
	hashes := make(map[string]*goimagehash.ExtImageHash, len(files))
	var mu sync.Mutex
	var wg sync.WaitGroup

	// sem bounds how many files are open and being decoded at the same time,
	// so a large argument list cannot exhaust file descriptors or memory.
	sem := make(chan struct{}, runtime.GOMAXPROCS(0))

	for _, file := range files {
		sem <- struct{}{} // blocks until a worker slot is free
		wg.Add(1)
		go func(file string) {
			defer wg.Done()
			defer func() { <-sem }()

			log := logger.Default.WithPrefix(file)
			imgfile, err := os.Open(file)
			if err != nil {
				log.Error("Failed to open file: %v", err)
				return
			}
			defer imgfile.Close()

			img, err := jpeg.Decode(imgfile)
			if err != nil {
				log.Error("Failed to decode image: %v", err)
				return
			}

			hash, err := goimagehash.ExtPerceptionHash(img, 8, 8)
			if err != nil {
				log.Error("Failed to calculate hash: %v", err)
				return
			}
			log.Debug("Hashed: %v", hash)

			mu.Lock()
			hashes[file] = hash
			mu.Unlock()
		}(file)
	}
	wg.Wait()

	// Sort the successfully hashed paths so that comparison order, group
	// contents and the final report are identical from run to run.
	names := make([]string, 0, len(hashes))
	for name := range hashes {
		names = append(names, name)
	}
	sort.Strings(names)

	// Visit every unordered pair once and record the match in both
	// directions. This assumes Distance is symmetric — TODO confirm against
	// the goimagehash documentation.
	groupedImages := make(map[string][]string)
	for i, filea := range names {
		hasha := hashes[filea]
		for _, fileb := range names[i+1:] {
			distance, err := hasha.Distance(hashes[fileb])
			if err != nil {
				logger.Error("Failed to calculate distance: %v", err)
				continue
			}
			logger.Debug("Distance between %v and %v: %v", filea, fileb, distance)
			if distance <= *thresh {
				groupedImages[filea] = append(groupedImages[filea], fileb)
				groupedImages[fileb] = append(groupedImages[fileb], filea)
			}
		}
	}

	// Report in sorted order; files without any match are skipped.
	for _, file := range names {
		if similar, ok := groupedImages[file]; ok {
			logger.Info("Grouped %v with %v", file, similar)
		}
	}
	logger.Info("Done")
}