// Command main is a directory cleaner. On every cycle it moves stale files
// from ROOT into ROOT_ARCHIVE, deletes stale files from ROOT_ARCHIVE and then
// removes empty directories under ROOT. Configuration comes from environment
// variables; see loadConfig.
package main

import (
	"errors"
	"flag"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	logger "git.site.quack-lab.dev/dave/cylogger"
	"github.com/bmatcuk/doublestar/v4"
	"github.com/djherbis/times"
)

// defaultScanInterval is used when SCAN_INTERVAL is missing or does not parse
// to a positive duration. It matches the "1m" default passed to getenv.
const defaultScanInterval = time.Minute

// Constants holds the runtime configuration produced by loadConfig.
type Constants struct {
	// ROOT is the directory that is scanned for files to archive.
	ROOT string
	// ROOT_ARCHIVE is the directory stale files are moved into.
	ROOT_ARCHIVE string
	// IGNORED_DIRECTORIES is a list of doublestar patterns matched against the
	// slash-normalized full path (see shouldIgnore).
	IGNORED_DIRECTORIES []string
	// ARCHIVE_THRESHOLD is the minimum file age, in milliseconds, before a
	// file under ROOT is archived.
	ARCHIVE_THRESHOLD int64
	// DELETE_THRESHOLD is the minimum file age, in milliseconds, before a
	// file under ROOT_ARCHIVE is deleted.
	DELETE_THRESHOLD int64
	// SCAN_INTERVAL is the pause between two cleaning cycles.
	SCAN_INTERVAL time.Duration
	// USE_MODTIME selects modification time instead of access time as the
	// timestamp used to compute a file's age.
	USE_MODTIME bool
}

var (
	constants Constants

	// timeUnits maps a unit suffix to its length in milliseconds.
	// "M" is a 30-day month and "y" a 365-day year.
	timeUnits = map[string]int64{
		"ms": 1,
		"s":  1000,
		"m":  60_000,
		"h":  3_600_000,
		"d":  86_400_000,
		"M":  2_592_000_000,
		"y":  31_536_000_000,
	}
	valueRegex = regexp.MustCompile(`\d+`)
	unitRegex  = regexp.MustCompile(`[a-zA-Z]+`)

	// Per-cycle counters; doRun resets them after logging the summary.
	numFilesArchived = 0
	numFilesDeleted  = 0
)

// parseDurationMS converts an expression such as "1d_12h" into milliseconds.
//
// The expression is split on "_"; each part must contain a number and a unit
// from timeUnits. Only the first digit run and the first letter run of a part
// are used, so "1h30m" without a separator counts as "1h". Invalid parts are
// logged and contribute nothing. An empty expression yields 0.
func parseDurationMS(expr string) int64 {
	expr = strings.TrimSpace(expr)
	if expr == "" {
		return 0
	}

	var total int64
	for _, p := range strings.Split(expr, "_") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		v := valueRegex.FindString(p)
		u := unitRegex.FindString(p)
		if v == "" || u == "" {
			logger.Warning("Invalid duration part: %q", p)
			continue
		}
		unit, ok := timeUnits[u]
		if !ok {
			logger.Warning("Invalid duration unit: %q", u)
			continue
		}
		n, err := strconv.ParseInt(v, 10, 64)
		if err != nil {
			logger.Warning("Invalid duration value: %q: %v", v, err)
			continue
		}
		total += n * unit
	}
	return total
}

// getenv returns the value of the environment variable key, or def when the
// variable is not set. A variable that is set to the empty string is returned
// as-is.
func getenv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}

// loadConfig reads the configuration from the environment, creates the archive
// directory, logs the effective settings and returns them.
//
// Recognized variables and their defaults: ROOT ("/tmp"), ROOT_ARCHIVE
// (ROOT + "/archive"), IGNORED_DIRECTORIES (empty, comma separated),
// ARCHIVE_THRESHOLD ("1d"), DELETE_THRESHOLD ("12h"), SCAN_INTERVAL ("1m") and
// USE_MODTIME ("false").
func loadConfig() Constants {
	flag.Parse()
	logger.InitFlag()

	root := filepath.ToSlash(strings.TrimSpace(getenv("ROOT", "/tmp")))
	archive := filepath.ToSlash(strings.TrimSpace(getenv("ROOT_ARCHIVE", root+"/archive")))
	// Best effort: archiveCandidate creates the directories it needs again, so
	// a failure here is reported but does not abort startup.
	if err := os.MkdirAll(archive, os.ModePerm); err != nil {
		logger.Warning("mkdir %s: %v", archive, err)
	}

	// TODO: IGNORED_DIRECTORIES does not really work or do what its name
	// suggests. It is currently unused, so this has not been fixed yet, but it
	// should be before anyone relies on it.
	var ignored []string
	if ig := strings.TrimSpace(getenv("IGNORED_DIRECTORIES", "")); ig != "" {
		for _, s := range strings.Split(ig, ",") {
			if t := strings.TrimSpace(s); t != "" {
				ignored = append(ignored, filepath.ToSlash(t))
			}
		}
	}

	archiveMs := parseDurationMS(getenv("ARCHIVE_THRESHOLD", "1d"))
	deleteMs := parseDurationMS(getenv("DELETE_THRESHOLD", "12h"))
	interval := time.Duration(parseDurationMS(getenv("SCAN_INTERVAL", "1m"))) * time.Millisecond
	// time.NewTicker panics on a non-positive duration, which is what an empty
	// or unparsable SCAN_INTERVAL would otherwise produce.
	if interval <= 0 {
		logger.Warning("Invalid SCAN_INTERVAL, falling back to %s", defaultScanInterval)
		interval = defaultScanInterval
	}
	useMod := strings.EqualFold(strings.TrimSpace(getenv("USE_MODTIME", "false")), "true")

	logger.Info("Config:")
	logger.Info(" ROOT: %s", root)
	logger.Info(" ROOT_ARCHIVE: %s", archive)
	logger.Info(" IGNORED_DIRECTORIES: %v", ignored)
	logger.Info(" ARCHIVE_THRESHOLD(ms): %d", archiveMs)
	logger.Info(" DELETE_THRESHOLD(ms): %d", deleteMs)
	logger.Info(" SCAN_INTERVAL(ms): %d", interval.Milliseconds())
	logger.Info(" USE_MODTIME: %t", useMod)

	return Constants{
		ROOT:                root,
		ROOT_ARCHIVE:        archive,
		IGNORED_DIRECTORIES: ignored,
		ARCHIVE_THRESHOLD:   archiveMs,
		DELETE_THRESHOLD:    deleteMs,
		SCAN_INTERVAL:       interval,
		USE_MODTIME:         useMod,
	}
}

// shouldIgnore reports whether path matches any pattern in
// constants.IGNORED_DIRECTORIES. Patterns are matched with doublestar.Match
// against the slash-normalized path; a malformed pattern is logged and
// skipped.
func shouldIgnore(path string) bool {
	log := logger.Default.WithPrefix("shouldIgnore").WithPrefix(path)
	// Match against slash-normalized full path
	path = filepath.ToSlash(path)
	for _, pat := range constants.IGNORED_DIRECTORIES {
		patLog := log.WithPrefix(pat)
		ok, err := doublestar.Match(pat, path)
		if err != nil {
			patLog.Warning("Ignore pattern error %q vs %q: %v", pat, path, err)
			continue
		}
		if ok {
			patLog.Debug("ignore due to doublestar %q", pat)
			return true
		}
	}
	return false
}

// fileTime returns the timestamp, in Unix milliseconds, used to compute the
// age of a file: the modification time when constants.USE_MODTIME is set,
// otherwise the access time.
func fileTime(info os.FileInfo) int64 {
	t := times.Get(info)
	if constants.USE_MODTIME {
		return t.ModTime().UnixMilli()
	}
	// NOTE(review): the original author states that times falls back to
	// ModTime when access time is not supported - confirm against the
	// djherbis/times documentation.
	return t.AccessTime().UnixMilli()
}

// isInsideDir reports whether path is dir itself or lies somewhere below it.
// Paths that cannot be related to each other (for example one absolute and one
// relative) are treated as not inside.
func isInsideDir(dir, path string) bool {
	rel, err := filepath.Rel(dir, path)
	if err != nil {
		return false
	}
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return false
	}
	return true
}

// archiveCandidate moves the file at path into constants.ROOT_ARCHIVE,
// preserving its path relative to constants.ROOT, when the file is older than
// constants.ARCHIVE_THRESHOLD. Directories and files that already live inside
// ROOT_ARCHIVE are left alone. Failures are logged and the file stays in
// place.
func archiveCandidate(path string, info os.FileInfo) {
	log := logger.Default.WithPrefix("archiveCandidate")
	if info.IsDir() {
		return
	}

	now := time.Now().UnixMilli()
	ft := fileTime(info)
	log.Debug("now %s filetime %s", time.UnixMilli(now).Format(time.RFC3339), time.UnixMilli(ft).Format(time.RFC3339))
	if now-ft <= constants.ARCHIVE_THRESHOLD {
		log.Debug("skip")
		return
	}

	rel, err := filepath.Rel(constants.ROOT, path)
	if err != nil {
		log.Warning("rel ROOT->%s: %v", path, err)
		return
	}
	// Compare against the configured archive directory rather than a
	// hard-coded "archive" name prefix: that also covers a custom ROOT_ARCHIVE
	// inside ROOT and no longer skips root files such as "archive.txt".
	if isInsideDir(constants.ROOT_ARCHIVE, path) {
		log.Debug("refusing to archive a file that's already archived")
		return
	}
	log.Debug("rel %q", rel)

	dst := filepath.Join(constants.ROOT_ARCHIVE, rel)
	log.Debug("dst %q", dst)
	if err := os.MkdirAll(filepath.Dir(dst), os.ModePerm); err != nil {
		log.Error("mkdir %s: %v", filepath.Dir(dst), err)
		return
	}
	// os.Rename may be subject to OS-specific restrictions (for instance when
	// source and destination are on different filesystems); such failures are
	// logged below.
	if err := os.Rename(path, dst); err != nil {
		log.Error("archive %s -> %s: %v", path, dst, err)
		return
	}
	numFilesArchived++
	log.Info("Archived: %s -> %s", path, dst)
}

// deleteCandidate removes the file at path when it is older than
// constants.DELETE_THRESHOLD. Directories are skipped; removal errors are
// logged.
func deleteCandidate(path string, info os.FileInfo) {
	log := logger.Default.WithPrefix("deleteCandidate").WithPrefix(path)
	if info.IsDir() {
		return
	}

	now := time.Now().UnixMilli()
	ft := fileTime(info)
	log.Debug("now %s filetime %s", time.UnixMilli(now).Format(time.RFC3339), time.UnixMilli(ft).Format(time.RFC3339))
	if now-ft <= constants.DELETE_THRESHOLD {
		log.Debug("skip")
		return
	}

	log.Debug("delete")
	if err := os.Remove(path); err != nil {
		log.Error("delete %s: %v", path, err)
		return
	}
	numFilesDeleted++
	log.Info("Deleted: %s", path)
}

// scanRoot walks everything under constants.ROOT and hands each entry that is
// not ignored to archiveCandidate.
func scanRoot() {
	log := logger.Default.WithPrefix("scanRoot")
	root := constants.ROOT

	// doublestar.Glob with os.DirFS(root) returns relative paths
	log.Debug("glob %s", root)
	paths, err := doublestar.Glob(os.DirFS(root), "**")
	if err != nil {
		log.Error("glob %s: %v", root, err)
		return
	}
	log.Info("Found %d files in root", len(paths))

	for _, rel := range paths {
		pathLog := log.WithPrefix(rel)
		full := filepath.Join(root, rel)
		pathLog.Debug("full %s", full)
		if shouldIgnore(full) {
			pathLog.Debug("ignore")
			continue
		}
		info, err := os.Stat(full)
		if err != nil {
			log.Warning("stat %s: %v", full, err)
			continue
		}
		// Trace only after the error check so a nil FileInfo is never logged.
		pathLog.Trace("stat %+v", info)
		archiveCandidate(full, info)
	}
}

// scanArchive walks everything under constants.ROOT_ARCHIVE and hands each
// entry to deleteCandidate.
func scanArchive() {
	log := logger.Default.WithPrefix("scanArchive")
	base := constants.ROOT_ARCHIVE

	paths, err := doublestar.Glob(os.DirFS(base), "**")
	if err != nil {
		log.Error("glob %s: %v", base, err)
		return
	}
	for _, rel := range paths {
		full := filepath.Join(base, rel)
		info, err := os.Stat(full)
		if err != nil {
			log.Warning("stat %s: %v", full, err)
			continue
		}
		deleteCandidate(full, info)
	}
}

// cleanEmptyDirs removes empty, non-ignored directories found under
// constants.ROOT. Longer paths are handled first so that children are removed
// before the parents they would otherwise keep non-empty.
func cleanEmptyDirs() {
	log := logger.Default.WithPrefix("cleanEmptyDirs")
	root := constants.ROOT

	paths, err := doublestar.Glob(os.DirFS(root), "**")
	if err != nil {
		log.Error("glob %s: %v", root, err)
		return
	}

	// Collect the directories among the glob results.
	var dirs []string
	for _, rel := range paths {
		full := filepath.Join(root, rel)
		info, err := os.Stat(full)
		if err != nil || !info.IsDir() {
			continue
		}
		if shouldIgnore(full) {
			continue
		}
		dirs = append(dirs, full)
	}

	// Longest path first: a child path is always longer than its parent's.
	sort.SliceStable(dirs, func(i, j int) bool {
		return len(dirs[i]) > len(dirs[j])
	})

	for _, d := range dirs {
		empty, err := isDirEmpty(d)
		if err != nil {
			log.Warning("check empty %s: %v", d, err)
			continue
		}
		if !empty {
			continue
		}
		if err := os.Remove(d); err != nil {
			log.Warning("remove %s: %v", d, err)
			continue
		}
		log.Info("Removed empty dir: %s", d)
	}
}

// isDirEmpty reports whether dir contains no entries. It returns an error when
// the directory cannot be opened or read.
func isDirEmpty(dir string) (bool, error) {
	f, err := os.Open(dir)
	if err != nil {
		return false, err
	}
	defer f.Close()

	// Read at most one entry. Readdirnames signals "no entries" with io.EOF;
	// any other error is a genuine failure and must not be mistaken for an
	// empty directory.
	_, err = f.Readdirnames(1)
	if errors.Is(err, io.EOF) {
		return true, nil
	}
	if err != nil {
		return false, err
	}
	return false, nil
}

// doRun performs one cleaning cycle (archive, delete, prune empty directories),
// logs how many files were archived and deleted, and resets the counters.
func doRun() {
	scanRoot()
	scanArchive()
	cleanEmptyDirs()
	logger.Info("Cycle summary: archived=%d deleted=%d", numFilesArchived, numFilesDeleted)
	numFilesArchived, numFilesDeleted = 0, 0
}

// main loads the configuration, runs one cycle immediately and then repeats it
// every constants.SCAN_INTERVAL until the process is stopped.
func main() {
	flag.Parse()
	logger.InitFlag()
	logger.Info("Starting directory cleaner")

	constants = loadConfig()
	logger.Info("Ready. First scan in %s", constants.SCAN_INTERVAL)

	// Run immediately, then on interval
	doRun()

	t := time.NewTicker(constants.SCAN_INTERVAL)
	defer t.Stop()
	for range t.C {
		logger.Info("Tick %d", time.Now().UnixMilli())
		doRun()
	}
}