Hallucinate everything

Who knows if it's better...
This commit is contained in:
2025-08-07 10:55:47 +02:00
parent cd6017519e
commit 9982d36aaf

619
main.go
View File

@@ -10,359 +10,320 @@ import (
"time" "time"
logger "git.site.quack-lab.dev/dave/cylogger" logger "git.site.quack-lab.dev/dave/cylogger"
"github.com/bmatcuk/doublestar/v4" "github.com/bmatcuk/doublestar/v4"
"github.com/djherbis/times" "github.com/djherbis/times"
) )
var timeUnits = map[string]int64{
"ms": 1,
"s": 1000,
"m": 1000 * 60,
"h": 1000 * 60 * 60,
"d": 1000 * 60 * 60 * 24,
"M": 1000 * 60 * 60 * 24 * 30,
"y": 1000 * 60 * 60 * 24 * 365,
}
var numFilesArchived = 0
var numFilesDeleted = 0
var valueRegex, _ = regexp.Compile(`\d+`)
var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`)
// parseDuration converts a duration expression such as "1d_12h" into
// milliseconds.
//
// The expression is split on "_". Each part must contain a decimal number and
// a unit that is a key of timeUnits; the contributions of all valid parts are
// summed. Parts that cannot be parsed are logged and skipped, so an entirely
// invalid expression yields 0.
func parseDuration(date string) int64 {
	var milliseconds int64
	for _, part := range strings.Split(strings.TrimSpace(date), "_") {
		part = strings.TrimSpace(part)
		logger.Info("Parsing date part: %s", part)

		value := valueRegex.FindString(part)
		unit := unitRegex.FindString(part)
		if value == "" || unit == "" {
			// Pass the part as an argument so a stray '%' in the input cannot
			// corrupt the log format string.
			logger.Error("Invalid date part: %s", part)
			continue
		}
		factor, ok := timeUnits[unit]
		if !ok {
			logger.Error("Invalid date unit: %s", unit)
			continue
		}
		logger.Info("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)

		// Parse with the full 64-bit range: a 16-bit limit would clamp any
		// value above 32767 (e.g. "40000ms") and report it only via the error.
		count, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			logger.Error("Invalid date value %s: %v", value, err)
			continue
		}

		valueMs := count * factor
		milliseconds += valueMs
		logger.Info("Adding %dms to duration, now: %d", valueMs, milliseconds)
	}
	return milliseconds
}
// getEnv returns the value of the environment variable key, or def when the
// variable is not set. A variable that is set to the empty string is returned
// as "" rather than replaced by def.
func getEnv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
// scanRoot walks constants.ROOT and hands every entry that is not covered by
// an ignore pattern to processFile.
//
// Walk errors for individual entries are logged and the walk continues. The
// root directory itself is never processed. Directories are not filtered out
// here: they are passed to processFile just like regular files.
func scanRoot() {
	log := logger.Default.WithPrefix("scanRoot").WithPrefix(constants.ROOT)
	log.Info("Scanning root directory")
	filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Error("Error scanning %s: %s", path, err)
			return nil
		}
		path = filepath.ToSlash(path)
		if path == constants.ROOT {
			log.Info("Skipping root directory %s", path)
			return nil
		}
		// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
		// In fact iteration should be faster for small lists rather than hashing
		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
			matched, err := doublestar.Match(ignoredDir, path)
			if err != nil {
				log.Error("Error matching %s: %v", path, err)
				continue
			}
			if !matched {
				continue
			}
			if info.IsDir() {
				log.Info("Ignoring directory %s", path)
				return filepath.SkipDir
			}
			// Returning SkipDir for a regular file would make Walk skip the
			// remaining entries of the containing directory, so an ignored
			// file is simply passed over.
			log.Info("Ignoring file %s", path)
			return nil
		}
		processFile(path, info)
		return nil
	})
}
// scanArchive walks constants.ROOT_ARCHIVE and passes every entry except the
// archive root itself to processArchiveFile. Per-entry walk errors are logged
// and do not stop the walk.
func scanArchive() {
	log := logger.Default.WithPrefix("scanArchive")
	log.Info("Scanning archive...")

	visit := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Error("Error scanning %s: %s", path, err)
			return nil
		}
		if path = filepath.ToSlash(path); path != constants.ROOT_ARCHIVE {
			processArchiveFile(path, info)
			return nil
		}
		log.Info("Skipping root directory %s...", path)
		return nil
	}
	filepath.Walk(constants.ROOT_ARCHIVE, visit)
}
// processFile archives path when its timestamp is older than
// constants.ARCHIVE_THRESHOLD milliseconds.
//
// The timestamp compared is the modification time when constants.USE_MODTIME
// is set and the access time otherwise.
func processFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()
	log := logger.Default.WithPrefix("processFile").WithPrefix(path)

	var (
		timeType string
		fileTime int64
	)
	stamps := times.Get(info)
	if constants.USE_MODTIME {
		timeType, fileTime = "modified", stamps.ModTime().UnixMilli()
	} else {
		timeType, fileTime = "accessed", stamps.AccessTime().UnixMilli()
	}

	timeDelta := now - fileTime
	log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
	if timeDelta <= constants.ARCHIVE_THRESHOLD {
		return
	}
	log.Info("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD)
	archiveFile(path)
}
// processArchiveFile deletes path when its timestamp is older than
// constants.DELETE_THRESHOLD milliseconds.
//
// The timestamp compared is the modification time when constants.USE_MODTIME
// is set and the access time otherwise.
func processArchiveFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()
	log := logger.Default.WithPrefix("processArchiveFile").WithPrefix(path)

	timeType := "accessed"
	var fileTime int64
	switch {
	case constants.USE_MODTIME:
		timeType = "modified"
		fileTime = times.Get(info).ModTime().UnixMilli()
	default:
		fileTime = times.Get(info).AccessTime().UnixMilli()
	}

	timeDelta := now - fileTime
	log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
	if timeDelta <= constants.DELETE_THRESHOLD {
		return
	}
	log.Info("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD)
	deleteFile(path)
}
func archiveFile(path string) {
// defer os.Exit(1)
var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
log := logger.Default.WithPrefix("archiveFile").WithPrefix(path)
log.Info("Archiving file %s to %s...", path, newPath)
os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
var err = os.Rename(path, newPath)
if err != nil {
log.Error("Error archiving file %s: %s", path, err)
return
}
numFilesArchived++
}
// deleteFile removes path with os.Remove and increments numFilesDeleted on
// success. A failure is logged and otherwise ignored.
func deleteFile(path string) {
	log := logger.Default.WithPrefix("deleteFile").WithPrefix(path)
	log.Info("Deleting file %s...", path)
	if err := os.Remove(path); err != nil {
		log.Error("Error deleting file %s: %s", path, err)
		return
	}
	numFilesDeleted++
}
// shouldIgnore reports whether path matches any of the doublestar patterns in
// constants.IGNORED_DIRECTORIES. A pattern that fails to match with an error
// is logged and treated as a non-match.
func shouldIgnore(path string) bool {
	log := logger.Default.WithPrefix("shouldIgnore").WithPrefix(path)
	for i := range constants.IGNORED_DIRECTORIES {
		pattern := constants.IGNORED_DIRECTORIES[i]
		log.Debug("Checking if %s matches %s", pattern, path)
		switch matched, err := doublestar.Match(pattern, path); {
		case err != nil:
			log.Error("Error matching %s: %v", path, err)
		case matched:
			log.Debug("Directory is ignored, skipping")
			return true
		}
	}
	log.Debug("Directory is not ignored")
	return false
}
// cleanRoot removes directories under constants.ROOT that isDirEmpty reports
// as empty.
//
// Every path matched by the "**" glob is inspected: non-directories and
// directories matched by shouldIgnore are skipped, and per-path failures are
// logged without aborting the pass.
func cleanRoot() {
	log := logger.Default.WithPrefix("cleanRoot")
	files, err := doublestar.Glob(os.DirFS(constants.ROOT), "**")
	if err != nil {
		log.Error("Error reading root directory %s: %s", constants.ROOT, err)
		return
	}

	for _, file := range files {
		fullpath := filepath.Join(constants.ROOT, file)
		filelog := log.WithPrefix(file)

		filelog.Debug("Getting file info")
		info, statErr := os.Stat(fullpath)
		if statErr != nil {
			filelog.Error("Error getting file info %v", statErr)
			continue
		}
		filelog.Trace("File info: %+v", info)

		if !info.IsDir() {
			filelog.Info("File is not a directory, skipping")
			continue
		}

		filelog.Debug("Checking if directory is ignored")
		if shouldIgnore(fullpath) {
			filelog.Info("Directory is ignored, skipping")
			continue
		}

		filelog.Debug("Directory is not ignored, checking if it is empty")
		empty, emptyErr := isDirEmpty(fullpath)
		if emptyErr != nil {
			filelog.Error("Error checking if directory - is empty: %v", emptyErr)
			continue
		}
		filelog.Info("Directory isempty: %t", empty)
		if !empty {
			continue
		}

		filelog.Info("Deleting empty directory")
		if rmErr := os.RemoveAll(fullpath); rmErr != nil {
			filelog.Error("Error deleting empty directory %v", rmErr)
		}
	}
}
// isDirEmpty reports whether the tree rooted at dirPath contains no
// non-directory entries; nested directories alone do not make it non-empty.
//
// The walk stops at the first non-directory found. Errors met while walking
// are logged and the last one is returned alongside the result.
func isDirEmpty(dirPath string) (bool, error) {
	var (
		foundFile bool
		walkErr   error
	)
	log := logger.Default.WithPrefix("isDirEmpty").WithPrefix(dirPath)
	filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			log.Error("Error scanning %s: %s", path, err)
			walkErr = err
		case !info.IsDir():
			foundFile = true
			log.Info("Directory %s is not empty, found %s", dirPath, path)
			return filepath.SkipAll
		}
		return nil
	})
	return !foundFile, walkErr
}
type Constants struct { type Constants struct {
ROOT string ROOT string
ROOT_ARCHIVE string ROOT_ARCHIVE string
IGNORED_DIRECTORIES []string IGNORED_DIRECTORIES []string
ARCHIVE_THRESHOLD int64 ARCHIVE_THRESHOLD int64 // ms
DELETE_THRESHOLD int64 DELETE_THRESHOLD int64 // ms
SCAN_INTERVAL time.Duration SCAN_INTERVAL time.Duration
USE_MODTIME bool USE_MODTIME bool
} }
// Package-level state shared by the scanning and configuration functions.
var (
	// constants holds the active configuration.
	constants Constants

	// timeUnits maps a duration unit suffix to its length in milliseconds.
	// Months and years use fixed lengths of 30 and 365 days.
	timeUnits = map[string]int64{
		"ms": 1,
		"s":  1000,
		"m":  60 * 1000,
		"h":  60 * 60 * 1000,
		"d":  24 * 60 * 60 * 1000,
		"M":  30 * 24 * 60 * 60 * 1000,
		"y":  365 * 24 * 60 * 60 * 1000,
	}

	// valueRegex and unitRegex pick the first run of digits and the first run
	// of ASCII letters out of a duration part.
	valueRegex = regexp.MustCompile(`\d+`)
	unitRegex  = regexp.MustCompile(`[a-zA-Z]+`)

	// Per-cycle counters of archived and deleted files.
	numFilesArchived, numFilesDeleted = 0, 0
)
// parseDurationMS converts a duration expression such as "1d_12h" into
// milliseconds.
//
// The expression is split on "_"; each non-empty part needs a decimal number
// and a unit that is a key of timeUnits. Invalid parts are logged as warnings
// and skipped, so an empty or entirely invalid expression yields 0.
func parseDurationMS(expr string) int64 {
	var total int64

	trimmed := strings.TrimSpace(expr)
	if trimmed == "" {
		return total
	}

	for _, raw := range strings.Split(trimmed, "_") {
		part := strings.TrimSpace(raw)
		if part == "" {
			continue
		}

		digits, suffix := valueRegex.FindString(part), unitRegex.FindString(part)
		if digits == "" || suffix == "" {
			logger.Warning("Invalid duration part: %q", part)
			continue
		}

		factor, known := timeUnits[suffix]
		if !known {
			logger.Warning("Invalid duration unit: %q", suffix)
			continue
		}

		count, err := strconv.ParseInt(digits, 10, 64)
		if err != nil {
			logger.Warning("Invalid duration value: %q: %v", digits, err)
			continue
		}

		total += count * factor
	}
	return total
}
// getenv returns the value of the environment variable key, or def when the
// variable is not set. A variable set to the empty string yields "".
func getenv(key, def string) string {
	value, found := os.LookupEnv(key)
	if !found {
		return def
	}
	return value
}
// loadConfig builds the runtime configuration from environment variables,
// logs it, and returns it.
//
// Variables and defaults:
//
//	ROOT                 "/tmp"
//	ROOT_ARCHIVE         ROOT + "/archive" (created if missing)
//	IGNORED_DIRECTORIES  "" (comma-separated patterns)
//	ARCHIVE_THRESHOLD    "1d"
//	DELETE_THRESHOLD     "12h"
//	SCAN_INTERVAL        "1m"
//	USE_MODTIME          "false" (compared case-insensitively with "true")
//
// ROOT and ROOT_ARCHIVE are always appended to the ignore list.
//
// NOTE(review): a threshold that fails to parse becomes 0, which makes every
// file older than the threshold — confirm whether that should be rejected.
func loadConfig() Constants {
	flag.Parse()
	logger.InitFlag()

	root := filepath.ToSlash(strings.TrimSpace(getenv("ROOT", "/tmp")))
	archive := filepath.ToSlash(strings.TrimSpace(getenv("ROOT_ARCHIVE", root+"/archive")))
	// Creating the archive stays best-effort, but a failure is reported
	// instead of being silently dropped.
	if err := os.MkdirAll(archive, os.ModePerm); err != nil {
		logger.Error("mkdir %s: %v", archive, err)
	}

	var ignored []string
	for _, s := range strings.Split(getenv("IGNORED_DIRECTORIES", ""), ",") {
		if t := strings.TrimSpace(s); t != "" {
			ignored = append(ignored, filepath.ToSlash(t))
		}
	}
	// Always ignore ROOT and ROOT_ARCHIVE themselves
	ignored = append(ignored, archive, root)

	archiveMs := parseDurationMS(getenv("ARCHIVE_THRESHOLD", "1d"))
	deleteMs := parseDurationMS(getenv("DELETE_THRESHOLD", "12h"))

	interval := time.Duration(parseDurationMS(getenv("SCAN_INTERVAL", "1m"))) * time.Millisecond
	if interval <= 0 {
		// The interval is handed to time.NewTicker, which panics on a
		// non-positive duration; an unparsable value must not get that far.
		logger.Warning("SCAN_INTERVAL is not a positive duration, falling back to %s", time.Minute)
		interval = time.Minute
	}

	useMod := strings.EqualFold(strings.TrimSpace(getenv("USE_MODTIME", "false")), "true")

	logger.Info("Config:")
	logger.Info(" ROOT: %s", root)
	logger.Info(" ROOT_ARCHIVE: %s", archive)
	logger.Info(" IGNORED_DIRECTORIES: %v", ignored)
	logger.Info(" ARCHIVE_THRESHOLD(ms): %d", archiveMs)
	logger.Info(" DELETE_THRESHOLD(ms): %d", deleteMs)
	logger.Info(" SCAN_INTERVAL(ms): %d", interval.Milliseconds())
	logger.Info(" USE_MODTIME: %t", useMod)

	return Constants{
		ROOT:                root,
		ROOT_ARCHIVE:        archive,
		IGNORED_DIRECTORIES: ignored,
		ARCHIVE_THRESHOLD:   archiveMs,
		DELETE_THRESHOLD:    deleteMs,
		SCAN_INTERVAL:       interval,
		USE_MODTIME:         useMod,
	}
}
// shouldIgnore reports whether path, after slash normalization, either
// matches one of the doublestar patterns in constants.IGNORED_DIRECTORIES or
// equals a pattern literally. A pattern that produces a match error is logged
// and skipped entirely.
func shouldIgnore(path string) bool {
	normalized := filepath.ToSlash(path)
	for _, pattern := range constants.IGNORED_DIRECTORIES {
		matched, err := doublestar.Match(pattern, normalized)
		if err != nil {
			logger.Warning("Ignore pattern error %q vs %q: %v", pattern, normalized, err)
			continue
		}
		if matched || normalized == pattern {
			return true
		}
	}
	return false
}
func fileTime(info os.FileInfo, path string) int64 {
t := times.Get(info)
if constants.USE_MODTIME {
return t.ModTime().UnixMilli()
}
// If AccessTime is not supported, it returns ModTime; times handles that internally.
return t.AccessTime().UnixMilli()
}
// archiveCandidate moves the file at path into constants.ROOT_ARCHIVE, at the
// same location relative to constants.ROOT, when its fileTime is more than
// constants.ARCHIVE_THRESHOLD milliseconds in the past.
//
// Directories are ignored. Failures are logged and leave the file in place;
// a successful move increments numFilesArchived.
func archiveCandidate(path string, info os.FileInfo) {
	if info.IsDir() {
		return
	}
	if age := time.Now().UnixMilli() - fileTime(info, path); age <= constants.ARCHIVE_THRESHOLD {
		return
	}

	rel, relErr := filepath.Rel(constants.ROOT, path)
	if relErr != nil {
		logger.Warning("rel ROOT->%s: %v", path, relErr)
		return
	}

	dst := filepath.Join(constants.ROOT_ARCHIVE, rel)
	dstDir := filepath.Dir(dst)
	if mkErr := os.MkdirAll(dstDir, os.ModePerm); mkErr != nil {
		logger.Error("mkdir %s: %v", dstDir, mkErr)
		return
	}
	if mvErr := os.Rename(path, dst); mvErr != nil {
		logger.Error("archive %s -> %s: %v", path, dst, mvErr)
		return
	}

	numFilesArchived++
	logger.Info("Archived: %s -> %s", path, dst)
}
// deleteCandidate removes the file at path when its fileTime is more than
// constants.DELETE_THRESHOLD milliseconds in the past.
//
// Directories are ignored. A failed removal is logged; a successful one
// increments numFilesDeleted.
func deleteCandidate(path string, info os.FileInfo) {
	// The age is only computed for non-directories thanks to short-circuiting.
	if info.IsDir() || time.Now().UnixMilli()-fileTime(info, path) <= constants.DELETE_THRESHOLD {
		return
	}
	rmErr := os.Remove(path)
	if rmErr != nil {
		logger.Error("delete %s: %v", path, rmErr)
		return
	}
	numFilesDeleted++
	logger.Info("Deleted: %s", path)
}
// scanRoot globs everything under constants.ROOT and offers each path that
// shouldIgnore does not reject to archiveCandidate. Paths that cannot be
// stat'ed are logged and skipped.
func scanRoot() {
	log := logger.Default.WithPrefix("scanRoot")

	// Glob over os.DirFS yields paths relative to the root.
	matches, err := doublestar.Glob(os.DirFS(constants.ROOT), "**")
	if err != nil {
		log.Error("glob %s: %v", constants.ROOT, err)
		return
	}

	for i := range matches {
		candidate := filepath.Join(constants.ROOT, matches[i])
		if shouldIgnore(candidate) {
			continue
		}
		if info, statErr := os.Stat(candidate); statErr != nil {
			log.Warning("stat %s: %v", candidate, statErr)
		} else {
			archiveCandidate(candidate, info)
		}
	}
}
// scanArchive globs everything under constants.ROOT_ARCHIVE and offers each
// path to deleteCandidate. Paths that cannot be stat'ed are logged and
// skipped.
func scanArchive() {
	log := logger.Default.WithPrefix("scanArchive")

	archiveRoot := constants.ROOT_ARCHIVE
	entries, globErr := doublestar.Glob(os.DirFS(archiveRoot), "**")
	if globErr != nil {
		log.Error("glob %s: %v", archiveRoot, globErr)
		return
	}

	for _, entry := range entries {
		target := filepath.Join(archiveRoot, entry)
		info, statErr := os.Stat(target)
		if statErr == nil {
			deleteCandidate(target, info)
			continue
		}
		log.Warning("stat %s: %v", target, statErr)
	}
}
// cleanEmptyDirs removes empty directories below constants.ROOT.
//
// It collects every non-ignored directory matched by the "**" glob, orders
// them longest path first so that children are tried before their parents,
// and removes each one that isDirEmpty reports as empty. Failures are logged
// as warnings and do not stop the pass.
func cleanEmptyDirs() {
	log := logger.Default.WithPrefix("cleanEmptyDirs")
	root := constants.ROOT

	matches, globErr := doublestar.Glob(os.DirFS(root), "**")
	if globErr != nil {
		log.Error("glob %s: %v", root, globErr)
		return
	}

	// Keep only existing directories that are not ignored.
	var dirs []string
	for _, rel := range matches {
		full := filepath.Join(root, rel)
		if info, statErr := os.Stat(full); statErr == nil && info.IsDir() && !shouldIgnore(full) {
			dirs = append(dirs, full)
		}
	}

	// Exchange sort by descending path length: a child path is always longer
	// than its parent, so children come first.
	for i := range dirs {
		for j := i + 1; j < len(dirs); j++ {
			if len(dirs[i]) < len(dirs[j]) {
				dirs[i], dirs[j] = dirs[j], dirs[i]
			}
		}
	}

	for _, d := range dirs {
		empty, checkErr := isDirEmpty(d)
		switch {
		case checkErr != nil:
			log.Warning("check empty %s: %v", d, checkErr)
		case !empty:
			// Still has entries; leave it alone.
		default:
			if rmErr := os.Remove(d); rmErr != nil {
				log.Warning("remove %s: %v", d, rmErr)
			} else {
				log.Info("Removed empty dir: %s", d)
			}
		}
	}
}
// isDirEmpty reports whether dir contains no entries at all, neither files
// nor subdirectories.
//
// It returns (false, err) when dir cannot be opened or listed (for example
// when it does not exist, is not a directory, or is not readable), so a
// directory is never reported as empty on the strength of a failed read.
func isDirEmpty(dir string) (bool, error) {
	f, err := os.Open(dir)
	if err != nil {
		return false, err
	}
	defer f.Close()

	// A negative count lists every name and reports a nil error at the end of
	// the directory, so any non-nil error here is a genuine read failure
	// rather than the end-of-directory marker.
	names, err := f.Readdirnames(-1)
	if err != nil {
		return false, err
	}
	return len(names) == 0, nil
}
func doRun() { func doRun() {
scanRoot() scanRoot()
scanArchive() scanArchive()
cleanRoot() cleanEmptyDirs()
logger.Info("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted) logger.Info("Cycle summary: archived=%d deleted=%d", numFilesArchived, numFilesDeleted)
numFilesArchived = 0 numFilesArchived, numFilesDeleted = 0, 0
numFilesDeleted = 0
} }
var constants = Constants{}
func main() { func main() {
flag.Parse() flag.Parse()
logger.InitFlag() logger.InitFlag()
// Important: Access times don't accumulate.
// This implies that archiving the file won't alter its access time.
// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
// where X represents the duration it can exist in the folder,
// and Y represents the duration it can exist in the archive.
logger.Info("Starting directory cleaner") logger.Info("Starting directory cleaner")
constants = loadConfig()
logger.Info("Ready. First scan in %s", constants.SCAN_INTERVAL)
var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) // Run immediately, then on interval
var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
var IGNORED_DIRECTORIES = []string{}
var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "")
if ignoredEnv != "" {
ignoredEnv = strings.TrimSpace(ignoredEnv)
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
}
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT)
for key, dir := range IGNORED_DIRECTORIES {
IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
}
var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6)
var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
logger.Info("Input args parsed as:")
logger.Info("ROOT: %s", ROOT)
logger.Info("ROOT_ARCHIVE: %s", ROOT_ARCHIVE)
logger.Info("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES)
logger.Info("ARCHIVE_THRESHOLD: %d", ARCHIVE_THRESHOLD)
logger.Info("DELETE_THRESHOLD: %d", DELETE_THRESHOLD)
logger.Info("SCAN_INTERVAL: %d", SCAN_INTERVAL.Milliseconds())
logger.Info("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME))
constants.ROOT = ROOT
constants.ROOT_ARCHIVE = ROOT_ARCHIVE
constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
constants.DELETE_THRESHOLD = DELETE_THRESHOLD
constants.SCAN_INTERVAL = SCAN_INTERVAL
constants.USE_MODTIME = USE_MODTIME
doRun() doRun()
t := time.NewTicker(constants.SCAN_INTERVAL)
defer t.Stop()
for { for {
logger.Info("Running at %d", time.Now().UnixMilli()) select {
time.Sleep(SCAN_INTERVAL) case ts := <-t.C:
logger.Info("Tick %d", ts.UnixMilli())
doRun() doRun()
} }
}
} }