Files
directory-cleaner/main.go
PhatPhuckDave 37b08d27f5 feat: replace std log with cylogger, add flag-based init, enhance logs,
update Go version and deps, and streamline Docker build/deploy scripts
2025-08-07 10:31:28 +02:00

323 lines
9.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"flag"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
logger "git.site.quack-lab.dev/dave/cylogger"
"github.com/djherbis/times"
)
// timeUnits maps each unit suffix understood by parseDuration to its length
// in milliseconds. "M" is a fixed 30-day month and "y" a fixed 365-day year.
var timeUnits = func() map[string]int64 {
	const (
		millisecond int64 = 1
		second            = 1000 * millisecond
		minute            = 60 * second
		hour              = 60 * minute
		day               = 24 * hour
	)
	return map[string]int64{
		"ms": millisecond,
		"s":  second,
		"m":  minute,
		"h":  hour,
		"d":  day,
		"M":  30 * day,
		"y":  365 * day,
	}
}()
// Counters for the current run; doRun logs them and resets both to zero.
var (
	numFilesArchived int
	numFilesDeleted  int
)

// Patterns used by parseDuration to pull the numeric value and the unit
// suffix out of a single duration part such as "12h".
var (
	valueRegex = regexp.MustCompile(`\d+`)
	unitRegex  = regexp.MustCompile(`[a-zA-Z]+`)
)
// parseDuration converts a duration expression such as "1d_12h" into a number
// of milliseconds.
//
// The expression is split on "_"; each part must contain a decimal number and
// a unit that is a key of timeUnits. Parts that cannot be interpreted are
// logged and skipped, so an expression with no valid part yields 0.
func parseDuration(date string) int64 {
	// Largest representable total; used to reject parts that would overflow.
	const maxInt64 int64 = 1<<63 - 1

	var milliseconds int64
	for _, part := range strings.Split(strings.TrimSpace(date), "_") {
		part = strings.TrimSpace(part)
		logger.Info("Parsing date part: %s", part)

		value := valueRegex.FindString(part)
		unit := unitRegex.FindString(part)
		if value == "" || unit == "" {
			logger.Error("Invalid date part: %s", part)
			continue
		}
		unitMs, ok := timeUnits[unit]
		if !ok {
			logger.Error("Invalid date unit: %s", unit)
			continue
		}
		logger.Info("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)

		// Parse as a full 64-bit integer. A 16-bit parse silently clamps
		// anything above 32767 (for example "40000ms") to 32767.
		amount, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			logger.Error("Invalid date value %s: %s", value, err)
			continue
		}
		// amount and milliseconds are never negative here (the value pattern
		// matches digits only), so this single comparison guards both the
		// multiplication and the running sum against overflow.
		if amount > (maxInt64-milliseconds)/unitMs {
			logger.Error("Date part %s is too large, skipping", part)
			continue
		}

		valueMs := amount * unitMs
		milliseconds += valueMs
		logger.Info("Adding %dms to duration, now: %d", valueMs, milliseconds)
	}
	return milliseconds
}
// getEnv returns the value of the environment variable key, or def when the
// variable is not set. A variable that is set to the empty string is returned
// as "" rather than replaced by def.
func getEnv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
// scanRoot walks constants.ROOT and passes every entry that is not the root
// itself and not inside an ignored directory to processFile.
//
// An entry is ignored when it is one of constants.IGNORED_DIRECTORIES or lies
// below one of them. Matching is done on whole path components, so ignoring
// "/tmp/foo" does not also ignore "/tmp/foobar". Empty ignore entries are
// skipped because an empty prefix would match every path.
func scanRoot() {
	log := logger.Default.WithPrefix("scanRoot")
	log.Info("Scanning root directory...")
	log.Info("Ignored directories: %s", constants.IGNORED_DIRECTORIES)

	// The callback only returns nil or filepath.SkipDir, so Walk itself has
	// no error to report; problems are logged per entry instead.
	filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Error("Error scanning %s: %s", path, err)
			return nil
		}
		path = filepath.ToSlash(path)
		if path == constants.ROOT {
			log.Info("Skipping root directory %s...", path)
			return nil
		}

		// I forgot why this code was here... It doesn't make sense to me now
		// if info.IsDir() {
		// 	log.Info("Skipping directory %s...", path)
		// 	return filepath.SkipDir
		// }

		// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
		// In fact iteration should be faster for small lists rather than hashing
		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
			if ignoredDir == "" {
				continue
			}
			// Compare with a trailing slash on both sides so only whole
			// path components match.
			dirPrefix := strings.TrimSuffix(ignoredDir, "/") + "/"
			if !strings.HasPrefix(path+"/", dirPrefix) {
				continue
			}
			log.Info("Ignoring directory %s", path)
			// SkipDir returned for a non-directory makes Walk skip the rest
			// of the parent directory, so only use it for directories.
			if info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		processFile(path, info)
		return nil
	})
}
// scanArchive walks constants.ROOT_ARCHIVE and passes every entry other than
// the archive root itself to processArchiveFile. Entries that cannot be read
// are logged and skipped.
func scanArchive() {
	log := logger.Default.WithPrefix("scanArchive")
	log.Info("Scanning archive...")

	visit := func(entry string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			log.Error("Error scanning %s: %s", entry, walkErr)
			return nil
		}
		if slashed := filepath.ToSlash(entry); slashed != constants.ROOT_ARCHIVE {
			processArchiveFile(slashed, info)
		} else {
			log.Info("Skipping root directory %s...", slashed)
		}
		return nil
	}
	filepath.Walk(constants.ROOT_ARCHIVE, visit)
}
// processFile archives the entry at path when its reference time is more than
// constants.ARCHIVE_THRESHOLD milliseconds in the past.
//
// The reference time is the modification time when constants.USE_MODTIME is
// set and the access time otherwise.
func processFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()
	log := logger.Default.WithPrefix("processFile").WithPrefix(path)

	var (
		timeType string
		fileTime int64
	)
	if stat := times.Get(info); constants.USE_MODTIME {
		timeType, fileTime = "modified", stat.ModTime().UnixMilli()
	} else {
		timeType, fileTime = "accessed", stat.AccessTime().UnixMilli()
	}

	age := now - fileTime
	log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, age)
	if age <= constants.ARCHIVE_THRESHOLD {
		return
	}
	log.Info("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD)
	archiveFile(path)
}
// processArchiveFile deletes the archived entry at path when its reference
// time is more than constants.DELETE_THRESHOLD milliseconds in the past.
//
// As in processFile, constants.USE_MODTIME selects the modification time;
// otherwise the access time is used.
func processArchiveFile(path string, info os.FileInfo) {
	now := time.Now().UnixMilli()
	log := logger.Default.WithPrefix("processArchiveFile").WithPrefix(path)

	stat := times.Get(info)
	timeType := "accessed"
	var fileTime int64
	switch {
	case constants.USE_MODTIME:
		timeType = "modified"
		fileTime = stat.ModTime().UnixMilli()
	default:
		fileTime = stat.AccessTime().UnixMilli()
	}

	age := now - fileTime
	log.Info("File %s last %s at %d, %dms ago", path, timeType, fileTime, age)
	if expired := age > constants.DELETE_THRESHOLD; expired {
		log.Info("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD)
		deleteFile(path)
	}
}
func archiveFile(path string) {
// defer os.Exit(1)
var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
log := logger.Default.WithPrefix("archiveFile").WithPrefix(path)
log.Info("Archiving file %s to %s...", path, newPath)
os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
var err = os.Rename(path, newPath)
if err != nil {
log.Error("Error archiving file %s: %s", path, err)
return
}
numFilesArchived++
}
// deleteFile removes path with os.Remove and increments numFilesDeleted on
// success. A failure is logged and otherwise ignored.
func deleteFile(path string) {
	log := logger.Default.WithPrefix("deleteFile").WithPrefix(path)
	log.Info("Deleting file %s...", path)
	if removeErr := os.Remove(path); removeErr != nil {
		log.Error("Error deleting file %s: %s", path, removeErr)
	} else {
		numFilesDeleted++
	}
}
// cleanRoot removes every directory directly below constants.ROOT that
// isDirEmpty reports as empty, i.e. that contains no non-directory entries at
// any depth.
//
// Directories that are listed in constants.IGNORED_DIRECTORIES, lie inside
// one, or contain one are left alone. Without that check the archive
// directory itself would be removed whenever it happens to be empty.
func cleanRoot() {
	log := logger.Default.WithPrefix("cleanRoot")
	entries, err := os.ReadDir(constants.ROOT)
	if err != nil {
		log.Error("Error reading root directory %s: %s", constants.ROOT, err)
		return
	}

	// protected reports whether dirPath is, contains, or is inside an
	// ignored directory. Both sides get a trailing slash so that only whole
	// path components are compared.
	protected := func(dirPath string) bool {
		self := dirPath + "/"
		for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
			if ignoredDir == "" {
				continue
			}
			ignored := strings.TrimSuffix(ignoredDir, "/") + "/"
			if strings.HasPrefix(ignored, self) || strings.HasPrefix(self, ignored) {
				return true
			}
		}
		return false
	}

	// Avoid a doubled separator when ROOT was configured with a trailing slash.
	rootPrefix := strings.TrimSuffix(constants.ROOT, "/") + "/"
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		name := entry.Name()
		dirPath := rootPrefix + name
		if protected(dirPath) {
			log.Info("Skipping ignored directory %s", name)
			continue
		}

		empty, err := isDirEmpty(dirPath)
		if err != nil {
			log.Error("Error checking if directory %s is empty: %s", name, err)
			continue
		}
		log.Info("Directory %s isempty: %t", name, empty)
		if !empty {
			continue
		}

		log.Info("Deleting empty directory %s", name)
		// RemoveAll rather than Remove: an "empty" directory may still
		// contain nested directories that hold no files.
		if err := os.RemoveAll(dirPath); err != nil {
			log.Error("Error deleting empty directory %s: %s", name, err)
		}
	}
}
// isDirEmpty reports whether the tree rooted at dirPath contains no
// non-directory entries; nested directories alone do not count as content.
//
// The walk stops at the first non-directory entry. Errors met while walking
// are logged, the walk continues, and the most recent one is returned
// alongside the result.
func isDirEmpty(dirPath string) (bool, error) {
	log := logger.Default.WithPrefix("isDirEmpty").WithPrefix(dirPath)

	var (
		lastErr   error
		foundFile bool
	)
	filepath.Walk(dirPath, func(entry string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			log.Error("Error scanning %s: %s", entry, walkErr)
			lastErr = walkErr
			return nil
		case info.IsDir():
			return nil
		}
		foundFile = true
		log.Info("Directory %s is not empty, found %s", dirPath, entry)
		return filepath.SkipAll
	})
	return !foundFile, lastErr
}
// Constants is the runtime configuration of the cleaner.
type Constants struct {
	// ROOT is the directory that is scanned for entries to archive;
	// ROOT_ARCHIVE is the directory archived entries are moved to.
	ROOT, ROOT_ARCHIVE string
	// IGNORED_DIRECTORIES lists path prefixes that the root scan skips.
	IGNORED_DIRECTORIES []string
	// ARCHIVE_THRESHOLD and DELETE_THRESHOLD are ages in milliseconds beyond
	// which an entry is archived or an archived entry is deleted.
	ARCHIVE_THRESHOLD, DELETE_THRESHOLD int64
	// SCAN_INTERVAL is the pause between two runs.
	SCAN_INTERVAL time.Duration
	// USE_MODTIME selects modification time instead of access time when
	// computing an entry's age.
	USE_MODTIME bool
}
// doRun performs one full pass: scan the root, scan the archive, remove empty
// directories from the root, then log the per-run counters and reset them.
func doRun() {
	for _, step := range []func(){scanRoot, scanArchive, cleanRoot} {
		step()
	}
	logger.Info("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted)
	numFilesArchived, numFilesDeleted = 0, 0
}
// constants holds the active configuration. It starts as the zero value and
// is filled in by main before the first run.
var constants Constants
// main reads the configuration from environment variables, stores it in
// constants, runs one pass immediately and then one pass per scan interval,
// forever.
//
// Environment variables (default in parentheses):
//
//	ROOT                 directory to clean ("/tmp")
//	ROOT_ARCHIVE         archive directory (ROOT + "/archive")
//	IGNORED_DIRECTORIES  comma-separated paths to skip (none); the archive
//	                     directory is always added
//	ARCHIVE_THRESHOLD    age before archiving, parseDuration syntax ("1d")
//	DELETE_THRESHOLD     age before deleting from the archive ("12h")
//	SCAN_INTERVAL        pause between passes ("1m")
//	USE_MODTIME          "true" to use modification instead of access time ("false")
func main() {
	flag.Parse()
	logger.InitFlag()
	// Important: access times don't accumulate.
	// This implies that archiving the file won't alter its access time.
	// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
	// where X represents the duration it can exist in the folder,
	// and Y represents the duration it can exist in the archive.
	logger.Info("Starting directory cleaner")

	root := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp")))
	rootArchive := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", root+"/archive")))
	// MkdirAll succeeds when the directory already exists and also creates
	// missing parents, so any error here is a real problem worth logging.
	if err := os.MkdirAll(rootArchive, os.ModePerm); err != nil {
		logger.Error("Error creating archive directory %s: %s", rootArchive, err)
	}

	var ignoredDirectories []string
	for _, dir := range strings.Split(getEnv("IGNORED_DIRECTORIES", ""), ",") {
		dir = filepath.ToSlash(strings.TrimSpace(dir))
		// Drop empty entries (unset variable, "a,,b", trailing comma): an
		// empty string is a prefix of every path.
		if dir == "" {
			continue
		}
		ignoredDirectories = append(ignoredDirectories, dir)
	}
	ignoredDirectories = append(ignoredDirectories, rootArchive)

	archiveThreshold := parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
	deleteThreshold := parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
	scanInterval := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m"))) * time.Millisecond
	if scanInterval <= 0 {
		// parseDuration yields 0 for unparsable input; sleeping for 0 would
		// turn the loop below into a busy loop.
		logger.Error("SCAN_INTERVAL parsed as %dms, falling back to 1m", scanInterval.Milliseconds())
		scanInterval = time.Minute
	}
	useModtime := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"

	logger.Info("Input args parsed as:")
	logger.Info("ROOT: %s", root)
	logger.Info("ROOT_ARCHIVE: %s", rootArchive)
	logger.Info("IGNORED_DIRECTORIES: %s", ignoredDirectories)
	logger.Info("ARCHIVE_THRESHOLD: %d", archiveThreshold)
	logger.Info("DELETE_THRESHOLD: %d", deleteThreshold)
	logger.Info("SCAN_INTERVAL: %d", scanInterval.Milliseconds())
	logger.Info("USE_MODTIME: %t", useModtime)
	if deleteThreshold <= archiveThreshold {
		// See the note at the top of main: a file old enough to be archived
		// is then already old enough to be deleted.
		logger.Info("DELETE_THRESHOLD (%dms) is not greater than ARCHIVE_THRESHOLD (%dms); archived files are deleted as soon as the archive is scanned", deleteThreshold, archiveThreshold)
	}

	constants = Constants{
		ROOT:                root,
		ROOT_ARCHIVE:        rootArchive,
		IGNORED_DIRECTORIES: ignoredDirectories,
		ARCHIVE_THRESHOLD:   archiveThreshold,
		DELETE_THRESHOLD:    deleteThreshold,
		SCAN_INTERVAL:       scanInterval,
		USE_MODTIME:         useModtime,
	}

	doRun()
	for {
		logger.Info("Running at %d", time.Now().UnixMilli())
		time.Sleep(scanInterval)
		doRun()
	}
}