This commit is contained in:
2024-06-22 13:45:20 +02:00
parent d9f64ac0ad
commit 7c8c92be9a

622
main.go
View File

@@ -1,311 +1,311 @@
package main package main
import ( import (
"log" "log"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/djherbis/times" "github.com/djherbis/times"
) )
// timeUnits maps a duration unit suffix to its length in milliseconds.
// "M" is counted as 30 days and "y" as 365 days.
var timeUnits = map[string]int64{
	"ms": 1,
	"s":  1000,
	"m":  1000 * 60,
	"h":  1000 * 60 * 60,
	"d":  1000 * 60 * 60 * 24,
	"M":  1000 * 60 * 60 * 24 * 30,
	"y":  1000 * 60 * 60 * 24 * 365,
}

// Counters for the files moved to the archive and removed from it.
var (
	numFilesArchived = 0
	numFilesDeleted  = 0
)

// valueRegex and unitRegex pick the number and the unit suffix out of a
// single duration part such as "12h". MustCompile is used because the
// patterns are constants: a typo should fail loudly at start-up instead of
// leaving a nil *regexp.Regexp behind.
var (
	valueRegex = regexp.MustCompile(`\d+`)
	unitRegex  = regexp.MustCompile(`[a-zA-Z]+`)
)

// parseDuration converts a duration specification such as "1d" or "1h_30m"
// into milliseconds.
//
// The input is split on "_" and the parts are summed. Each part must contain
// a decimal number and a unit listed in timeUnits. A part that is malformed,
// uses an unknown unit, or whose number does not fit in an int64 is logged
// and skipped, so a completely invalid specification yields 0.
func parseDuration(date string) int64 {
	var milliseconds int64
	for _, part := range strings.Split(strings.TrimSpace(date), "_") {
		part = strings.TrimSpace(part)
		log.Printf("Parsing date part: %s", part)

		// The number and the unit are located independently of each other,
		// so their relative order inside the part is not enforced.
		value := valueRegex.FindString(part)
		unit := unitRegex.FindString(part)
		if value == "" || unit == "" {
			log.Println("Invalid date part: " + part)
			continue
		}
		factor, ok := timeUnits[unit]
		if !ok {
			log.Println("Invalid date unit: " + unit)
			continue
		}
		log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)

		// Parse with the full 64-bit range. A 16-bit parse fails for every
		// number above 32767 and hands back the clamped value, which would
		// silently shorten durations such as "60000ms".
		amount, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			log.Printf("Invalid date value %s: %s", value, err)
			continue
		}

		valueMs := amount * factor
		milliseconds += valueMs
		log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds)
	}
	return milliseconds
}
// getEnv returns the value of the environment variable key. The fallback def
// is returned only when the variable is not set at all; a variable that is
// set to the empty string yields "".
func getEnv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
func scanRoot() { func scanRoot() {
log.Println("Scanning root directory...") log.Println("Scanning root directory...")
filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error { filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
log.Printf("Error scanning %s: %s\n", path, err) log.Printf("Error scanning %s: %s", path, err)
return nil return nil
} }
path = filepath.ToSlash(path) path = filepath.ToSlash(path)
if path == constants.ROOT { if path == constants.ROOT {
log.Printf("Skipping root directory %s...\n", path) log.Printf("Skipping root directory %s...", path)
return nil return nil
} }
// I forgot why this code was here... It doesn't make sense to me now // I forgot why this code was here... It doesn't make sense to me now
// if info.IsDir() { // if info.IsDir() {
// log.Printf("Skipping directory %s...\n", path) // log.Printf("Skipping directory %s...", path)
// return filepath.SkipDir // return filepath.SkipDir
// } // }
// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it // We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
// In fact iteration should be faster for small lists rather than hashing // In fact iteration should be faster for small lists rather than hashing
for _, ignoredDir := range constants.IGNORED_DIRECTORIES { for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES)) log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES))
if strings.HasPrefix(path, ignoredDir) { if strings.HasPrefix(path, ignoredDir) {
log.Printf("Ignoring directory %s\n", path) log.Printf("Ignoring directory %s", path)
return filepath.SkipDir return filepath.SkipDir
} }
} }
processFile(path, info) processFile(path, info)
return nil return nil
}) })
} }
func scanArchive() { func scanArchive() {
log.Println("Scanning archive...") log.Println("Scanning archive...")
filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
log.Printf("Error scanning %s: %s\n", path, err) log.Printf("Error scanning %s: %s", path, err)
return nil return nil
} }
path = filepath.ToSlash(path) path = filepath.ToSlash(path)
if path == constants.ROOT_ARCHIVE { if path == constants.ROOT_ARCHIVE {
log.Printf("Skipping root directory %s...\n", path) log.Printf("Skipping root directory %s...", path)
return nil return nil
} }
processArchiveFile(path, info) processArchiveFile(path, info)
return nil return nil
}) })
} }
func processFile(path string, info os.FileInfo) { func processFile(path string, info os.FileInfo) {
var now = time.Now().UnixMilli() var now = time.Now().UnixMilli()
var timeType = "accessed" var timeType = "accessed"
if constants.USE_MODTIME { if constants.USE_MODTIME {
timeType = "modified" timeType = "modified"
} }
var fileTime int64 = 0 var fileTime int64 = 0
if constants.USE_MODTIME { if constants.USE_MODTIME {
fileTime = times.Get(info).ModTime().UnixMilli() fileTime = times.Get(info).ModTime().UnixMilli()
} else { } else {
fileTime = times.Get(info).AccessTime().UnixMilli() fileTime = times.Get(info).AccessTime().UnixMilli()
} }
var timeDelta = now - fileTime var timeDelta = now - fileTime
log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta) log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
if timeDelta > constants.ARCHIVE_THRESHOLD { if timeDelta > constants.ARCHIVE_THRESHOLD {
log.Printf("File %s was %s more than %dms ago, archiving...\n", path, timeType, constants.ARCHIVE_THRESHOLD) log.Printf("File %s was %s more than %dms ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD)
archiveFile(path) archiveFile(path)
} }
} }
func processArchiveFile(path string, info os.FileInfo) { func processArchiveFile(path string, info os.FileInfo) {
var now = time.Now().UnixMilli() var now = time.Now().UnixMilli()
var timeType = "accessed" var timeType = "accessed"
if constants.USE_MODTIME { if constants.USE_MODTIME {
timeType = "modified" timeType = "modified"
} }
var fileTime int64 = 0 var fileTime int64 = 0
if constants.USE_MODTIME { if constants.USE_MODTIME {
fileTime = times.Get(info).ModTime().UnixMilli() fileTime = times.Get(info).ModTime().UnixMilli()
} else { } else {
fileTime = times.Get(info).AccessTime().UnixMilli() fileTime = times.Get(info).AccessTime().UnixMilli()
} }
var timeDelta = now - int64(fileTime) var timeDelta = now - int64(fileTime)
log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta) log.Printf("File %s last %s at %d, %dms ago", path, timeType, fileTime, timeDelta)
if timeDelta > constants.DELETE_THRESHOLD { if timeDelta > constants.DELETE_THRESHOLD {
log.Printf("File %s was %s more than %dms ago, deleting...\n", path, timeType, constants.DELETE_THRESHOLD) log.Printf("File %s was %s more than %dms ago, deleting...", path, timeType, constants.DELETE_THRESHOLD)
deleteFile(path) deleteFile(path)
} }
} }
func archiveFile(path string) { func archiveFile(path string) {
// defer os.Exit(1) defer os.Exit(1)
var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
log.Printf("Archiving file %s to %s...\n", path, newPath) log.Printf("Archiving file %s to %s...", path, newPath)
os.MkdirAll(filepath.Dir(newPath), os.ModePerm) os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
var err = os.Rename(path, newPath) var err = os.Rename(path, newPath)
if err != nil { if err != nil {
log.Printf("Error archiving file %s: %s\n", path, err) log.Printf("Error archiving file %s: %s", path, err)
return return
} }
numFilesArchived++ numFilesArchived++
} }
func deleteFile(path string) { func deleteFile(path string) {
// defer os.Exit(1) defer os.Exit(1)
log.Printf("Deleting file %s...\n", path) log.Printf("Deleting file %s...", path)
var err = os.Remove(path) err := os.Remove(path)
if err != nil { if err != nil {
log.Printf("Error deleting file %s: %s\n", path, err) log.Printf("Error deleting file %s: %s", path, err)
return return
} }
numFilesDeleted++ numFilesDeleted++
} }
func cleanRoot() { func cleanRoot() {
var files, err = os.ReadDir(constants.ROOT) files, err := os.ReadDir(constants.ROOT)
if err != nil { if err != nil {
log.Printf("Error reading root directory %s: %s\n", constants.ROOT, err) log.Printf("Error reading root directory %s: %s", constants.ROOT, err)
return return
} }
for _, file := range files { for _, file := range files {
if !file.IsDir() { if !file.IsDir() {
continue continue
} }
var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name()) empty, err := isDirEmpty(constants.ROOT + "/" + file.Name())
if err != nil { if err != nil {
log.Printf("Error checking if directory %s is empty: %s\n", file.Name(), err) log.Printf("Error checking if directory %s is empty: %s", file.Name(), err)
continue continue
} }
log.Printf("Directory %s isempty: %t\n", file.Name(), empty) log.Printf("Directory %s isempty: %t", file.Name(), empty)
if empty { if empty {
log.Printf("Deleting empty directory %s\n", file.Name()) log.Printf("Deleting empty directory %s", file.Name())
var err = os.RemoveAll(constants.ROOT + "/" + file.Name()) err := os.RemoveAll(constants.ROOT + "/" + file.Name())
if err != nil { if err != nil {
log.Printf("Error deleting empty directory %s: %s\n", file.Name(), err) log.Printf("Error deleting empty directory %s: %s", file.Name(), err)
} }
} }
} }
} }
// isDirEmpty reports whether the tree rooted at dirPath contains no
// non-directory entries; nested directories alone do not make it non-empty.
//
// The walk stops at the first non-directory entry. If any part of the tree
// cannot be read the error is returned together with false, so a failed
// check can never be mistaken for "empty".
func isDirEmpty(dirPath string) (bool, error) {
	empty := true
	// os.DirEntry is an alias of fs.DirEntry, so this closure satisfies
	// fs.WalkDirFunc without importing io/fs.
	err := filepath.WalkDir(dirPath, func(path string, entry os.DirEntry, err error) error {
		if err != nil {
			log.Printf("Error scanning %s: %s", path, err)
			return err
		}
		if !entry.IsDir() {
			empty = false
			log.Printf("Directory %s is not empty, found %s", dirPath, path)
			return filepath.SkipAll
		}
		return nil
	})
	if err != nil {
		return false, err
	}
	return empty, nil
}
// Constants is the runtime configuration of the archiver. main fills the
// package-level instance from environment variables.
type Constants struct {
	// ROOT is the directory tree that is scanned for files to archive.
	ROOT string
	// ROOT_ARCHIVE is the directory that archived files are moved into and
	// that is scanned for files to delete.
	ROOT_ARCHIVE string
	// IGNORED_DIRECTORIES lists path prefixes that the root scan skips.
	IGNORED_DIRECTORIES []string
	// ARCHIVE_THRESHOLD is the age in milliseconds above which a file in
	// ROOT is archived.
	ARCHIVE_THRESHOLD int64
	// DELETE_THRESHOLD is the age in milliseconds above which an archived
	// file is deleted.
	DELETE_THRESHOLD int64
	// SCAN_INTERVAL is the pause between two consecutive runs.
	SCAN_INTERVAL time.Duration
	// USE_MODTIME selects the modification time instead of the access time
	// as the reference for a file's age.
	USE_MODTIME bool
}
func doRun() { func doRun() {
scanRoot() scanRoot()
scanArchive() scanArchive()
cleanRoot() cleanRoot()
log.Printf("Archived %d files, deleted %d files\n", numFilesArchived, numFilesDeleted) log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted)
numFilesArchived = 0 numFilesArchived = 0
numFilesDeleted = 0 numFilesDeleted = 0
} }
var constants = Constants{} var constants = Constants{}
func main() { func main() {
log.SetFlags(0b111) log.SetFlags(log.Lmicroseconds)
// Important: Access times dont accumulate. // Important: Access times dont accumulate.
// This implies that archiving the file won't alter its access time. // This implies that archiving the file won't alter its access time.
// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME, // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
// where X represents the duration it can exist in the folder, // where X represents the duration it can exist in the folder,
// and Y represents the duration it can exist in the archive. // and Y represents the duration it can exist in the archive.
var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) ROOT := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/c/tmp")))
var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) ROOT_ARCHIVE := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
os.Mkdir(ROOT_ARCHIVE, os.ModePerm) os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
var IGNORED_DIRECTORIES = []string{} IGNORED_DIRECTORIES := []string{}
var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "") ignoredEnv := getEnv("IGNORED_DIRECTORIES", "")
if ignoredEnv != "" { if ignoredEnv != "" {
ignoredEnv = strings.TrimSpace(ignoredEnv) ignoredEnv = strings.TrimSpace(ignoredEnv)
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
} }
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
for key, dir := range IGNORED_DIRECTORIES { for key, dir := range IGNORED_DIRECTORIES {
IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
} }
var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d")) ARCHIVE_THRESHOLD := parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h")) DELETE_THRESHOLD := parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6) SCAN_INTERVAL := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6)
var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true" USE_MODTIME := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
constants.ROOT = ROOT constants.ROOT = ROOT
constants.ROOT_ARCHIVE = ROOT_ARCHIVE constants.ROOT_ARCHIVE = ROOT_ARCHIVE
constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
constants.DELETE_THRESHOLD = DELETE_THRESHOLD constants.DELETE_THRESHOLD = DELETE_THRESHOLD
constants.SCAN_INTERVAL = SCAN_INTERVAL constants.SCAN_INTERVAL = SCAN_INTERVAL
constants.USE_MODTIME = USE_MODTIME constants.USE_MODTIME = USE_MODTIME
log.Println("Input args parsed as:") log.Println("Input args parsed as:")
log.Printf("ROOT: %s\n", ROOT) log.Printf("ROOT: %s", ROOT)
log.Printf("ROOT_ARCHIVE: %s\n", ROOT_ARCHIVE) log.Printf("ROOT_ARCHIVE: %s", ROOT_ARCHIVE)
log.Printf("IGNORED_DIRECTORIES: %s\n", IGNORED_DIRECTORIES) log.Printf("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES)
log.Printf("ARCHIVE_THRESHOLD: %d\n", ARCHIVE_THRESHOLD) log.Printf("ARCHIVE_THRESHOLD: %d", ARCHIVE_THRESHOLD)
log.Printf("DELETE_THRESHOLD: %d\n", DELETE_THRESHOLD) log.Printf("DELETE_THRESHOLD: %d", DELETE_THRESHOLD)
log.Printf("SCAN_INTERVAL: %d\n", SCAN_INTERVAL.Milliseconds()) log.Printf("SCAN_INTERVAL: %d", SCAN_INTERVAL.Milliseconds())
log.Printf("USE_MODTIME: %s\n", strconv.FormatBool(USE_MODTIME)) log.Printf("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME))
doRun() doRun()
for { for {
log.Printf("Running at %d", time.Now().UnixMilli()) log.Printf("Running at %d", time.Now().UnixMilli())
time.Sleep(SCAN_INTERVAL) time.Sleep(SCAN_INTERVAL)
doRun() doRun()
} }
} }