312 lines
8.7 KiB
Go
312 lines
8.7 KiB
Go
package main
|
||
|
||
import (
|
||
"log"
|
||
"os"
|
||
"path/filepath"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/djherbis/times"
|
||
)
|
||
|
||
// timeUnits maps a duration unit suffix to its length in milliseconds.
// A month ("M") is approximated as 30 days and a year ("y") as 365 days.
var timeUnits = map[string]int64{
	"ms": 1,
	"s":  1000,
	"m":  1000 * 60,
	"h":  1000 * 60 * 60,
	"d":  1000 * 60 * 60 * 24,
	"M":  1000 * 60 * 60 * 24 * 30,
	"y":  1000 * 60 * 60 * 24 * 365,
}

// Counters for the current run; doRun logs them and resets them to zero.
var (
	numFilesArchived = 0
	numFilesDeleted  = 0
)

// valueRegex and unitRegex pick the numeric value and the unit suffix out of
// one duration part such as "12h". MustCompile is used so that a broken
// pattern fails loudly at start-up instead of leaving a nil *regexp.Regexp.
var (
	valueRegex = regexp.MustCompile(`\d+`)
	unitRegex  = regexp.MustCompile(`[a-zA-Z]+`)
)

// parseDuration converts a duration description into milliseconds.
//
// The input is a list of parts separated by "_" (for example "1d_12h"); each
// part is a non-negative integer followed by one of the units in timeUnits.
// Surrounding whitespace is ignored. The parts are summed. A part that has no
// value, no unit, an unknown unit, a value that does not fit into an int64, or
// that would overflow the total is logged and skipped, so a completely invalid
// input yields 0.
func parseDuration(date string) int64 {
	const maxInt64 int64 = 1<<63 - 1

	var milliseconds int64

	for _, part := range strings.Split(strings.TrimSpace(date), "_") {
		part = strings.TrimSpace(part)
		log.Printf("Parsing date part: %s\n", part)
		value := valueRegex.FindString(part)
		unit := unitRegex.FindString(part)

		if value == "" || unit == "" {
			log.Println("Invalid date part: " + part)
			continue
		}

		factor, ok := timeUnits[unit]
		if !ok {
			log.Println("Invalid date unit: " + unit)
			continue
		}
		log.Printf("Part %s parsed as: Value: %s, Unit: %s\n", part, value, unit)

		// Parse as a full 64-bit integer: a smaller bit size silently clamps
		// large values (e.g. "40000ms") to the maximum of that size.
		amount, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			log.Printf("Invalid date value %s: %s\n", value, err)
			continue
		}

		// amount is never negative (the regex matches digits only) and every
		// factor is at least 1, so these two checks cover all overflow cases.
		if amount > maxInt64/factor || amount*factor > maxInt64-milliseconds {
			log.Printf("Date part %s overflows the duration, ignoring\n", part)
			continue
		}

		valueMs := amount * factor
		milliseconds += valueMs
		log.Printf("Adding %dms to duration, now: %d\n", valueMs, milliseconds)
	}

	return milliseconds
}
|
||
|
||
// getEnv returns the value of the environment variable key. When the variable
// is not set at all, def is returned instead; a variable that is set to the
// empty string is returned as "".
func getEnv(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
|
||
|
||
func scanRoot() {
|
||
log.Println("Scanning root directory...")
|
||
filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
|
||
if err != nil {
|
||
log.Printf("Error scanning %s: %s\n", path, err)
|
||
return nil
|
||
}
|
||
path = filepath.ToSlash(path)
|
||
|
||
if path == constants.ROOT {
|
||
log.Printf("Skipping root directory %s...\n", path)
|
||
return nil
|
||
}
|
||
|
||
// I forgot why this code was here... It doesn't make sense to me now
|
||
// if info.IsDir() {
|
||
// log.Printf("Skipping directory %s...\n", path)
|
||
// return filepath.SkipDir
|
||
// }
|
||
|
||
// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
|
||
// In fact iteration should be faster for small lists rather than hashing
|
||
for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
|
||
log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES))
|
||
if strings.HasPrefix(path, ignoredDir) {
|
||
log.Printf("Ignoring directory %s\n", path)
|
||
return filepath.SkipDir
|
||
}
|
||
}
|
||
|
||
processFile(path, info)
|
||
return nil
|
||
})
|
||
}
|
||
|
||
func scanArchive() {
|
||
log.Println("Scanning archive...")
|
||
filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error {
|
||
if err != nil {
|
||
log.Printf("Error scanning %s: %s\n", path, err)
|
||
return nil
|
||
}
|
||
path = filepath.ToSlash(path)
|
||
|
||
if path == constants.ROOT_ARCHIVE {
|
||
log.Printf("Skipping root directory %s...\n", path)
|
||
return nil
|
||
}
|
||
|
||
processArchiveFile(path, info)
|
||
return nil
|
||
})
|
||
}
|
||
|
||
func processFile(path string, info os.FileInfo) {
|
||
var now = time.Now().UnixMilli()
|
||
|
||
var timeType = "accessed"
|
||
if constants.USE_MODTIME {
|
||
timeType = "modified"
|
||
}
|
||
|
||
var fileTime int64 = 0
|
||
if constants.USE_MODTIME {
|
||
fileTime = times.Get(info).ModTime().UnixMilli()
|
||
} else {
|
||
fileTime = times.Get(info).AccessTime().UnixMilli()
|
||
}
|
||
|
||
var timeDelta = now - fileTime
|
||
log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta)
|
||
if timeDelta > constants.ARCHIVE_THRESHOLD {
|
||
log.Printf("File %s was %s more than %dms ago, archiving...\n", path, timeType, constants.ARCHIVE_THRESHOLD)
|
||
archiveFile(path)
|
||
}
|
||
}
|
||
|
||
func processArchiveFile(path string, info os.FileInfo) {
|
||
var now = time.Now().UnixMilli()
|
||
|
||
var timeType = "accessed"
|
||
if constants.USE_MODTIME {
|
||
timeType = "modified"
|
||
}
|
||
|
||
var fileTime int64 = 0
|
||
if constants.USE_MODTIME {
|
||
fileTime = times.Get(info).ModTime().UnixMilli()
|
||
} else {
|
||
fileTime = times.Get(info).AccessTime().UnixMilli()
|
||
}
|
||
|
||
var timeDelta = now - int64(fileTime)
|
||
log.Printf("File %s last %s at %d, %dms ago\n", path, timeType, fileTime, timeDelta)
|
||
|
||
if timeDelta > constants.DELETE_THRESHOLD {
|
||
log.Printf("File %s was %s more than %dms ago, deleting...\n", path, timeType, constants.DELETE_THRESHOLD)
|
||
deleteFile(path)
|
||
}
|
||
}
|
||
|
||
func archiveFile(path string) {
|
||
// defer os.Exit(1)
|
||
var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
|
||
log.Printf("Archiving file %s to %s...\n", path, newPath)
|
||
|
||
os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
|
||
var err = os.Rename(path, newPath)
|
||
if err != nil {
|
||
log.Printf("Error archiving file %s: %s\n", path, err)
|
||
return
|
||
}
|
||
numFilesArchived++
|
||
}
|
||
|
||
func deleteFile(path string) {
|
||
// defer os.Exit(1)
|
||
log.Printf("Deleting file %s...\n", path)
|
||
var err = os.Remove(path)
|
||
if err != nil {
|
||
log.Printf("Error deleting file %s: %s\n", path, err)
|
||
return
|
||
}
|
||
numFilesDeleted++
|
||
}
|
||
|
||
func cleanRoot() {
|
||
var files, err = os.ReadDir(constants.ROOT)
|
||
if err != nil {
|
||
log.Printf("Error reading root directory %s: %s\n", constants.ROOT, err)
|
||
return
|
||
}
|
||
for _, file := range files {
|
||
if !file.IsDir() {
|
||
continue
|
||
}
|
||
var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name())
|
||
if err != nil {
|
||
log.Printf("Error checking if directory %s is empty: %s\n", file.Name(), err)
|
||
continue
|
||
}
|
||
log.Printf("Directory %s isempty: %t\n", file.Name(), empty)
|
||
if empty {
|
||
log.Printf("Deleting empty directory %s\n", file.Name())
|
||
var err = os.RemoveAll(constants.ROOT + "/" + file.Name())
|
||
if err != nil {
|
||
log.Printf("Error deleting empty directory %s: %s\n", file.Name(), err)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
// isDirEmpty reports whether the tree rooted at dirPath contains no
// non-directory entries. Nested directories alone do not make it non-empty;
// any other entry (regular file, symlink, ...) does.
//
// The walk stops at the first non-directory entry or at the first error. When
// an error occurs it is logged and returned together with false, so a caller
// that ignores the error never treats an unreadable tree as empty.
func isDirEmpty(dirPath string) (bool, error) {
	empty := true

	// WalkDir avoids the per-entry Lstat that Walk performs; only the entry
	// type is needed here.
	err := filepath.WalkDir(dirPath, func(path string, entry os.DirEntry, err error) error {
		if err != nil {
			log.Printf("Error scanning %s: %s\n", path, err)
			return err
		}
		if !entry.IsDir() {
			empty = false
			log.Printf("Directory %s is not empty, found %s\n", dirPath, path)
			return filepath.SkipAll
		}
		return nil
	})
	if err != nil {
		return false, err
	}
	return empty, nil
}
|
||
|
||
// Constants holds the run-time configuration that main reads from the
// environment.
type Constants struct {
	// ROOT is the directory tree walked by scanRoot.
	ROOT string
	// ROOT_ARCHIVE is the directory tree files are moved to and that
	// scanArchive walks.
	ROOT_ARCHIVE string
	// IGNORED_DIRECTORIES lists path prefixes that scanRoot skips.
	IGNORED_DIRECTORIES []string
	// ARCHIVE_THRESHOLD is the age in milliseconds above which processFile
	// archives an entry.
	ARCHIVE_THRESHOLD int64
	// DELETE_THRESHOLD is the age in milliseconds above which
	// processArchiveFile deletes an entry.
	DELETE_THRESHOLD int64
	// SCAN_INTERVAL is the pause between two runs.
	SCAN_INTERVAL time.Duration
	// USE_MODTIME selects the modification time instead of the access time as
	// the reference timestamp.
	USE_MODTIME bool
}
|
||
|
||
func doRun() {
|
||
scanRoot()
|
||
scanArchive()
|
||
cleanRoot()
|
||
log.Printf("Archived %d files, deleted %d files\n", numFilesArchived, numFilesDeleted)
|
||
numFilesArchived = 0
|
||
numFilesDeleted = 0
|
||
}
|
||
|
||
// constants is the process-wide configuration; main fills it in before the
// first run.
var constants Constants
|
||
|
||
func main() {
|
||
log.SetFlags(0b111)
|
||
// Important: Access times don’t accumulate.
|
||
// This implies that archiving the file won't alter its access time.
|
||
// Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
|
||
// where X represents the duration it can exist in the folder,
|
||
// and Y represents the duration it can exist in the archive.
|
||
|
||
var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp")))
|
||
var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
|
||
os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
|
||
var IGNORED_DIRECTORIES = []string{}
|
||
var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "")
|
||
if ignoredEnv != "" {
|
||
ignoredEnv = strings.TrimSpace(ignoredEnv)
|
||
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
|
||
}
|
||
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
|
||
for key, dir := range IGNORED_DIRECTORIES {
|
||
IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
|
||
}
|
||
var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
|
||
var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h"))
|
||
var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6)
|
||
var USE_MODTIME = strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
|
||
|
||
constants.ROOT = ROOT
|
||
constants.ROOT_ARCHIVE = ROOT_ARCHIVE
|
||
constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
|
||
constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
|
||
constants.DELETE_THRESHOLD = DELETE_THRESHOLD
|
||
constants.SCAN_INTERVAL = SCAN_INTERVAL
|
||
constants.USE_MODTIME = USE_MODTIME
|
||
|
||
log.Println("Input args parsed as:")
|
||
log.Printf("ROOT: %s\n", ROOT)
|
||
log.Printf("ROOT_ARCHIVE: %s\n", ROOT_ARCHIVE)
|
||
log.Printf("IGNORED_DIRECTORIES: %s\n", IGNORED_DIRECTORIES)
|
||
log.Printf("ARCHIVE_THRESHOLD: %d\n", ARCHIVE_THRESHOLD)
|
||
log.Printf("DELETE_THRESHOLD: %d\n", DELETE_THRESHOLD)
|
||
log.Printf("SCAN_INTERVAL: %d\n", SCAN_INTERVAL.Milliseconds())
|
||
log.Printf("USE_MODTIME: %s\n", strconv.FormatBool(USE_MODTIME))
|
||
|
||
doRun()
|
||
for {
|
||
log.Printf("Running at %d", time.Now().UnixMilli())
|
||
time.Sleep(SCAN_INTERVAL)
|
||
doRun()
|
||
}
|
||
}
|