Compare commits

...

10 Commits

2 changed files with 347 additions and 297 deletions

View File

@@ -1,30 +1,21 @@
# syntax=docker/dockerfile:1 FROM golang:1.21.6 as base
FROM golang:1.22-rc-alpine
WORKDIR $GOPATH/src/app/
# Set destination for COPY
WORKDIR /app COPY . .
# Download Go modules RUN go mod download
COPY go.mod go.sum ./ RUN go mod verify
RUN go mod download
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /main .
# Copy the source code. Note the slash at the end, as explained in
# https://docs.docker.com/engine/reference/builder/#copy FROM scratch
# May not be enough for complex projects
COPY *.go ./ COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
# Build COPY --from=base /etc/passwd /etc/passwd
# Add GOOS=linux for linux COPY --from=base /etc/group /etc/group
# GOOS=windows
# GOOS=darwin COPY --from=base /main .
RUN go build -o /main
# Optional:
# To bind to a TCP port, runtime parameters must be supplied to the docker command.
# But we can document in the Dockerfile what ports
# the application is going to listen on by default.
# https://docs.docker.com/engine/reference/builder/#expose
# EXPOSE 8080
# Run
CMD ["/main"] CMD ["/main"]

595
main.go
View File

@@ -1,268 +1,327 @@
package main package main
import ( import (
"log" "log"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/djherbis/times" "github.com/djherbis/times"
) )
var timeUnits = map[string]int64{ var timeUnits = map[string]int64{
"ms": 1, "ms": 1,
"s": 1000, "s": 1000,
"m": 1000 * 60, "m": 1000 * 60,
"h": 1000 * 60 * 60, "h": 1000 * 60 * 60,
"d": 1000 * 60 * 60 * 24, "d": 1000 * 60 * 60 * 24,
"M": 1000 * 60 * 60 * 24 * 30, "M": 1000 * 60 * 60 * 24 * 30,
"y": 1000 * 60 * 60 * 24 * 365, "y": 1000 * 60 * 60 * 24 * 365,
} }
var valueRegex, _ = regexp.Compile(`\d+`) var numFilesArchived = 0
var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`) var numFilesDeleted = 0
func parseDuration(date string) int64 { var valueRegex, _ = regexp.Compile(`\d+`)
var milliseconds int64 = 0 var unitRegex, _ = regexp.Compile(`[a-zA-Z]+`)
date = strings.TrimSpace(date) func parseDuration(date string) int64 {
var parts = strings.Split(date, "_") var milliseconds int64 = 0
for _, part := range parts {
part = strings.TrimSpace(part) date = strings.TrimSpace(date)
log.Printf("Parsing date part: %s\n", part) var parts = strings.Split(date, "_")
var value = valueRegex.FindString(part) for _, part := range parts {
var unit = unitRegex.FindString(part) part = strings.TrimSpace(part)
log.Printf("Parsing date part: %s", part)
if value == "" || unit == "" { var value = valueRegex.FindString(part)
log.Println("Invalid date part: " + part) var unit = unitRegex.FindString(part)
continue
} if value == "" || unit == "" {
log.Println("Invalid date part: " + part)
if _, ok := timeUnits[unit]; !ok { continue
log.Println("Invalid date unit: " + unit) }
continue
} if _, ok := timeUnits[unit]; !ok {
log.Printf("Part %s parsed as: Value: %s, Unit: %s\n", part, value, unit) log.Println("Invalid date unit: " + unit)
continue
var valueMs, _ = strconv.ParseInt(value, 10, 16) }
valueMs = valueMs * timeUnits[unit] log.Printf("Part %s parsed as: Value: %s, Unit: %s", part, value, unit)
milliseconds += valueMs
log.Printf("Adding %dms to duration, now: %d\n", valueMs, milliseconds) var valueMs, _ = strconv.ParseInt(value, 10, 16)
} valueMs = valueMs * timeUnits[unit]
milliseconds += valueMs
return milliseconds log.Printf("Adding %dms to duration, now: %d", valueMs, milliseconds)
} }
func getEnv(key, def string) string { return milliseconds
var value, exists = os.LookupEnv(key) }
if exists {
return value func getEnv(key, def string) string {
} var value, exists = os.LookupEnv(key)
return def if exists {
} return value
}
func scanRoot() { return def
log.Println("Scanning root directory...") }
filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
if err != nil { type Constants struct {
log.Printf("Error scanning %s: %s\n", path, err) ROOT string
return nil ROOT_ARCHIVE string
} IGNORED_DIRECTORIES []string
path = filepath.ToSlash(path) ARCHIVE_THRESHOLD time.Duration
DELETE_THRESHOLD time.Duration
if path == constants.ROOT { SCAN_INTERVAL time.Duration
log.Printf("Skipping root directory %s...\n", path) USE_MODTIME bool
return nil }
}
var constants = &Constants{}
if info.IsDir() {
log.Printf("Skipping directory %s...\n", path) // region main
return filepath.SkipDir func main() {
} log.SetFlags(log.Lmicroseconds | log.Lshortfile)
// Important: Access times dont accumulate.
// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it // This implies that archiving the file won't alter its access time.
// In fact iteration should be faster for small lists rather than hashing // Therefore, assign X as the ARCHIVE_TIME and X + Y as the DELETE_TIME,
for _, ignoredDir := range constants.IGNORED_DIRECTORIES { // where X represents the duration it can exist in the folder,
log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES)) // and Y represents the duration it can exist in the archive.
if strings.HasPrefix(path, ignoredDir) {
log.Printf("Ignoring directory %s\n", path) ROOT := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp")))
return filepath.SkipDir ROOT_ARCHIVE := filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive")))
} os.Mkdir(ROOT_ARCHIVE, os.ModePerm)
} IGNORED_DIRECTORIES := []string{}
ignoredEnv := getEnv("IGNORED_DIRECTORIES", "")
processFile(path, info) if ignoredEnv != "" {
return nil ignoredEnv = strings.TrimSpace(ignoredEnv)
}) IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...)
} }
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE)
func scanArchive() { for key, dir := range IGNORED_DIRECTORIES {
log.Println("Scanning archive...") IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir))
filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error { }
if err != nil { ARCHIVE_THRESHOLD := time.Duration(parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))) * time.Millisecond
log.Printf("Error scanning %s: %s\n", path, err) DELETE_THRESHOLD := time.Duration(parseDuration(getEnv("DELETE_THRESHOLD", "12h"))) * time.Millisecond
return nil SCAN_INTERVAL := time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m"))) * time.Millisecond
} USE_MODTIME := strings.TrimSpace(getEnv("USE_MODTIME", "false")) == "true"
path = filepath.ToSlash(path)
constants.ROOT = ROOT
if path == constants.ROOT_ARCHIVE { constants.ROOT_ARCHIVE = ROOT_ARCHIVE
log.Printf("Skipping root directory %s...\n", path) constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES
return nil constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD
} constants.DELETE_THRESHOLD = DELETE_THRESHOLD
constants.SCAN_INTERVAL = SCAN_INTERVAL
processArchiveFile(path, info) constants.USE_MODTIME = USE_MODTIME
return nil
}) log.Println("Input args parsed as:")
} log.Printf("ROOT: %s", ROOT)
log.Printf("ROOT_ARCHIVE: %s", ROOT_ARCHIVE)
func processFile(path string, info os.FileInfo) { log.Printf("IGNORED_DIRECTORIES: %s", IGNORED_DIRECTORIES)
var now = time.Now().UnixMilli() log.Printf("ARCHIVE_THRESHOLD: %s", ARCHIVE_THRESHOLD)
log.Printf("DELETE_THRESHOLD: %s", DELETE_THRESHOLD)
var fileATime int64 = times.Get(info).AccessTime().UnixMilli() log.Printf("SCAN_INTERVAL: %s", SCAN_INTERVAL)
var accessTimeDelta = now - fileATime log.Printf("USE_MODTIME: %s", strconv.FormatBool(USE_MODTIME))
log.Printf("File %s last accessed at %d, %dms ago\n", path, fileATime, accessTimeDelta)
if accessTimeDelta > constants.ARCHIVE_THRESHOLD { doRun()
log.Printf("File %s was accessed more than %dms ago, archiving...\n", path, constants.ARCHIVE_THRESHOLD) for {
archiveFile(path) // os.Exit(0)
} time.Sleep(SCAN_INTERVAL)
} doRun()
}
func processArchiveFile(path string, info os.FileInfo) { }
var now = time.Now().UnixMilli()
func doRun() {
var fileATime int64 = times.Get(info).AccessTime().UnixMilli() log.Printf("Running at %s", time.Now().Format(time.DateTime))
var accessTimeDelta = now - fileATime scanRoot()
log.Printf("File %s last accessed at %d, %dms ago\n", path, fileATime, accessTimeDelta) scanArchive()
if accessTimeDelta > constants.DELETE_THRESHOLD { cleanRoot()
log.Printf("File %s was accessed more than %dms ago, deleting...\n", path, constants.DELETE_THRESHOLD) log.Printf("Archived %d files, deleted %d files", numFilesArchived, numFilesDeleted)
deleteFile(path) numFilesArchived = 0
} numFilesDeleted = 0
} }
func archiveFile(path string) { // region scanRoot
// defer os.Exit(1) func scanRoot() {
var newPath = constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1) log.Println("Scanning root directory...")
log.Printf("Archiving file %s to %s...\n", path, newPath) filepath.Walk(constants.ROOT, func(path string, info os.FileInfo, err error) error {
log.Printf("Scanning file %s...", path)
os.MkdirAll(filepath.Dir(newPath), os.ModePerm) if err != nil {
var err = os.Rename(path, newPath) log.Printf("Error scanning %s: %s", path, err)
if err != nil { return nil
log.Printf("Error archiving file %s: %s\n", path, err) }
return path = filepath.ToSlash(path)
}
} if path == constants.ROOT {
log.Printf("Skipping root directory %s...", path)
func deleteFile(path string) { return nil
// defer os.Exit(1) }
log.Printf("Deleting file %s...\n", path)
var err = os.Remove(path) // I forgot why this code was here... It doesn't make sense to me now
if err != nil { // if info.IsDir() {
log.Printf("Error deleting file %s: %s\n", path, err) // log.Printf("Skipping directory %s...", path)
return // return filepath.SkipDir
} // }
}
// We hope that IGNORED_DIRECTORIES is a small list, so we can afford to iterate over it
func cleanRoot() { // In fact iteration should be faster for small lists rather than hashing
var files, err = os.ReadDir(constants.ROOT) for _, ignoredDir := range constants.IGNORED_DIRECTORIES {
if err != nil { // log.Println(constants.IGNORED_DIRECTORIES, len(constants.IGNORED_DIRECTORIES))
log.Printf("Error reading root directory %s: %s\n", constants.ROOT, err) if strings.HasPrefix(path, ignoredDir) {
return log.Printf("Ignoring directory %s", path)
} return filepath.SkipDir
for _, file := range files { }
if !file.IsDir() { }
continue
} go processFile(path, info)
var empty, err = isDirEmpty(constants.ROOT + "/" + file.Name()) return nil
if err != nil { })
log.Printf("Error checking if directory %s is empty: %s\n", file.Name(), err) }
continue
} func processFile(path string, info os.FileInfo) {
log.Printf("Directory %s isempty: %t\n", file.Name(), empty) now := time.Now().UnixMilli()
if empty { log.Printf("Processing file %s...", path)
log.Printf("Deleting empty directory %s\n", file.Name())
var err = os.RemoveAll(constants.ROOT + "/" + file.Name()) timeType := "accessed"
if err != nil { if constants.USE_MODTIME {
log.Printf("Error deleting empty directory %s: %s\n", file.Name(), err) timeType = "modified"
} }
}
} var fileTime int64
} if constants.USE_MODTIME {
func isDirEmpty(dirPath string) (bool, error) { fileTime = times.Get(info).ModTime().UnixMilli()
var empty = true } else {
var ferr error = nil fileTime = times.Get(info).AccessTime().UnixMilli()
}
filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
if err != nil { timeDelta := now - fileTime
log.Printf("Error scanning %s: %s\n", path, err) fileTimeFormatted := time.UnixMilli(fileTime).Format(time.DateTime)
ferr = err timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond
return nil log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted)
} if timeDelta > constants.ARCHIVE_THRESHOLD.Milliseconds() {
if !info.IsDir() { log.Printf("File %s was %s more than %s ago, archiving...", path, timeType, constants.ARCHIVE_THRESHOLD)
empty = false go archiveFile(path)
log.Printf("Directory %s is not empty, found %s\n", dirPath, path) }
return filepath.SkipAll }
}
return nil func archiveFile(path string) {
}) newPath := constants.ROOT_ARCHIVE + strings.Replace(path, constants.ROOT, "", 1)
return empty, ferr log.Printf("Archiving file %s to %s...", path, newPath)
}
err := os.MkdirAll(filepath.Dir(newPath), os.ModePerm)
type Constants struct { if err != nil {
ROOT string log.Printf("Error creating directory %s: %s", filepath.Dir(newPath), err)
ROOT_ARCHIVE string return
IGNORED_DIRECTORIES []string }
ARCHIVE_THRESHOLD int64 err = os.Rename(path, newPath)
DELETE_THRESHOLD int64 if err != nil {
SCAN_INTERVAL time.Duration log.Printf("Error archiving file %s: %s", path, err)
} return
}
var constants = Constants{} numFilesArchived++
}
func main() {
log.SetFlags(0b111) // region scanArchive
func scanArchive() {
var ROOT = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT", "/tmp"))) log.Println("Scanning archive...")
var ROOT_ARCHIVE = filepath.ToSlash(strings.TrimSpace(getEnv("ROOT_ARCHIVE", ROOT+"/archive"))) filepath.Walk(constants.ROOT_ARCHIVE, func(path string, info os.FileInfo, err error) error {
os.Mkdir(ROOT_ARCHIVE, os.ModePerm) log.Printf("Scanning archive file %s...", path)
var IGNORED_DIRECTORIES = []string{} if err != nil {
var ignoredEnv = getEnv("IGNORED_DIRECTORIES", "") log.Printf("Error scanning %s: %s", path, err)
if ignoredEnv != "" { return nil
ignoredEnv = strings.TrimSpace(ignoredEnv) }
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, strings.Split(ignoredEnv, ",")...) path = filepath.ToSlash(path)
}
IGNORED_DIRECTORIES = append(IGNORED_DIRECTORIES, ROOT_ARCHIVE) if path == constants.ROOT_ARCHIVE {
for key, dir := range IGNORED_DIRECTORIES { log.Printf("Skipping root directory %s...", path)
IGNORED_DIRECTORIES[key] = filepath.ToSlash(strings.TrimSpace(dir)) return nil
} }
var ARCHIVE_THRESHOLD = parseDuration(getEnv("ARCHIVE_THRESHOLD", "1d"))
var DELETE_THRESHOLD = parseDuration(getEnv("DELETE_THRESHOLD", "12h")) go processArchiveFile(path, info)
var SCAN_INTERVAL = time.Duration(parseDuration(getEnv("SCAN_INTERVAL", "1m")) * 1e6) return nil
})
constants.ROOT = ROOT }
constants.ROOT_ARCHIVE = ROOT_ARCHIVE
constants.IGNORED_DIRECTORIES = IGNORED_DIRECTORIES func processArchiveFile(path string, info os.FileInfo) {
constants.ARCHIVE_THRESHOLD = ARCHIVE_THRESHOLD now := time.Now().UnixMilli()
constants.DELETE_THRESHOLD = DELETE_THRESHOLD
constants.SCAN_INTERVAL = SCAN_INTERVAL timeType := "accessed"
if constants.USE_MODTIME {
log.Println("Input args parsed as:") timeType = "modified"
log.Printf("ROOT: %s\n", ROOT) }
log.Printf("ROOT_ARCHIVE: %s\n", ROOT_ARCHIVE)
log.Printf("IGNORED_DIRECTORIES: %s\n", IGNORED_DIRECTORIES) var fileTime int64
log.Printf("ARCHIVE_THRESHOLD: %d\n", ARCHIVE_THRESHOLD) if constants.USE_MODTIME {
log.Printf("DELETE_THRESHOLD: %d\n", DELETE_THRESHOLD) fileTime = times.Get(info).ModTime().UnixMilli()
log.Printf("SCAN_INTERVAL: %d\n", SCAN_INTERVAL.Milliseconds()) } else {
fileTime = times.Get(info).AccessTime().UnixMilli()
scanRoot() }
scanArchive()
cleanRoot() timeDelta := now - int64(fileTime)
for {
log.Printf("Running at %d", time.Now().UnixMilli()) fileTimeFormatted := time.UnixMilli(fileTime).Format(time.DateTime)
time.Sleep(SCAN_INTERVAL) timeDeltaFormatted := time.Duration(timeDelta) * time.Millisecond
scanRoot() log.Printf("File %s last %s at %s, %s ago", path, timeType, fileTimeFormatted, timeDeltaFormatted)
scanArchive()
cleanRoot() if timeDelta > constants.DELETE_THRESHOLD.Milliseconds() {
} log.Printf("File %s was %s more than %s ago, deleting...", path, timeType, constants.DELETE_THRESHOLD)
} go deleteFile(path)
}
}
// deleteFile removes the file at path and, when the removal succeeds,
// increments the package-level numFilesDeleted counter. A failed removal is
// logged and leaves the counter untouched.
//
// NOTE(review): the counter is bumped with a plain ++ and no
// synchronization; if this runs on several goroutines at once, confirm that
// is acceptable.
func deleteFile(path string) {
	log.Printf("Deleting file %s...", path)
	if removeErr := os.Remove(path); removeErr != nil {
		log.Printf("Error deleting file %s: %s", path, removeErr)
		return
	}
	numFilesDeleted++
}
// region cleanRoot

// cleanRoot sweeps the immediate children of constants.ROOT and removes
// every subdirectory that isDirEmpty reports as empty. Non-directory entries
// are skipped. Each failure is logged and the sweep continues with the next
// entry.
//
// NOTE(review): nothing here consults constants.IGNORED_DIRECTORIES, so an
// ignored directory that happens to be empty is removed as well — confirm
// that is intended.
func cleanRoot() {
	entries, readErr := os.ReadDir(constants.ROOT)
	if readErr != nil {
		log.Printf("Error reading root directory %s: %s", constants.ROOT, readErr)
		return
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		name := entry.Name()
		// Built once and reused for both the emptiness check and the removal.
		dirPath := constants.ROOT + "/" + name

		isEmpty, checkErr := isDirEmpty(dirPath)
		if checkErr != nil {
			log.Printf("Error checking if directory %s is empty: %s", name, checkErr)
			continue
		}

		log.Printf("Directory %s isempty: %t", name, isEmpty)
		if !isEmpty {
			continue
		}

		log.Printf("Deleting empty directory %s", name)
		// RemoveAll rather than Remove: the directory may still hold nested
		// directories, as long as none of them contains a file.
		if rmErr := os.RemoveAll(dirPath); rmErr != nil {
			log.Printf("Error deleting empty directory %s: %s", name, rmErr)
		}
	}
}
// isDirEmpty reports whether the tree rooted at dirPath holds no
// non-directory entries; nested directories on their own do not count as
// content. The walk stops as soon as one such entry is found.
//
// Errors reported while walking are logged and do not abort the walk; the
// most recent one is returned alongside the result.
func isDirEmpty(dirPath string) (bool, error) {
	var (
		foundEntry bool
		lastErr    error
	)

	visit := func(path string, info os.FileInfo, err error) error {
		switch {
		case err != nil:
			log.Printf("Error scanning %s: %s", path, err)
			lastErr = err
			return nil
		case info.IsDir():
			// Directories alone never make the tree non-empty; keep descending.
			return nil
		}

		foundEntry = true
		log.Printf("Directory %s is not empty, found %s", dirPath, path)
		return filepath.SkipAll
	}

	// The callback only ever returns nil or SkipAll, so Walk itself has
	// nothing to report; errors are collected through lastErr instead.
	_ = filepath.Walk(dirPath, visit)

	return !foundEntry, lastErr
}