Add monitoring

This commit is contained in:
Sparky
2025-10-10 14:48:15 +01:00
parent 92cf99d41f
commit ec7d08bcb7
17 changed files with 2735 additions and 0 deletions

4
monitoring/pinger/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
deploy.tar
vendor
main
*.log

View File

@@ -0,0 +1,2 @@
# Build the pinger image from the current directory, then start the compose
# stack defined one directory up in detached mode and return to pinger/.
docker build -t pinger .
cd .. && docker compose up -d && cd pinger

View File

@@ -0,0 +1,4 @@
{
"schemaVersion": 2,
"dockerfilePath": "./dockerfile"
}

View File

@@ -0,0 +1 @@
# Bundle the Go sources, module files, dockerfile, captain-definition and the
# vendored dependencies into deploy.tar.
# NOTE(review): presumably this tarball is what gets uploaded for deployment; confirm.
tar -cf deploy.tar *.go go.mod go.sum dockerfile captain-definition vendor

View File

@@ -0,0 +1,43 @@
# Build stage: compiles a static pinger binary from the vendored sources.
# iperf3 is only needed at runtime, so it is not installed here.
FROM golang:bullseye AS base

# Create the unprivileged account in the build stage so that its passwd/group
# entries can be copied into the final image below.
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid 65532 \
    small-user

WORKDIR /app
COPY . .

# Dependencies come from the vendor directory, so no download step is needed.
# RUN go mod download
# RUN go mod verify
RUN GOFLAGS=-mod=vendor CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o main .

# Final stage
FROM alpine:latest

# Install iperf3 in Alpine; the binary shells out to it at runtime.
RUN apk add --no-cache iperf3

# Copy the binary from the build stage
COPY --from=base /app/main /main

# Copy necessary files from the build stage
COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo
COPY --from=base /etc/passwd /etc/passwd
COPY --from=base /etc/group /etc/group

# The program passes iperf3 a relative --logfile path, so the working
# directory must be writable by the unprivileged user (the image root is not).
RUN mkdir -p /var/lib/pinger && chown 65532 /var/lib/pinger
WORKDIR /var/lib/pinger

# Use the non-root user
USER small-user

# The Prometheus /metrics endpoint in main.go listens on :2112.
EXPOSE 2112

CMD ["/main"]

31
monitoring/pinger/go.mod Normal file
View File

@@ -0,0 +1,31 @@
module pinger
go 1.22
toolchain go1.22.11
require (
github.com/prometheus-community/pro-bing v0.6.1
github.com/prometheus/client_golang v1.20.5
)
require (
github.com/fsnotify/fsnotify v1.8.0 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
)
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nxadm/tail v1.4.11
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/net v0.34.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
)

40
monitoring/pinger/go.sum Normal file
View File

@@ -0,0 +1,40 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nxadm/tail v1.4.11 h1:8feyoE3OzPrcshW5/MJ4sGESc5cqmGkGCWlco4l0bqY=
github.com/nxadm/tail v1.4.11/go.mod h1:OTaG3NK980DZzxbRq6lEuzgU+mug70nY11sMd4JXXHc=
github.com/prometheus-community/pro-bing v0.6.1 h1:EQukUOma9YFZRPe4DGSscxUf9LH07rpqwisNWjSZrgU=
github.com/prometheus-community/pro-bing v0.6.1/go.mod h1:jNCOI3D7pmTCeaoF41cNS6uaxeFY/Gmc3ffwbuJVzAQ=
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0=
golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=

357
monitoring/pinger/main.go Normal file
View File

@@ -0,0 +1,357 @@
package main
import (
"fmt"
"io"
"log"
"net/http"
"net/url"
"os"
"os/exec"
"regexp"
"strconv"
"time"
"github.com/nxadm/tail"
ping "github.com/prometheus-community/pro-bing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// IperfResult describes a test report with a "start" section, an "end"
// summary section and an optional error string, each field mapped to a JSON
// key through its struct tag.
//
// NOTE(review): presumably this mirrors the report produced by iperf3's JSON
// output mode; nothing in the visible code decodes into this type — confirm
// whether it is still needed.
type IperfResult struct {
// Start holds metadata reported at the beginning of a test.
Start struct {
Version string `json:"version"`
SystemInfo string `json:"system_info"`
ConnectTime float64 `json:"connect_time"`
} `json:"start"`
// End holds the totals reported once a test has finished.
End struct {
// SumSent aggregates the sending side of the test.
SumSent struct {
Seconds float64 `json:"seconds"`
Bytes int64 `json:"bytes"`
BitsPerSec float64 `json:"bits_per_second"`
Retransmits int `json:"retransmits"`
} `json:"sum_sent"`
// SumReceived aggregates the receiving side of the test.
SumReceived struct {
Seconds float64 `json:"seconds"`
Bytes int64 `json:"bytes"`
BitsPerSec float64 `json:"bits_per_second"`
} `json:"sum_received"`
// CPUUtilization carries the host and remote totals from the
// "cpu_utilization_percent" object.
CPUUtilization struct {
HostTotal float64 `json:"host_total"`
RemoteTotal float64 `json:"remote_total"`
} `json:"cpu_utilization_percent"`
} `json:"end"`
// Error is the value of the top-level "error" key, if present.
Error string `json:"error"`
}
// Error is the package-wide logger for failures. init points it at both
// stderr and stdout with a coloured "ERROR:" prefix.
var Error *log.Logger
// Warning is the package-wide logger for non-fatal problems. init points it
// at stdout with a coloured "Warning:" prefix.
var Warning *log.Logger
// Prometheus collectors exported by this program. init registers all three
// through prometheus.MustRegister.
var (
// pingRttHistogram records ping round-trip times in seconds, labelled by
// target address and friendly name. Buckets range from 1 ms to 1 s.
pingRttHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "ping_rtt_seconds",
Help: "Round-trip time histogram in seconds",
Buckets: []float64{.001, .002, .005, .01, .02, .05, .1, .2, .5, 1},
},
[]string{"target", "name"},
)
// iperfBandwidthGauge holds the most recent iperf3 bandwidth sample per
// direction and server.
// NOTE(review): runTest stores the Mbits/sec figure parsed from the iperf3
// log here, while the metric name and help text say bits per second —
// confirm which unit consumers expect.
iperfBandwidthGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "iperf_bandwidth_bits_per_second",
Help: "Bandwidth measured by iperf3 in bits per second",
},
[]string{"direction", "server"},
)
// httpRttHistogram records the duration of HTTP probe requests in seconds,
// labelled by URL host and friendly name. Buckets range from 1 ms to 1 s.
httpRttHistogram = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_rtt_seconds",
Help: "HTTP request round-trip time histogram in seconds",
Buckets: []float64{.001, .002, .005, .01, .02, .05, .1, .2, .5, 1},
},
[]string{"target", "name"},
)
)
// Address of the iperf3 server that main hands to NewIperfTest.
var iperfServer = "138.199.199.16"
// Server port used for the upload (client-to-server) test.
var iperfUploadPort = 3332
// Server port used for the download (reverse, -R) test.
var iperfDownloadPort = 3333
// init configures the standard logger, builds the Error and Warning loggers
// and registers the three metric collectors with Prometheus.
func init() {
	// Logged before SetFlags, so this first line uses the log package's
	// default format.
	log.Printf("Initializing application...")

	const (
		loggerFlags  = log.Lmicroseconds | log.Lshortfile
		errorColor   = "\033[0;101m" // ANSI escape wrapped around "ERROR:"
		warningColor = "\033[0;93m"  // ANSI escape wrapped around "Warning:"
		resetColor   = "\033[0m"
	)
	log.SetFlags(loggerFlags)

	// Errors are written to both stderr and stdout.
	errorPrefix := fmt.Sprintf("%sERROR:%s ", errorColor, resetColor)
	Error = log.New(io.MultiWriter(os.Stderr, os.Stdout), errorPrefix, loggerFlags)

	// Warnings are written to stdout only.
	warningPrefix := fmt.Sprintf("%sWarning:%s ", warningColor, resetColor)
	Warning = log.New(os.Stdout, warningPrefix, loggerFlags)

	// Ping, iperf3 and HTTP collectors, registered in that order.
	prometheus.MustRegister(pingRttHistogram, iperfBandwidthGauge, httpRttHistogram)
	log.Printf("Registered all Prometheus metrics")
}
// PingTarget is one host to ping.
type PingTarget struct {
// IP is the address handed to ping.NewPinger.
IP string
// Name is the friendly label attached to the target's metrics and log lines.
Name string
}
// updateMetrics feeds one snapshot of a pinger's statistics into
// pingRttHistogram, labelled with the snapshot's address and the given name.
// When the snapshot shows no received packets, nothing is observed and a log
// line is written instead.
//
// NOTE(review): AvgRtt looks like an average over everything the pinger has
// received so far rather than a single sample — confirm against the pro-bing
// Statistics documentation before relying on the bucket distribution.
func updateMetrics(stats *ping.Statistics, name string) {
	addr := stats.Addr
	if stats.PacketsRecv <= 0 {
		log.Printf("No packets received for %s (%s)", name, addr)
		return
	}
	pingRttHistogram.WithLabelValues(addr, name).Observe(stats.AvgRtt.Seconds())
}
// IperfTest holds the settings for a pair of iperf3 tests against one server:
// the server host plus the port for each direction.
type IperfTest struct {
	ServerHost   string // host passed to iperf3 via -c
	UploadPort   int    // port used when sending to the server
	DownloadPort int    // port used for the reverse (download) test
}

// NewIperfTest returns a pointer to a fresh IperfTest populated with the
// given server host and ports.
func NewIperfTest(serverHost string, uploadPort, downloadPort int) *IperfTest {
	t := new(IperfTest)
	t.ServerHost = serverHost
	t.UploadPort = uploadPort
	t.DownloadPort = downloadPort
	return t
}
// bitrateRegex captures the number in front of "Mbits/sec" in a line of
// iperf3 output. The optional fractional part matters: iperf3 prints values
// such as "94.1 Mbits/sec", and an integer-only pattern would match just the
// digits after the decimal point ("1").
var bitrateRegex = regexp.MustCompile(`(\d+(?:\.\d+)?) Mbits/sec`)
// runTest runs a single iperf3 client session against t.ServerHost and
// publishes every bitrate sample it reports to iperfBandwidthGauge.
//
// reverse selects the direction: false sends to the server on t.UploadPort
// ("upload"), true adds -R and uses t.DownloadPort ("download").
//
// iperf3 writes its report to a per-direction log file in the working
// directory, which is tailed here. The call blocks for as long as iperf3 is
// running and returns once the process has exited: nil on a clean exit,
// otherwise an error describing what failed (iperf3 missing, start failure,
// tail failure, or a non-zero exit).
func (t *IperfTest) runTest(reverse bool) error {
	direction, port := "upload", t.UploadPort
	if reverse {
		direction, port = "download", t.DownloadPort
	}
	log.Printf("Starting iperf3 %s test to %s:%d", direction, t.ServerHost, port)

	// Check if iperf3 is installed
	if _, err := exec.LookPath("iperf3"); err != nil {
		return fmt.Errorf("iperf3 not found: %w", err)
	}

	// Start from a clean log so the tail below only sees this run's output.
	// Removal is best effort: a missing file is expected on the first run and
	// any other failure is reported but does not abort the test.
	logFile := fmt.Sprintf("iperf3-%s-%s.log", direction, t.ServerHost)
	if err := os.Remove(logFile); err == nil {
		log.Printf("Removed existing log file %s", logFile)
	} else if !os.IsNotExist(err) {
		log.Printf("could not remove existing log file %s: %v", logFile, err)
	}
	log.Printf("Logging iperf3 %s test to %s", direction, logFile)

	args := []string{
		"-c", t.ServerHost,
		"-p", strconv.Itoa(port),
		"-Z",
		"-T", direction, // prefix every output line with the direction
		"-t", "0",
		"-b", "600M", // limit the target bitrate to 600 Mbits
		"-f", "m", // report in Mbits/sec, the unit bitrateRegex looks for
		"-i", "1",
		"--logfile", logFile,
	}
	if reverse {
		args = append(args, "-R")
	}
	log.Printf("Running iperf3 %s test with args: %v", direction, args)

	cmd := exec.Command("iperf3", args...)
	cmd.Stderr = os.Stderr
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("failed to start iperf3: %w", err)
	}

	itail, err := tail.TailFile(
		logFile, tail.Config{Follow: true, ReOpen: true})
	if err != nil {
		// Without a tail nobody would ever reap the child, so stop it here.
		_ = cmd.Process.Kill() // best effort; the process may already be gone
		_ = cmd.Wait()         // reap only; the tail error is what gets reported
		return fmt.Errorf("failed to tail iperf3 log file: %w", err)
	}

	// A following tail never ends on its own, so wait for iperf3 in the
	// background and stop the tail once the process is gone. Stopping the
	// tail closes itail.Lines, which ends the read loop below.
	waitErr := make(chan error, 1)
	go func() {
		err := cmd.Wait()
		if stopErr := itail.Stop(); stopErr != nil {
			log.Printf("stopping tail of %s: %v", logFile, stopErr)
		}
		waitErr <- err
	}()

	for line := range itail.Lines {
		if line.Err != nil {
			log.Printf("reading %s: %v", logFile, line.Err)
			continue
		}
		match := bitrateRegex.FindStringSubmatch(line.Text)
		if len(match) < 2 {
			continue
		}
		mbps, err := strconv.ParseFloat(match[1], 64)
		if err != nil {
			// Skip the sample instead of publishing a bogus zero.
			log.Printf("failed to parse bitrate: %v", err)
			continue
		}
		log.Printf("iperf3 %s bandwidth: %.2f Mbps", direction, mbps)
		// NOTE(review): the gauge is named ..._bits_per_second but receives
		// Mbits/sec; confirm the unit consumers expect before rescaling.
		iperfBandwidthGauge.WithLabelValues(direction, t.ServerHost).Set(mbps)
	}

	// The loop normally ends because iperf3 exited. If the tail stopped for
	// another reason, make sure iperf3 does not keep running unobserved.
	_ = cmd.Process.Kill() // reports "already finished" in the normal case
	if err := <-waitErr; err != nil {
		return fmt.Errorf("iperf3 %s test failed: %w",
			direction, err)
	}
	log.Printf("iperf3 %s test completed successfully", direction)
	return nil
}
// Start launches two background goroutines, one per direction, that call
// runTest in an endless loop. Whenever runTest returns, a failure is logged
// through Error, the goroutine pauses for one second, and the test is run
// again. Start itself returns right away.
func (t *IperfTest) Start() {
	log.Printf("Starting continuous iperf3 tests to %s", t.ServerHost)

	// Upload direction (reverse = false).
	uploadLoop := func() {
		for {
			err := t.runTest(false)
			if err != nil {
				Error.Printf("Upload test failed: %v", err)
			}
			time.Sleep(1 * time.Second)
			log.Printf("Restarting upload test immediately")
		}
	}

	// Download direction (reverse = true).
	downloadLoop := func() {
		for {
			err := t.runTest(true)
			if err != nil {
				Error.Printf("Download test failed: %v", err)
			}
			time.Sleep(1 * time.Second)
			log.Printf("Restarting download test immediately")
		}
	}

	go uploadLoop()
	go downloadLoop()
}
// HttpTarget is one URL to probe over HTTP.
type HttpTarget struct {
// URL is the address requested with an HTTP GET on every probe.
URL string
// Name is the friendly label attached to the target's metrics and log lines.
Name string
}
// probeHTTP issues a GET request to target.URL every ten seconds, forever,
// and records how long each request took in httpRttHistogram (labelled with
// the URL's host and target.Name).
//
// It returns only when target.URL cannot be parsed. Requests that fail at the
// transport level (including the 20-second client timeout) are logged through
// Error and not observed. Any request that yields a response is observed;
// responses with a 4xx/5xx status are additionally reported through Warning
// rather than being logged as a success.
func probeHTTP(target HttpTarget) {
	log.Printf("Starting HTTP probe for %s (%s)", target.Name, target.URL)
	parsedURL, err := url.Parse(target.URL)
	if err != nil {
		Error.Printf("Invalid URL %s: %v", target.URL, err)
		return
	}
	client := &http.Client{
		Timeout: 20 * time.Second,
	}
	for {
		start := time.Now()
		log.Printf("Sending HTTP request to %s", target.URL)
		resp, err := client.Get(target.URL)
		duration := time.Since(start)
		if err != nil {
			Error.Printf("HTTP probe to %s failed: %v (took %v)", target.URL, err, duration)
		} else {
			status := resp.StatusCode
			resp.Body.Close()
			httpRttHistogram.WithLabelValues(parsedURL.Host, target.Name).Observe(duration.Seconds())
			if status >= http.StatusBadRequest {
				// The server answered, but with an error status.
				Warning.Printf("HTTP probe to %s returned status %d in %v", target.URL, status, duration)
			} else {
				log.Printf("HTTP probe to %s succeeded in %v", target.URL, duration)
			}
		}
		log.Printf("Sleeping for 10 seconds before next HTTP probe to %s", target.URL)
		time.Sleep(10 * time.Second)
	}
}
// main wires the monitoring services together and then loops forever:
//
//   - one probeHTTP goroutine per HTTP target,
//   - the Prometheus /metrics endpoint on :2112 (the process exits with
//     status 1 if the listener fails),
//   - one pinger goroutine per ping target,
//   - the continuous iperf3 upload/download tests.
//
// Once a second it passes each pinger's current statistics to updateMetrics.
func main() {
	log.Printf("Starting network monitoring application")
	targets := []PingTarget{
		{IP: "8.8.8.8", Name: "google-dns-1"},
		{IP: "1.1.1.1", Name: "cloudflare-dns-1"},
		{IP: "8.8.4.4", Name: "google-dns-2"},
		{IP: "1.0.0.1", Name: "cloudflare-dns-2"},
		{IP: "208.67.222.222", Name: "opendns-1"},
		{IP: "208.67.220.220", Name: "opendns-2"},
		{IP: "192.168.1.254", Name: "router"},
	}
	log.Printf("Configured ping targets: %v", targets)

	// Add HTTP targets
	httpTargets := []HttpTarget{
		{URL: "http://192.168.1.254", Name: "router-http"},
	}
	log.Printf("Configured HTTP targets: %v", httpTargets)

	// Start HTTP probes
	for _, target := range httpTargets {
		go probeHTTP(target)
	}

	// Start Prometheus HTTP server
	go func() {
		http.Handle("/metrics", promhttp.Handler())
		// A nil Handler serves http.DefaultServeMux, exactly as
		// http.ListenAndServe(addr, nil) would; the explicit server only adds
		// a bound on how long a client may take to send its request headers.
		srv := &http.Server{
			Addr:              ":2112",
			ReadHeaderTimeout: 10 * time.Second,
		}
		if err := srv.ListenAndServe(); err != nil {
			Error.Printf("starting prometheus listener failed: %v", err)
			os.Exit(1)
		}
	}()

	pingers := make([]*ping.Pinger, len(targets))
	for i, target := range targets {
		pinger, err := ping.NewPinger(target.IP)
		if err != nil {
			// Skip just this target: returning here would end the whole
			// program with exit status 0. pingers[i] stays nil and the
			// statistics loop below ignores nil entries.
			Error.Printf("new pinger for %s failed: %v", target.IP, err)
			continue
		}
		//pinger.SetPrivileged(true)
		pinger.RecordRtts = false
		pinger.RecordTTLs = false
		pingers[i] = pinger
		go func(p *ping.Pinger, t PingTarget) {
			log.Printf("Starting pinger for %s (%s)", t.Name, t.IP)
			// Keep the error local to this goroutine instead of writing to
			// the variable declared by the enclosing loop.
			if err := p.Run(); err != nil {
				Error.Printf("pinger for %s (%s) failed: %v", t.Name, t.IP, err)
			}
		}(pinger, target)
	}

	// Initialize and start iperf3 testing with separate ports
	iperfTest := NewIperfTest(iperfServer, iperfUploadPort, iperfDownloadPort)
	iperfTest.Start()

	log.Printf("All monitoring services started, entering main loop")
	for {
		time.Sleep(1 * time.Second)
		for i, pinger := range pingers {
			if pinger != nil {
				stats := pinger.Statistics()
				updateMetrics(stats, targets[i].Name)
			}
		}
	}
}