From ec7d08bcb785ea6c0b1400182d4fe65df45be81c Mon Sep 17 00:00:00 2001 From: Sparky Date: Fri, 10 Oct 2025 14:48:15 +0100 Subject: [PATCH] Add monitoring --- .gitignore | 5 + monitoring/docker-compose.yml | 121 ++ monitoring/grafana/config/grafana.ini | 1910 +++++++++++++++++ monitoring/grafana/config/ldap.toml | 75 + monitoring/loki/config/config.yaml | 44 + monitoring/pinger/.gitignore | 4 + monitoring/pinger/build.sh | 2 + monitoring/pinger/captain-definition | 4 + monitoring/pinger/deploy.sh | 1 + monitoring/pinger/dockerfile | 43 + monitoring/pinger/go.mod | 31 + monitoring/pinger/go.sum | 40 + monitoring/pinger/main.go | 357 +++ .../prometheus/config/console_libraries | 1 + monitoring/prometheus/config/consoles | 1 + monitoring/prometheus/config/prometheus.yml | 61 + monitoring/promtail/config/config.yml | 35 + 17 files changed, 2735 insertions(+) create mode 100755 monitoring/docker-compose.yml create mode 100644 monitoring/grafana/config/grafana.ini create mode 100644 monitoring/grafana/config/ldap.toml create mode 100755 monitoring/loki/config/config.yaml create mode 100644 monitoring/pinger/.gitignore create mode 100644 monitoring/pinger/build.sh create mode 100644 monitoring/pinger/captain-definition create mode 100644 monitoring/pinger/deploy.sh create mode 100644 monitoring/pinger/dockerfile create mode 100644 monitoring/pinger/go.mod create mode 100644 monitoring/pinger/go.sum create mode 100644 monitoring/pinger/main.go create mode 120000 monitoring/prometheus/config/console_libraries create mode 120000 monitoring/prometheus/config/consoles create mode 100644 monitoring/prometheus/config/prometheus.yml create mode 100644 monitoring/promtail/config/config.yml diff --git a/.gitignore b/.gitignore index 1d86657..3d01685 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,8 @@ coolify/sentinel syncthing-sparky/index-v2 syncthing-sparky/index-v0.14.0.db-migrated coolify/services/sowk04gs00g444ks4ccs4sw0/data/docker/registry/v2 +monitoring/grafana/data 
+monitoring/prometheus/data +monitoring/prometheus/newdata +monitoring/victoriametrics/data +monitoring/pinger/nohup.out diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100755 index 0000000..565bd67 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,121 @@ +services: # active services: prometheus (1d TSDB retention), nodeexporter, victoriametrics (100y retention); all use host networking + prometheus: + image: prom/prometheus:v3.5.0 + container_name: prometheus + volumes: + - ./prometheus/config:/etc/prometheus + - ./prometheus/data:/prometheus + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/etc/prometheus/console_libraries" + - "--web.console.templates=/etc/prometheus/consoles" + - "--storage.tsdb.retention.time=1d" + - "--web.enable-lifecycle" + - "--web.listen-address=:43261" + restart: unless-stopped + expose: + - 43261 + network_mode: "host" + labels: + org.label-schema.group: "monitoring" + + nodeexporter: + image: prom/node-exporter:v1.8.2 + container_name: nodeexporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - "--path.procfs=/host/proc" + - "--path.rootfs=/rootfs" + - "--path.sysfs=/host/sys" + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" # renamed from the deprecated --collector.filesystem.ignored-mount-points (node_exporter >= 1.3.0) + - "--web.listen-address=:56546" + restart: unless-stopped + expose: + - 56546 + network_mode: "host" + labels: + org.label-schema.group: "monitoring" + + # cadvisor: + # image: gcr.io/google-containers/cadvisor:v0.34.0 + # container_name: cadvisor + # volumes: + # - /:/rootfs:ro + # - /var/run:/var/run:rw + # - /sys:/sys:ro + # - /var/lib/docker:/var/lib/docker:ro + # #- /cgroup:/cgroup:ro #doesn't work on MacOS only for Linux + # restart: unless-stopped + # expose: + # - 8080 + # network_mode: "host" + # labels: + # org.label-schema.group: "monitoring" + + # We run grafana via coolify so that it may be exposed to the internet + # grafana: + # image: grafana/grafana:11.5 + # container_name: grafana + # volumes: + # - 
./grafana/data:/var/lib/grafana + # - ./grafana/config:/etc/grafana + # environment: + # - GF_SECURITY_ADMIN_USER=dave + # - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD} + # - GF_USERS_ALLOW_SIGN_UP=true + # restart: unless-stopped + # expose: + # - 43621 + # network_mode: "host" + # labels: + # org.label-schema.group: "monitoring" + + #loki: + # container_name: loki + # image: grafana/loki:3.3.2 + # expose: + # - 43622 + # command: -config.file=/etc/loki/config.yaml + # restart: unless-stopped + # volumes: + # - ./loki/config:/etc/loki + # - ./loki/data:/data/loki + # network_mode: "host" + + #promtail: + # container_name: promtail + # image: grafana/promtail:3.3.2 + # expose: + # - 43623 + # volumes: + # - /var/log:/var/log + # - /mnt/data/docker/containers:/var/lib/docker/containers:ro + # - ./promtail/config:/etc/promtail + # command: -config.file=/etc/promtail/config.yml + # restart: unless-stopped + # network_mode: "host" + + #pinger: + # image: pinger + # container_name: pinger + # restart: unless-stopped + # network_mode: "host" + # volumes: + # - ./pinger/logs:/logs + victoriametrics: + image: victoriametrics/victoria-metrics # NOTE: no tag, so this resolves to :latest; pin a release tag for reproducible deploys + container_name: victoriametrics + volumes: + - ./victoriametrics/data:/victoria-metrics-data + command: + - "--retentionPeriod=100y" + restart: unless-stopped + expose: + - 8428 + network_mode: "host" + labels: + org.label-schema.group: "monitoring" diff --git a/monitoring/grafana/config/grafana.ini b/monitoring/grafana/config/grafana.ini new file mode 100644 index 0000000..91e8c59 --- /dev/null +++ b/monitoring/grafana/config/grafana.ini @@ -0,0 +1,1910 @@ +##################### Grafana Configuration Example ##################### +# +# Everything has defaults so you only need to uncomment things you want to +# change + +# possible values : production, development +;app_mode = production + +# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty +;instance_name = ${HOSTNAME} + 
+#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +;data = /var/lib/grafana + +# Temporary files in `data` directory older than given duration will be removed +;temp_data_lifetime = 24h + +# Directory where grafana can store logs +;logs = /var/log/grafana + +# Directory where grafana will automatically scan and look for plugins +;plugins = /var/lib/grafana/plugins + +# folder that contains provisioning config files that grafana will apply on startup and while running. +;provisioning = conf/provisioning + +#################################### Server #################################### +[server] +# Protocol (http, https, h2, socket) +;protocol = http + +# Minimum TLS version allowed. By default, this value is empty. Accepted values are: TLS1.2, TLS1.3. If nothing is set TLS1.2 would be taken +;min_tls_version = "" + +# The ip address to bind to, empty will bind to all interfaces +;http_addr = + +# The http port to use +http_port = 43433 + +# The public facing domain name used to access grafana from a browser +;domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +;enforce_domain = false + +# The full public facing url you use in browser, used for redirects and emails +# If you use reverse proxy and sub path specify full url (with sub path) +;root_url = %(protocol)s://%(domain)s:%(http_port)s/ + +# Serve Grafana from subpath specified in `root_url` setting. By default it is set to `false` for compatibility reasons. 
+;serve_from_sub_path = false + +# Log web requests +;router_logging = false + +# the path to the static files, relative to the working path +;static_root_path = public + +# enable gzip +;enable_gzip = false + +# https certs & key file +;cert_file = +;cert_key = + +# optional password to be used to decrypt key file +;cert_pass = + +# Certificates file watch interval +;certs_watch_interval = + +# Unix socket gid +# Changing the gid of a file without privileges requires that the target group is in the group of the process and that the process is the file owner +# It is recommended to set the gid as http server user gid +# Not set when the value is -1 +;socket_gid = + +# Unix socket mode +;socket_mode = + +# Unix socket path +;socket = + +# CDN Url +;cdn_url = + +# Sets the maximum time using a duration format (5s/5m/5ms) before timing out read of an incoming request and closing idle connections. +# `0` means there is no timeout for reading the request. +;read_timeout = 0 + +# This setting enables you to specify additional headers that the server adds to HTTP(S) responses. +[server.custom_response_headers] +#exampleHeader1 = exampleValue1 +#exampleHeader2 = exampleValue2 + +[environment] +# Sets whether the local file system is available for Grafana to use. Default is true for backward compatibility. +;local_file_system_available = true + +#################################### GRPC Server ######################### +;[grpc_server] +;network = "tcp" +;address = "127.0.0.1:10000" +;use_tls = false +;cert_file = +;key_file = +;max_recv_msg_size = +;max_send_msg_size = +# this will log the request and response for each unary gRPC call +;enable_logging = false + +#################################### Database #################################### +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as separate properties or as one string using the url property. 
+ +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" +;password = +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +;url = + +# Max idle conn setting default is 2 +;max_idle_conn = 2 + +# Max conn setting default is 0 (mean not set) +;max_open_conn = + +# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) +;conn_max_lifetime = 14400 + +# Set to true to log the sql calls and execution times. +;log_queries = + +# For "postgres", use either "disable", "require" or "verify-full" +# For "mysql", use either "true", "false", or "skip-verify". +;ssl_mode = disable + +# For "postgres", use either "1" to enable or "0" to disable SNI +;ssl_sni = + +# Database drivers may support different transaction isolation levels. +# Currently, only "mysql" driver supports isolation levels. +# If the value is empty - driver's default isolation level is applied. +# For "mysql" use "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ" or "SERIALIZABLE". +;isolation_level = + +;ca_cert_path = +;client_key_path = +;client_cert_path = +;server_cert_name = + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +# For "sqlite3" only. cache mode setting used for connecting to the database. (private, shared) +;cache_mode = private + +# For "sqlite3" only. Enable/disable Write-Ahead Logging, https://sqlite.org/wal.html. Default is false. +;wal = false + +# For "mysql" and "postgres" only. Lock the database for the migrations, default is true. +;migration_locking = true + +# For "mysql" and "postgres" only. How many seconds to wait before failing to lock the database for the migrations, default is 0. +;locking_attempt_timeout_sec = 0 + +# For "sqlite" only. 
How many times to retry a query on "database is locked" failures. Default is 0 (disabled). +;query_retries = 0 + +# For "sqlite" only. How many times to retry a transaction on "database is locked" failures. Default is 5. +;transaction_retries = 5 + +# Set to true to add metrics and tracing for database queries. +;instrument_queries = false + +#################################### Cache server ############################# +[remote_cache] +# Either "redis", "memcached" or "database" default is "database" +;type = database + +# cache connectionstring options +# database: will use Grafana primary database. +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=0,ssl=false`. Only addr is required. ssl may be 'true', 'false', or 'insecure'. +# memcached: 127.0.0.1:11211 +;connstr = + +# prefix prepended to all the keys in the remote cache +; prefix = + +# This enables encryption of values stored in the remote cache +;encryption = + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +;logging = false + +# How long the data proxy waits to read the headers of the response before timing out, default is 30 seconds. +# This setting also applies to core backend HTTP data sources where query requests use an HTTP client with timeout set. +;timeout = 30 + +# How long the data proxy waits to establish a TCP connection before timing out, default is 10 seconds. +;dialTimeout = 10 + +# How many seconds the data proxy waits before sending a keepalive probe request. +;keep_alive_seconds = 30 + +# How many seconds the data proxy waits for a successful TLS Handshake before timing out. +;tls_handshake_timeout_seconds = 10 + +# How many seconds the data proxy will wait for a server's first response headers after +# fully writing the request headers if the request has an "Expect: 100-continue" +# header. 
A value of 0 will result in the body being sent immediately, without +# waiting for the server to approve. +;expect_continue_timeout_seconds = 1 + +# Optionally limits the total number of connections per host, including connections in the dialing, +# active, and idle states. On limit violation, dials will block. +# A value of zero (0) means no limit. +;max_conns_per_host = 0 + +# The maximum number of idle connections that Grafana will keep alive. +;max_idle_connections = 100 + +# How many seconds the data proxy keeps an idle connection open before timing out. +;idle_conn_timeout_seconds = 90 + +# If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request, default is false. +;send_user_header = false + +# Limit the amount of bytes that will be read/accepted from responses of outgoing HTTP requests. +;response_limit = 0 + +# Limits the number of rows that Grafana will process from SQL data sources. +;row_limit = 1000000 + +# Sets a custom value for the `User-Agent` header for outgoing data proxy requests. If empty, the default value is `Grafana/` (for example `Grafana/9.0.0`). +;user_agent = + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. +;reporting_enabled = true + +# The name of the distributor of the Grafana instance. Ex hosted-grafana, grafana-labs +;reporting_distributor = grafana-labs + +# Set to false to disable all checks to https://grafana.com +# for new versions of grafana. The check is used +# in some UI views to notify that a grafana update exists. 
+# This option does not cause any auto updates, nor send any information +# only a GET request to https://grafana.com/api/grafana/versions/stable to get the latest version. +;check_for_updates = true + +# Set to false to disable all checks to https://grafana.com +# for new versions of plugins. The check is used +# in some UI views to notify that a plugin update exists. +# This option does not cause any auto updates, nor send any information +# only a GET request to https://grafana.com to get the latest versions. +;check_for_plugin_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +# Google Analytics 4 tracking code, only enabled if you specify an id here +;google_analytics_4_id = + +# When Google Analytics 4 Enhanced event measurement is enabled, we will try to avoid sending duplicate events and let Google Analytics 4 detect navigation changes, etc. +;google_analytics_4_send_manual_page_views = false + +# Google Tag Manager ID, only enabled if you specify an id here +;google_tag_manager_id = + +# Rudderstack write key, enabled only if rudderstack_data_plane_url is also set +;rudderstack_write_key = + +# Rudderstack data plane url, enabled only if rudderstack_write_key is also set +;rudderstack_data_plane_url = + +# Rudderstack SDK url, optional, only valid if rudderstack_write_key and rudderstack_data_plane_url are also set +;rudderstack_sdk_url = + +# Rudderstack Config url, optional, used by Rudderstack SDK to fetch source config +;rudderstack_config_url = + +# Rudderstack Integrations URL, optional. Only valid if you pass the SDK version 1.1 or higher +;rudderstack_integrations_url = + +# Intercom secret, optional, used to hash user_id before passing to Intercom via Rudderstack +;intercom_secret = + +# Application Insights connection string. Specify a URL string to enable this feature. +;application_insights_connection_string = + +# Optional. 
Specifies an Application Insights endpoint URL where the endpoint string is wrapped in backticks ``. +;application_insights_endpoint_url = + +# Controls if the UI contains any links to user feedback forms +;feedback_links_enabled = true + +# Static context that is being added to analytics events +;reporting_static_context = grafanaInstance=12, os=linux + +#################################### Security #################################### +[security] +# disable creation of admin user on first start of grafana +disable_initial_admin_creation = false + +# default admin user, created on startup +;admin_user = dave + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = 9@^F@93qHv48JXHvi3Bf + +# default admin email, created on startup +;admin_email = admin@localhost + +# used for signing +;secret_key = SW2YcwTIb9zpOOhoPsMm + +# current key provider used for envelope encryption, default to static value specified by secret_key +;encryption_provider = secretKey.v1 + +# list of configured key providers, space separated (Enterprise only): e.g., awskms.v1 azurekv.v1 +;available_encryption_providers = + +# disable gravatar profile images +;disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port separated by spaces) +;data_source_proxy_whitelist = + +# disable protection against brute force login attempts +;disable_brute_force_login_protection = false + +# set to true if you host Grafana behind HTTPS. default is false. +;cookie_secure = false + +# set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict", "none" and "disabled" +;cookie_samesite = lax + +# set to true if you want to allow browsers to render Grafana in a ,