fix(servercheck): Properly check server statuses with and without Sentinel

This commit is contained in:
Andras Bacsai
2025-08-22 11:50:56 +02:00
parent 0aef9b3f5c
commit 7d10711a65
6 changed files with 339 additions and 3 deletions

View File

@@ -10,7 +10,7 @@ use App\Jobs\PullChangelogFromGitHub;
use App\Jobs\PullTemplatesFromCDN; use App\Jobs\PullTemplatesFromCDN;
use App\Jobs\RegenerateSslCertJob; use App\Jobs\RegenerateSslCertJob;
use App\Jobs\ScheduledJobManager; use App\Jobs\ScheduledJobManager;
use App\Jobs\ServerResourceManager; use App\Jobs\ServerManagerJob;
use App\Jobs\UpdateCoolifyJob; use App\Jobs\UpdateCoolifyJob;
use App\Models\InstanceSettings; use App\Models\InstanceSettings;
use App\Models\Server; use App\Models\Server;
@@ -55,7 +55,7 @@ class Kernel extends ConsoleKernel
$this->scheduleInstance->job(new CheckHelperImageJob)->everyTenMinutes()->onOneServer(); $this->scheduleInstance->job(new CheckHelperImageJob)->everyTenMinutes()->onOneServer();
// Server Jobs // Server Jobs
$this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer(); $this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
// Scheduled Jobs (Backups & Tasks) // Scheduled Jobs (Backups & Tasks)
$this->scheduleInstance->job(new ScheduledJobManager)->everyMinute()->onOneServer(); $this->scheduleInstance->job(new ScheduledJobManager)->everyMinute()->onOneServer();
@@ -74,7 +74,7 @@ class Kernel extends ConsoleKernel
$this->scheduleUpdates(); $this->scheduleUpdates();
// Server Jobs // Server Jobs
$this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer(); $this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
$this->pullImages(); $this->pullImages();

View File

@@ -0,0 +1,153 @@
<?php
namespace App\Jobs;
use App\Models\Server;
use App\Services\ConfigurationRepository;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\Middleware\WithoutOverlapping;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Log;
/**
 * Queued job that probes a single server over SSH and records the outcome
 * in the server settings (`is_reachable` / `is_usable`).
 *
 * The job is attempted once, has a 30 second timeout, and is guarded by a
 * per-server WithoutOverlapping middleware keyed on the server uuid.
 */
class ServerConnectionCheckJob implements ShouldBeEncrypted, ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Number of attempts before the job is considered failed. */
    public $tries = 1;

    /** Maximum number of seconds the job may run. */
    public $timeout = 30;

    /**
     * @param  Server  $server  Server whose connectivity is checked.
     * @param  bool  $disableMux  When true, SSH multiplexing is disabled before probing.
     */
    public function __construct(
        public Server $server,
        public bool $disableMux = true
    ) {}

    /**
     * Prevent two checks for the same server from running concurrently.
     *
     * @return array<int, object>
     */
    public function middleware(): array
    {
        $lockKey = 'server-connection-check-'.$this->server->uuid;
        $guard = (new WithoutOverlapping($lockKey))->expireAfter(45)->dontRelease();

        return [$guard];
    }

    /**
     * Turn SSH multiplexing off through the ConfigurationRepository service.
     */
    private function disableSshMux(): void
    {
        app(ConfigurationRepository::class)->disableSshMux();
    }

    /**
     * Write both status flags to the server settings in a single update.
     */
    private function persistStatus(bool $reachable, bool $usable): void
    {
        $this->server->settings->update([
            'is_reachable' => $reachable,
            'is_usable' => $usable,
        ]);
    }

    /**
     * Run the check and store the result.
     *
     * A force-disabled server is marked unreachable/unusable without probing.
     * Any throwable also marks the server unreachable/unusable and is then
     * rethrown so the queue records the failure.
     */
    public function handle()
    {
        try {
            // Force-disabled servers are never probed.
            if ($this->server->settings->force_disabled) {
                $this->persistStatus(false, false);
                Log::debug('ServerConnectionCheck: Server is disabled', [
                    'server_id' => $this->server->id,
                    'server_name' => $this->server->name,
                ]);

                return;
            }

            // Optionally switch mux off before opening the connection.
            if ($this->disableMux) {
                $this->disableSshMux();
            }

            // Docker availability is not verified here at the moment; a
            // reachable server is recorded as usable as well.
            if ($this->checkConnection()) {
                $this->persistStatus(true, true);

                return;
            }

            $this->persistStatus(false, false);
            Log::warning('ServerConnectionCheck: Server not reachable', [
                'server_id' => $this->server->id,
                'server_name' => $this->server->name,
                'server_ip' => $this->server->ip,
            ]);
        } catch (\Throwable $e) {
            $this->persistStatus(false, false);

            throw $e;
        }
    }

    /**
     * Probe the server by running `ls -la /` remotely.
     *
     * @return bool True when the remote call returned a non-null result;
     *              false when it returned null or threw.
     */
    private function checkConnection(): bool
    {
        try {
            // Third argument false: the helper is asked not to throw on error.
            $result = instant_remote_process_with_timeout(
                ['ls -la /'],
                $this->server,
                false
            );
        } catch (\Throwable $e) {
            Log::debug('ServerConnectionCheck: Connection check failed', [
                'server_id' => $this->server->id,
                'error' => $e->getMessage(),
            ]);

            return false;
        }

        return $result !== null;
    }

    /**
     * Probe Docker by running `docker version --format json` remotely and
     * checking that the decoded JSON carries a `Server.Version` entry.
     *
     * Not called from handle() at present.
     *
     * @return bool True only when the output decodes to JSON containing
     *              ['Server']['Version']; false otherwise or on any throwable.
     */
    private function checkDockerAvailability(): bool
    {
        try {
            // Third argument false: the helper is asked not to throw on error.
            $raw = instant_remote_process_with_timeout(
                ['docker version --format json'],
                $this->server,
                false
            );
            if ($raw === null) {
                return false;
            }

            $payload = trim($raw);
            if (empty($payload)) {
                return false;
            }

            // A parsable document with a server version means the daemon answered.
            $decoded = json_decode($payload, true);

            return isset($decoded['Server']['Version']);
        } catch (\Throwable $e) {
            Log::debug('ServerConnectionCheck: Docker check failed', [
                'server_id' => $this->server->id,
                'error' => $e->getMessage(),
            ]);

            return false;
        }
    }
}

View File

@@ -0,0 +1,183 @@
<?php
namespace App\Jobs;
use App\Models\InstanceSettings;
use App\Models\Server;
use App\Models\Team;
use Cron\CronExpression;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
/**
 * Fan-out job that decides, for every eligible server, which per-server jobs
 * are due and dispatches them: connection checks, server checks, storage
 * checks, Docker cleanup, patch checks and the Sentinel restart.
 *
 * All "is it due?" decisions are evaluated against a single timestamp taken
 * at the start of handle(), so every server is judged against the same moment.
 */
class ServerManagerJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * The time when this job execution started.
     *
     * This is a mutable Carbon instance: never call mutating methods
     * (subSeconds, setTimezone, ...) on it directly, always work on a copy().
     */
    private ?Carbon $executionTime = null;

    /** Instance-wide settings, loaded in handle(). */
    private InstanceSettings $settings;

    /** Timezone used when a server has no (valid) timezone of its own. */
    private string $instanceTimezone;

    /**
     * Create a new job instance.
     */
    public function __construct()
    {
        $this->onQueue('high');
    }

    /**
     * Freeze the execution time, resolve the instance timezone, then dispatch
     * connection checks and the scheduled per-server tasks.
     */
    public function handle(): void
    {
        // Freeze the execution time at the start of the job
        $this->executionTime = Carbon::now();

        $this->settings = instanceSettings();
        $this->instanceTimezone = $this->settings->instance_timezone ?: config('app.timezone');
        if (validate_timezone($this->instanceTimezone) === false) {
            $this->instanceTimezone = config('app.timezone');
        }

        // Get all servers to process
        $servers = $this->getServers();

        // Dispatch ServerConnectionCheck for all servers efficiently
        $this->dispatchConnectionChecks($servers);

        // Process server-specific scheduled tasks
        $this->processScheduledTasks($servers);
    }

    /**
     * Collect the servers this run should handle.
     *
     * Servers with the IP 1.2.3.4 are always excluded by the query
     * (presumably a placeholder address - confirm). On cloud only servers
     * whose team subscription has a paid invoice are taken, merged with the
     * servers of team 0; otherwise all remaining servers are returned.
     *
     * @return Collection<int, Server>
     */
    private function getServers(): Collection
    {
        $allServers = Server::where('ip', '!=', '1.2.3.4');

        if (! isCloud()) {
            return $allServers->get();
        }

        $servers = $allServers->whereRelation('team.subscription', 'stripe_invoice_paid', true)->get();

        // Team::find() returns null when team 0 does not exist; do not
        // dereference it blindly, just skip the merge in that case.
        $rootTeam = Team::find(0);
        if ($rootTeam === null) {
            return $servers;
        }

        return $servers->merge($rootTeam->servers);
    }

    /**
     * Dispatch a ServerConnectionCheckJob for every server when the check
     * frequency is due at the frozen execution time.
     *
     * A failure to dispatch for one server is logged and does not stop the
     * remaining servers.
     *
     * @param  Collection<int, Server>  $servers
     */
    private function dispatchConnectionChecks(Collection $servers): void
    {
        $checkFrequency = isCloud() ? '*/2 * * * *' : '* * * * *'; // Every 2 min for cloud, every minute for self-hosted
        $cron = new CronExpression($checkFrequency);

        if (! $cron->isDue($this->executionTime)) {
            return;
        }

        $servers->each(function (Server $server) {
            try {
                ServerConnectionCheckJob::dispatch($server);
            } catch (\Exception $e) {
                Log::channel('scheduled-errors')->error('Failed to dispatch ServerConnectionCheck', [
                    'server_id' => $server->id,
                    'server_name' => $server->name,
                    'error' => $e->getMessage(),
                ]);
            }
        });
    }

    /**
     * Run processServerTasks() for each server, logging (not propagating)
     * exceptions so one failing server does not block the others.
     *
     * @param  Collection<int, Server>  $servers
     */
    private function processScheduledTasks(Collection $servers): void
    {
        foreach ($servers as $server) {
            try {
                $this->processServerTasks($server);
            } catch (\Exception $e) {
                Log::channel('scheduled-errors')->error('Error processing server tasks', [
                    'server_id' => $server->id,
                    'server_name' => $server->name,
                    'error' => $e->getMessage(),
                ]);
            }
        }
    }

    /**
     * Dispatch every job that is due for a single server.
     *
     * ServerCheckJob and ServerStorageCheckJob are only considered when the
     * last Sentinel update is older than the execution time minus the
     * server's wait time; the remaining jobs are evaluated unconditionally.
     */
    private function processServerTasks(Server $server): void
    {
        $serverTimezone = data_get($server->settings, 'server_timezone', $this->instanceTimezone);
        if (validate_timezone($serverTimezone) === false) {
            $serverTimezone = config('app.timezone');
        }

        // Check if we should run sentinel-based checks
        $lastSentinelUpdate = $server->sentinel_updated_at;
        $waitTime = $server->waitBeforeDoingSshCheck(); // presumably seconds, as it is fed to subSeconds() - confirm

        // Subtract on a copy: Carbon is mutable, and subtracting on the shared
        // instance would move the frozen execution time further into the past
        // for every server processed, skewing all later isDue() evaluations.
        $sentinelCutoff = $this->executionTime->copy()->subSeconds($waitTime);
        $sentinelOutOfSync = Carbon::parse($lastSentinelUpdate)->isBefore($sentinelCutoff);

        if ($sentinelOutOfSync) {
            // Dispatch jobs if Sentinel is out of sync
            $checkFrequency = isCloud() ? '*/5 * * * *' : '* * * * *'; // Every 5 min for cloud, every minute for self-hosted
            $cron = new CronExpression($checkFrequency);
            if ($cron->isDue($this->executionTime)) {
                ServerCheckJob::dispatch($server);
            }

            // Dispatch ServerStorageCheckJob if due
            $serverDiskUsageCheckFrequency = data_get($server->settings, 'server_disk_usage_check_frequency', '0 * * * *');
            // Translate a known alias into its cron expression when one is registered.
            if (isset(VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency])) {
                $serverDiskUsageCheckFrequency = VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency];
            }
            $shouldRunStorageCheck = $this->shouldRunNow($serverDiskUsageCheckFrequency, $serverTimezone);
            if ($shouldRunStorageCheck) {
                ServerStorageCheckJob::dispatch($server);
            }
        }

        // Dispatch DockerCleanupJob if due
        $dockerCleanupFrequency = data_get($server->settings, 'docker_cleanup_frequency', '0 * * * *');
        if (isset(VALID_CRON_STRINGS[$dockerCleanupFrequency])) {
            $dockerCleanupFrequency = VALID_CRON_STRINGS[$dockerCleanupFrequency];
        }
        $shouldRunDockerCleanup = $this->shouldRunNow($dockerCleanupFrequency, $serverTimezone);
        if ($shouldRunDockerCleanup) {
            DockerCleanupJob::dispatch($server, false, $server->settings->delete_unused_volumes, $server->settings->delete_unused_networks);
        }

        // Dispatch ServerPatchCheckJob if due (weekly)
        $shouldRunPatchCheck = $this->shouldRunNow('0 0 * * 0', $serverTimezone);
        if ($shouldRunPatchCheck) { // Weekly on Sunday at midnight
            ServerPatchCheckJob::dispatch($server);
        }

        // Dispatch Sentinel restart if due (daily for Sentinel-enabled servers)
        $isSentinelEnabled = $server->isSentinelEnabled();
        $shouldRestartSentinel = $isSentinelEnabled && $this->shouldRunNow('0 0 * * *', $serverTimezone);
        if ($shouldRestartSentinel) {
            dispatch(function () use ($server) {
                $server->restartContainer('coolify-sentinel');
            });
        }
    }

    /**
     * Tell whether a cron expression is due at the frozen execution time,
     * evaluated in the given timezone.
     *
     * @param  string  $frequency  Cron expression.
     * @param  string  $timezone  Timezone identifier the expression is evaluated in.
     */
    private function shouldRunNow(string $frequency, string $timezone): bool
    {
        $cron = new CronExpression($frequency);

        // Use the frozen execution time, not the current time
        $baseTime = $this->executionTime ?? Carbon::now();

        // copy() keeps the shared timestamp untouched by the timezone switch.
        $executionTime = $baseTime->copy()->setTimezone($timezone);

        return $cron->isDue($executionTime);
    }
}