From 7d10711a6594a0672d64ae20824a9d2e59b03ab6 Mon Sep 17 00:00:00 2001
From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com>
Date: Fri, 22 Aug 2025 11:50:56 +0200
Subject: [PATCH] fix(servercheck): Properly check server statuses with and without Sentinel

---
 Review notes (not part of the commit message):
 - processServerTasks(): the Sentinel wait time is now subtracted from a copy of the
   frozen execution time instead of mutating the shared Carbon instance.
 - PHPDoc added to the methods of both new jobs; hunk sizes and diffstat adjusted.
 - This copy of the patch lacks the opening lines of both new files (everything
   before `server->uuid))` and before `onQueue('high')`); restore them from the
   original commit before applying.

 app/Console/Kernel.php                        |   6 +-
 ...b.php => DEPRECATEDContainerStatusJob.php} |   0
 ...ob.php => DEPRECATEDServerCheckNewJob.php} |   0
 ...hp => DEPRECATEDServerResourceManager.php} |   0
 app/Jobs/ServerConnectionCheckJob.php         | 178 +++++++++++++++
 app/Jobs/ServerManagerJob.php                 | 216 ++++++++++++++++++
 6 files changed, 397 insertions(+), 3 deletions(-)
 rename app/Jobs/{ContainerStatusJob.php => DEPRECATEDContainerStatusJob.php} (100%)
 rename app/Jobs/{ServerCheckNewJob.php => DEPRECATEDServerCheckNewJob.php} (100%)
 rename app/Jobs/{ServerResourceManager.php => DEPRECATEDServerResourceManager.php} (100%)
 create mode 100644 app/Jobs/ServerConnectionCheckJob.php
 create mode 100644 app/Jobs/ServerManagerJob.php

diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index 6df191bb3..c5c4d7e7f 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -10,7 +10,7 @@ use App\Jobs\PullChangelogFromGitHub;
 use App\Jobs\PullTemplatesFromCDN;
 use App\Jobs\RegenerateSslCertJob;
 use App\Jobs\ScheduledJobManager;
-use App\Jobs\ServerResourceManager;
+use App\Jobs\ServerManagerJob;
 use App\Jobs\UpdateCoolifyJob;
 use App\Models\InstanceSettings;
 use App\Models\Server;
@@ -55,7 +55,7 @@ class Kernel extends ConsoleKernel
             $this->scheduleInstance->job(new CheckHelperImageJob)->everyTenMinutes()->onOneServer();
 
             // Server Jobs
-            $this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer();
+            $this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
 
             // Scheduled Jobs (Backups & Tasks)
             $this->scheduleInstance->job(new ScheduledJobManager)->everyMinute()->onOneServer();
@@ -74,7 +74,7 @@ class Kernel extends ConsoleKernel
             $this->scheduleUpdates();
 
             // Server Jobs
-            $this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer();
+            $this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
 
             $this->pullImages();
 
diff --git a/app/Jobs/ContainerStatusJob.php b/app/Jobs/DEPRECATEDContainerStatusJob.php
similarity index 100%
rename from app/Jobs/ContainerStatusJob.php
rename to app/Jobs/DEPRECATEDContainerStatusJob.php
diff --git a/app/Jobs/ServerCheckNewJob.php b/app/Jobs/DEPRECATEDServerCheckNewJob.php
similarity index 100%
rename from app/Jobs/ServerCheckNewJob.php
rename to app/Jobs/DEPRECATEDServerCheckNewJob.php
diff --git a/app/Jobs/ServerResourceManager.php b/app/Jobs/DEPRECATEDServerResourceManager.php
similarity index 100%
rename from app/Jobs/ServerResourceManager.php
rename to app/Jobs/DEPRECATEDServerResourceManager.php
diff --git a/app/Jobs/ServerConnectionCheckJob.php b/app/Jobs/ServerConnectionCheckJob.php
new file mode 100644
index 000000000..167bcea38
--- /dev/null
+++ b/app/Jobs/ServerConnectionCheckJob.php
@@ -0,0 +1,178 @@
+server->uuid))->expireAfter(45)->dontRelease()];
+    }
+
+    /**
+     * Disable SSH multiplexing via the ConfigurationRepository resolved from the container.
+     */
+    private function disableSshMux(): void
+    {
+        $configRepository = app(ConfigurationRepository::class);
+        $configRepository->disableSshMux();
+    }
+
+    /**
+     * Check whether the server can be reached and persist the result in its settings.
+     *
+     * A force-disabled or unreachable server gets is_reachable and is_usable set to
+     * false. A reachable server gets both set to true: the Docker availability probe
+     * is commented out below, so is_usable currently just mirrors is_reachable.
+     *
+     * @throws \Throwable Rethrown after both flags have been set to false.
+     */
+    public function handle()
+    {
+        try {
+            // Check if server is disabled
+            if ($this->server->settings->force_disabled) {
+                $this->server->settings->update([
+                    'is_reachable' => false,
+                    'is_usable' => false,
+                ]);
+                Log::debug('ServerConnectionCheck: Server is disabled', [
+                    'server_id' => $this->server->id,
+                    'server_name' => $this->server->name,
+                ]);
+
+                return;
+            }
+
+            // Temporarily disable mux if requested
+            if ($this->disableMux) {
+                $this->disableSshMux();
+            }
+
+            // Check basic connectivity first
+            $isReachable = $this->checkConnection();
+
+            if (! $isReachable) {
+                $this->server->settings->update([
+                    'is_reachable' => false,
+                    'is_usable' => false,
+                ]);
+
+                Log::warning('ServerConnectionCheck: Server not reachable', [
+                    'server_id' => $this->server->id,
+                    'server_name' => $this->server->name,
+                    'server_ip' => $this->server->ip,
+                ]);
+
+                return;
+            }
+
+            // Server is reachable, check if Docker is available
+            // $isUsable = $this->checkDockerAvailability();
+
+            $this->server->settings->update([
+                'is_reachable' => true,
+                'is_usable' => true,
+            ]);
+
+        } catch (\Throwable $e) {
+            $this->server->settings->update([
+                'is_reachable' => false,
+                'is_usable' => false,
+            ]);
+
+            throw $e;
+        }
+    }
+
+    /**
+     * Probe connectivity by running `ls -la /` on the server.
+     *
+     * @return bool True when the remote call yields non-null output; false when it
+     *              yields null or throws.
+     */
+    private function checkConnection(): bool
+    {
+        try {
+            // Use instant_remote_process with a simple command
+            // This will automatically handle mux, sudo, IPv6, Cloudflare tunnel, etc.
+            $output = instant_remote_process_with_timeout(
+                ['ls -la /'],
+                $this->server,
+                false // don't throw error
+            );
+
+            return $output !== null;
+        } catch (\Throwable $e) {
+            Log::debug('ServerConnectionCheck: Connection check failed', [
+                'server_id' => $this->server->id,
+                'error' => $e->getMessage(),
+            ]);
+
+            return false;
+        }
+    }
+
+    /**
+     * Probe Docker by running `docker version --format json` on the server.
+     *
+     * The only call site visible in handle() is commented out.
+     *
+     * @return bool True only when the decoded output contains Server.Version.
+     */
+    private function checkDockerAvailability(): bool
+    {
+        try {
+            // Use instant_remote_process to check Docker
+            // The function will automatically handle sudo for non-root users
+            $output = instant_remote_process_with_timeout(
+                ['docker version --format json'],
+                $this->server,
+                false // don't throw error
+            );
+
+            if ($output === null) {
+                return false;
+            }
+
+            // Try to parse the JSON output to ensure Docker is really working
+            $output = trim($output);
+            if (! empty($output)) {
+                $dockerInfo = json_decode($output, true);
+
+                return isset($dockerInfo['Server']['Version']);
+            }
+
+            return false;
+        } catch (\Throwable $e) {
+            Log::debug('ServerConnectionCheck: Docker check failed', [
+                'server_id' => $this->server->id,
+                'error' => $e->getMessage(),
+            ]);
+
+            return false;
+        }
+    }
+}
diff --git a/app/Jobs/ServerManagerJob.php b/app/Jobs/ServerManagerJob.php
new file mode 100644
index 000000000..8ec1cd2a4
--- /dev/null
+++ b/app/Jobs/ServerManagerJob.php
@@ -0,0 +1,216 @@
+onQueue('high');
+    }
+
+    /**
+     * Freeze the execution time, resolve the instance timezone, then dispatch the
+     * connection checks and the per-server scheduled jobs for every selected server.
+     */
+    public function handle(): void
+    {
+        // Freeze the execution time at the start of the job
+        $this->executionTime = Carbon::now();
+
+        $this->settings = instanceSettings();
+        $this->instanceTimezone = $this->settings->instance_timezone ?: config('app.timezone');
+
+        if (validate_timezone($this->instanceTimezone) === false) {
+            $this->instanceTimezone = config('app.timezone');
+        }
+
+        // Get all servers to process
+        $servers = $this->getServers();
+
+        // Dispatch ServerConnectionCheck for all servers efficiently
+        $this->dispatchConnectionChecks($servers);
+
+        // Process server-specific scheduled tasks
+        $this->processScheduledTasks($servers);
+    }
+
+    /**
+     * Select the servers to process, skipping any whose IP is 1.2.3.4.
+     *
+     * On cloud, only servers of teams whose subscription has stripe_invoice_paid set
+     * are returned, merged with the servers of team 0.
+     */
+    private function getServers(): Collection
+    {
+        $allServers = Server::where('ip', '!=', '1.2.3.4');
+
+        if (isCloud()) {
+            $servers = $allServers->whereRelation('team.subscription', 'stripe_invoice_paid', true)->get();
+            $own = Team::find(0)->servers;
+
+            return $servers->merge($own);
+        } else {
+            return $allServers->get();
+        }
+    }
+
+    /**
+     * Dispatch a ServerConnectionCheckJob for each server when the check is due
+     * (every 2 minutes on cloud, every minute otherwise). An \Exception raised while
+     * dispatching is logged to the scheduled-errors channel and the loop continues.
+     */
+    private function dispatchConnectionChecks(Collection $servers): void
+    {
+
+        $checkFrequency = isCloud() ? '*/2 * * * *' : '* * * * *'; // Every 2 min for cloud, every minute for self-hosted
+        $cron = new CronExpression($checkFrequency);
+
+        if ($cron->isDue($this->executionTime)) {
+            $servers->each(function (Server $server) {
+                try {
+                    ServerConnectionCheckJob::dispatch($server);
+                } catch (\Exception $e) {
+                    Log::channel('scheduled-errors')->error('Failed to dispatch ServerConnectionCheck', [
+                        'server_id' => $server->id,
+                        'server_name' => $server->name,
+                        'error' => $e->getMessage(),
+                    ]);
+                }
+            });
+        }
+    }
+
+    /**
+     * Run processServerTasks() for every server; an \Exception from one server is
+     * logged to the scheduled-errors channel and the remaining servers still run.
+     */
+    private function processScheduledTasks(Collection $servers): void
+    {
+        foreach ($servers as $server) {
+            try {
+                $this->processServerTasks($server);
+            } catch (\Exception $e) {
+                Log::channel('scheduled-errors')->error('Error processing server tasks', [
+                    'server_id' => $server->id,
+                    'server_name' => $server->name,
+                    'error' => $e->getMessage(),
+                ]);
+            }
+        }
+    }
+
+    /**
+     * Dispatch the periodic jobs that are due for a single server.
+     *
+     * ServerCheckJob and ServerStorageCheckJob are considered only when the last
+     * Sentinel update is older than waitBeforeDoingSshCheck() seconds before the
+     * execution time. Docker cleanup, the weekly patch check and the daily Sentinel
+     * restart (Sentinel-enabled servers only) are evaluated in every run.
+     */
+    private function processServerTasks(Server $server): void
+    {
+        $serverTimezone = data_get($server->settings, 'server_timezone', $this->instanceTimezone);
+        if (validate_timezone($serverTimezone) === false) {
+            $serverTimezone = config('app.timezone');
+        }
+
+        // Check if we should run sentinel-based checks
+        $lastSentinelUpdate = $server->sentinel_updated_at;
+        $waitTime = $server->waitBeforeDoingSshCheck();
+        // Subtract from a copy: Carbon instances are mutable, and the frozen execution
+        // time is reused by every later cron check and for every later server.
+        $sentinelOutOfSync = Carbon::parse($lastSentinelUpdate)->isBefore($this->executionTime->copy()->subSeconds($waitTime));
+
+        if ($sentinelOutOfSync) {
+            // Dispatch jobs if Sentinel is out of sync
+            $checkFrequency = isCloud() ? '*/5 * * * *' : '* * * * *'; // Every 5 min for cloud, every minute for self-hosted
+            $cron = new CronExpression($checkFrequency);
+
+            if ($cron->isDue($this->executionTime)) {
+                ServerCheckJob::dispatch($server);
+            }
+
+            // Dispatch ServerStorageCheckJob if due
+            $serverDiskUsageCheckFrequency = data_get($server->settings, 'server_disk_usage_check_frequency', '0 * * * *');
+            if (isset(VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency])) {
+                $serverDiskUsageCheckFrequency = VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency];
+            }
+            $shouldRunStorageCheck = $this->shouldRunNow($serverDiskUsageCheckFrequency, $serverTimezone);
+
+            if ($shouldRunStorageCheck) {
+                ServerStorageCheckJob::dispatch($server);
+            }
+        }
+
+        // Dispatch DockerCleanupJob if due
+        $dockerCleanupFrequency = data_get($server->settings, 'docker_cleanup_frequency', '0 * * * *');
+        if (isset(VALID_CRON_STRINGS[$dockerCleanupFrequency])) {
+            $dockerCleanupFrequency = VALID_CRON_STRINGS[$dockerCleanupFrequency];
+        }
+        $shouldRunDockerCleanup = $this->shouldRunNow($dockerCleanupFrequency, $serverTimezone);
+
+        if ($shouldRunDockerCleanup) {
+            DockerCleanupJob::dispatch($server, false, $server->settings->delete_unused_volumes, $server->settings->delete_unused_networks);
+        }
+
+        // Dispatch ServerPatchCheckJob if due (weekly)
+        $shouldRunPatchCheck = $this->shouldRunNow('0 0 * * 0', $serverTimezone);
+
+        if ($shouldRunPatchCheck) { // Weekly on Sunday at midnight
+            ServerPatchCheckJob::dispatch($server);
+        }
+
+        // Dispatch Sentinel restart if due (daily for Sentinel-enabled servers)
+        $isSentinelEnabled = $server->isSentinelEnabled();
+        $shouldRestartSentinel = $isSentinelEnabled && $this->shouldRunNow('0 0 * * *', $serverTimezone);
+
+        if ($shouldRestartSentinel) {
+            dispatch(function () use ($server) {
+                $server->restartContainer('coolify-sentinel');
+            });
+        }
+    }
+
+    /**
+     * Report whether a cron expression is due at the frozen execution time, converted
+     * to the given timezone.
+     */
+    private function shouldRunNow(string $frequency, string $timezone): bool
+    {
+        $cron = new CronExpression($frequency);
+
+        // Use the frozen execution time, not the current time
+        $baseTime = $this->executionTime ?? Carbon::now();
+        $executionTime = $baseTime->copy()->setTimezone($timezone);
+
+        return $cron->isDue($executionTime);
+    }
+}