fix(servercheck): Properly check server statuses with and without Sentinel
This commit is contained in:
@@ -10,7 +10,7 @@ use App\Jobs\PullChangelogFromGitHub;
|
||||
use App\Jobs\PullTemplatesFromCDN;
|
||||
use App\Jobs\RegenerateSslCertJob;
|
||||
use App\Jobs\ScheduledJobManager;
|
||||
use App\Jobs\ServerResourceManager;
|
||||
use App\Jobs\ServerManagerJob;
|
||||
use App\Jobs\UpdateCoolifyJob;
|
||||
use App\Models\InstanceSettings;
|
||||
use App\Models\Server;
|
||||
@@ -55,7 +55,7 @@ class Kernel extends ConsoleKernel
|
||||
$this->scheduleInstance->job(new CheckHelperImageJob)->everyTenMinutes()->onOneServer();
|
||||
|
||||
// Server Jobs
|
||||
$this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer();
|
||||
$this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
|
||||
|
||||
// Scheduled Jobs (Backups & Tasks)
|
||||
$this->scheduleInstance->job(new ScheduledJobManager)->everyMinute()->onOneServer();
|
||||
@@ -74,7 +74,7 @@ class Kernel extends ConsoleKernel
|
||||
$this->scheduleUpdates();
|
||||
|
||||
// Server Jobs
|
||||
$this->scheduleInstance->job(new ServerResourceManager)->everyMinute()->onOneServer();
|
||||
$this->scheduleInstance->job(new ServerManagerJob)->everyMinute()->onOneServer();
|
||||
|
||||
$this->pullImages();
|
||||
|
||||
|
||||
153
app/Jobs/ServerConnectionCheckJob.php
Normal file
153
app/Jobs/ServerConnectionCheckJob.php
Normal file
@@ -0,0 +1,153 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\Server;
|
||||
use App\Services\ConfigurationRepository;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldBeEncrypted;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\Middleware\WithoutOverlapping;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class ServerConnectionCheckJob implements ShouldBeEncrypted, ShouldQueue
|
||||
{
|
||||
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
||||
|
||||
public $tries = 1;
|
||||
|
||||
public $timeout = 30;
|
||||
|
||||
public function __construct(
|
||||
public Server $server,
|
||||
public bool $disableMux = true
|
||||
) {}
|
||||
|
||||
public function middleware(): array
|
||||
{
|
||||
return [(new WithoutOverlapping('server-connection-check-'.$this->server->uuid))->expireAfter(45)->dontRelease()];
|
||||
}
|
||||
|
||||
private function disableSshMux(): void
|
||||
{
|
||||
$configRepository = app(ConfigurationRepository::class);
|
||||
$configRepository->disableSshMux();
|
||||
}
|
||||
|
||||
public function handle()
|
||||
{
|
||||
try {
|
||||
// Check if server is disabled
|
||||
if ($this->server->settings->force_disabled) {
|
||||
$this->server->settings->update([
|
||||
'is_reachable' => false,
|
||||
'is_usable' => false,
|
||||
]);
|
||||
Log::debug('ServerConnectionCheck: Server is disabled', [
|
||||
'server_id' => $this->server->id,
|
||||
'server_name' => $this->server->name,
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Temporarily disable mux if requested
|
||||
if ($this->disableMux) {
|
||||
$this->disableSshMux();
|
||||
}
|
||||
|
||||
// Check basic connectivity first
|
||||
$isReachable = $this->checkConnection();
|
||||
|
||||
if (! $isReachable) {
|
||||
$this->server->settings->update([
|
||||
'is_reachable' => false,
|
||||
'is_usable' => false,
|
||||
]);
|
||||
|
||||
Log::warning('ServerConnectionCheck: Server not reachable', [
|
||||
'server_id' => $this->server->id,
|
||||
'server_name' => $this->server->name,
|
||||
'server_ip' => $this->server->ip,
|
||||
]);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// Server is reachable, check if Docker is available
|
||||
// $isUsable = $this->checkDockerAvailability();
|
||||
|
||||
$this->server->settings->update([
|
||||
'is_reachable' => true,
|
||||
'is_usable' => true,
|
||||
]);
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
$this->server->settings->update([
|
||||
'is_reachable' => false,
|
||||
'is_usable' => false,
|
||||
]);
|
||||
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
private function checkConnection(): bool
|
||||
{
|
||||
try {
|
||||
// Use instant_remote_process with a simple command
|
||||
// This will automatically handle mux, sudo, IPv6, Cloudflare tunnel, etc.
|
||||
$output = instant_remote_process_with_timeout(
|
||||
['ls -la /'],
|
||||
$this->server,
|
||||
false // don't throw error
|
||||
);
|
||||
|
||||
return $output !== null;
|
||||
} catch (\Throwable $e) {
|
||||
Log::debug('ServerConnectionCheck: Connection check failed', [
|
||||
'server_id' => $this->server->id,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private function checkDockerAvailability(): bool
|
||||
{
|
||||
try {
|
||||
// Use instant_remote_process to check Docker
|
||||
// The function will automatically handle sudo for non-root users
|
||||
$output = instant_remote_process_with_timeout(
|
||||
['docker version --format json'],
|
||||
$this->server,
|
||||
false // don't throw error
|
||||
);
|
||||
|
||||
if ($output === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to parse the JSON output to ensure Docker is really working
|
||||
$output = trim($output);
|
||||
if (! empty($output)) {
|
||||
$dockerInfo = json_decode($output, true);
|
||||
|
||||
return isset($dockerInfo['Server']['Version']);
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (\Throwable $e) {
|
||||
Log::debug('ServerConnectionCheck: Docker check failed', [
|
||||
'server_id' => $this->server->id,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
183
app/Jobs/ServerManagerJob.php
Normal file
183
app/Jobs/ServerManagerJob.php
Normal file
@@ -0,0 +1,183 @@
|
||||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use App\Models\InstanceSettings;
|
||||
use App\Models\Server;
|
||||
use App\Models\Team;
|
||||
use Cron\CronExpression;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
use Illuminate\Foundation\Bus\Dispatchable;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Support\Carbon;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
class ServerManagerJob implements ShouldQueue
|
||||
{
|
||||
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
||||
|
||||
/**
|
||||
* The time when this job execution started.
|
||||
*/
|
||||
private ?Carbon $executionTime = null;
|
||||
|
||||
private InstanceSettings $settings;
|
||||
|
||||
private string $instanceTimezone;
|
||||
|
||||
/**
|
||||
* Create a new job instance.
|
||||
*/
|
||||
public function __construct()
|
||||
{
|
||||
$this->onQueue('high');
|
||||
}
|
||||
|
||||
public function handle(): void
|
||||
{
|
||||
// Freeze the execution time at the start of the job
|
||||
$this->executionTime = Carbon::now();
|
||||
|
||||
$this->settings = instanceSettings();
|
||||
$this->instanceTimezone = $this->settings->instance_timezone ?: config('app.timezone');
|
||||
|
||||
if (validate_timezone($this->instanceTimezone) === false) {
|
||||
$this->instanceTimezone = config('app.timezone');
|
||||
}
|
||||
|
||||
// Get all servers to process
|
||||
$servers = $this->getServers();
|
||||
|
||||
// Dispatch ServerConnectionCheck for all servers efficiently
|
||||
$this->dispatchConnectionChecks($servers);
|
||||
|
||||
// Process server-specific scheduled tasks
|
||||
$this->processScheduledTasks($servers);
|
||||
}
|
||||
|
||||
private function getServers(): Collection
|
||||
{
|
||||
$allServers = Server::where('ip', '!=', '1.2.3.4');
|
||||
|
||||
if (isCloud()) {
|
||||
$servers = $allServers->whereRelation('team.subscription', 'stripe_invoice_paid', true)->get();
|
||||
$own = Team::find(0)->servers;
|
||||
|
||||
return $servers->merge($own);
|
||||
} else {
|
||||
return $allServers->get();
|
||||
}
|
||||
}
|
||||
|
||||
private function dispatchConnectionChecks(Collection $servers): void
|
||||
{
|
||||
|
||||
$checkFrequency = isCloud() ? '*/2 * * * *' : '* * * * *'; // Every 2 min for cloud, every minute for self-hosted
|
||||
$cron = new CronExpression($checkFrequency);
|
||||
|
||||
if ($cron->isDue($this->executionTime)) {
|
||||
$servers->each(function (Server $server) {
|
||||
try {
|
||||
ServerConnectionCheckJob::dispatch($server);
|
||||
} catch (\Exception $e) {
|
||||
Log::channel('scheduled-errors')->error('Failed to dispatch ServerConnectionCheck', [
|
||||
'server_id' => $server->id,
|
||||
'server_name' => $server->name,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private function processScheduledTasks(Collection $servers): void
|
||||
{
|
||||
foreach ($servers as $server) {
|
||||
try {
|
||||
$this->processServerTasks($server);
|
||||
} catch (\Exception $e) {
|
||||
Log::channel('scheduled-errors')->error('Error processing server tasks', [
|
||||
'server_id' => $server->id,
|
||||
'server_name' => $server->name,
|
||||
'error' => $e->getMessage(),
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private function processServerTasks(Server $server): void
|
||||
{
|
||||
$serverTimezone = data_get($server->settings, 'server_timezone', $this->instanceTimezone);
|
||||
if (validate_timezone($serverTimezone) === false) {
|
||||
$serverTimezone = config('app.timezone');
|
||||
}
|
||||
|
||||
// Check if we should run sentinel-based checks
|
||||
$lastSentinelUpdate = $server->sentinel_updated_at;
|
||||
$waitTime = $server->waitBeforeDoingSshCheck();
|
||||
$sentinelOutOfSync = Carbon::parse($lastSentinelUpdate)->isBefore($this->executionTime->subSeconds($waitTime));
|
||||
|
||||
if ($sentinelOutOfSync) {
|
||||
// Dispatch jobs if Sentinel is out of sync
|
||||
$checkFrequency = isCloud() ? '*/5 * * * *' : '* * * * *'; // Every 5 min for cloud, every minute for self-hosted
|
||||
$cron = new CronExpression($checkFrequency);
|
||||
|
||||
if ($cron->isDue($this->executionTime)) {
|
||||
ServerCheckJob::dispatch($server);
|
||||
}
|
||||
|
||||
// Dispatch ServerStorageCheckJob if due
|
||||
$serverDiskUsageCheckFrequency = data_get($server->settings, 'server_disk_usage_check_frequency', '0 * * * *');
|
||||
if (isset(VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency])) {
|
||||
$serverDiskUsageCheckFrequency = VALID_CRON_STRINGS[$serverDiskUsageCheckFrequency];
|
||||
}
|
||||
$shouldRunStorageCheck = $this->shouldRunNow($serverDiskUsageCheckFrequency, $serverTimezone);
|
||||
|
||||
if ($shouldRunStorageCheck) {
|
||||
ServerStorageCheckJob::dispatch($server);
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch DockerCleanupJob if due
|
||||
$dockerCleanupFrequency = data_get($server->settings, 'docker_cleanup_frequency', '0 * * * *');
|
||||
if (isset(VALID_CRON_STRINGS[$dockerCleanupFrequency])) {
|
||||
$dockerCleanupFrequency = VALID_CRON_STRINGS[$dockerCleanupFrequency];
|
||||
}
|
||||
$shouldRunDockerCleanup = $this->shouldRunNow($dockerCleanupFrequency, $serverTimezone);
|
||||
|
||||
if ($shouldRunDockerCleanup) {
|
||||
DockerCleanupJob::dispatch($server, false, $server->settings->delete_unused_volumes, $server->settings->delete_unused_networks);
|
||||
}
|
||||
|
||||
// Dispatch ServerPatchCheckJob if due (weekly)
|
||||
$shouldRunPatchCheck = $this->shouldRunNow('0 0 * * 0', $serverTimezone);
|
||||
|
||||
if ($shouldRunPatchCheck) { // Weekly on Sunday at midnight
|
||||
ServerPatchCheckJob::dispatch($server);
|
||||
}
|
||||
|
||||
// Dispatch Sentinel restart if due (daily for Sentinel-enabled servers)
|
||||
$isSentinelEnabled = $server->isSentinelEnabled();
|
||||
$shouldRestartSentinel = $isSentinelEnabled && $this->shouldRunNow('0 0 * * *', $serverTimezone);
|
||||
|
||||
if ($shouldRestartSentinel) {
|
||||
dispatch(function () use ($server) {
|
||||
$server->restartContainer('coolify-sentinel');
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private function shouldRunNow(string $frequency, string $timezone): bool
|
||||
{
|
||||
$cron = new CronExpression($frequency);
|
||||
|
||||
// Use the frozen execution time, not the current time
|
||||
$baseTime = $this->executionTime ?? Carbon::now();
|
||||
$executionTime = $baseTime->copy()->setTimezone($timezone);
|
||||
|
||||
return $cron->isDue($executionTime);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user