From b6176d905b159fc6abead2bca3d7fad6a37b2610 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 10:26:23 +0200 Subject: [PATCH 1/5] feat(command): implement SSH command retry logic with exponential backoff and logging for better error handling --- app/Traits/ExecuteRemoteCommand.php | 265 +++++++++++++++++++++------- config/constants.php | 4 + 2 files changed, 209 insertions(+), 60 deletions(-) diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index a228a5d10..a420e1f2b 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -7,6 +7,7 @@ use App\Helpers\SshMultiplexingHelper; use App\Models\Server; use Carbon\Carbon; use Illuminate\Support\Collection; +use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Process; trait ExecuteRemoteCommand @@ -15,6 +16,47 @@ trait ExecuteRemoteCommand public static int $batch_counter = 0; + /** + * Check if an error message indicates a retryable SSH connection error + */ + private function isRetryableSshError(string $errorOutput): bool + { + $retryablePatterns = [ + 'kex_exchange_identification', + 'Connection reset by peer', + 'Connection refused', + 'Connection timed out', + 'Connection closed by remote host', + 'ssh_exchange_identification', + 'Bad file descriptor', + 'Broken pipe', + 'No route to host', + 'Network is unreachable', + ]; + + foreach ($retryablePatterns as $pattern) { + if (str_contains($errorOutput, $pattern)) { + return true; + } + } + + return false; + } + + /** + * Calculate delay for exponential backoff + */ + private function calculateRetryDelay(int $attempt): int + { + $baseDelay = config('constants.ssh.retry_base_delay', 2); + $maxDelay = config('constants.ssh.retry_max_delay', 30); + $multiplier = config('constants.ssh.retry_multiplier', 2); + + $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay); + + return (int) $delay; + } + public function execute_remote_command(...$commands) { static::$batch_counter++; @@ -43,76 +85,179 @@ trait ExecuteRemoteCommand $command = parseLineForSudo($command, $this->server); } } - $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); - $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { - $output = str($output)->trim(); - if ($output->startsWith('╔')) { - $output = "\n".$output; - } - // Sanitize output to ensure valid UTF-8 encoding before JSON encoding - $sanitized_output = sanitize_utf8_text($output); - - $new_log_entry = [ - 'command' => remove_iip($command), - 'output' => remove_iip($sanitized_output), - 'type' => $customType ?? $type === 'err' ? 'stderr' : 'stdout', - 'timestamp' => Carbon::now('UTC'), - 'hidden' => $hidden, - 'batch' => static::$batch_counter, - ]; - if (! $this->application_deployment_queue->logs) { - $new_log_entry['order'] = 1; - } else { - try { - $previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR); - } catch (\JsonException $e) { - // If existing logs are corrupted, start fresh - $previous_logs = []; - $new_log_entry['order'] = 1; - } - if (is_array($previous_logs)) { - $new_log_entry['order'] = count($previous_logs) + 1; - } else { - $previous_logs = []; - $new_log_entry['order'] = 1; - } - } - $previous_logs[] = $new_log_entry; + $maxRetries = config('constants.ssh.max_retries'); + $attempt = 0; + $lastError = null; + $commandExecuted = false; + while ($attempt < $maxRetries && ! $commandExecuted) { try { - $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR); - } catch (\JsonException $e) { - // If JSON encoding still fails, use fallback with invalid sequences replacement - $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE); - } + $this->executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors); + $commandExecuted = true; + } catch (\RuntimeException $e) { + $lastError = $e; + $errorMessage = $e->getMessage(); - $this->application_deployment_queue->save(); + // Only retry if it's an SSH connection error and we haven't exhausted retries + if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) { + $attempt++; + $delay = $this->calculateRetryDelay($attempt - 1); - if ($this->save) { - if (data_get($this->saved_outputs, $this->save, null) === null) { - data_set($this->saved_outputs, $this->save, str()); - } - if ($append) { - $this->saved_outputs[$this->save] .= str($sanitized_output)->trim(); - $this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]); + // Log the retry attempt + Log::warning('SSH command failed, retrying', [ + 'server' => $this->server->ip, + 'attempt' => $attempt, + 'max_retries' => $maxRetries, + 'delay' => $delay, + 'error' => $errorMessage, + 'command_preview' => $hidden ? '[hidden]' : substr($command, 0, 100), + ]); + + // Add log entry for the retry + if (isset($this->application_deployment_queue)) { + $this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage); + } + + sleep($delay); } else { - $this->saved_outputs[$this->save] = str($sanitized_output)->trim(); + // Not retryable or max retries reached + throw $e; } } - }); - $this->application_deployment_queue->update([ - 'current_process_id' => $process->id(), - ]); + } - $process_result = $process->wait(); - if ($process_result->exitCode() !== 0) { - if (! $ignore_errors) { - $this->application_deployment_queue->status = ApplicationDeploymentStatus::FAILED->value; - $this->application_deployment_queue->save(); - throw new \RuntimeException($process_result->errorOutput()); - } + // If we exhausted all retries and still failed + if (! $commandExecuted && $lastError) { + Log::error('SSH command failed after all retries', [ + 'server' => $this->server->ip, + 'attempts' => $attempt, + 'error' => $lastError->getMessage(), + ]); + throw $lastError; } }); } + + /** + * Execute the actual command with process handling + */ + private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors) + { + $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); + $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { + $output = str($output)->trim(); + if ($output->startsWith('╔')) { + $output = "\n".$output; + } + + // Sanitize output to ensure valid UTF-8 encoding before JSON encoding + $sanitized_output = sanitize_utf8_text($output); + + $new_log_entry = [ + 'command' => remove_iip($command), + 'output' => remove_iip($sanitized_output), + 'type' => $customType ?? $type === 'err' ? 'stderr' : 'stdout', + 'timestamp' => Carbon::now('UTC'), + 'hidden' => $hidden, + 'batch' => static::$batch_counter, + ]; + if (! $this->application_deployment_queue->logs) { + $new_log_entry['order'] = 1; + } else { + try { + $previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + // If existing logs are corrupted, start fresh + $previous_logs = []; + $new_log_entry['order'] = 1; + } + if (is_array($previous_logs)) { + $new_log_entry['order'] = count($previous_logs) + 1; + } else { + $previous_logs = []; + $new_log_entry['order'] = 1; + } + } + $previous_logs[] = $new_log_entry; + + try { + $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + // If JSON encoding still fails, use fallback with invalid sequences replacement + $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE); + } + + $this->application_deployment_queue->save(); + + if ($this->save) { + if (data_get($this->saved_outputs, $this->save, null) === null) { + data_set($this->saved_outputs, $this->save, str()); + } + if ($append) { + $this->saved_outputs[$this->save] .= str($sanitized_output)->trim(); + $this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]); + } else { + $this->saved_outputs[$this->save] = str($sanitized_output)->trim(); + } + } + }); + $this->application_deployment_queue->update([ + 'current_process_id' => $process->id(), + ]); + + $process_result = $process->wait(); + if ($process_result->exitCode() !== 0) { + if (! $ignore_errors) { + $this->application_deployment_queue->status = ApplicationDeploymentStatus::FAILED->value; + $this->application_deployment_queue->save(); + throw new \RuntimeException($process_result->errorOutput()); + } + } + } + + /** + * Add a log entry for SSH retry attempts + */ + private function addRetryLogEntry(int $attempt, int $maxRetries, int $delay, string $errorMessage) + { + $retryMessage = "🔄 SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}"; + + $new_log_entry = [ + 'command' => 'SSH Retry', + 'output' => $retryMessage, + 'type' => 'stdout', + 'timestamp' => Carbon::now('UTC'), + 'hidden' => false, + 'batch' => static::$batch_counter, + ]; + + if (! $this->application_deployment_queue->logs) { + $new_log_entry['order'] = 1; + $previous_logs = []; + } else { + try { + $previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + $previous_logs = []; + $new_log_entry['order'] = 1; + } + if (is_array($previous_logs)) { + $new_log_entry['order'] = count($previous_logs) + 1; + } else { + $previous_logs = []; + $new_log_entry['order'] = 1; + } + } + + $previous_logs[] = $new_log_entry; + + try { + $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR); + } catch (\JsonException $e) { + $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE); + } + + $this->application_deployment_queue->save(); + } } diff --git a/config/constants.php b/config/constants.php index 9c1b8b274..652af5ff4 100644 --- a/config/constants.php +++ b/config/constants.php @@ -62,6 +62,10 @@ return [ 'connection_timeout' => 10, 'server_interval' => 20, 'command_timeout' => 7200, + 'max_retries' => env('SSH_MAX_RETRIES', 3), + 'retry_base_delay' => env('SSH_RETRY_BASE_DELAY', 2), // seconds + 'retry_max_delay' => env('SSH_RETRY_MAX_DELAY', 30), // seconds + 'retry_multiplier' => env('SSH_RETRY_MULTIPLIER', 2), ], 'invitation' => [ From b8477409246817720386a31a9fed188b92f0e808 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 16:38:11 +0200 Subject: [PATCH 2/5] refactor(command): simplify SSH command retry logic by removing unnecessary logging and improving delay calculation --- app/Traits/ExecuteRemoteCommand.php | 32 ++++++++--------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index a420e1f2b..436d0a0d4 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -48,9 +48,9 @@ trait ExecuteRemoteCommand */ private function calculateRetryDelay(int $attempt): int { - $baseDelay = config('constants.ssh.retry_base_delay', 2); - $maxDelay = config('constants.ssh.retry_max_delay', 30); - $multiplier = config('constants.ssh.retry_multiplier', 2); + $baseDelay = config('constants.ssh.retry_base_delay'); + $maxDelay = config('constants.ssh.retry_max_delay'); + $multiplier = config('constants.ssh.retry_multiplier'); $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay); @@ -98,22 +98,10 @@ trait ExecuteRemoteCommand } catch (\RuntimeException $e) { $lastError = $e; $errorMessage = $e->getMessage(); - // Only retry if it's an SSH connection error and we haven't exhausted retries if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) { $attempt++; $delay = $this->calculateRetryDelay($attempt - 1); - - // Log the retry attempt - Log::warning('SSH command failed, retrying', [ - 'server' => $this->server->ip, - 'attempt' => $attempt, - 'max_retries' => $maxRetries, - 'delay' => $delay, - 'error' => $errorMessage, - 'command_preview' => $hidden ? '[hidden]' : substr($command, 0, 100), - ]); - // Add log entry for the retry if (isset($this->application_deployment_queue)) { $this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage); @@ -129,11 +117,6 @@ trait ExecuteRemoteCommand // If we exhausted all retries and still failed if (! $commandExecuted && $lastError) { - Log::error('SSH command failed after all retries', [ - 'server' => $this->server->ip, - 'attempts' => $attempt, - 'error' => $lastError->getMessage(), - ]); throw $lastError; } }); @@ -145,6 +128,10 @@ trait ExecuteRemoteCommand private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors) { $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); + // Randomly fail the command with a key exchange error for testing + // if (random_int(1, 10) === 1) { // 10% chance to fail + // throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); + // } $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { $output = str($output)->trim(); if ($output->startsWith('╔')) { @@ -221,11 +208,10 @@ trait ExecuteRemoteCommand */ private function addRetryLogEntry(int $attempt, int $maxRetries, int $delay, string $errorMessage) { - $retryMessage = "🔄 SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}"; + $retryMessage = "SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}"; $new_log_entry = [ - 'command' => 'SSH Retry', - 'output' => $retryMessage, + 'output' => remove_iip($retryMessage), 'type' => 'stdout', 'timestamp' => Carbon::now('UTC'), 'hidden' => false, From 579cc2589892d115ae7de1ea6ea0c781b2bc040c Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 17:17:35 +0200 Subject: [PATCH 3/5] fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling --- app/Helpers/SshRetryHandler.php | 34 +++++ app/Traits/ExecuteRemoteCommand.php | 46 +------ app/Traits/SshRetryable.php | 133 +++++++++++++++++++ bootstrap/helpers/remoteProcess.php | 99 ++++++++------ tests/Unit/SshRetryMechanismTest.php | 189 +++++++++++++++++++++++++++ 5 files changed, 420 insertions(+), 81 deletions(-) create mode 100644 app/Helpers/SshRetryHandler.php create mode 100644 app/Traits/SshRetryable.php create mode 100644 tests/Unit/SshRetryMechanismTest.php diff --git a/app/Helpers/SshRetryHandler.php b/app/Helpers/SshRetryHandler.php new file mode 100644 index 000000000..aaaf4252a --- /dev/null +++ b/app/Helpers/SshRetryHandler.php @@ -0,0 +1,34 @@ +executeWithSshRetry($callback, $context, $throwError); + } +} diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index 436d0a0d4..0b770a6e0 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper; use App\Models\Server; use Carbon\Carbon; use Illuminate\Support\Collection; -use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Process; trait ExecuteRemoteCommand { + use SshRetryable; + public ?string $save = null; public static int $batch_counter = 0; - /** - * Check if an error message indicates a retryable SSH connection error - */ - private function isRetryableSshError(string $errorOutput): bool - { - $retryablePatterns = [ - 'kex_exchange_identification', - 'Connection reset by peer', - 'Connection refused', - 'Connection timed out', - 'Connection closed by remote host', - 'ssh_exchange_identification', - 'Bad file descriptor', - 'Broken pipe', - 'No route to host', - 'Network is unreachable', - ]; - - foreach ($retryablePatterns as $pattern) { - if (str_contains($errorOutput, $pattern)) { - return true; - } - } - - return false; - } - - /** - * Calculate delay for exponential backoff - */ - private function calculateRetryDelay(int $attempt): int - { - $baseDelay = config('constants.ssh.retry_base_delay'); - $maxDelay = config('constants.ssh.retry_max_delay'); - $multiplier = config('constants.ssh.retry_multiplier'); - - $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay); - - return (int) $delay; - } - public function execute_remote_command(...$commands) { static::$batch_counter++; @@ -129,7 +89,7 @@ trait ExecuteRemoteCommand { $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); // Randomly fail the command with a key exchange error for testing - // if (random_int(1, 10) === 1) { // 10% chance to fail + // if (random_int(1, 20) === 1) { // 5% chance to fail // throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); // } $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { diff --git a/app/Traits/SshRetryable.php b/app/Traits/SshRetryable.php new file mode 100644 index 000000000..c2756c2ea --- /dev/null +++ b/app/Traits/SshRetryable.php @@ -0,0 +1,133 @@ + 0) { + Log::info('SSH operation succeeded after retry', array_merge($context, [ + 'attempt' => $attempt + 1, + ])); + } + + return $result; + + } catch (\Throwable $e) { + $lastError = $e; + $lastErrorMessage = $e->getMessage(); + + // Check if it's retryable and not the last attempt + if ($this->isRetryableSshError($lastErrorMessage) && $attempt < $maxRetries - 1) { + $delay = $this->calculateRetryDelay($attempt); + + // Add deployment log if available (for ExecuteRemoteCommand trait) + if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) { + $this->addRetryLogEntry($attempt + 1, $maxRetries, $delay, $lastErrorMessage); + } + + sleep($delay); + + continue; + } + + // Not retryable or max retries reached + break; + } + } + + // All retries exhausted + if ($attempt >= $maxRetries) { + Log::error('SSH operation failed after all retries', array_merge($context, [ + 'attempts' => $attempt, + 'error' => $lastErrorMessage, + ])); + } + + if ($throwError && $lastError) { + throw $lastError; + } + + return null; + } +} diff --git a/bootstrap/helpers/remoteProcess.php b/bootstrap/helpers/remoteProcess.php index 6c1e2beab..6efe4a405 100644 --- a/bootstrap/helpers/remoteProcess.php +++ b/bootstrap/helpers/remoteProcess.php @@ -60,15 +60,28 @@ function remote_process( function instant_scp(string $source, string $dest, Server $server, $throwError = true) { - $scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest); - $process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command); - $output = trim($process->output()); - $exitCode = $process->exitCode(); - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + return \App\Helpers\SshRetryHandler::retry( + function () use ($source, $dest, $server) { + $scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest); + $process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command); - return $output === 'null' ? null : $output; + $output = trim($process->output()); + $exitCode = $process->exitCode(); + + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } + + return $output === 'null' ? null : $output; + }, + [ + 'server' => $server->ip, + 'source' => $source, + 'dest' => $dest, + 'function' => 'instant_scp', + ], + $throwError + ); } function instant_remote_process_with_timeout(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string @@ -79,25 +92,30 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $ } $command_string = implode("\n", $command); - // $start_time = microtime(true); - $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); - $process = Process::timeout(30)->run($sshCommand); - // $end_time = microtime(true); + return \App\Helpers\SshRetryHandler::retry( + function () use ($server, $command_string) { + $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); + $process = Process::timeout(30)->run($sshCommand); - // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds - // ray('SSH command execution time:', $execution_time.' ms')->orange(); + $output = trim($process->output()); + $exitCode = $process->exitCode(); - $output = trim($process->output()); - $exitCode = $process->exitCode(); + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + // Sanitize output to ensure valid UTF-8 encoding + $output = $output === 'null' ? null : sanitize_utf8_text($output); - // Sanitize output to ensure valid UTF-8 encoding - $output = $output === 'null' ? null : sanitize_utf8_text($output); - - return $output; + return $output; + }, + [ + 'server' => $server->ip, + 'command_preview' => substr($command_string, 0, 100), + 'function' => 'instant_remote_process_with_timeout', + ], + $throwError + ); } function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string @@ -108,25 +126,30 @@ function instant_remote_process(Collection|array $command, Server $server, bool } $command_string = implode("\n", $command); - // $start_time = microtime(true); - $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); - $process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand); - // $end_time = microtime(true); + return \App\Helpers\SshRetryHandler::retry( + function () use ($server, $command_string) { + $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); + $process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand); - // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds - // ray('SSH command execution time:', $execution_time.' ms')->orange(); + $output = trim($process->output()); + $exitCode = $process->exitCode(); - $output = trim($process->output()); - $exitCode = $process->exitCode(); + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + // Sanitize output to ensure valid UTF-8 encoding + $output = $output === 'null' ? null : sanitize_utf8_text($output); - // Sanitize output to ensure valid UTF-8 encoding - $output = $output === 'null' ? null : sanitize_utf8_text($output); - - return $output; + return $output; + }, + [ + 'server' => $server->ip, + 'command_preview' => substr($command_string, 0, 100), + 'function' => 'instant_remote_process', + ], + $throwError + ); } function excludeCertainErrors(string $errorOutput, ?int $exitCode = null) diff --git a/tests/Unit/SshRetryMechanismTest.php b/tests/Unit/SshRetryMechanismTest.php new file mode 100644 index 000000000..23e1b867f --- /dev/null +++ b/tests/Unit/SshRetryMechanismTest.php @@ -0,0 +1,189 @@ +assertTrue(class_exists(\App\Helpers\SshRetryHandler::class)); + } + + public function test_ssh_retryable_trait_exists() + { + $this->assertTrue(trait_exists(\App\Traits\SshRetryable::class)); + } + + public function test_retry_on_ssh_connection_errors() + { + $handler = new class + { + use SshRetryable; + + // Make methods public for testing + public function test_is_retryable_ssh_error($error) + { + return $this->isRetryableSshError($error); + } + }; + + // Test various SSH error patterns + $sshErrors = [ + 'kex_exchange_identification: read: Connection reset by peer', + 'Connection refused', + 'Connection timed out', + 'ssh_exchange_identification: Connection closed by remote host', + 'Broken pipe', + 'No route to host', + 'Network is unreachable', + ]; + + foreach ($sshErrors as $error) { + $this->assertTrue( + $handler->test_is_retryable_ssh_error($error), + "Failed to identify as retryable: $error" + ); + } + } + + public function test_non_ssh_errors_are_not_retryable() + { + $handler = new class + { + use SshRetryable; + + // Make methods public for testing + public function test_is_retryable_ssh_error($error) + { + return $this->isRetryableSshError($error); + } + }; + + // Test non-SSH errors + $nonSshErrors = [ + 'Command not found', + 'Permission denied', + 'File not found', + 'Syntax error', + 'Invalid argument', + ]; + + foreach ($nonSshErrors as $error) { + $this->assertFalse( + $handler->test_is_retryable_ssh_error($error), + "Incorrectly identified as retryable: $error" + ); + } + } + + public function test_exponential_backoff_calculation() + { + $handler = new class + { + use SshRetryable; + + // Make method public for testing + public function test_calculate_retry_delay($attempt) + { + return $this->calculateRetryDelay($attempt); + } + }; + + // Test with default config values + config(['constants.ssh.retry_base_delay' => 2]); + config(['constants.ssh.retry_max_delay' => 30]); + config(['constants.ssh.retry_multiplier' => 2]); + + // Attempt 0: 2 seconds + $this->assertEquals(2, $handler->test_calculate_retry_delay(0)); + + // Attempt 1: 4 seconds + $this->assertEquals(4, $handler->test_calculate_retry_delay(1)); + + // Attempt 2: 8 seconds + $this->assertEquals(8, $handler->test_calculate_retry_delay(2)); + + // Attempt 3: 16 seconds + $this->assertEquals(16, $handler->test_calculate_retry_delay(3)); + + // Attempt 4: Should be capped at 30 seconds + $this->assertEquals(30, $handler->test_calculate_retry_delay(4)); + + // Attempt 5: Should still be capped at 30 seconds + $this->assertEquals(30, $handler->test_calculate_retry_delay(5)); + } + + public function test_retry_succeeds_after_failures() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + // Simulate a function that fails twice then succeeds using the public static method + $result = SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + if ($attemptCount < 3) { + throw new \RuntimeException('kex_exchange_identification: Connection reset by peer'); + } + + return 'success'; + }, + ['test' => 'retry_test'], + true + ); + + $this->assertEquals('success', $result); + $this->assertEquals(3, $attemptCount); + } + + public function test_retry_fails_after_max_attempts() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Connection reset by peer'); + + // Simulate a function that always fails using the public static method + SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + throw new \RuntimeException('Connection reset by peer'); + }, + ['test' => 'retry_test'], + true + ); + } + + public function test_non_retryable_errors_fail_immediately() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Command not found'); + + try { + // Simulate a non-retryable error using the public static method + SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + throw new \RuntimeException('Command not found'); + }, + ['test' => 'non_retryable_test'], + true + ); + } catch (\RuntimeException $e) { + // Should only attempt once since it's not retryable + $this->assertEquals(1, $attemptCount); + throw $e; + } + } +} From 4bd29bf966bf1dfdeb5e7c0f0fe9f786a9bbcd33 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 18:45:44 +0200 Subject: [PATCH 4/5] refactor(ssh): enhance error handling in SSH command execution and improve connection validation logging --- app/Models/Server.php | 1 + app/Traits/ExecuteRemoteCommand.php | 4 ---- app/Traits/SshRetryable.php | 32 ++++++++++++++--------------- bootstrap/helpers/remoteProcess.php | 11 ++++++++-- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/app/Models/Server.php b/app/Models/Server.php index 0f92bd390..736a59be4 100644 --- a/app/Models/Server.php +++ b/app/Models/Server.php @@ -1082,6 +1082,7 @@ $schema://$host { public function validateConnection(bool $justCheckingNewKey = false) { + ray('validateConnection', $this->id); $this->disableSshMux(); if ($this->skipServer()) { diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index 0b770a6e0..398f05bc9 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -88,10 +88,6 @@ trait ExecuteRemoteCommand private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors) { $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); - // Randomly fail the command with a key exchange error for testing - // if (random_int(1, 20) === 1) { // 5% chance to fail - // throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); - // } $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { $output = str($output)->trim(); if ($output->startsWith('╔')) { diff --git a/app/Traits/SshRetryable.php b/app/Traits/SshRetryable.php index c2756c2ea..2092dc5f3 100644 --- a/app/Traits/SshRetryable.php +++ b/app/Traits/SshRetryable.php @@ -57,9 +57,9 @@ trait SshRetryable */ protected function calculateRetryDelay(int $attempt): int { - $baseDelay = config('constants.ssh.retry_base_delay', 2); - $maxDelay = config('constants.ssh.retry_max_delay', 30); - $multiplier = config('constants.ssh.retry_multiplier', 2); + $baseDelay = config('constants.ssh.retry_base_delay'); + $maxDelay = config('constants.ssh.retry_max_delay'); + $multiplier = config('constants.ssh.retry_multiplier'); $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay); @@ -76,23 +76,17 @@ trait SshRetryable */ protected function executeWithSshRetry(callable $callback, array $context = [], bool $throwError = true) { - $maxRetries = config('constants.ssh.max_retries', 3); + $maxRetries = config('constants.ssh.max_retries'); $lastError = null; $lastErrorMessage = ''; + // Randomly fail the command with a key exchange error for testing + // if (random_int(1, 10) === 1) { // 10% chance to fail + // ray('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); + // throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); + // } for ($attempt = 0; $attempt < $maxRetries; $attempt++) { try { - // Execute the callback - $result = $callback(); - - // If we get here, it succeeded - if ($attempt > 0) { - Log::info('SSH operation succeeded after retry', array_merge($context, [ - 'attempt' => $attempt + 1, - ])); - } - - return $result; - + return $callback(); } catch (\Throwable $e) { $lastError = $e; $lastErrorMessage = $e->getMessage(); @@ -125,6 +119,12 @@ trait SshRetryable } if ($throwError && $lastError) { + // If the error message is empty, provide a more meaningful one + if (empty($lastErrorMessage) || trim($lastErrorMessage) === '') { + $contextInfo = isset($context['server']) ? " to server {$context['server']}" : ''; + $attemptInfo = $attempt > 1 ? " after {$attempt} attempts" : ''; + throw new \RuntimeException("SSH connection failed{$contextInfo}{$attemptInfo}", $lastError->getCode()); + } throw $lastError; } diff --git a/bootstrap/helpers/remoteProcess.php b/bootstrap/helpers/remoteProcess.php index 6efe4a405..b5bdeff49 100644 --- a/bootstrap/helpers/remoteProcess.php +++ b/bootstrap/helpers/remoteProcess.php @@ -159,11 +159,18 @@ function excludeCertainErrors(string $errorOutput, ?int $exitCode = null) 'Could not resolve hostname', ]); $ignored = $ignoredErrors->contains(fn ($error) => Str::contains($errorOutput, $error)); + + // Ensure we always have a meaningful error message + $errorMessage = trim($errorOutput); + if (empty($errorMessage)) { + $errorMessage = "SSH command failed with exit code: $exitCode"; + } + if ($ignored) { // TODO: Create new exception and disable in sentry - throw new \RuntimeException($errorOutput, $exitCode); + throw new \RuntimeException($errorMessage, $exitCode); } - throw new \RuntimeException($errorOutput, $exitCode); + throw new \RuntimeException($errorMessage, $exitCode); } function decode_remote_command_output(?ApplicationDeploymentQueue $application_deployment_queue = null): Collection From 45c75ad9c16e860af5065010bfa0bea7106d8e88 Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 18:57:20 +0200 Subject: [PATCH 5/5] feat(ssh): add Sentry tracking for SSH retry events to enhance error monitoring --- app/Traits/ExecuteRemoteCommand.php | 8 ++++++ app/Traits/SshRetryable.php | 41 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index 398f05bc9..0e7961368 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -62,6 +62,14 @@ trait ExecuteRemoteCommand if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) { $attempt++; $delay = $this->calculateRetryDelay($attempt - 1); + + // Track SSH retry event in Sentry + $this->trackSshRetryEvent($attempt, $maxRetries, $delay, $errorMessage, [ + 'server' => $this->server->name ?? $this->server->ip ?? 'unknown', + 'command' => remove_iip($command), + 'trait' => 'ExecuteRemoteCommand', + ]); + // Add log entry for the retry if (isset($this->application_deployment_queue)) { $this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage); diff --git a/app/Traits/SshRetryable.php b/app/Traits/SshRetryable.php index 2092dc5f3..a26481056 100644 --- a/app/Traits/SshRetryable.php +++ b/app/Traits/SshRetryable.php @@ -95,6 +95,9 @@ trait SshRetryable if ($this->isRetryableSshError($lastErrorMessage) && $attempt < $maxRetries - 1) { $delay = $this->calculateRetryDelay($attempt); + // Track SSH retry event in Sentry + $this->trackSshRetryEvent($attempt + 1, $maxRetries, $delay, $lastErrorMessage, $context); + // Add deployment log if available (for ExecuteRemoteCommand trait) if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) { $this->addRetryLogEntry($attempt + 1, $maxRetries, $delay, $lastErrorMessage); @@ -130,4 +133,42 @@ trait SshRetryable return null; } + + /** + * Track SSH retry event in Sentry + */ + protected function trackSshRetryEvent(int $attempt, int $maxRetries, int $delay, string $errorMessage, array $context = []): void + { + // Only track in production/cloud instances + if (isDev() || ! config('constants.sentry.sentry_dsn')) { + return; + } + + try { + app('sentry')->captureMessage( + 'SSH connection retry triggered', + \Sentry\Severity::warning(), + [ + 'extra' => [ + 'attempt' => $attempt, + 'max_retries' => $maxRetries, + 'delay_seconds' => $delay, + 'error_message' => $errorMessage, + 'context' => $context, + 'retryable_error' => true, + ], + 'tags' => [ + 'component' => 'ssh_retry', + 'error_type' => 'connection_retry', + ], + ] + ); + } catch (\Throwable $e) { + // Don't let Sentry tracking errors break the SSH retry flow + Log::warning('Failed to track SSH retry event in Sentry', [ + 'error' => $e->getMessage(), + 'original_attempt' => $attempt, + ]); + } + } }