From 579cc2589892d115ae7de1ea6ea0c781b2bc040c Mon Sep 17 00:00:00 2001 From: Andras Bacsai <5845193+andrasbacsai@users.noreply.github.com> Date: Sun, 7 Sep 2025 17:17:35 +0200 Subject: [PATCH] fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling --- app/Helpers/SshRetryHandler.php | 34 +++++ app/Traits/ExecuteRemoteCommand.php | 46 +------ app/Traits/SshRetryable.php | 133 +++++++++++++++++++ bootstrap/helpers/remoteProcess.php | 99 ++++++++------ tests/Unit/SshRetryMechanismTest.php | 189 +++++++++++++++++++++++++++ 5 files changed, 420 insertions(+), 81 deletions(-) create mode 100644 app/Helpers/SshRetryHandler.php create mode 100644 app/Traits/SshRetryable.php create mode 100644 tests/Unit/SshRetryMechanismTest.php diff --git a/app/Helpers/SshRetryHandler.php b/app/Helpers/SshRetryHandler.php new file mode 100644 index 000000000..aaaf4252a --- /dev/null +++ b/app/Helpers/SshRetryHandler.php @@ -0,0 +1,34 @@ +executeWithSshRetry($callback, $context, $throwError); + } +} diff --git a/app/Traits/ExecuteRemoteCommand.php b/app/Traits/ExecuteRemoteCommand.php index 436d0a0d4..0b770a6e0 100644 --- a/app/Traits/ExecuteRemoteCommand.php +++ b/app/Traits/ExecuteRemoteCommand.php @@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper; use App\Models\Server; use Carbon\Carbon; use Illuminate\Support\Collection; -use Illuminate\Support\Facades\Log; use Illuminate\Support\Facades\Process; trait ExecuteRemoteCommand { + use SshRetryable; + public ?string $save = null; public static int $batch_counter = 0; - /** - * Check if an error message indicates a retryable SSH connection error - */ - private function isRetryableSshError(string $errorOutput): bool - { - $retryablePatterns = [ - 'kex_exchange_identification', - 'Connection reset by peer', - 'Connection refused', - 'Connection timed out', - 'Connection closed by remote host', - 'ssh_exchange_identification', - 'Bad file descriptor', - 'Broken pipe', - 'No route to host', - 'Network is unreachable', - ]; - - foreach ($retryablePatterns as $pattern) { - if (str_contains($errorOutput, $pattern)) { - return true; - } - } - - return false; - } - - /** - * Calculate delay for exponential backoff - */ - private function calculateRetryDelay(int $attempt): int - { - $baseDelay = config('constants.ssh.retry_base_delay'); - $maxDelay = config('constants.ssh.retry_max_delay'); - $multiplier = config('constants.ssh.retry_multiplier'); - - $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay); - - return (int) $delay; - } - public function execute_remote_command(...$commands) { static::$batch_counter++; @@ -129,7 +89,7 @@ trait ExecuteRemoteCommand { $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command); // Randomly fail the command with a key exchange error for testing - // if (random_int(1, 10) === 1) { // 10% chance to fail + // if (random_int(1, 20) === 1) { // 5% chance to fail // throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer'); // } $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) { diff --git a/app/Traits/SshRetryable.php b/app/Traits/SshRetryable.php new file mode 100644 index 000000000..c2756c2ea --- /dev/null +++ b/app/Traits/SshRetryable.php @@ -0,0 +1,133 @@ + 0) { + Log::info('SSH operation succeeded after retry', array_merge($context, [ + 'attempt' => $attempt + 1, + ])); + } + + return $result; + + } catch (\Throwable $e) { + $lastError = $e; + $lastErrorMessage = $e->getMessage(); + + // Check if it's retryable and not the last attempt + if ($this->isRetryableSshError($lastErrorMessage) && $attempt < $maxRetries - 1) { + $delay = $this->calculateRetryDelay($attempt); + + // Add deployment log if available (for ExecuteRemoteCommand trait) + if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) { + $this->addRetryLogEntry($attempt + 1, $maxRetries, $delay, $lastErrorMessage); + } + + sleep($delay); + + continue; + } + + // Not retryable or max retries reached + break; + } + } + + // All retries exhausted + if ($attempt >= $maxRetries) { + Log::error('SSH operation failed after all retries', array_merge($context, [ + 'attempts' => $attempt, + 'error' => $lastErrorMessage, + ])); + } + + if ($throwError && $lastError) { + throw $lastError; + } + + return null; + } +} diff --git a/bootstrap/helpers/remoteProcess.php b/bootstrap/helpers/remoteProcess.php index 6c1e2beab..6efe4a405 100644 --- a/bootstrap/helpers/remoteProcess.php +++ b/bootstrap/helpers/remoteProcess.php @@ -60,15 +60,28 @@ function remote_process( function instant_scp(string $source, string $dest, Server $server, $throwError = true) { - $scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest); - $process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command); - $output = trim($process->output()); - $exitCode = $process->exitCode(); - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + return \App\Helpers\SshRetryHandler::retry( + function () use ($source, $dest, $server) { + $scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest); + $process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command); - return $output === 'null' ? null : $output; + $output = trim($process->output()); + $exitCode = $process->exitCode(); + + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } + + return $output === 'null' ? null : $output; + }, + [ + 'server' => $server->ip, + 'source' => $source, + 'dest' => $dest, + 'function' => 'instant_scp', + ], + $throwError + ); } function instant_remote_process_with_timeout(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string @@ -79,25 +92,30 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $ } $command_string = implode("\n", $command); - // $start_time = microtime(true); - $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); - $process = Process::timeout(30)->run($sshCommand); - // $end_time = microtime(true); + return \App\Helpers\SshRetryHandler::retry( + function () use ($server, $command_string) { + $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); + $process = Process::timeout(30)->run($sshCommand); - // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds - // ray('SSH command execution time:', $execution_time.' ms')->orange(); + $output = trim($process->output()); + $exitCode = $process->exitCode(); - $output = trim($process->output()); - $exitCode = $process->exitCode(); + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + // Sanitize output to ensure valid UTF-8 encoding + $output = $output === 'null' ? null : sanitize_utf8_text($output); - // Sanitize output to ensure valid UTF-8 encoding - $output = $output === 'null' ? null : sanitize_utf8_text($output); - - return $output; + return $output; + }, + [ + 'server' => $server->ip, + 'command_preview' => substr($command_string, 0, 100), + 'function' => 'instant_remote_process_with_timeout', + ], + $throwError + ); } function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string @@ -108,25 +126,30 @@ function instant_remote_process(Collection|array $command, Server $server, bool } $command_string = implode("\n", $command); - // $start_time = microtime(true); - $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); - $process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand); - // $end_time = microtime(true); + return \App\Helpers\SshRetryHandler::retry( + function () use ($server, $command_string) { + $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string); + $process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand); - // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds - // ray('SSH command execution time:', $execution_time.' ms')->orange(); + $output = trim($process->output()); + $exitCode = $process->exitCode(); - $output = trim($process->output()); - $exitCode = $process->exitCode(); + if ($exitCode !== 0) { + excludeCertainErrors($process->errorOutput(), $exitCode); + } - if ($exitCode !== 0) { - return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; - } + // Sanitize output to ensure valid UTF-8 encoding + $output = $output === 'null' ? null : sanitize_utf8_text($output); - // Sanitize output to ensure valid UTF-8 encoding - $output = $output === 'null' ? null : sanitize_utf8_text($output); - - return $output; + return $output; + }, + [ + 'server' => $server->ip, + 'command_preview' => substr($command_string, 0, 100), + 'function' => 'instant_remote_process', + ], + $throwError + ); } function excludeCertainErrors(string $errorOutput, ?int $exitCode = null) diff --git a/tests/Unit/SshRetryMechanismTest.php b/tests/Unit/SshRetryMechanismTest.php new file mode 100644 index 000000000..23e1b867f --- /dev/null +++ b/tests/Unit/SshRetryMechanismTest.php @@ -0,0 +1,189 @@ +assertTrue(class_exists(\App\Helpers\SshRetryHandler::class)); + } + + public function test_ssh_retryable_trait_exists() + { + $this->assertTrue(trait_exists(\App\Traits\SshRetryable::class)); + } + + public function test_retry_on_ssh_connection_errors() + { + $handler = new class + { + use SshRetryable; + + // Make methods public for testing + public function test_is_retryable_ssh_error($error) + { + return $this->isRetryableSshError($error); + } + }; + + // Test various SSH error patterns + $sshErrors = [ + 'kex_exchange_identification: read: Connection reset by peer', + 'Connection refused', + 'Connection timed out', + 'ssh_exchange_identification: Connection closed by remote host', + 'Broken pipe', + 'No route to host', + 'Network is unreachable', + ]; + + foreach ($sshErrors as $error) { + $this->assertTrue( + $handler->test_is_retryable_ssh_error($error), + "Failed to identify as retryable: $error" + ); + } + } + + public function test_non_ssh_errors_are_not_retryable() + { + $handler = new class + { + use SshRetryable; + + // Make methods public for testing + public function test_is_retryable_ssh_error($error) + { + return $this->isRetryableSshError($error); + } + }; + + // Test non-SSH errors + $nonSshErrors = [ + 'Command not found', + 'Permission denied', + 'File not found', + 'Syntax error', + 'Invalid argument', + ]; + + foreach ($nonSshErrors as $error) { + $this->assertFalse( + $handler->test_is_retryable_ssh_error($error), + "Incorrectly identified as retryable: $error" + ); + } + } + + public function test_exponential_backoff_calculation() + { + $handler = new class + { + use SshRetryable; + + // Make method public for testing + public function test_calculate_retry_delay($attempt) + { + return $this->calculateRetryDelay($attempt); + } + }; + + // Test with default config values + config(['constants.ssh.retry_base_delay' => 2]); + config(['constants.ssh.retry_max_delay' => 30]); + config(['constants.ssh.retry_multiplier' => 2]); + + // Attempt 0: 2 seconds + $this->assertEquals(2, $handler->test_calculate_retry_delay(0)); + + // Attempt 1: 4 seconds + $this->assertEquals(4, $handler->test_calculate_retry_delay(1)); + + // Attempt 2: 8 seconds + $this->assertEquals(8, $handler->test_calculate_retry_delay(2)); + + // Attempt 3: 16 seconds + $this->assertEquals(16, $handler->test_calculate_retry_delay(3)); + + // Attempt 4: Should be capped at 30 seconds + $this->assertEquals(30, $handler->test_calculate_retry_delay(4)); + + // Attempt 5: Should still be capped at 30 seconds + $this->assertEquals(30, $handler->test_calculate_retry_delay(5)); + } + + public function test_retry_succeeds_after_failures() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + // Simulate a function that fails twice then succeeds using the public static method + $result = SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + if ($attemptCount < 3) { + throw new \RuntimeException('kex_exchange_identification: Connection reset by peer'); + } + + return 'success'; + }, + ['test' => 'retry_test'], + true + ); + + $this->assertEquals('success', $result); + $this->assertEquals(3, $attemptCount); + } + + public function test_retry_fails_after_max_attempts() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Connection reset by peer'); + + // Simulate a function that always fails using the public static method + SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + throw new \RuntimeException('Connection reset by peer'); + }, + ['test' => 'retry_test'], + true + ); + } + + public function test_non_retryable_errors_fail_immediately() + { + $attemptCount = 0; + + config(['constants.ssh.max_retries' => 3]); + + $this->expectException(\RuntimeException::class); + $this->expectExceptionMessage('Command not found'); + + try { + // Simulate a non-retryable error using the public static method + SshRetryHandler::retry( + function () use (&$attemptCount) { + $attemptCount++; + throw new \RuntimeException('Command not found'); + }, + ['test' => 'non_retryable_test'], + true + ); + } catch (\RuntimeException $e) { + // Should only attempt once since it's not retryable + $this->assertEquals(1, $attemptCount); + throw $e; + } + } +}