fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling

This commit is contained in:
Andras Bacsai
2025-09-07 17:17:35 +02:00
parent b847740924
commit 579cc25898
5 changed files with 420 additions and 81 deletions

View File

@@ -0,0 +1,34 @@
<?php
namespace App\Helpers;
use App\Traits\SshRetryable;
/**
 * Thin wrapper that makes the SshRetryable retry logic reachable from plain
 * functions, i.e. code that has no class of its own to attach the trait to.
 */
class SshRetryHandler
{
    use SshRetryable;

    /**
     * Return the shared handler, creating it on first use.
     *
     * The instance is kept in a function-local static and reused by later calls.
     */
    public static function instance(): self
    {
        static $instance = null;

        return $instance ??= new self;
    }

    /**
     * Run a callback through executeWithSshRetry() on the shared handler.
     *
     * @param callable $callback   Operation to run
     * @param array    $context    Forwarded unchanged to executeWithSshRetry()
     * @param bool     $throwError Forwarded unchanged to executeWithSshRetry()
     * @return mixed Whatever executeWithSshRetry() returns
     */
    public static function retry(callable $callback, array $context = [], bool $throwError = true)
    {
        $handler = self::instance();

        return $handler->executeWithSshRetry($callback, $context, $throwError);
    }
}

View File

@@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper;
use App\Models\Server;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process;
trait ExecuteRemoteCommand
{
use SshRetryable;
public ?string $save = null;
public static int $batch_counter = 0;
/**
 * Tell whether an error message carries one of the known transient SSH
 * connection failure signatures.
 *
 * The comparison is a case-sensitive substring search.
 *
 * @param string $errorOutput Error text to inspect
 * @return bool True when at least one signature occurs in the text
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $signatures = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    $hits = array_filter(
        $signatures,
        static fn (string $signature): bool => str_contains($errorOutput, $signature)
    );

    return $hits !== [];
}
/**
 * Work out how long to wait before the next retry.
 *
 * The wait grows as base * multiplier^attempt and is capped at the
 * configured maximum. All three values are read from constants.ssh.* config.
 *
 * @param int $attempt Zero-based index of the attempt that just failed
 * @return int Delay, truncated to an integer
 */
private function calculateRetryDelay(int $attempt): int
{
    $base = config('constants.ssh.retry_base_delay');
    $cap = config('constants.ssh.retry_max_delay');
    $factor = config('constants.ssh.retry_multiplier');

    $uncapped = $base * ($factor ** $attempt);

    return (int) min($uncapped, $cap);
}
public function execute_remote_command(...$commands)
{
static::$batch_counter++;
@@ -129,7 +89,7 @@ trait ExecuteRemoteCommand
{
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
// Randomly fail the command with a key exchange error for testing
// if (random_int(1, 10) === 1) { // 10% chance to fail
// if (random_int(1, 20) === 1) { // 5% chance to fail
// throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer');
// }
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {

133
app/Traits/SshRetryable.php Normal file
View File

@@ -0,0 +1,133 @@
<?php
namespace App\Traits;
use Illuminate\Support\Facades\Log;
trait SshRetryable
{
    /**
     * Check if an error message indicates a retryable SSH connection error.
     *
     * Matching is case-insensitive. Most signatures are plain substrings. The
     * ones that need a wildcard are kept as regular expressions, because a
     * wildcard written inside a str_contains() needle is compared literally
     * and would never match real output.
     *
     * @param string $errorOutput Error text to inspect (an exception message when
     *                            called from executeWithSshRetry())
     * @return bool True when any known signature is found
     */
    protected function isRetryableSshError(string $errorOutput): bool
    {
        // Plain substrings, compared case-insensitively.
        // NOTE(review): the authentication / host-key entries below are usually
        // permanent failures; they are kept as-is to preserve existing behaviour,
        // but confirm that retrying them is really intended.
        $retryablePatterns = [
            'kex_exchange_identification',
            'Connection reset by peer',
            'Connection refused',
            'Connection timed out',
            'Connection closed by remote host',
            'ssh_exchange_identification',
            'Bad file descriptor',
            'Broken pipe',
            'No route to host',
            'Network is unreachable',
            'Host is down',
            'No buffer space available',
            'Connection reset by',
            'Permission denied, please try again',
            'Received disconnect from',
            'Disconnected from',
            'Lost connection',
            'Timeout, server not responding',
            'Cannot assign requested address',
            'Network is down',
            'Host key verification failed',
            'Operation timed out',
            'Connection closed unexpectedly',
            'Remote host closed connection',
            'Authentication failed',
            'Too many authentication failures',
        ];

        // Signatures with a variable part (host name, port number).
        $retryableRegexes = [
            '/connection to .* closed/i',
            '/ssh: connect to host .* port .*: connection/i',
        ];

        $lowerErrorOutput = strtolower($errorOutput);
        foreach ($retryablePatterns as $pattern) {
            if (str_contains($lowerErrorOutput, strtolower($pattern))) {
                return true;
            }
        }

        foreach ($retryableRegexes as $regex) {
            if (preg_match($regex, $errorOutput) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Calculate delay for exponential backoff.
     *
     * The delay is base * multiplier^attempt, capped at the configured maximum
     * and never negative (sleep() rejects negative values).
     *
     * @param int $attempt Zero-based index of the attempt that just failed
     * @return int Delay in seconds
     */
    protected function calculateRetryDelay(int $attempt): int
    {
        $baseDelay = config('constants.ssh.retry_base_delay', 2);
        $maxDelay = config('constants.ssh.retry_max_delay', 30);
        $multiplier = config('constants.ssh.retry_multiplier', 2);

        $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay);

        return (int) max(0, $delay);
    }

    /**
     * Execute a callback with SSH retry logic.
     *
     * The callback is always run at least once. When it throws and the message
     * is recognised by isRetryableSshError(), it is run again after a backoff
     * delay, up to constants.ssh.max_retries attempts in total. Any other error
     * stops the loop immediately.
     *
     * @param callable $callback The operation to execute
     * @param array $context Context for logging (server, command, etc.)
     * @param bool $throwError Whether to rethrow the last error on final failure
     * @return mixed The result from the callback, or null when it failed and
     *               $throwError is false
     *
     * @throws \Throwable The last error raised by the callback, when $throwError is true
     */
    protected function executeWithSshRetry(callable $callback, array $context = [], bool $throwError = true)
    {
        // Clamp to one attempt so a zero/negative config value cannot turn the
        // whole operation into a silent no-op.
        $maxRetries = max(1, (int) config('constants.ssh.max_retries', 3));
        $lastError = null;
        $lastErrorMessage = '';
        $retriesExhausted = false;

        for ($attempt = 0; $attempt < $maxRetries; $attempt++) {
            try {
                $result = $callback();

                if ($attempt > 0) {
                    Log::info('SSH operation succeeded after retry', array_merge($context, [
                        'attempt' => $attempt + 1,
                    ]));
                }

                return $result;
            } catch (\Throwable $e) {
                $lastError = $e;
                $lastErrorMessage = $e->getMessage();
                $isRetryable = $this->isRetryableSshError($lastErrorMessage);

                if ($isRetryable && $attempt < $maxRetries - 1) {
                    $delay = $this->calculateRetryDelay($attempt);

                    // Add deployment log if available (for ExecuteRemoteCommand trait)
                    if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) {
                        $this->addRetryLogEntry($attempt + 1, $maxRetries, $delay, $lastErrorMessage);
                    }

                    sleep($delay);

                    continue;
                }

                // Either the error is not retryable, or it is retryable but this
                // was the final attempt. Only the latter counts as "exhausted".
                $retriesExhausted = $isRetryable;

                break;
            }
        }

        if ($retriesExhausted) {
            Log::error('SSH operation failed after all retries', array_merge($context, [
                'attempts' => $maxRetries,
                'error' => $lastErrorMessage,
            ]));
        }

        if ($throwError && $lastError) {
            throw $lastError;
        }

        return null;
    }
}

View File

@@ -60,15 +60,28 @@ function remote_process(
/**
 * Copy $source to $dest on $server with scp, wrapped in SshRetryHandler::retry().
 *
 * @param string $source     Source path handed to generateScpCommand()
 * @param string $dest       Destination path handed to generateScpCommand()
 * @param Server $server     Target server
 * @param bool   $throwError Forwarded to SshRetryHandler::retry() as its $throwError argument
 * @return mixed Trimmed scp output (null when the output is the literal string "null"),
 *               or whatever SshRetryHandler::retry() returns on failure
 */
function instant_scp(string $source, string $dest, Server $server, $throwError = true)
{
    return \App\Helpers\SshRetryHandler::retry(
        function () use ($source, $dest, $server) {
            $scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest);
            $process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command);

            $output = trim($process->output());
            $exitCode = $process->exitCode();

            if ($exitCode !== 0) {
                // Hand the failure to excludeCertainErrors() instead of returning
                // early: the retry wrapper only reacts to exceptions, and it is
                // the wrapper (not this closure) that honours $throwError.
                // NOTE(review): presumably this throws unless the error is on its
                // ignore list; the helper is defined outside this view.
                excludeCertainErrors($process->errorOutput(), $exitCode);
            }

            return $output === 'null' ? null : $output;
        },
        [
            'server' => $server->ip,
            'source' => $source,
            'dest' => $dest,
            'function' => 'instant_scp',
        ],
        $throwError
    );
}
function instant_remote_process_with_timeout(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
@@ -79,25 +92,30 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $
}
$command_string = implode("\n", $command);
// $start_time = microtime(true);
// Run the command through the shared retry wrapper. The wrapper decides,
// based on $throwError, whether a final failure is rethrown or turned into null.
return \App\Helpers\SshRetryHandler::retry(
    function () use ($server, $command_string) {
        $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
        $process = Process::timeout(30)->run($sshCommand);

        // $end_time = microtime(true);
        // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds
        // ray('SSH command execution time:', $execution_time.' ms')->orange();

        $output = trim($process->output());
        $exitCode = $process->exitCode();

        if ($exitCode !== 0) {
            // Do not return early here: the retry wrapper only reacts to
            // exceptions, and $throwError is not in scope inside this closure.
            // NOTE(review): presumably excludeCertainErrors() throws unless the
            // error is on its ignore list; it is defined outside this view.
            excludeCertainErrors($process->errorOutput(), $exitCode);
        }

        // Sanitize output to ensure valid UTF-8 encoding
        $output = $output === 'null' ? null : sanitize_utf8_text($output);

        return $output;
    },
    [
        'server' => $server->ip,
        'command_preview' => substr($command_string, 0, 100),
        'function' => 'instant_remote_process_with_timeout',
    ],
    $throwError
);
}
function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
@@ -108,25 +126,30 @@ function instant_remote_process(Collection|array $command, Server $server, bool
}
$command_string = implode("\n", $command);
// $start_time = microtime(true);
// Run the command through the shared retry wrapper. The wrapper decides,
// based on $throwError, whether a final failure is rethrown or turned into null.
return \App\Helpers\SshRetryHandler::retry(
    function () use ($server, $command_string) {
        $sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
        $process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand);

        // $end_time = microtime(true);
        // $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds
        // ray('SSH command execution time:', $execution_time.' ms')->orange();

        $output = trim($process->output());
        $exitCode = $process->exitCode();

        if ($exitCode !== 0) {
            // Do not return early here: the retry wrapper only reacts to
            // exceptions, and $throwError is not in scope inside this closure.
            // NOTE(review): presumably excludeCertainErrors() throws unless the
            // error is on its ignore list; it is defined outside this view.
            excludeCertainErrors($process->errorOutput(), $exitCode);
        }

        // Sanitize output to ensure valid UTF-8 encoding
        $output = $output === 'null' ? null : sanitize_utf8_text($output);

        return $output;
    },
    [
        'server' => $server->ip,
        'command_preview' => substr($command_string, 0, 100),
        'function' => 'instant_remote_process',
    ],
    $throwError
);
}
function excludeCertainErrors(string $errorOutput, ?int $exitCode = null)

View File

@@ -0,0 +1,189 @@
<?php
namespace Tests\Unit;
use App\Helpers\SshRetryHandler;
use App\Traits\SshRetryable;
use Tests\TestCase;
/**
 * Unit tests for the SSH retry helper (SshRetryHandler) and the SshRetryable trait.
 */
class SshRetryMechanismTest extends TestCase
{
    /**
     * Build a throwaway object that uses SshRetryable and re-exposes its
     * protected helpers publicly so the tests can call them.
     */
    private function makeRetryableProbe(): object
    {
        return new class
        {
            use SshRetryable;

            public function isRetryable($error)
            {
                return $this->isRetryableSshError($error);
            }

            public function delayFor($attempt)
            {
                return $this->calculateRetryDelay($attempt);
            }
        };
    }

    /**
     * Zero the backoff so tests that exercise the retry loop do not actually
     * sleep between attempts.
     */
    private function disableBackoffDelay(): void
    {
        config([
            'constants.ssh.retry_base_delay' => 0,
            'constants.ssh.retry_max_delay' => 0,
        ]);
    }

    public function test_ssh_retry_handler_exists(): void
    {
        $this->assertTrue(class_exists(\App\Helpers\SshRetryHandler::class));
    }

    public function test_ssh_retryable_trait_exists(): void
    {
        $this->assertTrue(trait_exists(\App\Traits\SshRetryable::class));
    }

    public function test_retry_on_ssh_connection_errors(): void
    {
        $handler = $this->makeRetryableProbe();

        // Test various SSH error patterns
        $sshErrors = [
            'kex_exchange_identification: read: Connection reset by peer',
            'Connection refused',
            'Connection timed out',
            'ssh_exchange_identification: Connection closed by remote host',
            'Broken pipe',
            'No route to host',
            'Network is unreachable',
        ];

        foreach ($sshErrors as $error) {
            $this->assertTrue(
                $handler->isRetryable($error),
                "Failed to identify as retryable: $error"
            );
        }
    }

    public function test_non_ssh_errors_are_not_retryable(): void
    {
        $handler = $this->makeRetryableProbe();

        // Test non-SSH errors
        $nonSshErrors = [
            'Command not found',
            'Permission denied',
            'File not found',
            'Syntax error',
            'Invalid argument',
        ];

        foreach ($nonSshErrors as $error) {
            $this->assertFalse(
                $handler->isRetryable($error),
                "Incorrectly identified as retryable: $error"
            );
        }
    }

    public function test_exponential_backoff_calculation(): void
    {
        $handler = $this->makeRetryableProbe();

        // Test with default config values
        config(['constants.ssh.retry_base_delay' => 2]);
        config(['constants.ssh.retry_max_delay' => 30]);
        config(['constants.ssh.retry_multiplier' => 2]);

        // Attempts 0-3 double each time; attempts 4 and 5 are capped at 30 seconds.
        $expectedDelays = [0 => 2, 1 => 4, 2 => 8, 3 => 16, 4 => 30, 5 => 30];

        foreach ($expectedDelays as $attempt => $expected) {
            $this->assertSame(
                $expected,
                $handler->delayFor($attempt),
                "Unexpected delay for attempt $attempt"
            );
        }
    }

    public function test_retry_succeeds_after_failures(): void
    {
        $attemptCount = 0;

        config(['constants.ssh.max_retries' => 3]);
        $this->disableBackoffDelay();

        // Simulate a function that fails twice then succeeds using the public static method
        $result = SshRetryHandler::retry(
            function () use (&$attemptCount) {
                $attemptCount++;
                if ($attemptCount < 3) {
                    throw new \RuntimeException('kex_exchange_identification: Connection reset by peer');
                }

                return 'success';
            },
            ['test' => 'retry_test'],
            true
        );

        $this->assertSame('success', $result);
        $this->assertSame(3, $attemptCount);
    }

    public function test_retry_fails_after_max_attempts(): void
    {
        $attemptCount = 0;

        config(['constants.ssh.max_retries' => 3]);
        $this->disableBackoffDelay();

        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('Connection reset by peer');

        try {
            // Simulate a function that always fails using the public static method
            SshRetryHandler::retry(
                function () use (&$attemptCount) {
                    $attemptCount++;
                    throw new \RuntimeException('Connection reset by peer');
                },
                ['test' => 'retry_test'],
                true
            );
        } catch (\RuntimeException $e) {
            // Every configured attempt must have been used before giving up
            $this->assertSame(3, $attemptCount);
            throw $e;
        }
    }

    public function test_non_retryable_errors_fail_immediately(): void
    {
        $attemptCount = 0;

        config(['constants.ssh.max_retries' => 3]);
        $this->disableBackoffDelay();

        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('Command not found');

        try {
            // Simulate a non-retryable error using the public static method
            SshRetryHandler::retry(
                function () use (&$attemptCount) {
                    $attemptCount++;
                    throw new \RuntimeException('Command not found');
                },
                ['test' => 'non_retryable_test'],
                true
            );
        } catch (\RuntimeException $e) {
            // Should only attempt once since it's not retryable
            $this->assertSame(1, $attemptCount);
            throw $e;
        }
    }
}