fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling
This commit is contained in:
34
app/Helpers/SshRetryHandler.php
Normal file
34
app/Helpers/SshRetryHandler.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
namespace App\Helpers;
|
||||
|
||||
use App\Traits\SshRetryable;
|
||||
|
||||
/**
|
||||
* Helper class to use SshRetryable trait in non-class contexts
|
||||
*/
|
||||
class SshRetryHandler
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
/**
|
||||
* Static method to get a singleton instance
|
||||
*/
|
||||
public static function instance(): self
|
||||
{
|
||||
static $instance = null;
|
||||
if ($instance === null) {
|
||||
$instance = new self;
|
||||
}
|
||||
|
||||
return $instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience static method for retry execution
|
||||
*/
|
||||
public static function retry(callable $callback, array $context = [], bool $throwError = true)
|
||||
{
|
||||
return self::instance()->executeWithSshRetry($callback, $context, $throwError);
|
||||
}
|
||||
}
|
@@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper;
|
||||
use App\Models\Server;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
|
||||
trait ExecuteRemoteCommand
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
public ?string $save = null;
|
||||
|
||||
public static int $batch_counter = 0;
|
||||
|
||||
/**
|
||||
* Check if an error message indicates a retryable SSH connection error
|
||||
*/
|
||||
private function isRetryableSshError(string $errorOutput): bool
|
||||
{
|
||||
$retryablePatterns = [
|
||||
'kex_exchange_identification',
|
||||
'Connection reset by peer',
|
||||
'Connection refused',
|
||||
'Connection timed out',
|
||||
'Connection closed by remote host',
|
||||
'ssh_exchange_identification',
|
||||
'Bad file descriptor',
|
||||
'Broken pipe',
|
||||
'No route to host',
|
||||
'Network is unreachable',
|
||||
];
|
||||
|
||||
foreach ($retryablePatterns as $pattern) {
|
||||
if (str_contains($errorOutput, $pattern)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate delay for exponential backoff
|
||||
*/
|
||||
private function calculateRetryDelay(int $attempt): int
|
||||
{
|
||||
$baseDelay = config('constants.ssh.retry_base_delay');
|
||||
$maxDelay = config('constants.ssh.retry_max_delay');
|
||||
$multiplier = config('constants.ssh.retry_multiplier');
|
||||
|
||||
$delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay);
|
||||
|
||||
return (int) $delay;
|
||||
}
|
||||
|
||||
public function execute_remote_command(...$commands)
|
||||
{
|
||||
static::$batch_counter++;
|
||||
@@ -129,7 +89,7 @@ trait ExecuteRemoteCommand
|
||||
{
|
||||
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
|
||||
// Randomly fail the command with a key exchange error for testing
|
||||
// if (random_int(1, 10) === 1) { // 10% chance to fail
|
||||
// if (random_int(1, 20) === 1) { // 5% chance to fail
|
||||
// throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer');
|
||||
// }
|
||||
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
|
||||
|
133
app/Traits/SshRetryable.php
Normal file
133
app/Traits/SshRetryable.php
Normal file
@@ -0,0 +1,133 @@
|
||||
<?php
|
||||
|
||||
namespace App\Traits;
|
||||
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
trait SshRetryable
|
||||
{
|
||||
/**
|
||||
* Check if an error message indicates a retryable SSH connection error
|
||||
*/
|
||||
protected function isRetryableSshError(string $errorOutput): bool
|
||||
{
|
||||
$retryablePatterns = [
|
||||
'kex_exchange_identification',
|
||||
'Connection reset by peer',
|
||||
'Connection refused',
|
||||
'Connection timed out',
|
||||
'Connection closed by remote host',
|
||||
'ssh_exchange_identification',
|
||||
'Bad file descriptor',
|
||||
'Broken pipe',
|
||||
'No route to host',
|
||||
'Network is unreachable',
|
||||
'Host is down',
|
||||
'No buffer space available',
|
||||
'Connection reset by',
|
||||
'Permission denied, please try again',
|
||||
'Received disconnect from',
|
||||
'Disconnected from',
|
||||
'Connection to .* closed',
|
||||
'ssh: connect to host .* port .*: Connection',
|
||||
'Lost connection',
|
||||
'Timeout, server not responding',
|
||||
'Cannot assign requested address',
|
||||
'Network is down',
|
||||
'Host key verification failed',
|
||||
'Operation timed out',
|
||||
'Connection closed unexpectedly',
|
||||
'Remote host closed connection',
|
||||
'Authentication failed',
|
||||
'Too many authentication failures',
|
||||
];
|
||||
|
||||
$lowerErrorOutput = strtolower($errorOutput);
|
||||
foreach ($retryablePatterns as $pattern) {
|
||||
if (str_contains($lowerErrorOutput, strtolower($pattern))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate delay for exponential backoff
|
||||
*/
|
||||
protected function calculateRetryDelay(int $attempt): int
|
||||
{
|
||||
$baseDelay = config('constants.ssh.retry_base_delay', 2);
|
||||
$maxDelay = config('constants.ssh.retry_max_delay', 30);
|
||||
$multiplier = config('constants.ssh.retry_multiplier', 2);
|
||||
|
||||
$delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay);
|
||||
|
||||
return (int) $delay;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a callback with SSH retry logic
|
||||
*
|
||||
* @param callable $callback The operation to execute
|
||||
* @param array $context Context for logging (server, command, etc.)
|
||||
* @param bool $throwError Whether to throw error on final failure
|
||||
* @return mixed The result from the callback
|
||||
*/
|
||||
protected function executeWithSshRetry(callable $callback, array $context = [], bool $throwError = true)
|
||||
{
|
||||
$maxRetries = config('constants.ssh.max_retries', 3);
|
||||
$lastError = null;
|
||||
$lastErrorMessage = '';
|
||||
for ($attempt = 0; $attempt < $maxRetries; $attempt++) {
|
||||
try {
|
||||
// Execute the callback
|
||||
$result = $callback();
|
||||
|
||||
// If we get here, it succeeded
|
||||
if ($attempt > 0) {
|
||||
Log::info('SSH operation succeeded after retry', array_merge($context, [
|
||||
'attempt' => $attempt + 1,
|
||||
]));
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
} catch (\Throwable $e) {
|
||||
$lastError = $e;
|
||||
$lastErrorMessage = $e->getMessage();
|
||||
|
||||
// Check if it's retryable and not the last attempt
|
||||
if ($this->isRetryableSshError($lastErrorMessage) && $attempt < $maxRetries - 1) {
|
||||
$delay = $this->calculateRetryDelay($attempt);
|
||||
|
||||
// Add deployment log if available (for ExecuteRemoteCommand trait)
|
||||
if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) {
|
||||
$this->addRetryLogEntry($attempt + 1, $maxRetries, $delay, $lastErrorMessage);
|
||||
}
|
||||
|
||||
sleep($delay);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not retryable or max retries reached
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// All retries exhausted
|
||||
if ($attempt >= $maxRetries) {
|
||||
Log::error('SSH operation failed after all retries', array_merge($context, [
|
||||
'attempts' => $attempt,
|
||||
'error' => $lastErrorMessage,
|
||||
]));
|
||||
}
|
||||
|
||||
if ($throwError && $lastError) {
|
||||
throw $lastError;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
@@ -60,15 +60,28 @@ function remote_process(
|
||||
|
||||
function instant_scp(string $source, string $dest, Server $server, $throwError = true)
|
||||
{
|
||||
$scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest);
|
||||
$process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command);
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
if ($exitCode !== 0) {
|
||||
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
|
||||
}
|
||||
return \App\Helpers\SshRetryHandler::retry(
|
||||
function () use ($source, $dest, $server) {
|
||||
$scp_command = SshMultiplexingHelper::generateScpCommand($server, $source, $dest);
|
||||
$process = Process::timeout(config('constants.ssh.command_timeout'))->run($scp_command);
|
||||
|
||||
return $output === 'null' ? null : $output;
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
excludeCertainErrors($process->errorOutput(), $exitCode);
|
||||
}
|
||||
|
||||
return $output === 'null' ? null : $output;
|
||||
},
|
||||
[
|
||||
'server' => $server->ip,
|
||||
'source' => $source,
|
||||
'dest' => $dest,
|
||||
'function' => 'instant_scp',
|
||||
],
|
||||
$throwError
|
||||
);
|
||||
}
|
||||
|
||||
function instant_remote_process_with_timeout(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
|
||||
@@ -79,25 +92,30 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $
|
||||
}
|
||||
$command_string = implode("\n", $command);
|
||||
|
||||
// $start_time = microtime(true);
|
||||
$sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
|
||||
$process = Process::timeout(30)->run($sshCommand);
|
||||
// $end_time = microtime(true);
|
||||
return \App\Helpers\SshRetryHandler::retry(
|
||||
function () use ($server, $command_string) {
|
||||
$sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
|
||||
$process = Process::timeout(30)->run($sshCommand);
|
||||
|
||||
// $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds
|
||||
// ray('SSH command execution time:', $execution_time.' ms')->orange();
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
if ($exitCode !== 0) {
|
||||
excludeCertainErrors($process->errorOutput(), $exitCode);
|
||||
}
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
|
||||
}
|
||||
// Sanitize output to ensure valid UTF-8 encoding
|
||||
$output = $output === 'null' ? null : sanitize_utf8_text($output);
|
||||
|
||||
// Sanitize output to ensure valid UTF-8 encoding
|
||||
$output = $output === 'null' ? null : sanitize_utf8_text($output);
|
||||
|
||||
return $output;
|
||||
return $output;
|
||||
},
|
||||
[
|
||||
'server' => $server->ip,
|
||||
'command_preview' => substr($command_string, 0, 100),
|
||||
'function' => 'instant_remote_process_with_timeout',
|
||||
],
|
||||
$throwError
|
||||
);
|
||||
}
|
||||
|
||||
function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
|
||||
@@ -108,25 +126,30 @@ function instant_remote_process(Collection|array $command, Server $server, bool
|
||||
}
|
||||
$command_string = implode("\n", $command);
|
||||
|
||||
// $start_time = microtime(true);
|
||||
$sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
|
||||
$process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand);
|
||||
// $end_time = microtime(true);
|
||||
return \App\Helpers\SshRetryHandler::retry(
|
||||
function () use ($server, $command_string) {
|
||||
$sshCommand = SshMultiplexingHelper::generateSshCommand($server, $command_string);
|
||||
$process = Process::timeout(config('constants.ssh.command_timeout'))->run($sshCommand);
|
||||
|
||||
// $execution_time = ($end_time - $start_time) * 1000; // Convert to milliseconds
|
||||
// ray('SSH command execution time:', $execution_time.' ms')->orange();
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
|
||||
$output = trim($process->output());
|
||||
$exitCode = $process->exitCode();
|
||||
if ($exitCode !== 0) {
|
||||
excludeCertainErrors($process->errorOutput(), $exitCode);
|
||||
}
|
||||
|
||||
if ($exitCode !== 0) {
|
||||
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
|
||||
}
|
||||
// Sanitize output to ensure valid UTF-8 encoding
|
||||
$output = $output === 'null' ? null : sanitize_utf8_text($output);
|
||||
|
||||
// Sanitize output to ensure valid UTF-8 encoding
|
||||
$output = $output === 'null' ? null : sanitize_utf8_text($output);
|
||||
|
||||
return $output;
|
||||
return $output;
|
||||
},
|
||||
[
|
||||
'server' => $server->ip,
|
||||
'command_preview' => substr($command_string, 0, 100),
|
||||
'function' => 'instant_remote_process',
|
||||
],
|
||||
$throwError
|
||||
);
|
||||
}
|
||||
|
||||
function excludeCertainErrors(string $errorOutput, ?int $exitCode = null)
|
||||
|
189
tests/Unit/SshRetryMechanismTest.php
Normal file
189
tests/Unit/SshRetryMechanismTest.php
Normal file
@@ -0,0 +1,189 @@
|
||||
<?php
|
||||
|
||||
namespace Tests\Unit;
|
||||
|
||||
use App\Helpers\SshRetryHandler;
|
||||
use App\Traits\SshRetryable;
|
||||
use Tests\TestCase;
|
||||
|
||||
class SshRetryMechanismTest extends TestCase
|
||||
{
|
||||
public function test_ssh_retry_handler_exists()
|
||||
{
|
||||
$this->assertTrue(class_exists(\App\Helpers\SshRetryHandler::class));
|
||||
}
|
||||
|
||||
public function test_ssh_retryable_trait_exists()
|
||||
{
|
||||
$this->assertTrue(trait_exists(\App\Traits\SshRetryable::class));
|
||||
}
|
||||
|
||||
public function test_retry_on_ssh_connection_errors()
|
||||
{
|
||||
$handler = new class
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
// Make methods public for testing
|
||||
public function test_is_retryable_ssh_error($error)
|
||||
{
|
||||
return $this->isRetryableSshError($error);
|
||||
}
|
||||
};
|
||||
|
||||
// Test various SSH error patterns
|
||||
$sshErrors = [
|
||||
'kex_exchange_identification: read: Connection reset by peer',
|
||||
'Connection refused',
|
||||
'Connection timed out',
|
||||
'ssh_exchange_identification: Connection closed by remote host',
|
||||
'Broken pipe',
|
||||
'No route to host',
|
||||
'Network is unreachable',
|
||||
];
|
||||
|
||||
foreach ($sshErrors as $error) {
|
||||
$this->assertTrue(
|
||||
$handler->test_is_retryable_ssh_error($error),
|
||||
"Failed to identify as retryable: $error"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public function test_non_ssh_errors_are_not_retryable()
|
||||
{
|
||||
$handler = new class
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
// Make methods public for testing
|
||||
public function test_is_retryable_ssh_error($error)
|
||||
{
|
||||
return $this->isRetryableSshError($error);
|
||||
}
|
||||
};
|
||||
|
||||
// Test non-SSH errors
|
||||
$nonSshErrors = [
|
||||
'Command not found',
|
||||
'Permission denied',
|
||||
'File not found',
|
||||
'Syntax error',
|
||||
'Invalid argument',
|
||||
];
|
||||
|
||||
foreach ($nonSshErrors as $error) {
|
||||
$this->assertFalse(
|
||||
$handler->test_is_retryable_ssh_error($error),
|
||||
"Incorrectly identified as retryable: $error"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public function test_exponential_backoff_calculation()
|
||||
{
|
||||
$handler = new class
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
// Make method public for testing
|
||||
public function test_calculate_retry_delay($attempt)
|
||||
{
|
||||
return $this->calculateRetryDelay($attempt);
|
||||
}
|
||||
};
|
||||
|
||||
// Test with default config values
|
||||
config(['constants.ssh.retry_base_delay' => 2]);
|
||||
config(['constants.ssh.retry_max_delay' => 30]);
|
||||
config(['constants.ssh.retry_multiplier' => 2]);
|
||||
|
||||
// Attempt 0: 2 seconds
|
||||
$this->assertEquals(2, $handler->test_calculate_retry_delay(0));
|
||||
|
||||
// Attempt 1: 4 seconds
|
||||
$this->assertEquals(4, $handler->test_calculate_retry_delay(1));
|
||||
|
||||
// Attempt 2: 8 seconds
|
||||
$this->assertEquals(8, $handler->test_calculate_retry_delay(2));
|
||||
|
||||
// Attempt 3: 16 seconds
|
||||
$this->assertEquals(16, $handler->test_calculate_retry_delay(3));
|
||||
|
||||
// Attempt 4: Should be capped at 30 seconds
|
||||
$this->assertEquals(30, $handler->test_calculate_retry_delay(4));
|
||||
|
||||
// Attempt 5: Should still be capped at 30 seconds
|
||||
$this->assertEquals(30, $handler->test_calculate_retry_delay(5));
|
||||
}
|
||||
|
||||
public function test_retry_succeeds_after_failures()
|
||||
{
|
||||
$attemptCount = 0;
|
||||
|
||||
config(['constants.ssh.max_retries' => 3]);
|
||||
|
||||
// Simulate a function that fails twice then succeeds using the public static method
|
||||
$result = SshRetryHandler::retry(
|
||||
function () use (&$attemptCount) {
|
||||
$attemptCount++;
|
||||
if ($attemptCount < 3) {
|
||||
throw new \RuntimeException('kex_exchange_identification: Connection reset by peer');
|
||||
}
|
||||
|
||||
return 'success';
|
||||
},
|
||||
['test' => 'retry_test'],
|
||||
true
|
||||
);
|
||||
|
||||
$this->assertEquals('success', $result);
|
||||
$this->assertEquals(3, $attemptCount);
|
||||
}
|
||||
|
||||
public function test_retry_fails_after_max_attempts()
|
||||
{
|
||||
$attemptCount = 0;
|
||||
|
||||
config(['constants.ssh.max_retries' => 3]);
|
||||
|
||||
$this->expectException(\RuntimeException::class);
|
||||
$this->expectExceptionMessage('Connection reset by peer');
|
||||
|
||||
// Simulate a function that always fails using the public static method
|
||||
SshRetryHandler::retry(
|
||||
function () use (&$attemptCount) {
|
||||
$attemptCount++;
|
||||
throw new \RuntimeException('Connection reset by peer');
|
||||
},
|
||||
['test' => 'retry_test'],
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
public function test_non_retryable_errors_fail_immediately()
|
||||
{
|
||||
$attemptCount = 0;
|
||||
|
||||
config(['constants.ssh.max_retries' => 3]);
|
||||
|
||||
$this->expectException(\RuntimeException::class);
|
||||
$this->expectExceptionMessage('Command not found');
|
||||
|
||||
try {
|
||||
// Simulate a non-retryable error using the public static method
|
||||
SshRetryHandler::retry(
|
||||
function () use (&$attemptCount) {
|
||||
$attemptCount++;
|
||||
throw new \RuntimeException('Command not found');
|
||||
},
|
||||
['test' => 'non_retryable_test'],
|
||||
true
|
||||
);
|
||||
} catch (\RuntimeException $e) {
|
||||
// Should only attempt once since it's not retryable
|
||||
$this->assertEquals(1, $attemptCount);
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user