fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling

This commit is contained in:
Andras Bacsai
2025-09-07 17:17:35 +02:00
parent b847740924
commit 579cc25898
5 changed files with 420 additions and 81 deletions

View File

@@ -0,0 +1,34 @@
<?php
namespace App\Helpers;
use App\Traits\SshRetryable;
/**
 * Standalone carrier for the SshRetryable trait.
 *
 * Gives code that is not itself a class using the trait (non-class contexts)
 * a way to run an operation through the trait's SSH retry logic.
 */
class SshRetryHandler
{
    use SshRetryable;

    /**
     * Return the shared handler, creating it on first use.
     */
    public static function instance(): self
    {
        // Function-level static: survives between calls, so only one object is ever built.
        static $shared = null;

        return $shared ??= new self;
    }

    /**
     * Run a callback through executeWithSshRetry() on the shared instance.
     *
     * @param  callable  $callback  The operation to execute
     * @param  array  $context  Forwarded unchanged to executeWithSshRetry()
     * @param  bool  $throwError  Forwarded unchanged to executeWithSshRetry()
     * @return mixed Whatever executeWithSshRetry() returns
     */
    public static function retry(callable $callback, array $context = [], bool $throwError = true)
    {
        $handler = self::instance();

        return $handler->executeWithSshRetry($callback, $context, $throwError);
    }
}

View File

@@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper;
use App\Models\Server;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process;
trait ExecuteRemoteCommand
{
use SshRetryable;
public ?string $save = null;
public static int $batch_counter = 0;
/**
 * Tell whether SSH error output looks like a connection-level failure worth retrying.
 *
 * The check is a case-sensitive substring search against a fixed list of
 * known error fragments; the first hit wins.
 *
 * @param  string  $errorOutput  Error text produced by the failed command
 * @return bool True when any known fragment occurs in $errorOutput
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $knownFragments = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    foreach ($knownFragments as $fragment) {
        if (! str_contains($errorOutput, $fragment)) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Compute the wait before the next retry using exponential backoff.
 *
 * The result is retry_base_delay * retry_multiplier ^ $attempt, capped at
 * retry_max_delay. All three values are read from the constants.ssh config.
 *
 * @param  int  $attempt  Exponent applied to the multiplier
 * @return int The capped delay, truncated to an integer
 */
private function calculateRetryDelay(int $attempt): int
{
    $initial = config('constants.ssh.retry_base_delay');
    $ceiling = config('constants.ssh.retry_max_delay');
    $factor = config('constants.ssh.retry_multiplier');

    $uncapped = $initial * pow($factor, $attempt);

    return (int) min($uncapped, $ceiling);
}
public function execute_remote_command(...$commands)
{
static::$batch_counter++;
@@ -129,7 +89,7 @@ trait ExecuteRemoteCommand
{
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
// Randomly fail the command with a key exchange error for testing
// if (random_int(1, 10) === 1) { // 10% chance to fail
// if (random_int(1, 20) === 1) { // 5% chance to fail
// throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer');
// }
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {

133
app/Traits/SshRetryable.php Normal file
View File

@@ -0,0 +1,133 @@
<?php
namespace App\Traits;
use Illuminate\Support\Facades\Log;
/**
 * Retry support for SSH operations.
 *
 * Provides error classification, exponential backoff calculation and a
 * generic wrapper that re-runs a callback when it fails with an error
 * classified as retryable.
 */
trait SshRetryable
{
    /**
     * Check if an error message indicates a retryable SSH connection error.
     *
     * Matching is case-insensitive. Fixed fragments are matched as substrings;
     * messages that embed a variable host/port are matched with regular
     * expressions (a substring test could never match their ".*" parts).
     *
     * @param  string  $errorOutput  Error text of the failed operation
     * @return bool True when the text matches any known pattern
     */
    protected function isRetryableSshError(string $errorOutput): bool
    {
        // NOTE(review): authentication / host-key entries below are usually not
        // transient; they are kept to preserve existing behavior - confirm intent.
        $retryablePatterns = [
            'kex_exchange_identification',
            'Connection reset by peer',
            'Connection refused',
            'Connection timed out',
            'Connection closed by remote host',
            'ssh_exchange_identification',
            'Bad file descriptor',
            'Broken pipe',
            'No route to host',
            'Network is unreachable',
            'Host is down',
            'No buffer space available',
            'Connection reset by',
            'Permission denied, please try again',
            'Received disconnect from',
            'Disconnected from',
            'Lost connection',
            'Timeout, server not responding',
            'Cannot assign requested address',
            'Network is down',
            'Host key verification failed',
            'Operation timed out',
            'Connection closed unexpectedly',
            'Remote host closed connection',
            'Authentication failed',
            'Too many authentication failures',
        ];

        // Patterns with a variable middle part; "." does not cross line breaks,
        // so each one has to match within a single line of the output.
        $retryableRegexes = [
            '/connection to .* closed/i',
            '/ssh: connect to host .* port .*: connection/i',
        ];

        $lowerErrorOutput = strtolower($errorOutput);
        foreach ($retryablePatterns as $pattern) {
            if (str_contains($lowerErrorOutput, strtolower($pattern))) {
                return true;
            }
        }

        foreach ($retryableRegexes as $regex) {
            if (preg_match($regex, $errorOutput) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Calculate delay for exponential backoff.
     *
     * delay = retry_base_delay * retry_multiplier ^ $attempt, capped at
     * retry_max_delay (config defaults: 2, 2 and 30).
     *
     * @param  int  $attempt  Zero-based index of the attempt that just failed
     * @return int Non-negative delay in seconds (it is passed to sleep())
     */
    protected function calculateRetryDelay(int $attempt): int
    {
        $baseDelay = config('constants.ssh.retry_base_delay', 2);
        $maxDelay = config('constants.ssh.retry_max_delay', 30);
        $multiplier = config('constants.ssh.retry_multiplier', 2);

        $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay);

        // sleep() throws ValueError on negative input, so never return one.
        return max(0, (int) $delay);
    }

    /**
     * Execute a callback with SSH retry logic.
     *
     * The callback is always run at least once. When it throws and the message
     * is classified as retryable, it is re-run after a backoff delay, up to
     * constants.ssh.max_retries attempts in total (default 3). Non-retryable
     * errors stop the loop immediately.
     *
     * If the using class has an application_deployment_queue property set and
     * an addRetryLogEntry() method, that method is called before each wait.
     *
     * @param  callable  $callback  The operation to execute
     * @param  array  $context  Context for logging (server, command, etc.)
     * @param  bool  $throwError  Whether to throw error on final failure
     * @return mixed The result from the callback, or null on failure when $throwError is false
     *
     * @throws \Throwable The last error raised by the callback, when $throwError is true
     */
    protected function executeWithSshRetry(callable $callback, array $context = [], bool $throwError = true)
    {
        // Guard against 0/negative/non-numeric config: the operation must run at least once.
        $maxRetries = max(1, (int) config('constants.ssh.max_retries', 3));
        $lastError = null;
        $lastErrorMessage = '';
        $attemptsMade = 0;
        $retriesExhausted = false;

        for ($attempt = 0; $attempt < $maxRetries; $attempt++) {
            $attemptsMade = $attempt + 1;

            try {
                $result = $callback();

                // Only worth logging when at least one earlier attempt failed.
                if ($attempt > 0) {
                    Log::info('SSH operation succeeded after retry', array_merge($context, [
                        'attempt' => $attemptsMade,
                    ]));
                }

                return $result;
            } catch (\Throwable $e) {
                $lastError = $e;
                $lastErrorMessage = $e->getMessage();

                // Non-retryable errors fail fast.
                if (! $this->isRetryableSshError($lastErrorMessage)) {
                    break;
                }

                // Retryable, but this was the final allowed attempt.
                if ($attempt >= $maxRetries - 1) {
                    $retriesExhausted = true;
                    break;
                }

                $delay = $this->calculateRetryDelay($attempt);

                // Add deployment log if available (for ExecuteRemoteCommand trait)
                if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) {
                    $this->addRetryLogEntry($attemptsMade, $maxRetries, $delay, $lastErrorMessage);
                }

                sleep($delay);
            }
        }

        // Tracked with an explicit flag: the loop counter never reaches
        // $maxRetries when the loop is left through break.
        if ($retriesExhausted) {
            Log::error('SSH operation failed after all retries', array_merge($context, [
                'attempts' => $attemptsMade,
                'error' => $lastErrorMessage,
            ]));
        }

        if ($throwError && $lastError !== null) {
            throw $lastError;
        }

        return null;
    }
}