fix(ssh): introduce SshRetryHandler and SshRetryable trait for enhanced SSH command retry logic with exponential backoff and error handling
This commit is contained in:
34
app/Helpers/SshRetryHandler.php
Normal file
34
app/Helpers/SshRetryHandler.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
namespace App\Helpers;
|
||||
|
||||
use App\Traits\SshRetryable;
|
||||
|
||||
/**
 * Thin wrapper that exposes the SshRetryable retry logic to callers that
 * cannot pull the trait in themselves (e.g. plain helper functions).
 */
class SshRetryHandler
{
    use SshRetryable;

    /**
     * Return the shared handler, creating it on first use.
     */
    public static function instance(): self
    {
        // Function-level static: initialised to null once, then kept for the
        // lifetime of the request/process.
        static $instance;

        return $instance ??= new self;
    }

    /**
     * Run $callback through the trait's executeWithSshRetry() on the shared instance.
     *
     * @param  callable  $callback  The operation to execute
     * @param  array  $context  Extra data forwarded to the retry logic for logging
     * @param  bool  $throwError  Forwarded unchanged to executeWithSshRetry()
     * @return mixed Whatever executeWithSshRetry() returns
     */
    public static function retry(callable $callback, array $context = [], bool $throwError = true)
    {
        $handler = self::instance();

        return $handler->executeWithSshRetry($callback, $context, $throwError);
    }
}
|
@@ -7,56 +7,16 @@ use App\Helpers\SshMultiplexingHelper;
|
||||
use App\Models\Server;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Support\Collection;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
|
||||
trait ExecuteRemoteCommand
|
||||
{
|
||||
use SshRetryable;
|
||||
|
||||
public ?string $save = null;
|
||||
|
||||
public static int $batch_counter = 0;
|
||||
|
||||
/**
 * Tell whether SSH error output contains one of the known connection-failure markers.
 *
 * The comparison is a case-sensitive substring search.
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $markers = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    // Keep only the markers that occur in the output; any survivor means "retryable".
    $found = array_filter(
        $markers,
        static fn (string $marker): bool => str_contains($errorOutput, $marker)
    );

    return $found !== [];
}
|
||||
|
||||
/**
 * Work out the wait before the next retry: base delay multiplied by
 * multiplier^attempt, capped at the configured maximum.
 *
 * All three factors are read from the `constants.ssh.*` configuration keys.
 */
private function calculateRetryDelay(int $attempt): int
{
    $initial = config('constants.ssh.retry_base_delay');
    $ceiling = config('constants.ssh.retry_max_delay');
    $factor = config('constants.ssh.retry_multiplier');

    $uncapped = $initial * pow($factor, $attempt);

    // min() may hand back a float (pow can return one), hence the cast.
    return (int) min($uncapped, $ceiling);
}
|
||||
|
||||
public function execute_remote_command(...$commands)
|
||||
{
|
||||
static::$batch_counter++;
|
||||
@@ -129,7 +89,7 @@ trait ExecuteRemoteCommand
|
||||
{
|
||||
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
|
||||
// Randomly fail the command with a key exchange error for testing
|
||||
// if (random_int(1, 10) === 1) { // 10% chance to fail
|
||||
// if (random_int(1, 20) === 1) { // 5% chance to fail
|
||||
// throw new \RuntimeException('SSH key exchange failed: kex_exchange_identification: read: Connection reset by peer');
|
||||
// }
|
||||
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
|
||||
|
133
app/Traits/SshRetryable.php
Normal file
133
app/Traits/SshRetryable.php
Normal file
@@ -0,0 +1,133 @@
|
||||
<?php
|
||||
|
||||
namespace App\Traits;
|
||||
|
||||
use Illuminate\Support\Facades\Log;
|
||||
|
||||
trait SshRetryable
{
    /**
     * Check if an error message indicates a retryable SSH connection error.
     *
     * Matching is case-insensitive. Plain markers are compared as substrings;
     * markers that need a wildcard are kept in a separate list and evaluated
     * as regular expressions (a substring search can never match ".*").
     *
     * NOTE(review): the list also contains authentication / host-key failures
     * ("Authentication failed", "Host key verification failed", ...). They are
     * kept as-is to preserve existing behaviour - confirm they are really
     * meant to be retried.
     *
     * @param  string  $errorOutput  Error text, typically an exception message
     */
    protected function isRetryableSshError(string $errorOutput): bool
    {
        $retryableSubstrings = [
            'kex_exchange_identification',
            'Connection reset by peer',
            'Connection refused',
            'Connection timed out',
            'Connection closed by remote host',
            'ssh_exchange_identification',
            'Bad file descriptor',
            'Broken pipe',
            'No route to host',
            'Network is unreachable',
            'Host is down',
            'No buffer space available',
            'Connection reset by',
            'Permission denied, please try again',
            'Received disconnect from',
            'Disconnected from',
            'Lost connection',
            'Timeout, server not responding',
            'Cannot assign requested address',
            'Network is down',
            'Host key verification failed',
            'Operation timed out',
            'Connection closed unexpectedly',
            'Remote host closed connection',
            'Authentication failed',
            'Too many authentication failures',
        ];

        // These two entries were previously passed to str_contains() verbatim,
        // so the literal ".*" prevented them from ever matching.
        $retryableRegexes = [
            '/Connection to .* closed/i',
            '/ssh: connect to host .* port .*: Connection/i',
        ];

        $lowerErrorOutput = strtolower($errorOutput);
        foreach ($retryableSubstrings as $pattern) {
            if (str_contains($lowerErrorOutput, strtolower($pattern))) {
                return true;
            }
        }

        foreach ($retryableRegexes as $regex) {
            if (preg_match($regex, $errorOutput) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Calculate delay for exponential backoff.
     *
     * The delay is base * multiplier^attempt, capped at the configured maximum
     * and never negative (sleep() rejects negative values).
     *
     * @param  int  $attempt  Zero-based index of the attempt that just failed
     * @return int Delay in seconds, as later passed to sleep()
     */
    protected function calculateRetryDelay(int $attempt): int
    {
        $baseDelay = config('constants.ssh.retry_base_delay', 2);
        $maxDelay = config('constants.ssh.retry_max_delay', 30);
        $multiplier = config('constants.ssh.retry_multiplier', 2);

        $delay = min($baseDelay * pow($multiplier, $attempt), $maxDelay);

        return max(0, (int) $delay);
    }

    /**
     * Execute a callback with SSH retry logic.
     *
     * The callback is always run at least once. When it throws and the
     * exception message is recognised by isRetryableSshError(), it is retried
     * after a backoff delay until `constants.ssh.max_retries` attempts have
     * been made. Non-retryable errors stop the loop immediately.
     *
     * @param  callable  $callback  The operation to execute
     * @param  array  $context  Context for logging (server, command, etc.)
     * @param  bool  $throwError  Whether to throw error on final failure
     * @return mixed The result from the callback, or null when it failed and $throwError is false
     *
     * @throws \Throwable The last error raised by the callback, when $throwError is true
     */
    protected function executeWithSshRetry(callable $callback, array $context = [], bool $throwError = true)
    {
        // A zero/negative/non-numeric setting must not prevent the callback
        // from running at all, so clamp to a minimum of one attempt.
        $maxRetries = max(1, (int) config('constants.ssh.max_retries', 3));
        $lastError = null;
        $lastErrorMessage = '';
        $attemptsMade = 0;
        $retriesExhausted = false;

        for ($attempt = 0; $attempt < $maxRetries; $attempt++) {
            $attemptsMade = $attempt + 1;

            try {
                // Execute the callback
                $result = $callback();

                // If we get here, it succeeded
                if ($attempt > 0) {
                    Log::info('SSH operation succeeded after retry', array_merge($context, [
                        'attempt' => $attemptsMade,
                    ]));
                }

                return $result;
            } catch (\Throwable $e) {
                $lastError = $e;
                $lastErrorMessage = $e->getMessage();

                // Non-retryable errors are reported straight away.
                if (! $this->isRetryableSshError($lastErrorMessage)) {
                    break;
                }

                // Retryable, but no attempts left: remember that so the
                // exhaustion is actually logged below.
                if ($attemptsMade >= $maxRetries) {
                    $retriesExhausted = true;
                    break;
                }

                $delay = $this->calculateRetryDelay($attempt);

                // Add deployment log if available (for ExecuteRemoteCommand trait)
                if (isset($this->application_deployment_queue) && method_exists($this, 'addRetryLogEntry')) {
                    $this->addRetryLogEntry($attemptsMade, $maxRetries, $delay, $lastErrorMessage);
                }

                sleep($delay);
            }
        }

        // All retries exhausted
        if ($retriesExhausted) {
            Log::error('SSH operation failed after all retries', array_merge($context, [
                'attempts' => $attemptsMade,
                'error' => $lastErrorMessage,
            ]));
        }

        if ($throwError && $lastError) {
            throw $lastError;
        }

        return null;
    }
}
|
Reference in New Issue
Block a user