feat(command): implement SSH command retry logic with exponential backoff and logging for better error handling

This commit is contained in:
Andras Bacsai
2025-09-07 10:26:23 +02:00
parent 2d135071c7
commit b6176d905b
2 changed files with 209 additions and 60 deletions

View File

@@ -7,6 +7,7 @@ use App\Helpers\SshMultiplexingHelper;
use App\Models\Server;
use Carbon\Carbon;
use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process;
trait ExecuteRemoteCommand
@@ -15,6 +16,47 @@ trait ExecuteRemoteCommand
public static int $batch_counter = 0;
/**
 * Decide whether SSH error output describes a connection-level failure
 * that is worth retrying.
 *
 * The check is a case-sensitive substring match against a fixed list of
 * fragments (key-exchange identification failures, connection resets,
 * refusals, timeouts, broken pipes and routing failures).
 *
 * @param string $errorOutput Error text to inspect.
 * @return bool True when at least one known fragment occurs in the text.
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $transientMarkers = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    // Keep only the fragments that actually appear in the error text.
    $hits = array_filter(
        $transientMarkers,
        static fn (string $marker): bool => str_contains($errorOutput, $marker),
    );

    return $hits !== [];
}
/**
 * Compute the wait before the next SSH retry using capped exponential backoff.
 *
 * The delay is the base delay times the multiplier raised to $attempt,
 * limited to the maximum delay. All three values are read from the
 * `constants.ssh.*` config keys and fall back to 2, 2 and 30 when unset.
 *
 * @param int $attempt Exponent applied to the multiplier (0 yields the base delay).
 * @return int The capped delay, truncated to an integer.
 */
private function calculateRetryDelay(int $attempt): int
{
    $cap = config('constants.ssh.retry_max_delay', 30);
    $factor = config('constants.ssh.retry_multiplier', 2);
    $initial = config('constants.ssh.retry_base_delay', 2);

    // Geometric growth before the cap is applied.
    $uncapped = $initial * ($factor ** $attempt);

    return (int) min($uncapped, $cap);
}
public function execute_remote_command(...$commands)
{
static::$batch_counter++;
@@ -43,6 +85,65 @@ trait ExecuteRemoteCommand
$command = parseLineForSudo($command, $this->server);
}
}
$maxRetries = config('constants.ssh.max_retries');
$attempt = 0;
$lastError = null;
$commandExecuted = false;
while ($attempt < $maxRetries && ! $commandExecuted) {
try {
$this->executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors);
$commandExecuted = true;
} catch (\RuntimeException $e) {
$lastError = $e;
$errorMessage = $e->getMessage();
// Only retry if it's an SSH connection error and we haven't exhausted retries
if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) {
$attempt++;
$delay = $this->calculateRetryDelay($attempt - 1);
// Log the retry attempt
Log::warning('SSH command failed, retrying', [
'server' => $this->server->ip,
'attempt' => $attempt,
'max_retries' => $maxRetries,
'delay' => $delay,
'error' => $errorMessage,
'command_preview' => $hidden ? '[hidden]' : substr($command, 0, 100),
]);
// Add log entry for the retry
if (isset($this->application_deployment_queue)) {
$this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage);
}
sleep($delay);
} else {
// Not retryable or max retries reached
throw $e;
}
}
}
// If we exhausted all retries and still failed
if (! $commandExecuted && $lastError) {
Log::error('SSH command failed after all retries', [
'server' => $this->server->ip,
'attempts' => $attempt,
'error' => $lastError->getMessage(),
]);
throw $lastError;
}
});
}
/**
* Execute the actual command with process handling
*/
private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors)
{
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
$output = str($output)->trim();
@@ -113,6 +214,50 @@ trait ExecuteRemoteCommand
throw new \RuntimeException($process_result->errorOutput());
}
}
});
}
/**
 * Append an "SSH Retry" entry to the deployment queue's JSON log and save it.
 *
 * The existing `logs` value is decoded as a JSON array. If it is empty,
 * is not valid JSON, or does not decode to an array, the log starts over
 * from an empty list. The new entry's `order` is its 1-based position in
 * the resulting list.
 *
 * @param int    $attempt      Retry attempt number shown in the message.
 * @param int    $maxRetries   Total number of attempts shown in the message.
 * @param int    $delay        Wait shown in the message, rendered with an "s" suffix.
 * @param string $errorMessage Error text appended to the message.
 */
private function addRetryLogEntry(int $attempt, int $maxRetries, int $delay, string $errorMessage)
{
    $queue = $this->application_deployment_queue;
    $message = "🔄 SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}";
    $loggedAt = Carbon::now('UTC');
    $batch = static::$batch_counter;

    // Recover the existing entries; anything unusable collapses to an empty list.
    $history = [];
    if ($queue->logs) {
        try {
            $decoded = json_decode($queue->logs, associative: true, flags: JSON_THROW_ON_ERROR);
        } catch (\JsonException) {
            $decoded = null;
        }
        if (is_array($decoded)) {
            $history = $decoded;
        }
    }

    $position = count($history) + 1;
    $history[] = [
        'command' => 'SSH Retry',
        'output' => $message,
        'type' => 'stdout',
        'timestamp' => $loggedAt,
        'hidden' => false,
        'batch' => $batch,
        'order' => $position,
    ];

    try {
        $queue->logs = json_encode($history, flags: JSON_THROW_ON_ERROR);
    } catch (\JsonException) {
        // Strict encoding failed; retry with invalid UTF-8 sequences substituted.
        $queue->logs = json_encode($history, flags: JSON_INVALID_UTF8_SUBSTITUTE);
    }

    $queue->save();
}
}

View File

@@ -62,6 +62,10 @@ return [
'connection_timeout' => 10,
'server_interval' => 20,
'command_timeout' => 7200,
'max_retries' => env('SSH_MAX_RETRIES', 3),
'retry_base_delay' => env('SSH_RETRY_BASE_DELAY', 2), // seconds
'retry_max_delay' => env('SSH_RETRY_MAX_DELAY', 30), // seconds
'retry_multiplier' => env('SSH_RETRY_MULTIPLIER', 2),
],
'invitation' => [