feat(command): implement SSH command retry logic with exponential backoff and logging for better error handling

This commit is contained in:
Andras Bacsai
2025-09-07 10:26:23 +02:00
parent 2d135071c7
commit b6176d905b
2 changed files with 209 additions and 60 deletions

View File

@@ -7,6 +7,7 @@ use App\Helpers\SshMultiplexingHelper;
use App\Models\Server; use App\Models\Server;
use Carbon\Carbon; use Carbon\Carbon;
use Illuminate\Support\Collection; use Illuminate\Support\Collection;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Process; use Illuminate\Support\Facades\Process;
trait ExecuteRemoteCommand trait ExecuteRemoteCommand
@@ -15,6 +16,47 @@ trait ExecuteRemoteCommand
public static int $batch_counter = 0; public static int $batch_counter = 0;
/**
 * Decide whether an error message looks like a transient SSH connection failure.
 *
 * The message is searched (case-sensitively, as plain substrings) for a fixed
 * list of connection-level failure markers; any single hit makes the error
 * retryable.
 *
 * @param  string  $errorOutput  Error text to inspect.
 * @return bool  True when at least one known marker occurs in the text.
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $transientMarkers = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    // Keep only the markers that actually occur in the message.
    $hits = array_filter(
        $transientMarkers,
        static fn (string $marker): bool => str_contains($errorOutput, $marker),
    );

    return $hits !== [];
}
/**
 * Compute the capped exponential-backoff delay, in seconds, for a retry.
 *
 * The result is min(base * multiplier ^ attempt, max), truncated to an integer.
 * The three settings are read from `constants.ssh.retry_base_delay`,
 * `constants.ssh.retry_multiplier` and `constants.ssh.retry_max_delay`,
 * falling back to 2, 2 and 30 when they are not configured.
 *
 * @param  int  $attempt  Exponent applied to the multiplier.
 * @return int  Delay after capping and integer truncation.
 */
private function calculateRetryDelay(int $attempt): int
{
    $ceiling = config('constants.ssh.retry_max_delay', 30);
    $growthFactor = config('constants.ssh.retry_multiplier', 2);
    $initialDelay = config('constants.ssh.retry_base_delay', 2);

    $uncapped = $initialDelay * ($growthFactor ** $attempt);

    return (int) min($uncapped, $ceiling);
}
public function execute_remote_command(...$commands) public function execute_remote_command(...$commands)
{ {
static::$batch_counter++; static::$batch_counter++;
@@ -43,76 +85,179 @@ trait ExecuteRemoteCommand
$command = parseLineForSudo($command, $this->server); $command = parseLineForSudo($command, $this->server);
} }
} }
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
$output = str($output)->trim();
if ($output->startsWith('╔')) {
$output = "\n".$output;
}
// Sanitize output to ensure valid UTF-8 encoding before JSON encoding $maxRetries = config('constants.ssh.max_retries');
$sanitized_output = sanitize_utf8_text($output); $attempt = 0;
$lastError = null;
$new_log_entry = [ $commandExecuted = false;
'command' => remove_iip($command),
'output' => remove_iip($sanitized_output),
'type' => $customType ?? $type === 'err' ? 'stderr' : 'stdout',
'timestamp' => Carbon::now('UTC'),
'hidden' => $hidden,
'batch' => static::$batch_counter,
];
if (! $this->application_deployment_queue->logs) {
$new_log_entry['order'] = 1;
} else {
try {
$previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR);
} catch (\JsonException $e) {
// If existing logs are corrupted, start fresh
$previous_logs = [];
$new_log_entry['order'] = 1;
}
if (is_array($previous_logs)) {
$new_log_entry['order'] = count($previous_logs) + 1;
} else {
$previous_logs = [];
$new_log_entry['order'] = 1;
}
}
$previous_logs[] = $new_log_entry;
while ($attempt < $maxRetries && ! $commandExecuted) {
try { try {
$this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR); $this->executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors);
} catch (\JsonException $e) { $commandExecuted = true;
// If JSON encoding still fails, use fallback with invalid sequences replacement } catch (\RuntimeException $e) {
$this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE); $lastError = $e;
} $errorMessage = $e->getMessage();
$this->application_deployment_queue->save(); // Only retry if it's an SSH connection error and we haven't exhausted retries
if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) {
$attempt++;
$delay = $this->calculateRetryDelay($attempt - 1);
if ($this->save) { // Log the retry attempt
if (data_get($this->saved_outputs, $this->save, null) === null) { Log::warning('SSH command failed, retrying', [
data_set($this->saved_outputs, $this->save, str()); 'server' => $this->server->ip,
} 'attempt' => $attempt,
if ($append) { 'max_retries' => $maxRetries,
$this->saved_outputs[$this->save] .= str($sanitized_output)->trim(); 'delay' => $delay,
$this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]); 'error' => $errorMessage,
'command_preview' => $hidden ? '[hidden]' : substr($command, 0, 100),
]);
// Add log entry for the retry
if (isset($this->application_deployment_queue)) {
$this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage);
}
sleep($delay);
} else { } else {
$this->saved_outputs[$this->save] = str($sanitized_output)->trim(); // Not retryable or max retries reached
throw $e;
} }
} }
}); }
$this->application_deployment_queue->update([
'current_process_id' => $process->id(),
]);
$process_result = $process->wait(); // If we exhausted all retries and still failed
if ($process_result->exitCode() !== 0) { if (! $commandExecuted && $lastError) {
if (! $ignore_errors) { Log::error('SSH command failed after all retries', [
$this->application_deployment_queue->status = ApplicationDeploymentStatus::FAILED->value; 'server' => $this->server->ip,
$this->application_deployment_queue->save(); 'attempts' => $attempt,
throw new \RuntimeException($process_result->errorOutput()); 'error' => $lastError->getMessage(),
} ]);
throw $lastError;
} }
}); });
} }
/**
 * Run one command on the remote server and stream its output into the deployment log.
 *
 * The command is wrapped by SshMultiplexingHelper::generateSshCommand() and started
 * as a process. Every output chunk is trimmed, sanitized, appended to
 * `application_deployment_queue->logs` (a JSON-encoded list) and saved; when
 * `$this->save` is set the chunk is also stored in `$this->saved_outputs`.
 *
 * @param  mixed  $command  Command handed to the SSH command generator.
 * @param  mixed  $hidden  Stored verbatim as the log entry's `hidden` flag.
 * @param  mixed  $customType  Log entry type; when null the type is derived from the output stream.
 * @param  mixed  $append  When truthy, saved output is appended to rather than overwritten.
 * @param  mixed  $ignore_errors  When truthy, a non-zero exit code does not fail the deployment.
 *
 * @throws \RuntimeException Carrying the process's error output when the exit code is
 *                           non-zero and errors are not ignored.
 */
private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors)
{
    $remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
    $process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
        $output = str($output)->trim();
        if ($output->startsWith('╔')) {
            $output = "\n".$output;
        }

        // Sanitize output to ensure valid UTF-8 encoding before JSON encoding
        $sanitized_output = sanitize_utf8_text($output);

        $new_log_entry = [
            'command' => remove_iip($command),
            'output' => remove_iip($sanitized_output),
            // Parenthesised on purpose: `??` binds tighter than `?:`, so without the
            // parentheses a supplied custom type was never stored and any truthy one
            // was logged as 'stderr'.
            'type' => $customType ?? ($type === 'err' ? 'stderr' : 'stdout'),
            'timestamp' => Carbon::now('UTC'),
            'hidden' => $hidden,
            'batch' => static::$batch_counter,
        ];

        // Start from an empty list so the append below never relies on an undefined variable.
        $previous_logs = [];
        if ($this->application_deployment_queue->logs) {
            try {
                $decoded_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR);
            } catch (\JsonException $e) {
                // If existing logs are corrupted, start fresh
                $decoded_logs = null;
            }
            if (is_array($decoded_logs)) {
                $previous_logs = $decoded_logs;
            }
        }
        $new_log_entry['order'] = count($previous_logs) + 1;
        $previous_logs[] = $new_log_entry;

        try {
            $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            // If JSON encoding still fails, use fallback with invalid sequences replacement
            $this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE);
        }

        $this->application_deployment_queue->save();

        if ($this->save) {
            if (data_get($this->saved_outputs, $this->save, null) === null) {
                data_set($this->saved_outputs, $this->save, str());
            }
            if ($append) {
                $this->saved_outputs[$this->save] .= str($sanitized_output)->trim();
                $this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]);
            } else {
                $this->saved_outputs[$this->save] = str($sanitized_output)->trim();
            }
        }
    });

    // Record the process id on the deployment record while the command runs.
    $this->application_deployment_queue->update([
        'current_process_id' => $process->id(),
    ]);

    $process_result = $process->wait();
    if ($process_result->exitCode() !== 0) {
        if (! $ignore_errors) {
            // NOTE(review): the status is persisted as FAILED before throwing; if a caller
            // catches this exception and retries, the record stays FAILED during the
            // retry - confirm this is intended.
            $this->application_deployment_queue->status = ApplicationDeploymentStatus::FAILED->value;
            $this->application_deployment_queue->save();
            throw new \RuntimeException($process_result->errorOutput());
        }
    }
}
/**
 * Append an "SSH Retry" notice to the deployment log and save it.
 *
 * The existing `application_deployment_queue->logs` value is decoded as a JSON
 * list; if it is empty, invalid JSON, or not an array, the log starts over from
 * an empty list. The new entry's `order` is its 1-based position in the
 * resulting list.
 *
 * @param  int  $attempt  Attempt number shown in the message.
 * @param  int  $maxRetries  Attempt limit shown in the message.
 * @param  int  $delay  Wait time in seconds shown in the message.
 * @param  string  $errorMessage  Error text appended to the message.
 */
private function addRetryLogEntry(int $attempt, int $maxRetries, int $delay, string $errorMessage)
{
    $entry = [
        'command' => 'SSH Retry',
        'output' => "🔄 SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}",
        'type' => 'stdout',
        'timestamp' => Carbon::now('UTC'),
        'hidden' => false,
        'batch' => static::$batch_counter,
    ];

    // Recover the current entries; anything that is not a decodable array counts as empty.
    $entries = [];
    $rawLogs = $this->application_deployment_queue->logs;
    if ($rawLogs) {
        try {
            $decoded = json_decode($rawLogs, associative: true, flags: JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            $decoded = null;
        }
        if (is_array($decoded)) {
            $entries = $decoded;
        }
    }

    $entry['order'] = count($entries) + 1;
    $entries[] = $entry;

    try {
        $encoded = json_encode($entries, flags: JSON_THROW_ON_ERROR);
    } catch (\JsonException $e) {
        // Strict encoding failed; encode again with invalid UTF-8 substitution enabled.
        $encoded = json_encode($entries, flags: JSON_INVALID_UTF8_SUBSTITUTE);
    }

    $this->application_deployment_queue->logs = $encoded;
    $this->application_deployment_queue->save();
}
} }

View File

@@ -62,6 +62,10 @@ return [
'connection_timeout' => 10, 'connection_timeout' => 10,
'server_interval' => 20, 'server_interval' => 20,
'command_timeout' => 7200, 'command_timeout' => 7200,
'max_retries' => env('SSH_MAX_RETRIES', 3),
'retry_base_delay' => env('SSH_RETRY_BASE_DELAY', 2), // seconds
'retry_max_delay' => env('SSH_RETRY_MAX_DELAY', 30), // seconds
'retry_multiplier' => env('SSH_RETRY_MULTIPLIER', 2),
], ],
'invitation' => [ 'invitation' => [