feat(command): implement SSH command retry logic with exponential backoff and logging for better error handling
This commit is contained in:
@@ -7,6 +7,7 @@ use App\Helpers\SshMultiplexingHelper;
|
|||||||
use App\Models\Server;
|
use App\Models\Server;
|
||||||
use Carbon\Carbon;
|
use Carbon\Carbon;
|
||||||
use Illuminate\Support\Collection;
|
use Illuminate\Support\Collection;
|
||||||
|
use Illuminate\Support\Facades\Log;
|
||||||
use Illuminate\Support\Facades\Process;
|
use Illuminate\Support\Facades\Process;
|
||||||
|
|
||||||
trait ExecuteRemoteCommand
|
trait ExecuteRemoteCommand
|
||||||
@@ -15,6 +16,47 @@ trait ExecuteRemoteCommand
|
|||||||
|
|
||||||
public static int $batch_counter = 0;
|
public static int $batch_counter = 0;
|
||||||
|
|
||||||
|
/**
 * Decide whether an error message points at a retryable SSH connection failure.
 *
 * The text is checked for a fixed list of connection-level failure markers
 * using case-sensitive substring matching.
 *
 * @param  string  $errorOutput  Error text to inspect.
 * @return bool  True when at least one known marker occurs in the text.
 */
private function isRetryableSshError(string $errorOutput): bool
{
    $transientMarkers = [
        'kex_exchange_identification',
        'Connection reset by peer',
        'Connection refused',
        'Connection timed out',
        'Connection closed by remote host',
        'ssh_exchange_identification',
        'Bad file descriptor',
        'Broken pipe',
        'No route to host',
        'Network is unreachable',
    ];

    // Keep only the markers that actually appear in the message.
    $hits = array_filter(
        $transientMarkers,
        static fn (string $marker): bool => str_contains($errorOutput, $marker),
    );

    return $hits !== [];
}
|
||||||
|
|
||||||
|
/**
 * Calculate the delay, in whole seconds, to wait before the next retry
 * (exponential backoff capped at a maximum).
 *
 * The delay is `base * multiplier ^ attempt`, limited to the configured
 * maximum. Settings are read from `constants.ssh.retry_base_delay`,
 * `constants.ssh.retry_multiplier` and `constants.ssh.retry_max_delay`.
 *
 * Settings may arrive as strings (they are sourced from env()), so each one
 * is validated: a non-numeric value falls back to the built-in default and a
 * negative value is clamped to zero. This guarantees the result is never
 * negative, which matters because sleep() rejects negative durations.
 *
 * @param  int  $attempt  Zero-based retry index; negative values are treated as 0.
 * @return int  Non-negative number of seconds to wait.
 */
private function calculateRetryDelay(int $attempt): int
{
    // Read one numeric setting, tolerating empty/garbage/negative values.
    $readSetting = static function (string $key, float $fallback): float {
        $value = config($key, $fallback);

        return is_numeric($value) ? max(0.0, (float) $value) : $fallback;
    };

    $baseDelay = $readSetting('constants.ssh.retry_base_delay', 2.0);
    $maxDelay = $readSetting('constants.ssh.retry_max_delay', 30.0);
    $multiplier = $readSetting('constants.ssh.retry_multiplier', 2.0);

    // A negative exponent would shrink the delay (or divide by zero when the multiplier is 0).
    $exponent = max(0, $attempt);

    $delay = min($baseDelay * ($multiplier ** $exponent), $maxDelay);

    // 0 * INF yields NAN for extreme settings; fall back to the cap in that case.
    if (! is_finite($delay)) {
        $delay = $maxDelay;
    }

    return (int) $delay;
}
|
||||||
|
|
||||||
public function execute_remote_command(...$commands)
|
public function execute_remote_command(...$commands)
|
||||||
{
|
{
|
||||||
static::$batch_counter++;
|
static::$batch_counter++;
|
||||||
@@ -43,6 +85,65 @@ trait ExecuteRemoteCommand
|
|||||||
$command = parseLineForSudo($command, $this->server);
|
$command = parseLineForSudo($command, $this->server);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$maxRetries = config('constants.ssh.max_retries');
|
||||||
|
$attempt = 0;
|
||||||
|
$lastError = null;
|
||||||
|
$commandExecuted = false;
|
||||||
|
|
||||||
|
while ($attempt < $maxRetries && ! $commandExecuted) {
|
||||||
|
try {
|
||||||
|
$this->executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors);
|
||||||
|
$commandExecuted = true;
|
||||||
|
} catch (\RuntimeException $e) {
|
||||||
|
$lastError = $e;
|
||||||
|
$errorMessage = $e->getMessage();
|
||||||
|
|
||||||
|
// Only retry if it's an SSH connection error and we haven't exhausted retries
|
||||||
|
if ($this->isRetryableSshError($errorMessage) && $attempt < $maxRetries - 1) {
|
||||||
|
$attempt++;
|
||||||
|
$delay = $this->calculateRetryDelay($attempt - 1);
|
||||||
|
|
||||||
|
// Log the retry attempt
|
||||||
|
Log::warning('SSH command failed, retrying', [
|
||||||
|
'server' => $this->server->ip,
|
||||||
|
'attempt' => $attempt,
|
||||||
|
'max_retries' => $maxRetries,
|
||||||
|
'delay' => $delay,
|
||||||
|
'error' => $errorMessage,
|
||||||
|
'command_preview' => $hidden ? '[hidden]' : substr($command, 0, 100),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Add log entry for the retry
|
||||||
|
if (isset($this->application_deployment_queue)) {
|
||||||
|
$this->addRetryLogEntry($attempt, $maxRetries, $delay, $errorMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep($delay);
|
||||||
|
} else {
|
||||||
|
// Not retryable or max retries reached
|
||||||
|
throw $e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we exhausted all retries and still failed
|
||||||
|
if (! $commandExecuted && $lastError) {
|
||||||
|
Log::error('SSH command failed after all retries', [
|
||||||
|
'server' => $this->server->ip,
|
||||||
|
'attempts' => $attempt,
|
||||||
|
'error' => $lastError->getMessage(),
|
||||||
|
]);
|
||||||
|
throw $lastError;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute the actual command with process handling
|
||||||
|
*/
|
||||||
|
private function executeCommandWithProcess($command, $hidden, $customType, $append, $ignore_errors)
|
||||||
|
{
|
||||||
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
|
$remote_command = SshMultiplexingHelper::generateSshCommand($this->server, $command);
|
||||||
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
|
$process = Process::timeout(3600)->idleTimeout(3600)->start($remote_command, function (string $type, string $output) use ($command, $hidden, $customType, $append) {
|
||||||
$output = str($output)->trim();
|
$output = str($output)->trim();
|
||||||
@@ -113,6 +214,50 @@ trait ExecuteRemoteCommand
|
|||||||
throw new \RuntimeException($process_result->errorOutput());
|
throw new \RuntimeException($process_result->errorOutput());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
}
|
||||||
|
|
||||||
|
/**
 * Append an "SSH Retry" notice to the deployment queue's JSON-encoded log and persist it.
 *
 * The existing log is decoded first; an empty, undecodable, or non-array log
 * is treated as an empty list. The new entry's `order` is its 1-based position
 * in the resulting list.
 *
 * @param  int  $attempt  Number of the retry that is about to be made.
 * @param  int  $maxRetries  Configured retry limit shown in the message.
 * @param  int  $delay  Seconds that will be waited before retrying.
 * @param  string  $errorMessage  Error text of the failed attempt.
 */
private function addRetryLogEntry(int $attempt, int $maxRetries, int $delay, string $errorMessage)
{
    $entry = [
        'command' => 'SSH Retry',
        'output' => "🔄 SSH connection failed. Retrying... (Attempt {$attempt}/{$maxRetries}, waiting {$delay}s)\nError: {$errorMessage}",
        'type' => 'stdout',
        'timestamp' => Carbon::now('UTC'),
        'hidden' => false,
        'batch' => static::$batch_counter,
    ];

    $queue = $this->application_deployment_queue;

    // Start from an empty list and only adopt the stored log when it decodes to an array.
    $entries = [];
    $storedLogs = $queue->logs;
    if ($storedLogs) {
        try {
            $decoded = json_decode($storedLogs, associative: true, flags: JSON_THROW_ON_ERROR);
            if (is_array($decoded)) {
                $entries = $decoded;
            }
        } catch (\JsonException) {
            // Corrupt log payload: keep the empty list.
        }
    }

    $entry['order'] = count($entries) + 1;
    $entries[] = $entry;

    try {
        $queue->logs = json_encode($entries, flags: JSON_THROW_ON_ERROR);
    } catch (\JsonException) {
        // Retry the encoding, substituting invalid UTF-8 sequences instead of failing.
        $queue->logs = json_encode($entries, flags: JSON_INVALID_UTF8_SUBSTITUTE);
    }

    $queue->save();
}
|
||||||
}
|
}
|
||||||
|
@@ -62,6 +62,10 @@ return [
|
|||||||
'connection_timeout' => 10,
|
'connection_timeout' => 10,
|
||||||
'server_interval' => 20,
|
'server_interval' => 20,
|
||||||
'command_timeout' => 7200,
|
'command_timeout' => 7200,
|
||||||
|
'max_retries' => env('SSH_MAX_RETRIES', 3),
|
||||||
|
'retry_base_delay' => env('SSH_RETRY_BASE_DELAY', 2), // seconds
|
||||||
|
'retry_max_delay' => env('SSH_RETRY_MAX_DELAY', 30), // seconds
|
||||||
|
'retry_multiplier' => env('SSH_RETRY_MULTIPLIER', 2),
|
||||||
],
|
],
|
||||||
|
|
||||||
'invitation' => [
|
'invitation' => [
|
||||||
|
Reference in New Issue
Block a user