feat(utf8-handling): implement UTF-8 sanitization for command outputs and enhance error handling in logs processing

This commit is contained in:
Andras Bacsai
2025-06-05 14:27:39 +02:00
parent 5d390dc528
commit 8e8400f595
3 changed files with 130 additions and 10 deletions

View File

@@ -94,8 +94,12 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
}
return $output === 'null' ? null : $output;
// Sanitize output to ensure valid UTF-8 encoding
$output = $output === 'null' ? null : sanitize_utf8_text($output);
return $output;
}
function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
{
$command = $command instanceof Collection ? $command->toArray() : $command;
@@ -119,7 +123,10 @@ function instant_remote_process(Collection|array $command, Server $server, bool
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
}
return $output === 'null' ? null : $output;
// Sanitize output to ensure valid UTF-8 encoding
$output = $output === 'null' ? null : sanitize_utf8_text($output);
return $output;
}
function excludeCertainErrors(string $errorOutput, ?int $exitCode = null)
@@ -143,15 +150,38 @@ function decode_remote_command_output(?ApplicationDeploymentQueue $application_d
}
$application = Application::find(data_get($application_deployment_queue, 'application_id'));
$is_debug_enabled = data_get($application, 'settings.is_debug_enabled');
$logs = data_get($application_deployment_queue, 'logs');
if (empty($logs)) {
return collect([]);
}
try {
$decoded = json_decode(
data_get($application_deployment_queue, 'logs'),
$logs,
associative: true,
flags: JSON_THROW_ON_ERROR
);
} catch (\JsonException) {
} catch (\JsonException $e) {
// If JSON decoding fails, try to clean up the logs and retry
try {
// Ensure valid UTF-8 encoding
$cleaned_logs = sanitize_utf8_text($logs);
$decoded = json_decode(
$cleaned_logs,
associative: true,
flags: JSON_THROW_ON_ERROR
);
} catch (\JsonException $e) {
// If it still fails, return empty collection to prevent crashes
return collect([]);
}
}
if (! is_array($decoded)) {
return collect([]);
}
$seenCommands = collect();
$formatted = collect($decoded);
if (! $is_debug_enabled) {
@@ -204,11 +234,41 @@ function decode_remote_command_output(?ApplicationDeploymentQueue $application_d
/**
 * Cleans a piece of text before it is stored or displayed.
 *
 * Steps, in order:
 *  1. The text is passed through sanitize_utf8_text() so it is valid UTF-8.
 *  2. Everything between "x-access-token:" and the next "@" is replaced
 *     by the REDACTED constant.
 *  3. ANSI colour/style escape sequences of the form ESC[...m are removed.
 *
 * @param  string|null  $text  Raw text to clean
 * @return string|null Result of the final preg_replace() call
 */
function remove_iip($text)
{
    // Make sure we are working on valid UTF-8 from here on.
    $utf8Text = sanitize_utf8_text($text);

    // Mask the credential part of "x-access-token:<secret>@host" URLs.
    $maskedText = preg_replace(
        '/x-access-token:.*?(?=@)/',
        'x-access-token:'.REDACTED,
        $utf8Text
    );

    // Drop ANSI SGR sequences (e.g. "\x1b[31m") so only plain text remains.
    $plainText = preg_replace('/\x1b\[[0-9;]*m/', '', $maskedText);

    return $plainText;
}
/**
 * Sanitizes text to ensure it contains valid UTF-8 encoding.
 *
 * This function is crucial for preventing "Malformed UTF-8 characters" errors
 * that can occur when Docker build output contains binary data mixed with text,
 * especially during image processing or builds with many assets.
 *
 * Invalid byte sequences are replaced by mbstring's configured substitute
 * character ("?" unless changed via mb_substitute_character()). Text that is
 * already valid UTF-8 is returned unchanged.
 *
 * @param  string|null  $text  The text to sanitize
 * @return string Valid UTF-8 encoded text ('' for null or empty input)
 */
function sanitize_utf8_text($text): string
{
    if (! is_string($text)) {
        // Non-string input (null, false, ...) keeps the "empty value means ''" contract.
        $text = empty($text) ? '' : (string) $text;
    }

    // Compare against '' explicitly: empty() would also discard the legitimate
    // output "0" (e.g. a command that prints a zero count or exit status).
    if ($text === '') {
        return '';
    }

    // Converting UTF-8 to UTF-8 rewrites every invalid sequence with the
    // substitute character, so the result is always valid UTF-8 and no
    // further fallback is required.
    return mb_convert_encoding($text, 'UTF-8', 'UTF-8');
}
function refresh_server_connection(?PrivateKey $private_key = null)
{
if (is_null($private_key)) {