feat(utf8-handling): implement UTF-8 sanitization for command output and improve error handling in log processing

This commit is contained in:
Andras Bacsai
2025-06-05 14:27:39 +02:00
parent 5d390dc528
commit 8e8400f595
3 changed files with 130 additions and 10 deletions

View File

@@ -49,9 +49,13 @@ trait ExecuteRemoteCommand
if ($output->startsWith('╔')) { if ($output->startsWith('╔')) {
$output = "\n".$output; $output = "\n".$output;
} }
// Sanitize output to ensure valid UTF-8 encoding before JSON encoding
$sanitized_output = sanitize_utf8_text($output);
$new_log_entry = [ $new_log_entry = [
'command' => remove_iip($command), 'command' => remove_iip($command),
'output' => remove_iip($output), 'output' => remove_iip($sanitized_output),
'type' => $customType ?? $type === 'err' ? 'stderr' : 'stdout', 'type' => $customType ?? $type === 'err' ? 'stderr' : 'stdout',
'timestamp' => Carbon::now('UTC'), 'timestamp' => Carbon::now('UTC'),
'hidden' => $hidden, 'hidden' => $hidden,
@@ -60,11 +64,29 @@ trait ExecuteRemoteCommand
if (! $this->application_deployment_queue->logs) { if (! $this->application_deployment_queue->logs) {
$new_log_entry['order'] = 1; $new_log_entry['order'] = 1;
} else { } else {
$previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR); try {
$new_log_entry['order'] = count($previous_logs) + 1; $previous_logs = json_decode($this->application_deployment_queue->logs, associative: true, flags: JSON_THROW_ON_ERROR);
} catch (\JsonException $e) {
// If existing logs are corrupted, start fresh
$previous_logs = [];
$new_log_entry['order'] = 1;
}
if (is_array($previous_logs)) {
$new_log_entry['order'] = count($previous_logs) + 1;
} else {
$previous_logs = [];
$new_log_entry['order'] = 1;
}
} }
$previous_logs[] = $new_log_entry; $previous_logs[] = $new_log_entry;
$this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR);
try {
$this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_THROW_ON_ERROR);
} catch (\JsonException $e) {
// If JSON encoding still fails, use fallback with invalid sequences replacement
$this->application_deployment_queue->logs = json_encode($previous_logs, flags: JSON_INVALID_UTF8_SUBSTITUTE);
}
$this->application_deployment_queue->save(); $this->application_deployment_queue->save();
if ($this->save) { if ($this->save) {
@@ -72,10 +94,10 @@ trait ExecuteRemoteCommand
data_set($this->saved_outputs, $this->save, str()); data_set($this->saved_outputs, $this->save, str());
} }
if ($append) { if ($append) {
$this->saved_outputs[$this->save] .= str($output)->trim(); $this->saved_outputs[$this->save] .= str($sanitized_output)->trim();
$this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]); $this->saved_outputs[$this->save] = str($this->saved_outputs[$this->save]);
} else { } else {
$this->saved_outputs[$this->save] = str($output)->trim(); $this->saved_outputs[$this->save] = str($sanitized_output)->trim();
} }
} }
}); });

View File

@@ -94,8 +94,12 @@ function instant_remote_process_with_timeout(Collection|array $command, Server $
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
} }
return $output === 'null' ? null : $output; // Sanitize output to ensure valid UTF-8 encoding
$output = $output === 'null' ? null : sanitize_utf8_text($output);
return $output;
} }
function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string function instant_remote_process(Collection|array $command, Server $server, bool $throwError = true, bool $no_sudo = false): ?string
{ {
$command = $command instanceof Collection ? $command->toArray() : $command; $command = $command instanceof Collection ? $command->toArray() : $command;
@@ -119,7 +123,10 @@ function instant_remote_process(Collection|array $command, Server $server, bool
return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null; return $throwError ? excludeCertainErrors($process->errorOutput(), $exitCode) : null;
} }
return $output === 'null' ? null : $output; // Sanitize output to ensure valid UTF-8 encoding
$output = $output === 'null' ? null : sanitize_utf8_text($output);
return $output;
} }
function excludeCertainErrors(string $errorOutput, ?int $exitCode = null) function excludeCertainErrors(string $errorOutput, ?int $exitCode = null)
@@ -143,15 +150,38 @@ function decode_remote_command_output(?ApplicationDeploymentQueue $application_d
} }
$application = Application::find(data_get($application_deployment_queue, 'application_id')); $application = Application::find(data_get($application_deployment_queue, 'application_id'));
$is_debug_enabled = data_get($application, 'settings.is_debug_enabled'); $is_debug_enabled = data_get($application, 'settings.is_debug_enabled');
$logs = data_get($application_deployment_queue, 'logs');
if (empty($logs)) {
return collect([]);
}
try { try {
$decoded = json_decode( $decoded = json_decode(
data_get($application_deployment_queue, 'logs'), $logs,
associative: true, associative: true,
flags: JSON_THROW_ON_ERROR flags: JSON_THROW_ON_ERROR
); );
} catch (\JsonException) { } catch (\JsonException $e) {
// If JSON decoding fails, try to clean up the logs and retry
try {
// Ensure valid UTF-8 encoding
$cleaned_logs = sanitize_utf8_text($logs);
$decoded = json_decode(
$cleaned_logs,
associative: true,
flags: JSON_THROW_ON_ERROR
);
} catch (\JsonException $e) {
// If it still fails, return empty collection to prevent crashes
return collect([]);
}
}
if (! is_array($decoded)) {
return collect([]); return collect([]);
} }
$seenCommands = collect(); $seenCommands = collect();
$formatted = collect($decoded); $formatted = collect($decoded);
if (! $is_debug_enabled) { if (! $is_debug_enabled) {
@@ -204,11 +234,41 @@ function decode_remote_command_output(?ApplicationDeploymentQueue $application_d
/**
 * Redacts access tokens and strips ANSI color sequences from command text.
 *
 * The input is first passed through sanitize_utf8_text() so the result is
 * safe to JSON-encode. Then everything between "x-access-token:" and the
 * next "@" is replaced by the REDACTED constant, and ANSI SGR escape
 * sequences (ESC [ ... m) are removed.
 *
 * @param  mixed  $text  Raw command or command output
 * @return string Cleaned text; '' when the redaction step itself fails
 */
function remove_iip($text)
{
    // Ensure the input is valid UTF-8 before processing
    $text = sanitize_utf8_text($text);

    $redacted = preg_replace('/x-access-token:.*?(?=@)/', 'x-access-token:'.REDACTED, $text);
    if ($redacted === null) {
        // preg_replace() returns null on a PCRE error. Fail closed rather
        // than risk returning text that may still contain a token.
        return '';
    }

    // If only the color stripping fails, the already-redacted text is still safe to return.
    return preg_replace('/\x1b\[[0-9;]*m/', '', $redacted) ?? $redacted;
}
/**
 * Sanitizes text so that it is guaranteed to be valid UTF-8.
 *
 * This prevents "Malformed UTF-8 characters" errors from json_encode() when
 * command output (for example Docker build output) contains binary data
 * mixed with text.
 *
 * Only null, false, the empty string and the empty array collapse to ''.
 * Values such as "0" or 0 are real output and are returned in string form.
 *
 * @param  mixed  $text  The text to sanitize (string, Stringable, scalar or null)
 * @return string Valid UTF-8 text; invalid byte sequences are replaced by
 *                mbstring's configured substitution character
 */
function sanitize_utf8_text($text): string
{
    // Deliberately not empty(): empty('0') is true and would discard a real "0" output.
    if ($text === null || $text === false || $text === '' || $text === []) {
        return '';
    }

    $text = (string) $text;
    if ($text === '') {
        return '';
    }

    // Already valid: converting UTF-8 to UTF-8 would return the same bytes.
    if (mb_check_encoding($text, 'UTF-8')) {
        return $text;
    }

    // Re-encode UTF-8 as UTF-8; mbstring substitutes every invalid sequence.
    $sanitized = mb_convert_encoding($text, 'UTF-8', 'UTF-8');

    // Defensive fallback: if the result is still unusable, retry from a detected source encoding.
    if (! is_string($sanitized) || ! mb_check_encoding($sanitized, 'UTF-8')) {
        $detected = mb_detect_encoding($text, mb_detect_order(), true) ?: 'UTF-8';
        $sanitized = mb_convert_encoding($text, 'UTF-8', $detected);
    }

    return is_string($sanitized) ? $sanitized : '';
}
function refresh_server_connection(?PrivateKey $private_key = null) function refresh_server_connection(?PrivateKey $private_key = null)
{ {
if (is_null($private_key)) { if (is_null($private_key)) {

View File

@@ -0,0 +1,38 @@
<?php
namespace Tests\Feature;
use Tests\TestCase;
/**
 * Covers the UTF-8 sanitization helpers used before command output is
 * JSON-encoded into deployment logs.
 */
class Utf8HandlingTest extends TestCase
{
    /**
     * sanitize_utf8_text() must pass valid text through unchanged and turn
     * malformed byte sequences into something json_encode() accepts.
     */
    public function test_sanitize_utf8_text_handles_malformed_utf8(): void
    {
        // Valid UTF-8 is returned byte-for-byte
        $validUtf8 = 'Hello World! 🚀';
        $this->assertSame($validUtf8, sanitize_utf8_text($validUtf8));

        // Empty string stays an empty string
        $this->assertSame('', sanitize_utf8_text(''));

        // Malformed UTF-8 (binary data) becomes valid, keeping the readable parts
        $malformedUtf8 = "Hello\x80\x81\x82World";
        $sanitized = sanitize_utf8_text($malformedUtf8);
        $this->assertTrue(mb_check_encoding($sanitized, 'UTF-8'));
        $this->assertStringStartsWith('Hello', $sanitized);
        $this->assertStringEndsWith('World', $sanitized);

        // JSON encoding works after sanitization
        $testArray = ['output' => $sanitized];
        $this->assertIsString(json_encode($testArray, JSON_THROW_ON_ERROR));
    }

    /**
     * remove_iip() must produce JSON-safe output and still perform its
     * redaction and ANSI stripping.
     */
    public function test_remove_iip_handles_malformed_utf8(): void
    {
        // Malformed UTF-8 in command output
        $malformedOutput = "Processing image\x80\x81file.webp";
        $cleaned = remove_iip($malformedOutput);
        $this->assertTrue(mb_check_encoding($cleaned, 'UTF-8'));
        $this->assertStringStartsWith('Processing image', $cleaned);
        $this->assertStringEndsWith('file.webp', $cleaned);

        // JSON encoding works after cleaning
        $this->assertIsString(json_encode(['output' => $cleaned], JSON_THROW_ON_ERROR));

        // ANSI color sequences are stripped
        $this->assertSame('red', remove_iip("\x1b[31mred\x1b[0m"));

        // The token after "x-access-token:" never survives
        $withToken = remove_iip('https://x-access-token:secret-value@github.com/org/repo');
        $this->assertStringNotContainsString('secret-value', $withToken);
    }
}