Fix packing for larger strings
Some checks are pending
Benchmark BufferPool / RunBenchmarks (push) Waiting to run
Run Tests / Test (push) Waiting to run

This commit is contained in:
2024-07-28 17:35:19 +02:00
parent b26c64d0d9
commit a2f4e9af25

View File

@@ -7,34 +7,72 @@ import (
// GSM7Coder packs 7-bit characters (septets) into octets.
//
// It carries no state; the zero value is ready to use.
type GSM7Coder struct{}

// Encode packs the bytes of s as 7-bit septets and appends the resulting
// octets to buf.
//
// Septets are laid out least-significant-bit first: the first septet fills
// bits 0-6 of the first octet, the low bit of the second septet fills bit 7,
// and so on. Every 8 septets therefore produce exactly 7 octets, and an input
// of n bytes always yields ceil(7*n/8) octets; unused high bits of the final
// octet are zero.
//
// Each byte of s must be at most 0x7F. No alphabet translation is performed;
// the byte values are packed as they are. If a byte is out of range, Encode
// returns an error naming its byte index and leaves buf untouched.
func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error {
	// Validate up front so a bad input never leaves a partial encoding in buf.
	for i := 0; i < len(s); i++ {
		if s[i] > 0x7F {
			return fmt.Errorf("invalid character at index %d", i)
		}
	}

	buf.Grow((len(s)*7 + 7) / 8)

	// acc holds bits that have not been flushed yet, lowest bit first.
	// It never holds more than 14 bits (at most 7 pending plus 7 new).
	var acc uint16
	var pending uint
	for i := 0; i < len(s); i++ {
		acc |= uint16(s[i]) << pending
		pending += 7
		if pending >= 8 {
			buf.WriteByte(byte(acc)) // bytes.Buffer.WriteByte always returns nil
			acc >>= 8
			pending -= 8
		}
	}

	// Flush the leftover bits even when they are all zero: the octet count
	// must stay ceil(7*n/8) or the last septet cannot be recovered.
	if pending > 0 {
		buf.WriteByte(byte(acc))
	}
	return nil
}
func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) { func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) {