// GSM7Coder is a stateless coder for packed 7-bit septet data.
type GSM7Coder struct{}

// Encode packs the bytes of s as 7-bit septets into octets and appends the
// packed octets to buf.
//
// Each byte of s is used as a septet value as-is; no translation to the GSM
// default alphabet is performed here. Septets are packed least-significant
// bit first: the low bits of each following septet fill the unused high bits
// of the current octet, so every 8 septets produce 7 octets. A final partial
// octet is zero-padded in its high bits and is always emitted, even when its
// value is 0, so the number of octets written is always ceil(len(s)*7/8).
//
// It returns an error if any byte of s is greater than 0x7F. In that case
// nothing is written to buf. An empty s writes nothing and returns nil.
func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error {
	packed := make([]byte, 0, (len(s)*7+7)/8)

	var (
		acc   uint16 // bits waiting to be flushed, least-significant first
		nbits uint   // number of valid bits currently held in acc (0..14)
	)
	for i := 0; i < len(s); i++ {
		septet := s[i]
		if septet > 0x7F {
			return fmt.Errorf("invalid character at index %d", i)
		}
		// At most 7 bits are pending here, so the shifted septet always
		// fits in the 16-bit accumulator.
		acc |= uint16(septet) << nbits
		nbits += 7
		if nbits >= 8 {
			packed = append(packed, byte(acc))
			acc >>= 8
			nbits -= 8
		}
	}
	// Flush the remaining bits; the high bits of this octet are zero padding.
	if nbits > 0 {
		packed = append(packed, byte(acc))
	}

	// Write only after the whole input has been validated so that a failed
	// call leaves buf untouched.
	if len(packed) > 0 {
		buf.Write(packed)
	}
	return nil
}