package encoding

import (
	"bytes"
	"fmt"
)

// GSM7Coder packs strings of 7-bit characters into octets, seven bits per
// character, least significant bit first.
//
// The bytes of the input string are used as septet values as-is; no
// translation between ASCII/UTF-8 and another alphabet is performed.
type GSM7Coder struct{}

// Encode packs the bytes of s into septets and appends the resulting octets
// to buf.
//
// Every byte of s must be in the range 0x00-0x7F. If a byte outside that
// range is found, an error naming its index is returned and nothing is
// written to buf. For an input of n bytes exactly (7*n+7)/8 octets are
// appended; unused high bits of the last octet are zero. An empty string
// appends nothing.
func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error {
	// Validate up front so that a failure never leaves partial output in buf.
	for i := 0; i < len(s); i++ {
		if s[i] > 0x7F {
			return fmt.Errorf("invalid character at index %d", i)
		}
	}

	buf.Grow((len(s)*7 + 7) / 8)

	var (
		acc   uint16 // bits waiting to be emitted, least significant first
		nbits uint   // number of valid bits currently held in acc (0-14)
	)
	for i := 0; i < len(s); i++ {
		// Place the new septet directly above the bits carried over from
		// the previous characters.
		acc |= uint16(s[i]) << nbits
		nbits += 7
		if nbits >= 8 {
			// bytes.Buffer.WriteByte always returns a nil error.
			buf.WriteByte(byte(acc))
			acc >>= 8
			nbits -= 8
		}
	}
	// Flush the final partial octet even when its value is zero: it still
	// carries the high bits of the last septet.
	if nbits > 0 {
		buf.WriteByte(byte(acc))
	}
	return nil
}

// Decode returns the contents of buf as a string. The returned error is
// always nil.
//
// NOTE(review): this is a pass-through; it does not unpack septets, so it is
// not the inverse of Encode.
func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) {
	return buf.String(), nil
}