81 lines
2.5 KiB
Go
81 lines
2.5 KiB
Go
package encoding
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
)
|
|
|
|
// GSM7Coder groups the GSM 7-bit Encode and Decode methods. It has no fields,
// so the zero value is ready to use.
type GSM7Coder struct{}
|
|
|
|
func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error {
|
|
// utf8 := *(*[]byte)(unsafe.Pointer(&s))
|
|
utf8 := []byte(s)
|
|
var offset int = 1
|
|
var bitshift byte = 0
|
|
var leap, shift bool
|
|
|
|
for index, septet := range utf8 {
|
|
if septet > 0b01111111 {
|
|
return fmt.Errorf("invalid character at index %d", index)
|
|
}
|
|
if index == 0 {
|
|
continue
|
|
}
|
|
bitshift++
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte:%08b (%-3d) Leap:%5v", index, offset, bitshift, utf8[index], utf8[index], utf8[index-offset], utf8[index-offset], leap)
|
|
mask := byte(255 >> (8 - bitshift))
|
|
masked := (mask & septet) << (8 - bitshift)
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d Mask:%08b Masked:%08b", index, offset, bitshift, mask, masked)
|
|
if leap {
|
|
masked >>= 1
|
|
}
|
|
utf8[index-offset] |= masked
|
|
utf8[index] >>= bitshift
|
|
|
|
if !leap {
|
|
buf.WriteByte(utf8[index-offset])
|
|
}
|
|
if index == len(utf8)-1 && utf8[index] > 0 {
|
|
buf.WriteByte(utf8[index])
|
|
}
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte:%08b (%-3d) Leap:%5v", index, offset, bitshift, utf8[index], utf8[index], utf8[index-offset], utf8[index-offset], leap)
|
|
if bitshift >= 7 {
|
|
if leap {
|
|
// log.Printf("Shift at Index:%-3d Offset:%-3d Bitshift:%-3d", index, offset, bitshift)
|
|
leap = false
|
|
bitshift = 0
|
|
offset++
|
|
shift = true
|
|
continue
|
|
}
|
|
// log.Printf("Leap at Index:%-3d Offset:%-3d Bitshift:%-3d", index, offset, bitshift)
|
|
leap = true
|
|
bitshift = 6
|
|
}
|
|
if shift {
|
|
offset = 1
|
|
}
|
|
}
|
|
return nil
|
|
// The issue happens during leap
|
|
// 2024/07/28 16:56:12 Index 7 1 7 11100100 00000000
|
|
// 2024/07/28 16:56:12 Leap at 7 1 7
|
|
// 2024/07/28 16:56:12 Index 8 1 7 11000010 00000000
|
|
// 2024/07/28 16:56:12 Shift at 8 1 7
|
|
// 2024/07/28 16:56:12 Index 9 2 1 11000010 00111100
|
|
|
|
// The correct output should be:
|
|
// Index 9 2 1 11100001 00111100
|
|
|
|
// Also
|
|
// 2024/07/28 16:58:49 Index 8 1 7 00000000 01100001
|
|
// 2024/07/28 16:58:49 Index 8 1 7 11000010 00000000
|
|
// 2024/07/28 16:58:49 Shift at 8 1 7
|
|
// 2024/07/28 16:58:49 Index 9 2 1 11000010 01111001
|
|
// 2024/07/28 16:58:49 Index 9 2 1 11000010 00111100 <-- the LSB 1 that is removed is not added to the previous byte (should be 11100001, not 11000010)
|
|
}
|
|
|
|
func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) {
|
|
return buf.String(), nil
|
|
}
|