137 lines
4.0 KiB
Go
137 lines
4.0 KiB
Go
package encoding
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
)
|
|
|
|
// GSM7Coder converts between strings of 7-bit bytes and their packed octet
// form (8 septets per 7 octets). It has no fields, so the zero value is ready
// to use.
type GSM7Coder struct{}
|
|
|
|
// Make sure buffer can fit EncodesInto bytes
|
|
// Otherwise Encode will allocate memory as it sees fit
|
|
// Which is fine but not optimal
|
|
// Preallocate the buffer with the size of EncodesInto bytes
|
|
func (c *GSM7Coder) Encode(s *string, buf *bytes.Buffer) error {
|
|
// utf8 := *(*[]byte)(unsafe.Pointer(&s))
|
|
utf8 := []byte(*s)
|
|
var (
|
|
offset int = 1
|
|
bitshift byte = 0
|
|
leap, shift bool
|
|
)
|
|
encodedSize := c.EncodesInto(s)
|
|
cap := buf.Cap()
|
|
if cap < encodedSize {
|
|
buf.Grow(encodedSize - cap)
|
|
}
|
|
|
|
for index, septet := range utf8 {
|
|
if septet > 0b01111111 {
|
|
return fmt.Errorf("invalid character at index %d", index)
|
|
}
|
|
if index == 0 {
|
|
continue
|
|
}
|
|
bitshift++
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte:%08b (%-3d) Leap:%5v", index, offset, bitshift, utf8[index], utf8[index], utf8[index-offset], utf8[index-offset], leap)
|
|
mask := byte(255 >> (8 - bitshift))
|
|
masked := (mask & septet) << (8 - bitshift)
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d Mask:%08b Masked:%08b", index, offset, bitshift, mask, masked)
|
|
if leap {
|
|
masked >>= 1
|
|
}
|
|
utf8[index-offset] |= masked
|
|
utf8[index] >>= bitshift
|
|
|
|
if !leap {
|
|
buf.WriteByte(utf8[index-offset])
|
|
}
|
|
if index == len(utf8)-1 && utf8[index] > 0 {
|
|
buf.WriteByte(utf8[index])
|
|
}
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte:%08b (%-3d) Leap:%5v", index, offset, bitshift, utf8[index], utf8[index], utf8[index-offset], utf8[index-offset], leap)
|
|
if bitshift >= 7 {
|
|
if leap {
|
|
// log.Printf("Shift at Index:%-3d Offset:%-3d Bitshift:%-3d", index, offset, bitshift)
|
|
leap = false
|
|
bitshift = 0
|
|
offset++
|
|
shift = true
|
|
continue
|
|
}
|
|
// log.Printf("Leap at Index:%-3d Offset:%-3d Bitshift:%-3d", index, offset, bitshift)
|
|
leap = true
|
|
bitshift = 6
|
|
}
|
|
if shift {
|
|
offset = 1
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) {
|
|
gsm7 := buf.Bytes()
|
|
var (
|
|
offset int
|
|
bitshift byte = 0
|
|
leap bool
|
|
)
|
|
outLength := c.DecodesInto(buf)
|
|
lengthDiff := outLength - len(gsm7)
|
|
gsm7 = append(gsm7, make([]byte, lengthDiff)...)
|
|
start := len(gsm7) - 2
|
|
|
|
// We don't care about the last byte
|
|
// Unless it's the %8.......
|
|
// We'll deal with that later
|
|
for index := start; index >= 0; index-- {
|
|
octet := gsm7[index]
|
|
bitshift = byte((index % 7) + 1)
|
|
if bitshift == 7 {
|
|
leap = true
|
|
}
|
|
offset = 1
|
|
// log.Println(offset, index, index+offset)
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte(%-3d):%08b (%-3d) Leap:%5v", index, offset, bitshift, gsm7[index], gsm7[index], index+offset, gsm7[index+offset], gsm7[index+offset], leap)
|
|
|
|
mask := byte(255 << (8 - bitshift))
|
|
masked := (mask & octet) >> (8 - bitshift)
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d Mask:%08b Masked:%08b", index, offset, bitshift, mask, masked)
|
|
if leap {
|
|
InsertAt(&gsm7, index+offset, masked)
|
|
} else {
|
|
gsm7[index+offset] |= masked
|
|
}
|
|
// Remove last bitshift bits
|
|
gsm7[index] <<= bitshift
|
|
// Move the remaining bit once to the right to form septet instead of octet
|
|
gsm7[index] >>= 1
|
|
|
|
// log.Printf("Index:%-3d Offset:%-3d Bitshift:%-3d CurrentByte:%08b (%-3d) OffsetByte(%-3d):%08b (%-3d) Leap:%5v", index, offset, bitshift, gsm7[index], gsm7[index], index+offset, gsm7[index+offset], gsm7[index+offset], leap)
|
|
leap = false
|
|
}
|
|
return string(gsm7), nil
|
|
}
|
|
|
|
// InsertAt stores value at position index of *data after moving the elements
// from index onward one place to the right.
//
// It never allocates and never changes the slice length, so the element that
// was last before the call is overwritten and lost. Callers that want to keep
// every element must extend the slice by one spare element beforehand.
//
// InsertAt panics if index is outside [0, len(*data)).
func InsertAt(data *[]byte, index int, value byte) {
	s := *data
	// copy handles the overlapping ranges correctly and stops at len(s).
	copy(s[index+1:], s[index:])
	s[index] = value
}
|
|
|
|
func (c GSM7Coder) EncodesInto(s *string) int {
|
|
slen := len(*s)
|
|
enclen := slen * 7 / 8
|
|
if slen%8 != 0 {
|
|
enclen++
|
|
}
|
|
return enclen
|
|
}
|
|
func (c GSM7Coder) DecodesInto(buf *bytes.Buffer) int {
|
|
blen := buf.Len()
|
|
declen := blen * 8 / 7
|
|
return declen
|
|
}
|