package encoding

import (
	"bytes"
	"errors"
	"fmt"
)

// gsm7SeptetMask selects the low seven bits of a byte; it is also the largest
// value a single septet can hold.
const gsm7SeptetMask = 0x7F

// GSM7Coder packs 7-bit characters into octets and unpacks them again, using
// the usual GSM 7-bit layout: septets are laid out back to back, least
// significant bit first, so eight characters occupy seven bytes.
//
// The coder works on raw byte values. It does not translate between ASCII and
// the GSM default alphabet; every input byte must already fit in seven bits.
type GSM7Coder struct{}

// Encode packs the characters of *s into buf, appending exactly
// EncodesInto(s) bytes.
//
// For best performance the caller should hand in a buffer that already has
// room for EncodesInto(s) more bytes; otherwise Encode grows it once up front.
//
// An error is returned when s or buf is nil, or when *s contains a byte above
// 0x7F. The input is validated before anything is written, so buf is left
// untouched on error.
func (c GSM7Coder) Encode(s *string, buf *bytes.Buffer) error {
	if s == nil {
		return errors.New("nil input string")
	}
	if buf == nil {
		return errors.New("nil output buffer")
	}

	src := *s
	for i := 0; i < len(src); i++ {
		if src[i] > gsm7SeptetMask {
			return fmt.Errorf("invalid character at index %d", i)
		}
	}

	// Grow guarantees room for this many additional bytes and is a no-op when
	// the spare capacity is already sufficient.
	buf.Grow(c.EncodesInto(s))

	var (
		acc   uint16 // pending bits, least significant bit first
		nbits uint   // number of valid bits currently held in acc
	)
	for i := 0; i < len(src); i++ {
		acc |= uint16(src[i]) << nbits
		nbits += 7
		// At most 14 bits are pending here, so one flush per septet suffices.
		if nbits >= 8 {
			buf.WriteByte(byte(acc))
			acc >>= 8
			nbits -= 8
		}
	}
	// Flush the trailing partial octet even when its bits are all zero, so the
	// output length always matches EncodesInto and no septet is lost.
	if nbits > 0 {
		buf.WriteByte(byte(acc))
	}
	return nil
}

// Decode unpacks the unread contents of buf into a string holding
// DecodesInto(buf) septets, one per output byte.
//
// buf is only read: its contents are neither modified nor consumed.
//
// The packed form does not record how many septets were encoded. When the
// input length is a multiple of seven bytes the last septet may be padding, in
// which case the result ends with a spurious 0x00 character that the caller
// has to strip if it knows the real length.
//
// An error is returned only when buf is nil.
func (c GSM7Coder) Decode(buf *bytes.Buffer) (string, error) {
	if buf == nil {
		return "", errors.New("nil input buffer")
	}

	packed := buf.Bytes()
	out := make([]byte, c.DecodesInto(buf))
	for i := range out {
		bit := i * 7
		pos, shift := bit/8, uint(bit%8)

		// Read a 16-bit window starting at the byte that holds the septet's
		// lowest bit; the septet may spill into the following byte.
		window := uint16(packed[pos])
		if pos+1 < len(packed) {
			window |= uint16(packed[pos+1]) << 8
		}
		out[i] = byte(window>>shift) & gsm7SeptetMask
	}
	return string(out), nil
}

// InsertAt writes value at position index of *data, moving the elements from
// index onward one slot to the right. The slice length does not change, so the
// previous last element is discarded.
//
// It never allocates, which means *data must already be long enough: the call
// is a no-op when data is nil or index is outside [0, len(*data)).
func InsertAt(data *[]byte, index int, value byte) {
	if data == nil || index < 0 || index >= len(*data) {
		return
	}
	d := *data
	copy(d[index+1:], d[index:])
	d[index] = value
}

// EncodesInto reports how many bytes Encode produces for *s, i.e. the number
// of septets times seven bits, rounded up to whole octets. A nil s yields 0.
func (c GSM7Coder) EncodesInto(s *string) int {
	if s == nil {
		return 0
	}
	return (len(*s)*7 + 7) / 8
}

// DecodesInto reports how many septets Decode extracts from the unread part of
// buf: the number of complete 7-bit groups in its bits. A nil buf yields 0.
func (c GSM7Coder) DecodesInto(buf *bytes.Buffer) int {
	if buf == nil {
		return 0
	}
	return buf.Len() * 8 / 7
}