From b2a588e66776368ac7827ef07fe225061dbd5619 Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Tue, 30 Jul 2024 22:45:56 +0200 Subject: [PATCH] Add "len" methods to encode and decode To get length of encoded/decoded string --- encoding/gsm7.go | 36 +++++++++++++++++++-------- encoding/gsm7_test.go | 57 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/encoding/gsm7.go b/encoding/gsm7.go index 914dca7..1649840 100644 --- a/encoding/gsm7.go +++ b/encoding/gsm7.go @@ -3,10 +3,15 @@ package encoding import ( "bytes" "fmt" + "log" ) type GSM7Coder struct{} +// Make sure buffer can fit EncodesInto bytes +// Otherwise Encode will allocate memory as it sees fit +// Which is fine but not optimal +// Preallocate the buffer with the size of EncodesInto bytes func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error { // utf8 := *(*[]byte)(unsafe.Pointer(&s)) utf8 := []byte(s) @@ -15,10 +20,6 @@ func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error { bitshift byte = 0 leap, shift bool ) - tbw := len(utf8) * 7 / 8 - if buf.Available() < tbw { - buf.Grow(tbw) - } for index, septet := range utf8 { if septet > 0b01111111 { @@ -62,17 +63,18 @@ func (c *GSM7Coder) Encode(s string, buf *bytes.Buffer) error { offset = 1 } } + log.Println(buf.Cap(), buf.Len()) return nil } func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) { gsm7 := buf.Bytes() var ( - offset int = (len(gsm7) / 8) + 1 - bitshift byte = 0 + offset int = (len(gsm7) / 8) + 1 + bitshift byte = 0 leap, shift bool ) - outLength := len(gsm7)*8/7 + outLength := DecodesInto(buf) lengthDiff := outLength - len(gsm7) gsm7 = append(gsm7, make([]byte, lengthDiff)...) @@ -130,7 +132,7 @@ func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) { } // log.Printf("Result: %+v", gsm7) // for _, v := range gsm7 { - // log.Printf("%08b", v) + // log.Printf("%08b", v) // } return string(gsm7), nil } @@ -138,6 +140,20 @@ func (c *GSM7Coder) Decode(buf *bytes.Buffer) (string, error) { // Allocation free // Which means data MUST have space for value func InsertAt(data *[]byte, index int, value byte) { - copy((*data)[index+1:], (*data)[index:]) - (*data)[index] = value + copy((*data)[index+1:], (*data)[index:]) + (*data)[index] = value +} + +func EncodesInto(s *string) int { + slen := len(*s) + enclen := slen * 7 / 8 + if slen%8 != 0 { + enclen++ + } + return enclen +} +func DecodesInto(buf *bytes.Buffer) int { + blen := buf.Len() + declen := blen * 8 / 7 + return declen } diff --git a/encoding/gsm7_test.go b/encoding/gsm7_test.go index 298a26d..6a5b3fb 100644 --- a/encoding/gsm7_test.go +++ b/encoding/gsm7_test.go @@ -219,7 +219,62 @@ func TestDeletesLastValue(t *testing.T) { } } -// benchmark +// region misc tests +func TestGSM7EncodesIntoSmallString(t *testing.T) { + input := "Sunshine" + expected := 7 + actual := EncodesInto(&input) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +func TestGSM7EncodesIntoLargerNot8nString(t *testing.T) { + input := "Golden rays play, Chasing night away." + expected := 33 + actual := EncodesInto(&input) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +func TestGSM7EncodesIntoLarger8nString(t *testing.T) { + input := "Ducks are fucking great, they quacks, O quackers, what the fuck." + expected := 56 + actual := EncodesInto(&input) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +func TestGSM7DecodesIntoSmallString(t *testing.T) { + input := []byte{0b11010011, 0b10111010, 0b01111011, 0b10001110, 0b01001110, 0b10111011, 0b11001011} + expected := 8 + actual := DecodesInto(bytes.NewBuffer(input)) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +func TestGSM7DecodesIntoLargerNot8nString(t *testing.T) { + input := []byte{0b11000111, 0b00110111, 0b10011011, 0b01011100, 0b01110110, 0b10000011, 0b11100100, 0b11100001, 0b11111100, 0b00011100, 0b00000100, 0b01100111, 0b10000111, 0b11110011, 0b00101100, 0b11010000, 0b00010000, 0b00011101, 0b10011110, 0b10100111, 0b11011101, 0b01100111, 0b10010000, 0b00111011, 0b01111101, 0b01000110, 0b11010011, 0b01000001, 0b11100001, 0b01111011, 0b00111000, 0b11101111, 0b00000010} + expected := 37 + actual := DecodesInto(bytes.NewBuffer(input)) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +func TestGSM7DecodesIntoLarger8nString(t *testing.T) { + input := []byte{0b11000100, 0b11111010, 0b01111000, 0b00111101, 0b00000111, 0b10000101, 0b11100101, 0b01100101, 0b10010000, 0b10111001, 0b00111110, 0b01011110, 0b10100111, 0b11011101, 0b01100111, 0b11010000, 0b01011001, 0b01011110, 0b00001110, 0b11010011, 0b01011001, 0b00100000, 0b00111010, 0b10111010, 0b10011100, 0b00000111, 0b11000101, 0b11101011, 0b11100001, 0b11110001, 0b01111010, 0b11001110, 0b00000010, 0b00111101, 0b01000001, 0b11110001, 0b01111010, 0b01111000, 0b10111100, 0b00101110, 0b11001011, 0b11100111, 0b00101100, 0b11010000, 0b00011101, 0b00011101, 0b10100110, 0b10000011, 0b11101000, 0b11101000, 0b00110010, 0b11001000, 0b01011100, 0b00011111, 0b10101111, 0b01011101} + expected := 64 + actual := DecodesInto(bytes.NewBuffer(input)) + if actual != expected { + t.Errorf("Expected %d, but got %d", expected, actual) + } +} + +// region benchmark func BenchmarkGSM7EncodeSimpleASCIIString(b *testing.B) { coder := &GSM7Coder{} var buf bytes.Buffer