Skip to content

Commit c51d5b2

Browse files
committed
feat: implementing Huffman coding
1 parent 852d839 commit c51d5b2

File tree

2 files changed

+324
-2
lines changed

2 files changed

+324
-2
lines changed

compression/compression.go

+235-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,242 @@
11
package compression
22

3+
import (
4+
"bytes"
5+
"fmt"
6+
"log"
7+
"strings"
8+
"unicode/utf8"
9+
10+
"encoding/binary"
11+
)
12+
13+
// HNode is a node of a Huffman tree.
//
// Leaves carry a character together with the number of times it occurs in
// the input. Internal nodes are created without a character and carry the
// combined frequency of their two children.
type HNode struct {
	// char is the symbol stored in a leaf; it is only read on leaves.
	char rune
	// freq is the occurrence count (leaf) or the sum of the children's
	// frequencies (internal node).
	freq int
	// left is the child reached by appending '0' to the code.
	left *HNode
	// right is the child reached by appending '1' to the code.
	right *HNode
}
19+
20+
// implements the interface Orderable
21+
func (hn *HNode) Less(other Orderable) bool {
22+
otherhn, ok := other.(*HNode)
23+
if !ok {
24+
log.Printf("Type assertion failed: expected *HNode, got %T", other)
25+
return false
26+
}
27+
return hn.freq < otherhn.freq
28+
}
29+
30+
// builds HNodes and puts them in a minimum priority queue
31+
func buildHNodesPQ(chars []rune) *MinPQ[Orderable] {
32+
charFreq, hNodesPQ := make(map[rune]int), NewMinPQ[Orderable]()
33+
34+
for _, r := range chars {
35+
charFreq[r] += 1
36+
}
37+
38+
for k, v := range charFreq {
39+
hf := &HNode{
40+
char: k,
41+
freq: v,
42+
}
43+
hNodesPQ.Insert(hf)
44+
}
45+
46+
return hNodesPQ
47+
}
48+
49+
// builds a Hufmann Tree by picking the HNode having the least character frequency
50+
func buildHuffmannTree(hfNodesPQ *MinPQ[Orderable]) *HNode {
51+
var a, b *HNode
52+
for !hfNodesPQ.IsEmpty() {
53+
temp, err := hfNodesPQ.DeleteMin()
54+
a = temp.(*HNode)
55+
if err != nil {
56+
log.Fatalln(err.Error())
57+
}
58+
if hfNodesPQ.IsEmpty() {
59+
break
60+
} else {
61+
temp, err := hfNodesPQ.DeleteMin()
62+
b = temp.(*HNode)
63+
if err != nil {
64+
log.Fatalln(err.Error())
65+
}
66+
}
67+
c := &HNode{
68+
freq: a.freq + b.freq,
69+
left: a,
70+
right: b,
71+
}
72+
hfNodesPQ.Insert(c)
73+
}
74+
75+
return a
76+
}
77+
78+
// builds the mapping to encode the data. CodeCharMap maps the character to the Hufmann Code generated using the Hufmann Tree
79+
func buildCodeCharMap(root *HNode) map[string]rune {
80+
codeCharMap := make(map[string]rune)
81+
var buildCodes func(node *HNode, codeBuffer *bytes.Buffer)
82+
buildCodes = func(node *HNode, codeBuffer *bytes.Buffer) {
83+
if node.left == nil && node.right == nil {
84+
codeCharMap[codeBuffer.String()] = node.char
85+
return
86+
}
87+
if node.left != nil {
88+
codeBuffer.WriteByte('0')
89+
buildCodes(node.left, codeBuffer)
90+
codeBuffer.Truncate(codeBuffer.Len() - 1)
91+
}
92+
if node.right != nil {
93+
codeBuffer.WriteByte('1')
94+
buildCodes(node.right, codeBuffer)
95+
codeBuffer.Truncate(codeBuffer.Len() - 1)
96+
}
97+
}
98+
99+
if root.left == nil && root.right == nil {
100+
codeCharMap["0"] = root.char
101+
return codeCharMap
102+
}
103+
var codeBuffer bytes.Buffer
104+
buildCodes(root, &codeBuffer)
105+
return codeCharMap
106+
}
107+
108+
// serializeCodeCharMap serializes the code -> character mapping.
//
// Each entry is written as
//
//	<code length: 1 byte><character length: 1 byte><code><character as UTF-8>
//
// Entries follow map iteration order, so their order in the output is not
// fixed between calls.
//
// It returns an error when a character cannot be encoded as UTF-8 or when a
// code is longer than 255 bytes and so does not fit the one-byte length
// prefix. An empty map yields an empty result.
func serializeCodeCharMap(codeCharMap map[string]rune) ([]byte, error) {
	// Largest length that fits the single-byte prefix.
	const maxCodeLen = 255

	var encodedBytesBuffer bytes.Buffer

	for code, char := range codeCharMap {
		codeLen := len(code)
		if codeLen > maxCodeLen {
			// byte(codeLen) would silently wrap and corrupt the mapping.
			return nil, fmt.Errorf("huffman code of %d bytes for %q exceeds the %d-byte limit", codeLen, char, maxCodeLen)
		}
		charLen := utf8.RuneLen(char)
		if charLen == -1 {
			return nil, fmt.Errorf("invalid UTF-8 rune: %v", char)
		}

		// Length prefixes first, then the code and the character.
		encodedBytesBuffer.WriteByte(byte(codeLen))
		encodedBytesBuffer.WriteByte(byte(charLen))
		encodedBytesBuffer.WriteString(code)
		encodedBytesBuffer.WriteRune(char)
	}

	return encodedBytesBuffer.Bytes(), nil
}
130+
131+
// deserializeCodeCharMap reconstructs the code -> character mapping from the
// first encodingMappingLen bytes of encodedBytesBuffer, leaving whatever
// follows unread.
//
// Each entry is expected as
//
//	<code length: 1 byte><character length: 1 byte><code><character as UTF-8>
//
// It returns an error when the buffer ends before an entry is complete, when
// a character is not exactly one valid UTF-8 encoded rune of the announced
// length, or when the entries do not add up to encodingMappingLen bytes.
func deserializeCodeCharMap(encodedBytesBuffer *bytes.Buffer, encodingMappingLen int32) (map[string]rune, error) {
	codeCharMap := make(map[string]rune)
	total := int(encodingMappingLen)
	n := 0 // bytes of the mapping consumed so far

	for n < total {
		keyLenByte, err := encodedBytesBuffer.ReadByte()
		if err != nil {
			return nil, fmt.Errorf("failed to read key length: %w", err)
		}
		valLenByte, err := encodedBytesBuffer.ReadByte()
		if err != nil {
			return nil, fmt.Errorf("failed to read value length: %w", err)
		}
		keyLen, valLen := int(keyLenByte), int(valLenByte)
		n += 2

		// Next returns fewer bytes than asked for when the buffer runs out,
		// which makes truncated input detectable.
		keyBytes := encodedBytesBuffer.Next(keyLen)
		if len(keyBytes) != keyLen {
			return nil, fmt.Errorf("failed to read key: want %d bytes, got %d", keyLen, len(keyBytes))
		}
		// Copy now: the slice is only valid until the next read.
		key := string(keyBytes)
		n += keyLen

		valueBytes := encodedBytesBuffer.Next(valLen)
		if len(valueBytes) != valLen {
			return nil, fmt.Errorf("failed to read value: want %d bytes, got %d", valLen, len(valueBytes))
		}
		value, size := utf8.DecodeRune(valueBytes)
		// RuneError with size <= 1 signals an empty or malformed encoding;
		// a three-byte U+FFFD is a legitimate character and must be accepted.
		if size != valLen || (value == utf8.RuneError && size <= 1) {
			return nil, fmt.Errorf("invalid UTF-8 rune detected")
		}
		n += valLen

		codeCharMap[key] = value
	}

	if n != total {
		return nil, fmt.Errorf("encoding mapping length mismatch: consumed %d bytes, expected %d", n, total)
	}
	return codeCharMap, nil
}
176+
177+
// encode the given string returns a string in the format: <length of encoding mapping><encoding mapping><encoded string>
3178
func Encode(s string) string {
4-
return s
179+
chars := []rune(s)
180+
pq := buildHNodesPQ(chars)
181+
root := buildHuffmannTree(pq)
182+
183+
// codeCharMap will be used for decoding the string
184+
codeCharMap := buildCodeCharMap(root)
185+
186+
var encodedBytesBuffer bytes.Buffer
187+
encodingMappingBytes, err := serializeCodeCharMap(codeCharMap)
188+
if err != nil {
189+
log.Fatalln(err.Error())
190+
}
191+
192+
// writing the length of the mapping used for encoding the given string s
193+
err = binary.Write(&encodedBytesBuffer, binary.BigEndian, int32(len(encodingMappingBytes)))
194+
if err != nil {
195+
log.Fatalln("Error occured in writing the length of the encoding mapping to the buffer:", err)
196+
}
197+
// writing the encoding mapping itself
198+
encodedBytesBuffer.Write(encodingMappingBytes)
199+
200+
// charCodeMap will be used for encoding the string
201+
charCodeMap := make(map[rune]string)
202+
for k, v := range codeCharMap {
203+
charCodeMap[v] = k
204+
}
205+
206+
for _, el := range chars {
207+
encodedBytesBuffer.WriteString(charCodeMap[el])
208+
}
209+
210+
return encodedBytesBuffer.String()
5211
}
6212

213+
// decode the given string
7214
func Decode(s string) string {
8-
return s
215+
encodedDataBytes := bytes.NewBuffer([]byte(s))
216+
var encodingMappingLen int32
217+
err := binary.Read(encodedDataBytes, binary.BigEndian, &encodingMappingLen)
218+
if err != nil {
219+
log.Fatalln("Error reading encoding mapping length:", err)
220+
}
221+
222+
codeCharMap, err := deserializeCodeCharMap(encodedDataBytes, int32(encodingMappingLen))
223+
if err != nil {
224+
log.Fatalln(err.Error())
225+
}
226+
227+
var sofar bytes.Buffer
228+
var decodedStringBuilder strings.Builder
229+
230+
for encodedDataBytes.Len() > 0 {
231+
k, err := encodedDataBytes.ReadByte()
232+
if err != nil {
233+
log.Fatalln(err.Error())
234+
}
235+
sofar.WriteByte(k)
236+
if v, ok := codeCharMap[sofar.String()]; ok {
237+
decodedStringBuilder.WriteRune(v)
238+
sofar.Reset()
239+
}
240+
}
241+
return decodedStringBuilder.String()
9242
}

compression/minpq.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package compression
2+
3+
import (
4+
"fmt"
5+
)
6+
7+
// Orderable is implemented by values that can be ranked against another
// Orderable. Less reports whether the receiver ranks before other.
type Orderable interface {
	Less(other Orderable) bool
}

// MinPQ is a minimum priority queue implemented as a binary heap stored in a
// slice: the children of the element at index k live at 2k+1 and 2k+2, and
// the smallest element is always at index 0.
type MinPQ[T Orderable] struct {
	arr []T
}

// NewMinPQ returns an empty priority queue.
func NewMinPQ[T Orderable]() *MinPQ[T] {
	return &MinPQ[T]{
		arr: make([]T, 0),
	}
}

// IsEmpty reports whether the queue holds no elements.
func (pq *MinPQ[T]) IsEmpty() bool {
	return len(pq.arr) == 0
}

// Size returns the number of elements in the queue.
func (pq *MinPQ[T]) Size() int {
	return len(pq.arr)
}

// Min returns the smallest element without removing it. It returns an error
// when the queue is empty.
func (pq *MinPQ[T]) Min() (T, error) {
	if len(pq.arr) == 0 {
		var zero T
		return zero, fmt.Errorf("priority queue is empty")
	}
	return pq.arr[0], nil
}

// swap exchanges the elements at indices a and b.
func (pq *MinPQ[T]) swap(a, b int) {
	pq.arr[a], pq.arr[b] = pq.arr[b], pq.arr[a]
}

// rise moves the element at index k up while it is smaller than its parent.
func (pq *MinPQ[T]) rise(k int) {
	for k > 0 {
		parent := (k - 1) / 2
		if !pq.arr[k].Less(pq.arr[parent]) {
			break
		}
		pq.swap(k, parent)
		k = parent
	}
}

// sink moves the element at index k down while one of its children is
// smaller, always swapping with the smaller child.
func (pq *MinPQ[T]) sink(k int) {
	n := len(pq.arr)
	for 2*k+1 < n {
		left := 2*k + 1
		right := left + 1
		smallest := left

		if right < n && pq.arr[right].Less(pq.arr[left]) {
			smallest = right
		}

		if !pq.arr[smallest].Less(pq.arr[k]) {
			break
		}

		pq.swap(k, smallest)
		k = smallest
	}
}

// Insert adds x to the queue.
func (pq *MinPQ[T]) Insert(x T) {
	pq.arr = append(pq.arr, x)
	pq.rise(len(pq.arr) - 1)
}

// DeleteMin removes and returns the smallest element. It returns an error
// when the queue is empty.
func (pq *MinPQ[T]) DeleteMin() (T, error) {
	var zero T
	if len(pq.arr) == 0 {
		return zero, fmt.Errorf("priority queue is empty")
	}

	last := len(pq.arr) - 1
	x := pq.arr[0]
	pq.arr[0] = pq.arr[last]
	// Clear the vacated slot: otherwise the backing array keeps a stale copy
	// and the removed element stays reachable after the slice is shortened.
	pq.arr[last] = zero
	pq.arr = pq.arr[:last]

	pq.sink(0)

	return x, nil
}

0 commit comments

Comments
 (0)