Huffman Coding Algorithm

What is Huffman Coding?

Huffman Coding is a lossless data compression algorithm that assigns variable-length codes to input characters, with shorter codes assigned to more frequent characters. It is widely used in data compression systems.

How Huffman Coding Works

Python Code Example

import heapq

class Node:
    def __init__(self, freq, char=None, left=None, right=None):
        self.freq = freq
        self.char = char
        self.left = left
        self.right = right

    # For priority queue comparison
    def __lt__(self, other):
        return self.freq < other.freq

def build_huffman_tree(freq_map):
    heap = [Node(freq, char) for char, freq in freq_map.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        node1 = heapq.heappop(heap)
        node2 = heapq.heappop(heap)
        merged = Node(node1.freq + node2.freq, None, node1, node2)
        heapq.heappush(heap, merged)

    return heap[0]

def generate_codes(node, prefix="", code_map=None):
    if code_map is None:
        code_map = {}
    if node.char is not None:
        code_map[node.char] = prefix
    else:
        generate_codes(node.left, prefix + "0", code_map)
        generate_codes(node.right, prefix + "1", code_map)
    return code_map

# Example usage
data = "huffman coding algorithm"
freq_map = {}
for ch in data:
    freq_map[ch] = freq_map.get(ch, 0) + 1

root = build_huffman_tree(freq_map)
codes = generate_codes(root)
print("Character codes:")
for ch in codes:
    print(f"{ch}: {codes[ch]}")

Applications