Huffman Coding is a lossless data compression algorithm that assigns variable-length codes to input characters, with shorter codes assigned to more frequent characters. It is widely used in data compression systems.
import heapq
class Node:
def __init__(self, freq, char=None, left=None, right=None):
self.freq = freq
self.char = char
self.left = left
self.right = right
# For priority queue comparison
def __lt__(self, other):
return self.freq < other.freq
def build_huffman_tree(freq_map):
heap = [Node(freq, char) for char, freq in freq_map.items()]
heapq.heapify(heap)
while len(heap) > 1:
node1 = heapq.heappop(heap)
node2 = heapq.heappop(heap)
merged = Node(node1.freq + node2.freq, None, node1, node2)
heapq.heappush(heap, merged)
return heap[0]
def generate_codes(node, prefix="", code_map=None):
if code_map is None:
code_map = {}
if node.char is not None:
code_map[node.char] = prefix
else:
generate_codes(node.left, prefix + "0", code_map)
generate_codes(node.right, prefix + "1", code_map)
return code_map
# Example usage
data = "huffman coding algorithm"
freq_map = {}
for ch in data:
freq_map[ch] = freq_map.get(ch, 0) + 1
root = build_huffman_tree(freq_map)
codes = generate_codes(root)
print("Character codes:")
for ch in codes:
print(f"{ch}: {codes[ch]}")