-
Notifications
You must be signed in to change notification settings - Fork 0
/
Zcoder.py
213 lines (197 loc) · 8.32 KB
/
Zcoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# -*- encoding: utf-8 -*-
'''
@File : Zcoder.py
@Time : 2023/04/08 14:25:35
@Author : zeroc
'''
import sys
sys.dont_write_bytecode = True
from tqdm import tqdm
from math import log
from LZ import *
from Huffman import *
import os
def replace_last(string, old_substring, new_substring):
    """Replace only the right-most occurrence of ``old_substring``.

    Arguments:
        - string: the text to search
        - old_substring: the fragment to look for (must not be empty)
        - new_substring: the text put in place of the last match

    Returns:
        ``string`` with its last occurrence of ``old_substring`` replaced,
        or ``string`` itself when there is no occurrence.
    """
    prefix, matched, suffix = string.rpartition(old_substring)
    # rpartition yields an empty separator when nothing was found
    if not matched:
        return string
    return prefix + new_substring + suffix
class Zcoder:
    """Compress or decompress one file with the Huffman or LZ78 coder.

    An instance reads a single file into memory. The ``*_compress`` methods
    write an encoded copy of that content and the ``*_decompress`` methods
    write a decoded copy.

    Container layouts written and read by this class:

    - Huffman: ``b"ZCHM"`` | maximum codeword length (1 byte) | number of
      codewords of each length 1..max (1 byte each) | the codebook keys
      concatenated | encoded payload | padding value (1 byte).
    - LZ78: ``b"ZCLZ"`` | one record per dictionary entry, made of the
      entry's symbol followed by its pointer as a big-endian integer. The
      pointer of entry number ``i`` occupies the minimal number of bytes
      able to hold ``i`` (at least one).
    """

    _HUFFMAN_HEAD = b"ZCHM"  # magic bytes of the Huffman container
    _LZ78_HEAD = b"ZCLZ"  # magic bytes of the LZ78 container

    # initialize the Zcoder
    def __init__(self, path: str) -> None:
        """
        Arguments:
            - path: str, the path of the file, relative to the directory
              that contains this module (an absolute path is used as given)

        Exits the process with status 1 when the file is empty.
        """
        # Resolve against the module directory without assuming the module's
        # file name has a particular length.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.path = os.path.join(module_dir, path)  # absolute path of the file
        with open(self.path, "rb") as f:
            self.filecontent = f.read()  # content of the file
        self.dic = []  # the dictionary of the LZ coding
        # check if the file is empty
        if not self.filecontent:
            print("\033[0;31m [Error]The file is empty!\033[0m")
            sys.exit(1)

    @staticmethod
    def _pointer_width(entry_index: int) -> int:
        """Return how many bytes store the pointer of entry ``entry_index``.

        Integer arithmetic is used instead of a floating-point logarithm so
        the result is exact at every power of 256.
        """
        return max(1, (entry_index.bit_length() + 7) // 8)

    @staticmethod
    def _exit_not_compressed() -> None:
        """Report an unrecognised file header and stop with status 1."""
        print(
            "\033[0;31m [Error]The file is not compressed with my algorithm\033[0m")
        sys.exit(1)

    @staticmethod
    def _report_success(action: str, path: str) -> None:
        """Print the two success lines; ``action`` is Compress or Decompress."""
        print("\033[0;32m [Success]{action} successfully!\033[0m".format(
            action=action))
        print("\033[0;32m [Success]The file is stored in {path}\033[0m".format(
            path=path))

    def _encoded_target(self, output_path: str) -> str:
        """Return ``output_path``, or the input path plus ``.enc`` if unset."""
        return output_path if output_path else self.path + ".enc"

    def _decoded_target(self, output_path: str) -> str:
        """Return ``output_path``, or a default derived from the input path.

        The default replaces the last ``.enc`` of the input path by ``.dec``.
        When the input path contains no ``.enc``, ``.dec`` is appended so the
        decoded data never overwrites the input file.
        """
        if output_path:
            return output_path
        candidate = replace_last(self.path, ".enc", ".dec")
        return candidate if candidate != self.path else self.path + ".dec"

    # compress the file using Huffman coding
    def Huffman_compress(self, output_path: str = None) -> None:
        """Huffman-encode the loaded content and write the ZCHM container.

        Arguments:
            - output_path: str, where to write the result; defaults to the
              input path with ``.enc`` appended
        """
        # build the canonical codebook from the byte frequencies
        frequency = HuffmanCoding._MakeFrequencyDict(self.filecontent)
        codebook = HuffmanCoding._ConstructCodeBook(frequency)
        codebook = HuffmanCoding.Canonical(codebook)
        # the header stores how many codewords exist for each length
        max_length = max((len(codebook[char]) for char in codebook), default=0)
        counts = [0] * (max_length + 1)
        for char in codebook:
            counts[len(codebook[char])] += 1
        # 256 does not fit in one byte: 256 codewords of the same length are
        # stored as 0 and restored by Huffman_decompress
        if counts[max_length] == 256:
            counts[max_length] = 0
        length_data = bytes(counts[1:])  # no slot is stored for length 0
        code_data = b"".join(codebook.keys())
        codebook_data = bytes([max_length]) + length_data + code_data
        # encode the file
        encoded_data, padding = HuffmanCoding.Encode(
            self.filecontent, codebook)
        # write the encoded file
        target = self._encoded_target(output_path)
        with open(target, "wb") as f:
            f.write(self._HUFFMAN_HEAD + codebook_data +
                    encoded_data + bytes([padding]))
        self._report_success("Compress", target)

    # decompress the file using Huffman coding
    def Huffman_decompress(self, output_path: str = None) -> None:
        """Decode a ZCHM container held in the loaded content and write it.

        Arguments:
            - output_path: str, where to write the result; defaults to the
              input path with its last ``.enc`` replaced by ``.dec``

        Exits the process with status 1 when the content does not start with
        the ZCHM header.
        """
        encoded_data = self.filecontent
        # check if the file is compressed with my file format
        if encoded_data[:4] != self._HUFFMAN_HEAD:
            self._exit_not_compressed()
        encoded_data = encoded_data[4:]
        # Recover the codebook
        max_length = int(encoded_data[0])
        length = list(encoded_data[1:max_length + 1])
        num = sum(length)
        # an all-zero table with a non-zero max length is the marker written
        # by Huffman_compress for 256 codewords of the same length
        if num == 0 and max_length != 0:
            num = 256
            length[max_length - 1] = 256
        symbols_start = 1 + max_length
        symbols_end = symbols_start + num
        value = [encoded_data[i] for i in range(symbols_start, symbols_end)]
        length2 = []
        for code_length in range(1, max_length + 1):
            length2 += [code_length] * length[code_length - 1]
        codebook = HuffmanCoding._ReConstruct(value, length2)
        # decode the file; the last byte is the padding value
        padding = encoded_data[-1]
        payload = encoded_data[symbols_end:-1]
        decoded_data = HuffmanCoding.Decode(payload, padding, codebook)
        # write the decoded file
        target = self._decoded_target(output_path)
        with open(target, "wb") as f:
            f.write(decoded_data)
        self._report_success("Decompress", target)

    # compress the file using LZ78 coding
    def LZ78_compress(self, output_path: str = None) -> None:
        """LZ78-encode the loaded content and write the ZCLZ container.

        The dictionary returned by ``LZ78.Encode`` is kept in ``self.dic``.

        Arguments:
            - output_path: str, where to write the result; defaults to the
              input path with ``.enc`` appended
        """
        # encode the file
        self.dic = LZ78.Encode(self.filecontent, self.dic)
        # serialise every entry as symbol + big-endian pointer
        records = [
            entry[0] + entry[1].to_bytes(
                self._pointer_width(index), byteorder='big')
            for index, entry in enumerate(self.dic)
        ]
        target = self._encoded_target(output_path)
        with open(target, "wb") as f:
            f.write(self._LZ78_HEAD)
            f.write(b"".join(records))
        self._report_success("Compress", target)

    # decompress the file using LZ78 coding
    def LZ78_decompress(self, output_path: str = None) -> None:
        """Decode a ZCLZ container held in the loaded content and write it.

        The dictionary is rebuilt into ``self.dic`` (starting from an empty
        list) and then passed to ``LZ78.Decode``.

        Arguments:
            - output_path: str, where to write the result; defaults to the
              input path with its last ``.enc`` replaced by ``.dec``

        Exits the process with status 1 when the content does not start with
        the ZCLZ header.
        """
        encoded_data = self.filecontent
        # check if the file is compressed with my file format
        if encoded_data[:4] != self._LZ78_HEAD:
            self._exit_not_compressed()
        payload = encoded_data[4:]
        # Recover the dictionary
        self.dic = []
        offset = 0
        total = len(payload)
        while offset < total:
            # The pointer width depends on the number of the entry being
            # read (the count of entries recovered so far), not on the byte
            # offset inside the payload.
            width = self._pointer_width(len(self.dic))
            if offset + width + 1 > total:
                # too few bytes left for symbol + pointer: the remaining
                # bytes are the pointer of a final entry without a symbol
                self.dic.append(
                    [b'', int.from_bytes(payload[offset:], byteorder='big')])
                break
            symbol = payload[offset:offset + 1]
            pointer = int.from_bytes(
                payload[offset + 1:offset + 1 + width], byteorder='big')
            self.dic.append([symbol, pointer])
            offset += 1 + width
        # Decode the file
        decoded_data = LZ78.Decode(self.dic)
        target = self._decoded_target(output_path)
        with open(target, "wb") as f:
            f.write(b"".join(decoded_data))
        self._report_success("Decompress", target)
if __name__ == "__main__":
    # Demo: round-trip a sample file through the LZ78 coder. The compressed
    # copy is written next to the input with ".enc" appended, then decoded.
    #
    # To try the Huffman coder instead, use the same pattern:
    #     Zcoder("HuffmanTest/ico.zip").Huffman_compress()
    #     Zcoder("HuffmanTest/ico.zip.enc").Huffman_decompress()
    sample_path = "LZTest/ELF"
    compressor = Zcoder(sample_path)
    compressor.LZ78_compress()
    decompressor = Zcoder(sample_path + ".enc")
    decompressor.LZ78_decompress()