|
1 #!/usr/bin/env python |
|
2 # Demo program for zlib; it compresses or decompresses files, but *doesn't* |
|
3 # delete the original. This doesn't support all of gzip's options. |
|
4 # |
|
5 # The 'gzip' module in the standard library provides a more complete |
|
6 # implementation of gzip-format files. |
|
7 |
|
8 import zlib, sys, os |
|
9 |
|
# Bit masks for the flag byte of a gzip header.  decompress() tests them to
# decide which optional header fields must be skipped; compress() sets FNAME.
FTEXT = 1 << 0      # not referenced elsewhere in this file
FHCRC = 1 << 1      # a 16-bit header CRC follows the optional fields
FEXTRA = 1 << 2     # a length-prefixed "extra" field is present
FNAME = 1 << 3      # a NUL-terminated original file name is present
FCOMMENT = 1 << 4   # a NUL-terminated comment is present
|
11 |
|
def write32(output, value):
    """Write the low 32 bits of *value* to *output* in little-endian order.

    output -- binary file-like object with a write() method.
    value  -- integer; only its low 32 bits are written, so negative values
              (e.g. a signed CRC) come out as their two's-complement bytes.
    """
    value &= 0xFFFFFFFF
    # Build all four bytes first and emit them with a single write().  Going
    # through bytearray yields a byte string on both Python 2 and Python 3,
    # whereas chr() produces text that a binary stream rejects on Python 3.
    packed = bytearray((value >> shift) & 0xFF for shift in (0, 8, 16, 24))
    output.write(bytes(packed))
|
17 |
|
def read32(input):
    """Read a little-endian unsigned 32-bit integer from *input*.

    input -- binary file-like object with a read() method.

    Returns the value as a non-negative integer.
    Raises EOFError if fewer than four bytes are available.
    """
    # bytearray gives integer items on both Python 2 and Python 3.
    raw = bytearray(input.read(4))
    if len(raw) != 4:
        raise EOFError('Unexpected end of file while reading a 32-bit value')
    return raw[0] | (raw[1] << 8) | (raw[2] << 16) | (raw[3] << 24)
|
24 |
|
def compress(filename, input, output):
    """Copy *input* to *output* as a single gzip member.

    filename -- path of the file being compressed.  It is stat()ed for the
                modification time, and its base name is stored in the header.
    input    -- binary file object supplying the uncompressed data.
    output   -- binary file object receiving the gzip stream.
    """
    output.write(b'\037\213\010')           # Magic number + method 8 (deflate)
    output.write(bytes(bytearray([FNAME]))) # Flag byte: a file name follows

    statval = os.stat(filename)
    write32(output, statval[8])             # Index 8 is the modification time
    output.write(b'\002')                   # Extra flags: slowest compression
    output.write(b'\377')                   # OS byte: unknown

    # The gzip format stores the name without directory components, in
    # ISO 8859-1, terminated by a NUL byte.
    name = os.path.basename(filename)
    if not isinstance(name, bytes):
        name = name.encode('latin-1', 'replace')
    output.write(name + b'\000')

    crcval = zlib.crc32(b'')
    length = 0
    # A negative window size asks zlib for a raw deflate stream, without the
    # zlib header and trailer that gzip replaces with its own.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        crcval = zlib.crc32(data, crcval)
        length += len(data)
        output.write(compobj.compress(data))
    output.write(compobj.flush())

    # Trailer: CRC-32 and size of the uncompressed data, both modulo 2**32.
    # The size is counted from the bytes actually read so that it always
    # matches the data that was compressed.
    write32(output, crcval & 0xFFFFFFFF)
    write32(output, length & 0xFFFFFFFF)
|
48 |
|
def _read_exact(stream, count):
    """Return exactly *count* bytes from *stream*; exit if the file ends early."""
    data = stream.read(count)
    if len(data) != count:
        print('Truncated gzip header')
        sys.exit(0)
    return data


def _skip_cstring(stream):
    """Discard bytes from *stream* up to and including the next NUL byte."""
    while _read_exact(stream, 1) != b'\0':
        pass


def decompress(input, output):
    """Decompress the gzip member in *input* and write the data to *output*.

    input  -- seekable binary file object positioned at the start of a gzip
              file.
    output -- binary file object receiving the uncompressed data.

    Prints a message and exits if the header is not a recognised gzip header
    or ends prematurely.  A CRC or length mismatch in the trailer is reported
    with a message, but the data has already been written by then.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(0)
    if ord(_read_exact(input, 1)) != 8:
        print('Unknown compression method')
        sys.exit(0)
    flag = ord(_read_exact(input, 1))
    _read_exact(input, 4 + 1 + 1)   # Discard modification time,
                                    # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field: 16-bit little-endian length + data
        xlen_bytes = bytearray(_read_exact(input, 2))
        xlen = xlen_bytes[0] + 256 * xlen_bytes[1]
        _read_exact(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(input)
    if flag & FHCRC:
        _read_exact(input, 2)       # Read & discard the 16-bit header CRC

    # A negative window size tells zlib to expect a raw deflate stream.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)

    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    stored_crc = read32(input)
    stored_size = read32(input)
    # Both trailer fields are unsigned 32-bit values, so reduce the computed
    # ones to the same range before comparing (zlib.crc32 may return a
    # negative number on Python 2, and the size is stored modulo 2**32).
    if stored_crc != (crcval & 0xFFFFFFFF):
        print('CRC check failed.')
    if stored_size != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
|
106 |
|
def main():
    """Command-line entry point: compress or decompress one file.

    Expects exactly one argument.  A name ending in '.gz' is decompressed to
    the same name without the suffix; any other name is compressed to the
    name with '.gz' appended.  The original file is left in place.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        sys.exit(0)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # try/finally guarantees both files are closed even when the worker
    # raises or bails out through sys.exit().
    infile = open(filename, 'rb')
    try:
        outfile = open(outputname, 'wb')
        try:
            if compressing:
                compress(filename, infile, outfile)
            else:
                decompress(infile, outfile)
        finally:
            outfile.close()
    finally:
        infile.close()
|
131 |
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()