|
"""Utilities for comparing files and directories.

Classes:
    dircmp

Functions:
    cmp(f1, f2, shallow=1) -> bool
    cmpfiles(a, b, common, shallow=1) -> ([], [], [])

"""
|
11 |
|
12 import os |
|
13 import stat |
|
14 import warnings |
|
15 from itertools import ifilter, ifilterfalse, imap, izip |
|
16 |
|
# Names exported by ``from <this module> import *``.
__all__ = [
    "cmp",
    "dircmp",
    "cmpfiles",
]
|
18 |
|
# Memo of deep-comparison results; cmp() stores entries of the form
# (f1, f2) -> (signature of f1, signature of f2, outcome).
_cache = dict()

# Number of bytes _do_cmp() reads from each file per iteration (8 KiB).
BUFSIZE = 8 * 1024
|
21 |
|
def cmp(f1, f2, shallow=1):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true (the default), two files whose stat signatures
               (file type, size, modification time) are identical are
               reported equal without being read.  If the signatures
               differ but the sizes match, or if shallow is false, the
               file contents are compared.

    Return value:

    True if the files are the same, False otherwise.  False is also
    returned when either name does not refer to a regular file.

    Errors raised by os.stat() or while opening/reading the files are
    not caught here and propagate to the caller.

    This function uses a cache for past comparisons and the results,
    with a cache invalidation mechanism relying on stale signatures.
    The cache is emptied whenever it already holds more than 100
    entries, so it cannot grow without bound.

    """
    max_cached = 100

    s1 = _sig(os.stat(f1))
    s2 = _sig(os.stat(f2))
    # Only regular files can be compared.
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    if shallow and s1 == s2:
        return True
    # Files of different sizes cannot have equal contents.
    if s1[1] != s2[1]:
        return False

    # Reuse a previous verdict only if neither signature has changed
    # since it was recorded.
    cached = _cache.get((f1, f2))
    if cached is not None and cached[:2] == (s1, s2):
        return cached[2]

    outcome = _do_cmp(f1, f2)
    if len(_cache) > max_cached:
        # Drop everything rather than track usage: the cache is only an
        # optimisation, and this keeps long-running callers from
        # accumulating one entry per path pair forever.
        _cache.clear()
    _cache[f1, f2] = s1, s2, outcome
    return outcome
|
58 |
|
def _sig(st):
    """Return the comparison signature of a stat result.

    The signature is the tuple (file type bits, size, modification time)
    taken from st.st_mode, st.st_size and st.st_mtime.
    """
    file_type = stat.S_IFMT(st.st_mode)
    signature = (file_type, st.st_size, st.st_mtime)
    return signature
|
63 |
|
def _do_cmp(f1, f2):
    """Return True if files f1 and f2 have identical contents.

    Both files are opened in binary mode and read in lock-step, BUFSIZE
    bytes at a time.  The comparison stops at the first pair of chunks
    that differ (False) or when the first file is exhausted with every
    chunk equal (True).

    Both file objects are closed before returning, including when an
    error is raised while opening the second file or while reading.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while True:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return False
                # b1 == b2 here, so an empty b1 means both files hit EOF
                # at the same point.
                if not b1:
                    return True
        finally:
            fp2.close()
    finally:
        fp1.close()
|
75 |
|
# Directory comparison class.
#
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a, b, ignore=None, hide=None)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to ['RCS', 'CVS', 'tags'].
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes:
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.

    The attributes listed above are computed lazily: the first access to
    one of them runs the phase method registered for it in methodmap,
    which stores the result on the instance.
    """

    def __init__(self, a, b, ignore=None, hide=None):
        """Record the two directories and the ignore/hide name lists."""
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir]  # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = ['RCS', 'CVS', 'tags']  # Names ignored in comparison
        else:
            self.ignore = ignore

    def phase0(self):
        """List both directories, dropping hidden and ignored names."""
        skip = self.hide + self.ignore
        self.left_list = _filter(os.listdir(self.left), skip)
        self.right_list = _filter(os.listdir(self.right), skip)
        self.left_list.sort()
        self.right_list.sort()

    def phase1(self):
        """Compute the common, left-only and right-only names."""
        # Names are matched on their os.path.normcase() form; the values
        # keep the spelling actually found in each directory.
        a = dict(zip(map(os.path.normcase, self.left_list), self.left_list))
        b = dict(zip(map(os.path.normcase, self.right_list), self.right_list))
        self.common = [a[key] for key in a if key in b]
        self.left_only = [a[key] for key in a if key not in b]
        self.right_only = [b[key] for key in b if key not in a]

    def phase2(self):
        """Split the common names into directories, files and funnies."""
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            try:
                a_stat = os.stat(a_path)
                b_stat = os.stat(b_path)
            except os.error:
                # A name that cannot be stat'ed on either side is "funny".
                self.common_funny.append(x)
                continue

            a_type = stat.S_IFMT(a_stat.st_mode)
            b_type = stat.S_IFMT(b_stat.st_mode)
            if a_type != b_type:
                self.common_funny.append(x)
            elif stat.S_ISDIR(a_type):
                self.common_dirs.append(x)
            elif stat.S_ISREG(a_type):
                self.common_files.append(x)
            else:
                # Same type on both sides, but neither directory nor
                # regular file.
                self.common_funny.append(x)

    def phase3(self):
        """Find out differences between common files."""
        xx = cmpfiles(self.left, self.right, self.common_files)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self):
        """Create a dircmp object for every common subdirectory."""
        # A new dircmp object is created for each common subdirectory,
        # these are stored in a dictionary indexed by filename.
        # The hide and ignore properties are inherited from the parent
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)

    def phase4_closure(self):
        """Recursively call phase4() on subdirectories."""
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self):
        """Print a report on the differences between a and b.

        Each non-empty result list is sorted in place before printing.
        """
        # Output format is purposely lousy
        print('diff %s %s' % (self.left, self.right))
        if self.left_only:
            self.left_only.sort()
            print('Only in %s : %s' % (self.left, self.left_only))
        if self.right_only:
            self.right_only.sort()
            print('Only in %s : %s' % (self.right, self.right_only))
        sections = (
            ('Identical files :', self.same_files),
            ('Differing files :', self.diff_files),
            ('Trouble with common files :', self.funny_files),
            ('Common subdirectories :', self.common_dirs),
            ('Common funny cases :', self.common_funny),
        )
        for label, names in sections:
            if names:
                names.sort()
                print('%s %s' % (label, names))

    def report_partial_closure(self):
        """Print reports on self and on the immediate common subdirs."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report()

    def report_full_closure(self):
        """Report on self and on common subdirs, recursively."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase that produces it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs=phase2, common_files=phase2, common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        """Compute a lazy attribute on first access.

        Only called when normal lookup fails.  Raises AttributeError for
        names that are not keys of methodmap.
        """
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        return getattr(self, attr)
|
241 |
|
def cmpfiles(a, b, common, shallow=1):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- passed through to cmp() for every pair of files

    Returns a tuple of three lists:
      names whose files compare equal
      names whose files compare different
      names whose comparison failed with os.error

    """
    same, different, funny = [], [], []
    buckets = (same, different, funny)
    for name in common:
        left_path = os.path.join(a, name)
        right_path = os.path.join(b, name)
        # _cmp() yields the index of the bucket the name belongs to.
        verdict = _cmp(left_path, right_path, shallow)
        buckets[verdict].append(name)
    return buckets
|
261 |
|
262 |
|
# Compare two files and classify the result for cmpfiles().
# Return:
#       0 (False) for equal
#       1 (True) for different
#       2 for funny cases (the comparison raised os.error)
#
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    # abs and cmp are bound as defaults when this function is defined, so
    # the calls below use those objects rather than a later global lookup.
    try:
        same = cmp(a, b, sh)
        different = not abs(same)
    except os.error:
        return 2
    return different
|
274 |
|
275 |
|
# Return a new list holding the items of flist that do not occur in skip,
# in their original order.
#
def _filter(flist, skip):
    return [item for item in flist if item not in skip]
|
280 |
|
281 |
|
# Demonstration and testing.
#
def demo():
    """Compare the two directories named on the command line.

    Expects exactly two positional arguments; raises getopt.GetoptError
    otherwise.  With the -r option the report recurses into common
    subdirectories, without it only the top level is reported.
    """
    import getopt
    import sys

    opts, paths = getopt.getopt(sys.argv[1:], 'r')
    if len(paths) != 2:
        raise getopt.GetoptError('need exactly two args', None)

    comparison = dircmp(paths[0], paths[1])
    recursive = ('-r', '') in opts
    if recursive:
        comparison.report_full_closure()
    else:
        comparison.report()
|
295 |
|
# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    demo()