|
1 from test.test_support import verbose, TESTFN |
|
2 import random |
|
3 import os |
|
4 |
|
5 # From SF bug #422121: Insecurities in dict comparison. |
|
6 |
|
7 # Safety of code doing comparisons has been an historical Python weak spot. |
|
8 # The problem is that comparison of structures written in C *naturally* |
|
9 # wants to hold on to things like the size of the container, or "the |
|
10 # biggest" containee so far, across a traversal of the container; but |
|
11 # code to do containee comparisons can call back into Python and mutate |
|
12 # the container in arbitrary ways while the C loop is in midstream. If the |
|
13 # C code isn't extremely paranoid about digging things out of memory on |
|
14 # each trip, and artificially boosting refcounts for the duration, anything |
|
15 # from infinite loops to OS crashes can result (yes, I use Windows <wink>). |
|
16 # |
|
17 # The other problem is that code designed to provoke a weakness is usually |
|
18 # white-box code, and so catches only the particular vulnerabilities the |
|
19 # author knew to protect against. For example, Python's list.sort() code |
|
20 # went thru many iterations as one "new" vulnerability after another was |
|
21 # discovered. |
|
22 # |
|
23 # So the dict comparison test here uses a black-box approach instead, |
|
24 # generating dicts of various sizes at random, and performing random |
|
25 # mutations on them at random times. This proved very effective, |
|
26 # triggering at least six distinct failure modes the first 20 times I |
|
27 # ran it. Indeed, at the start, the driver never got beyond 6 iterations |
|
28 # before the test died. |
|
29 |
|
30 # The dicts are global to make it easy to mutate tham from within functions. |
|
31 dict1 = {} |
|
32 dict2 = {} |
|
33 |
|
34 # The current set of keys in dict1 and dict2. These are materialized as |
|
35 # lists to make it easy to pick a dict key at random. |
|
36 dict1keys = [] |
|
37 dict2keys = [] |
|
38 |
|
39 # Global flag telling maybe_mutate() whether to *consider* mutating. |
|
40 mutate = 0 |
|
41 |
|
42 # If global mutate is true, consider mutating a dict. May or may not |
|
43 # mutate a dict even if mutate is true. If it does decide to mutate a |
|
44 # dict, it picks one of {dict1, dict2} at random, and deletes a random |
|
45 # entry from it; or, more rarely, adds a random element. |
|
46 |
|
47 def maybe_mutate(): |
|
48 global mutate |
|
49 if not mutate: |
|
50 return |
|
51 if random.random() < 0.5: |
|
52 return |
|
53 |
|
54 if random.random() < 0.5: |
|
55 target, keys = dict1, dict1keys |
|
56 else: |
|
57 target, keys = dict2, dict2keys |
|
58 |
|
59 if random.random() < 0.2: |
|
60 # Insert a new key. |
|
61 mutate = 0 # disable mutation until key inserted |
|
62 while 1: |
|
63 newkey = Horrid(random.randrange(100)) |
|
64 if newkey not in target: |
|
65 break |
|
66 target[newkey] = Horrid(random.randrange(100)) |
|
67 keys.append(newkey) |
|
68 mutate = 1 |
|
69 |
|
70 elif keys: |
|
71 # Delete a key at random. |
|
72 mutate = 0 # disable mutation until key deleted |
|
73 i = random.randrange(len(keys)) |
|
74 key = keys[i] |
|
75 del target[key] |
|
76 del keys[i] |
|
77 mutate = 1 |
|
78 |
|
79 # A horrid class that triggers random mutations of dict1 and dict2 when |
|
80 # instances are compared. |
|
81 |
|
82 class Horrid: |
|
83 def __init__(self, i): |
|
84 # Comparison outcomes are determined by the value of i. |
|
85 self.i = i |
|
86 |
|
87 # An artificial hashcode is selected at random so that we don't |
|
88 # have any systematic relationship between comparison outcomes |
|
89 # (based on self.i and other.i) and relative position within the |
|
90 # hash vector (based on hashcode). |
|
91 self.hashcode = random.randrange(1000000000) |
|
92 |
|
93 def __hash__(self): |
|
94 return 42 |
|
95 return self.hashcode |
|
96 |
|
97 def __cmp__(self, other): |
|
98 maybe_mutate() # The point of the test. |
|
99 return cmp(self.i, other.i) |
|
100 |
|
101 def __eq__(self, other): |
|
102 maybe_mutate() # The point of the test. |
|
103 return self.i == other.i |
|
104 |
|
105 def __repr__(self): |
|
106 return "Horrid(%d)" % self.i |
|
107 |
|
108 # Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, |
|
109 # where i and j are selected at random from the candidates list. |
|
110 # Return d.keys() after filling. |
|
111 |
|
112 def fill_dict(d, candidates, numentries): |
|
113 d.clear() |
|
114 for i in xrange(numentries): |
|
115 d[Horrid(random.choice(candidates))] = \ |
|
116 Horrid(random.choice(candidates)) |
|
117 return d.keys() |
|
118 |
|
119 # Test one pair of randomly generated dicts, each with n entries. |
|
120 # Note that dict comparison is trivial if they don't have the same number |
|
121 # of entires (then the "shorter" dict is instantly considered to be the |
|
122 # smaller one, without even looking at the entries). |
|
123 |
|
124 def test_one(n): |
|
125 global mutate, dict1, dict2, dict1keys, dict2keys |
|
126 |
|
127 # Fill the dicts without mutating them. |
|
128 mutate = 0 |
|
129 dict1keys = fill_dict(dict1, range(n), n) |
|
130 dict2keys = fill_dict(dict2, range(n), n) |
|
131 |
|
132 # Enable mutation, then compare the dicts so long as they have the |
|
133 # same size. |
|
134 mutate = 1 |
|
135 if verbose: |
|
136 print "trying w/ lengths", len(dict1), len(dict2), |
|
137 while dict1 and len(dict1) == len(dict2): |
|
138 if verbose: |
|
139 print ".", |
|
140 if random.random() < 0.5: |
|
141 c = cmp(dict1, dict2) |
|
142 else: |
|
143 c = dict1 == dict2 |
|
144 if verbose: |
|
145 print |
|
146 |
|
147 # Run test_one n times. At the start (before the bugs were fixed), 20 |
|
148 # consecutive runs of this test each blew up on or before the sixth time |
|
149 # test_one was run. So n doesn't have to be large to get an interesting |
|
150 # test. |
|
151 # OTOH, calling with large n is also interesting, to ensure that the fixed |
|
152 # code doesn't hold on to refcounts *too* long (in which case memory would |
|
153 # leak). |
|
154 |
|
155 def test(n): |
|
156 for i in xrange(n): |
|
157 test_one(random.randrange(1, 100)) |
|
158 |
|
159 # See last comment block for clues about good values for n. |
|
160 test(100) |
|
161 |
|
162 ########################################################################## |
|
163 # Another segfault bug, distilled by Michael Hudson from a c.l.py post. |
|
164 |
|
165 class Child: |
|
166 def __init__(self, parent): |
|
167 self.__dict__['parent'] = parent |
|
168 def __getattr__(self, attr): |
|
169 self.parent.a = 1 |
|
170 self.parent.b = 1 |
|
171 self.parent.c = 1 |
|
172 self.parent.d = 1 |
|
173 self.parent.e = 1 |
|
174 self.parent.f = 1 |
|
175 self.parent.g = 1 |
|
176 self.parent.h = 1 |
|
177 self.parent.i = 1 |
|
178 return getattr(self.parent, attr) |
|
179 |
|
180 class Parent: |
|
181 def __init__(self): |
|
182 self.a = Child(self) |
|
183 |
|
184 # Hard to say what this will print! May vary from time to time. But |
|
185 # we're specifically trying to test the tp_print slot here, and this is |
|
186 # the clearest way to do it. We print the result to a temp file so that |
|
187 # the expected-output file doesn't need to change. |
|
188 |
|
189 f = open(TESTFN, "w") |
|
190 print >> f, Parent().__dict__ |
|
191 f.close() |
|
192 os.unlink(TESTFN) |
|
193 |
|
194 ########################################################################## |
|
195 # And another core-dumper from Michael Hudson. |
|
196 |
|
197 dict = {} |
|
198 |
|
199 # Force dict to malloc its table. |
|
200 for i in range(1, 10): |
|
201 dict[i] = i |
|
202 |
|
203 f = open(TESTFN, "w") |
|
204 |
|
205 class Machiavelli: |
|
206 def __repr__(self): |
|
207 dict.clear() |
|
208 |
|
209 # Michael sez: "doesn't crash without this. don't know why." |
|
210 # Tim sez: "luck of the draw; crashes with or without for me." |
|
211 print >> f |
|
212 |
|
213 return `"machiavelli"` |
|
214 |
|
215 def __hash__(self): |
|
216 return 0 |
|
217 |
|
218 dict[Machiavelli()] = Machiavelli() |
|
219 |
|
220 print >> f, str(dict) |
|
221 f.close() |
|
222 os.unlink(TESTFN) |
|
223 del f, dict |
|
224 |
|
225 |
|
226 ########################################################################## |
|
227 # And another core-dumper from Michael Hudson. |
|
228 |
|
229 dict = {} |
|
230 |
|
231 # let's force dict to malloc its table |
|
232 for i in range(1, 10): |
|
233 dict[i] = i |
|
234 |
|
235 class Machiavelli2: |
|
236 def __eq__(self, other): |
|
237 dict.clear() |
|
238 return 1 |
|
239 |
|
240 def __hash__(self): |
|
241 return 0 |
|
242 |
|
243 dict[Machiavelli2()] = Machiavelli2() |
|
244 |
|
245 try: |
|
246 dict[Machiavelli2()] |
|
247 except KeyError: |
|
248 pass |
|
249 |
|
250 del dict |
|
251 |
|
252 ########################################################################## |
|
253 # And another core-dumper from Michael Hudson. |
|
254 |
|
255 dict = {} |
|
256 |
|
257 # let's force dict to malloc its table |
|
258 for i in range(1, 10): |
|
259 dict[i] = i |
|
260 |
|
261 class Machiavelli3: |
|
262 def __init__(self, id): |
|
263 self.id = id |
|
264 |
|
265 def __eq__(self, other): |
|
266 if self.id == other.id: |
|
267 dict.clear() |
|
268 return 1 |
|
269 else: |
|
270 return 0 |
|
271 |
|
272 def __repr__(self): |
|
273 return "%s(%s)"%(self.__class__.__name__, self.id) |
|
274 |
|
275 def __hash__(self): |
|
276 return 0 |
|
277 |
|
278 dict[Machiavelli3(1)] = Machiavelli3(0) |
|
279 dict[Machiavelli3(2)] = Machiavelli3(0) |
|
280 |
|
281 f = open(TESTFN, "w") |
|
282 try: |
|
283 try: |
|
284 print >> f, dict[Machiavelli3(2)] |
|
285 except KeyError: |
|
286 pass |
|
287 finally: |
|
288 f.close() |
|
289 os.unlink(TESTFN) |
|
290 |
|
291 del dict |
|
292 del dict1, dict2, dict1keys, dict2keys |