# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for interacting with WebKit's buildbot

import operator
import re
import urllib
import urllib2
import xmlrpclib

from webkitpy.common.system.logutils import get_logger
from webkitpy.thirdparty.autoinstalled.mechanize import Browser
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


_log = get_logger(__file__)

class Builder(object):
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None
        self._browser = Browser()
        self._browser.set_handle_robots(False)  # The builder pages are excluded by robots.txt.

    def name(self):
        return self._name

    def results_url(self):
        return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    # This method provides a single place to mock build fetches.
    def _fetch_build(self, build_number):
        build_dictionary = self._buildbot._fetch_xmlrpc_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        return Build(self,
            build_number=int(build_dictionary['number']),
            revision=int(build_dictionary['revision']),
            # Undocumented: in buildbot's XML-RPC API, a "results" value of 0 seems to mean "pass".
            is_green=(build_dictionary['results'] == 0),
        )
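
    # Illustrative shape of the build dictionary returned by buildbot's
    # XML-RPC getBuild call, inferred from the fields used in _fetch_build
    # above; the values are hypothetical and other keys may also be present:
    #   {'number': 1000, 'revision': 47483, 'results': 0, ...}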

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def force_build(self, username="webkit-patch", comments=None):
        def predicate(form):
            try:
                return form.find_control("username")
            except Exception:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()
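
    # Illustrative usage (submits the "force build" form on the builder's
    # page; the comment string is hypothetical):
    #   builder.force_build(username="webkit-patch", comments="Retry r47483")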

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")

    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))
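
    # Illustrative parse of the example filenames above:
    #   _revision_and_build_for_filename("r47483 (1).zip") -> (47483, 1)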

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            result_files = []

        # This assumes there was only one build per revision, which is false but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])
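
    # Illustrative shape of the resulting map (revision -> build number;
    # the values shown are hypothetical):
    #   {47483: 1, 47484: 2, 47485: 3}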

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to load via buildbot's XML-RPC API.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

    def find_failure_transition(self, red_build, look_back_limit=30):
        if not red_build or red_build.is_green():
            return (None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the tests failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures is None:
                    common_failures = failures
                common_failures = common_failures.intersection(failures)
                if not common_failures:
                    # current_build doesn't have any failures in common with
                    # the red build we're worried about. We assume that any
                    # failures in current_build were due to flakiness.
                    break
            look_back_count += 1
            if look_back_count > look_back_limit:
                return (None, current_build)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert(build_after_current_build)
        # current_build must either be green or have no failures in common
        # with red_build, so we've found our failure transition.
        return (current_build, build_after_current_build)
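
    # Illustrative return values of find_failure_transition:
    #   (None, None)              -- red_build was missing or already green
    #   (None, oldest_red_build)  -- we hit look_back_limit without finding green
    #   (last_green, first_red)   -- the transition from green to red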

    # FIXME: This likely does not belong on Builder.
    def suspect_revisions_for_transition(self, last_good_build, first_bad_build):
        suspect_revisions = range(first_bad_build.revision(),
                                  last_good_build.revision(),
                                  -1)
        suspect_revisions.reverse()
        return suspect_revisions
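
    # Illustrative result: for a last good build at r47480 and a first bad
    # build at r47483, this returns the ascending list [47481, 47482, 47483].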

    def blameworthy_revisions(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        red_build = self.build(red_build_number)
        (last_good_build, first_bad_build) = \
            self.find_failure_transition(red_build, look_back_limit)
        if not last_good_build:
            return []  # We ran off the limit of our search.
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and first_bad_build == red_build:
            return []
        return self.suspect_revisions_for_transition(last_good_build, first_bad_build)
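
    # Illustrative usage (build number and revisions hypothetical):
    #   builder.blameworthy_revisions(1000) -> [47481, 47482, 47483]
    # An empty list means the search ran past look_back_limit or the failure
    # looked like flakiness.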


# FIXME: This should be unified with all the layout test results code in the layout_tests package.
class LayoutTestResults(object):
    stderr_key = u'Tests that had stderr output:'
    fail_key = u'Tests where results did not match expected results:'
    timeout_key = u'Tests that timed out:'
    crash_key = u'Tests that caused the DumpRenderTree tool to crash:'
    missing_key = u'Tests that had no expected results (probably new):'

    expected_keys = [
        stderr_key,
        fail_key,
        crash_key,
        timeout_key,
        missing_key,
    ]

    @classmethod
    def _parse_results_html(cls, page):
        parsed_results = {}
        tables = BeautifulSoup(page).findAll("table")
        for table in tables:
            table_title = unicode(table.findPreviousSibling("p").string)
            if table_title not in cls.expected_keys:
                # This Exception should only ever be hit if run-webkit-tests changes its results.html format.
                raise Exception("Unhandled title: %s" % table_title)
            # We might want to translate table titles into identifiers before storing.
            parsed_results[table_title] = [unicode(row.find("a").string) for row in table.findAll("tr")]

        return parsed_results
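
    # Illustrative parsed_results dictionary (table titles map to lists of
    # test names; the test paths shown are hypothetical):
    #   {u'Tests that timed out:': [u'fast/js/example.html'],
    #    u'Tests that had stderr output:': [u'editing/example.html']}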

    @classmethod
    def _fetch_results_html(cls, base_url):
        results_html = "%s/results.html" % base_url
        # FIXME: We need to move this sort of 404 logic into NetworkTransaction or similar.
        try:
            page = urllib2.urlopen(results_html)
            return cls._parse_results_html(page)
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise

    @classmethod
    def results_from_url(cls, base_url):
        parsed_results = cls._fetch_results_html(base_url)
        if not parsed_results:
            return None
        return cls(base_url, parsed_results)

    def __init__(self, base_url, parsed_results):
        self._base_url = base_url
        self._parsed_results = parsed_results

    def parsed_results(self):
        return self._parsed_results

    def failing_tests(self):
        failing_keys = [self.fail_key, self.crash_key, self.timeout_key]
        return sorted(sum([tests for key, tests in self._parsed_results.items() if key in failing_keys], []))
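
    # failing_tests() merges the fail, crash, and timeout tables into a single
    # sorted list; tests with only stderr output or missing expected results
    # are not counted as failures.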


class Build(object):
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))
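
    # Illustrative value for revision 47483, build 1 (urllib.quote encodes
    # the space and parentheses; the builder name is hypothetical):
    #   http://build.webkit.org/results/Leopard/r47483%20%281%29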

    def layout_test_results(self):
        if not self._layout_test_results:
            self._layout_test_results = LayoutTestResults.results_from_url(self.results_url())
        return self._layout_test_results

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across all master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)


class BuildBot(object):
    # FIXME: This should move into some sort of webkit_config.py.
    default_host = "build.webkit.org"

    def __init__(self, host=default_host):
        self.buildbot_host = host
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches. Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            "SnowLeopard.*Build",
            "SnowLeopard.*Test",
            "Leopard",
            "Tiger",
            "Windows.*Build",
            "GTK.*32",
            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot, which is broken.
            "Qt",
            "Chromium.*Release$",
        ]

    def _parse_last_build_cell(self, builder, cell):
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number.
            revision_string = status_link.string
            # If revision_string has non-digits, assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                if not re.match(r'\D', revision_string) \
                else None

            # FIXME: We treat "slave lost" as green even though it is not, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                bool(re.search('lost', cell.renderContents()))

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, so just give up. This
            # can happen when a builder is newly added; the first cell will just
            # be "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0]  # Normally "building" or "idle".
        # The middle lines describe how long is left for any current builds.
        match = re.match(r"(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        status_cells = status_row.findAll('td')
        builder = {}

        # The first cell is the builder name.
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder
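
    # Illustrative builder-status dictionary assembled by the parsers above
    # (all values hypothetical):
    #   {'name': u'Leopard', 'built_revision': 47483, 'is_green': True,
    #    'build_number': 1000, 'activity': 'idle', 'pending_builds': 0}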

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder.
    def _is_core_builder(self, builder_name):
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_xmlrpc_build_dictionary(self, builder, build_number):
        # The buildbot XML-RPC API is super-limited.
        # For one, you cannot fetch info on builds which are incomplete.
        proxy = xmlrpclib.ServerProxy("http://%s/xmlrpc" % self.buildbot_host, allow_none=True)
        try:
            return proxy.getBuild(builder.name(), int(build_number))
        except xmlrpclib.Fault, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
        return urllib2.urlopen(build_status_url)

    def _parse_twisted_file_row(self, file_row):
        string_or_empty = lambda soup: unicode(soup.string) if soup.string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(file_cells[0].find("a")),
            "size": string_or_empty(file_cells[1]),
            "type": string_or_empty(file_cells[2]),
            "encoding": string_or_empty(file_cells[3]),
        }
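
    # Illustrative row dictionary for a results directory in a twisted
    # directory listing (all values hypothetical):
    #   {"filename": u"r47483 (1)/", "size": u"", "type": u"[Directory]",
    #    "encoding": u""}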

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class attribute to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {"class": True})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to
    # get the current status of every builder in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = Builder(name, self)
            self._builder_by_name[name] = builder
        return builder

    def revisions_causing_failures(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        revision_to_failing_bots = {}
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            revisions = builder.blameworthy_revisions(builder_status["build_number"])
            for revision in revisions:
                failing_bots = revision_to_failing_bots.get(revision, [])
                failing_bots.append(builder)
                revision_to_failing_bots[revision] = failing_bots
        return revision_to_failing_bots
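
    # Illustrative return value, mapping suspect revisions to the Builder
    # objects blaming them (revisions and builder names hypothetical):
    #   {47483: [<Builder "Leopard">, <Builder "Qt">]}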

    # This makes fewer requests than calling Builder.latest_build would. It
    # grabs all builder statuses in one request using self.builder_statuses
    # (fetching /one_box_per_builder instead of individual builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()

    def last_green_revision(self, only_core_builders=True):
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and
        # walk backwards checking builder.build_for_revision; however,
        # build_for_revision is very slow on first load.
        while True:
            # Make the builds agree on a revision.
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds:  # One of the builds failed to load from the server.
                return None
            min_revision = min(map(lambda build: build.revision(), builds))
            if min_revision != target_revision:
                target_revision = min_revision
                continue  # The builds don't all agree on a revision yet; keep searching.
            # Check to make sure they're all green.
            all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision
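

# Illustrative module usage (assumes network access to build.webkit.org):
#   buildbot = BuildBot()
#   if not buildbot.core_builders_are_green():
#       print buildbot.red_core_builders_names()
#   print buildbot.last_green_revision()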