WebKitTools/Scripts/webkitpy/common/net/buildbot.py
changeset 0 4f2f89ce4247
# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for interacting with WebKit's buildbot
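#
# A rough usage sketch (the builder name and build number below are illustrative):
#   buildbot = BuildBot()
#   builder = buildbot.builder_with_name("SnowLeopard Intel Release (Tests)")
#   build = builder.build(1234)
#   if build and not build.is_green():
#       suspects = builder.blameworthy_revisions(1234)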

import operator
import re
import urllib
import urllib2
import xmlrpclib

from webkitpy.common.system.logutils import get_logger
from webkitpy.thirdparty.autoinstalled.mechanize import Browser
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup


_log = get_logger(__file__)


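# Builder wraps a single named builder on the buildbot master.  It knows how
# to fetch that builder's builds and layout test results, and how to walk
# backwards through its build history looking for failure transitions.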
class Builder(object):
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None
        self._browser = Browser()
        self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt

    def name(self):
        return self._name

    def results_url(self):
        return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    # This provides a single place to mock
    def _fetch_build(self, build_number):
        build_dictionary = self._buildbot._fetch_xmlrpc_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        return Build(self,
            build_number=int(build_dictionary['number']),
            revision=int(build_dictionary['revision']),
            is_green=(build_dictionary['results'] == 0) # Undocumented, but in buildbot's XMLRPC API 0 seems to mean "pass"
        )

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

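    # Submits the force-build form on the builder's web page to request a new build.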
    def force_build(self, username="webkit-patch", comments=None):
        def predicate(form):
            try:
                return form.find_control("username")
            except Exception, e:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            result_files = []

        # This assumes there is only one build per revision, which is false, but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's XML-RPC API.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

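    # Walks backwards from red_build until it finds a build that is green or
    # shares no failures with red_build, returning (last_good_build, first_bad_build).
    # If the look-back limit is exhausted first, the first element is None.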
    def find_failure_transition(self, red_build, look_back_limit=30):
        if not red_build or red_build.is_green():
            return (None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the tests failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures is None:
                    common_failures = failures
                common_failures = common_failures.intersection(failures)
                if not common_failures:
                    # current_build doesn't have any failures in common with
                    # the red build we're worried about.  We assume that any
                    # failures in current_build were due to flakiness.
                    break
            look_back_count += 1
            if look_back_count > look_back_limit:
                return (None, current_build)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert(build_after_current_build)
        # Current build must either be green or have no failures in common
        # with red build, so we've found our failure transition.
        return (current_build, build_after_current_build)

    # FIXME: This likely does not belong on Builder
    def suspect_revisions_for_transition(self, last_good_build, first_bad_build):
        suspect_revisions = range(first_bad_build.revision(),
                                  last_good_build.revision(),
                                  -1)
        suspect_revisions.reverse()
        return suspect_revisions

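    # Returns the revisions after the last good build up to and including the
    # first bad build, or [] when no failure transition was found (or when only
    # the newest build is red and avoid_flakey_tests is set).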
    def blameworthy_revisions(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        red_build = self.build(red_build_number)
        (last_good_build, first_bad_build) = \
            self.find_failure_transition(red_build, look_back_limit)
        if not last_good_build:
            return [] # We ran off the limit of our search
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and first_bad_build == red_build:
            return []
        return self.suspect_revisions_for_transition(last_good_build, first_bad_build)


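# LayoutTestResults parses the results.html summary page written by
# run-webkit-tests into a dictionary mapping section titles to lists of test names.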
# FIXME: This should be unified with all the layout test results code in the layout_tests package
class LayoutTestResults(object):
    stderr_key = u'Tests that had stderr output:'
    fail_key = u'Tests where results did not match expected results:'
    timeout_key = u'Tests that timed out:'
    crash_key = u'Tests that caused the DumpRenderTree tool to crash:'
    missing_key = u'Tests that had no expected results (probably new):'

    expected_keys = [
        stderr_key,
        fail_key,
        crash_key,
        timeout_key,
        missing_key,
    ]

    @classmethod
    def _parse_results_html(cls, page):
        parsed_results = {}
        tables = BeautifulSoup(page).findAll("table")
        for table in tables:
            table_title = unicode(table.findPreviousSibling("p").string)
            if table_title not in cls.expected_keys:
                # This Exception should only ever be hit if run-webkit-tests changes its results.html format.
                raise Exception("Unhandled title: %s" % table_title)
            # We might want to translate table titles into identifiers before storing.
            parsed_results[table_title] = [unicode(row.find("a").string) for row in table.findAll("tr")]

        return parsed_results

    @classmethod
    def _fetch_results_html(cls, base_url):
        results_html = "%s/results.html" % base_url
        # FIXME: We need to move this sort of 404 logic into NetworkTransaction or similar.
        try:
            page = urllib2.urlopen(results_html)
            return cls._parse_results_html(page)
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise

    @classmethod
    def results_from_url(cls, base_url):
        parsed_results = cls._fetch_results_html(base_url)
        if not parsed_results:
            return None
        return cls(base_url, parsed_results)

    def __init__(self, base_url, parsed_results):
        self._base_url = base_url
        self._parsed_results = parsed_results

    def parsed_results(self):
        return self._parsed_results

    def failing_tests(self):
        failing_keys = [self.fail_key, self.crash_key, self.timeout_key]
        return sorted(sum([tests for key, tests in self._parsed_results.items() if key in failing_keys], []))


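# Build represents a single numbered build of a Builder at a specific revision.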
class Build(object):
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))

    def layout_test_results(self):
        if not self._layout_test_results:
            self._layout_test_results = LayoutTestResults.results_from_url(self.results_url())
        return self._layout_test_results

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across all master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)


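# BuildBot wraps the buildbot master's web and XMLRPC interfaces, exposing
# builder statuses, per-builder Builder objects, and queries such as
# last_green_revision() and revisions_causing_failures().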
class BuildBot(object):
    # FIXME: This should move into some sort of webkit_config.py
    default_host = "build.webkit.org"

    def __init__(self, host=default_host):
        self.buildbot_host = host
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches.  Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            "SnowLeopard.*Build",
            "SnowLeopard.*Test",
            "Leopard",
            "Tiger",
            "Windows.*Build",
            "GTK.*32",
            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
            "Qt",
            "Chromium.*Release$",
        ]

    def _parse_last_build_cell(self, builder, cell):
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number
            revision_string = status_link.string
            # If revision_string has non-digits assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                                        if not re.match('\D', revision_string) \
                                        else None

            # FIXME: We treat a lost slave as green even though it is not, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                                  not not re.search('lost', cell.renderContents())

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, so just give up.  This
            # can happen when a builder is newly added and the first cell is
            # simply "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0] # normally "building" or "idle"
        # The middle lines document how much time is left for any current builds.
        match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

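    # Returns a dictionary describing one builder row with the keys:
    # "name", "activity", "pending_builds", "built_revision", "is_green" and "build_number".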
    def _parse_builder_status_from_row(self, status_row):
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder
    def _is_core_builder(self, builder_name):
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_xmlrpc_build_dictionary(self, builder, build_number):
        # The buildbot XMLRPC API is super-limited.
        # For one, you cannot fetch info on builds which are incomplete.
        proxy = xmlrpclib.ServerProxy("http://%s/xmlrpc" % self.buildbot_host, allow_none=True)
        try:
            return proxy.getBuild(builder.name(), int(build_number))
        except xmlrpclib.Fault, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
        return urllib2.urlopen(build_status_url)

    def _parse_twisted_file_row(self, file_row):
        string_or_empty = lambda soup: unicode(soup.string) if soup.string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(file_cells[0].find("a")),
            "size": string_or_empty(file_cells[1]),
            "type": string_or_empty(file_cells[2]),
            "encoding": string_or_empty(file_cells[3]),
        }

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', { "class" : True })
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get information about
    # all of the builders in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = Builder(name, self)
            self._builder_by_name[name] = builder
        return builder

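    # Returns a dictionary mapping each suspect revision to the list of Builder
    # objects whose current redness it might explain.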
    def revisions_causing_failures(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        revision_to_failing_bots = {}
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            revisions = builder.blameworthy_revisions(builder_status["build_number"])
            for revision in revisions:
                failing_bots = revision_to_failing_bots.get(revision, [])
                failing_bots.append(builder)
                revision_to_failing_bots[revision] = failing_bots
        return revision_to_failing_bots

    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()

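    # Walks backwards through the watched builders' builds to find the most
    # recent revision on which they all agree and are all green; returns None
    # if any build fails to load along the way.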
    def last_green_revision(self, only_core_builders=True):
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and walk backwards
        # checking builder.build_for_revision, however build_for_revision is very slow on first load.
        while True:
            # Make builds agree on revision
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds: # One of the builds failed to load from the server.
                return None
            min_revision = min(map(lambda build: build.revision(), builds))
            if min_revision != target_revision:
                target_revision = min_revision
                continue # Builds don't all agree on revision, keep searching
            # Check to make sure they're all green
            all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision