WebKitTools/Scripts/webkitpy/common/net/buildbot.py
changeset 0 4f2f89ce4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/WebKitTools/Scripts/webkitpy/common/net/buildbot.py	Fri Sep 17 09:02:29 2010 +0300
@@ -0,0 +1,500 @@
+# Copyright (c) 2009, Google Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# WebKit's Python module for interacting with WebKit's buildbot
+
+import re
+import urllib
+import urllib2
+import xmlrpclib
+
+from webkitpy.common.system.logutils import get_logger
+from webkitpy.thirdparty.autoinstalled.mechanize import Browser
+from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
+
+
+_log = get_logger(__file__)
+
+
+class Builder(object):
+    def __init__(self, name, buildbot):
+        self._name = name
+        self._buildbot = buildbot
+        self._builds_cache = {}
+        self._revision_to_build_number = None
+        self._browser = Browser()
+        self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt
+
+    def name(self):
+        return self._name
+
+    def results_url(self):
+        return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
+
+    def url_encoded_name(self):
+        return urllib.quote(self._name)
+
+    def url(self):
+        return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
+
+    # This provides a single place to mock
+    def _fetch_build(self, build_number):
+        build_dictionary = self._buildbot._fetch_xmlrpc_build_dictionary(self, build_number)
+        if not build_dictionary:
+            return None
+        return Build(self,
+            build_number=int(build_dictionary['number']),
+            revision=int(build_dictionary['revision']),
+            is_green=(build_dictionary['results'] == 0) # In buildbot's undocumented XML-RPC API, a result of 0 seems to mean "pass".
+        )
+
+    def build(self, build_number):
+        if not build_number:
+            return None
+        cached_build = self._builds_cache.get(build_number)
+        if cached_build:
+            return cached_build
+
+        build = self._fetch_build(build_number)
+        self._builds_cache[build_number] = build
+        return build
+
+    def force_build(self, username="webkit-patch", comments=None):
+        def predicate(form):
+            try:
+                return form.find_control("username")
+            except Exception:
+                return False
+        self._browser.open(self.url())
+        self._browser.select_form(predicate=predicate)
+        self._browser["username"] = username
+        if comments:
+            self._browser["comments"] = comments
+        return self._browser.submit()
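+    # Example use (the builder name and bug number are hypothetical):
+    #   BuildBot().builder_with_name("Qt Linux Release").force_build(
+    #       comments="Testing fix for bug 12345")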
+
+    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
+    def _revision_and_build_for_filename(self, filename):
+        # Example: "r47483 (1)/" or "r47483 (1).zip"
+        match = self.file_name_regexp.match(filename)
+        if not match:
+            return None # Not a result directory/zip; the caller filters these out.
+        return (int(match.group("revision")), int(match.group("build_number")))
+
+    def _fetch_revision_to_build_map(self):
+        # All _fetch requests go through _buildbot for easier mocking
+        try:
+            # FIXME: This method is horribly slow due to the huge network load.
+            # FIXME: This is a poor way to do revision -> build mapping.
+            # Better would be to ask buildbot through some sort of API.
+            print "Loading revision/build list from %s." % self.results_url()
+            print "This may take a while..."
+            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
+        except urllib2.HTTPError, error:
+            if error.code != 404:
+                raise
+            result_files = []
+
+        # This assumes there was only one build per revision, which is false, but we don't care for now.
+        revision_build_pairs = [self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files]
+        return dict(pair for pair in revision_build_pairs if pair)
+
+    def _revision_to_build_map(self):
+        if not self._revision_to_build_number:
+            self._revision_to_build_number = self._fetch_revision_to_build_map()
+        return self._revision_to_build_number
+
+    def revision_build_pairs_with_results(self):
+        return self._revision_to_build_map().items()
+
+    # This assumes there can be only one build per revision, which is false, but we don't care for now.
+    def build_for_revision(self, revision, allow_failed_lookups=False):
+        # NOTE: This lookup will fail if that exact revision was never built.
+        build_number = self._revision_to_build_map().get(int(revision))
+        if not build_number:
+            return None
+        build = self.build(build_number)
+        if not build and allow_failed_lookups:
+            # Builds for old revisions will fail to look up via buildbot's XML-RPC API.
+            build = Build(self,
+                build_number=build_number,
+                revision=revision,
+                is_green=False,
+            )
+        return build
+
+    def find_failure_transition(self, red_build, look_back_limit=30):
+        if not red_build or red_build.is_green():
+            return (None, None)
+        common_failures = None
+        current_build = red_build
+        build_after_current_build = None
+        look_back_count = 0
+        while current_build:
+            if current_build.is_green():
+                # current_build can't possibly have any failures in common
+                # with red_build because it's green.
+                break
+            results = current_build.layout_test_results()
+            # We treat a lack of results as if all the tests failed.
+            # This occurs, for example, when we can't compile at all.
+            if results:
+                failures = set(results.failing_tests())
+                if common_failures is None:
+                    common_failures = failures
+                common_failures = common_failures.intersection(failures)
+                if not common_failures:
+                    # current_build doesn't have any failures in common with
+                    # the red build we're worried about.  We assume that any
+                    # failures in current_build were due to flakiness.
+                    break
+            look_back_count += 1
+            if look_back_count > look_back_limit:
+                return (None, current_build)
+            build_after_current_build = current_build
+            current_build = current_build.previous_build()
+        # We must iterate at least once because red_build is red.
+        assert build_after_current_build
+        # Current build must either be green or have no failures in common
+        # with red build, so we've found our failure transition.
+        return (current_build, build_after_current_build)
+
+    # FIXME: This likely does not belong on Builder
+    def suspect_revisions_for_transition(self, last_good_build, first_bad_build):
+        suspect_revisions = range(first_bad_build.revision(),
+                                  last_good_build.revision(),
+                                  -1)
+        suspect_revisions.reverse()
+        return suspect_revisions
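+    # Worked example (hypothetical revisions): with last_good_build at r100
+    # and first_bad_build at r103, range(103, 100, -1) yields [103, 102, 101],
+    # which reverses to [101, 102, 103]; the known-good r100 is excluded.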
+
+    def blameworthy_revisions(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
+        red_build = self.build(red_build_number)
+        (last_good_build, first_bad_build) = \
+            self.find_failure_transition(red_build, look_back_limit)
+        if not last_good_build:
+            return [] # We ran off the limit of our search
+        # If avoid_flakey_tests is set, require at least two bad builds before
+        # we suspect a real failure transition.
+        if avoid_flakey_tests and first_bad_build == red_build:
+            return []
+        return self.suspect_revisions_for_transition(last_good_build, first_bad_build)
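+    # Example use (the build number and revisions are hypothetical):
+    #   builder.blameworthy_revisions(1500)
+    # returns the suspect revisions between the last green and first red
+    # builds, e.g. [47511, 47512], or [] if no transition could be pinned down.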
+
+
+# FIXME: This should be unified with all the layout test results code in the layout_tests package
+class LayoutTestResults(object):
+    stderr_key = u'Tests that had stderr output:'
+    fail_key = u'Tests where results did not match expected results:'
+    timeout_key = u'Tests that timed out:'
+    crash_key = u'Tests that caused the DumpRenderTree tool to crash:'
+    missing_key = u'Tests that had no expected results (probably new):'
+
+    expected_keys = [
+        stderr_key,
+        fail_key,
+        crash_key,
+        timeout_key,
+        missing_key,
+    ]
+
+    @classmethod
+    def _parse_results_html(cls, page):
+        parsed_results = {}
+        tables = BeautifulSoup(page).findAll("table")
+        for table in tables:
+            table_title = unicode(table.findPreviousSibling("p").string)
+            if table_title not in cls.expected_keys:
+                # This Exception should only ever be hit if run-webkit-tests changes its results.html format.
+                raise Exception("Unhandled title: %s" % table_title)
+            # We might want to translate table titles into identifiers before storing.
+            parsed_results[table_title] = [unicode(row.find("a").string) for row in table.findAll("tr")]
+
+        return parsed_results
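+    # The parser above expects results.html markup of roughly this shape
+    # (a sketch only; run-webkit-tests generates the real page):
+    #   <p>Tests that timed out:</p>
+    #   <table><tr><td><a href="...">fast/js/example.html</a></td></tr></table>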
+
+    @classmethod
+    def _fetch_results_html(cls, base_url):
+        results_html = "%s/results.html" % base_url
+        # FIXME: We need to move this sort of 404 logic into NetworkTransaction or similar.
+        try:
+            page = urllib2.urlopen(results_html)
+            return cls._parse_results_html(page)
+        except urllib2.HTTPError, error:
+            if error.code != 404:
+                raise
+
+    @classmethod
+    def results_from_url(cls, base_url):
+        parsed_results = cls._fetch_results_html(base_url)
+        if not parsed_results:
+            return None
+        return cls(base_url, parsed_results)
+
+    def __init__(self, base_url, parsed_results):
+        self._base_url = base_url
+        self._parsed_results = parsed_results
+
+    def parsed_results(self):
+        return self._parsed_results
+
+    def failing_tests(self):
+        failing_keys = [self.fail_key, self.crash_key, self.timeout_key]
+        return sorted(sum([tests for key, tests in self._parsed_results.items() if key in failing_keys], []))
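+    # parsed_results maps each table title to a list of test names, and
+    # failing_tests() merges the fail/crash/timeout lists.  An illustrative
+    # (made-up) entry:
+    #   {u'Tests that timed out:': [u'fast/js/example.html']}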
+
+
+class Build(object):
+    def __init__(self, builder, build_number, revision, is_green):
+        self._builder = builder
+        self._number = build_number
+        self._revision = revision
+        self._is_green = is_green
+        self._layout_test_results = None
+
+    @staticmethod
+    def build_url(builder, build_number):
+        return "%s/builds/%s" % (builder.url(), build_number)
+
+    def url(self):
+        return self.build_url(self.builder(), self._number)
+
+    def results_url(self):
+        results_directory = "r%s (%s)" % (self.revision(), self._number)
+        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))
+
+    def layout_test_results(self):
+        if not self._layout_test_results:
+            self._layout_test_results = LayoutTestResults.results_from_url(self.results_url())
+        return self._layout_test_results
+
+    def builder(self):
+        return self._builder
+
+    def revision(self):
+        return self._revision
+
+    def is_green(self):
+        return self._is_green
+
+    def previous_build(self):
+        # previous_build() allows callers to avoid assuming build numbers are sequential.
+        # They may not be sequential across all master changes, or when non-trunk builds are made.
+        return self._builder.build(self._number - 1)
+
+
+class BuildBot(object):
+    # FIXME: This should move into some sort of webkit_config.py
+    default_host = "build.webkit.org"
+
+    def __init__(self, host=default_host):
+        self.buildbot_host = host
+        self._builder_by_name = {}
+
+        # If any core builder is red we should not be landing patches.  Other
+        # builders should be added to this list once they are known to be
+        # reliable.
+        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
+        self.core_builder_names_regexps = [
+            "SnowLeopard.*Build",
+            "SnowLeopard.*Test",
+            "Leopard",
+            "Tiger",
+            "Windows.*Build",
+            "GTK.*32",
+            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
+            "Qt",
+            "Chromium.*Release$",
+        ]
+
+    def _parse_last_build_cell(self, builder, cell):
+        status_link = cell.find('a')
+        if status_link:
+            # Will be either a revision number or a build number
+            revision_string = status_link.string
+            # If revision_string has non-digits, assume it's not a revision number.
+            builder['built_revision'] = int(revision_string) \
+                                        if not re.search(r'\D', revision_string) \
+                                        else None
+
+            # FIXME: We treat "slave lost" as green even though it is not, to
+            # work around the Qt bot being on a broken internet connection.
+            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
+            cell_contents = cell.renderContents()
+            builder['is_green'] = not re.search('fail', cell_contents) or \
+                                  bool(re.search('lost', cell_contents))
+
+            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
+            link_match = re.match(status_link_regexp, status_link['href'])
+            builder['build_number'] = int(link_match.group("build_number"))
+        else:
+            # We failed to find a link in the first cell, so just give up.  This
+            # can happen when a builder was just added; the first cell will just
+            # say "no build".
+            # Other parts of the code depend on is_green being present.
+            builder['is_green'] = False
+            builder['built_revision'] = None
+            builder['build_number'] = None
+
+    def _parse_current_build_cell(self, builder, cell):
+        activity_lines = cell.renderContents().split("<br />")
+        builder["activity"] = activity_lines[0] # normally "building" or "idle"
+        # The middle lines document how long is left for any current builds.
+        match = re.match(r"(?P<pending_builds>\d+) pending", activity_lines[-1])
+        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0
+
+    def _parse_builder_status_from_row(self, status_row):
+        status_cells = status_row.findAll('td')
+        builder = {}
+
+        # First cell is the name
+        name_link = status_cells[0].find('a')
+        builder["name"] = unicode(name_link.string)
+
+        self._parse_last_build_cell(builder, status_cells[1])
+        self._parse_current_build_cell(builder, status_cells[2])
+        return builder
+
+    def _matches_regexps(self, builder_name, name_regexps):
+        for name_regexp in name_regexps:
+            if re.match(name_regexp, builder_name):
+                return True
+        return False
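+    # Note: re.match anchors at the start of the string, so these regexps are
+    # effectively prefix patterns; "Qt", for example, matches any builder
+    # whose name begins with "Qt".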
+
+    # FIXME: Should move onto Builder
+    def _is_core_builder(self, builder_name):
+        return self._matches_regexps(builder_name, self.core_builder_names_regexps)
+
+    # FIXME: This method needs to die, but is used by a unit test at the moment.
+    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
+        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]
+
+    def red_core_builders(self):
+        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]
+
+    def red_core_builders_names(self):
+        return [builder["name"] for builder in self.red_core_builders()]
+
+    def idle_red_core_builders(self):
+        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]
+
+    def core_builders_are_green(self):
+        return not self.red_core_builders()
+
+    # FIXME: These _fetch methods should move to a networking class.
+    def _fetch_xmlrpc_build_dictionary(self, builder, build_number):
+        # The buildbot XMLRPC API is super-limited.
+        # For one, you cannot fetch info on builds which are incomplete.
+        proxy = xmlrpclib.ServerProxy("http://%s/xmlrpc" % self.buildbot_host, allow_none=True)
+        try:
+            return proxy.getBuild(builder.name(), int(build_number))
+        except xmlrpclib.Fault, err:
+            build_url = Build.build_url(builder, build_number)
+            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
+            return None
+
+    def _fetch_one_box_per_builder(self):
+        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
+        return urllib2.urlopen(build_status_url)
+
+    def _parse_twisted_file_row(self, file_row):
+        string_or_empty = lambda soup: unicode(soup.string) if soup.string else u""
+        file_cells = file_row.findAll('td')
+        return {
+            "filename": string_or_empty(file_cells[0].find("a")),
+            "size": string_or_empty(file_cells[1]),
+            "type": string_or_empty(file_cells[2]),
+            "encoding": string_or_empty(file_cells[3]),
+        }
+
+    def _parse_twisted_directory_listing(self, page):
+        soup = BeautifulSoup(page)
+        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
+        file_rows = soup.find('table').findAll('tr', { "class" : True })
+        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]
+
+    # FIXME: There should be a better way to get this information directly from twisted.
+    def _fetch_twisted_directory_listing(self, url):
+        return self._parse_twisted_directory_listing(urllib2.urlopen(url))
+
+    def builders(self):
+        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]
+
+    # This method pulls from /one_box_per_builder as an efficient way to get
+    # the status of every builder in a single request.
+    def builder_statuses(self):
+        soup = BeautifulSoup(self._fetch_one_box_per_builder())
+        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
+
+    def core_builder_statuses(self):
+        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]
+
+    def builder_with_name(self, name):
+        builder = self._builder_by_name.get(name)
+        if not builder:
+            builder = Builder(name, self)
+            self._builder_by_name[name] = builder
+        return builder
+
+    def revisions_causing_failures(self, only_core_builders=True):
+        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
+        revision_to_failing_bots = {}
+        for builder_status in builder_statuses:
+            if builder_status["is_green"]:
+                continue
+            builder = self.builder_with_name(builder_status["name"])
+            revisions = builder.blameworthy_revisions(builder_status["build_number"])
+            for revision in revisions:
+                failing_bots = revision_to_failing_bots.get(revision, [])
+                failing_bots.append(builder)
+                revision_to_failing_bots[revision] = failing_bots
+        return revision_to_failing_bots
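+    # The result maps each suspect revision to the Builder objects blaming it;
+    # illustrative shape: {47512: [<Builder "Leopard">, <Builder "Qt">]}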
+
+    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
+    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
+    def _latest_builds_from_builders(self, only_core_builders=True):
+        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
+        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]
+
+    def _build_at_or_before_revision(self, build, revision):
+        while build:
+            if build.revision() <= revision:
+                return build
+            build = build.previous_build()
+        return None
+
+    def last_green_revision(self, only_core_builders=True):
+        builds = self._latest_builds_from_builders(only_core_builders)
+        target_revision = builds[0].revision()
+        # An alternate way to do this would be to start at one revision and walk backwards,
+        # checking builder.build_for_revision; however, build_for_revision is very slow on first load.
+        while True:
+            # Make builds agree on revision
+            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
+            if None in builds: # One of the builds failed to load from the server.
+                return None
+            min_revision = min(build.revision() for build in builds)
+            if min_revision != target_revision:
+                target_revision = min_revision
+                continue # Builds don't all agree on revision, keep searching
+            # Check to make sure they're all green
+            all_are_green = all(build.is_green() for build in builds)
+            if not all_are_green:
+                target_revision -= 1
+                continue
+            return min_revision
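+    # Illustrative trace (hypothetical revisions): if the latest core builds
+    # are at r105, r103 and r104, target_revision starts at 105, the minimum
+    # found is 103, so every build is walked back to r103 or earlier; once
+    # all builds agree on one revision, any red build pushes the target back
+    # by one until a revision that is green on every builder is found.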