diff -r 000000000000 -r ae805ac0140d python-2.5.2/win32/Lib/test/test_htmlparser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/python-2.5.2/win32/Lib/test/test_htmlparser.py Fri Apr 03 17:19:34 2009 +0100
@@ -0,0 +1,318 @@
+"""Tests for HTMLParser.py."""
+
+import HTMLParser
+import pprint
+import sys
+import unittest
+from test import test_support
+
+
+class EventCollector(HTMLParser.HTMLParser):
+    """HTMLParser that records every handler callback as an event tuple."""
+
+    def __init__(self):
+        self.events = []
+        # Bound once here; every handler below records through it.
+        self.append = self.events.append
+        HTMLParser.HTMLParser.__init__(self)
+
+    def get_events(self):
+        # Normalize the list of events so that buffer artefacts don't
+        # separate runs of contiguous characters.
+        merged = []
+        prev_kind = None
+        for event in self.events:
+            kind = event[0]
+            if kind == prev_kind == "data":
+                merged[-1] = ("data", merged[-1][1] + event[1])
+            else:
+                merged.append(event)
+            prev_kind = kind
+        self.events = merged  # NOTE: self.append still targets the old list
+        return merged
+
+    # structure markup
+    def handle_starttag(self, tag, attrs):
+        self.append(("starttag", tag, attrs))
+
+    def handle_startendtag(self, tag, attrs):
+        self.append(("startendtag", tag, attrs))
+
+    def handle_endtag(self, tag):
+        self.append(("endtag", tag))
+
+    # all other markup
+    def handle_comment(self, data):
+        self.append(("comment", data))
+
+    def handle_charref(self, data):
+        self.append(("charref", data))
+
+    def handle_data(self, data):
+        self.append(("data", data))
+
+    def handle_decl(self, data):
+        self.append(("decl", data))
+
+    def handle_entityref(self, data):
+        self.append(("entityref", data))
+
+    def handle_pi(self, data):
+        self.append(("pi", data))
+
+    def unknown_decl(self, decl):
+        self.append(("unknown decl", decl))
+
+
+class EventCollectorExtra(EventCollector):
+    """EventCollector that also records each start tag's source text."""
+    def handle_starttag(self, tag, attrs):
+        EventCollector.handle_starttag(self, tag, attrs)
+        self.append(("starttag_text", self.get_starttag_text()))
+
+
+class TestCaseBase(unittest.TestCase):
+    """Helpers for parsing markup and checking the recorded events."""
+    def _run_check(self, source, expected_events, collector=EventCollector):
+        parser = collector()
+        for chunk in source:  # a str source is fed one character at a time
+            parser.feed(chunk)
+        parser.close()
+        events = parser.get_events()
+        if events != expected_events:
+            self.fail("received events did not match expected events\n"
+                      "Expected:\n" + pprint.pformat(expected_events) +
+                      "\nReceived:\n" + pprint.pformat(events))
+
+    def _run_check_extra(self, source, events):
+        self._run_check(source, events, EventCollectorExtra)
+
+    def _parse_error(self, source):  # parsing source must raise HTMLParseError
+        def parse(source=source):
+            parser = HTMLParser.HTMLParser()
+            parser.feed(source)
+            parser.close()
+        self.assertRaises(HTMLParser.HTMLParseError, parse)
+
+
+class HTMLParserTestCase(TestCaseBase):
+
+    def test_processing_instruction_only(self):  # a lone PI gives one event
+        self._run_check("<?processing instruction>", [
+            ("pi", "processing instruction"),
+            ])
+        self._run_check("<?processing instruction ?>", [
+            ("pi", "processing instruction ?"),
+            ])
+
+ def test_simple_html(self):
+ self._run_check("""
+
+&entity;
+
+sample
+text
+“
+
+
+""", [
+ ("data", "\n"),
+ ("decl", "DOCTYPE html PUBLIC 'foo'"),
+ ("data", "\n"),
+ ("starttag", "html", []),
+ ("entityref", "entity"),
+ ("charref", "32"),
+ ("data", "\n"),
+ ("comment", "comment1a\n->