|
1 """HTTP server base class. |
|
2 |
|
3 Note: the class in this module doesn't implement any HTTP request; see |
|
4 SimpleHTTPServer for simple implementations of GET, HEAD and POST |
|
5 (including CGI scripts). It does, however, optionally implement HTTP/1.1 |
|
6 persistent connections, as of version 0.3. |
|
7 |
|
8 Contents: |
|
9 |
|
10 - BaseHTTPRequestHandler: HTTP request handler base class |
|
11 - test: test function |
|
12 |
|
13 XXX To do: |
|
14 |
|
15 - log requests even later (to capture byte count) |
|
16 - log user-agent header and other interesting goodies |
|
17 - send error log to separate file |
|
18 """ |
|
19 |
|
20 |
|
21 # See also: |
|
22 # |
|
23 # HTTP Working Group T. Berners-Lee |
|
24 # INTERNET-DRAFT R. T. Fielding |
|
25 # <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen |
|
26 # Expires September 8, 1995 March 8, 1995 |
|
27 # |
|
28 # URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt |
|
29 # |
|
30 # and |
|
31 # |
|
32 # Network Working Group R. Fielding |
|
33 # Request for Comments: 2616 et al |
|
34 # Obsoletes: 2068 June 1999 |
|
35 # Category: Standards Track |
|
36 # |
|
37 # URL: http://www.faqs.org/rfcs/rfc2616.html |
|
38 |
|
39 # Log files |
|
40 # --------- |
|
41 # |
|
42 # Here's a quote from the NCSA httpd docs about log file format. |
|
43 # |
|
44 # | The logfile format is as follows. Each line consists of: |
|
45 # | |
|
46 # | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb |
|
47 # | |
|
48 # | host: Either the DNS name or the IP number of the remote client |
|
49 # | rfc931: Any information returned by identd for this person, |
|
50 # | - otherwise. |
|
51 # | authuser: If user sent a userid for authentication, the user name, |
|
52 # | - otherwise. |
|
53 # | DD: Day |
|
54 # | Mon: Month (calendar name) |
|
55 # | YYYY: Year |
|
56 # | hh: hour (24-hour format, the machine's timezone) |
|
57 # | mm: minutes |
|
58 # | ss: seconds |
|
59 # | request: The first line of the HTTP request as sent by the client. |
|
60 # | ddd: the status code returned by the server, - if not available. |
|
61 # | bbbb: the total number of bytes sent, |
|
62 # | *not including the HTTP/1.0 header*, - if not available |
|
63 # | |
|
64 # | You can determine the name of the file accessed through request. |
|
65 # |
|
66 # (Actually, the latter is only true if you know the server configuration |
|
67 # at the time the request was made!) |
|
68 |
|
# Module version; BaseHTTPRequestHandler.server_version is built from it.
__version__ = "0.3"

# Names exported by "from <module> import *".
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
|
72 |
|
73 import sys |
|
74 import time |
|
75 import socket # For gethostbyaddr() |
|
76 import mimetools |
|
77 import SocketServer |
|
78 |
|
# Default error message template.  It is %-interpolated with a mapping
# that must provide the keys "code" (formatted both as %d and as %s),
# "message" and "explain".
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""
|
91 |
|
92 def _quote_html(html): |
|
93 return html.replace("&", "&").replace("<", "<").replace(">", ">") |
|
94 |
|
class HTTPServer(SocketServer.TCPServer):
    """TCP server that records its own host name and port once bound."""

    # Permit rebinding to a recently used address; convenient when the
    # server is restarted repeatedly during testing.
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the listening socket, then store server_name/server_port.

        server_name is the fully qualified name socket.getfqdn() reports
        for the bound host; server_port is the bound port number.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_address = self.socket.getsockname()[:2]
        host, port = bound_address
        self.server_port = port
        self.server_name = socket.getfqdn(host)
|
105 |
|
106 |
|
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and request_version are the broken-down request
    line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # Longest request line handle_one_request() accepts.  Anything
    # longer is answered with 414 instead of being buffered in full.
    _MAX_REQUEST_LINE = 65536

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for a blank request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = "HTTP/0.9"  # Default
        self.close_connection = 1

        # Strip exactly one line terminator (CRLF or bare LF).
        requestline = self.raw_requestline
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline

        words = requestline.split()
        if len(words) == 3:
            command, path, version = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = (int(version_number[0]),
                                  int(version_number[1]))
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1+.
            if (version_number >= (1, 1) and
                    self.protocol_version >= "HTTP/1.1"):
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(
                    505, "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: HTTP/0.9, which only knows GET.
            command, path = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(
                    400, "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            # Blank line: fail without sending anything.
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "").lower()
        if conntype == 'close':
            self.close_connection = 1
        elif conntype == 'keep-alive' and self.protocol_version >= "HTTP/1.1":
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        A request line longer than _MAX_REQUEST_LINE is answered with a
        414 error.  A socket timeout while waiting for the request line
        is logged and closes the connection.

        """
        try:
            # Read one byte more than the limit so that an over-long line
            # can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(
                self._MAX_REQUEST_LINE + 1)
        except socket.timeout:
            self.log_error("Request timed out")
            self.close_connection = 1
            return
        if len(self.raw_requestline) > self._MAX_REQUEST_LINE:
            # parse_request() has not run, so provide the attributes
            # that send_error(), send_response() and log_request() read.
            self.requestline = ''
            self.request_version = ''
            self.command = ''
            self.send_error(414)
            return
        if not self.raw_requestline:
            # EOF: the client closed the connection.
            self.close_connection = 1
            return
        if not self.parse_request():
            # An error code has been sent, just exit
            return
        mname = 'do_' + self.command
        if not hasattr(self, mname):
            self.send_error(501, "Unsupported method (%r)" % self.command)
            return
        method = getattr(self, mname)
        method()

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.  The HTML
        body is omitted for HEAD requests and for codes below 200 and
        codes 204 and 304.

        """
        try:
            short_msg, long_msg = self.responses[code]
        except KeyError:
            short_msg, long_msg = '???', '???'
        if message is None:
            message = short_msg
        explain = long_msg
        self.log_error("code %d, message %s", code, message)
        # using _quote_html to prevent Cross Site Scripting attacks
        # (see bug #1100201)
        content = (self.error_message_format %
                   {'code': code,
                    'message': _quote_html(message),
                    'explain': explain})
        self.send_response(code, message)
        self.send_header("Content-Type", "text/html")
        self.send_header('Connection', 'close')
        self.end_headers()
        if self.command != 'HEAD' and code >= 200 and code not in (204, 304):
            self.wfile.write(content)

    # Template used by send_error() to build the HTML body.
    error_message_format = DEFAULT_ERROR_MESSAGE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message registered for code in
        self.responses is used, or an empty string for unknown codes.
        Nothing is written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        A "Connection" header additionally updates self.close_connection
        ("close" sets it, "keep-alive" clears it), even for HTTP/0.9
        requests where no header text is written.

        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """

        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """

        self.log_message(*args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """

        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return a date and time formatted for a message header.

        timestamp is seconds since the epoch and defaults to the
        current time; the result is expressed in GMT.

        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current local time formatted for logging."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (Monday is 0).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1-12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the client host to socket.getfqdn() and
        returns whatever name that yields.

        """

        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this server.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
|
552 |
|
553 |
|
554 def test(HandlerClass = BaseHTTPRequestHandler, |
|
555 ServerClass = HTTPServer, protocol="HTTP/1.0"): |
|
556 """Test the HTTP request handler class. |
|
557 |
|
558 This runs an HTTP server on port 8000 (or the first command line |
|
559 argument). |
|
560 |
|
561 """ |
|
562 |
|
563 if sys.argv[1:]: |
|
564 port = int(sys.argv[1]) |
|
565 else: |
|
566 port = 8000 |
|
567 server_address = ('', port) |
|
568 |
|
569 HandlerClass.protocol_version = protocol |
|
570 httpd = ServerClass(server_address, HandlerClass) |
|
571 |
|
572 sa = httpd.socket.getsockname() |
|
573 print "Serving HTTP on", sa[0], "port", sa[1], "..." |
|
574 httpd.serve_forever() |
|
575 |
|
576 |
|
# Start the demo server when this file is executed as a script.
if __name__ == '__main__':
    test()