|
1 /* implements the string, long, and float formatters. that is, |
|
2 string.__format__, etc. */ |
|
3 |
|
4 /* Before including this, you must include either: |
|
5 stringlib/unicodedefs.h |
|
6 stringlib/stringdefs.h |
|
7 |
|
8 Also, you should define the names: |
|
9 FORMAT_STRING |
|
10 FORMAT_LONG |
|
11 FORMAT_FLOAT |
|
12 to be whatever you want the public names of these functions to |
|
13 be. These are the only non-static functions defined here. |
|
14 */ |
|
15 |
|
16 #define ALLOW_PARENS_FOR_SIGN 0 |
|
17 |
|
18 /* |
|
19 get_integer consumes 0 or more decimal digit characters from an |
|
20 input string, updates *result with the corresponding positive |
|
21 integer, and returns the number of digits consumed. |
|
22 |
|
23 returns -1 on error. |
|
24 */ |
|
25 static int |
|
26 get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, |
|
27 Py_ssize_t *result) |
|
28 { |
|
29 Py_ssize_t accumulator, digitval, oldaccumulator; |
|
30 int numdigits; |
|
31 accumulator = numdigits = 0; |
|
32 for (;;(*ptr)++, numdigits++) { |
|
33 if (*ptr >= end) |
|
34 break; |
|
35 digitval = STRINGLIB_TODECIMAL(**ptr); |
|
36 if (digitval < 0) |
|
37 break; |
|
38 /* |
|
39 This trick was copied from old Unicode format code. It's cute, |
|
40 but would really suck on an old machine with a slow divide |
|
41 implementation. Fortunately, in the normal case we do not |
|
42 expect too many digits. |
|
43 */ |
|
44 oldaccumulator = accumulator; |
|
45 accumulator *= 10; |
|
46 if ((accumulator+10)/10 != oldaccumulator+1) { |
|
47 PyErr_Format(PyExc_ValueError, |
|
48 "Too many decimal digits in format string"); |
|
49 return -1; |
|
50 } |
|
51 accumulator += digitval; |
|
52 } |
|
53 *result = accumulator; |
|
54 return numdigits; |
|
55 } |
|
56 |
|
57 /************************************************************************/ |
|
58 /*********** standard format specifier parsing **************************/ |
|
59 /************************************************************************/ |
|
60 |
|
61 /* returns true if this character is a specifier alignment token */ |
|
62 Py_LOCAL_INLINE(int) |
|
63 is_alignment_token(STRINGLIB_CHAR c) |
|
64 { |
|
65 switch (c) { |
|
66 case '<': case '>': case '=': case '^': |
|
67 return 1; |
|
68 default: |
|
69 return 0; |
|
70 } |
|
71 } |
|
72 |
|
73 /* returns true if this character is a sign element */ |
|
74 Py_LOCAL_INLINE(int) |
|
75 is_sign_element(STRINGLIB_CHAR c) |
|
76 { |
|
77 switch (c) { |
|
78 case ' ': case '+': case '-': |
|
79 #if ALLOW_PARENS_FOR_SIGN |
|
80 case '(': |
|
81 #endif |
|
82 return 1; |
|
83 default: |
|
84 return 0; |
|
85 } |
|
86 } |
|
87 |
|
88 |
|
/* The parsed form of a standard format specifier, as filled in by
   parse_internal_render_format_spec(). */
typedef struct {
    STRINGLIB_CHAR fill_char;   /* padding character; '\0' if none was
                                   given */
    STRINGLIB_CHAR align;       /* '<', '>', '=' or '^'; '\0' if none
                                   was given */
    int alternate;              /* 1 if '#' was present, else 0 */
    STRINGLIB_CHAR sign;        /* the sign element seen (see
                                   is_sign_element()); '\0' if none */
    Py_ssize_t width;           /* minimum field width; -1 if not
                                   specified */
    Py_ssize_t precision;       /* digits after the '.'; -1 if not
                                   specified */
    STRINGLIB_CHAR type;        /* presentation type character, or the
                                   caller supplied default */
} InternalFormatSpec;
|
98 |
|
99 /* |
|
100 ptr points to the start of the format_spec, end points just past its end. |
|
101 fills in format with the parsed information. |
|
102 returns 1 on success, 0 on failure. |
|
103 if failure, sets the exception |
|
104 */ |
|
105 static int |
|
106 parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, |
|
107 Py_ssize_t format_spec_len, |
|
108 InternalFormatSpec *format, |
|
109 char default_type) |
|
110 { |
|
111 STRINGLIB_CHAR *ptr = format_spec; |
|
112 STRINGLIB_CHAR *end = format_spec + format_spec_len; |
|
113 |
|
114 /* end-ptr is used throughout this code to specify the length of |
|
115 the input string */ |
|
116 |
|
117 Py_ssize_t specified_width; |
|
118 |
|
119 format->fill_char = '\0'; |
|
120 format->align = '\0'; |
|
121 format->alternate = 0; |
|
122 format->sign = '\0'; |
|
123 format->width = -1; |
|
124 format->precision = -1; |
|
125 format->type = default_type; |
|
126 |
|
127 /* If the second char is an alignment token, |
|
128 then parse the fill char */ |
|
129 if (end-ptr >= 2 && is_alignment_token(ptr[1])) { |
|
130 format->align = ptr[1]; |
|
131 format->fill_char = ptr[0]; |
|
132 ptr += 2; |
|
133 } |
|
134 else if (end-ptr >= 1 && is_alignment_token(ptr[0])) { |
|
135 format->align = ptr[0]; |
|
136 ++ptr; |
|
137 } |
|
138 |
|
139 /* Parse the various sign options */ |
|
140 if (end-ptr >= 1 && is_sign_element(ptr[0])) { |
|
141 format->sign = ptr[0]; |
|
142 ++ptr; |
|
143 #if ALLOW_PARENS_FOR_SIGN |
|
144 if (end-ptr >= 1 && ptr[0] == ')') { |
|
145 ++ptr; |
|
146 } |
|
147 #endif |
|
148 } |
|
149 |
|
150 /* If the next character is #, we're in alternate mode. This only |
|
151 applies to integers. */ |
|
152 if (end-ptr >= 1 && ptr[0] == '#') { |
|
153 format->alternate = 1; |
|
154 ++ptr; |
|
155 } |
|
156 |
|
157 /* The special case for 0-padding (backwards compat) */ |
|
158 if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') { |
|
159 format->fill_char = '0'; |
|
160 if (format->align == '\0') { |
|
161 format->align = '='; |
|
162 } |
|
163 ++ptr; |
|
164 } |
|
165 |
|
166 /* XXX add error checking */ |
|
167 specified_width = get_integer(&ptr, end, &format->width); |
|
168 |
|
169 /* if specified_width is 0, we didn't consume any characters for |
|
170 the width. in that case, reset the width to -1, because |
|
171 get_integer() will have set it to zero */ |
|
172 if (specified_width == 0) { |
|
173 format->width = -1; |
|
174 } |
|
175 |
|
176 /* Parse field precision */ |
|
177 if (end-ptr && ptr[0] == '.') { |
|
178 ++ptr; |
|
179 |
|
180 /* XXX add error checking */ |
|
181 specified_width = get_integer(&ptr, end, &format->precision); |
|
182 |
|
183 /* not having a precision after a dot is an error */ |
|
184 if (specified_width == 0) { |
|
185 PyErr_Format(PyExc_ValueError, |
|
186 "Format specifier missing precision"); |
|
187 return 0; |
|
188 } |
|
189 |
|
190 } |
|
191 |
|
192 /* Finally, parse the type field */ |
|
193 |
|
194 if (end-ptr > 1) { |
|
195 /* invalid conversion spec */ |
|
196 PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); |
|
197 return 0; |
|
198 } |
|
199 |
|
200 if (end-ptr == 1) { |
|
201 format->type = ptr[0]; |
|
202 ++ptr; |
|
203 } |
|
204 |
|
205 return 1; |
|
206 } |
|
207 |
|
208 #if defined FORMAT_FLOAT || defined FORMAT_LONG |
|
209 /************************************************************************/ |
|
210 /*********** common routines for numeric formatting *********************/ |
|
211 /************************************************************************/ |
|
212 |
|
/* describes the layout for an integer, see the comment in
   calc_number_widths() for details.  All n_* members are character
   counts. */
typedef struct {
    Py_ssize_t n_lpadding;  /* fill characters before the sign */
    Py_ssize_t n_prefix;    /* prefix characters (e.g. '0x') */
    Py_ssize_t n_spadding;  /* fill characters between the prefix and
                               the digits */
    Py_ssize_t n_rpadding;  /* fill characters after everything else */
    char lsign;             /* sign character written on the left, or
                               '\0' */
    Py_ssize_t n_lsign;     /* 1 if lsign is written, else 0 */
    char rsign;             /* sign character written on the right, or
                               '\0' */
    Py_ssize_t n_rsign;     /* 1 if rsign is written, else 0 */
    Py_ssize_t n_total; /* just a convenience, it's derivable from the
                           other fields */
} NumberFieldWidths;
|
227 |
|
228 /* not all fields of format are used. for example, precision is |
|
229 unused. should this take discrete params in order to be more clear |
|
230 about what it does? or is passing a single format parameter easier |
|
231 and more efficient enough to justify a little obfuscation? */ |
|
232 static void |
|
233 calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign, |
|
234 Py_ssize_t n_prefix, Py_ssize_t n_digits, |
|
235 const InternalFormatSpec *format) |
|
236 { |
|
237 spec->n_lpadding = 0; |
|
238 spec->n_prefix = 0; |
|
239 spec->n_spadding = 0; |
|
240 spec->n_rpadding = 0; |
|
241 spec->lsign = '\0'; |
|
242 spec->n_lsign = 0; |
|
243 spec->rsign = '\0'; |
|
244 spec->n_rsign = 0; |
|
245 |
|
246 /* the output will look like: |
|
247 | | |
|
248 | <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> | |
|
249 | | |
|
250 |
|
251 lsign and rsign are computed from format->sign and the actual |
|
252 sign of the number |
|
253 |
|
254 prefix is given (it's for the '0x' prefix) |
|
255 |
|
256 digits is already known |
|
257 |
|
258 the total width is either given, or computed from the |
|
259 actual digits |
|
260 |
|
261 only one of lpadding, spadding, and rpadding can be non-zero, |
|
262 and it's calculated from the width and other fields |
|
263 */ |
|
264 |
|
265 /* compute the various parts we're going to write */ |
|
266 if (format->sign == '+') { |
|
267 /* always put a + or - */ |
|
268 spec->n_lsign = 1; |
|
269 spec->lsign = (actual_sign == '-' ? '-' : '+'); |
|
270 } |
|
271 #if ALLOW_PARENS_FOR_SIGN |
|
272 else if (format->sign == '(') { |
|
273 if (actual_sign == '-') { |
|
274 spec->n_lsign = 1; |
|
275 spec->lsign = '('; |
|
276 spec->n_rsign = 1; |
|
277 spec->rsign = ')'; |
|
278 } |
|
279 } |
|
280 #endif |
|
281 else if (format->sign == ' ') { |
|
282 spec->n_lsign = 1; |
|
283 spec->lsign = (actual_sign == '-' ? '-' : ' '); |
|
284 } |
|
285 else { |
|
286 /* non specified, or the default (-) */ |
|
287 if (actual_sign == '-') { |
|
288 spec->n_lsign = 1; |
|
289 spec->lsign = '-'; |
|
290 } |
|
291 } |
|
292 |
|
293 spec->n_prefix = n_prefix; |
|
294 |
|
295 /* now the number of padding characters */ |
|
296 if (format->width == -1) { |
|
297 /* no padding at all, nothing to do */ |
|
298 } |
|
299 else { |
|
300 /* see if any padding is needed */ |
|
301 if (spec->n_lsign + n_digits + spec->n_rsign + |
|
302 spec->n_prefix >= format->width) { |
|
303 /* no padding needed, we're already bigger than the |
|
304 requested width */ |
|
305 } |
|
306 else { |
|
307 /* determine which of left, space, or right padding is |
|
308 needed */ |
|
309 Py_ssize_t padding = format->width - |
|
310 (spec->n_lsign + spec->n_prefix + |
|
311 n_digits + spec->n_rsign); |
|
312 if (format->align == '<') |
|
313 spec->n_rpadding = padding; |
|
314 else if (format->align == '>') |
|
315 spec->n_lpadding = padding; |
|
316 else if (format->align == '^') { |
|
317 spec->n_lpadding = padding / 2; |
|
318 spec->n_rpadding = padding - spec->n_lpadding; |
|
319 } |
|
320 else if (format->align == '=') |
|
321 spec->n_spadding = padding; |
|
322 else |
|
323 spec->n_lpadding = padding; |
|
324 } |
|
325 } |
|
326 spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix + |
|
327 spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding; |
|
328 } |
|
329 |
|
330 /* fill in the non-digit parts of a numbers's string representation, |
|
331 as determined in calc_number_widths(). returns the pointer to |
|
332 where the digits go. */ |
|
333 static STRINGLIB_CHAR * |
|
334 fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, |
|
335 STRINGLIB_CHAR *prefix, Py_ssize_t n_digits, |
|
336 STRINGLIB_CHAR fill_char) |
|
337 { |
|
338 STRINGLIB_CHAR *p_digits; |
|
339 |
|
340 if (spec->n_lpadding) { |
|
341 STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); |
|
342 p_buf += spec->n_lpadding; |
|
343 } |
|
344 if (spec->n_lsign == 1) { |
|
345 *p_buf++ = spec->lsign; |
|
346 } |
|
347 if (spec->n_prefix) { |
|
348 memmove(p_buf, |
|
349 prefix, |
|
350 spec->n_prefix * sizeof(STRINGLIB_CHAR)); |
|
351 p_buf += spec->n_prefix; |
|
352 } |
|
353 if (spec->n_spadding) { |
|
354 STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); |
|
355 p_buf += spec->n_spadding; |
|
356 } |
|
357 p_digits = p_buf; |
|
358 p_buf += n_digits; |
|
359 if (spec->n_rsign == 1) { |
|
360 *p_buf++ = spec->rsign; |
|
361 } |
|
362 if (spec->n_rpadding) { |
|
363 STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding); |
|
364 p_buf += spec->n_rpadding; |
|
365 } |
|
366 return p_digits; |
|
367 } |
|
368 #endif /* FORMAT_FLOAT || FORMAT_LONG */ |
|
369 |
|
370 /************************************************************************/ |
|
371 /*********** string formatting ******************************************/ |
|
372 /************************************************************************/ |
|
373 |
|
374 static PyObject * |
|
375 format_string_internal(PyObject *value, const InternalFormatSpec *format) |
|
376 { |
|
377 Py_ssize_t width; /* total field width */ |
|
378 Py_ssize_t lpad; |
|
379 STRINGLIB_CHAR *dst; |
|
380 STRINGLIB_CHAR *src = STRINGLIB_STR(value); |
|
381 Py_ssize_t len = STRINGLIB_LEN(value); |
|
382 PyObject *result = NULL; |
|
383 |
|
384 /* sign is not allowed on strings */ |
|
385 if (format->sign != '\0') { |
|
386 PyErr_SetString(PyExc_ValueError, |
|
387 "Sign not allowed in string format specifier"); |
|
388 goto done; |
|
389 } |
|
390 |
|
391 /* alternate is not allowed on strings */ |
|
392 if (format->alternate) { |
|
393 PyErr_SetString(PyExc_ValueError, |
|
394 "Alternate form (#) not allowed in string format " |
|
395 "specifier"); |
|
396 goto done; |
|
397 } |
|
398 |
|
399 /* '=' alignment not allowed on strings */ |
|
400 if (format->align == '=') { |
|
401 PyErr_SetString(PyExc_ValueError, |
|
402 "'=' alignment not allowed " |
|
403 "in string format specifier"); |
|
404 goto done; |
|
405 } |
|
406 |
|
407 /* if precision is specified, output no more that format.precision |
|
408 characters */ |
|
409 if (format->precision >= 0 && len >= format->precision) { |
|
410 len = format->precision; |
|
411 } |
|
412 |
|
413 if (format->width >= 0) { |
|
414 width = format->width; |
|
415 |
|
416 /* but use at least len characters */ |
|
417 if (len > width) { |
|
418 width = len; |
|
419 } |
|
420 } |
|
421 else { |
|
422 /* not specified, use all of the chars and no more */ |
|
423 width = len; |
|
424 } |
|
425 |
|
426 /* allocate the resulting string */ |
|
427 result = STRINGLIB_NEW(NULL, width); |
|
428 if (result == NULL) |
|
429 goto done; |
|
430 |
|
431 /* now write into that space */ |
|
432 dst = STRINGLIB_STR(result); |
|
433 |
|
434 /* figure out how much leading space we need, based on the |
|
435 aligning */ |
|
436 if (format->align == '>') |
|
437 lpad = width - len; |
|
438 else if (format->align == '^') |
|
439 lpad = (width - len) / 2; |
|
440 else |
|
441 lpad = 0; |
|
442 |
|
443 /* if right aligning, increment the destination allow space on the |
|
444 left */ |
|
445 memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR)); |
|
446 |
|
447 /* do any padding */ |
|
448 if (width > len) { |
|
449 STRINGLIB_CHAR fill_char = format->fill_char; |
|
450 if (fill_char == '\0') { |
|
451 /* use the default, if not specified */ |
|
452 fill_char = ' '; |
|
453 } |
|
454 |
|
455 /* pad on left */ |
|
456 if (lpad) |
|
457 STRINGLIB_FILL(dst, fill_char, lpad); |
|
458 |
|
459 /* pad on right */ |
|
460 if (width - len - lpad) |
|
461 STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad); |
|
462 } |
|
463 |
|
464 done: |
|
465 return result; |
|
466 } |
|
467 |
|
468 |
|
469 /************************************************************************/ |
|
470 /*********** long formatting ********************************************/ |
|
471 /************************************************************************/ |
|
472 |
|
473 #if defined FORMAT_LONG || defined FORMAT_INT |
|
/* Signature of the callback that renders an int or long object as a
   string object in the given base; see long_format() and int_format()
   for the functions passed in. */
typedef PyObject*
(*IntOrLongToString)(PyObject *value, int base);
|
476 |
|
/* Formats the integer object value according to format and returns a
   new string object, or NULL with an exception set.

   tostring converts value to a string in the base implied by
   format->type ('b', 'o', 'x'/'X', otherwise 10).  Type 'c' does not
   use tostring at all: it emits the single character whose code is
   value.  Type 'n' adds grouping characters via STRINGLIB_GROUPING.
   A precision is always an error, as is a sign combined with 'c'.

   The steps below depend on each other's order: the digits are copied
   into the result first, then uppercased, then grouped, and only then
   are padding, sign and prefix written around them. */
static PyObject *
format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                            IntOrLongToString tostring)
{
    PyObject *result = NULL;
    PyObject *tmp = NULL;       /* string produced by tostring(), owned
                                   here and released at done: */
    STRINGLIB_CHAR *pnumeric_chars;
    STRINGLIB_CHAR numeric_char;
    STRINGLIB_CHAR sign = '\0';
    STRINGLIB_CHAR *p;
    Py_ssize_t n_digits;       /* count of digits need from the computed
                                  string */
    Py_ssize_t n_leading_chars;
    Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
                                        allocate, used for 'n'
                                        formatting. */
    Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
    STRINGLIB_CHAR *prefix = NULL;
    NumberFieldWidths spec;
    long x;

    /* no precision allowed on integers */
    if (format->precision != -1) {
        PyErr_SetString(PyExc_ValueError,
                        "Precision not allowed in integer format specifier");
        goto done;
    }


    /* special case for character formatting */
    if (format->type == 'c') {
        /* error to specify a sign */
        if (format->sign != '\0') {
            PyErr_SetString(PyExc_ValueError,
                            "Sign not allowed with integer"
                            " format specifier 'c'");
            goto done;
        }

        /* taken from unicodeobject.c formatchar() */
        /* Integer input truncated to a character */
        /* XXX: won't work for int */
        x = PyLong_AsLong(value);
        if (x == -1 && PyErr_Occurred())
            goto done;
#ifdef Py_UNICODE_WIDE
        if (x < 0 || x > 0x10ffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x110000) "
                            "(wide Python build)");
            goto done;
        }
#else
        if (x < 0 || x > 0xffff) {
            PyErr_SetString(PyExc_OverflowError,
                            "%c arg not in range(0x10000) "
                            "(narrow Python build)");
            goto done;
        }
#endif
        /* the "number" is a single character held in a local; sign
           stays '\0' and there is no prefix */
        numeric_char = (STRINGLIB_CHAR)x;
        pnumeric_chars = &numeric_char;
        n_digits = 1;
    }
    else {
        int base;
        int leading_chars_to_skip = 0;  /* Number of characters added by
                                           PyNumber_ToBase that we want to
                                           skip over. */

        /* Compute the base and how many characters will be added by
           PyNumber_ToBase */
        switch (format->type) {
        case 'b':
            base = 2;
            leading_chars_to_skip = 2; /* 0b */
            break;
        case 'o':
            base = 8;
            leading_chars_to_skip = 2; /* 0o */
            break;
        case 'x':
        case 'X':
            base = 16;
            leading_chars_to_skip = 2; /* 0x */
            break;
        default:  /* shouldn't be needed, but stops a compiler warning */
        case 'd':
        case 'n':
            base = 10;
            break;
        }

        /* The number of prefix chars is the same as the leading
           chars to skip */
        if (format->alternate)
            n_prefix = leading_chars_to_skip;

        /* Do the hard part, converting to a string in a given base */
        tmp = tostring(value, base);
        if (tmp == NULL)
            goto done;

        pnumeric_chars = STRINGLIB_STR(tmp);
        n_digits = STRINGLIB_LEN(tmp);

        prefix = pnumeric_chars;

        /* Remember not to modify what pnumeric_chars points to.  it
           might be interned.  Only modify it after we copy it into a
           newly allocated output buffer. */

        /* Is a sign character present in the output?  If so, remember it
           and skip it */
        sign = pnumeric_chars[0];
        if (sign == '-') {
            ++prefix;
            ++leading_chars_to_skip;
        }

        /* Skip over the leading chars (0x, 0b, etc.) */
        n_digits -= leading_chars_to_skip;
        pnumeric_chars += leading_chars_to_skip;
    }

    if (format->type == 'n')
            /* Compute how many additional chars we need to allocate
               to hold the thousands grouping. */
            STRINGLIB_GROUPING(NULL, n_digits, n_digits,
                               0, &n_grouping_chars, 0);

    /* Calculate the widths of the various leading and trailing parts.
       The grouping characters are counted as digits here, so
       spec.n_total already includes them. */
    calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
                       format);

    /* Allocate a new string to hold the result */
    result = STRINGLIB_NEW(NULL, spec.n_total);
    if (!result)
        goto done;
    p = STRINGLIB_STR(result);

    /* XXX There is too much magic here regarding the internals of
       spec and the location of the prefix and digits.  It would be
       better if calc_number_widths returned a number of logical
       offsets into the buffer, and those were used.  Maybe in a
       future code cleanup. */

    /* Fill in the digit parts */
    n_leading_chars = spec.n_lpadding + spec.n_lsign +
        spec.n_prefix + spec.n_spadding;
    memmove(p + n_leading_chars,
            pnumeric_chars,
            n_digits * sizeof(STRINGLIB_CHAR));

    /* If type is 'X', convert the filled in digits to uppercase */
    if (format->type == 'X') {
        Py_ssize_t t;
        for (t = 0; t < n_digits; ++t)
            p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
    }

    /* Insert the grouping, if any, after the uppercasing of the digits, so
       we can ensure that grouping chars won't be affected. */
    if (n_grouping_chars) {
        /* We know this can't fail, since we've already
           reserved enough space. */
        /* NOTE(review): spec.n_total already contains n_grouping_chars
           (see the calc_number_widths() call above), so the size passed
           below looks n_grouping_chars larger than the room really
           left after pstart -- confirm against STRINGLIB_GROUPING. */
        STRINGLIB_CHAR *pstart = p + n_leading_chars;
#ifndef NDEBUG
        int r =
#endif
            STRINGLIB_GROUPING(pstart, n_digits, n_digits,
                               spec.n_total+n_grouping_chars-n_leading_chars,
                               NULL, 0);
        assert(r);
    }

    /* Fill in the non-digit parts (padding, sign, etc.) */
    fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
                    format->fill_char == '\0' ? ' ' : format->fill_char);

    /* If type is 'X', uppercase the prefix.  This has to be done after the
       prefix is filled in by fill_non_digits */
    if (format->type == 'X') {
        Py_ssize_t t;
        for (t = 0; t < n_prefix; ++t)
            p[t + spec.n_lpadding + spec.n_lsign] =
                STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
    }


done:
    Py_XDECREF(tmp);
    return result;
}
|
671 #endif /* defined FORMAT_LONG || defined FORMAT_INT */ |
|
672 |
|
673 /************************************************************************/ |
|
674 /*********** float formatting *******************************************/ |
|
675 /************************************************************************/ |
|
676 |
|
677 #ifdef FORMAT_FLOAT |
|
678 #if STRINGLIB_IS_UNICODE |
|
679 /* taken from unicodeobject.c */ |
|
680 static Py_ssize_t |
|
681 strtounicode(Py_UNICODE *buffer, const char *charbuffer) |
|
682 { |
|
683 register Py_ssize_t i; |
|
684 Py_ssize_t len = strlen(charbuffer); |
|
685 for (i = len - 1; i >= 0; --i) |
|
686 buffer[i] = (Py_UNICODE) charbuffer[i]; |
|
687 |
|
688 return len; |
|
689 } |
|
690 #endif |
|
691 |
|
692 /* see FORMATBUFLEN in unicodeobject.c */ |
|
693 #define FLOAT_FORMATBUFLEN 120 |
|
694 |
|
695 /* much of this is taken from unicodeobject.c */ |
|
696 static PyObject * |
|
697 format_float_internal(PyObject *value, |
|
698 const InternalFormatSpec *format) |
|
699 { |
|
700 /* fmt = '%.' + `prec` + `type` + '%%' |
|
701 worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/ |
|
702 char fmt[20]; |
|
703 |
|
704 /* taken from unicodeobject.c */ |
|
705 /* Worst case length calc to ensure no buffer overrun: |
|
706 |
|
707 'g' formats: |
|
708 fmt = %#.<prec>g |
|
709 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp |
|
710 for any double rep.) |
|
711 len = 1 + prec + 1 + 2 + 5 = 9 + prec |
|
712 |
|
713 'f' formats: |
|
714 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) |
|
715 len = 1 + 50 + 1 + prec = 52 + prec |
|
716 |
|
717 If prec=0 the effective precision is 1 (the leading digit is |
|
718 always given), therefore increase the length by one. |
|
719 |
|
720 */ |
|
721 char charbuf[FLOAT_FORMATBUFLEN]; |
|
722 Py_ssize_t n_digits; |
|
723 double x; |
|
724 Py_ssize_t precision = format->precision; |
|
725 PyObject *result = NULL; |
|
726 STRINGLIB_CHAR sign; |
|
727 char* trailing = ""; |
|
728 STRINGLIB_CHAR *p; |
|
729 NumberFieldWidths spec; |
|
730 STRINGLIB_CHAR type = format->type; |
|
731 |
|
732 #if STRINGLIB_IS_UNICODE |
|
733 Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN]; |
|
734 #endif |
|
735 |
|
736 /* alternate is not allowed on floats. */ |
|
737 if (format->alternate) { |
|
738 PyErr_SetString(PyExc_ValueError, |
|
739 "Alternate form (#) not allowed in float format " |
|
740 "specifier"); |
|
741 goto done; |
|
742 } |
|
743 |
|
744 /* first, do the conversion as 8-bit chars, using the platform's |
|
745 snprintf. then, if needed, convert to unicode. */ |
|
746 |
|
747 /* 'F' is the same as 'f', per the PEP */ |
|
748 if (type == 'F') |
|
749 type = 'f'; |
|
750 |
|
751 x = PyFloat_AsDouble(value); |
|
752 |
|
753 if (x == -1.0 && PyErr_Occurred()) |
|
754 goto done; |
|
755 |
|
756 if (type == '%') { |
|
757 type = 'f'; |
|
758 x *= 100; |
|
759 trailing = "%"; |
|
760 } |
|
761 |
|
762 if (precision < 0) |
|
763 precision = 6; |
|
764 if (type == 'f' && (fabs(x) / 1e25) >= 1e25) |
|
765 type = 'g'; |
|
766 |
|
767 /* cast "type", because if we're in unicode we need to pass a |
|
768 8-bit char. this is safe, because we've restricted what "type" |
|
769 can be */ |
|
770 PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision, |
|
771 (char)type); |
|
772 |
|
773 /* do the actual formatting */ |
|
774 PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x); |
|
775 |
|
776 /* adding trailing to fmt with PyOS_snprintf doesn't work, not |
|
777 sure why. we'll just concatentate it here, no harm done. we |
|
778 know we can't have a buffer overflow from the fmt size |
|
779 analysis */ |
|
780 strcat(charbuf, trailing); |
|
781 |
|
782 /* rather than duplicate the code for snprintf for both unicode |
|
783 and 8 bit strings, we just use the 8 bit version and then |
|
784 convert to unicode in a separate code path. that's probably |
|
785 the lesser of 2 evils. */ |
|
786 #if STRINGLIB_IS_UNICODE |
|
787 n_digits = strtounicode(unicodebuf, charbuf); |
|
788 p = unicodebuf; |
|
789 #else |
|
790 /* compute the length. I believe this is done because the return |
|
791 value from snprintf above is unreliable */ |
|
792 n_digits = strlen(charbuf); |
|
793 p = charbuf; |
|
794 #endif |
|
795 |
|
796 /* is a sign character present in the output? if so, remember it |
|
797 and skip it */ |
|
798 sign = p[0]; |
|
799 if (sign == '-') { |
|
800 ++p; |
|
801 --n_digits; |
|
802 } |
|
803 |
|
804 calc_number_widths(&spec, sign, 0, n_digits, format); |
|
805 |
|
806 /* allocate a string with enough space */ |
|
807 result = STRINGLIB_NEW(NULL, spec.n_total); |
|
808 if (result == NULL) |
|
809 goto done; |
|
810 |
|
811 /* Fill in the non-digit parts (padding, sign, etc.) */ |
|
812 fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits, |
|
813 format->fill_char == '\0' ? ' ' : format->fill_char); |
|
814 |
|
815 /* fill in the digit parts */ |
|
816 memmove(STRINGLIB_STR(result) + |
|
817 (spec.n_lpadding + spec.n_lsign + spec.n_spadding), |
|
818 p, |
|
819 n_digits * sizeof(STRINGLIB_CHAR)); |
|
820 |
|
821 done: |
|
822 return result; |
|
823 } |
|
824 #endif /* FORMAT_FLOAT */ |
|
825 |
|
826 /************************************************************************/ |
|
827 /*********** built in formatters ****************************************/ |
|
828 /************************************************************************/ |
|
829 PyObject * |
|
830 FORMAT_STRING(PyObject *obj, |
|
831 STRINGLIB_CHAR *format_spec, |
|
832 Py_ssize_t format_spec_len) |
|
833 { |
|
834 InternalFormatSpec format; |
|
835 PyObject *result = NULL; |
|
836 |
|
837 /* check for the special case of zero length format spec, make |
|
838 it equivalent to str(obj) */ |
|
839 if (format_spec_len == 0) { |
|
840 result = STRINGLIB_TOSTR(obj); |
|
841 goto done; |
|
842 } |
|
843 |
|
844 /* parse the format_spec */ |
|
845 if (!parse_internal_render_format_spec(format_spec, format_spec_len, |
|
846 &format, 's')) |
|
847 goto done; |
|
848 |
|
849 /* type conversion? */ |
|
850 switch (format.type) { |
|
851 case 's': |
|
852 /* no type conversion needed, already a string. do the formatting */ |
|
853 result = format_string_internal(obj, &format); |
|
854 break; |
|
855 default: |
|
856 /* unknown */ |
|
857 #if STRINGLIB_IS_UNICODE |
|
858 /* If STRINGLIB_CHAR is Py_UNICODE, %c might be out-of-range, |
|
859 hence the two cases. If it is char, gcc complains that the |
|
860 condition below is always true, hence the ifdef. */ |
|
861 if (format.type > 32 && format.type <128) |
|
862 #endif |
|
863 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", |
|
864 (char)format.type); |
|
865 #if STRINGLIB_IS_UNICODE |
|
866 else |
|
867 PyErr_Format(PyExc_ValueError, "Unknown conversion type '\\x%x'", |
|
868 (unsigned int)format.type); |
|
869 #endif |
|
870 goto done; |
|
871 } |
|
872 |
|
873 done: |
|
874 return result; |
|
875 } |
|
876 |
|
877 #if defined FORMAT_LONG || defined FORMAT_INT |
|
878 static PyObject* |
|
879 format_int_or_long(PyObject* obj, |
|
880 STRINGLIB_CHAR *format_spec, |
|
881 Py_ssize_t format_spec_len, |
|
882 IntOrLongToString tostring) |
|
883 { |
|
884 PyObject *result = NULL; |
|
885 PyObject *tmp = NULL; |
|
886 InternalFormatSpec format; |
|
887 |
|
888 /* check for the special case of zero length format spec, make |
|
889 it equivalent to str(obj) */ |
|
890 if (format_spec_len == 0) { |
|
891 result = STRINGLIB_TOSTR(obj); |
|
892 goto done; |
|
893 } |
|
894 |
|
895 /* parse the format_spec */ |
|
896 if (!parse_internal_render_format_spec(format_spec, |
|
897 format_spec_len, |
|
898 &format, 'd')) |
|
899 goto done; |
|
900 |
|
901 /* type conversion? */ |
|
902 switch (format.type) { |
|
903 case 'b': |
|
904 case 'c': |
|
905 case 'd': |
|
906 case 'o': |
|
907 case 'x': |
|
908 case 'X': |
|
909 case 'n': |
|
910 /* no type conversion needed, already an int (or long). do |
|
911 the formatting */ |
|
912 result = format_int_or_long_internal(obj, &format, tostring); |
|
913 break; |
|
914 |
|
915 case 'e': |
|
916 case 'E': |
|
917 case 'f': |
|
918 case 'F': |
|
919 case 'g': |
|
920 case 'G': |
|
921 case '%': |
|
922 /* convert to float */ |
|
923 tmp = PyNumber_Float(obj); |
|
924 if (tmp == NULL) |
|
925 goto done; |
|
926 result = format_float_internal(obj, &format); |
|
927 break; |
|
928 |
|
929 default: |
|
930 /* unknown */ |
|
931 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", |
|
932 format.type); |
|
933 goto done; |
|
934 } |
|
935 |
|
936 done: |
|
937 Py_XDECREF(tmp); |
|
938 return result; |
|
939 } |
|
940 #endif /* FORMAT_LONG || defined FORMAT_INT */ |
|
941 |
|
942 #ifdef FORMAT_LONG |
|
943 /* Need to define long_format as a function that will convert a long |
|
944 to a string. In 3.0, _PyLong_Format has the correct signature. In |
|
945 2.x, we need to fudge a few parameters */ |
|
946 #if PY_VERSION_HEX >= 0x03000000 |
|
947 #define long_format _PyLong_Format |
|
948 #else |
|
/* Adapter that gives the four-argument _PyLong_Format the two-argument
   IntOrLongToString signature.  value must be a long object (this is
   only asserted); base is passed straight through. */
static PyObject*
long_format(PyObject* value, int base)
{
    /* Convert to base, don't add trailing 'L', and use the new octal
       format. We already know this is a long object */
    assert(PyLong_Check(value));
    /* the trailing 0 and 1 are what select "no 'L'" and "new octal
       format" respectively, going by the comment above */
    return _PyLong_Format(value, base, 0, 1);
}
|
958 #endif |
|
959 |
|
/* Public entry point for formatting a long object: defers to
   format_int_or_long(), with long_format doing the conversion to a
   string.  Returns whatever format_int_or_long() returns. */
PyObject *
FORMAT_LONG(PyObject *obj,
            STRINGLIB_CHAR *format_spec,
            Py_ssize_t format_spec_len)
{
    return format_int_or_long(obj, format_spec, format_spec_len,
                              long_format);
}
|
968 #endif /* FORMAT_LONG */ |
|
969 |
|
970 #ifdef FORMAT_INT |
|
/* this is only used for 2.x, not 3.0 */
/* Adapter that gives _PyInt_Format the two-argument IntOrLongToString
   signature.  value must be an int object (this is only asserted);
   base is passed straight through. */
static PyObject*
int_format(PyObject* value, int base)
{
    /* Convert to base, and use the new octal format. We already
       know this is an int object */
    assert(PyInt_Check(value));
    return _PyInt_Format((PyIntObject*)value, base, 1);
}
|
980 |
|
/* Public entry point for formatting an int object: defers to
   format_int_or_long(), with int_format doing the conversion to a
   string.  Returns whatever format_int_or_long() returns. */
PyObject *
FORMAT_INT(PyObject *obj,
           STRINGLIB_CHAR *format_spec,
           Py_ssize_t format_spec_len)
{
    return format_int_or_long(obj, format_spec, format_spec_len,
                              int_format);
}
|
989 #endif /* FORMAT_INT */ |
|
990 |
|
991 #ifdef FORMAT_FLOAT |
|
992 PyObject * |
|
993 FORMAT_FLOAT(PyObject *obj, |
|
994 STRINGLIB_CHAR *format_spec, |
|
995 Py_ssize_t format_spec_len) |
|
996 { |
|
997 PyObject *result = NULL; |
|
998 InternalFormatSpec format; |
|
999 |
|
1000 /* check for the special case of zero length format spec, make |
|
1001 it equivalent to str(obj) */ |
|
1002 if (format_spec_len == 0) { |
|
1003 result = STRINGLIB_TOSTR(obj); |
|
1004 goto done; |
|
1005 } |
|
1006 |
|
1007 /* parse the format_spec */ |
|
1008 if (!parse_internal_render_format_spec(format_spec, |
|
1009 format_spec_len, |
|
1010 &format, '\0')) |
|
1011 goto done; |
|
1012 |
|
1013 /* type conversion? */ |
|
1014 switch (format.type) { |
|
1015 case '\0': |
|
1016 /* 'Z' means like 'g', but with at least one decimal. See |
|
1017 PyOS_ascii_formatd */ |
|
1018 format.type = 'Z'; |
|
1019 /* Deliberate fall through to the next case statement */ |
|
1020 case 'e': |
|
1021 case 'E': |
|
1022 case 'f': |
|
1023 case 'F': |
|
1024 case 'g': |
|
1025 case 'G': |
|
1026 case 'n': |
|
1027 case '%': |
|
1028 /* no conversion, already a float. do the formatting */ |
|
1029 result = format_float_internal(obj, &format); |
|
1030 break; |
|
1031 |
|
1032 default: |
|
1033 /* unknown */ |
|
1034 PyErr_Format(PyExc_ValueError, "Unknown conversion type %c", |
|
1035 format.type); |
|
1036 goto done; |
|
1037 } |
|
1038 |
|
1039 done: |
|
1040 return result; |
|
1041 } |
|
1042 #endif /* FORMAT_FLOAT */ |