|
1 // Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
2 // All rights reserved. |
|
3 // This component and the accompanying materials are made available |
|
4 // under the terms of "Eclipse Public License v1.0" |
|
5 // which accompanies this distribution, and is available |
|
6 // at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
7 // |
|
8 // Initial Contributors: |
|
9 // Nokia Corporation - initial contribution. |
|
10 // |
|
11 // Contributors: |
|
12 // |
|
13 // Description: |
|
14 // Contains the implementation punycode conversion algorithm |
|
15 // |
|
16 |
|
17 |
|
18 |
|
19 /** |
|
20 @file |
|
21 */ |
|
22 |
|
23 #include "punycodeconverter.h" |
|
24 #include <e32std.h> |
|
25 |
|
// *******************************************************
// This implementation is adapted from the Punycode sample
// implementation in appendix C of the RFC-3492.
// *******************************************************
//
// Bootstring parameters for Punycode
//

// Number of digit values; also the step by which the digit position
// counter k advances in the variable-length integer loops, and the value
// decode_digit() returns for a code point that is not a digit.
#define PUNYCODE_BASE 36
// Lower clamp for the per-digit threshold t.
#define PUNYCODE_TMIN 1
// Upper clamp for the per-digit threshold t.
#define PUNYCODE_TMAX 26
// Skew term used in the final division of adapt().
#define PUNYCODE_SKEW 38
// Divisor applied to delta on the first call of adapt() for a label.
#define PUNYCODE_DAMP 700
// Initial value of bias for each label.
#define PUNYCODE_BIAS 72
// Initial value of n (the current code point) for each label.
#define PUNYCODE_INIT 0x80
// Delimiter between the copied basic code points and the encoded part
// (0x2D is '-' in ASCII).
#define PUNYCODE_DELI 0x2D

// Prefix that Encode() inserts in front of a label that needed encoding
// and that Decode() requires before it attempts to decode a label.
_LIT8(KAcePrefix, "xn--");


/* basic(cp) tests whether cp is a basic code point (value below 0x80): */
#define basic(cp) ((TUint)(cp) < 0x80)

/* delim(cp) tests whether cp is the delimiter PUNYCODE_DELI: */
#define delim(cp) ((cp) == PUNYCODE_DELI)
|
50 |
|
51 |
|
52 /** |
|
53 Function to decode each digit and return the character. |
|
54 decode_digit(cp) returns the numeric value of a basic code |
|
55 point (for use in representing integers) in the range 0 to |
|
56 base-1, or base if cp is does not represent a value. |
|
57 @return - returns the non ASCII character for the input |
|
58 @param cp the codepoint |
|
59 */ |
|
60 |
|
61 static TUint decode_digit(TUint cp) |
|
62 { |
|
63 return cp - 48 < 10 ? cp - 22 : |
|
64 cp - 65 < 26 ? cp - 65 : |
|
65 cp - 97 < 26 ? cp - 97 : |
|
66 PUNYCODE_BASE; |
|
67 } |
|
68 |
|
69 |
|
70 /** |
|
71 Function to encode each digit and return the character. |
|
72 encode_digit(d) returns the basic code point whose value |
|
73 (when used for representing integers) is d, which needs to be in |
|
74 the range 0 to base-1. |
|
75 @return - returns the ASCII character for the input |
|
76 @param cp the codepoint |
|
77 */ |
|
78 static char encode_digit(TUint d) |
|
79 { |
|
80 return (d + 22 + 75 * (d < 26)); |
|
81 /* 0..25 map to ASCII a..z */ |
|
82 /* 26..35 map to ASCII 0..9 */ |
|
83 } |
|
84 |
|
85 |
|
86 |
|
87 /** |
|
88 Function to adapt the bias. |
|
89 Bias adaptation function |
|
90 @return - returns the ASCII character for the input |
|
91 @param delta, difference delta |
|
92 @param numPoints, the number of points |
|
93 @param firsttime , whether the first bias |
|
94 */ |
|
95 static TUint adapt(TUint delta, TUint numpoints, int firsttime) |
|
96 { |
|
97 TUint k; |
|
98 |
|
99 delta = firsttime ? delta / PUNYCODE_DAMP : delta >> 1; |
|
100 delta += delta / numpoints; |
|
101 |
|
102 for (k = 0; delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2; k += PUNYCODE_BASE) |
|
103 { |
|
104 delta /= PUNYCODE_BASE - PUNYCODE_TMIN; |
|
105 } |
|
106 return k + (PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta / (delta + PUNYCODE_SKEW); |
|
107 } |
|
108 |
|
109 |
|
110 /** |
|
111 Function to convert the IDN to Punycode |
|
112 @return KErrNone, if conversion is successful |
|
113 KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name |
|
114 or any other system wide errors |
|
115 @param aName, the input name in UCS2.0 encoding |
|
116 */ |
|
117 EXPORT_C TInt TPunyCodeDndName::IdnToPunycode(const THostName &aName) |
|
118 { |
|
119 SetLength(0); |
|
120 |
|
121 for (TInt i = 0; i < aName.Length(); ) |
|
122 { |
|
123 i = Encode(aName, i); |
|
124 if (i < 0) |
|
125 return i; |
|
126 } |
|
127 return KErrNone; |
|
128 } |
|
129 |
|
130 /** |
|
131 Function to encode each label |
|
132 @return KErrNone, if conversion is successful |
|
133 KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name |
|
134 or any other system wide errors |
|
135 @param aName, the input name in UCS2.0 encoding |
|
136 */ |
|
137 TInt TPunyCodeDndName::Encode(const THostName &aName, TInt aIndex) |
|
138 { |
|
139 const TInt output_start = Length(); |
|
140 |
|
141 TInt j; |
|
142 TUint n = PUNYCODE_INIT; |
|
143 TUint delta = 0; |
|
144 TUint bias = PUNYCODE_BIAS; |
|
145 TInt first_time = 1; |
|
146 |
|
147 // Copy the basic code points as is, and |
|
148 // compute the length of the current label |
|
149 // into input_length |
|
150 TUint input_length = 0; |
|
151 for (j = aIndex; j < aName.Length(); ++j) |
|
152 { |
|
153 const TUint c = aName[j]; |
|
154 if (c == '.') |
|
155 break; |
|
156 if (basic(c)) |
|
157 { |
|
158 if (Length() == MaxLength()) |
|
159 return KErrDndNameTooBig; |
|
160 Append(c); |
|
161 } |
|
162 input_length += 1; |
|
163 } |
|
164 |
|
165 // h is the number of code points that have been handled |
|
166 TUint h = Length() - output_start; |
|
167 |
|
168 if (h == input_length) |
|
169 // Only basic code points, all done. |
|
170 goto done; |
|
171 |
|
172 // IDN is required, add prefix! |
|
173 if (Length() + KAcePrefix().Length() > MaxLength()) |
|
174 return KErrDndNameTooBig; |
|
175 Insert(output_start, KAcePrefix); |
|
176 |
|
177 if (h > 0) |
|
178 { |
|
179 // Both basic and non-basic points, need to add a delimiter. |
|
180 if (Length() == MaxLength()) |
|
181 return KErrDndNameTooBig; |
|
182 Append(PUNYCODE_DELI); |
|
183 } |
|
184 |
|
185 // Main encoding loop |
|
186 |
|
187 while (h < input_length) |
|
188 { |
|
189 // All non-basic code points < n have been |
|
190 // handled already. Find the next larger one: |
|
191 TUint m = KMaxTUint; |
|
192 for (j = aIndex; j < aIndex + input_length; ++j) |
|
193 { |
|
194 const TUint c = aName[j]; |
|
195 if (c >= n && c < m) |
|
196 m = c; |
|
197 } |
|
198 |
|
199 // Increase delta enough to advance the decoder's |
|
200 // <n,i> state to <m,0>, but guard against overflow: |
|
201 if (m - n > (KMaxTUint - delta) / (h + 1)) |
|
202 return KErrOverflow; |
|
203 delta += (m - n) * (h + 1); |
|
204 n = m; |
|
205 |
|
206 for (j = aIndex; j < aIndex + input_length; ++j) |
|
207 { |
|
208 const TUint c = aName[j]; |
|
209 if (c < n) |
|
210 { |
|
211 if (++delta == 0) |
|
212 return KErrOverflow; |
|
213 } |
|
214 else if (c == n) |
|
215 { |
|
216 // Represent delta as a generalized variable-length integer: |
|
217 TUint q = delta; |
|
218 for (TUint k = PUNYCODE_BASE; ; k += PUNYCODE_BASE) |
|
219 { |
|
220 if (Length() >= MaxLength()) |
|
221 return KErrDndNameTooBig; |
|
222 |
|
223 const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN : /* +tmin not needed */ |
|
224 k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias; |
|
225 if (q < t) |
|
226 break; |
|
227 Append(encode_digit(t + (q - t) % (PUNYCODE_BASE - t))); |
|
228 q = (q - t) / (PUNYCODE_BASE - t); |
|
229 } |
|
230 Append(encode_digit(q)); |
|
231 ++h; |
|
232 bias = adapt(delta, h, first_time); |
|
233 delta = 0; |
|
234 first_time = 0; |
|
235 } |
|
236 } |
|
237 ++delta, ++n; |
|
238 } |
|
239 done: |
|
240 aIndex += input_length; |
|
241 if (aIndex < aName.Length()) |
|
242 { |
|
243 // Input terminated with '.', copy it to ouput. |
|
244 if (Length() == MaxLength()) |
|
245 return KErrDndNameTooBig; |
|
246 Append('.'); |
|
247 aIndex += 1; |
|
248 } |
|
249 return aIndex; |
|
250 } |
|
251 |
|
252 |
|
253 /** |
|
254 Function to decode the punycode to IDN |
|
255 @return KErrNone, if conversion is successful |
|
256 KErrDndBadName, if the punycode provided cannot be decoded |
|
257 or any other system wide errors |
|
258 @param aName, the input punycode name in ASCII format |
|
259 @param aStart, where to start the conversion, defaulted to 0. |
|
260 */ |
|
261 EXPORT_C TInt TPunyCodeDndName::PunycodeToIdn(TDes& aBuf, const TInt aStart) |
|
262 { |
|
263 aBuf.SetLength(0); |
|
264 for (TInt i = aStart; i < Length(); ) |
|
265 { |
|
266 i = Decode(i, aBuf); |
|
267 if (i < 0) |
|
268 { |
|
269 // If Punycode fails for any reason, just return |
|
270 // the raw name (it probably was not punycode). |
|
271 return KErrDndBadName; |
|
272 } |
|
273 } |
|
274 return KErrNone; |
|
275 } |
|
276 |
|
277 /** |
|
278 Function to decode each label |
|
279 @return KErrNone, if conversion is successful |
|
280 KErrDndBadName, if the punycode provided cannot be decoded |
|
281 or any other system wide errors |
|
282 @param aBuf, the input punycode name in ASCII format for each label |
|
283 @param aIndex, where to start the conversion, defaulted to 0. |
|
284 */ |
|
285 TInt TPunyCodeDndName::Decode(TInt aIndex, TDes &aBuf) const |
|
286 { |
|
287 if (aIndex + KAcePrefix().Length() > Length() || |
|
288 Mid(aIndex, KAcePrefix().Length()).Compare(KAcePrefix()) != 0) |
|
289 { |
|
290 // cannot be punycode. |
|
291 // copy label as is, while updating aIndex |
|
292 while (aIndex < Length()) |
|
293 { |
|
294 const TUint c = (*this)[aIndex++]; |
|
295 if (aBuf.Length() == aBuf.MaxLength()) |
|
296 return KErrDndNameTooBig; |
|
297 aBuf.Append(c); |
|
298 if (c == '.') |
|
299 break; |
|
300 } |
|
301 return aIndex; |
|
302 } |
|
303 |
|
304 aIndex += KAcePrefix().Length(); // Skip KAcePrefix. |
|
305 |
|
306 |
|
307 // Handle the basic code points. |
|
308 TInt puny_end = aIndex; |
|
309 TInt inp = aIndex; |
|
310 for ( ; puny_end < Length(); ++puny_end) |
|
311 { |
|
312 const TUint c = (*this)[puny_end]; |
|
313 if (c == '.') |
|
314 break; |
|
315 if (delim(c)) |
|
316 inp = puny_end; |
|
317 } |
|
318 |
|
319 if (aBuf.Length() + inp - aIndex > aBuf.MaxLength()) |
|
320 return KErrDndNameTooBig; |
|
321 |
|
322 const TUint out_base = aBuf.Length(); |
|
323 // Copy the basic code points as is. |
|
324 for (TInt j = aIndex; j < inp; ++j) |
|
325 { |
|
326 const TUint c = (*this)[j]; |
|
327 if (!basic(c)) |
|
328 return KErrGeneral; |
|
329 aBuf.Append(c); |
|
330 } |
|
331 // Skip inp over delimiter, if present |
|
332 if (inp > aIndex) |
|
333 inp += 1; |
|
334 |
|
335 // Initialize the state: |
|
336 |
|
337 TUint n = PUNYCODE_INIT; |
|
338 TUint outp = aBuf.Length() - out_base; |
|
339 TUint i = 0; |
|
340 TUint bias = PUNYCODE_BIAS; |
|
341 |
|
342 // Main decoding loop: Start just after the last delimiter if any |
|
343 // basic code points were copied; start at the beginning otherwise. |
|
344 |
|
345 while (inp < puny_end) |
|
346 { |
|
347 // inp is the index of the next character to be consumed, and |
|
348 // outp is the number of code points processed (includes the |
|
349 // initial basic points). |
|
350 |
|
351 // Decode a generalized variable-length integer into delta, |
|
352 // which gets added to i. The overflow checking is easier |
|
353 // if we increase i as we go, then subtract off its starting |
|
354 // value at the end to obtain delta. |
|
355 const TUint oldi = i; |
|
356 TUint w = 1; |
|
357 for (TUint k = PUNYCODE_BASE; ; k += PUNYCODE_BASE) |
|
358 { |
|
359 if (inp >= puny_end) |
|
360 return KErrGeneral; |
|
361 |
|
362 const TUint digit = decode_digit((*this)[inp++]); |
|
363 if (digit >= PUNYCODE_BASE) |
|
364 return KErrGeneral; |
|
365 if (digit > (KMaxTUint - i) / w) |
|
366 return KErrOverflow; |
|
367 i += digit * w; |
|
368 const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN : /* +tmin not needed */ |
|
369 k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias; |
|
370 if (digit < t) |
|
371 break; |
|
372 if (w > KMaxTUint / (PUNYCODE_BASE - t)) |
|
373 return KErrOverflow; |
|
374 w *= (PUNYCODE_BASE - t); |
|
375 } |
|
376 |
|
377 outp++; // Going to add new code, increment count. |
|
378 bias = adapt(i - oldi, outp, oldi == 0); |
|
379 |
|
380 // i was supposed to wrap around from out+1 to 0, |
|
381 // incrementing n each time, so we'll fix that now: |
|
382 |
|
383 if (i / outp > KMaxTUint - n) |
|
384 return KErrOverflow; |
|
385 n += i / outp; |
|
386 i %= outp; |
|
387 |
|
388 // Insert n at position i of the output: |
|
389 |
|
390 if (aBuf.Length() == aBuf.MaxLength()) |
|
391 return KErrDndNameTooBig; |
|
392 TBuf<1> tmp; |
|
393 tmp.Append(n); |
|
394 aBuf.Insert(out_base + i, tmp); |
|
395 i++; |
|
396 } |
|
397 if (puny_end < Length()) |
|
398 { |
|
399 // Input terminated with '.', copy it to ouput. |
|
400 if (aBuf.Length() == aBuf.MaxLength()) |
|
401 return KErrDndNameTooBig; |
|
402 aBuf.Append('.'); |
|
403 puny_end++; |
|
404 } |
|
405 return puny_end; |
|
406 } |