tcpiputils/punycodeconv/src/punycodeconverter.cpp
changeset 0 af10295192d8
equal deleted inserted replaced
-1:000000000000 0:af10295192d8
       
     1 // Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     2 // All rights reserved.
       
     3 // This component and the accompanying materials are made available
       
     4 // under the terms of "Eclipse Public License v1.0"
       
     5 // which accompanies this distribution, and is available
       
     6 // at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     7 //
       
     8 // Initial Contributors:
       
     9 // Nokia Corporation - initial contribution.
       
    10 //
       
    11 // Contributors:
       
    12 //
       
    13 // Description:
       
     14 // Contains the implementation of the Punycode conversion algorithm
       
    15 //
       
    16 
       
    17 
       
    18 
       
    19 /**
       
    20  @file
       
    21 */
       
    22 
       
    23 #include "punycodeconverter.h"
       
    24 #include <e32std.h>
       
    25 
       
    26 // *******************************************************
       
    27 // This implementation is adapted from the Punycode sample
       
    28 // implementation in appendix C of the RFC-3492.
       
    29 // *******************************************************
       
    30 //
       
    31 // Bootstring parameters for Punycode
       
    32 //
       
    33 #define PUNYCODE_BASE 36
       
    34 #define PUNYCODE_TMIN 1
       
    35 #define PUNYCODE_TMAX 26
       
    36 #define PUNYCODE_SKEW 38
       
    37 #define PUNYCODE_DAMP 700
       
    38 #define PUNYCODE_BIAS 72
       
    39 #define PUNYCODE_INIT 0x80
       
    40 #define PUNYCODE_DELI 0x2D
       
    41 
       
    42 _LIT8(KAcePrefix, "xn--");
       
    43 
       
    44 
       
    45 /* basic(cp) tests whether cp is a basic code point: */
       
    46 #define basic(cp) ((TUint)(cp) < 0x80)
       
    47 
       
    48 /* delim(cp) tests whether cp is a delimiter: */
       
    49 #define delim(cp) ((cp) == PUNYCODE_DELI)
       
    50 
       
    51 
       
    52 /**
       
    53 Function to decode each digit and return the character.
       
    54 	decode_digit(cp) returns the numeric value of a basic code 
       
    55 	point (for use in representing integers) in the range 0 to 
       
    56 	base-1, or base if cp is does not represent a value.       
       
    57 @return - returns the non ASCII character for the input
       
    58 @param cp the codepoint
       
    59 */
       
    60 
       
    61 static TUint decode_digit(TUint cp)
       
    62 	{
       
    63 	return  cp - 48 < 10 ? cp - 22 :
       
    64 			cp - 65 < 26 ? cp - 65 :
       
    65 			cp - 97 < 26 ? cp - 97 :
       
    66 			PUNYCODE_BASE;
       
    67 	}
       
    68 
       
    69 
       
    70 /**
       
    71 Function to encode each digit and return the character.
       
    72 	encode_digit(d) returns the basic code point whose value      
       
    73 	(when used for representing integers) is d, which needs to be in   
       
    74 	the range 0 to base-1.                                           
       
    75 @return - returns the ASCII character for the input
       
    76 @param cp the codepoint
       
    77 */
       
    78 static char encode_digit(TUint d)
       
    79 	{
       
    80 	return (d + 22 + 75 * (d < 26));
       
    81 	  /*  0..25 map to ASCII a..z */
       
    82 	  /* 26..35 map to ASCII 0..9 */
       
    83 	}
       
    84 
       
    85 
       
    86 
       
    87 /**
       
    88 Function to adapt the bias.
       
    89 Bias adaptation function
       
    90 @return - returns the ASCII character for the input
       
    91 @param delta, difference delta
       
    92 @param numPoints, the number of points
       
    93 @param firsttime , whether the first bias
       
    94 */
       
    95 static TUint adapt(TUint delta, TUint numpoints, int firsttime)
       
    96 	{
       
    97   	TUint k;
       
    98 
       
    99 	delta = firsttime ? delta / PUNYCODE_DAMP : delta >> 1;
       
   100 	delta += delta / numpoints;
       
   101 
       
   102 	for (k = 0;  delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2;  k += PUNYCODE_BASE)
       
   103 		{
       
   104 		delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
       
   105 		}
       
   106 	return k + (PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta / (delta + PUNYCODE_SKEW);
       
   107 	}
       
   108 
       
   109 
       
   110 /**
       
   111 Function to convert the IDN to Punycode
       
   112 @return KErrNone, if conversion is successful
       
   113 	KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name
       
   114 	or any other system wide errors
       
   115 @param aName, the input name in UCS2.0 encoding
       
   116 */
       
   117 EXPORT_C TInt TPunyCodeDndName::IdnToPunycode(const THostName &aName)
       
   118 	{
       
   119 	SetLength(0);
       
   120 			
       
   121 	for (TInt i = 0; i < aName.Length(); )
       
   122 		{
       
   123 		i = Encode(aName, i);
       
   124 		if (i < 0)
       
   125 			return i;
       
   126 		}
       
   127 	return KErrNone;
       
   128 	}
       
   129 
       
   130 /**
       
   131 Function to encode each label
       
   132 @return KErrNone, if conversion is successful
       
   133 	KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name
       
   134 	or any other system wide errors
       
   135 @param aName, the input name in UCS2.0 encoding
       
   136 */
       
   137 TInt TPunyCodeDndName::Encode(const THostName &aName, TInt aIndex)
       
   138 	{
       
   139 	const TInt output_start = Length();
       
   140 
       
   141 	TInt j;
       
   142 	TUint n = PUNYCODE_INIT;
       
   143 	TUint delta = 0;
       
   144 	TUint bias = PUNYCODE_BIAS;
       
   145 	TInt first_time = 1;
       
   146 
       
   147 	// Copy the basic code points as is, and
       
   148 	// compute the length of the current label
       
   149 	// into input_length
       
   150 	TUint input_length = 0;
       
   151 	for (j = aIndex;  j < aName.Length();  ++j)
       
   152 		{
       
   153 		const TUint c = aName[j];
       
   154 		if (c == '.')
       
   155 			break;
       
   156 	    if (basic(c))
       
   157 	    	{
       
   158 	    	if (Length() == MaxLength())
       
   159 				return KErrDndNameTooBig;
       
   160 	    	Append(c);
       
   161 			}
       
   162 		input_length += 1;
       
   163 		}
       
   164 
       
   165 	// h is the number of code points that have been handled
       
   166 	TUint h = Length() - output_start;
       
   167 
       
   168 	if (h == input_length)
       
   169 		// Only basic code points, all done.
       
   170 		goto done;
       
   171 
       
   172 	// IDN is required, add prefix!
       
   173 	if (Length() + KAcePrefix().Length() > MaxLength())
       
   174    		return KErrDndNameTooBig;
       
   175 	Insert(output_start, KAcePrefix);
       
   176 
       
   177 	if (h > 0)
       
   178 		{
       
   179 		// Both basic and non-basic points, need to add a delimiter.
       
   180 		if (Length() == MaxLength())
       
   181 			return KErrDndNameTooBig;
       
   182 		Append(PUNYCODE_DELI);
       
   183 		}
       
   184 	
       
   185 	// Main encoding loop
       
   186 
       
   187 	while (h < input_length)
       
   188 		{
       
   189 		// All non-basic code points < n have been
       
   190 		// handled already.  Find the next larger one:
       
   191 		TUint m = KMaxTUint;
       
   192 	    for (j = aIndex;  j < aIndex + input_length;  ++j)
       
   193 	    	{
       
   194 	    	const TUint c = aName[j];
       
   195 			if (c >= n && c < m)
       
   196 				m = c;
       
   197 	    	}
       
   198 
       
   199 		// Increase delta enough to advance the decoder's
       
   200 		// <n,i> state to <m,0>, but guard against overflow:
       
   201 		if (m - n > (KMaxTUint - delta) / (h + 1))
       
   202 			return KErrOverflow;
       
   203 		delta += (m - n) * (h + 1);
       
   204 		n = m;
       
   205 		
       
   206 		for (j = aIndex;  j < aIndex + input_length;  ++j)
       
   207 			{
       
   208 			const TUint c = aName[j];
       
   209 			if (c < n)
       
   210 				{
       
   211 				if (++delta == 0)
       
   212 					return KErrOverflow;
       
   213 				}
       
   214 			else if (c == n)
       
   215 				{
       
   216 				// Represent delta as a generalized variable-length integer:
       
   217 				TUint q = delta;
       
   218 		        for (TUint k = PUNYCODE_BASE;  ;  k += PUNYCODE_BASE)
       
   219 		        	{
       
   220 		        	if (Length() >= MaxLength())
       
   221 						return KErrDndNameTooBig;
       
   222 
       
   223 					const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN :     /* +tmin not needed */
       
   224               			k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias;
       
   225               		if (q < t)
       
   226               			break;
       
   227               		Append(encode_digit(t + (q - t) % (PUNYCODE_BASE - t)));
       
   228 					q = (q - t) / (PUNYCODE_BASE - t);
       
   229 		        	}
       
   230 		        Append(encode_digit(q));
       
   231 				++h;
       
   232         		bias = adapt(delta, h, first_time);
       
   233         		delta = 0;
       
   234         		first_time = 0;
       
   235 				}
       
   236 			}
       
   237 	    ++delta, ++n;
       
   238 		}
       
   239 done:
       
   240 	aIndex += input_length;
       
   241 	if (aIndex < aName.Length())
       
   242 		{
       
   243 		// Input terminated with '.', copy it to ouput.
       
   244 		if (Length() == MaxLength())
       
   245 			return KErrDndNameTooBig;
       
   246 		Append('.');
       
   247 		aIndex += 1;
       
   248 		}
       
   249 	return aIndex;
       
   250 	}
       
   251 	
       
   252 
       
   253 /**
       
   254 Function to decode the punycode to IDN
       
   255 @return KErrNone, if conversion is successful
       
   256 	KErrDndBadName, if the punycode provided cannot be decoded
       
   257 	or any other system wide errors
       
   258 @param aName, the input punycode name in ASCII format
       
   259 @param aStart, where to start the conversion, defaulted to 0.
       
   260 */
       
   261 EXPORT_C TInt TPunyCodeDndName::PunycodeToIdn(TDes& aBuf, const TInt aStart)
       
   262 	{
       
   263 	aBuf.SetLength(0);
       
   264 	for (TInt i = aStart; i < Length();  )
       
   265 		{
       
   266 		i = Decode(i, aBuf);
       
   267 		if (i < 0)
       
   268 			{
       
   269 			// If Punycode fails for any reason, just return
       
   270 			// the raw name (it probably was not punycode).
       
   271 			return KErrDndBadName;
       
   272 			}
       
   273 		}
       
   274 	return KErrNone;
       
   275 	}
       
   276 
       
   277 /**
       
   278 Function to decode each label
       
   279 @return KErrNone, if conversion is successful
       
   280 	KErrDndBadName, if the punycode provided cannot be decoded
       
   281 	or any other system wide errors
       
   282 @param aBuf, the input punycode name in ASCII format for each label
       
   283 @param aIndex, where to start the conversion, defaulted to 0.
       
   284 */
       
   285 TInt TPunyCodeDndName::Decode(TInt aIndex, TDes &aBuf) const
       
   286 	{
       
   287 	if (aIndex + KAcePrefix().Length() > Length() ||
       
   288 			Mid(aIndex, KAcePrefix().Length()).Compare(KAcePrefix()) != 0)
       
   289 		{
       
   290 		// cannot be punycode.
       
   291 		// copy label as is, while updating aIndex
       
   292 		while (aIndex < Length())
       
   293 			{
       
   294 			const TUint c = (*this)[aIndex++];
       
   295 			if (aBuf.Length() == aBuf.MaxLength())
       
   296 				return KErrDndNameTooBig;
       
   297 			aBuf.Append(c);
       
   298 			if (c == '.')
       
   299 				break;
       
   300 			}
       
   301 		return aIndex;
       
   302 		}
       
   303 		
       
   304 	aIndex += KAcePrefix().Length();	// Skip KAcePrefix.
       
   305 
       
   306 
       
   307 	// Handle the basic code points.
       
   308 	TInt puny_end = aIndex;
       
   309 	TInt inp = aIndex;
       
   310 	for ( ; puny_end < Length();  ++puny_end)
       
   311 		{
       
   312 		const TUint c = (*this)[puny_end];
       
   313 		if (c == '.')
       
   314 			break;
       
   315 		if (delim(c))
       
   316 			inp = puny_end;
       
   317 		}
       
   318 
       
   319 	if (aBuf.Length() + inp - aIndex > aBuf.MaxLength())
       
   320 		return KErrDndNameTooBig;
       
   321 
       
   322 	const TUint out_base = aBuf.Length();
       
   323 	// Copy the basic code points as is.
       
   324 	for (TInt j = aIndex;  j < inp;  ++j)
       
   325 		{
       
   326 		const TUint c = (*this)[j];
       
   327 		if (!basic(c))
       
   328 			return KErrGeneral;
       
   329 		aBuf.Append(c);
       
   330 		}
       
   331 	// Skip inp over delimiter, if present
       
   332 	if (inp > aIndex)
       
   333 		inp += 1;
       
   334 
       
   335 	// Initialize the state:
       
   336 
       
   337 	TUint n = PUNYCODE_INIT;
       
   338 	TUint outp = aBuf.Length() - out_base;
       
   339 	TUint i = 0;
       
   340 	TUint bias = PUNYCODE_BIAS;
       
   341 
       
   342 	// Main decoding loop:  Start just after the last delimiter if any 
       
   343 	// basic code points were copied; start at the beginning otherwise.
       
   344 
       
   345 	while (inp < puny_end)
       
   346 		{
       
   347 		// inp is the index of the next character to be consumed, and
       
   348 		// outp is the number of code points processed (includes the
       
   349 		// initial basic points).
       
   350 
       
   351 		// Decode a generalized variable-length integer into delta,
       
   352 		// which gets added to i.  The overflow checking is easier
       
   353 		// if we increase i as we go, then subtract off its starting
       
   354 		// value at the end to obtain delta.
       
   355 		const TUint oldi = i;
       
   356 		TUint w = 1;
       
   357 		for (TUint k = PUNYCODE_BASE;  ; k += PUNYCODE_BASE)
       
   358 			{
       
   359 			if (inp >= puny_end)
       
   360 				return KErrGeneral;
       
   361 			
       
   362 			const TUint digit = decode_digit((*this)[inp++]);
       
   363 			if (digit >= PUNYCODE_BASE)
       
   364 				return KErrGeneral;
       
   365 			if (digit > (KMaxTUint - i) / w)
       
   366 				return KErrOverflow;
       
   367 			i += digit * w;
       
   368 			const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN :     /* +tmin not needed */
       
   369 				k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias;
       
   370 			if (digit < t)
       
   371 				break;
       
   372 			if (w > KMaxTUint / (PUNYCODE_BASE - t))
       
   373 				return KErrOverflow;
       
   374 			w *= (PUNYCODE_BASE - t);
       
   375 			}
       
   376 
       
   377 		outp++;		// Going to add new code, increment count.
       
   378 		bias = adapt(i - oldi, outp, oldi == 0);
       
   379 		
       
   380 		// i was supposed to wrap around from out+1 to 0,
       
   381 		// incrementing n each time, so we'll fix that now:
       
   382 
       
   383 	    if (i / outp > KMaxTUint - n)
       
   384 			return KErrOverflow;
       
   385 	    n += i / outp;
       
   386 	    i %= outp;
       
   387 
       
   388 		// Insert n at position i of the output:
       
   389 
       
   390 		if (aBuf.Length() == aBuf.MaxLength())
       
   391 			return KErrDndNameTooBig;
       
   392 		TBuf<1> tmp;
       
   393 		tmp.Append(n);
       
   394 		aBuf.Insert(out_base + i, tmp);
       
   395 		i++;
       
   396 		}
       
   397 	if (puny_end < Length())
       
   398 		{
       
   399 		// Input terminated with '.', copy it to ouput.
       
   400 		if (aBuf.Length() == aBuf.MaxLength())
       
   401 			return KErrDndNameTooBig;
       
   402 		aBuf.Append('.');
       
   403 		puny_end++;
       
   404 		}
       
   405 	return puny_end;
       
   406 	}