charconvfw/Charconv/ongoing/Source/utf/UTF.CPP
changeset 0 1fb32624e06b
child 16 56cd22a7a1cb
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2004 Nokia Corporation and/or its subsidiary(-ies). 
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:      
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 
       
    20 
       
    21 
       
    22 
       
    23 
       
    24 
       
    25 #include <e32std.h>
       
    26 #include <e32base.h>
       
    27 #include <utf.h>
       
    28 
       
     29 const TUint KNotInBase64Alphabet=KMaxTUint; // sentinel returned by Base64Decoding() for a byte that is not a member of the base-64 alphabet
       
    30 
       
    31 enum TPanic
       
    32 	{
       
    33 	EPanicBad6BitNumber=1,
       
    34 	EPanicBadUtf7Pointers1,
       
    35 	EPanicBadUtf7Pointers2,
       
    36 	EPanicBadUtf7Pointers3,
       
    37 	EPanicBadUtf7Pointers4,
       
    38 	EPanicBadUtf7Pointers5,
       
    39 	EPanicBadUtf7Pointers6,
       
    40 	EPanicBadUtf7Pointers7,
       
    41 	EPanicBadUtf7Pointers8,
       
    42 	EPanicBadUtf7Pointers9,
       
    43 	EPanicBadUtf7Pointers10,
       
    44 	EPanicBadUtf7Pointers11,
       
    45 	EPanicNotInBase64Block,
       
    46 	EPanicBadUnicodePointers1,
       
    47 	EPanicBadUnicodePointers2,
       
    48 	EPanicBadUnicodePointers3,
       
    49 	EPanicBadUnicodePointers4,
       
    50 	EPanicBadUnicodePointers5,
       
    51 	EPanicBadUnicodePointers6,
       
    52 	EPanicBadUnicodePointers7,
       
    53 	EPanicBadUnicodePointers8,
       
    54 	EPanicBadUnicodePointers9,
       
    55 	EPanicBadUnicodePointers10,
       
    56 	EPanicBadBitBufferState1,
       
    57 	EPanicBadBitBufferState2,
       
    58 	EPanicBadBitBufferState3,
       
    59 	EPanicBadBitBufferState4,
       
    60 	EPanicBadBitBufferState5,
       
    61 	EPanicBadBitBufferState6,
       
    62 	EPanicBadBitBufferState7,
       
    63 	EPanicBadBitBufferState8,
       
    64 	EPanicBadBitBufferState9,
       
    65 	EPanicBadBitBufferState10,
       
    66 	EPanicBadBitBufferState11,
       
    67 	EPanicBadBitBufferState12,
       
    68 	EPanicBadBitBufferState13,
       
    69 	EPanicBadBitBufferState14,
       
    70 	EPanicBadBitBufferState15,
       
    71 	EPanicBadBitBufferState16,
       
    72 	EPanicBadBitBufferState17,
       
    73 	EPanicUnexpectedNumberOfLoopIterations,
       
    74 	EPanicInitialEscapeCharacterButNoBase64,
       
    75 	EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary,
       
    76 	EPanicBadUtf8Pointers1,
       
    77 	EPanicBadUtf8Pointers2,
       
    78 	EPanicBadUtf8Pointers3,
       
    79 	EPanicBadUtf8Pointers4,
       
    80 	EPanicBadUtf8Pointers5,
       
    81 	EPanicBadUtf8Pointers6,
       
    82 	EPanicBadUtf8Pointers7,
       
    83 	EPanicOutOfSyncUtf7Byte1,
       
    84 	EPanicOutOfSyncUtf7Byte2,
       
    85 	EPanicOutOfSyncBase64Decoding
       
    86 	};
       
    87 
       
    88 _LIT(KLitPanicText, "CHARCONV-UTF");
       
    89 
       
    90 LOCAL_C void Panic(TPanic aPanic)
       
    91 	{
       
    92 	User::Panic(KLitPanicText, aPanic);
       
    93 	}
       
    94 
       
    95 inline TUint EscapeCharacterForStartingBase64Block(TBool aIsImapUtf7) {return aIsImapUtf7? '&': '+';}
       
    96 
       
    97 LOCAL_C TUint Base64Decoding(TUint aMemberOfBase64Alphabet, TBool aIsImapUtf7)
       
    98 	{
       
    99 	if ((aMemberOfBase64Alphabet>='A') && (aMemberOfBase64Alphabet<='Z'))
       
   100 		{
       
   101 		return aMemberOfBase64Alphabet-'A';
       
   102 		}
       
   103 	if ((aMemberOfBase64Alphabet>='a') && (aMemberOfBase64Alphabet<='z'))
       
   104 		{
       
   105 		return aMemberOfBase64Alphabet-('a'-26);
       
   106 		}
       
   107 	if ((aMemberOfBase64Alphabet>='0') && (aMemberOfBase64Alphabet<='9'))
       
   108 		{
       
   109 		return aMemberOfBase64Alphabet+((26*2)-'0');
       
   110 		}
       
   111 	if (aMemberOfBase64Alphabet=='+')
       
   112 		{
       
   113 		return 62;
       
   114 		}
       
   115 	if (aMemberOfBase64Alphabet==STATIC_CAST(TUint, aIsImapUtf7? ',': '/'))
       
   116 		{
       
   117 		return 63;
       
   118 		}
       
   119 	return KNotInBase64Alphabet;
       
   120 	}
       
   121 
       
   122 LOCAL_C TUint Base64Encoding(TUint a6BitNumber, TBool aIsImapUtf7)
       
   123 	{
       
   124 	__ASSERT_DEBUG(a6BitNumber<64, Panic(EPanicBad6BitNumber));
       
   125 	if ((a6BitNumber==63) && aIsImapUtf7)
       
   126 		{
       
   127 		return ',';
       
   128 		}
       
   129 	static const TUint8 base64Alphabet[64]={'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
       
   130 	return base64Alphabet[a6BitNumber];
       
   131 	}
       
   132 
       
   133 LOCAL_C TUint8* PointerToEscapeCharacterStartingBase64Block(TUint8* aPointerToUtf7Byte, const TUint8* aPointerToFirstUtf7Byte, TBool aIsImapUtf7)
       
   134 	{
       
   135 	__ASSERT_DEBUG(aPointerToUtf7Byte>=aPointerToFirstUtf7Byte, Panic(EPanicBadUtf7Pointers1));
       
   136 	TUint8* pointerToCandidateEscapeCharacter=NULL;
       
   137 	FOREVER
       
   138 		{
       
   139 		const TUint utf7Byte=*aPointerToUtf7Byte;
       
   140 		if (utf7Byte==EscapeCharacterForStartingBase64Block(aIsImapUtf7))
       
   141 			{
       
   142 			pointerToCandidateEscapeCharacter=aPointerToUtf7Byte;
       
   143 			}
       
   144 		else if (Base64Decoding(utf7Byte, aIsImapUtf7)==KNotInBase64Alphabet)
       
   145 			{
       
   146 			break;
       
   147 			}
       
   148 		__ASSERT_DEBUG(aPointerToUtf7Byte>=aPointerToFirstUtf7Byte, Panic(EPanicBadUtf7Pointers2));
       
   149 		if (aPointerToUtf7Byte<=aPointerToFirstUtf7Byte)
       
   150 			{
       
   151 			break;
       
   152 			}
       
   153 		--aPointerToUtf7Byte;
       
   154 		}
       
   155 	__ASSERT_DEBUG(pointerToCandidateEscapeCharacter!=NULL, Panic(EPanicNotInBase64Block));
       
   156 	return pointerToCandidateEscapeCharacter;
       
   157 	}
       
   158 
       
   159 LOCAL_C TBool EncodeInUtf7Directly(TUint aUnicodeCharacter, TBool aIsImapUtf7, TBool aEncodeOptionalDirectCharactersInBase64)
       
   160 	{
       
   161 	if (aIsImapUtf7)
       
   162 		{
       
   163 		return (aUnicodeCharacter>=0x0020) && (aUnicodeCharacter<=0x007e);
       
   164 		}
       
   165 	if ((aUnicodeCharacter>=0x0021) && (aUnicodeCharacter<=0x007d))
       
   166 		{
       
   167 		if (aEncodeOptionalDirectCharactersInBase64)
       
   168 			{
       
   169 			return (((aUnicodeCharacter>=0x0041) && (aUnicodeCharacter<=0x005a)) ||
       
   170 					((aUnicodeCharacter>=0x0061) && (aUnicodeCharacter<=0x007a)) ||
       
   171 					((aUnicodeCharacter>=0x0027) && (aUnicodeCharacter<=0x0029)) ||
       
   172 					((aUnicodeCharacter>=0x002b) && (aUnicodeCharacter<=0x003a)) ||
       
   173 					(aUnicodeCharacter==0x003f));
       
   174 			}
       
   175 		return aUnicodeCharacter!=0x005c;
       
   176 		}
       
   177 	return (aUnicodeCharacter==0x0020) || (aUnicodeCharacter==0x0009) || (aUnicodeCharacter==0x000d) || (aUnicodeCharacter==0x000a);
       
   178 	}
       
   179 
       
   180 inline TBool BitBufferContainsNonZeroBits(TUint aBitBuffer, TInt aNumberOfBitsInBuffer)
       
   181 	{
       
   182 	return (aBitBuffer&((1<<aNumberOfBitsInBuffer)-1))!=0;
       
   183 	}
       
   184 
       
   185 
       
   186 
       
   187 /**  Converts Unicode text into UTF-7 encoding. The fucntion leaves with 
       
   188 KErrCorrupt if the input string is corrupt.
       
   189 
       
   190 @param aUnicode A UCS-2 encoded input string.
       
   191 @param aEncodeOptionalDirectCharactersInBase64  If ETrue then 
       
   192 characters from UTF-7 set O (optional direct characters) are encoded in 
       
   193 Modified Base64. If EFalse the characters are encoded directly, 
       
   194 as their ASCII equivalents.
       
   195 @return A descriptor containing the UTF-7 encoded output string. */
       
   196 EXPORT_C HBufC8* CnvUtfConverter::ConvertFromUnicodeToUtf7L(
       
   197 										const TDesC16& aUnicode, 
       
   198 										TBool aEncodeOptionalDirectCharactersInBase64)
       
   199 	{
       
   200 	// If aUnicode is  Null string, return an empty HBufC
       
   201 	if (aUnicode.Length() == 0)
       
   202 		{
       
   203 		HBufC8* hBuf8 = HBufC8::NewL(1);
       
   204 		return hBuf8;
       
   205 		}
       
   206 
       
   207 	// Otherwise, convert and store result in a buffer, reallocating that buffer if needed.
       
   208 	TInt length = aUnicode.Length();
       
   209 	const TInt bufsize = 100;
       
   210 	
       
   211 	TPtrC16 unicode (aUnicode);
       
   212 	TBuf8<bufsize> buf;
       
   213 	HBufC8* hBuf8 = HBufC8::NewLC(length);
       
   214 	TPtr8 utf7 = hBuf8->Des();
       
   215 
       
   216 	FOREVER
       
   217 		{
       
   218 		TInt unconverted = ConvertFromUnicodeToUtf7(buf, unicode, aEncodeOptionalDirectCharactersInBase64);
       
   219 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
       
   220 			User::Leave(KErrCorrupt);
       
   221 
       
   222 		if (utf7.Length() + buf.Length() > utf7.MaxLength())
       
   223 			{
       
   224 			// Reallocate the hBuf8
       
   225 			hBuf8 = hBuf8->ReAllocL(utf7.Length() + buf.Length());
       
   226 			CleanupStack::Pop();
       
   227 			CleanupStack::PushL(hBuf8);
       
   228 			utf7.Set(hBuf8->Des());
       
   229 			}
       
   230 		utf7.Append(buf);
       
   231 		if (unconverted ==0) 
       
   232 			break;
       
   233 		unicode.Set(unicode.Right(unconverted));
       
   234 		}
       
   235 	CleanupStack::Pop();
       
   236 	return hBuf8;
       
   237 
       
   238 	}
       
   239 
       
   240 /** Converts Unicode text into UTF-7 encoding.
       
   241 
       
   242 @param aUtf7 On return, contains the UTF-7 encoded output string.
       
   243 @param aUnicode A UCS-2 encoded input string.
       
   244 @param aEncodeOptionalDirectCharactersInBase64 If ETrue then characters from 
       
   245 UTF-7 set O (optional direct characters) are encoded in Modified Base64. If 
       
   246 EFalse the characters are encoded directly, as their ASCII equivalents.
       
   247 @return The number of unconverted characters left at the end of the input 
       
   248 descriptor, or one of the error values defined in TError. */
       
   249 EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf7(
       
   250 										TDes8& aUtf7, 
       
   251 										const TDesC16& aUnicode, 
       
   252 										TBool aEncodeOptionalDirectCharactersInBase64)
       
   253 	{
       
   254 	return ConvertFromUnicodeToUtf7(aUtf7, aUnicode, EFalse, aEncodeOptionalDirectCharactersInBase64);
       
   255 	}
       
   256 
       
    257 TInt CnvUtfConverter::ConvertFromUnicodeToUtf7(TDes8& aUtf7, 
       
    258 											   const TDesC16& aUnicode, 
       
    259 											   TBool aIsImapUtf7, 
       
    260 											   TBool aEncodeOptionalDirectCharactersInBase64)
       
    261 	{
        	// Encodes aUnicode into aUtf7 as UTF-7, or as the IMAP variant of UTF-7 when
        	// aIsImapUtf7 is set (this selects the escape character and the base-64 symbol for 63).
        	// Characters accepted by EncodeInUtf7Directly() are copied as single bytes; all other
        	// characters are written as base-64 inside a block opened by the escape character and
        	// closed by '-'.
        	// On return the length of aUtf7 is set to the number of bytes written and the return
        	// value is the number of characters at the end of aUnicode that were not converted.
        	// Two early exits: empty input sets aUtf7 to zero length and returns 0; an output
        	// descriptor with zero MaxLength() returns aUnicode.Length() without touching aUtf7.
       
    262 	if (aUnicode.Length()==0)
       
    263 		{
       
    264 		aUtf7.SetLength(0);
       
    265 		return 0;
       
    266 		}
       
    267 	if (aUtf7.MaxLength()==0)
       
    268 		{
       
    269 		return aUnicode.Length();
       
    270 		}
       
    271 	const TUint escapeCharacterForStartingBase64Block=EscapeCharacterForStartingBase64Block(aIsImapUtf7);
        	// both cursors follow a "previous element" convention: they start one before the first
        	// byte/character and are incremented before each write/after each character is consumed
       
    272 	TUint8* pointerToPreviousUtf7Byte=CONST_CAST(TUint8*, aUtf7.Ptr()-1);
       
    273 	const TUint8* const pointerToLastUtf7Byte=pointerToPreviousUtf7Byte+aUtf7.MaxLength();
       
    274 	const TUint16* pointerToPreviousUnicodeCharacter=aUnicode.Ptr()-1;
       
    275 	const TUint16* const pointerToLastUnicodeCharacter=pointerToPreviousUnicodeCharacter+aUnicode.Length();
        	// the top bit of bitBuffer is used as an "inside a base-64 block" flag; the low
        	// numberOfBitsInBuffer bits hold input bits that have not yet been written out
       
    276 	const TUint KIsInBase64Block=0x80000000u;
       
    277 	TUint bitBuffer=0;
       
    278 	TInt numberOfBitsInBuffer=0;
       
    279 	FOREVER
       
    280 		{
       
    281 		__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers3));
       
    282 		__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers1));
        		// when the input is exhausted this branch is still entered once more (with a dummy
        		// character of 0) so that an open base-64 block can be closed before leaving the loop
       
    283 		TUint currentUnicodeCharacter=(pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter)? 0: *(pointerToPreviousUnicodeCharacter+1);
       
    284 		if ((pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter) || EncodeInUtf7Directly(currentUnicodeCharacter, aIsImapUtf7, aEncodeOptionalDirectCharactersInBase64))
       
    285 			{
       
    286 			__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (numberOfBitsInBuffer==0), Panic(EPanicBadBitBufferState1));
       
    287 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState2));
       
    288 			if (bitBuffer&KIsInBase64Block)
       
    289 				{
       
    290 				if (numberOfBitsInBuffer!=0)
       
    291 					{
       
    292 					if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<2) // make sure there is enough space for the trailing '-' as well as the remains of the bitBuffer as the KIsInBase64Block flag is about to turned off, thus the trailing '-' may never get written
       
    293 						{
       
    294 						break;
       
    295 						}
       
    296 					++pointerToPreviousUtf7Byte;
        					// flush the leftover bits, left-aligned in a final base-64 symbol
       
    297 					*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer<<(6-numberOfBitsInBuffer))&0x3f, aIsImapUtf7));
       
    298 					}
       
    299 				else
       
    300 					{
       
    301 					if (pointerToPreviousUtf7Byte==pointerToLastUtf7Byte)
       
    302 						{
       
    303 						break;
       
    304 						}
       
    305 					}
       
    306 				++pointerToPreviousUtf7Byte;
       
    307 				*pointerToPreviousUtf7Byte='-';
       
    308 				bitBuffer=0;
       
    309 				numberOfBitsInBuffer=0;
       
    310 				}
       
    311 			__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers2));
       
    312 			if (pointerToPreviousUnicodeCharacter>=pointerToLastUnicodeCharacter)
       
    313 				{
       
    314 				break;
       
    315 				}
       
    316 			__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers4));
        			// the escape character itself is written as the escape character followed by '-', so it needs two bytes
       
    317 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<((currentUnicodeCharacter==escapeCharacterForStartingBase64Block)? 2: 1))
       
    318 				{
       
    319 				break;
       
    320 				}
       
    321 			++pointerToPreviousUtf7Byte;
       
    322 			*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, currentUnicodeCharacter);
       
    323 			++pointerToPreviousUnicodeCharacter;
       
    324 			if (currentUnicodeCharacter==escapeCharacterForStartingBase64Block)
       
    325 				{
       
    326 				++pointerToPreviousUtf7Byte;
       
    327 				*pointerToPreviousUtf7Byte='-';
       
    328 				}
       
    329 			}
       
    330 		else
       
    331 			{
        			// the character has to go into a base-64 block: first check that everything this
        			// iteration is going to write will fit, so that nothing is written partially
       
    332 			{
       
    333 			TInt numberOfUtf7BytesRequired=(numberOfBitsInBuffer+16)/6; // "(numberOfBitsInBuffer+16)/6" is the number of iterations that will happen in the while loop below
       
    334 			if (~bitBuffer&KIsInBase64Block)
       
    335 				{
       
    336 				++numberOfUtf7BytesRequired; // for the initial escapeCharacterForStartingBase64Block
       
    337 				}
       
    338 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte<numberOfUtf7BytesRequired)
       
    339 				{
       
    340 				break;
       
    341 				}
       
    342 			}
       
    343 			if (~bitBuffer&KIsInBase64Block)
       
    344 				{
       
    345 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers5));
       
    346 				++pointerToPreviousUtf7Byte;
       
    347 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, escapeCharacterForStartingBase64Block);
       
    348 				}
        			// append the 16 bits of the character below the bits already pending, then write
        			// out as many complete 6-bit groups as are available, most significant first
       
    349 			bitBuffer<<=16;
       
    350 			bitBuffer|=currentUnicodeCharacter;
       
    351 			numberOfBitsInBuffer+=16;
       
    352 			++pointerToPreviousUnicodeCharacter;
       
    353 			__ASSERT_DEBUG(numberOfBitsInBuffer<=20, Panic(EPanicBadBitBufferState3));
       
    354 			while (numberOfBitsInBuffer>=6)
       
    355 				{
       
    356 				numberOfBitsInBuffer-=6;
       
    357 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers6));
       
    358 				++pointerToPreviousUtf7Byte;
       
    359 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer>>numberOfBitsInBuffer)&0x3f, aIsImapUtf7));
       
    360 				}
       
    361 			bitBuffer&=((1<<numberOfBitsInBuffer)-1); // zero all the consumed bits - not strictly necessary but it leaves the buffer in a cleaner state
       
    362 			bitBuffer|=KIsInBase64Block;
       
    363 			}
       
    364 		}
        	// the loop is left either with all the input consumed or because the output ran out of
        	// room; in the latter case a base-64 block may still be open and has to be terminated here
       
    365 	__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (numberOfBitsInBuffer==0), Panic(EPanicBadBitBufferState4));
       
    366 	__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState5));
       
    367 	if (bitBuffer&KIsInBase64Block)
       
    368 		{
       
    369 #if defined(_DEBUG)
       
    370 		TInt numberOfLoopIterations=1;
       
    371 #endif
       
    372 		FOREVER // there should never be more than 2 iterations of this loop - the first "if" should always succeed the second time if it doesn't succeed the first time
       
    373 			{
       
    374 			__ASSERT_DEBUG(pointerToPreviousUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers7));
       
    375 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState6));
       
    376 			__ASSERT_DEBUG(numberOfLoopIterations<=2, Panic(EPanicUnexpectedNumberOfLoopIterations));
       
    377 #if defined(_DEBUG)
       
    378 			++numberOfLoopIterations;
       
    379 #endif
       
    380 			if (pointerToLastUtf7Byte-pointerToPreviousUtf7Byte>=((numberOfBitsInBuffer==0)? 1: 2)) // if there's room to finish off the base-64 sequence by (i) flushing the bit-buffer and (ii) appending the trailing '-'
       
    381 				{
       
    382 				if (numberOfBitsInBuffer!=0)
       
    383 					{
       
    384 					__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers8));
       
    385 					++pointerToPreviousUtf7Byte;
       
    386 					*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding((bitBuffer<<(6-numberOfBitsInBuffer))&0x3f, aIsImapUtf7));
       
    387 					}
       
    388 				__ASSERT_DEBUG(pointerToPreviousUtf7Byte<pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers9));
       
    389 				++pointerToPreviousUtf7Byte;
       
    390 				*pointerToPreviousUtf7Byte='-';
       
    391 				break;
       
    392 				}
       
    393 			// it is now necessary to move back pointerToPreviousUtf7Byte so that the base-64 sequence can be terminated - note it must be terminated on a Unicode character boundary hence the reason why pointerToPreviousUnicodeCharacter may be moved back too
       
    394 			TUint8* pointerToEscapeCharacterStartingBase64Block=PointerToEscapeCharacterStartingBase64Block(pointerToPreviousUtf7Byte, aUtf7.Ptr(), aIsImapUtf7);
       
    395 			const TInt oldNumberOfBase64Characters=pointerToPreviousUtf7Byte-pointerToEscapeCharacterStartingBase64Block;
       
    396 			__ASSERT_DEBUG(oldNumberOfBase64Characters>0, Panic(EPanicInitialEscapeCharacterButNoBase64));
       
    397 			__ASSERT_DEBUG(((oldNumberOfBase64Characters*6)+numberOfBitsInBuffer)%16==0, Panic(EPanicBase64SequenceDoesNotFallOnUnicodeCharacterBoundary));
       
    398 			pointerToPreviousUnicodeCharacter-=((oldNumberOfBase64Characters*6)+numberOfBitsInBuffer)/16; // move back pointerToPreviousUnicodeCharacter to before the equivalent of the base-64 sequence
       
    399 			pointerToPreviousUtf7Byte=pointerToEscapeCharacterStartingBase64Block;
       
    400 			__ASSERT_DEBUG(*pointerToPreviousUtf7Byte==escapeCharacterForStartingBase64Block, Panic(EPanicBadUtf7Pointers10));
       
    401 			if (oldNumberOfBase64Characters<4) // if the new base-64 sequence will be so short that it won't even be able to contain the UTF-7 encoding of a single Unicode character
       
    402 				{
       
    403 				--pointerToPreviousUtf7Byte; // move back pointerToPreviousUtf7Byte to before the escapeCharacterForStartingBase64Block
       
    404 				break;
       
    405 				}
        			// re-advance both cursors over as many whole Unicode characters as still fit when one
        			// byte is kept free for the trailing '-' (hence "oldNumberOfBase64Characters-1")
       
    406 			const TInt newNumberOfUnicodeCharacters=((oldNumberOfBase64Characters-1)*3)/8;
       
    407 			pointerToPreviousUnicodeCharacter+=newNumberOfUnicodeCharacters;
       
    408 			pointerToPreviousUtf7Byte+=((newNumberOfUnicodeCharacters*8)+2)/3;
       
    409 			const TInt numberOfBitsToBeZeroedInLastBase64Character=(newNumberOfUnicodeCharacters%3)*2;
       
    410 			if (numberOfBitsToBeZeroedInLastBase64Character!=0)
       
    411 				{
        				// the last symbol kept was written with bits of a character that has now been
        				// dropped in its low-order positions - clear them by decoding, masking and re-encoding
       
    412 				*pointerToPreviousUtf7Byte=STATIC_CAST(TUint8, Base64Encoding(Base64Decoding(*pointerToPreviousUtf7Byte, aIsImapUtf7)&0x3f&~((1<<numberOfBitsToBeZeroedInLastBase64Character)-1), aIsImapUtf7));
       
    413 				}
       
    414 			bitBuffer=KIsInBase64Block;
       
    415 			numberOfBitsInBuffer=0;
       
    416 			}
       
    417 		}
        	// the cursors point at the last byte written/last character consumed, hence the "+1" for the length
       
    418 	aUtf7.SetLength((pointerToPreviousUtf7Byte-aUtf7.Ptr())+1);
       
    419 	return pointerToLastUnicodeCharacter-pointerToPreviousUnicodeCharacter;
       
    420 	}
       
   421 
       
   422  
       
   423 
       
   424 /** Converts Unicode text into UTF-8 encoding.
       
   425 
       
   426 @param aUtf8 On return, contains the UTF-8 encoded output string.
       
   427 @param aUnicode The Unicode-encoded input string.
       
   428 @return The number of unconverted characters left at the end of the input 
       
   429 descriptor, or one of the error values defined in TError. */
       
   430 EXPORT_C TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, const TDesC16& aUnicode)
       
   431 	{
       
   432 	return ConvertFromUnicodeToUtf8(aUtf8, aUnicode, EFalse);
       
   433 	}
       
   434 
       
   435 
       
   436 /**  Converts Unicode text into UTF-8 encoding.
       
   437 
       
   438 The variant of UTF-8 used internally by Java differs slightly from
       
   439 standard UTF-8. The TBool argument controls the UTF-8
       
   440 variant generated by this function. This function leaves with a 
       
   441 KErrCorrupt if the input string is corrupt. 
       
   442 
       
   443 @param aUnicode A UCS-2 encoded input string.
       
   444 @return A pointer to an HBufC8 containing the converted UTF8. */	
       
   445 EXPORT_C HBufC8* CnvUtfConverter::ConvertFromUnicodeToUtf8L(const TDesC16& aUnicode)
       
   446  	{
       
   447 	// If aUnicode is  Null string, return an empty HBufC
       
   448 	if (aUnicode.Length() == 0)
       
   449 		{
       
   450 		HBufC8* hBuf8 = HBufC8::NewL(1);
       
   451 		return hBuf8;
       
   452 		}
       
   453 
       
   454 	// Otherwise, convert and store result in a buffer, reallocating that buffer if needed.
       
   455 	const TInt length = aUnicode.Length();
       
   456 	const TInt bufsize = 100;
       
   457 	
       
   458 	TPtrC16 unicode (aUnicode);
       
   459 	TBuf8<bufsize> buf;
       
   460 	HBufC8* hBuf8 = HBufC8::NewLC(length);
       
   461 	TPtr8 utf8 = hBuf8->Des();
       
   462 
       
   463 	FOREVER
       
   464 		{
       
   465 		TInt unconverted = ConvertFromUnicodeToUtf8(buf, unicode);
       
   466 		if( unconverted == EErrorIllFormedInput || unconverted < 0)
       
   467 			User::Leave(KErrCorrupt);
       
   468 
       
   469 		if (utf8.Length() + buf.Length() > utf8.MaxLength())
       
   470 			{
       
   471 			// Reallocate the hBuf8
       
   472 			hBuf8 = hBuf8->ReAllocL(utf8.Length() + buf.Length());
       
   473 			CleanupStack::Pop();
       
   474 			CleanupStack::PushL(hBuf8);
       
   475 			utf8.Set(hBuf8->Des());
       
   476 			}
       
   477 		utf8.Append(buf);
       
   478 		if (unconverted ==0) 
       
   479 			break;
       
   480 		unicode.Set(unicode.Right(unconverted));
       
   481 		}
       
   482 	CleanupStack::Pop();
       
   483 	return hBuf8;
       
   484 	}
       
   485 
       
   486 /** Converts Unicode text into UTF-8 encoding. 
       
   487 
       
   488 The variant of UTF-8 used internally by Java differs slightly from standard 
       
   489 UTF-8. The TBool argument controls the UTF-8 variant generated by this function.
       
   490 
       
   491 @param aUtf8 On return, contains the UTF-8 encoded output string.
       
   492 @param aUnicode A UCS-2 encoded input string.
       
   493 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
       
   494 UTF-8. The default is EFalse.
       
   495 @return The number of unconverted characters left at the end of the input descriptor, 
       
   496 or one of the error values defined in TError. */
       
    497 TInt CnvUtfConverter::ConvertFromUnicodeToUtf8(TDes8& aUtf8, 
       
    498 											   const TDesC16& aUnicode, 
       
    499 											   TBool aGenerateJavaConformantUtf8)
       
    500 	{
        	// Two early exits: empty input sets aUtf8 to zero length and returns 0; an output
        	// descriptor with zero MaxLength() returns aUnicode.Length() without touching aUtf8.
        	// Otherwise one input character (or one surrogate pair) is encoded per loop iteration
        	// until either the input or the output space is used up.
       
    501 	if (aUnicode.Length()==0)
       
    502 		{
       
    503 		aUtf8.SetLength(0);
       
    504 		return 0;
       
    505 		}
       
    506 	if (aUtf8.MaxLength()==0)
       
    507 		{
       
    508 		return aUnicode.Length();
       
    509 		}
        	// the "current" cursors address the byte about to be written/character about to be read;
        	// the "last" pointers address the final valid element of each descriptor (not one past it)
       
    510 	TUint8* pointerToCurrentUtf8Byte=CONST_CAST(TUint8*, aUtf8.Ptr());
       
    511 	const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(aUtf8.MaxLength()-1);
       
    512 	const TUint16* pointerToCurrentUnicodeCharacter=aUnicode.Ptr();
       
    513 	const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.Length()-1);
       
    514 	TBool inputIsTruncated=EFalse;
       
    515 	FOREVER
       
    516 		{
       
    517 		__ASSERT_DEBUG(pointerToCurrentUtf8Byte<=pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers1));
       
    518 		__ASSERT_DEBUG(pointerToCurrentUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers3));
       
    519 		TUint currentUnicodeCharacter=*pointerToCurrentUnicodeCharacter;
        		// one byte: 0x0000-0x007f, except that 0x0000 is excluded in Java-conformant mode
        		// (it then falls through to the two-byte branch below)
       
    520 		if (((currentUnicodeCharacter&0xff80)==0x0000) && ((currentUnicodeCharacter!=0x0000) || !aGenerateJavaConformantUtf8))
       
    521 			{
       
    522 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, currentUnicodeCharacter);
       
    523 			}
        		// two bytes: values up to 0x07ff
       
    524 		else if ((currentUnicodeCharacter&0xf800)==0x0000)
       
    525 			{
       
    526 			if (pointerToCurrentUtf8Byte==pointerToLastUtf8Byte)
       
    527 				{
        				// not enough room: step both cursors back to the last completed byte/character
        				// so that the length and return value computed after the loop exclude this one
       
    528 				--pointerToCurrentUtf8Byte;
       
    529 				--pointerToCurrentUnicodeCharacter;
       
    530 				break;
       
    531 				}
       
    532 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0xc0|(currentUnicodeCharacter>>6));
       
    533 			++pointerToCurrentUtf8Byte;
       
    534 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0x80|(currentUnicodeCharacter&0x3f));
       
    535 			}
        		// four bytes: a high surrogate (0xd800-0xdbff) combined with the low surrogate that
        		// must follow it; skipped in Java-conformant mode, where surrogates take the
        		// three-byte branch individually
       
    536 		else if (((currentUnicodeCharacter&0xfc00)==0xd800) && !aGenerateJavaConformantUtf8)
       
    537 			{
       
    538 			__ASSERT_DEBUG(pointerToCurrentUtf8Byte<=pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers2));
       
    539 			if (pointerToLastUtf8Byte-pointerToCurrentUtf8Byte<3)
       
    540 				{
       
    541 				--pointerToCurrentUtf8Byte;
       
    542 				--pointerToCurrentUnicodeCharacter;
       
    543 				break;
       
    544 				}
       
    545 			__ASSERT_DEBUG(pointerToCurrentUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers4));
       
    546 			if (pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter)
       
    547 				{
        				// the high surrogate is the last input character, so its partner is missing
       
    548 				--pointerToCurrentUtf8Byte;
       
    549 				--pointerToCurrentUnicodeCharacter;
       
    550 				inputIsTruncated=ETrue;
       
    551 				break;
       
    552 				}
        			// adding 0x0040 increments the value held in bits 6-9 of the high surrogate by one
       
    553 			currentUnicodeCharacter+=0x0040;
       
    554 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0xf0|((currentUnicodeCharacter>>8)&0x07));
       
    555 			++pointerToCurrentUtf8Byte;
       
    556 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0x80|((currentUnicodeCharacter>>2)&0x3f));
       
    557 			{
        			// the third byte takes two bits from the high surrogate and four from the low surrogate
       
    558 			TUint currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4));
       
    559 			++pointerToCurrentUnicodeCharacter;
       
    560 			currentUnicodeCharacter=*pointerToCurrentUnicodeCharacter;
       
    561 			if ((currentUnicodeCharacter&0xfc00)!=0xdc00)
       
    562 				{
        				// note that the length of aUtf8 is not updated on this error path
       
    563 				return EErrorIllFormedInput;
       
    564 				}
       
    565 			currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f);
       
    566 			++pointerToCurrentUtf8Byte;
       
    567 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, currentUtf8Byte);
       
    568 			}
       
    569 			++pointerToCurrentUtf8Byte;
       
    570 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0x80|(currentUnicodeCharacter&0x3f));
       
    571 			}
        		// three bytes: everything else
       
    572 		else
       
    573 			{
       
    574 			if (pointerToLastUtf8Byte-pointerToCurrentUtf8Byte<2)
       
    575 				{
       
    576 				--pointerToCurrentUtf8Byte;
       
    577 				--pointerToCurrentUnicodeCharacter;
       
    578 				break;
       
    579 				}
       
    580 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0xe0|(currentUnicodeCharacter>>12));
       
    581 			++pointerToCurrentUtf8Byte;
       
    582 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0x80|((currentUnicodeCharacter>>6)&0x3f));
       
    583 			++pointerToCurrentUtf8Byte;
       
    584 			*pointerToCurrentUtf8Byte=STATIC_CAST(TUint8, 0x80|(currentUnicodeCharacter&0x3f));
       
    585 			}
        		// stop once the last input character has been encoded or the last output byte has been written
       
    586 		if ((pointerToCurrentUnicodeCharacter==pointerToLastUnicodeCharacter) || (pointerToCurrentUtf8Byte==pointerToLastUtf8Byte))
       
    587 			{
       
    588 			break;
       
    589 			}
       
    590 		++pointerToCurrentUtf8Byte;
       
    591 		++pointerToCurrentUnicodeCharacter;
       
    592 		}
        	// an unpaired high surrogate is only reported as an error when it is the very first
        	// input character (the cursor was stepped back to before the start of aUnicode);
        	// otherwise it is simply counted among the unconverted characters returned below
       
    593 	if ((pointerToCurrentUnicodeCharacter<aUnicode.Ptr()) && inputIsTruncated)
       
    594 		{
       
    595 		return EErrorIllFormedInput;
       
    596 		}
        	// both cursors address the last byte written/last character converted, hence the "+1" for the length
       
    597 	aUtf8.SetLength((pointerToCurrentUtf8Byte-aUtf8.Ptr())+1);
       
    598 	return pointerToLastUnicodeCharacter-pointerToCurrentUnicodeCharacter;
       
    599 	}
       
   600 
       
   601 
       
   602 
       
   603 /**  Converts text encoded using the Unicode transformation format UTF-7
       into the Unicode UCS-2 character set, returning the result in a newly
       allocated heap descriptor.

       The input is decoded piece by piece through a fixed-size intermediate
       buffer; every decoded piece is appended to the heap descriptor, which is
       re-allocated if it turns out to be too small. The function leaves with
       KErrCorrupt when the converter reports an error for the input.

       @param aUtf7 The UTF-7 encoded input string.
       @return A pointer to an HBufC16 containing the converted Unicode string.
       The descriptor is not left on the cleanup stack. */
       EXPORT_C HBufC16* CnvUtfConverter::ConvertToUnicodeFromUtf7L(const TDesC8& aUtf7)
       	{
       	const TInt KChunkLength = 100;
       	const TInt inputLength = aUtf7.Length();

       	// An empty input needs no conversion: hand back an empty descriptor.
       	if (inputLength==0)
       		{
       		return HBufC16::NewL(1);
       		}

       	// The conversion state links the successive converter calls together.
       	TInt conversionState = KStateDefault;
       	TPtrC8 remainingInput(aUtf7);
       	TBuf<KChunkLength> chunk;
       	HBufC16* result = HBufC16::NewLC(inputLength);
       	TPtr resultDes = result->Des();

       	TInt bytesLeft;
       	do
       		{
       		bytesLeft = ConvertToUnicodeFromUtf7(chunk, remainingInput, conversionState);
       		const TBool conversionFailed = (bytesLeft == EErrorIllFormedInput) || (bytesLeft < 0);
       		if (conversionFailed)
       			{
       			User::Leave(KErrCorrupt);
       			}

       		const TInt requiredLength = resultDes.Length() + chunk.Length();
       		if (requiredLength > resultDes.MaxLength())
       			{
       			// Grow the result; the cleanup stack entry has to follow the
       			// (possibly moved) heap cell.
       			result = result->ReAllocL(requiredLength);
       			CleanupStack::Pop();
       			CleanupStack::PushL(result);
       			resultDes.Set(result->Des());
       			}
       		resultDes.Append(chunk);

       		if (bytesLeft != 0)
       			{
       			// Carry on with the tail of the input that is still unconverted.
       			remainingInput.Set(remainingInput.Right(bytesLeft));
       			}
       		}
       	while (bytesLeft != 0);

       	CleanupStack::Pop();
       	return result;
       	}
       
   650 
       
   651  
       
   652 
       
   653 /** Converts text encoded using the Unicode transformation format UTF-7 into the 
       Unicode UCS-2 character set. The call is forwarded to the four-argument 
       overload with aIsImapUtf7 set to EFalse.

       A conversion may be carried out as a series of calls to this function, each 
       call starting where the previous one stopped in the input descriptor. The 
       state of the conversion is kept in aState between the calls: set it to 
       KStateDefault before the first call and pass it on unchanged afterwards.

       @param aUnicode On return, contains the Unicode encoded output string.
       @param aUtf7 The UTF-7 encoded input string.
       @param aState For the first call of the function set to KStateDefault. For 
       subsequent calls, pass in the variable unchanged.
       @return The number of unconverted bytes left at the end of the input descriptor, 
       or one of the error values defined in TError. */
       EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf7(TDes16& aUnicode, 
       														const TDesC8& aUtf7, 
       														TInt& aState)
       	{
       	const TBool isImapUtf7 = EFalse;
       	const TInt unconvertedByteCount = ConvertToUnicodeFromUtf7(aUnicode, aUtf7, isImapUtf7, aState);
       	return unconvertedByteCount;
       	}
       
   674 
       
   675 TInt CnvUtfConverter::ConvertToUnicodeFromUtf7(TDes16& aUnicode, 
   676 											   const TDesC8& aUtf7, 
   677 											   TBool aIsImapUtf7, 
   678 											   TInt& aState)
   679 	{
       	// Decodes UTF-7 bytes from aUtf7 into 16-bit characters in aUnicode.
       	//
       	// aUnicode    - receives the decoded characters; its length is set to the
       	//               number of characters written (it is left untouched when
       	//               its maximum length is zero or when an error is returned
       	//               from inside the main loop).
       	// aUtf7       - the encoded input.
       	// aIsImapUtf7 - forwarded to EscapeCharacterForStartingBase64Block() and
       	//               Base64Decoding(); presumably selects the IMAP flavour of
       	//               UTF-7 - confirm against those helpers.
       	// aState      - in/out conversion state. Bit 31 (KIsInBase64Block) flags
       	//               that the previous call stopped inside a base-64 block,
       	//               bits 4-7 hold the number of buffered bits and bits 0-3
       	//               hold buffered bits. Values that do not fit this layout
       	//               panic through the __ASSERT_ALWAYS checks below.
       	// Returns the number of input bytes that were not consumed, or
       	// EErrorIllFormedInput.

       	// Nothing to convert.
   680 	if (aUtf7.Length()==0)
   681 		{
   682 		aUnicode.SetLength(0);
   683 		return 0;
   684 		}
       	// No room for any output: report the whole input as unconverted.
   685 	if (aUnicode.MaxLength()==0)
   686 		{
   687 		return aUtf7.Length();
   688 		}
   689 	const TUint escapeCharacterForStartingBase64Block=EscapeCharacterForStartingBase64Block(aIsImapUtf7);
       	// The output position is tracked as "the character written last", so it
       	// starts one element before the buffer; the "last" pointers address the
       	// final usable element of each descriptor.
   690 	TUint16* pointerToPreviousUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr()-1);
   691 	const TUint16* pointerToLastUnicodeCharacter=pointerToPreviousUnicodeCharacter+aUnicode.MaxLength();
   692 	const TUint8* pointerToCurrentUtf7Byte=aUtf7.Ptr();
   693 	const TUint8* pointerToLastUtf7Byte=pointerToCurrentUtf7Byte+(aUtf7.Length()-1);
   694 	TUint currentUtf7Byte=*pointerToCurrentUtf7Byte;
   695 	const TUint KIsInBase64Block=0x80000000u;
       	// Unpack aState into the bit buffer and its bit count.
   696 	TUint bitBuffer=STATIC_CAST(TUint, aState);
   697 	TInt numberOfBitsInBuffer=((bitBuffer&0xf0)>>4);
   698 	bitBuffer&=~0xf0; // turn off the bits that stored numberOfBitsInBuffer
   699 	if (bitBuffer&KIsInBase64Block)
   700 		{
   701 		__ASSERT_ALWAYS((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4) || ((numberOfBitsInBuffer<16) && (numberOfBitsInBuffer%2==0) && !BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer)), Panic(EPanicBadBitBufferState7));
   702 		__ASSERT_ALWAYS((bitBuffer&~(KIsInBase64Block|0x0000000f))==0, Panic(EPanicBadBitBufferState8));
   703 		}
   704 	else
   705 		{
   706 		__ASSERT_ALWAYS(bitBuffer==0, Panic(EPanicBadBitBufferState9));
   707 		__ASSERT_ALWAYS(numberOfBitsInBuffer==0, Panic(EPanicBadBitBufferState10));
   708 		}
       	// The caller's state is reset here and only rewritten at "end" when the
       	// call stops inside a base-64 block; the error returns below therefore
       	// leave it at KStateDefault.
   709 	aState=KStateDefault;
       	// Resuming inside a base-64 block: the first byte is a base-64 digit.
   710 	if (bitBuffer&KIsInBase64Block)
   711 		{
   712 		currentUtf7Byte=Base64Decoding(currentUtf7Byte, aIsImapUtf7);
   713 		}
   714 	TBool inputIsTruncated=EFalse;
   715 	FOREVER
   716 		{
   717 		__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers5));
   718 		__ASSERT_DEBUG(pointerToCurrentUtf7Byte<=pointerToLastUtf7Byte, Panic(EPanicBadUtf7Pointers11));
   719 		__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || (currentUtf7Byte==*pointerToCurrentUtf7Byte), Panic(EPanicOutOfSyncUtf7Byte1));
   720 		__ASSERT_DEBUG((~bitBuffer&KIsInBase64Block) || (currentUtf7Byte==Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7)), Panic(EPanicOutOfSyncUtf7Byte2));
   721 		__ASSERT_DEBUG((bitBuffer&KIsInBase64Block) || ((bitBuffer==0) && (numberOfBitsInBuffer==0)), Panic(EPanicBadBitBufferState11));
       		// Outside a base-64 block the escape character either opens a block
       		// or, when followed by '-', stands for itself.
   722 		if ((~bitBuffer&KIsInBase64Block) && (currentUtf7Byte==escapeCharacterForStartingBase64Block))
   723 			{
   724 			if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   725 				{
       				// The escape character is the final input byte: step back so
       				// that it is counted as unconverted.
   726 				--pointerToCurrentUtf7Byte;
   727 				inputIsTruncated=ETrue;
   728 				goto end;
   729 				}
   730 			++pointerToCurrentUtf7Byte;
   731 			currentUtf7Byte=*pointerToCurrentUtf7Byte;
   732 			if (currentUtf7Byte=='-')
   733 				{
       				// Escape followed by '-': emit the escape character itself
       				// through the direct-character branch below.
   734 				currentUtf7Byte=escapeCharacterForStartingBase64Block;
   735 				}
   736 			else
   737 				{
       				// Anything else after the escape must be a base-64 digit.
   738 				currentUtf7Byte=Base64Decoding(currentUtf7Byte, aIsImapUtf7);
   739 				if (currentUtf7Byte==KNotInBase64Alphabet)
   740 					{
   741 					return EErrorIllFormedInput;
   742 					}
   743 				bitBuffer=KIsInBase64Block;
   744 				}
   745 			}
   746 		if (bitBuffer&KIsInBase64Block)
   747 			{
       			// Base-64 block: collect 6 bits per input byte and write a
       			// character whenever 16 bits can be flushed.
   748 			FOREVER
   749 				{
   750 				__ASSERT_DEBUG(currentUtf7Byte==Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7), Panic(EPanicOutOfSyncBase64Decoding));
   751 				__ASSERT_DEBUG((numberOfBitsInBuffer<16) || (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16) && (numberOfBitsInBuffer<16+6)), Panic(EPanicBadBitBufferState12));
   752 				if (currentUtf7Byte==KNotInBase64Alphabet)
   753 					{
       					// A byte outside the base-64 alphabet ends the block;
       					// non-zero bits left in the buffer make the input ill-formed.
   754 					if (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer))
   755 						{
   756 						return EErrorIllFormedInput;
   757 						}
   758 					bitBuffer=0;
   759 					numberOfBitsInBuffer=0;
   760 					currentUtf7Byte=*pointerToCurrentUtf7Byte;
       					// A '-' that ends the block is consumed without producing
       					// output; any other terminator is handled as a normal
       					// byte by the outer loop.
   761 					if (currentUtf7Byte=='-')
   762 						{
   763 						if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   764 							{
   765 							goto end;
   766 							}
   767 						++pointerToCurrentUtf7Byte;
   768 						currentUtf7Byte=*pointerToCurrentUtf7Byte;
   769 						}
   770 					break;
   771 					}
       				// Shift the 6 new bits in; the shift pushes the flag bit
       				// out, so it is set again afterwards.
   772 				bitBuffer<<=6;
   773 				bitBuffer|=currentUtf7Byte;
   774 				bitBuffer|=KIsInBase64Block;
   775 				numberOfBitsInBuffer+=6;
   776 				// only flush the buffer if it contains a whole Unicode character and the remainder is either all zero-bits (hence would be a legal point to end the base-64 sequence) or at least 6 bits long (therefore would leave at least one UTF-7 byte unconverted at the end of the input descriptor)
   777 				if ((numberOfBitsInBuffer>=16+6) || ((numberOfBitsInBuffer>=16) && !BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16)))
   778 					{
   779 					numberOfBitsInBuffer-=16;
   780 					__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers6));
   781 					++pointerToPreviousUnicodeCharacter;
   782 					*pointerToPreviousUnicodeCharacter=STATIC_CAST(TUint16, bitBuffer>>numberOfBitsInBuffer);
   783 					bitBuffer&=((1<<numberOfBitsInBuffer)-1); // zero all the consumed bits - must be done as bitBuffer is stored along with numberOfBitsInBuffer in aState if the output descriptor runs out of space or if the input descriptor was truncated
   784 					bitBuffer|=KIsInBase64Block; // turn it back on as the line above turned it off
       					// Output descriptor is full.
   785 					if (pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter)
   786 						{
   787 						goto end;
   788 						}
   789 					}
       				// The input ran out while still inside the base-64 block.
   790 				if (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte)
   791 					{
   792 					inputIsTruncated=ETrue;
   793 					goto end;
   794 					}
   795 				++pointerToCurrentUtf7Byte;
   796 				currentUtf7Byte=Base64Decoding(*pointerToCurrentUtf7Byte, aIsImapUtf7);
   797 				}
   798 			}
   799 		else
   800 			{
       			// Directly encoded character: the byte value is copied as it is.
   801 			__ASSERT_DEBUG(pointerToPreviousUnicodeCharacter<pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers7));
   802 			++pointerToPreviousUnicodeCharacter;
   803 			*pointerToPreviousUnicodeCharacter=STATIC_CAST(TUint16, currentUtf7Byte);
   804 			if ((pointerToPreviousUnicodeCharacter==pointerToLastUnicodeCharacter) || (pointerToCurrentUtf7Byte==pointerToLastUtf7Byte))
   805 				{
   806 				goto end;
   807 				}
   808 			++pointerToCurrentUtf7Byte;
   809 			currentUtf7Byte=*pointerToCurrentUtf7Byte;
   810 			}
   811 		}
   812 end:
       	// Stopped inside a base-64 block: pack the remaining bits and their
       	// count into aState so that the next call can resume.
   813 	if (bitBuffer&KIsInBase64Block)
   814 		{
   815 		__ASSERT_DEBUG((numberOfBitsInBuffer<16) || (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer-16) && (numberOfBitsInBuffer<16+6)), Panic(EPanicBadBitBufferState13));
   816 		if (BitBufferContainsNonZeroBits(bitBuffer, numberOfBitsInBuffer))
   817 			{
   818 			// rewind how far we've got in the UTF-7 descriptor to indicate to the user (by returning a value greater than zero) that not all of the input could be converted as it ended with a truncated base-64 sequence
   819 			__ASSERT_DEBUG(numberOfBitsInBuffer>=6, Panic(EPanicBadBitBufferState14));
   820 			pointerToCurrentUtf7Byte-=numberOfBitsInBuffer/6;
   821 			const TInt newNumberOfBitsInBuffer=numberOfBitsInBuffer%6;
   822 			bitBuffer&=~KIsInBase64Block; // temporarily turn off the KIsInBase64Block for the right-shift
   823 			bitBuffer>>=(numberOfBitsInBuffer-newNumberOfBitsInBuffer);
   824 			bitBuffer|=KIsInBase64Block; // must be turned back on again as the bit-buffer is packed into aState
   825 			numberOfBitsInBuffer=newNumberOfBitsInBuffer;
   826 			__ASSERT_DEBUG((numberOfBitsInBuffer==0) || (numberOfBitsInBuffer==2) || (numberOfBitsInBuffer==4), Panic(EPanicBadBitBufferState15));
   827 			}
   828 		__ASSERT_DEBUG((numberOfBitsInBuffer<16) && (numberOfBitsInBuffer%2==0), Panic(EPanicBadBitBufferState16));
   829 		aState=STATIC_CAST(TInt, bitBuffer);
   830 		aState|=(numberOfBitsInBuffer<<4);
   831 		__ASSERT_DEBUG(aState&KIsInBase64Block, Panic(EPanicBadBitBufferState17));
   832 		bitBuffer=0;
   833 		numberOfBitsInBuffer=0;
   834 		}
       	// The rewinds above can move the input pointer in front of the
       	// descriptor; with a truncated input this is reported as ill-formed.
   835 	if ((pointerToCurrentUtf7Byte<aUtf7.Ptr()) && inputIsTruncated)
   836 		{
   837 		return EErrorIllFormedInput;
   838 		}
   839 	aUnicode.SetLength((pointerToPreviousUnicodeCharacter+1)-aUnicode.Ptr());
   840 	return pointerToLastUtf7Byte-pointerToCurrentUtf7Byte;
   841 	}
       
   842 
       
   843 
       
   844 
       
   845 /** Converts text encoded using the Unicode transformation format UTF-8
       into the Unicode UCS-2 character set, returning the result in a newly
       allocated heap descriptor. This function leaves with KErrCorrupt if the
       converter reports an error for the input string.

       The input is decoded piece by piece through a fixed-size intermediate
       buffer; every decoded piece is appended to the heap descriptor, which is
       re-allocated if it turns out to be too small.

       @param aUtf8 The UTF-8 encoded input string
       @return A pointer to an HBufC16 with the converted Unicode string. The
       descriptor is not left on the cleanup stack. */
       EXPORT_C HBufC16* CnvUtfConverter::ConvertToUnicodeFromUtf8L(const TDesC8& aUtf8)
       	{
       	const TInt KChunkLength = 100;
       	const TInt inputLength = aUtf8.Length();

       	// An empty input needs no conversion: hand back an empty descriptor.
       	if (inputLength==0)
       		{
       		return HBufC16::NewL(1);
       		}

       	TPtrC8 remainingInput(aUtf8);
       	TBuf<KChunkLength> chunk;
       	HBufC16* result = HBufC16::NewLC(inputLength);
       	TPtr resultDes = result->Des();

       	TInt bytesLeft;
       	do
       		{
       		bytesLeft = ConvertToUnicodeFromUtf8(chunk, remainingInput);
       		const TBool conversionFailed = (bytesLeft == EErrorIllFormedInput) || (bytesLeft < 0);
       		if (conversionFailed)
       			{
       			User::Leave(KErrCorrupt);
       			}

       		const TInt requiredLength = resultDes.Length() + chunk.Length();
       		if (requiredLength > resultDes.MaxLength())
       			{
       			// Grow the result; the cleanup stack entry has to follow the
       			// (possibly moved) heap cell.
       			result = result->ReAllocL(requiredLength);
       			CleanupStack::Pop();
       			CleanupStack::PushL(result);
       			resultDes.Set(result->Des());
       			}
       		resultDes.Append(chunk);

       		if (bytesLeft != 0)
       			{
       			// Carry on with the tail of the input that is still unconverted.
       			remainingInput.Set(remainingInput.Right(bytesLeft));
       			}
       		}
       	while (bytesLeft != 0);

       	CleanupStack::Pop();
       	return result;
       	}
       
   892 
       
   893 /** Converts text encoded using the Unicode transformation format UTF-8 into the 
       Unicode UCS-2 character set. The call is forwarded to the three-argument 
       overload with aGenerateJavaConformantUtf8 set to EFalse.

       @param aUnicode On return, contains the Unicode encoded output string.
       @param aUtf8 The UTF-8 encoded input string
       @return The number of unconverted bytes left at the end of the input descriptor, 
       or one of the error values defined in TError. */
       EXPORT_C TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8)
       	{
       	const TBool javaConformantUtf8 = EFalse;
       	const TInt unconvertedByteCount = ConvertToUnicodeFromUtf8(aUnicode, aUtf8, javaConformantUtf8);
       	return unconvertedByteCount;
       	}
       
   904 
       
   905 static void UpdateUnconvertibleInfo(TInt& aNumberOfUnconvertibleCharacters,
       		TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter, TInt aIndex)
   907 	{
       	// Records one more unconvertible character.
       	//
       	// aNumberOfUnconvertibleCharacters - running count, incremented by one.
       	// aIndexOfFirstByteOfFirstUnconvertibleCharacter - set to aIndex, but only
       	//     while the running count is still zero (or negative), i.e. for the
       	//     first unconvertible character.
       	// aIndex - byte offset of the offending input byte. It is a TInt so that
       	//     offsets of 256 and above are stored without truncation.
   908 	if (aNumberOfUnconvertibleCharacters<=0)
   909 		{
   910 		aIndexOfFirstByteOfFirstUnconvertibleCharacter = aIndex;
   911 		}
   912 	++aNumberOfUnconvertibleCharacters;
   913 	}
       
   914 
       
   915 /** Converts text encoded using the Unicode transformation format UTF-8 into the 
   916 Unicode UCS-2 character set.
       
       This overload forwards to the five-argument overload and discards the 
       information about unconvertible characters.
   917 
   918 @param aUnicode On return, contains the Unicode encoded output string.
   919 @param aUtf8 The UTF-8 encoded input string
   920 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
       UTF-8.
   921 @return The number of unconverted bytes left at the end of the input descriptor, 
   922 or one of the error values defined in TError. */
   923 TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8)
   924 	{
       	// The callee reads and increments the counter before ever assigning it,
       	// so both placeholders must start from defined values: no unconvertible
       	// characters yet, and no (negative) index.
       	TInt dummyUnconverted = 0;
       	TInt dummyUnconvertedIndex = -1;
   926 	return ConvertToUnicodeFromUtf8(aUnicode, aUtf8, aGenerateJavaConformantUtf8, dummyUnconverted, dummyUnconvertedIndex);
   927 	}
       
   928 
       
   929 /** Converts text encoded using the Unicode transformation format UTF-8 into the 
   930 Unicode UCS-2 character set. Surrogate pairs can be created when a valid 4 byte UTF-8 is input.
       A byte that cannot start a sequence, or a sequence containing a byte that is not 
       a continuation byte, produces the replacement character U+FFFD in the output.
   931 
   932 The variant of UTF-8 used internally by Java differs slightly from standard 
   933 UTF-8. The TBool argument controls the UTF-8 variant generated by this function.
       NOTE: aGenerateJavaConformantUtf8 is not read by the current implementation.
   934 
   935 @param aUnicode On return, contains the Unicode encoded output string.
   936 @param aUtf8 The UTF-8 encoded input string
   937 @param aGenerateJavaConformantUtf8 EFalse for orthodox UTF-8. ETrue for Java 
   938 UTF-8. The default is EFalse.
       @param aNumberOfUnconvertibleCharacters Incremented once for every replacement 
       character written to the output. The value is not reset by this function, so 
       the caller has to initialise it (normally to zero).
       @param aIndexOfFirstByteOfFirstUnconvertibleCharacter Set to the index, within 
       aUtf8, of the byte at which the first unconvertible character was detected 
       (the offending lead byte, or the byte that should have been a continuation 
       byte). It is only written while aNumberOfUnconvertibleCharacters is zero or 
       negative and is not touched if every character converts, so the caller has to 
       initialise it (e.g. to a negative value).
   946 @return The number of unconverted bytes left at the end of the input descriptor, 
   947 or one of the error values defined in TError. */
   948 
   949 /* of note: conformance.  Unicode standard 5.0 section 3.9, table 3-7
   950  * Well formed UTF-8 Byte Sequences, full table.
   951  * +----------------------------------------------------------------+
   952  * | Code Points        | 1st byte | 2nd byte | 3rd byte | 4th byte |
   953  * +--------------------+----------+----------+----------+----------+
        * | U+0000..U+007F     | 00..7F   |          |          |          |  1 byte, ascii
   955  * | U+0080..U+07FF     | C2..DF   | 80..BF   |          |          |  2 bytes, error if 1st < 0xC2 
   956  * | U+0800..U+0FFF     | E0       | A0..BF   | 80..BF   |          |  3 bytes, 1st == 0xE0, error if 2nd < 0xA0
   957  * | U+1000..U+CFFF     | E1..EC   | 80..BF   | 80..BF   |          |  normal
   958  * | U+D000..U+D7FF     | ED       | 80..9F   | 80..BF   |          |  3 bytes, 1st == 0xED, error if 2nd > 0x9F
   959  * | U+E000..U+FFFF     | EE..EF   | 80..BF   | 80..BF   |          |  normal
   960  * | U+10000..U+3FFFF   | F0       | 90..BF   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xf0, error if 2nd < 0x90
   961  * | U+40000..U+FFFFF   | F1..F3   | 80..BF   | 80..BF   | 80..BF   |  normal
   962  * | U+100000..U+10FFFF | F4       | 80..8F   | 80..BF   | 80..BF   |  4 bytes, 1st == 0xF4, error if 2nd > 0x8F
   963  * +--------------------+----------+----------+----------+----------+
   964  * 
   965  * As a consequence of the well-formedness conditions specified in table 3-7,
   966  * the following byte values are disallowed in UTF-8: C0-C1, F5-FF.
        *
        * NOTE(review): the code below only tests the bit pattern of the lead byte and
        * the 10xxxxxx form of the continuation bytes. The per-row range restrictions
        * of the table (overlong forms, encoded surrogates, values above U+10FFFF) are
        * not enforced here - confirm whether that is intended.
   967  */
   968 TInt CnvUtfConverter::ConvertToUnicodeFromUtf8(TDes16& aUnicode, const TDesC8& aUtf8, TBool aGenerateJavaConformantUtf8,
   969 		TInt& aNumberOfUnconvertibleCharacters, TInt& aIndexOfFirstByteOfFirstUnconvertibleCharacter)
   970 	{	
   971 	aUnicode.SetLength(0);
       	// Nothing to convert.
   972 	if (aUtf8.Length()==0)
   973 		{
   974 		return 0;
   975 		}
       	// No room for any output: report the whole input as unconverted.
   976 	if (aUnicode.MaxLength()==0)
   977 		{
   978 		return aUtf8.Length();
   979 		}
   980 
       	// "Current" pointers address the next element to write/read, "last"
       	// pointers the final usable element of each descriptor. The pending
       	// pointer remembers the lead byte of the sequence being decoded so that
       	// the input position can be rewound when the output has no room for it.
   981 	TUint16* pointerToCurrentUnicodeCharacter=CONST_CAST(TUint16*, aUnicode.Ptr());
   982 	const TUint16* pointerToLastUnicodeCharacter=pointerToCurrentUnicodeCharacter+(aUnicode.MaxLength()-1);
   983 	const TUint8* pointerToCurrentUtf8Byte=aUtf8.Ptr();
   984 	const TUint8* pointerToPendingUtf8Byte=aUtf8.Ptr();
   985 	const TUint8* pointerToLastUtf8Byte=pointerToCurrentUtf8Byte+(aUtf8.Length()-1);
   986 	TUint16 replacementcharacter = 0xFFFD;
   987 	TUint8 currentUtf8Byte;
   988 	TUint currentUnicodeCharacter;
   989 	TInt sequenceLength;		
   990 	
   991 	FOREVER
   992 		{
   993 		__ASSERT_DEBUG(pointerToCurrentUnicodeCharacter<=pointerToLastUnicodeCharacter, Panic(EPanicBadUnicodePointers8));
   994 		__ASSERT_DEBUG(pointerToCurrentUtf8Byte<=pointerToLastUtf8Byte, Panic(EPanicBadUtf8Pointers3));
   995 		currentUtf8Byte=*pointerToCurrentUtf8Byte;
   996 		pointerToPendingUtf8Byte = pointerToCurrentUtf8Byte;
   997 		sequenceLength=100;
   998 		
       		// Classify the lead byte by shifting the 11110xxx test pattern:
       		//   i=0: 11110xxx -> 4,  i=1: 1110xxxx -> 3,  i=2: 110xxxxx -> 2,
       		//   i=3: 10xxxxxx -> 1 (stray continuation byte),
       		//   i=4: 0xxxxxxx -> 0 (single byte),
       		//   i=5: both sides are zero within 8 bits, so every remaining
       		//        byte (0xF8..0xFF) matches and yields -1.
   999 		for(TInt i=0;i<7;i++)
  1000 			{
  1001 			if ((currentUtf8Byte&(0xf8<<i))==(STATIC_CAST(TUint8,(0xF0<<i))))
  1002 				{
  1003 				sequenceLength = 4-i;
  1004 				break;
  1005 				}
  1006 			}
  1007 
       		// Lengths 1 and -1 cannot start a sequence: emit U+FFFD for the byte.
  1008 		if ((sequenceLength<2 || sequenceLength>6) && sequenceLength!=0)
  1009 			{
  1010 			currentUnicodeCharacter=replacementcharacter;
  1011 				UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1012 						aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pointerToCurrentUtf8Byte-aUtf8.Ptr());
  1013 			}
  1014 		else
  1015 			{		
       			// The input ends in the middle of this sequence: stop here and
       			// leave the partial sequence unconverted, or report an error if
       			// nothing has been produced by this call.
  1016 			if ((pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1)<sequenceLength)
  1017 				{
  1018 					if((pointerToCurrentUnicodeCharacter-aUnicode.Ptr())==0)
  1019 						return EErrorIllFormedInput;
  1020 					
  1021 					break;
  1022 				}			
  1023 				
       			// Payload bits of the lead byte (all 7 bits for a single byte).
  1024 			currentUnicodeCharacter = currentUtf8Byte&(0x7F>>sequenceLength);
  1025 			
       			// Fold in 6 payload bits from each continuation byte.
  1026 			for(TInt i=sequenceLength;i>1; i--)
  1027 				{
  1028 				currentUtf8Byte = *(++pointerToCurrentUtf8Byte);
  1029 				if ((currentUtf8Byte&0xc0)==0x80)
  1030 					{
  1031 					currentUnicodeCharacter = (currentUnicodeCharacter<<6)|(currentUtf8Byte&0x3F);
  1032 					}
  1033 				else
  1034 					{
  1035 					currentUnicodeCharacter=replacementcharacter;
  1036 						UpdateUnconvertibleInfo(aNumberOfUnconvertibleCharacters,
  1037 								aIndexOfFirstByteOfFirstUnconvertibleCharacter,	pointerToCurrentUtf8Byte-aUtf8.Ptr());
  1038 					--pointerToCurrentUtf8Byte;
       					// Stop at the first bad byte. Without this the loop would
       					// read the same byte again on every remaining iteration
       					// and count a single replacement character several times.
       					// Decoding resumes at the offending byte.
       					break;
  1039 					}
  1040 				}
  1041 			}
  1042 			
  1043 		if (currentUnicodeCharacter > 0xFFFF)
  1044 			{
       			// A surrogate pair needs two output slots; if only one is left,
       			// rewind to the lead byte so the whole sequence stays unconverted.
  1045 			if(pointerToCurrentUnicodeCharacter>=pointerToLastUnicodeCharacter)
  1046 				{
  1047 				pointerToCurrentUtf8Byte=pointerToPendingUtf8Byte;
  1048 				break;
  1049 				}
  1050 			
       			// 0xD7C0 is 0xD800 - (0x10000>>10): high surrogate first, then low.
  1051 			TUint surrogate = (currentUnicodeCharacter>>10) + 0xD7C0;
  1052 			*pointerToCurrentUnicodeCharacter=STATIC_CAST(TUint16, surrogate);			
  1053 			++pointerToCurrentUnicodeCharacter;
  1054 					
  1055 			surrogate = (currentUnicodeCharacter&0x3FF)+0xDC00;
  1056 			*pointerToCurrentUnicodeCharacter=STATIC_CAST(TUint16, surrogate);			
  1057 			++pointerToCurrentUnicodeCharacter;
  1058 			++pointerToCurrentUtf8Byte;
  1059 			}
  1060 		else
  1061 			{
  1062 			*pointerToCurrentUnicodeCharacter=STATIC_CAST(TUint16, currentUnicodeCharacter);			
  1063 			++pointerToCurrentUnicodeCharacter;
  1064 			++pointerToCurrentUtf8Byte;
  1065 			}
  1066 	
       		// Stop when either the input is exhausted or the output is full.
  1067 		if ((pointerToCurrentUtf8Byte>pointerToLastUtf8Byte) || (pointerToCurrentUnicodeCharacter>pointerToLastUnicodeCharacter))
  1068 			{
  1069 			break;
  1070 			}
  1071 		}
  1072 
  1073 		aUnicode.SetLength(pointerToCurrentUnicodeCharacter-aUnicode.Ptr());
  1074 		return pointerToLastUtf8Byte-pointerToCurrentUtf8Byte+1;
  1075 	}
       
  1076 
       
  1077 
       
  1078 GLREF_C void IsCharacterSetUTF8 (TInt& aConfidenceLevel, const TDesC8& aSample)
       	{
       	// Rates how likely it is that aSample is UTF-8 encoded text.
       	//
       	// aConfidenceLevel - on return: 89 for an empty sample, 100 if every byte
       	//                    fits the lead byte / continuation byte structure of
       	//                    UTF-8, 0 otherwise.
       	// aSample          - the bytes to examine.
       	//
       	// A multi-byte sequence that is cut off by the end of the sample is not
       	// counted against the sample.
       	const TInt sampleLength = aSample.Length();
       	if (sampleLength == 0)
       		{
       		aConfidenceLevel = 89;
       		return;
       		}

       	const TUint8* const bytes = &aSample[0];
       	TInt pendingContinuationBytes = 0;
       	TBool wellFormed = ETrue;

       	for (TInt pos = 0; wellFormed && (pos < sampleLength); ++pos)
       		{
       		const TUint8 byte = bytes[pos];
       		if (pendingContinuationBytes > 0)
       			{
       			// Inside a multi-byte sequence: only 10xxxxxx (0x80-0xbf) may follow.
       			if ((byte & 0xc0) == 0x80)
       				{
       				--pendingContinuationBytes;
       				}
       			else
       				{
       				wellFormed = EFalse;
       				}
       			}
       		else if ((byte & 0x80) == 0x00)
       			{
       			// 0x00-0x7f: UTF-8 is transparent for ASCII, nothing to check.
       			}
       		else if ((byte & 0xe0) == 0xc0)
       			{
       			pendingContinuationBytes = 1;
       			}
       		else if ((byte & 0xf0) == 0xe0)
       			{
       			pendingContinuationBytes = 2;
       			}
       		else if ((byte & 0xf8) == 0xf0)
       			{
       			pendingContinuationBytes = 3;
       			}
       		else
       			{
       			// Neither ASCII nor a recognised lead byte: illegal/irregular UTF-8.
       			wellFormed = EFalse;
       			}
       		}

       	aConfidenceLevel = wellFormed ? 100 : 0;
       	}
       
  1141 
       
  1142 GLREF_C void IsCharacterSetUTF7(TInt& aConfidenceLevel, const TDesC8& aSample)
       	{
       	// Rates how likely it is that aSample is UTF-7 encoded text.
       	//
       	// aConfidenceLevel - on return, a score between 0 and 100. Scoring starts
       	//                    at 70 and is adjusted while scanning the sample.
       	// aSample          - the bytes to examine.
       	const TInt sampleLength = aSample.Length();
       	TInt score = 70;

       	TInt pos = 0;
       	while (pos < sampleLength)
       		{
       		const TUint8 current = aSample[pos];

       		// UTF-7 only uses 7-bit values and never contains "~": either one
       		// rules the encoding out straight away.
       		if (((current & 0x80) != 0x00) || (current == '~'))
       			{
       			score = 0;
       			break;
       			}

       		const TBool hasFollower = (pos < sampleLength-1);
       		if ((current == 0x1b) && hasFollower)
       			{
       			// 0x1b is the SMS 7-bit escape value. Lower the score when it is
       			// followed by one of the values in the table below.
       			static const TInt smsExtensionTable[11] = 
       				{0x0a, 0x14, 0x1b, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65};
       			const TUint8 follower = aSample[pos+1];
       			for (TInt entry = 0; entry < 11; ++entry)
       				{
       				if (follower == smsExtensionTable[entry])
       					{
       					score -= 10;
       					}
       				}
       			}
       		else if ((current == 0x2b) && hasFollower)
       			{
       			// 0x2b ('+') is the UTF-7 escape. The byte after it should be
       			// '+', '-', '/' or a letter; anything else counts against UTF-7.
       			// NOTE(review): the digits 0-9 belong to the base-64 alphabet but
       			// are not accepted here - unclear whether that is deliberate
       			// (e.g. to avoid matching phone numbers such as "+44"); confirm.
       			const TUint8 follower = aSample[pos+1];
       			const TBool isUpperCaseLetter = (follower >= 0x41) && (follower <= 0x5a);
       			const TBool isLowerCaseLetter = (follower >= 0x61) && (follower <= 0x7a);
       			const TBool isAcceptedSymbol = (follower == 0x2b) || (follower == 0x2d) || (follower == 0x2f);
       			if (isAcceptedSymbol || isUpperCaseLetter || isLowerCaseLetter)
       				{
       				score += 5;
       				}
       			else
       				{
       				score -= 15;
       				}
       			// The follower has been dealt with: skip it.
       			++pos;
       			}
       		++pos;
       		}

       	// Clamp the score to the range 0..100.
       	if (score <= 0)
       		{
       		score = 0;
       		}
       	else if (score > 100)
       		{
       		score = 100;
       		}
       	aConfidenceLevel = score;
       	}