--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/charconvfw/charconvplugins/src/shared/gb2312_shared.cpp Tue Feb 02 02:02:46 2010 +0200
@@ -0,0 +1,122 @@
+/*
+* Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+#include <e32std.h>
+#include <convgeneratedcpp.h>
+#include "gb2312.h"
+
+struct SCnvConversionData;
+
+EXPORT_C const TDesC8& CnvGb2312::ReplacementForUnconvertibleUnicodeCharacters()
+ {
+ return ReplacementForUnconvertibleUnicodeCharacters_internal();
+ }
+
+EXPORT_C const SCnvConversionData& CnvGb2312::ConversionData()
+ {
+ return conversionData;
+ }
+
+EXPORT_C TBool CnvGb2312::IsCharGBBased(TInt& aConfidenceLevel, const TDesC8& aSample)
+ {
+ TInt sampleLength = aSample.Length();
+ aConfidenceLevel = 0;
+ //WBB the following is for distiguish between big5 and GBK
+ TInt totalWeight=0; //sum of the weights of 20 most frequent chars
+ TInt sumOfGoodChar=0; //the number of chars whose first byte and second are both in the range
+ TInt sumOfWeight=0; //sum of the weights of the chars which are included in the sample
+ TInt sumOutChar=0; //the number of chars which are not common
+ TInt sumOfBadSecondByte=0;//the number of chars whose first byte is in the range but not the second
+ struct referenceChar
+ {
+ TUint charGBK;
+ TInt weight;
+ };
+
+ referenceChar refGbk[20];
+ static const TInt iniWeight[20]=
+ {
+ //occurence per 1000 chars
+ 30,20,20,10,10,10,10,10,5,5,
+ 5,5,5,5,5,5,5,5,5,5
+ };
+
+ static const TUint iniChar[20]=
+ {
+ 0xa3ac,0xb5c4,0xc1cb,0xb8f6,0xb2bb,0xb0d1,0xd2bb,0xcac7,0xd2aa,0xbecd,
+ 0xd2b2,0xccec,0xc9cf,0xbacd,0xd6d0,0xd4da,0xd0a1,0xc8cb,0xcfc2,0xd6d0,
+ };
+
+ for (TInt k=0; k<20; k++)
+ {
+ refGbk[k].charGBK=iniChar[k];
+ refGbk[k].weight=iniWeight[k];
+ totalWeight=totalWeight+iniWeight[k];
+ }
+
+
+ //WBB
+ for (TInt i = 0; i < sampleLength; ++i)
+ {
+ //GBK encoding first byte range 0x81-0xfe
+ // second byte range 0x40-0x7e, 0x80-0xfe
+ if((aSample[i] >= 0x81) && (aSample[i] <= 0xfe))
+ {
+ TInt increment1 = i+1;
+ if (increment1 >= sampleLength)
+ break;
+ if (((aSample[increment1] >=0x40) && (aSample[increment1] <= 0x7e)) ||
+ ((aSample[increment1] >=0x80) && (aSample[increment1] <= 0xfe)))
+ {
+ //WBB
+ TUint charGbk=(aSample[i]<<8)|(aSample[increment1]);
+ TInt j;
+ for (j=0; j<20; j++)
+ {
+ if (charGbk==refGbk[j].charGBK)
+ {
+ sumOfWeight=sumOfWeight+refGbk[j].weight;
+ break;
+ }
+ }
+ if ((aSample[i]>=0xa4)&&(aSample[i]<=0xaf))
+ sumOutChar++;
+ sumOfGoodChar++;
+ i++;
+ //WBB
+ }
+ else
+ {
+ sumOfBadSecondByte++;
+ }
+ }
+ } // for
+
+ TInt limit;
+ limit = (10*sampleLength)/100;
+ if (sumOfGoodChar > limit)
+ {
+ aConfidenceLevel=sumOfGoodChar*100/(sumOfBadSecondByte+sumOfGoodChar);
+ aConfidenceLevel=aConfidenceLevel-Max(0,((totalWeight-sumOfWeight)*sumOfGoodChar/1000));//against frequent chars
+ aConfidenceLevel=aConfidenceLevel-(sumOutChar*100/sumOfGoodChar);//against gap
+ aConfidenceLevel=(aConfidenceLevel < 0)?0:aConfidenceLevel;
+ }
+ else
+ aConfidenceLevel=0;
+ return ETrue;
+ }