searchengine/oss/loc/analysis/src/tinyunicode.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
hgs
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
hgs
parents:
diff changeset
     3
* All rights reserved.
hgs
parents:
diff changeset
     4
* This component and the accompanying materials are made available
hgs
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
hgs
parents:
diff changeset
     6
* which accompanies this distribution, and is available
hgs
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
hgs
parents:
diff changeset
     8
*
hgs
parents:
diff changeset
     9
* Initial Contributors:
hgs
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
hgs
parents:
diff changeset
    11
*
hgs
parents:
diff changeset
    12
* Contributors:
hgs
parents:
diff changeset
    13
*
hgs
parents:
diff changeset
    14
* Description: 
hgs
parents:
diff changeset
    15
*
hgs
parents:
diff changeset
    16
*/
hgs
parents:
diff changeset
    17
#include "tinyunicode.h"
hgs
parents:
diff changeset
    18
hgs
parents:
diff changeset
    19
namespace analysis {
hgs
parents:
diff changeset
    20
hgs
parents:
diff changeset
    21
	namespace unicode {
hgs
parents:
diff changeset
    22
hgs
parents:
diff changeset
    23
        // See Chapter 16 Hangul in http://unicode.org/reports/tr15/ for reference
hgs
parents:
diff changeset
    24
        // 
hgs
parents:
diff changeset
    25
        // Ported from Hangul Java code example
hgs
parents:
diff changeset
    26
        //
hgs
parents:
diff changeset
    27
	
hgs
parents:
diff changeset
    28
        /**
         * Tests whether a code point lies in the range 0xAC00..0xD7AF
         * (both ends inclusive), i.e. the Hangul Syllables block.
         *
         * @param c code point to classify
         * @return 1 if c is inside the range, 0 otherwise
         */
        int IsHangulSyllable(int c) {
            if (c < 0xAC00) {
                return 0;   // below the first syllable
            }
            if (c > 0xD7AF) {
                return 0;   // above the end of the block
            }
            return 1;
        }
hgs
parents:
diff changeset
    31
        /**
         * Tests whether a code point lies in one of the Unicode blocks that
         * carry Hangul Jamo (individual Korean letters, as opposed to
         * precomposed syllables).
         *
         * Block boundaries follow the Unicode block definitions and are all
         * inclusive.
         *
         * @param c code point to classify
         * @return 1 if c is inside one of the ranges below, 0 otherwise
         */
        int IsHangulJamo(int c) {
            return (c >= 0x1100 && c <= 0x11FF)    // Hangul Jamo
                || (c >= 0x3130 && c <= 0x318F)    // Hangul compatibility Jamo
                || (c >= 0xA960 && c <= 0xA97F)    // Hangul Jamo Extended-A (last code point included)
                // Hangul Jamo Extended-B begins at 0xD7B0; everything up to
                // 0xD7AF belongs to the Hangul Syllables block and must not
                // be reported as Jamo.
                || (c >= 0xD7B0 && c <= 0xD7FF)
                // NOTE(review): this is the complete Halfwidth and Fullwidth
                // Forms block, which also contains non-Hangul characters
                // (halfwidth Hangul occupy only 0xFFA0..0xFFDC). Kept unchanged
                // to preserve existing classification - confirm the intent.
                || (c >= 0xff00 && c <= 0xffef);
        }
hgs
parents:
diff changeset
    38
hgs
parents:
diff changeset
    39
        int IsHangul(int c) {
hgs
parents:
diff changeset
    40
            return IsHangulSyllable(c) || IsHangulJamo(c); 
hgs
parents:
diff changeset
    41
        }
hgs
parents:
diff changeset
    42
		int IsCjk(int c) {
hgs
parents:
diff changeset
    43
			return (c >= 0x4E00 && c < 0xa000)  // CJK Unified ideographs block
hgs
parents:
diff changeset
    44
                 || IsHangul(c)              // Korean alphabet
hgs
parents:
diff changeset
    45
				 || (c >= 0x3400 && c < 0x4Dc0)     // CJK Unified ideographs extension A
hgs
parents:
diff changeset
    46
				 || (c >= 0x3040 && c <= 0x309f)    // Hiragana
hgs
parents:
diff changeset
    47
				 || (c >= 0x20000 && c < 0x30000);  // CJK Unified ideographs extension B, C, D E and so forth
hgs
parents:
diff changeset
    48
		}
hgs
parents:
diff changeset
    49
		
hgs
parents:
diff changeset
    50
		/**
		 * Tests whether a code point lies in the range 0x0E00..0x0EFF
		 * (both ends inclusive).
		 *
		 * NOTE(review): the Thai block proper is 0x0E00..0x0E7F; the upper
		 * half of the accepted range (0x0E80..0x0EFF) is the Lao block -
		 * confirm that accepting it is intended.
		 *
		 * @param c code point to classify
		 * @return 1 if c is inside the range, 0 otherwise
		 */
		int IsThai(int c) {
			if (c < 0x0E00 || c >= 0x0F00) {
				return 0;
			}
			return 1;
		}
hgs
parents:
diff changeset
    53
	}
hgs
parents:
diff changeset
    54
	
hgs
parents:
diff changeset
    55
}