--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/wmlengine/src/htmlp/src/htmlp_lexer.c Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,476 @@
+/*
+* Copyright (c) 2000 - 2001 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of the License "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+
+/*
+
+Lexer for HTML parser.
+
+*/
+#ifndef FEA_RME_NOHTMLPARSER
+
+#include "nwx_defs.h"
+#include "nw_htmlp_lexer.h"
+#include "nw_string_char.h"
+#include "BrsrStatusCodes.h"
+
+/* Attaches lexer *pL to the caller's buffer pBuf of byteCount bytes (the buffer
+is referenced, not copied) and to the element dictionary, then rewinds the read
+position, character position and CR/LF statistics. Returns KBrsrFailure and
+leaves *pL untouched if the buffer is NULL or empty, otherwise KBrsrSuccess. */
+TBrowserStatusCode NW_HTMLP_Lexer_InitFromBuffer(NW_HTMLP_Lexer_t* pL,
+                                                 NW_Uint32 byteCount,
+                                                 NW_Uint8* pBuf,
+                                                 NW_HTMLP_ElementTableIndex_t elementCount,
+                                                 NW_HTMLP_ElementDescriptionConst_t* pElementDictionary)
+{
+  if ((pBuf == NULL) || (byteCount == 0)) {
+    return KBrsrFailure;
+  }
+  pL->pBuf = pBuf;
+  pL->byteCount = byteCount;
+  pL->pElementDictionary = pElementDictionary;
+  pL->elementCount = elementCount;
+  pL->encoding = 0;
+  pL->endianness = NW_NATIVE_ENDIAN;
+  pL->end = NW_FALSE;
+  pL->readPosition = pL->charPosition = 0;
+  pL->lineColumn.crCount = pL->lineColumn.lfCount = 0;
+  pL->lineColumn.charsSinceLastCR = pL->lineColumn.charsSinceLastLF = 0;
+  return KBrsrSuccess;
+}
+
+/* Returns in *ppData a pointer into pBuf at byteIndex; *pByteCount (bytes wanted)
+is clamped to what remains in pBuf. Fails with *ppData == NULL if byteIndex is out
+of range. Bounds are tested by subtraction so byteIndex + *pByteCount cannot wrap. */
+TBrowserStatusCode NW_HTMLP_Lexer_DataAddressFromBuffer(NW_HTMLP_Lexer_t* pL,
+                                                        NW_Uint32 byteIndex,
+                                                        NW_Uint32* pByteCount,
+                                                        NW_Uint8** ppData)
+{
+  NW_ASSERT(byteIndex < pL->byteCount);
+  NW_ASSERT(*pByteCount <= (pL->byteCount - byteIndex));
+  if (byteIndex >= pL->byteCount) {
+    *ppData = NULL;
+    return KBrsrFailure;
+  }
+  *ppData = pL->pBuf + byteIndex;
+  if (*pByteCount > (pL->byteCount - byteIndex)) {
+    *pByteCount = pL->byteCount - byteIndex;
+  }
+  return KBrsrSuccess;
+}
+
+/* Worker shared by NW_HTMLP_Lexer_PeekOffset() and NW_HTMLP_Lexer_AdvanceOffset().
+Decodes characters from the read position until offsetCharCount characters
+have been stepped over; *pC receives the last character decoded.
+  advance == NW_FALSE (peek): the lexer state is left unchanged.
+  advance == NW_TRUE: read position, character position and the CR/LF
+  line/column statistics move past the stepped-over characters.
+*pEOF is NW_TRUE if the lexer was already at its end or the scan reached the
+end of input. Returns KBrsrFailure if a character cannot be decoded or would
+extend past the buffer, otherwise KBrsrSuccess. */
+static
+TBrowserStatusCode NW_HTMLP_Lexer_PeekOrAdvanceOffset(NW_Bool advance,
+                                                      NW_HTMLP_Lexer_t* pL,
+                                                      NW_Uint32 offsetCharCount,
+                                                      NW_Uint32* pC,
+                                                      NW_Bool* pEOF)
+{
+  NW_Uint32 i;
+  NW_Uint32 charCount = 0;
+  NW_Int32 byteCount = 0;
+  NW_Uint32 crCount = 0, lfCount = 0;
+  NW_Uint32 charsPastCR = 0, charsPastLF = 0;
+  NW_Ucs2 c_ucs2;
+  NW_Bool resetPastCR = 0, resetPastLF = 0;
+
+  /* it makes no sense to advance by 0 */
+  NW_ASSERT((advance == NW_FALSE) || (offsetCharCount > 0));
+  if (NW_HTMLP_Lexer_AtEnd(pL)) {
+    *pEOF = NW_TRUE;
+    return KBrsrSuccess;
+  }
+  *pEOF = NW_FALSE;
+  i = pL->readPosition;
+  do {
+    /* It is assumed that this func returns UNICODE code points. */
+    byteCount = NW_String_readChar(&(pL->pBuf[i]), &c_ucs2, pL->encoding);
+    /* Reject a decode error first: a negative count must not take part in
+    the unsigned bounds test below, and c_ucs2 may not have been set. */
+    if (byteCount < 0) {
+      return KBrsrFailure;
+    }
+    /* This catches NW_String_readChar() reading past end of buffer
+    and can be removed when the readChar function does proper
+    error checking (requires passing in buf length). */
+    if ((i + (NW_Uint32)byteCount) > pL->byteCount) {
+      return KBrsrFailure;
+    }
+    *pC = c_ucs2;
+    if (charCount == offsetCharCount) {
+      break;
+    }
+    charCount++;
+    charsPastCR++;
+    charsPastLF++;
+    if (c_ucs2 == 0xd /* CR */) {
+      crCount++;
+      resetPastCR = 1;
+      charsPastCR = 0;
+    } else if (c_ucs2 == 0xa /* LF */) {
+      lfCount++;
+      resetPastLF = 1;
+      charsPastLF = 0;
+    }
+    i += byteCount;
+    /* Encoding-specific early stop: UCS-2 with exactly one byte left, or UTF-8
+    with fewer than three bytes left, is reported as end of input. */
+    if (((pL->encoding == HTTP_iso_10646_ucs_2) && (i == (pL->byteCount - 1)))
+        || ((pL->encoding == HTTP_utf_8) && ((i + 3) > pL->byteCount))) {
+      *pEOF = NW_TRUE;
+      break;
+    }
+  } while (i < pL->byteCount);
+  if (i >= pL->byteCount) {
+    *pEOF = NW_TRUE;
+  }
+  if (advance == NW_TRUE) {
+    if (*pEOF == NW_TRUE) {
+      pL->readPosition = pL->byteCount;
+      pL->end = NW_TRUE;
+    } else {
+      pL->readPosition = i;
+    }
+    pL->charPosition += charCount;
+    pL->lineColumn.crCount += crCount;
+    pL->lineColumn.lfCount += lfCount;
+    if (resetPastCR) {
+      pL->lineColumn.charsSinceLastCR = charsPastCR;
+    } else {
+      pL->lineColumn.charsSinceLastCR += charsPastCR;
+    }
+    if (resetPastLF) {
+      pL->lineColumn.charsSinceLastLF = charsPastLF;
+    } else {
+      pL->lineColumn.charsSinceLastLF += charsPastLF;
+    }
+  }
+  return KBrsrSuccess;
+}
+
+/* Reads the character offsetCharCount characters ahead of the read position into
+*pC without moving the lexer; *pEOF reports whether end of input was reached. */
+TBrowserStatusCode NW_HTMLP_Lexer_PeekOffset(NW_HTMLP_Lexer_t* pL, NW_Uint32 offsetCharCount,
+                                             NW_Uint32* pC, NW_Bool* pEOF)
+{
+  const NW_Bool advance = NW_FALSE; /* peek only */
+  return NW_HTMLP_Lexer_PeekOrAdvanceOffset(advance, pL, offsetCharCount, pC, pEOF);
+}
+
+/* Consumes offsetCharCount (> 0, asserted by the worker) characters, moving the lexer
+position and line/column statistics. The character and EOF flag read are discarded. */
+TBrowserStatusCode NW_HTMLP_Lexer_AdvanceOffset(NW_HTMLP_Lexer_t* pL, NW_Uint32 offsetCharCount)
+{
+  NW_Bool ignoredEof;
+  NW_Uint32 ignoredChar;
+  return NW_HTMLP_Lexer_PeekOrAdvanceOffset(NW_TRUE, pL, offsetCharCount, &ignoredChar, &ignoredEof);
+}
+
+/* Snapshots the lexer's read state into *pPosition, e.g. for NW_HTMLP_Lexer_SetPosition(). */
+void NW_HTMLP_Lexer_GetPosition(NW_HTMLP_Lexer_t* pL, NW_HTMLP_Lexer_Position_t* pPosition) {
+  pPosition->end = pL->end;
+  pPosition->charPosition = pL->charPosition;
+  pPosition->readPosition = pL->readPosition;
+  pPosition->charsSinceLastLF = pL->lineColumn.charsSinceLastLF;
+  pPosition->charsSinceLastCR = pL->lineColumn.charsSinceLastCR;
+  pPosition->lfCount = pL->lineColumn.lfCount;
+  pPosition->crCount = pL->lineColumn.crCount;
+}
+
+/* Restores a position captured earlier with NW_HTMLP_Lexer_GetPosition(), line
+and column statistics included. Seeking to an arbitrary byte offset is not
+generally possible because a character may occupy a variable number of bytes,
+so this is only meant for returning to the start of an interval already
+scanned. Just two sanity checks are made, so a bad position is not always
+caught here; when one fails, KBrsrFailure is returned and *pL is unchanged. */
+TBrowserStatusCode NW_HTMLP_Lexer_SetPosition(NW_HTMLP_Lexer_t* pL, NW_HTMLP_Lexer_Position_t* pPosition)
+{
+  if (pPosition->readPosition > pL->byteCount) {
+    return KBrsrFailure; /* outside the buffer */
+  }
+  if (pPosition->end && (pPosition->readPosition != pL->byteCount)) {
+    return KBrsrFailure; /* the end flag is only valid at the very end */
+  }
+  pL->end = pPosition->end;
+  pL->charPosition = pPosition->charPosition;
+  pL->readPosition = pPosition->readPosition;
+  pL->lineColumn.lfCount = pPosition->lfCount;
+  pL->lineColumn.crCount = pPosition->crCount;
+  pL->lineColumn.charsSinceLastLF = pPosition->charsSinceLastLF;
+  pL->lineColumn.charsSinceLastCR = pPosition->charsSinceLastCR;
+  return KBrsrSuccess;
+}
+
+/* Sets *pMatch to NW_TRUE if the next character, as reported by
+NW_HTMLP_Lexer_Peek(), is "space" as defined in HTML: 0x20 (space), 0x9 (tab),
+0xc (form feed), 0x200b (zero-width space), 0xd (cr) or 0xa (lf); otherwise
+*pMatch is NW_FALSE. At end of input the answer is NW_FALSE with KBrsrSuccess;
+a failed peek returns its failure status with *pMatch == NW_FALSE. */
+TBrowserStatusCode NW_HTMLP_Lexer_IsSpace(NW_HTMLP_Lexer_t* pL, NW_Bool* pMatch)
+{
+  NW_Bool atEnd;
+  NW_Uint32 ch;
+  TBrowserStatusCode status;
+
+  *pMatch = NW_FALSE;
+  status = NW_HTMLP_Lexer_Peek(pL, &ch, &atEnd);
+  if (BRSR_STAT_IS_FAILURE(status)) {
+    return status;
+  }
+  if ((atEnd != NW_TRUE)
+      && ((ch == 0x9U) || (ch == 0xaU) || (ch == 0xcU) || (ch == 0xdU)
+          || (ch == 0x20U) || (ch == 0x200bU))) {
+    *pMatch = NW_TRUE;
+  }
+  return KBrsrSuccess;
+}
+
+/* Sets *pMatch to NW_TRUE if the next character, as reported by
+NW_HTMLP_Lexer_Peek(), is a line-end character: 0xd (CR) or 0xa (LF). Only one
+character is examined, not a CR LF pair. At end of input *pMatch is NW_FALSE
+with KBrsrSuccess; a failed peek returns its failure status. */
+TBrowserStatusCode NW_HTMLP_Lexer_IsCRLF(NW_HTMLP_Lexer_t* pL, NW_Bool* pMatch)
+{
+  NW_Bool atEnd;
+  NW_Uint32 ch;
+  TBrowserStatusCode status;
+
+  *pMatch = NW_FALSE;
+  status = NW_HTMLP_Lexer_Peek(pL, &ch, &atEnd);
+  if (BRSR_STAT_IS_FAILURE(status)) {
+    return status;
+  }
+  if ((atEnd != NW_TRUE) && ((ch == 0xaU) || (ch == 0xdU))) {
+    *pMatch = NW_TRUE;
+  }
+  return KBrsrSuccess;
+}
+
+/* Sets *pMatch to NW_TRUE if the next character, as reported by
+NW_HTMLP_Lexer_Peek(), is in [a-zA-Z]; otherwise, and at end of input, *pMatch
+is NW_FALSE with KBrsrSuccess. A failed peek returns its failure status. */
+TBrowserStatusCode NW_HTMLP_Lexer_IsAsciiLetter(NW_HTMLP_Lexer_t* pL, NW_Bool* pMatch)
+{
+  NW_Bool atEnd;
+  NW_Uint32 ch;
+  TBrowserStatusCode status;
+
+  *pMatch = NW_FALSE;
+  status = NW_HTMLP_Lexer_Peek(pL, &ch, &atEnd);
+  if (BRSR_STAT_IS_FAILURE(status)) {
+    return status;
+  }
+  if ((atEnd != NW_TRUE)
+      && (((ch >= (NW_Uint32)'A') && (ch <= (NW_Uint32)'Z'))
+          || ((ch >= (NW_Uint32)'a') && (ch <= (NW_Uint32)'z')))) {
+    *pMatch = NW_TRUE;
+  }
+  return KBrsrSuccess;
+}
+
+/* Sets *pMatch to NW_TRUE if the next character, as reported by
+NW_HTMLP_Lexer_Peek(), is in [0-9]; otherwise, and at end of input, *pMatch
+is NW_FALSE with KBrsrSuccess. A failed peek returns its failure status and
+leaves *pMatch == NW_FALSE. */
+TBrowserStatusCode NW_HTMLP_Lexer_IsAsciiDigit(NW_HTMLP_Lexer_t* pL, NW_Bool* pMatch)
+{
+  NW_Bool atEnd;
+  NW_Uint32 ch;
+  TBrowserStatusCode status;
+
+  *pMatch = NW_FALSE;
+  status = NW_HTMLP_Lexer_Peek(pL, &ch, &atEnd);
+  if (BRSR_STAT_IS_FAILURE(status)) {
+    return status;
+  }
+  if ((atEnd != NW_TRUE) && (ch >= (NW_Uint32)'0') && (ch <= (NW_Uint32)'9')) {
+    *pMatch = NW_TRUE;
+  }
+  return KBrsrSuccess;
+}
+
+/* *pMatch is NW_TRUE iff the next character of the text, as reported by
+NW_HTMLP_Lexer_Peek() as a code point, equals the ASCII character asciiChar.
+At end of input *pMatch is NW_FALSE with KBrsrSuccess; a failed peek returns
+its failure status and leaves *pMatch == NW_FALSE. */
+TBrowserStatusCode NW_HTMLP_Lexer_AsciiCharCompare(NW_HTMLP_Lexer_t* pL,
+                                                   NW_Uint8 asciiChar,
+                                                   NW_Bool* pMatch)
+{
+  NW_Bool atEnd;
+  NW_Uint32 ch;
+  TBrowserStatusCode status;
+
+  *pMatch = NW_FALSE;
+  status = NW_HTMLP_Lexer_Peek(pL, &ch, &atEnd);
+  if (BRSR_STAT_IS_FAILURE(status)) {
+    return status;
+  }
+  if ((atEnd != NW_TRUE) && (ch == asciiChar)) {
+    *pMatch = NW_TRUE;
+  }
+  return KBrsrSuccess;
+}
+
+/*
+Case-sensitive comparison of the upcoming text with an ASCII string; simply
+NW_HTMLP_Lexer_AsciiStringCompareCase() with CaseSensitive fixed to NW_TRUE.
+on entry: no assumptions
+on return: If matched string, then *pMatch == NW_TRUE.
+.........: If did not match string, then *pMatch == NW_FALSE.
+.........: In either case, lexer read position is unchanged
+eof handling: if EOF is encountered before all asciiCharCount characters
+............: have been compared, *pMatch == NW_FALSE, the return value is
+............: KBrsrSuccess and lexer read position is unchanged
+on error return: return value is not KBrsrSuccess, *pMatch == NW_FALSE
+...............: and lexer read position is unchanged
+*/
+TBrowserStatusCode NW_HTMLP_Lexer_AsciiStringCompare(NW_HTMLP_Lexer_t* pL,
+                                                     NW_Uint32 asciiCharCount,
+                                                     const NW_Uint8* pString,
+                                                     NW_Bool* pMatch)
+{
+  /* NW_TRUE selects the case-sensitive comparison */
+  return NW_HTMLP_Lexer_AsciiStringCompareCase(pL, asciiCharCount, pString,
+                                               NW_TRUE, pMatch);
+}
+
+/* Compares the asciiCharCount characters at the read position with the ASCII
+string pString, like NW_HTMLP_Lexer_AsciiStringCompare() but with a choice of
+case handling: a non-zero CaseSensitive requires an exact match, otherwise an
+ASCII letter [a-zA-Z] also matches its other-case form. Only letters are
+folded, so '@' does not match '`' merely because they differ by 0x20.
+*pMatch is NW_TRUE only if every character matched. Reaching end of input
+first gives *pMatch == NW_FALSE with KBrsrSuccess; a failed peek gives
+*pMatch == NW_FALSE and returns the failure status. The lexer read position
+is never moved. */
+TBrowserStatusCode NW_HTMLP_Lexer_AsciiStringCompareCase(NW_HTMLP_Lexer_t* pL,
+                                                         NW_Uint32 asciiCharCount,
+                                                         const NW_Uint8* pString,
+                                                         NW_Bool CaseSensitive,
+                                                         NW_Bool* pMatch)
+{
+  NW_Uint32 c_text, c_lower;
+  NW_Uint32 i;
+  TBrowserStatusCode e = KBrsrSuccess;
+  NW_Bool eof;
+
+  *pMatch = NW_FALSE;
+  NW_ASSERT(asciiCharCount);
+  for (i = 0; i < asciiCharCount; i++) {
+    e = NW_HTMLP_Lexer_PeekOffset(pL, i, &c_text, &eof);
+    if (BRSR_STAT_IS_FAILURE(e) || (eof == NW_TRUE)) {
+      break;
+    }
+    if (c_text != pString[i]) {
+      /* Case-insensitive mode: an ASCII letter also matches its other-case
+      form, which differs only in bit 0x20. Nothing else is folded. */
+      c_lower = c_text | 0x20U;
+      if (CaseSensitive || (c_lower < (NW_Uint32)'a') || (c_lower > (NW_Uint32)'z')
+          || ((c_text ^ 0x20U) != pString[i])) {
+        break;
+      }
+    }
+  }
+  if (i == asciiCharCount) {
+    *pMatch = NW_TRUE;
+  }
+  return e;
+}
+
+/* Opens an interval at the lexer's current position: the byte offsets start/stop and
+the character offsets charStart/charStop all begin equal, so later use of stop is safe. */
+void NW_HTMLP_Interval_Start(NW_HTMLP_Interval_t* pI, NW_HTMLP_Lexer_t* pL)
+{
+  pI->charStart = pI->charStop = pL->charPosition;
+  pI->start = pI->stop = pL->readPosition;
+}
+
+/* Closes the interval at the lexer's current position: sets stop and charStop only. */
+void NW_HTMLP_Interval_Stop(NW_HTMLP_Interval_t* pI, NW_HTMLP_Lexer_t* pL)
+{
+  pI->charStop = pL->charPosition;
+  pI->stop = pL->readPosition;
+}
+
+/* Returns an estimate of the current line and column position in the text,
+both 1-based. It is only an estimate because the intended line-ending
+convention has to be guessed from how many CR and LF characters have been
+consumed so far:
+  - equal counts are taken as CR LF pairs (DOS style) and follow LF, which
+    gives the correct column because LF ends the pair;
+  - CR without any LF is taken as CR-only line ends (classic Mac OS style)
+    and follows CR;
+  - LF without any CR is taken as LF-only line ends (Unix style) and
+    follows LF;
+  - any other mixture follows CR only when the integer ratio
+    crCount * 100 / lfCount is above 300 (more than 3 CR per LF); every
+    other ratio, LF-dominant or in between, follows LF.
+Following a character means: line = its count + 1, column = number of
+characters since its last occurrence + 1. */
+void
+NW_HTMLP_Lexer_GetLineColumn(NW_HTMLP_Lexer_t* pT, NW_Uint32* pLine,
+                             NW_Uint32* pColumn)
+{
+  /* read everything up front, before any output is written */
+  const NW_Uint32 crSeen = pT->lineColumn.crCount;
+  const NW_Uint32 lfSeen = pT->lineColumn.lfCount;
+  const NW_Uint32 sinceCR = pT->lineColumn.charsSinceLastCR;
+  const NW_Uint32 sinceLF = pT->lineColumn.charsSinceLastLF;
+  /* LF decides unless shown otherwise below; this also gives the right
+  column for CR LF pairs, where LF is the last character of the line end */
+  NW_Bool followCR = NW_FALSE;
+
+  if (crSeen != lfSeen) {
+    if (lfSeen == 0) {
+      /* CR is the only line-end character seen */
+      followCR = NW_TRUE;
+    } else if (crSeen != 0) {
+      /* an unclear situation, so use a threshold on the ratio to guess */
+      if (((crSeen * 100) / lfSeen) > 300) {
+        followCR = NW_TRUE;
+      }
+    }
+    /* remaining case: LF is the only line-end character seen */
+  }
+
+  if (followCR) {
+    *pLine = crSeen + 1;
+    *pColumn = sinceCR + 1;
+  } else {
+    *pLine = lfSeen + 1;
+    *pColumn = sinceLF + 1;
+  }
+}
+#else
+
+void FeaRmeNoHTMLParser_htmlp_lexer(){ /* stub compiled when FEA_RME_NOHTMLPARSER is defined; presumably keeps this file non-empty */
+  int filler = 0;
+  ++filler;
+}
+#endif /* FEA_RME_NOHTMLPARSER */