WebCore/loader/TextResourceDecoder.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 04 Oct 2010 01:32:07 +0300
changeset 2 303757a437d3
parent 0 4f2f89ce4247
permissions -rw-r--r--
Revision: 201037 Kit: 201039
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
    Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
    This library is free software; you can redistribute it and/or
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
    modify it under the terms of the GNU Library General Public
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
    License as published by the Free Software Foundation; either
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
    version 2 of the License, or (at your option) any later version.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
    This library is distributed in the hope that it will be useful,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
    Library General Public License for more details.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
    You should have received a copy of the GNU Library General Public License
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
    along with this library; see the file COPYING.LIB.  If not, write to
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
    Boston, MA 02110-1301, USA.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
*/
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
#include "config.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
#include "TextResourceDecoder.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
#include "DOMImplementation.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
#include "HTMLNames.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
#include "TextCodec.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
#include "TextEncoding.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
#include "TextEncodingDetector.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
#include "TextEncodingRegistry.h"
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
#include <wtf/ASCIICType.h>
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
#include <wtf/StringExtras.h>
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
using namespace WTF;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
namespace WebCore {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
using namespace HTMLNames;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
// You might think we should put these find functions elsewhere, perhaps with the
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
// similar functions that operate on UChar, but arguably only the decoder has
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
// a reason to process strings of char rather than UChar.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
static int find(const char* subject, size_t subjectLength, const char* target)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
    size_t targetLength = strlen(target);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
    if (targetLength > subjectLength)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
    for (size_t i = 0; i <= subjectLength - targetLength; ++i) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
        bool match = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
        for (size_t j = 0; j < targetLength; ++j) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
            if (subject[i + j] != target[j]) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
                match = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
                break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
        if (match)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
            return i;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
    return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
static int findIgnoringCase(const char* subject, size_t subjectLength, const char* target)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
    size_t targetLength = strlen(target);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
    if (targetLength > subjectLength)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
#ifndef NDEBUG
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
    for (size_t i = 0; i < targetLength; ++i)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
        ASSERT(isASCIILower(target[i]));
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
#endif
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
    for (size_t i = 0; i <= subjectLength - targetLength; ++i) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
        bool match = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
        for (size_t j = 0; j < targetLength; ++j) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
            if (toASCIILower(subject[i + j]) != target[j]) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
                match = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
                break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
        if (match)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
            return i;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
    return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
static TextEncoding findTextEncoding(const char* encodingName, int length)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
    Vector<char, 64> buffer(length + 1);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
    memcpy(buffer.data(), encodingName, length);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
    buffer[length] = '\0';
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
    return buffer.data();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
class KanjiCode {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
public:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
    enum Type { ASCII, JIS, EUC, SJIS, UTF16, UTF8 };
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    98
    static enum Type judge(const char* str, int length);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    99
    static const int ESC = 0x1b;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100
    static const unsigned char sjisMap[256];
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   101
    static int ISkanji(int code)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   102
    {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   103
        if (code >= 0x100)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   104
            return 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   105
        return sjisMap[code & 0xff] & 1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   106
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   107
    static int ISkana(int code)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   108
    {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   109
        if (code >= 0x100)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   110
            return 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   111
        return sjisMap[code & 0xff] & 2;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   112
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   113
};
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   114
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   115
const unsigned char KanjiCode::sjisMap[256] = {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   116
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   117
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   118
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   119
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   120
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   121
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   122
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   123
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   124
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   125
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   126
    0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   127
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   128
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   129
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   130
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   131
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   132
};
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   133
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   134
/*
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   135
 * EUC-JP is
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   136
 *     [0xa1 - 0xfe][0xa1 - 0xfe]
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   137
 *     0x8e[0xa1 - 0xfe](SS2)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   138
 *     0x8f[0xa1 - 0xfe][0xa1 - 0xfe](SS3)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   139
 *
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   140
 * Shift_Jis is
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   141
 *     [0x81 - 0x9f, 0xe0 - 0xef(0xfe?)][0x40 - 0x7e, 0x80 - 0xfc]
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   142
 *
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   143
 * Shift_Jis Hankaku Kana is
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   144
 *     [0xa1 - 0xdf]
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   145
 */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   146
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   147
/*
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   148
 * KanjiCode::judge() is based on judge_jcode() from jvim
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   149
 *     http://hp.vector.co.jp/authors/VA003457/vim/
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   150
 *
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   151
 * Special Thanks to Kenichi Tsuchida
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   152
 */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   153
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   154
enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   155
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   156
    enum Type code;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   157
    int i;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   158
    int bfr = false;            /* Kana Moji */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
    int bfk = 0;                /* EUC Kana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   160
    int sjis = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   161
    int euc = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   162
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   163
    const unsigned char* ptr = reinterpret_cast<const unsigned char*>(str);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   164
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   165
    code = ASCII;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   166
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   167
    i = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   168
    while (i < size) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   169
        if (ptr[i] == ESC && (size - i >= 3)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   170
            if ((ptr[i + 1] == '$' && ptr[i + 2] == 'B')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   171
            || (ptr[i + 1] == '(' && ptr[i + 2] == 'B')) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   172
                code = JIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   173
                goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   174
            } else if ((ptr[i + 1] == '$' && ptr[i + 2] == '@')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   175
                    || (ptr[i + 1] == '(' && ptr[i + 2] == 'J')) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   176
                code = JIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   177
                goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   178
            } else if (ptr[i + 1] == '(' && ptr[i + 2] == 'I') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   179
                code = JIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   180
                i += 3;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   181
            } else if (ptr[i + 1] == ')' && ptr[i + 2] == 'I') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   182
                code = JIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   183
                i += 3;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   184
            } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   185
                i++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   186
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   187
            bfr = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   188
            bfk = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   189
        } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   190
            if (ptr[i] < 0x20) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   191
                bfr = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   192
                bfk = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   193
                /* ?? check kudokuten ?? && ?? hiragana ?? */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   194
                if ((i >= 2) && (ptr[i - 2] == 0x81)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   195
                        && (0x41 <= ptr[i - 1] && ptr[i - 1] <= 0x49)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
                    code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
                    sjis += 100;        /* kudokuten */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
                } else if ((i >= 2) && (ptr[i - 2] == 0xa1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
                        && (0xa2 <= ptr[i - 1] && ptr[i - 1] <= 0xaa)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
                    code = EUC;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
                    euc += 100;         /* kudokuten */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   202
                } else if ((i >= 2) && (ptr[i - 2] == 0x82) && (0xa0 <= ptr[i - 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   203
                    sjis += 40;         /* hiragana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
                } else if ((i >= 2) && (ptr[i - 2] == 0xa4) && (0xa0 <= ptr[i - 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
                    euc += 40;          /* hiragana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   207
            } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   208
                /* ?? check hiragana or katana ?? */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   209
                if ((size - i > 1) && (ptr[i] == 0x82) && (0xa0 <= ptr[i + 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   210
                    sjis++;     /* hiragana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   211
                } else if ((size - i > 1) && (ptr[i] == 0x83)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   212
                         && (0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x9f)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
                    sjis++;     /* katakana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   214
                } else if ((size - i > 1) && (ptr[i] == 0xa4) && (0xa0 <= ptr[i + 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   215
                    euc++;      /* hiragana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   216
                } else if ((size - i > 1) && (ptr[i] == 0xa5) && (0xa0 <= ptr[i + 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   217
                    euc++;      /* katakana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   218
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   219
                if (bfr) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   220
                    if ((i >= 1) && (0x40 <= ptr[i] && ptr[i] <= 0xa0) && ISkanji(ptr[i - 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   221
                        code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   222
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   223
                    } else if ((i >= 1) && (0x81 <= ptr[i - 1] && ptr[i - 1] <= 0x9f) && ((0x40 <= ptr[i] && ptr[i] < 0x7e) || (0x7e < ptr[i] && ptr[i] <= 0xfc))) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   224
                        code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   225
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   226
                    } else if ((i >= 1) && (0xfd <= ptr[i] && ptr[i] <= 0xfe) && (0xa1 <= ptr[i - 1] && ptr[i - 1] <= 0xfe)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   227
                        code = EUC;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   228
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   229
                    } else if ((i >= 1) && (0xfd <= ptr[i - 1] && ptr[i - 1] <= 0xfe) && (0xa1 <= ptr[i] && ptr[i] <= 0xfe)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   230
                        code = EUC;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   231
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   232
                    } else if ((i >= 1) && (ptr[i] < 0xa0 || 0xdf < ptr[i]) && (0x8e == ptr[i - 1])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   233
                        code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   234
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   235
                    } else if (ptr[i] <= 0x7f) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   236
                        code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   237
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   238
                    } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   239
                        if (0xa1 <= ptr[i] && ptr[i] <= 0xa6) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   240
                            euc++;      /* sjis hankaku kana kigo */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   241
                        } else if (0xa1 <= ptr[i] && ptr[i] <= 0xdf) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   242
                            ;           /* sjis hankaku kana */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   243
                        } else if (0xa1 <= ptr[i] && ptr[i] <= 0xfe) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   244
                            euc++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   245
                        } else if (0x8e == ptr[i]) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   246
                            euc++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   247
                        } else if (0x20 <= ptr[i] && ptr[i] <= 0x7f) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   248
                            sjis++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   249
                        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   250
                        bfr = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   251
                        bfk = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   252
                    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   253
                } else if (0x8e == ptr[i]) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   254
                    if (size - i <= 1) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   255
                        ;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   256
                    } else if (0xa1 <= ptr[i + 1] && ptr[i + 1] <= 0xdf) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   257
                        /* EUC KANA or SJIS KANJI */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   258
                        if (bfk == 1) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   259
                            euc += 100;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   260
                        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   261
                        bfk++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   262
                        i++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   263
                    } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   264
                        /* SJIS only */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   265
                        code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   266
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   267
                    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   268
                } else if (0x81 <= ptr[i] && ptr[i] <= 0x9f) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   269
                    /* SJIS only */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   270
                    code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   271
                    if ((size - i >= 1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   272
                            && ((0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x7e)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   273
                            || (0x80 <= ptr[i + 1] && ptr[i + 1] <= 0xfc))) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   274
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   275
                    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   276
                } else if (0xfd <= ptr[i] && ptr[i] <= 0xfe) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   277
                    /* EUC only */
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   278
                    code = EUC;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   279
                    if ((size - i >= 1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   280
                            && (0xa1 <= ptr[i + 1] && ptr[i + 1] <= 0xfe)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   281
                        goto breakBreak;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   282
                    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   283
                } else if (ptr[i] <= 0x7f) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   284
                    ;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   285
                } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   286
                    bfr = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   287
                    bfk = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   288
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   289
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   290
            i++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   291
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   292
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   293
    if (code == ASCII) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   294
        if (sjis > euc) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   295
            code = SJIS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   296
        } else if (sjis < euc) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   297
            code = EUC;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   298
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   299
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   300
breakBreak:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   301
    return (code);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   302
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   303
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   304
TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   305
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   306
    if (equalIgnoringCase(mimeType, "text/css"))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   307
        return CSS;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   308
    if (equalIgnoringCase(mimeType, "text/html"))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   309
        return HTML;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   310
    if (DOMImplementation::isXMLMIMEType(mimeType))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   311
        return XML;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   312
    return PlainText;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   313
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   314
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   315
const TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const TextEncoding& specifiedDefaultEncoding)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   316
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   317
    // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII 
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   318
    // for text/xml. This matches Firefox.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   319
    if (contentType == XML)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   320
        return UTF8Encoding();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   321
    if (!specifiedDefaultEncoding.isValid())
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   322
        return Latin1Encoding();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   323
    return specifiedDefaultEncoding;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   324
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   325
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   326
TextResourceDecoder::TextResourceDecoder(const String& mimeType, const TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   327
    : m_contentType(determineContentType(mimeType))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   328
    , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   329
    , m_source(DefaultEncoding)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   330
    , m_hintEncoding(0)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   331
    , m_checkedForBOM(false)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   332
    , m_checkedForCSSCharset(false)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   333
    , m_checkedForHeadCharset(false)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   334
    , m_useLenientXMLDecoding(false)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   335
    , m_sawError(false)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   336
    , m_usesEncodingDetector(usesEncodingDetector)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   337
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   338
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   339
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   340
TextResourceDecoder::~TextResourceDecoder()
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   341
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   342
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   343
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   344
void TextResourceDecoder::setEncoding(const TextEncoding& encoding, EncodingSource source)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   345
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   346
    // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings).
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   347
    if (!encoding.isValid())
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   348
        return;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   349
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   350
    // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   351
    // treat x-user-defined as windows-1252 (bug 18270)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   352
    if (source == EncodingFromMetaTag && strcasecmp(encoding.name(), "x-user-defined") == 0)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   353
        m_encoding = "windows-1252";
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   354
    else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset)        
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   355
        m_encoding = encoding.closestByteBasedEquivalent();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   356
    else
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   357
        m_encoding = encoding;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   358
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   359
    m_codec.clear();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   360
    m_source = source;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   361
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   362
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   363
// Returns the position of the encoding string.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   364
static int findXMLEncoding(const char* str, int len, int& encodingLength)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   365
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   366
    int pos = find(str, len, "encoding");
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   367
    if (pos == -1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   368
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   369
    pos += 8;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   370
    
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   371
    // Skip spaces and stray control characters.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   372
    while (pos < len && str[pos] <= ' ')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   373
        ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   374
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   375
    // Skip equals sign.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   376
    if (pos >= len || str[pos] != '=')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   377
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   378
    ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   379
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   380
    // Skip spaces and stray control characters.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   381
    while (pos < len && str[pos] <= ' ')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   382
        ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   383
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   384
    // Skip quotation mark.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   385
    if (pos >= len)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   386
        return - 1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   387
    char quoteMark = str[pos];
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   388
    if (quoteMark != '"' && quoteMark != '\'')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   389
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   390
    ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   391
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   392
    // Find the trailing quotation mark.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   393
    int end = pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   394
    while (end < len && str[end] != quoteMark)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   395
        ++end;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   396
    if (end >= len)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   397
        return -1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   398
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   399
    encodingLength = end - pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   400
    return pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   401
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   402
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   403
// true if there is more to parse
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   404
static inline bool skipWhitespace(const char*& pos, const char* dataEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   405
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   406
    while (pos < dataEnd && (*pos == '\t' || *pos == ' '))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   407
        ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   408
    return pos != dataEnd;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   409
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   410
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   411
size_t TextResourceDecoder::checkForBOM(const char* data, size_t len)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   412
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   413
    // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   414
    // We let it override even a user-chosen encoding.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   415
    ASSERT(!m_checkedForBOM);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   416
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   417
    size_t lengthOfBOM = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   418
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   419
    size_t bufferLength = m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   420
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   421
    size_t buf1Len = bufferLength;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   422
    size_t buf2Len = len;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   423
    const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data());
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   424
    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   425
    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   426
    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   427
    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   428
    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   429
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   430
    // Check for the BOM.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   431
    if (c1 == 0xFF && c2 == 0xFE) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   432
        if (c3 != 0 || c4 != 0) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   433
            setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   434
            lengthOfBOM = 2;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   435
        } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   436
            setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   437
            lengthOfBOM = 4;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   438
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   439
    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   440
        setEncoding(UTF8Encoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   441
        lengthOfBOM = 3;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   442
    } else if (c1 == 0xFE && c2 == 0xFF) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   443
        setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   444
        lengthOfBOM = 2;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   445
    } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   446
        setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   447
        lengthOfBOM = 4;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   448
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   449
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   450
    if (lengthOfBOM || bufferLength + len >= 4)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   451
        m_checkedForBOM = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   452
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   453
    return lengthOfBOM;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   454
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   455
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   456
bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   457
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   458
    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   459
        m_checkedForCSSCharset = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   460
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   461
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   462
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   463
    size_t oldSize = m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   464
    m_buffer.grow(oldSize + len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   465
    memcpy(m_buffer.data() + oldSize, data, len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   466
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   467
    movedDataToBuffer = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   468
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   469
    if (m_buffer.size() > 8) { // strlen("@charset") == 8
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   470
        const char* dataStart = m_buffer.data();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   471
        const char* dataEnd = dataStart + m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   472
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   473
        if (dataStart[0] == '@' && dataStart[1] == 'c' && dataStart[2] == 'h' && dataStart[3] == 'a' && dataStart[4] == 'r' && 
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   474
            dataStart[5] == 's' && dataStart[6] == 'e' && dataStart[7] == 't') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   475
    
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   476
            dataStart += 8;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   477
            const char* pos = dataStart;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   478
            if (!skipWhitespace(pos, dataEnd))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   479
                return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   480
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   481
            if (*pos == '"' || *pos == '\'') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   482
                char quotationMark = *pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   483
                ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   484
                dataStart = pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   485
            
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   486
                while (pos < dataEnd && *pos != quotationMark)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   487
                    ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   488
                if (pos == dataEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   489
                    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   490
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   491
                int encodingNameLength = pos - dataStart + 1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   492
                
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   493
                ++pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   494
                if (!skipWhitespace(pos, dataEnd))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   495
                    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   496
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   497
                if (*pos == ';')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   498
                    setEncoding(findTextEncoding(dataStart, encodingNameLength), EncodingFromCSSCharset);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   499
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   500
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   501
        m_checkedForCSSCharset = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   502
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   503
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   504
    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   505
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   506
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   507
// Other browsers allow comments in the head section, so we need to also.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   508
// It's important not to look for tags inside the comments.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   509
static inline void skipComment(const char*& ptr, const char* pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   510
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   511
    const char* p = ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   512
    if (p == pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   513
      return;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   514
    // Allow <!-->; other browsers do.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   515
    if (*p == '>') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   516
        p++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   517
    } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   518
        while (p + 2 < pEnd) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   519
            if (*p == '-') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   520
                // This is the real end of comment, "-->".
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   521
                if (p[1] == '-' && p[2] == '>') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   522
                    p += 3;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   523
                    break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   524
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   525
                // This is the incorrect end of comment that other browsers allow, "--!>".
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   526
                if (p + 3 < pEnd && p[1] == '-' && p[2] == '!' && p[3] == '>') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   527
                    p += 4;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   528
                    break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   529
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   530
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   531
            p++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   532
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   533
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   534
    ptr = p;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   535
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   536
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   537
const int bytesToCheckUnconditionally = 1024; // That many input bytes will be checked for meta charset even if <head> section is over.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   538
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   539
bool TextResourceDecoder::checkForHeadCharset(const char* data, size_t len, bool& movedDataToBuffer)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   540
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   541
    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   542
        m_checkedForHeadCharset = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   543
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   544
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   545
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   546
    // This is not completely efficient, since the function might go
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   547
    // through the HTML head several times.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   548
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   549
    size_t oldSize = m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   550
    m_buffer.grow(oldSize + len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   551
    memcpy(m_buffer.data() + oldSize, data, len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   552
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   553
    movedDataToBuffer = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   554
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   555
    const char* ptr = m_buffer.data();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   556
    const char* pEnd = ptr + m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   557
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   558
    // Is there enough data available to check for XML declaration?
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   559
    if (m_buffer.size() < 8)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   560
        return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   561
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   562
    // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   563
    // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   564
    if (ptr[0] == '<' && ptr[1] == '?' && ptr[2] == 'x' && ptr[3] == 'm' && ptr[4] == 'l') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   565
        const char* xmlDeclarationEnd = ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   566
        while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   567
            ++xmlDeclarationEnd;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   568
        if (xmlDeclarationEnd == pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   569
            return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   570
        // No need for +1, because we have an extra "?" to lose at the end of XML declaration.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   571
        int len = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   572
        int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   573
        if (pos != -1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   574
            setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   575
        // continue looking for a charset - it may be specified in an HTTP-Equiv meta
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   576
    } else if (ptr[0] == '<' && ptr[1] == 0 && ptr[2] == '?' && ptr[3] == 0 && ptr[4] == 'x' && ptr[5] == 0) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   577
        setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   578
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   579
    } else if (ptr[0] == 0 && ptr[1] == '<' && ptr[2] == 0 && ptr[3] == '?' && ptr[4] == 0 && ptr[5] == 'x') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   580
        setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   581
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   582
    } else if (ptr[0] == '<' && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0 && ptr[4] == '?' && ptr[5] == 0 && ptr[6] == 0 && ptr[7] == 0) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   583
        setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   584
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   585
    } else if (ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == '<' && ptr[4] == 0 && ptr[5] == 0 && ptr[6] == 0 && ptr[7] == '?') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   586
        setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   587
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   588
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   589
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   590
    // we still don't have an encoding, and are in the head
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   591
    // the following tags are allowed in <head>:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   592
    // SCRIPT|STYLE|META|LINK|OBJECT|TITLE|BASE
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   593
    
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   594
    // We stop scanning when a tag that is not permitted in <head>
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   595
    // is seen, rather when </head> is seen, because that more closely
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   596
    // matches behavior in other browsers; more details in
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   597
    // <http://bugs.webkit.org/show_bug.cgi?id=3590>.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   598
    
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   599
    // Additionally, we ignore things that looks like tags in <title>, <script> and <noscript>; see
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   600
    // <http://bugs.webkit.org/show_bug.cgi?id=4560>, <http://bugs.webkit.org/show_bug.cgi?id=12165>
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   601
    // and <http://bugs.webkit.org/show_bug.cgi?id=12389>.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   602
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   603
    // Since many sites have charset declarations after <body> or other tags that are disallowed in <head>,
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   604
    // we don't bail out until we've checked at least bytesToCheckUnconditionally bytes of input.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   605
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   606
    AtomicStringImpl* enclosingTagName = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   607
    bool inHeadSection = true; // Becomes false when </head> or any tag not allowed in head is encountered.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   608
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   609
    // the HTTP-EQUIV meta has no effect on XHTML
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   610
    if (m_contentType == XML)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   611
        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   612
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   613
    while (ptr + 3 < pEnd) { // +3 guarantees that "<!--" fits in the buffer - and certainly we aren't going to lose any "charset" that way.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   614
        if (*ptr == '<') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   615
            bool end = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   616
            ptr++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   617
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   618
            // Handle comments.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   619
            if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   620
                ptr += 3;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   621
                skipComment(ptr, pEnd);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   622
                if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   623
                    // Some pages that test bandwidth from within the browser do it by having
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   624
                    // huge comments and measuring the time they take to load. Repeatedly scanning
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   625
                    // these comments can take a lot of CPU time.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   626
                    m_checkedForHeadCharset = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   627
                    return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   628
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   629
                continue;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   630
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   631
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   632
            if (*ptr == '/') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   633
                ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   634
                end = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   635
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   636
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   637
            // Grab the tag name, but mostly ignore namespaces.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   638
            bool sawNamespace = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   639
            char tagBuffer[20];
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   640
            int len = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   641
            while (len < 19) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   642
                if (ptr == pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   643
                    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   644
                char c = *ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   645
                if (c == ':') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   646
                    len = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   647
                    sawNamespace = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   648
                    ptr++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   649
                    continue;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   650
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   651
                if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   652
                    ;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   653
                else if (c >= 'A' && c <= 'Z')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   654
                    c += 'a' - 'A';
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   655
                else
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   656
                    break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   657
                tagBuffer[len++] = c;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   658
                ptr++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   659
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   660
            tagBuffer[len] = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   661
            AtomicString tag(tagBuffer);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   662
            
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   663
            if (enclosingTagName) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   664
                if (end && tag.impl() == enclosingTagName)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   665
                    enclosingTagName = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   666
            } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   667
                if (tag == titleTag)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   668
                    enclosingTagName = titleTag.localName().impl();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   669
                else if (tag == scriptTag)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   670
                    enclosingTagName = scriptTag.localName().impl();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   671
                else if (tag == noscriptTag)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   672
                    enclosingTagName = noscriptTag.localName().impl();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   673
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   674
            
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   675
            // Find where the opening tag ends.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   676
            const char* tagContentStart = ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   677
            if (!end) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   678
                while (ptr != pEnd && *ptr != '>') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   679
                    if (*ptr == '\'' || *ptr == '"') {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   680
                        char quoteMark = *ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   681
                        ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   682
                        while (ptr != pEnd && *ptr != quoteMark)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   683
                            ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   684
                        if (ptr == pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   685
                            return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   686
                    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   687
                    ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   688
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   689
                if (ptr == pEnd)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   690
                    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   691
                ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   692
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   693
            
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   694
            if (!end && tag == metaTag && !sawNamespace) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   695
                const char* str = tagContentStart;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   696
                int length = ptr - tagContentStart;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   697
                int pos = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   698
                while (pos < length) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   699
                    int charsetPos = findIgnoringCase(str + pos, length - pos, "charset");
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   700
                    if (charsetPos == -1)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   701
                        break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   702
                    pos += charsetPos + 7;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   703
                    // skip whitespace
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   704
                    while (pos < length && str[pos] <= ' ')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   705
                        pos++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   706
                    if (pos == length)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   707
                        break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   708
                    if (str[pos++] != '=')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   709
                        continue;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   710
                    while ((pos < length) &&
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   711
                            (str[pos] <= ' ' || str[pos] == '=' || str[pos] == '"' || str[pos] == '\''))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   712
                        pos++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   713
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   714
                    // end ?
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   715
                    if (pos == length)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   716
                        break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   717
                    int end = pos;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   718
                    while (end < length &&
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   719
                           str[end] != ' ' && str[end] != '"' && str[end] != '\'' &&
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   720
                           str[end] != ';' && str[end] != '>')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   721
                        end++;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   722
                    setEncoding(findTextEncoding(str + pos, end - pos), EncodingFromMetaTag);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   723
                    if (m_source == EncodingFromMetaTag)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   724
                        return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   725
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   726
                    if (end >= length || str[end] == '/' || str[end] == '>')
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   727
                        break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   728
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   729
                    pos = end + 1;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   730
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   731
            } else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   732
                if (!enclosingTagName && tag != scriptTag && tag != noscriptTag && tag != styleTag
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   733
                    && tag != linkTag && tag != metaTag && tag != objectTag && tag != titleTag && tag != baseTag
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   734
                    && (end || tag != htmlTag) && (end || tag != headTag) && isASCIIAlpha(tagBuffer[0])) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   735
                    inHeadSection = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   736
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   737
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   738
                if (ptr - m_buffer.data() >= bytesToCheckUnconditionally && !inHeadSection) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   739
                    m_checkedForHeadCharset = true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   740
                    return true;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   741
                }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   742
            }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   743
        } else
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   744
            ++ptr;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   745
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   746
    return false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   747
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   748
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   749
void TextResourceDecoder::detectJapaneseEncoding(const char* data, size_t len)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   750
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   751
    switch (KanjiCode::judge(data, len)) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   752
        case KanjiCode::JIS:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   753
            setEncoding("ISO-2022-JP", AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   754
            break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   755
        case KanjiCode::EUC:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   756
            setEncoding("EUC-JP", AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   757
            break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   758
        case KanjiCode::SJIS:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   759
            setEncoding("Shift_JIS", AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   760
            break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   761
        case KanjiCode::ASCII:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   762
        case KanjiCode::UTF16:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   763
        case KanjiCode::UTF8:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   764
            break;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   765
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   766
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   767
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   768
// We use the encoding detector in two cases:
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   769
//   1. Encoding detector is turned ON and no other encoding source is
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   770
//      available (that is, it's DefaultEncoding).
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   771
//   2. Encoding detector is turned ON and the encoding is set to
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   772
//      the encoding of the parent frame, which is also auto-detected.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   773
//   Note that condition #2 is NOT satisfied unless parent-child frame
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   774
//   relationship is compliant to the same-origin policy. If they're from
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   775
//   different domains, |m_source| would not be set to EncodingFromParentFrame
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   776
//   in the first place. 
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   777
bool TextResourceDecoder::shouldAutoDetect() const
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   778
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   779
    // Just checking m_hintEncoding suffices here because it's only set
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   780
    // in setHintEncoding when the source is AutoDetectedEncoding.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   781
    return m_usesEncodingDetector
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   782
        && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); 
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   783
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   784
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   785
String TextResourceDecoder::decode(const char* data, size_t len)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   786
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   787
    size_t lengthOfBOM = 0;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   788
    if (!m_checkedForBOM)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   789
        lengthOfBOM = checkForBOM(data, len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   790
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   791
    bool movedDataToBuffer = false;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   792
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   793
    if (m_contentType == CSS && !m_checkedForCSSCharset)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   794
        if (!checkForCSSCharset(data, len, movedDataToBuffer))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   795
            return "";
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   796
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   797
    if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForHeadCharset) // HTML and XML
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   798
        if (!checkForHeadCharset(data, len, movedDataToBuffer))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   799
            return "";
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   800
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   801
    // FIXME: It is wrong to change the encoding downstream after we have already done some decoding.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   802
    if (shouldAutoDetect()) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   803
        if (m_encoding.isJapanese())
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   804
            detectJapaneseEncoding(data, len); // FIXME: We should use detectTextEncoding() for all languages.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   805
        else {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   806
            TextEncoding detectedEncoding;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   807
            if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   808
                setEncoding(detectedEncoding, AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   809
        }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   810
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   811
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   812
    ASSERT(m_encoding.isValid());
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   813
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   814
    if (!m_codec)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   815
        m_codec = newTextCodec(m_encoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   816
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   817
    if (m_buffer.isEmpty())
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   818
        return m_codec->decode(data + lengthOfBOM, len - lengthOfBOM, false, m_contentType == XML, m_sawError);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   819
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   820
    if (!movedDataToBuffer) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   821
        size_t oldSize = m_buffer.size();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   822
        m_buffer.grow(oldSize + len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   823
        memcpy(m_buffer.data() + oldSize, data, len);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   824
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   825
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   826
    String result = m_codec->decode(m_buffer.data() + lengthOfBOM, m_buffer.size() - lengthOfBOM, false, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   827
    m_buffer.clear();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   828
    return result;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   829
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   830
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   831
String TextResourceDecoder::flush()
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   832
{
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   833
   // If we can not identify the encoding even after a document is completely
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   834
   // loaded, we need to detect the encoding if other conditions for
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   835
   // autodetection is satisfied.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   836
    if (m_buffer.size() && shouldAutoDetect()
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   837
        && ((!m_checkedForHeadCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) {
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   838
         TextEncoding detectedEncoding;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   839
         if (detectTextEncoding(m_buffer.data(), m_buffer.size(),
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   840
                                m_hintEncoding, &detectedEncoding))
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   841
             setEncoding(detectedEncoding, AutoDetectedEncoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   842
    }
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   843
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   844
    if (!m_codec)
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   845
        m_codec = newTextCodec(m_encoding);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   846
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   847
    String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   848
    m_buffer.clear();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   849
    m_codec.clear();
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   850
    m_checkedForBOM = false; // Skip BOM again when re-decoding.
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   851
    return result;
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   852
}
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   853
4f2f89ce4247 Revision: 201037
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   854
}