src/xmlpatterns/acceltree/qcompressedwhitespace.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/xmlpatterns/acceltree/qcompressedwhitespace.cpp	Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,197 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtXmlPatterns module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QString>
+
+#include "qcompressedwhitespace_p.h"
+
+QT_BEGIN_NAMESPACE
+
+using namespace QPatternist;
+
+CompressedWhitespace::CharIdentifier CompressedWhitespace::toIdentifier(const QChar ch)
+{
+    /* Translate one of the four supported whitespace characters into its
+     * identifier. Any other character violates the caller's contract: it
+     * trips the assertion below and, where that is compiled out, yields Tab. */
+    const ushort codePoint = ch.unicode();
+
+    if(codePoint == ' ')
+        return Space;
+    else if(codePoint == '\n')
+        return LF;
+    else if(codePoint == '\r')
+        return CR;
+    else if(codePoint == '\t')
+        return Tab;
+
+    Q_ASSERT_X(false, Q_FUNC_INFO,
+               "The caller must guarantee only whitespace is passed.");
+    return Tab;
+}
+
+bool CompressedWhitespace::isEven(const int number)
+{
+    Q_ASSERT(number >= 0); /* Precondition: number is not negative. */
+    return (number & 1) == 0; /* A cleared lowest bit means the value is even. */
+}
+
+quint8 CompressedWhitespace::toCompressedChar(const QChar ch, const int len)
+{
+    Q_ASSERT(len > 0); /* A unit must describe at least one character. */
+    Q_ASSERT(len <= MaxCharCount); /* Longer runs have to be split by the caller. */
+    const CharIdentifier id = toIdentifier(ch);
+    return len + id; /* Run length and identifier are summed into a single byte. */
+}
+
+QChar CompressedWhitespace::toChar(const CharIdentifier id)
+{
+    /* Reverse of toIdentifier(): map an identifier back to its character. */
+    if(id == Space)
+        return QLatin1Char(' ');
+    else if(id == CR)
+        return QLatin1Char('\r');
+    else if(id == LF)
+        return QLatin1Char('\n');
+    else if(id == Tab)
+        return QLatin1Char('\t');
+    /* None of the four known identifiers: assert, then yield a null QChar. */
+    Q_ASSERT_X(false, Q_FUNC_INFO, "Unexpected input");
+    return QChar();
+}
+
+QString CompressedWhitespace::compress(const QStringRef &input)
+{
+    Q_ASSERT(!isEven(1) && isEven(0) && isEven(2));
+    Q_ASSERT(!input.isEmpty());
+
+    const int inputLength = input.length();
+    QString packed;
+
+    /* How many one-byte units have been written so far. Units are stored
+     * two per QChar: the unit with an even index goes in the low byte of a
+     * freshly appended QChar, the following odd one in that QChar's high byte. */
+    int unitCount = 0;
+    int pos = 0;
+
+    while(pos < inputLength)
+    {
+        const QChar current(input.at(pos));
+
+        /* Locate the end, exclusive, of the run of characters equal to current. */
+        int runEnd = pos + 1;
+
+        while(runEnd < inputLength && input.at(runEnd) == current)
+            ++runEnd;
+
+        /* The number of characters in the run that still need encoding. */
+        int pending = runEnd - pos;
+        pos = runEnd;
+
+        /* A unit can describe at most MaxCharCount characters, so a longer
+         * run is emitted as several consecutive units for the same character. */
+        do
+        {
+            const int unitLength = qMin(pending, int(MaxCharCount));
+            pending -= unitLength;
+
+            const ushort unit = toCompressedChar(current, unitLength);
+
+            if(isEven(unitCount))
+            {
+                /* Start a new QChar; its high byte stays zero for now. */
+                packed += QChar(unit);
+            }
+            else
+            {
+                /* Fill the high byte of the QChar appended last time. */
+                const int last = packed.size() - 1;
+                const ushort lowUnit = packed.at(last).unicode();
+
+                packed[last] = ushort((unit << 8) | lowUnit);
+            }
+
+            ++unitCount;
+        }
+        while(pending != 0);
+    }
+
+    return packed;
+}
+
+QString CompressedWhitespace::decompress(const QString &input)
+{
+    Q_ASSERT(!input.isEmpty());
+    /* Each QChar of input holds two one-byte units: low byte first, then high. */
+    const int unitCount = 2 * input.length();
+    QString expanded;
+
+    for(int unitIndex = 0; unitIndex < unitCount; ++unitIndex)
+    {
+        ushort unit = input.at(unitIndex / 2).unicode();
+
+        if(!isEven(unitIndex))
+        {
+            unit = unit >> 8;
+            /* A zero high byte carries no unit; decoding stops here. */
+            if(unit == 0)
+                break;
+        }
+        else
+            unit &= Lower8Bits;
+
+        /* Lower6Bits selects the run length, UpperTwoBits the identifier. */
+        const quint8 runLength = unit & Lower6Bits;
+        const quint8 id = unit & UpperTwoBits;
+        const QChar ch(toChar(CharIdentifier(id)));
+
+        /* Grow the output by runLength and write ch into the new tail. */
+        int writePos = expanded.size();
+        expanded.resize(writePos + runLength);
+        while(writePos < expanded.size())
+            expanded[writePos++] = ch;
+    }
+
+    return expanded;
+}
+
+QT_END_NAMESPACE
+