src/xmlpatterns/data/qderivedstring_p.h
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtXmlPatterns module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 //
       
    43 //  W A R N I N G
       
    44 //  -------------
       
    45 //
       
    46 // This file is not part of the Qt API.  It exists purely as an
       
    47 // implementation detail.  This header file may change from version to
       
    48 // version without notice, or even be removed.
       
    49 //
       
    50 // We mean it.
       
    51 
       
    52 #ifndef Patternist_DerivedString_H
       
    53 #define Patternist_DerivedString_H
       
    54 
       
    55 #include <QRegExp>
       
    56 
       
    57 #include "private/qxmlutils_p.h"
       
    58 #include "qbuiltintypes_p.h"
       
    59 #include "qpatternistlocale_p.h"
       
    60 #include "qvalidationerror_p.h"
       
    61 
       
    62 QT_BEGIN_HEADER
       
    63 
       
    64 QT_BEGIN_NAMESPACE
       
    65 
       
    66 namespace QPatternist
       
    67 {
       
    68     /**
       
    69      * @short Represents instances of derived @c xs:string types, such as @c
       
    70      * xs:normalizedString.
       
    71      *
       
    72      * Whitespace is a significant part for creating values from the lexical
       
    73      * space. Of course the specification is tricky here. Here's some pointers:
       
    74      *
       
    75      * - From <a href="4.3.6.1 The whiteSpace Schema Component">XML Schema Part 2: Datatypes
       
    76      *   Second Edition, 4.3.6 whiteSpace</a>:
       
    77      *   "For all atomic datatypes other than string (and types
       
    78      *   derived by restriction from it) the value of whiteSpace is
       
    79      *   collapse and cannot be changed by a schema author; for string the
       
    80      *   value of whiteSpace is preserve; for any type derived by
       
    81      *   restriction from string the value of whiteSpace can be any of the
       
    82      *   three legal values."
       
    83      * - From <a href="http://www.w3.org/TR/xmlschema-1/#d0e1654">XML Schema Part 1: Structures
       
    84      *   Second Edition, 3.1.4 White Space Normalization during Validation</a>:
       
    85      *   "[Definition:]  The normalized value of an element or attribute
       
    86      *   information item is an initial value whose white space, if any,
       
    87      *   has been normalized according to the value of the whiteSpace facet of
       
    88      *   the simple type definition used in its validation."
       
    89      *
       
    90      * @author Frans Englich <frans.englich@nokia.com>
       
    91      * @ingroup Patternist_xdm
       
    92      * @todo Documentation is missing
       
    93      */
       
    94     template<TypeOfDerivedString DerivedType>
       
    95     class DerivedString : public AtomicValue
       
    96     {
       
    97     private:
       
    98         static inline ItemType::Ptr itemType()
       
    99         {
       
   100             switch(DerivedType)
       
   101             {
       
   102                 case TypeNormalizedString:  return BuiltinTypes::xsNormalizedString;
       
   103                 case TypeToken:             return BuiltinTypes::xsToken;
       
   104                 case TypeLanguage:          return BuiltinTypes::xsLanguage;
       
   105                 case TypeNMTOKEN:           return BuiltinTypes::xsNMTOKEN;
       
   106                 case TypeName:              return BuiltinTypes::xsName;
       
   107                 case TypeNCName:            return BuiltinTypes::xsNCName;
       
   108                 case TypeID:                return BuiltinTypes::xsID;
       
   109                 case TypeIDREF:             return BuiltinTypes::xsIDREF;
       
   110                 case TypeENTITY:            return BuiltinTypes::xsENTITY;
       
   111                 case TypeString:            return BuiltinTypes::xsString;
       
   112             }
       
   113 
       
   114             Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached.");
       
   115             return ItemType::Ptr();
       
   116         }
       
   117 
       
   118         const QString m_value;
       
   119 
       
   120         inline DerivedString(const QString &value) : m_value(value)
       
   121         {
       
   122         }
       
   123 
       
   124         /**
       
   125          * @short This is an incomplete test for whether @p ch conforms to
       
   126          * the XML 1.0 NameChar production.
       
   127          */
       
   128         static inline bool isNameChar(const QChar &ch)
       
   129         {
       
   130             return ch.isLetter()            ||
       
   131                    ch.isDigit()             ||
       
   132                    ch == QLatin1Char('.')   ||
       
   133                    ch == QLatin1Char('-')   ||
       
   134                    ch == QLatin1Char('_')   ||
       
   135                    ch == QLatin1Char(':');
       
   136         }
       
   137 
       
   138         /**
       
   139          * @returns @c true if @p input is a valid @c xs:Name.
       
   140          * @see <a href="http://www.w3.org/TR/REC-xml/#NT-Name">Extensible
       
   141          * Markup Language (XML) 1.0 (Fourth Edition), [5] Name</a>
       
   142          */
       
   143         static inline bool isValidName(const QString &input)
       
   144         {
       
   145             if(input.isEmpty())
       
   146                 return false;
       
   147 
       
   148             const QChar first(input.at(0));
       
   149 
       
   150             if(first.isLetter()             ||
       
   151                first == QLatin1Char('_')    ||
       
   152                first == QLatin1Char(':'))
       
   153             {
       
   154                 const int len = input.length();
       
   155 
       
   156                 if(len == 1)
       
   157                     return true;
       
   158 
       
   159                 /* Since we've checked the first character above, we start at
       
   160                  * position 1. */
       
   161                 for(int i = 1; i < len; ++i)
       
   162                 {
       
   163                     if(!isNameChar(input.at(i)))
       
   164                         return false;
       
   165                 }
       
   166 
       
   167                 return true;
       
   168             }
       
   169             else
       
   170                 return false;
       
   171         }
       
   172 
       
   173         /**
       
   174          * @returns @c true if @p input conforms to the XML 1.0 @c Nmtoken product.
       
   175          *
       
   176          * @see <a
       
   177          * href="http://www.w3.org/TR/2000/WD-xml-2e-20000814#NT-Nmtoken">Extensible
       
   178          * Markup Language (XML) 1.0 (Second Edition), [7] Nmtoken</a>
       
   179          */
       
   180         static inline bool isValidNMTOKEN(const QString &input)
       
   181         {
       
   182             const int len = input.length();
       
   183 
       
   184             if(len == 0)
       
   185                 return false;
       
   186 
       
   187             for(int i = 0; i < len; ++i)
       
   188             {
       
   189                 if(!isNameChar(input.at(i)))
       
   190                     return false;
       
   191             }
       
   192 
       
   193             return true;
       
   194         }
       
   195 
       
   196         /**
       
   197          * @short Performs attribute value normalization as if @p input was not
       
   198          * from a @c CDATA section.
       
   199          *
       
   200          * Each whitespace character in @p input that's not a space, such as tab
       
   201          * or new line character, is replaced with a space. This algorithm
       
   202          * differs from QString::simplified() in that it doesn't collapse
       
   203          * subsequent whitespace characters to a single one, or remove trailing
       
   204          * and leading space.
       
   205          *
       
   206          * @see <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">Extensible
       
   207          * Markup Language (XML) 1.0 (Second Edition), 3.3.3 [E70]Attribute-Value Normalization</a>
       
   208          */
       
   209         static QString attributeNormalize(const QString &input)
       
   210         {
       
   211             QString retval(input);
       
   212             const int len = retval.length();
       
   213             const QLatin1Char space(' ');
       
   214 
       
   215             for(int i = 0; i < len; ++i)
       
   216             {
       
   217                 const QChar ati(retval.at(i));
       
   218 
       
   219                 if(ati.isSpace() && ati != space)
       
   220                     retval[i] = space;
       
   221             }
       
   222 
       
   223             return retval;
       
   224         }
       
   225 
       
   226         static AtomicValue::Ptr error(const NamePool::Ptr &np, const QString &invalidValue)
       
   227         {
       
   228             return ValidationError::createError(QString::fromLatin1("%1 is not a valid value for "
       
   229                                                                     "type %2.").arg(formatData(invalidValue))
       
   230                                                                                .arg(formatType(np, itemType())));
       
   231         }
       
   232 
       
   233     public:
       
   234 
       
   235         /**
       
   236          * @note This function doesn't perform any cleanup/normalizaiton of @p
       
   237          * value. @p value must be a canonical value space of the type.
       
   238          *
       
   239          * If you want cleanup to be performed and/or the lexical space
       
   240          * checked, use fromLexical().
       
   241          */
       
   242         static AtomicValue::Ptr fromValue(const QString &value)
       
   243         {
       
   244             return AtomicValue::Ptr(new DerivedString(value));
       
   245         }
       
   246 
       
   247         /**
       
   248          * Constructs an instance from the lexical
       
   249          * representation @p lexical.
       
   250          */
       
   251         static AtomicValue::Ptr fromLexical(const NamePool::Ptr &np, const QString &lexical)
       
   252         {
       
   253             switch(DerivedType)
       
   254             {
       
   255                 case TypeString:
       
   256                     return AtomicValue::Ptr(new DerivedString(lexical));
       
   257                 case TypeNormalizedString:
       
   258                     return AtomicValue::Ptr(new DerivedString(attributeNormalize(lexical)));
       
   259                 case TypeToken:
       
   260                     return AtomicValue::Ptr(new DerivedString(lexical.simplified()));
       
   261                 case TypeLanguage:
       
   262                 {
       
   263                     const QString simplified(lexical.trimmed());
       
   264 
       
   265                     const QRegExp validate(QLatin1String("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*"));
       
   266                     Q_ASSERT(validate.isValid());
       
   267 
       
   268                     if(validate.exactMatch(simplified))
       
   269                         return AtomicValue::Ptr(new DerivedString(lexical.simplified()));
       
   270                     else
       
   271                         return error(np, simplified);
       
   272                 }
       
   273                 case TypeNMTOKEN:
       
   274                 {
       
   275                     const QString trimmed(lexical.trimmed());
       
   276 
       
   277                     if(isValidNMTOKEN(trimmed))
       
   278                         return AtomicValue::Ptr(new DerivedString(trimmed));
       
   279                     else
       
   280                         return error(np, trimmed);
       
   281                 }
       
   282                 case TypeName:
       
   283                 {
       
   284                     const QString simplified(lexical.simplified());
       
   285 
       
   286                     if(isValidName(simplified))
       
   287                         return AtomicValue::Ptr(new DerivedString(simplified));
       
   288                     else
       
   289                         return error(np, simplified);
       
   290                 }
       
   291                 case TypeID:
       
   292                 /* Fallthrough. */
       
   293                 case TypeIDREF:
       
   294                 /* Fallthrough. */
       
   295                 case TypeENTITY:
       
   296                 /* Fallthrough. */
       
   297                 case TypeNCName:
       
   298                 {
       
   299                     /* We treat xs:ID, xs:ENTITY, xs:IDREF and xs:NCName in the exact same
       
   300                      * way, except for the type annotation.
       
   301                      *
       
   302                      * We use trimmed() instead of simplified() because it's
       
   303                      * faster and whitespace isn't allowed between
       
   304                      * non-whitespace characters anyway, for these types. */
       
   305                     const QString trimmed(lexical.trimmed());
       
   306 
       
   307                     if(QXmlUtils::isNCName(trimmed))
       
   308                         return AtomicValue::Ptr(new DerivedString(trimmed));
       
   309                     else
       
   310                         return error(np, trimmed);
       
   311                 }
       
   312                 default:
       
   313                 {
       
   314                     Q_ASSERT_X(false, Q_FUNC_INFO, "This line is not supposed to be reached.");
       
   315                     return AtomicValue::Ptr();
       
   316                 }
       
   317             }
       
   318         }
       
   319 
       
   320         virtual QString stringValue() const
       
   321         {
       
   322             return m_value;
       
   323         }
       
   324 
       
   325         virtual bool evaluateEBV(const QExplicitlySharedDataPointer<DynamicContext> &) const
       
   326         {
       
   327              return m_value.length() > 0;
       
   328         }
       
   329 
       
   330         virtual ItemType::Ptr type() const
       
   331         {
       
   332             return itemType();
       
   333         }
       
   334     };
       
   335 }
       
   336 
       
   337 QT_END_NAMESPACE
       
   338 
       
   339 QT_END_HEADER
       
   340 
       
   341 #endif