src/corelib/tools/qstringmatcher.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtCore module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "qstringmatcher.h"
       
    43 #include "qunicodetables_p.h"
       
    44 
       
    45 QT_BEGIN_NAMESPACE
       
    46 
       
    47 static void bm_init_skiptable(const ushort *uc, int len, uchar *skiptable, Qt::CaseSensitivity cs)
       
    48 {
       
    49     int l = qMin(len, 255);
       
    50     memset(skiptable, l, 256*sizeof(uchar));
       
    51     uc += len - l;
       
    52     if (cs == Qt::CaseSensitive) {
       
    53         while (l--) {
       
    54             skiptable[*uc & 0xff] = l;
       
    55             uc++;
       
    56         }
       
    57     } else {
       
    58         const ushort *start = uc;
       
    59         while (l--) {
       
    60             skiptable[foldCase(uc, start) & 0xff] = l;
       
    61             uc++;
       
    62         }
       
    63     }
       
    64 }
       
    65 
       
    66 static inline int bm_find(const ushort *uc, uint l, int index, const ushort *puc, uint pl,
       
    67                           const uchar *skiptable, Qt::CaseSensitivity cs)
       
    68 {
       
    69     if (pl == 0)
       
    70         return index > (int)l ? -1 : index;
       
    71     const uint pl_minus_one = pl - 1;
       
    72 
       
    73     register const ushort *current = uc + index + pl_minus_one;
       
    74     const ushort *end = uc + l;
       
    75     if (cs == Qt::CaseSensitive) {
       
    76         while (current < end) {
       
    77             uint skip = skiptable[*current & 0xff];
       
    78             if (!skip) {
       
    79                 // possible match
       
    80                 while (skip < pl) {
       
    81                     if (*(current - skip) != puc[pl_minus_one-skip])
       
    82                         break;
       
    83                     skip++;
       
    84                 }
       
    85                 if (skip > pl_minus_one) // we have a match
       
    86                     return (current - uc) - pl_minus_one;
       
    87 
       
    88                 // in case we don't have a match we are a bit inefficient as we only skip by one
       
    89                 // when we have the non matching char in the string.
       
    90                 if (skiptable[*(current - skip) & 0xff] == pl)
       
    91                     skip = pl - skip;
       
    92                 else
       
    93                     skip = 1;
       
    94             }
       
    95             if (current > end - skip)
       
    96                 break;
       
    97             current += skip;
       
    98         }
       
    99     } else {
       
   100         while (current < end) {
       
   101             uint skip = skiptable[foldCase(current, uc) & 0xff];
       
   102             if (!skip) {
       
   103                 // possible match
       
   104                 while (skip < pl) {
       
   105                     if (foldCase(current - skip, uc) != foldCase(puc + pl_minus_one - skip, puc))
       
   106                         break;
       
   107                     skip++;
       
   108                 }
       
   109                 if (skip > pl_minus_one) // we have a match
       
   110                     return (current - uc) - pl_minus_one;
       
   111                 // in case we don't have a match we are a bit inefficient as we only skip by one
       
   112                 // when we have the non matching char in the string.
       
   113                 if (skiptable[foldCase(current - skip, uc) & 0xff] == pl)
       
   114                     skip = pl - skip;
       
   115                 else
       
   116                     skip = 1;
       
   117             }
       
   118             if (current > end - skip)
       
   119                 break;
       
   120             current += skip;
       
   121         }
       
   122     }
       
   123     return -1; // not found
       
   124 }
       
   125 
       
   126 /*!
       
   127     \class QStringMatcher
       
   128     \brief The QStringMatcher class holds a sequence of characters that
       
   129     can be quickly matched in a Unicode string.
       
   130 
       
   131     \ingroup tools
       
   132     \ingroup string-processing
       
   133 
       
   134     This class is useful when you have a sequence of \l{QChar}s that
       
   135     you want to repeatedly match against some strings (perhaps in a
       
   136     loop), or when you want to search for the same sequence of
       
   137     characters multiple times in the same string. Using a matcher
       
   138     object and indexIn() is faster than matching a plain QString with
       
   139     QString::indexOf() if repeated matching takes place. This class
       
   140     offers no benefit if you are doing one-off string matches.
       
   141 
       
   142     Create the QStringMatcher with the QString you want to search
       
   143     for. Then call indexIn() on the QString that you want to search.
       
   144 
       
   145     \sa QString, QByteArrayMatcher, QRegExp
       
   146 */
       
   147 
       
   148 /*!
       
   149     Constructs an empty string matcher that won't match anything.
       
   150     Call setPattern() to give it a pattern to match.
       
   151 */
       
   152 QStringMatcher::QStringMatcher()
       
   153     : d_ptr(0), q_cs(Qt::CaseSensitive)
       
   154 {
       
   155     qMemSet(q_data, 0, sizeof(q_data));
       
   156 }
       
   157 
       
   158 /*!
       
   159     Constructs a string matcher that will search for \a pattern, with
       
   160     case sensitivity \a cs.
       
   161 
       
   162     Call indexIn() to perform a search.
       
   163 */
       
   164 QStringMatcher::QStringMatcher(const QString &pattern, Qt::CaseSensitivity cs)
       
   165     : d_ptr(0), q_pattern(pattern), q_cs(cs)
       
   166 {
       
   167     p.uc = pattern.unicode();
       
   168     p.len = pattern.size();
       
   169     bm_init_skiptable((const ushort *)p.uc, p.len, p.q_skiptable, cs);
       
   170 }
       
   171 
       
   172 /*!
       
   173     \fn QStringMatcher::QStringMatcher(const QChar *uc, int length, Qt::CaseSensitivity cs)
       
   174     \since 4.5
       
   175 
       
   176     Constructs a string matcher that will search for the pattern referred to
       
   177     by \a uc with the given \a length and case sensitivity specified by \a cs.
       
   178 */
       
   179 QStringMatcher::QStringMatcher(const QChar *uc, int len, Qt::CaseSensitivity cs)
       
   180     : d_ptr(0), q_cs(cs)
       
   181 {
       
   182     p.uc = uc;
       
   183     p.len = len;
       
   184     bm_init_skiptable((const ushort *)p.uc, len, p.q_skiptable, cs);
       
   185 }
       
   186 
       
   187 /*!
       
   188     Copies the \a other string matcher to this string matcher.
       
   189 */
       
   190 QStringMatcher::QStringMatcher(const QStringMatcher &other)
       
   191     : d_ptr(0)
       
   192 {
       
   193     operator=(other);
       
   194 }
       
   195 
       
   196 /*!
       
   197     Destroys the string matcher.
       
   198 */
       
   199 QStringMatcher::~QStringMatcher()
       
   200 {
       
   201 }
       
   202 
       
   203 /*!
       
   204     Assigns the \a other string matcher to this string matcher.
       
   205 */
       
   206 QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other)
       
   207 {
       
   208     if (this != &other) {
       
   209         q_pattern = other.q_pattern;
       
   210         q_cs = other.q_cs;
       
   211         qMemCopy(q_data, other.q_data, sizeof(q_data));
       
   212     }
       
   213     return *this;
       
   214 }
       
   215 
       
   216 /*!
       
   217     Sets the string that this string matcher will search for to \a
       
   218     pattern.
       
   219 
       
   220     \sa pattern(), setCaseSensitivity(), indexIn()
       
   221 */
       
   222 void QStringMatcher::setPattern(const QString &pattern)
       
   223 {
       
   224     q_pattern = pattern;
       
   225     p.uc = pattern.unicode();
       
   226     p.len = pattern.size();
       
   227     bm_init_skiptable((const ushort *)pattern.unicode(), pattern.size(), p.q_skiptable, q_cs);
       
   228 }
       
   229 
       
   230 /*!
       
   231     \fn QString QStringMatcher::pattern() const
       
   232 
       
   233     Returns the string pattern that this string matcher will search
       
   234     for.
       
   235 
       
   236     \sa setPattern()
       
   237 */
       
   238 
       
   239 QString QStringMatcher::pattern() const
       
   240 {
       
   241     if (!q_pattern.isEmpty())
       
   242         return q_pattern;
       
   243     return QString(p.uc, p.len);
       
   244 }
       
   245 
       
   246 /*!
       
   247     Sets the case sensitivity setting of this string matcher to \a
       
   248     cs.
       
   249 
       
   250     \sa caseSensitivity(), setPattern(), indexIn()
       
   251 */
       
   252 void QStringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs)
       
   253 {
       
   254     if (cs == q_cs)
       
   255         return;
       
   256     bm_init_skiptable((const ushort *)q_pattern.unicode(), q_pattern.size(), p.q_skiptable, cs);
       
   257     q_cs = cs;
       
   258 }
       
   259 
       
   260 /*!
       
   261     Searches the string \a str from character position \a from
       
   262     (default 0, i.e. from the first character), for the string
       
   263     pattern() that was set in the constructor or in the most recent
       
   264     call to setPattern(). Returns the position where the pattern()
       
   265     matched in \a str, or -1 if no match was found.
       
   266 
       
   267     \sa setPattern(), setCaseSensitivity()
       
   268 */
       
   269 int QStringMatcher::indexIn(const QString &str, int from) const
       
   270 {
       
   271     if (from < 0)
       
   272         from = 0;
       
   273     return bm_find((const ushort *)str.unicode(), str.size(), from,
       
   274                    (const ushort *)p.uc, p.len,
       
   275                    p.q_skiptable, q_cs);
       
   276 }
       
   277 
       
   278 /*!
       
   279     \since 4.5
       
   280 
       
   281     Searches the string starting at \a str (of length \a length) from
       
   282     character position \a from (default 0, i.e. from the first
       
   283     character), for the string pattern() that was set in the
       
   284     constructor or in the most recent call to setPattern(). Returns
       
   285     the position where the pattern() matched in \a str, or -1 if no
       
   286     match was found.
       
   287 
       
   288     \sa setPattern(), setCaseSensitivity()
       
   289 */
       
   290 int QStringMatcher::indexIn(const QChar *str, int length, int from) const
       
   291 {
       
   292     if (from < 0)
       
   293         from = 0;
       
   294     return bm_find((const ushort *)str, length, from,
       
   295                    (const ushort *)p.uc, p.len,
       
   296                    p.q_skiptable, q_cs);
       
   297 }
       
   298 
       
   299 /*!
       
   300     \fn Qt::CaseSensitivity QStringMatcher::caseSensitivity() const
       
   301 
       
   302     Returns the case sensitivity setting for this string matcher.
       
   303 
       
   304     \sa setCaseSensitivity()
       
   305 */
       
   306 
       
   307 /*!
       
   308     \internal
       
   309 */
       
   310 
       
   311 int qFindStringBoyerMoore(
       
   312     const QChar *haystack, int haystackLen, int haystackOffset,
       
   313     const QChar *needle, int needleLen, Qt::CaseSensitivity cs)
       
   314 {
       
   315     uchar skiptable[256];
       
   316     bm_init_skiptable((const ushort *)needle, needleLen, skiptable, cs);
       
   317     if (haystackOffset < 0)
       
   318         haystackOffset = 0;
       
   319     return bm_find((const ushort *)haystack, haystackLen, haystackOffset,
       
   320                    (const ushort *)needle, needleLen, skiptable, cs);
       
   321 }
       
   322 
       
   323 QT_END_NAMESPACE