--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libraries/spcre/libpcre/pcre/pcre_tables.c Wed Jun 23 15:52:26 2010 +0100
@@ -0,0 +1,356 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2008 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains some fixed tables that are used by more than one of the
+PCRE code modules. The tables are also #included by the pcretest program, which
+uses macros to change their names from _pcre_xxx to xxx, thereby avoiding name
+clashes with the library. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre_internal.h"
+
+
+/* Table of sizes for the fixed-length opcodes. Its contents come from the
+OP_LENGTHS macro, which is defined beside the opcodes in pcre_internal.h. */
+
+const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
+
+
+
+/*************************************************
+* Tables for UTF-8 support *
+*************************************************/
+
+/* These are the breakpoints for different numbers of bytes in a UTF-8
+character. */
+
+#ifdef SUPPORT_UTF8
+
+const int _pcre_utf8_table1[] = {   /* upper limits for 1..6-byte characters */
+  0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };
+const int _pcre_utf8_table1_size =  /* element count of the table above */
+  sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]);
+
+/* table2 holds the indicator bits and table3 the mask for the data bits of the
+first byte of a character; both are indexed by the number of additional bytes. */
+
+const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};     /* indicator bits */
+const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  /* data-bit masks */
+
+/* Number of extra bytes following a first byte, indexed by that byte masked
+with 0x3f. All 64 slots are filled, though the highest valid index is 0x3d. */
+
+const uschar _pcre_utf8_table4[] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* index 0x00-0x0f: 1 extra byte */
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* index 0x10-0x1f: 1 extra byte */
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,       /* index 0x20-0x2f: 2 extra bytes */
+ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };     /* index 0x30-0x3f: 3, 4 or 5 extra bytes */
+
+/* Table to translate from a particular type value (the index) to its general value. */
+
+const int _pcre_ucp_gentype[] = {
+ ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
+ ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
+ ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
+ ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
+ ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
+ ucp_P, ucp_P, /* Ps, Po */
+ ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
+ ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
+};  /* NOTE(review): every row holds one repeated value, so the name order in the row comments does not change the data; confirm it against the enum order in ucp.h */
+
+/* The pcre_utt[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and then using offsets in the
+table itself. Maintenance is more error-prone, but frequent changes to this
+data are unlikely.
+
+July 2008: There is now a script called maint/GenerateUtt.py which can be used
+to generate this data instead of maintaining it entirely by hand. */
+
+const char _pcre_utt_names[] =   /* each name ends with \0; _pcre_utt[] stores the start offsets */
+ "Any\0"
+ "Arabic\0"
+ "Armenian\0"
+ "Balinese\0"
+ "Bengali\0"
+ "Bopomofo\0"
+ "Braille\0"
+ "Buginese\0"
+ "Buhid\0"
+ "C\0"
+ "Canadian_Aboriginal\0"
+ "Carian\0"
+ "Cc\0"
+ "Cf\0"
+ "Cham\0"
+ "Cherokee\0"
+ "Cn\0"
+ "Co\0"
+ "Common\0"
+ "Coptic\0"
+ "Cs\0"
+ "Cuneiform\0"
+ "Cypriot\0"
+ "Cyrillic\0"
+ "Deseret\0"
+ "Devanagari\0"
+ "Ethiopic\0"
+ "Georgian\0"
+ "Glagolitic\0"
+ "Gothic\0"
+ "Greek\0"
+ "Gujarati\0"
+ "Gurmukhi\0"
+ "Han\0"
+ "Hangul\0"
+ "Hanunoo\0"
+ "Hebrew\0"
+ "Hiragana\0"
+ "Inherited\0"
+ "Kannada\0"
+ "Katakana\0"
+ "Kayah_Li\0"
+ "Kharoshthi\0"
+ "Khmer\0"
+ "L\0"
+ "L&\0"
+ "Lao\0"
+ "Latin\0"
+ "Lepcha\0"
+ "Limbu\0"
+ "Linear_B\0"
+ "Ll\0"
+ "Lm\0"
+ "Lo\0"
+ "Lt\0"
+ "Lu\0"
+ "Lycian\0"
+ "Lydian\0"
+ "M\0"
+ "Malayalam\0"
+ "Mc\0"
+ "Me\0"
+ "Mn\0"
+ "Mongolian\0"
+ "Myanmar\0"
+ "N\0"
+ "Nd\0"
+ "New_Tai_Lue\0"
+ "Nko\0"
+ "Nl\0"
+ "No\0"
+ "Ogham\0"
+ "Ol_Chiki\0"
+ "Old_Italic\0"
+ "Old_Persian\0"
+ "Oriya\0"
+ "Osmanya\0"
+ "P\0"
+ "Pc\0"
+ "Pd\0"
+ "Pe\0"
+ "Pf\0"
+ "Phags_Pa\0"
+ "Phoenician\0"
+ "Pi\0"
+ "Po\0"
+ "Ps\0"
+ "Rejang\0"
+ "Runic\0"
+ "S\0"
+ "Saurashtra\0"
+ "Sc\0"
+ "Shavian\0"
+ "Sinhala\0"
+ "Sk\0"
+ "Sm\0"
+ "So\0"
+ "Sundanese\0"
+ "Syloti_Nagri\0"
+ "Syriac\0"
+ "Tagalog\0"
+ "Tagbanwa\0"
+ "Tai_Le\0"
+ "Tamil\0"
+ "Telugu\0"
+ "Thaana\0"
+ "Thai\0"
+ "Tibetan\0"
+ "Tifinagh\0"
+ "Ugaritic\0"
+ "Vai\0"
+ "Yi\0"
+ "Z\0"
+ "Zl\0"
+ "Zp\0"
+ "Zs\0";   /* last name starts at offset 736, matching the final _pcre_utt[] entry */
+
+const ucp_type_table _pcre_utt[] = {   /* { offset of name in _pcre_utt_names, PT_xx type, value } */
+ { 0, PT_ANY, 0 },
+ { 4, PT_SC, ucp_Arabic },
+ { 11, PT_SC, ucp_Armenian },
+ { 20, PT_SC, ucp_Balinese },
+ { 29, PT_SC, ucp_Bengali },
+ { 37, PT_SC, ucp_Bopomofo },
+ { 46, PT_SC, ucp_Braille },
+ { 54, PT_SC, ucp_Buginese },
+ { 63, PT_SC, ucp_Buhid },
+ { 69, PT_GC, ucp_C },
+ { 71, PT_SC, ucp_Canadian_Aboriginal },
+ { 91, PT_SC, ucp_Carian },
+ { 98, PT_PC, ucp_Cc },
+ { 101, PT_PC, ucp_Cf },
+ { 104, PT_SC, ucp_Cham },
+ { 109, PT_SC, ucp_Cherokee },
+ { 118, PT_PC, ucp_Cn },
+ { 121, PT_PC, ucp_Co },
+ { 124, PT_SC, ucp_Common },
+ { 131, PT_SC, ucp_Coptic },
+ { 138, PT_PC, ucp_Cs },
+ { 141, PT_SC, ucp_Cuneiform },
+ { 151, PT_SC, ucp_Cypriot },
+ { 159, PT_SC, ucp_Cyrillic },
+ { 168, PT_SC, ucp_Deseret },
+ { 176, PT_SC, ucp_Devanagari },
+ { 187, PT_SC, ucp_Ethiopic },
+ { 196, PT_SC, ucp_Georgian },
+ { 205, PT_SC, ucp_Glagolitic },
+ { 216, PT_SC, ucp_Gothic },
+ { 223, PT_SC, ucp_Greek },
+ { 229, PT_SC, ucp_Gujarati },
+ { 238, PT_SC, ucp_Gurmukhi },
+ { 247, PT_SC, ucp_Han },
+ { 251, PT_SC, ucp_Hangul },
+ { 258, PT_SC, ucp_Hanunoo },
+ { 266, PT_SC, ucp_Hebrew },
+ { 273, PT_SC, ucp_Hiragana },
+ { 282, PT_SC, ucp_Inherited },
+ { 292, PT_SC, ucp_Kannada },
+ { 300, PT_SC, ucp_Katakana },
+ { 309, PT_SC, ucp_Kayah_Li },
+ { 318, PT_SC, ucp_Kharoshthi },
+ { 329, PT_SC, ucp_Khmer },
+ { 335, PT_GC, ucp_L },
+ { 337, PT_LAMP, 0 },
+ { 340, PT_SC, ucp_Lao },
+ { 344, PT_SC, ucp_Latin },
+ { 350, PT_SC, ucp_Lepcha },
+ { 357, PT_SC, ucp_Limbu },
+ { 363, PT_SC, ucp_Linear_B },
+ { 372, PT_PC, ucp_Ll },
+ { 375, PT_PC, ucp_Lm },
+ { 378, PT_PC, ucp_Lo },
+ { 381, PT_PC, ucp_Lt },
+ { 384, PT_PC, ucp_Lu },
+ { 387, PT_SC, ucp_Lycian },
+ { 394, PT_SC, ucp_Lydian },
+ { 401, PT_GC, ucp_M },
+ { 403, PT_SC, ucp_Malayalam },
+ { 413, PT_PC, ucp_Mc },
+ { 416, PT_PC, ucp_Me },
+ { 419, PT_PC, ucp_Mn },
+ { 422, PT_SC, ucp_Mongolian },
+ { 432, PT_SC, ucp_Myanmar },
+ { 440, PT_GC, ucp_N },
+ { 442, PT_PC, ucp_Nd },
+ { 445, PT_SC, ucp_New_Tai_Lue },
+ { 457, PT_SC, ucp_Nko },
+ { 461, PT_PC, ucp_Nl },
+ { 464, PT_PC, ucp_No },
+ { 467, PT_SC, ucp_Ogham },
+ { 473, PT_SC, ucp_Ol_Chiki },
+ { 482, PT_SC, ucp_Old_Italic },
+ { 493, PT_SC, ucp_Old_Persian },
+ { 505, PT_SC, ucp_Oriya },
+ { 511, PT_SC, ucp_Osmanya },
+ { 519, PT_GC, ucp_P },
+ { 521, PT_PC, ucp_Pc },
+ { 524, PT_PC, ucp_Pd },
+ { 527, PT_PC, ucp_Pe },
+ { 530, PT_PC, ucp_Pf },
+ { 533, PT_SC, ucp_Phags_Pa },
+ { 542, PT_SC, ucp_Phoenician },
+ { 553, PT_PC, ucp_Pi },
+ { 556, PT_PC, ucp_Po },
+ { 559, PT_PC, ucp_Ps },
+ { 562, PT_SC, ucp_Rejang },
+ { 569, PT_SC, ucp_Runic },
+ { 575, PT_GC, ucp_S },
+ { 577, PT_SC, ucp_Saurashtra },
+ { 588, PT_PC, ucp_Sc },
+ { 591, PT_SC, ucp_Shavian },
+ { 599, PT_SC, ucp_Sinhala },
+ { 607, PT_PC, ucp_Sk },
+ { 610, PT_PC, ucp_Sm },
+ { 613, PT_PC, ucp_So },
+ { 616, PT_SC, ucp_Sundanese },
+ { 626, PT_SC, ucp_Syloti_Nagri },
+ { 639, PT_SC, ucp_Syriac },
+ { 646, PT_SC, ucp_Tagalog },
+ { 654, PT_SC, ucp_Tagbanwa },
+ { 663, PT_SC, ucp_Tai_Le },
+ { 670, PT_SC, ucp_Tamil },
+ { 676, PT_SC, ucp_Telugu },
+ { 683, PT_SC, ucp_Thaana },
+ { 690, PT_SC, ucp_Thai },
+ { 695, PT_SC, ucp_Tibetan },
+ { 703, PT_SC, ucp_Tifinagh },
+ { 712, PT_SC, ucp_Ugaritic },
+ { 721, PT_SC, ucp_Vai },
+ { 725, PT_SC, ucp_Yi },
+ { 728, PT_GC, ucp_Z },
+ { 730, PT_PC, ucp_Zl },
+ { 733, PT_PC, ucp_Zp },
+ { 736, PT_PC, ucp_Zs }
+};   /* entries stay in collating order of name because the table is searched by binary chop */
+
+const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(_pcre_utt[0]);  /* entry count */
+
+#endif /* SUPPORT_UTF8 */
+
+/* End of pcre_tables.c */