head 1.20; access; symbols STR_0_9_12:1.20 LMTP2NNTP_1_4_1:1.20 LMTP2NNTP_1_4_0:1.20 STR_0_9_11:1.20 STR_0_9_10:1.20 LMTP2NNTP_1_3_0:1.19 LMTP2NNTP_1_3b2:1.19 LMTP2NNTP_1_3b1:1.19 LMTP2NNTP_1_3a3:1.19 LMTP2NNTP_1_3a2:1.19 STR_0_9_9:1.19 LMTP2NNTP_1_3a1:1.19 STR_0_9_8:1.19 LMTP2NNTP_1_2_0:1.19 LMTP2NNTP_1_2b4:1.19 LMTP2NNTP_1_2b3:1.19 LMTP2NNTP_1_2b2:1.19 LMTP2NNTP_1_2b1:1.19 LMTP2NNTP_1_2a8:1.19 LMTP2NNTP_1_2a7:1.19 LMTP2NNTP_1_2a6:1.18 LMTP2NNTP_1_2a5:1.18 STR_0_9_7:1.18 LMTP2NNTP_1_2a4:1.17 LMTP2NNTP_1_2a3:1.17 OSSP_RC_SPEC:1.17 LMTP2NNTP_1_2a1:1.16 LMTP2NNTP_1_1_1:1.16 LMTP2NNTP_1_1_0:1.16 LMTP2NNTP_1_1b4:1.16 LMTP2NNTP_1_1b3:1.16 LMTP2NNTP_1_1b2:1.16 LMTP2NNTP_1_1b1:1.16 STR_0_9_6:1.16 STR_0_9_5:1.16 STR_0_9_4:1.14 STR_0_9_3:1.12 STR_0_9_2:1.10 STR_0_9_1:1.10 STR_0_9_0:1.10 str_init:1.1.1.1 str:1.1.1; locks; strict; comment @ * @; 1.20 date 2005.01.24.15.22.19; author rse; state Exp; branches; next 1.19; 1.19 date 2003.01.06.19.13.48; author rse; state Exp; branches; next 1.18; 1.18 date 2002.04.01.08.32.54; author rse; state Exp; branches; next 1.17; 1.17 date 2002.01.02.17.09.13; author rse; state Exp; branches; next 1.16; 1.16 date 2001.08.16.13.21.23; author rse; state Exp; branches; next 1.15; 1.15 date 2000.11.20.19.48.39; author rse; state Exp; branches; next 1.14; 1.14 date 2000.03.01.09.59.38; author rse; state Exp; branches; next 1.13; 1.13 date 2000.03.01.09.50.55; author rse; state Exp; branches; next 1.12; 1.12 date 2000.02.21.12.55.20; author rse; state Exp; branches; next 1.11; 1.11 date 2000.02.13.17.35.57; author rse; state Exp; branches; next 1.10; 1.10 date 2000.01.01.13.05.18; author rse; state Exp; branches; next 1.9; 1.9 date 99.12.27.19.20.38; author rse; state Exp; branches; next 1.8; 1.8 date 99.12.27.18.40.58; author rse; state Exp; branches; next 1.7; 1.7 date 99.12.26.15.37.34; author rse; state Exp; branches; next 1.6; 1.6 date 99.12.26.14.45.12; author rse; state Exp; branches; next 1.5; 1.5 date 99.12.25.20.57.23; author rse; state Exp; branches; next 1.4; 1.4 date 99.12.25.18.33.25; author rse; state Exp; branches; next 1.3; 1.3 date 99.12.25.18.23.02; author rse; state Exp; branches; next 1.2; 1.2 date 99.11.23.08.57.35; author rse; state Exp; branches; next 1.1; 1.1 date 99.11.22.17.29.12; author rse; state Exp; branches 1.1.1.1; next ; 1.1.1.1 date 99.11.22.17.29.12; author rse; state Exp; branches; next ; desc @@ 1.20 log @Adjusted copyright messages for new year 2004/2005. @ text @/* ** OSSP str - String Handling ** Copyright (c) 1999-2005 Ralf S. Engelschall ** Copyright (c) 1999-2005 The OSSP Project ** ** This file is part of OSSP str, a string handling and manipulation ** library which can be found at http://www.ossp.org/pkg/lib/str/. ** ** Permission to use, copy, modify, and distribute this software for ** any purpose with or without fee is hereby granted, provided that ** the above copyright notice and this permission notice appear in all ** copies. ** ** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED ** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. ** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR ** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF ** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ** SUCH DAMAGE. ** ** str_search.c: searching functions */ #include "str_p.h" /* * str_span -- span over a set of character in a string. * This is a spanning function inspired by POSIX strchr(3) and * strrchr(3). But it is more flexible, because it provides * left-to-right and right-to-left spanning combined with either * performing positive or negative charsets. Additionally it allows one * to restrict the number of maximum spanned characters. */ char *str_span(const char *s, str_size_t m, const char *charset, int mode) { register const char *sp; register const char *cp; register char sc, cc; register int n; char *rv; if (s == NULL || charset == NULL) return NULL; n = m; if (n == 0) str_ilen(n, s); rv = NULL; if (!(mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) { /* span from left to right while chars are in charset */ sp = s; l1: sc = *sp++; n--; if (sc != NUL && n >= 0) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l1; } rv = (char *)(sp - 1); } else if ((mode & STR_RIGHT) && !(mode & STR_COMPLEMENT)) { /* span from right to left while chars are in charset */ sp = s; while (*sp != NUL && n > 0) sp++, n--; if (sp > s) sp--; l2: sc = *sp--; if (sp >= s) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l2; } rv = (char *)(sp + 1); } else if (!(mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) { /* span from left to right while chars are NOT in charset */ sp = s; l3a: sc = *sp++; n--; if (sc != NUL && n >= 0) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l3; goto l3a; } l3: rv = (char *)(sp - 1); } else if ((mode & STR_RIGHT) && (mode & STR_COMPLEMENT)) { /* span from right to left while chars are NOT in charset */ sp = s; while (*sp != NUL && n > 0) sp++, n--; if (sp > s) sp--; l4a: sc = *sp--; if (sp >= s) { for (cp = charset; (cc = *cp++) != NUL; ) if (sc == cc) goto l4; goto l4a; } l4: rv = (char *)(sp + 1); } return rv; } /* * str_locate - locate a string in another one. * This is an optimized version of SUNDAY's Quick Search algorithm * which it turn is a simplification of the Boyer-Moore text searching * algorithm. It is very fast in practice for short patterns and long * alphabets, because the bad-character shift used in the Boyer-Moore * algorithm is not very efficient for small alphabets. But when the * alphabet is large compared with the length of the pattern, as it is * often the case with the ASCII table and ordinary searches made under * a text editor or similar applications, it becomes very useful. Using * it alone produces a very efficient algorithm in practice. It has a * quadratic worst-case time complexity but a good practical behaviour. * The implementation below was speed-optimized, too. */ char *str_locate(const char *ay, str_size_t n, const char *ax) { int qs_bc[256 /* 2^8 */]; register const unsigned char *x = (const unsigned char *)ax; register const unsigned char *y = (const unsigned char *)ay; register int m; register int i; register const unsigned char *tx; register const unsigned char *ty; int tm; /* * Consistency checks */ if (y == NULL || x == NULL) return NULL; if (*x == NUL) return (char *)y; /* * Preprocessing */ /* determine length of strings */ str_ilen(m, x); if (n == 0) str_ilen(n, y); /* fill whole bad char skip array */ for (i = 0; i < (sizeof(qs_bc)/sizeof(int)); i++) qs_bc[(int)i] = m+1; /* override positions related to pattern */ for (i = 0; i < m; i++) qs_bc[(int)*(x+i)] = m-i; /* * Searching */ /* search inside a len-terminated string */ while (n >= m) { /* compare first char (speed-up) */ if (*y == *x) { /* compare remaining chars */ ty = y+1; tx = x+1; tm = m-1; do { if (tm-- == 0) return (char *)y; /* found pattern */ } while (*ty++ == *tx++); } i = qs_bc[(int)*(y+m)]; /* perform shift */ y += i; n -= i; } return NULL; } @ 1.19 log @- adjust copyright messages for new year 2003 - strip trailing whitespaces - consistently use OSSP ASCII-art - add standard devtool.conf stuff from OSSP sa @ text @d3 2 a4 2 ** Copyright (c) 1999-2003 Ralf S. Engelschall ** Copyright (c) 1999-2003 The OSSP Project @ 1.18 log @finally switch to full OSSP branding @ text @d3 2 a4 2 ** Copyright (c) 1999-2002 Ralf S. Engelschall ** Copyright (c) 1999-2002 The OSSP Project d6 1 a6 1 ** This file is part of OSSP str, a string handling and manipulation d31 1 a31 1 d56 1 a56 1 sp = s; d84 1 a84 1 sp = s; d150 1 a150 1 /* d156 1 a156 1 if (n == 0) d160 1 a160 1 for (i = 0; i < (sizeof(qs_bc)/sizeof(int)); i++) d164 1 a164 1 for (i = 0; i < m; i++) d166 2 a167 2 /* @ 1.17 log @bump copyright year @ text @d2 1 a2 1 ** Str - String Library d4 1 d6 2 a7 2 ** This file is part of Str, a string handling and manipulation ** library which can be found at http://www.engelschall.com/sw/str/. @ 1.16 log @Adjust copyright for year 2001. @ text @d3 1 a3 1 ** Copyright (c) 1999-2001 Ralf S. Engelschall @ 1.15 log @*** empty log message *** @ text @d3 1 a3 1 ** Copyright (c) 1999-2000 Ralf S. Engelschall @ 1.14 log @*** empty log message *** @ text @d39 1 a39 1 char *str_span(const char *s, str_size_t n, const char *charset, int mode) d44 1 d49 1 @ 1.13 log @*** empty log message *** @ text @d49 1 a49 1 n = str_len(s); d142 1 a142 1 if (ay == NULL || ax == NULL) d144 2 a145 2 if (*ax == NUL) return (char *)ay; d151 4 a154 13 /* length of y [inlined n = str_len(y)] */ if (n == 0) { ty = y; while (*ty++ != NUL) /*nop*/; n = (ty-y-1); } /* length of x [inlined m = str_len(x)] */ tx = x; while (*tx++ != NUL) /*nop*/; m = (tx-x-1); @ 1.12 log @*** empty log message *** @ text @d151 1 a151 1 /* length of y [inlined n = strlen(y)] */ d159 1 a159 1 /* length of x [inlined m = strlen(x)] */ @ 1.11 log @*** empty log message *** @ text @a137 1 int nulterm; d152 1 a152 3 nulterm = TRUE; if (n > 0) { nulterm = FALSE; d166 1 a166 1 for (i = 0; i < sizeof(qs_bc); i++) d177 12 a188 34 if (nulterm) { /* search inside a nul-terminated string */ while ((i = *(y+m)) != NUL) { /* compare first char (speed-up) */ if (*y == *x) { /* compare remaining chars */ ty = y+1; tx = x+1; tm = m-1; do { if (tm-- == 0) return (char *)y; /* found pattern */ } while (*ty++ == *tx++); } y += qs_bc[i]; /* perform shift */ } } else { /* search inside a len-terminated string */ while (n >= m) { /* compare first char (speed-up) */ if (*y == *x) { /* compare remaining chars */ ty = y+1; tx = x+1; tm = m-1; do { if (tm-- == 0) return (char *)y; /* found pattern */ } while (*ty++ == *tx++); } i = qs_bc[(int)*(y+m)]; /* perform shift */ y += i; n -= i; d190 3 d194 1 @ 1.10 log @*** empty log message *** @ text @d72 1 a72 1 if (sp >= (s-1)) { d102 1 a102 1 if (sp >= (s-1)) { @ 1.9 log @*** empty log message *** @ text @d3 1 a3 1 ** Copyright (c) 1999 Ralf S. Engelschall @ 1.8 log @*** empty log message *** @ text @a127 1 #define ASIZE 256 d130 1 a130 1 int qs_bc[ASIZE]; d169 1 a169 1 for (i = 0; i < ASIZE; i++) @ 1.7 log @*** empty log message *** @ text @d33 2 a34 3 * * This is a spanning function influenced by POSIX strchr(3) * and strrchr(3). But it is more flexible, because it provides d36 1 a36 1 * peforming positive or negative charsets. Additionally it allows one d115 1 a126 10 * * References: * o CROCHEMORE, M., LECROQ, T., 1996, Pattern matching and text * compression algorithms, In CRC Computer Science and Engineering Handbook, * A. Tucker ed., Chapter 8, pp 162-202, CRC Press Inc., Boca Raton, FL. * o LECROQ, T., 1995, Experimental results on string matching algorithms, * Softw.-Pract. & Exp. 25(7):727-765. * o STEPHEN, G.A., 1994, String Searching Algorithms, World Scientific. * o SUNDAY, D.M., 1990, A very fast substring search algorithm. Commun. ACM. * 33(8):132-142. @ 1.6 log @*** empty log message *** @ text @d47 1 a47 1 if (s == NULL || n == 0 || charset == NULL) @ 1.5 log @*** empty log message *** @ text @d139 1 a139 1 char *str_locate_qs(const char *ay, str_size_t n, const char *ax) a228 70 #ifdef TEST #include #include #include char *str_locate_strstr(const char *big, str_size_t n, const char *little) { return strstr(big, little); } struct { char *name; char *(*func)(const char *, str_size_t, const char *); } tab1[] = { { "STRSTR", str_locate_strstr }, { "OBF", str_locate_obf }, { "QS", str_locate_qs }, { NULL, NULL } }; char *readfile(char *file) { FILE *fp; int nBuf; char *cpBuf; fp = fopen(file, "r"); fseek(fp, 0, SEEK_END); nBuf = ftell(fp); cpBuf = (char *)malloc(sizeof(char) * nBuf+1+20); fseek(fp, 0, SEEK_SET); fread(cpBuf, nBuf, 1, fp); cpBuf[nBuf] = '\0'; fclose(fp); return cpBuf; } int main(int argc, char *argv[]) { char *cp; char *rc; int i, j; struct timeval tv1, tv2; double td; cp = readfile("/usr/share/dict/words"); printf("bytes=%d\n", str_len(cp)); sleep(1); for (i = 0; tab1[i].name != NULL; i++) { gettimeofday(&tv1, NULL); rc = tab1[i].func(cp, 0, "aalii"); rc = tab1[i].func(cp, 0, "abacate"); rc = tab1[i].func(cp, 0, "abiogenesist"); rc = tab1[i].func(cp, 0, "Acalepha"); rc = tab1[i].func(cp, 0, "absorbedness"); rc = tab1[i].func(cp, 0, "Nepidae"); rc = tab1[i].func(cp, 0, "Zyrian"); rc = tab1[i].func(cp, 0, "Zyzzogeton"); gettimeofday(&tv2, NULL); printf("%p\n", rc); td = ((double)(tv2.tv_sec*1000000 + tv2.tv_usec) / 1000000) - ((double)(tv1.tv_sec*1000000 + tv1.tv_usec) / 1000000); printf("%10s %.6fs\n", tab1[i].name, td); } return 0; } #endif @ 1.4 log @*** empty log message *** @ text @d32 7 a38 1 * str_span -- span over a set of character in a string d49 2 a115 113 * str_locate -- locate a substring in a string * * This is a strstr(3) compatible implementation that beats most other * string searching algorithms. It was originally written by Stephen R. * van den Berg who originally * commented it this way: * * ``Until someone tells me otherwise, I assume that this is the * fastest implementation of strstr() in C. I deliberately chose not * to comment it. You should have at least as much fun trying to * understand it, as I had to write it :-)''. * * Nevertheless I (RSE) have commented it now for the Str library and * cleaned it up a little bit more. The interesting aspect of this * implementation is the fact that it is mainly just a highly optimized * brute-force text searching algorithm. But as Robert Sedgewick already * explained in his book ``Algorithms'' (2nd ed, p.278): * * ``While it [the brute-force algorithm] has a worst-case running * time proportional to M*N [M = length of pattern, N = length of * string], the strings which arise in many applications lead to * a running time which is virtually always proportional to M+N. * Furthermore, it is well suited to good architectural features * on most computer systems, so an optimized version provides a * "standard" which is difficult to beat with a clever algorithm.'' * * And that is also what I observed here. I did run-time comparisons * between van den Berg's algorithm and various "clever" * algorithms (like Tuned Boyer-More, Quick Search, etc.) from the * comprehensive paper ``Exact String Matching Algorithms'' (see * http://topaze.dir.univ-rouen.fr/~charras/string/). And the result was * that van den Berg's algorithm was always faster than the competitors * and often (for the complex ones of the "clever" algorithms) still * more intuitive (although it isn't at the first glance). */ char *str_locate_obf(const char *phaystack, const char *pneedle) { register const unsigned char *haystack = (const unsigned char *)phaystack; register const unsigned char *needle = (const unsigned char *)pneedle; register const unsigned char *rhaystack; register const unsigned char *rneedle; register unsigned char a, b, c; b = *needle; if (b != NUL) { haystack--; do { c = *++haystack; if (c == NUL) goto failed; } while (c != b); c = *++needle; if (c == NUL) goto foundneedle; needle++; goto jin; for (;;) { do { a = *++haystack; if (a == NUL) goto failed; if (a == b) break; a = *++haystack; if (a == NUL) goto failed; shloop: a = a; /* dummy operation, just to make gcc happy after label -- RSE */ } while (a != b); jin: a = *++haystack; if (a == NUL) goto failed; if (a != c) goto shloop; rhaystack = haystack-- + 1; rneedle = needle; a = *rneedle; if (*rhaystack == a) { do { if (a == NUL) goto foundneedle; ++rhaystack; a = *++needle; if (*rhaystack != a) break; if (a == NUL) goto foundneedle; ++rhaystack; a = *++needle; } while (*rhaystack == a); } needle = rneedle; /* took the register-poor aproach */ if (a == NUL) break; } } foundneedle: return (char *)haystack; failed: return NULL; } /* d139 1 a139 1 char *str_locate_qs(const char *ay, const char *ax) d149 1 d163 11 a173 1 /* inlined m = strlen(x) */ d190 35 a224 11 while (*(y+m) != NUL) { /* compare first char (speed-up) */ if (*y == *x) { /* compare remaining chars */ ty = y+1; tx = x+1; tm = m-1; do { if (tm-- == 0) return (char *)y; /* found pattern */ } while (*ty++ == *tx++); a225 1 y += qs_bc[(int)*(y+m)]; /* perform shift */ a226 1 d232 1 d236 5 d243 1 a243 1 char *(*func)(const char *, const char *); d245 3 a247 3 { "STRSTR", strstr }, { "OBF", str_locate_obf }, { "QS", str_locate_qs }, d281 8 a288 8 rc = tab1[i].func(cp, "aalii"); rc = tab1[i].func(cp, "abacate"); rc = tab1[i].func(cp, "abiogenesist"); rc = tab1[i].func(cp, "Acalepha"); rc = tab1[i].func(cp, "absorbedness"); rc = tab1[i].func(cp, "Nepidae"); rc = tab1[i].func(cp, "Zyrian"); rc = tab1[i].func(cp, "Zyzzogeton"); @ 1.3 log @*** empty log message *** @ text @d29 1 a29 6 #include #include #include #include #include "str.h" d303 4 @ 1.2 log @*** empty log message *** @ text @d1 3 a3 5 /* ** str_search.c -- String Searching ** ** ==================================================================== ** Copyright (c) 1999 Ralf S. Engelschall. All rights reserved. d5 2 a6 3 ** Redistribution and use in source and binary forms, with or without ** modification, are permitted provided that the following conditions ** are met: d8 4 a11 2 ** 1. Redistributions of source code must retain the above copyright ** notice, this list of conditions and the following disclaimer. d13 12 a24 4 ** 2. Redistributions in binary form must reproduce the above copyright ** notice, this list of conditions and the following disclaimer in ** the documentation and/or other materials provided with the ** distribution. d26 1 a26 13 ** THIS SOFTWARE IS PROVIDED BY RALF S. ENGELSCHALL ``AS IS'' AND ANY ** EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ** PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RALF S. ENGELSCHALL OR ** ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ** STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ** OF THE POSSIBILITY OF SUCH DAMAGE. ** ==================================================================== d29 5 d113 1 a113 1 * str_find -- locate a substring in a string d116 31 a146 6 * algorithms. It was originally written by Stephen R. van den Berg * . He commented it this way: * ``Until someone tells me otherwise, I assume that this is the fastest * implementation of strstr() in C. I deliberately chose not to comment * it. You should have at least as much fun trying to understand it, as * I had to write it :-).'' d148 1 a148 1 char *str_find(const char *phaystack, const char *pneedle) d182 1 a182 1 a = a; /* dummy operation, just to make gcc happy -- RSE */ d224 142 @ 1.1 log @Initial revision @ text @d80 1 d82 1 a82 1 while (sc != NUL && n >= 0) { d86 1 d96 3 d100 1 a100 1 while (sp >= (s-1)) { d104 1 @ 1.1.1.1 log @ @ text @@