head 1.8; access; symbols str_init:1.1.1.1 str:1.1.1; locks; strict; comment @ * @; 1.8 date 99.11.23.08.57.35; author rse; state dead; branches; next 1.7; 1.7 date 99.11.22.20.27.11; author rse; state Exp; branches; next 1.6; 1.6 date 99.11.22.17.38.43; author rse; state Exp; branches; next 1.5; 1.5 date 99.11.22.17.38.28; author rse; state Exp; branches; next 1.4; 1.4 date 99.11.22.17.38.14; author rse; state Exp; branches; next 1.3; 1.3 date 99.11.22.17.37.55; author rse; state Exp; branches; next 1.2; 1.2 date 99.11.22.17.30.40; author rse; state Exp; branches; next 1.1; 1.1 date 99.11.22.17.29.12; author rse; state Exp; branches 1.1.1.1; next ; 1.1.1.1 date 99.11.22.17.29.12; author rse; state Exp; branches; next ; desc @@ 1.8 log @*** empty log message *** @ text @/* ** str_match.c -- String Matching ** ** ==================================================================== ** Copyright (c) 1999 Ralf S. Engelschall. All rights reserved. ** ** Redistribution and use in source and binary forms, with or without ** modification, are permitted provided that the following conditions ** are met: ** ** 1. Redistributions of source code must retain the above copyright ** notice, this list of conditions and the following disclaimer. ** ** 2. Redistributions in binary form must reproduce the above copyright ** notice, this list of conditions and the following disclaimer in ** the documentation and/or other materials provided with the ** distribution. ** ** THIS SOFTWARE IS PROVIDED BY RALF S. ENGELSCHALL ``AS IS'' AND ANY ** EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ** PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RALF S. ENGELSCHALL OR ** ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ** HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ** STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ** OF THE POSSIBILITY OF SUCH DAMAGE. ** ==================================================================== */ #include "str.h" #include "str_config.h" #ifdef STR_PCRE #include #endif #include #ifdef STR_PCRE /* compile a regular expression pattern from string into internal format */ static int pattern_compile( const char *ptr, int len, int opt, pcre **p_pcre, pcre_extra **p_pcre_extra) { const char *err_str; char buf[128]; int err_pos; char *cp; if (ptr[len] == NUL) { /* plain string, so we can speed up processing... */ *p_pcre = pcre_compile(ptr, opt, &err_str, &err_pos, NULL); } else { /* ...else we have to create a temporary NUL-terminated string */ if (len < sizeof(buf)) { /* either we use a local buffer to avoid malloc/free ping-pong... */ memcpy(buf, ptr, len); buf[len] = NUL; *p_pcre = pcre_compile(buf, opt, &err_str, &err_pos, NULL); } else { /* ...or we have to actually allocate a memory chunk :-( */ if ((cp = malloc(len+1)) == NULL) return FALSE; memcpy(cp, ptr, len); cp[len] = NUL; *p_pcre = pcre_compile(cp, opt, &err_str, &err_pos, NULL); free(cp); } } if (*p_pcre == NULL) return FALSE; /* optionally study pattern */ if (p_pcre_extra != NULL) { *p_pcre_extra = pcre_study(*p_pcre, 0, &err_str); if (err_str != NULL) { free(p_pcre); return FALSE; } } return TRUE; } /* the hash table entry in the pattern cache */ struct hash_entry { struct hash_entry *next; char *key; int keylen; pcre *p_pcre; pcre_extra *p_pcre_extra; }; /* size of the cache hash table; is prime */ #define HASH_SIZE 101 /* the pattern cache hash table */ static struct hash_entry *pattern_hash[HASH_SIZE]; /* initialization flag for hash table */ static int hash_initialized = FALSE; /* initialize cache hash table */ static void hash_init(void) { int i; for (i = 0; i < HASH_SIZE; i++) pattern_hash[i] = NULL; return; } /* destroy cache hash table */ static void hash_destroy(void) { int i; struct hash_entry *he, *ohe; for (i = 0; i < HASH_SIZE; i++) { he = pattern_hash[i]; pattern_hash[i] = NULL; while (he != NULL) { ohe = he; he = he->next; free(ohe); } } return; } /* the hashing function: a popular `times 33' hash */ static unsigned int hash_func( const char *key, int keylen) { unsigned int h; int i; h = 0xDEAD; for (i = 0; key[i] != NUL; i++) h = ((((h<<5)+h)+key[i]) % HASH_SIZE); return h; } /* cache a pattern */ static void pattern_cache( const char *key, int keylen, pcre *p_pcre, pcre_extra *p_pcre_extra) { int h; struct hash_entry *he, *che; if ((he = malloc(sizeof(struct hash_entry))) == NULL) return; if ((he->key = malloc(keylen)) == NULL) { free(he); return; } he->next = NULL; memcpy(he->key, key, keylen); he->keylen = keylen; he->p_pcre = p_pcre; he->p_pcre_extra = p_pcre_extra; h = hash_func(key, keylen); if (pattern_hash[h] == NULL) pattern_hash[h] = he; else { che = pattern_hash[h]; while (che->next != NULL) che = che->next; che->next = he; } return; } /* lookup a pattern */ static void pattern_lookup( const char *key, int keylen, pcre **p_pcre, pcre_extra **p_pcre_extra) { int h; struct hash_entry *he; *p_pcre = NULL; *p_pcre_extra = NULL; h = hash_func(key, keylen); if ((he = pattern_hash[h]) == NULL) return; while (he->next != NULL) { if (he->keylen == keylen) if (memcmp(he->key, key, keylen)) break; he = he->next; } *p_pcre = he->p_pcre; *p_pcre_extra = he->p_pcre_extra; return; } #endif /* STR_PCRE */ /* the API matching function */ int str_match(const char *string, const char *pattern, ...) { #ifndef STR_PCRE return (-1); #else pcre *p_pcre; pcre_extra *p_pcre_extra; const char *match_ptr; int match_len; int match_opt; int match_once; int match_1resbuf; int *cap_vec; int cap_num; int cap_len; char *cp; char **cpp; int n; int i; int k; int l; va_list ap; int ismop; /* hash table initialization */ if (!hash_initialized) { hash_init(); atexit(hash_destroy); hash_initialized = TRUE; } /* hash table destruction */ if (string == NULL && pattern == NULL) { hash_destroy(); return 0; } /* check input parameters */ if (string == NULL || pattern == NULL) return -1; /* parse pattern */ match_ptr = NULL; match_len = 0; match_opt = 0; match_once = FALSE; match_1resbuf = FALSE; ismop = FALSE; cp = NULL; if (*pattern == 'm' && strlen(pattern) >= 3) if ((cp = str_span(pattern, STR_FULL, "imsxob", STR_RIGHT)) > pattern+1) if (*(pattern+1) == *cp) ismop = TRUE; if (ismop) { match_ptr = pattern + 2; match_len = cp - match_ptr; cp++; for (i = 0; cp[i] != NUL; i++) { switch (cp[i]) { case 'i': match_opt |= PCRE_CASELESS; break; case 'm': match_opt |= PCRE_MULTILINE; break; case 's': match_opt |= PCRE_DOTALL; break; case 'x': match_opt |= PCRE_EXTENDED; break; case 'o': match_once = TRUE; break; case 'b': match_1resbuf = TRUE; break; default: return -1; } } } else { match_ptr = pattern; match_len = strlen(pattern); } /* compile pattern into internal PCRE structure */ if (match_once) { /* optimized processing: up to factor 15(!) for complex regular expressions */ pattern_lookup(match_ptr, match_len, &p_pcre, &p_pcre_extra); if (p_pcre == NULL) { if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, &p_pcre_extra)) return -1; pattern_cache(match_ptr, match_len, p_pcre, p_pcre_extra); } } else { /* unoptimized processing */ p_pcre_extra = NULL; if (!pattern_compile(match_ptr, match_len, match_opt, &p_pcre, NULL)) return -1; } /* allocate storage for offset table of captured substrings */ cap_vec = NULL; cap_len = 0; cap_num = pcre_info(p_pcre, NULL, NULL); if (cap_num > 0) { cap_len = (cap_num+1)*3; if ((cap_vec = malloc(cap_len*sizeof(int))) == NULL) { if (!match_once) { free(p_pcre); free(p_pcre_extra); } return -1; } } /* perform the matching */ n = pcre_exec(p_pcre, p_pcre_extra, string, strlen(string), 0, 0, cap_vec, cap_len); /* error cases */ if (n < 0) { if (cap_vec != NULL) free(cap_vec); if (!match_once) { free(p_pcre); free(p_pcre_extra); } if (n == PCRE_ERROR_NOMATCH) return 0; return -1; } /* extract captured substrings into caller provided pointer variables */ if (cap_num > 0) { va_start(ap, pattern); if (match_1resbuf) { /* use a single result buffer */ l = 0; for (i = 1; i <= cap_num && i <= (n-1); i++) { if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) { k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]); if (k > 0) l += k+1; } } cpp = va_arg(ap, char **); if (cpp == NULL) cpp = &cp; if ((*cpp = malloc(l)) != NULL) { cp = *cpp; for (i = 1; i <= cap_num; i++) { cpp = va_arg(ap, char **); if (cpp != NULL) { if (i <= (n-1)) { if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) { k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]); if (k > 0) { memcpy(cp, (char *)(string+cap_vec[(i*2)]), k); cp += k; *cp++ = NUL; continue; } } } *cpp = cp; *cp++ = NUL; } } } } else { /* use multiple result buffers */ for (i = 1; i <= cap_num; i++) { cpp = va_arg(ap, char **); if (cpp != NULL) { if (i <= (n-1)) { if (cap_vec[(i*2)] != -1 && cap_vec[(i*2)+1] != -1) { k = (cap_vec[(i*2)+1] - cap_vec[(i*2)]); if (k > 0) { if ((*cpp = malloc(k+1)) != NULL) { memcpy(*cpp, (char *)(string+cap_vec[(i*2)]), k); (*cpp)[k] = NUL; continue; } } } } *cpp = strdup(""); } } } va_end(ap); } /* cleanup */ if (cap_vec != NULL) free(cap_vec); if (!match_once) { free(p_pcre); free(p_pcre_extra); } /* return success */ return 1; #endif /* STR_PCRE */ } @ 1.7 log @*** empty log message *** @ text @@ 1.6 log @*** empty log message *** @ text @d273 1 a273 1 if ((cp = str_span(pattern, STR_FULL, "imsxo", STR_RIGHT)) > pattern+1) @ 1.5 log @*** empty log message *** @ text @a272 1 /* if ((cp = str_rspn(pattern, "imsxo")) > pattern+1) */ @ 1.4 log @*** empty log message *** @ text @d143 1 a143 1 /* the hashing function: the popular `times 33' hash */ @ 1.3 log @*** empty log message *** @ text @d83 1 d93 1 @ 1.2 log @*** empty log message *** @ text @d55 1 d61 1 a61 2 if ((*p_pcre = pcre_compile(ptr, opt, &err_str, &err_pos, NULL)) == NULL) return FALSE; d65 15 a79 8 if ((cp = malloc(len+1)) == NULL) return FALSE; memcpy(cp, ptr, len); cp[len] = NUL; *p_pcre = pcre_compile(cp, opt, &err_str, &err_pos, NULL); free(cp); if (*p_pcre == NULL) return FALSE; d81 2 @ 1.1 log @Initial revision @ text @a291 1 fprintf(stderr, "hash lookup: %s\n", p_pcre == NULL ? "missed" : "found"); @ 1.1.1.1 log @ @ text @@