head	1.10;
access;
symbols
	STR_0_9_12:1.10
	LMTP2NNTP_1_4_1:1.10
	LMTP2NNTP_1_4_0:1.10
	STR_0_9_11:1.10
	STR_0_9_10:1.10
	LMTP2NNTP_1_3_0:1.9
	LMTP2NNTP_1_3b2:1.9
	LMTP2NNTP_1_3b1:1.9
	LMTP2NNTP_1_3a3:1.9
	LMTP2NNTP_1_3a2:1.9
	STR_0_9_9:1.9
	LMTP2NNTP_1_3a1:1.9
	STR_0_9_8:1.9
	LMTP2NNTP_1_2_0:1.9
	LMTP2NNTP_1_2b4:1.9
	LMTP2NNTP_1_2b3:1.9
	LMTP2NNTP_1_2b2:1.9
	LMTP2NNTP_1_2b1:1.9
	LMTP2NNTP_1_2a8:1.9
	LMTP2NNTP_1_2a7:1.9
	LMTP2NNTP_1_2a6:1.8
	LMTP2NNTP_1_2a5:1.8
	STR_0_9_7:1.8
	LMTP2NNTP_1_2a4:1.7
	LMTP2NNTP_1_2a3:1.7
	OSSP_RC_SPEC:1.7
	LMTP2NNTP_1_2a1:1.6
	LMTP2NNTP_1_1_1:1.6
	LMTP2NNTP_1_1_0:1.6
	LMTP2NNTP_1_1b4:1.6
	LMTP2NNTP_1_1b3:1.6
	LMTP2NNTP_1_1b2:1.6
	LMTP2NNTP_1_1b1:1.6
	STR_0_9_6:1.6
	STR_0_9_5:1.6
	STR_0_9_4:1.5
	STR_0_9_3:1.5
	STR_0_9_2:1.5
	STR_0_9_1:1.5
	STR_0_9_0:1.4;
locks; strict;
comment	@ * @;


1.10
date	2005.01.24.15.22.19;	author rse;	state Exp;
branches;
next	1.9;

1.9
date	2003.01.06.19.13.48;	author rse;	state Exp;
branches;
next	1.8;

1.8
date	2002.04.01.08.32.55;	author rse;	state Exp;
branches;
next	1.7;

1.7
date	2002.01.02.17.09.14;	author rse;	state Exp;
branches;
next	1.6;

1.6
date	2001.08.16.13.21.23;	author rse;	state Exp;
branches;
next	1.5;

1.5
date	2000.01.04.19.42.00;	author rse;	state Exp;
branches;
next	1.4;

1.4
date	2000.01.01.13.05.18;	author rse;	state Exp;
branches;
next	1.3;

1.3
date	99.12.27.18.40.58;	author rse;	state Exp;
branches;
next	1.2;

1.2
date	99.12.27.14.02.40;	author rse;	state Exp;
branches;
next	1.1;

1.1
date	99.12.27.13.43.06;	author rse;	state Exp;
branches;
next	;


desc
@@


1.10
log
@Adjusted copyright messages for new year 2004/2005.
@
text
@/*
**  OSSP str - String Handling
**  Copyright (c) 1999-2005 Ralf S. Engelschall <rse@@engelschall.com>
**  Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
**  This file is part of OSSP str, a string handling and manipulation
**  library which can be found at http://www.ossp.org/pkg/lib/str/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  str_token.c: tokenizing functions
*/

#include "str_p.h"

/*
 * str_token -- tokenize a string.
 * This is the implementation of our tokenization function str_token(3).
 * It is partly derived from an ancient strqtok(3) function, written
 * 1991 by William Deich <will@@surya.caltech.edu> which itself was
 * already a superset of POSIX strtok(3). The main differences between
 * our str_token(3) and a plain POSIX strtok(3) are that our str_token(3)
 * is reentrant to support multithreading environments, supports
 * quoted tokens, can ignore trailing comments and is aware of ANSI-C
 * backslashed escape sequences and trigraphs. This way it is a lot
 * more powerful and useful in practice than a stock POSIX strtok(3) or
 * similar functions.
 */

/*
 * isoneof -- tell whether character c occurs in the string s
 */
static int
isoneof(
    register char c,        /* character to look for */
    register const char *s) /* NUL-terminated set of candidate characters */
{
    while (*s != NUL && *s != c)
        s++;
    /* stopping short of the terminator means c was found */
    return (*s != NUL) ? TRUE : FALSE;
}

/*
 * nextchar -- decode the next (possibly escaped) character of a string
 */
static char *
nextchar(
    register char *s,  /* string from which to collect character(s) */
    register char *c,  /* return character here */
    int bTrigraphs,    /* whether to interpret trigraphs according to ANSI rules */
    int *bBackslashed) /* return TRUE if character was started with a backslash */
{
    register unsigned int ch; /* unsigned: shifting it is always well defined */
    int n;

    /*
     * Returns the position behind the consumed input, or NULL if the
     * character stopped at is the terminating NUL (end of string).
     * One call consumes a whole escape or trigraph sequence.
     */
    if ((*bBackslashed = (*s == '\\'))) {
        /*
         * ANSI C backslashed escape sequence ("\x")
         */
        switch (*++s) {
            case 'a':  *c = '\a'; break;
            case 'b':  *c = '\b'; break;
            case 'f':  *c = '\f'; break;
            case 'n':  *c = '\n'; break;
            case 'r':  *c = '\r'; break;
            case 't':  *c = '\t'; break;
            case 'v':  *c = '\v'; break;
            case '\\': *c = '\\'; break;
            case '^':  *c = '^';  break;
            case '\'': *c = '\''; break;
            case '"':  *c = '"';  break;
            case '?':  *c = '?';  break;
            case '0': case '1': case '2':
            case '3': case '4': case '5':
            case '6': case '7':
                /* convert up to three octal digits into number */
                ch = 0;
                for (n = 0; n < 3 && *s >= '0' && *s <= '7'; n++) {
                    ch = (ch << 3) | (unsigned int)(*s++ - '0');
                }
                s--; /* back onto the last digit; the return below steps over it */
                *c = (char)ch;
                break;
            case 'x':
                /* convert hexadecimal digits (any number of them) into number */
                s++;
                for (ch = 0; str_isxdigit(*s); s++) {
                    ch <<= 4;
                    ch |= (unsigned int)(str_isdigit(*s) ? *s - '0' :
                          str_islower(*s) ? *s + 10 - 'a' : *s + 10 - 'A');
                }
                s--; /* back onto the last character consumed */
                *c = (char)ch;
                break;
            default:
                /* unknown escape (or NUL after a trailing backslash): take as is */
                *c = *s;
                break;
        }
    }
    else if (bTrigraphs && (*s == '?') && (*(s + 1) == '?')) {
        /*
         * ANSI C trigraph ("??x"): on a match the translated character
         * is returned and s moves onto the third character of the sequence
         */
        switch (*(s + 2)) {
            case '=':  *c = '#';  s += 2; break;
            case '(':  *c = '[';  s += 2; break;
            case '/':  *c = '\\'; s += 2; break;
            case ')':  *c = ']';  s += 2; break;
            case '\'': *c = '^';  s += 2; break;
            case '<':  *c = '{';  s += 2; break;
            case '!':  *c = '|';  s += 2; break;
            case '>':  *c = '}';  s += 2; break;
            case '-':  *c = '~';  s += 2; break;
            default:
                /* not a trigraph sequence: hand out the first '?' only */
                *c = *s;
                break;
        }
    }
    else {
        /*
         * Ordinary Character
         */
        *c = *s;
    }
    return (*s != NUL) ? s + 1 : NULL;
}

/*
 * str_token -- the API tokenization function: cuts the next token out of *s (in place) or returns NULL
 */
char *
str_token(
    char **s,               /* in/out: parsing cursor; the text it points to gets overwritten */
    const char *cs_delim,   /* delimiter characters (NULL makes the call return NULL) */
    const char *cs_quote,   /* quote characters (NULL is treated as "") */
    const char *cs_comment, /* comment-start characters (NULL is treated as "") */
    int mode)               /* bit set; STR_BACKSLASHESC, STR_TRIGRAPHS, STR_STRIPQUOTES, STR_SKIPDELIMS are tested */
{
    register char *p, *q;
    int bBackslashed, bInQuote, bInToken, bWithTrigraphs;
    char c, cLeftQuote;
    char *cpToken;

    /* argument checking */
    if (s == NULL || *s == NULL)
        return NULL;
    if (cs_delim == NULL)
        return NULL;
    if (cs_quote == NULL)
        cs_quote = "";
    if (cs_comment == NULL)
        cs_comment = "";

    /* skip leading delimiters */
    p = *s;
    while (*p != NUL && isoneof(*p, cs_delim))
        p++;

    /* end of string, so stop parsing (the cursor *s is left untouched) */
    if (*p == NUL)
        return NULL;

    /*
     * start of comment reached, so stop parsing but move the parsing
     * cursor just behind the comment character in case the user wants
     * to recover the comment
     */
    if (isoneof(*p, cs_comment)) {
        *s = p;
        (*s)++;
        return NULL;
    }

    /*
     * Set `cpToken' to point to returned string.
     * Then use p and q to walk through the string:
     *  - p will follow the input characters;
     *  - q will overwrite string with output characters,
     *      (minus possibly-stripped quotes and including NULs after tokens)
     */

    cpToken  = q = p;
    bInQuote = FALSE;
    bInToken = TRUE;
    bWithTrigraphs = (mode & STR_TRIGRAPHS);
    cLeftQuote = NUL; /* quote character that opened the current quoted segment */

    if ((mode & STR_BACKSLASHESC) || (mode & STR_TRIGRAPHS)) {
        /*
         * parse via nextchar(): c holds the decoded character and p is already behind its input
         */
        while (bInToken && (p = nextchar(p, &c, bWithTrigraphs, &bBackslashed)) != NULL) {
            if (bBackslashed) {
                /* escaped: copied as is, never tested as delimiter, comment or quote */
                *q++ = c;
            }
            else if (!bInQuote && *cs_delim != NUL && isoneof(c, cs_delim)) {
                /* reached end of token */
                *q = NUL;
                bInToken = FALSE;
            }
            else if (!bInQuote && *cs_comment != NUL && isoneof(c, cs_comment)) {
                /* reached start of comment: end token, cut input at p (behind the comment character) */
                *q = NUL;
                *p = NUL;
                bInToken = FALSE;
            }
            else if (!bInQuote && *cs_quote != NUL && isoneof(c, cs_quote)) {
                /* beginning a quoted segment */
                bInQuote = TRUE;
                cLeftQuote = c;
                if (!(mode & STR_STRIPQUOTES))
                    *q++ = c;
            }
            else if (bInQuote && cLeftQuote == c) {
                /* ending a quoted segment (only the opening quote character closes it) */
                bInQuote = FALSE;
                if (!(mode & STR_STRIPQUOTES))
                    *q++ = cLeftQuote;
            }
            else {
                /* ordinary character */
                *q++ = c;
            }
        }
    }
    else {
        /*
         * parse while ignoring backslash escapes; here p is advanced by hand in each branch
         */
        while (bInToken && *p != NUL) {
            if (!bInQuote && *cs_delim != NUL && isoneof(*p, cs_delim)) {
                /* reached end of token */
                *q = NUL;
                p++;
                bInToken = FALSE;
            }
            else if (!bInQuote && *cs_comment != NUL && isoneof(*p, cs_comment)) {
                /* reached start of comment: end token and overwrite the comment character with NUL */
                *q = NUL;
                *p = NUL;
                bInToken = FALSE;
            }
            else if (!bInQuote && *cs_quote != NUL && isoneof(*p, cs_quote)) {
                /* beginning a quoted segment */
                bInQuote = TRUE;
                cLeftQuote = *p++;
                if (!(mode & STR_STRIPQUOTES))
                    *q++ = cLeftQuote;
            }
            else if (bInQuote && cLeftQuote == *p) {
                /* ending a quoted segment (only the opening quote character closes it) */
                bInQuote = FALSE;
                p++;
                if (!(mode & STR_STRIPQUOTES))
                    *q++ = cLeftQuote;
            }
            else {
                /* ordinary character */
                *q++ = *p++;
            }
        }
    }

    /* terminate token and update parsing cursor (p is NULL if nextchar() hit the end of the string) */
    *q = NUL;
    *s = p;

    /* skip trailing delimiters
       (if requested only, else we do it on next round) */
    if ((mode & STR_SKIPDELIMS) && *s != NULL) {
        while (*(*s) != NUL && isoneof(*(*s), cs_delim))
            (*s)++;
    }

    /* return the resulting token (start of the in-place rewritten text) */
    return cpToken;
}

@


1.9
log
@- adjust copyright messages for new year 2003
- strip trailing whitespaces
- consistently use OSSP ASCII-art
- add standard devtool.conf stuff from OSSP sa
@
text
@d3 2
a4 2
**  Copyright (c) 1999-2003 Ralf S. Engelschall <rse@@engelschall.com>
**  Copyright (c) 1999-2003 The OSSP Project <http://www.ossp.org/>
@


1.8
log
@finally switch to full OSSP branding
@
text
@d3 2
a4 2
**  Copyright (c) 1999-2002 Ralf S. Engelschall <rse@@engelschall.com>
**  Copyright (c) 1999-2002 The OSSP Project <http://www.ossp.org/>
d6 1
a6 1
**  This file is part of OSSP str, a string handling and manipulation 
d27 1
a27 1
**  str_token.c: tokenizing functions 
d47 1
a47 1
 * isoneof -- check whether c is one of the chars in c 
d49 1
a49 1
static int 
d51 1
a51 1
    register char c, 
d61 1
a61 1
 * nextchar -- get next character from a string  
d180 1
a180 1
    p = *s; 
d185 1
a185 1
    if (*p == NUL) 
d188 1
a188 1
    /* 
d198 1
a198 1
    /* 
d213 1
a213 1
        /* 
d223 1
a223 1
                *q = NUL;      
d234 1
a234 1
                bInQuote = TRUE; 
d252 1
a252 1
        /* 
d277 1
a277 1
                bInQuote = FALSE; 
d291 1
a291 1
    *s = p; 
d293 1
a293 1
    /* skip trailing delimiters 
@


1.7
log
@bump copyright year
@
text
@d2 1
a2 1
**  Str - String Library
d4 1
d6 2
a7 2
**  This file is part of Str, a string handling and manipulation 
**  library which can be found at http://www.engelschall.com/sw/str/.
@


1.6
log
@Adjust copyright for year 2001.
@
text
@d3 1
a3 1
**  Copyright (c) 1999-2001 Ralf S. Engelschall <rse@@engelschall.com>
@


1.5
log
@*** empty log message ***
@
text
@d3 1
a3 1
**  Copyright (c) 1999-2000 Ralf S. Engelschall <rse@@engelschall.com>
@


1.4
log
@*** empty log message ***
@
text
@d183 2
a184 2
    /* end of string or start of comment reached, so stop parsing */
    if (*p == NUL || isoneof(*p, cs_comment))
d186 10
@


1.3
log
@*** empty log message ***
@
text
@d3 1
a3 1
**  Copyright (c) 1999 Ralf S. Engelschall <rse@@engelschall.com>
@


1.2
log
@*** empty log message ***
@
text
@d32 1
@


1.1
log
@.
@
text
@d45 15
a59 1
 * str_token_char -- get next character from a string  
d62 1
a62 1
str_token_char(
d179 1
a179 1
    while (*p != NUL && strchr(cs_delim, *p))
d183 1
a183 1
    if (*p == NUL || strchr(cs_comment, *p))
d204 1
a204 1
        while (bInToken && (p = str_token_char(p, &c, bWithTrigraphs, &bBackslashed)) != NULL) {
d209 1
a209 1
            else if (!bInQuote && *cs_delim != NUL && strchr(cs_delim, c) != NULL) {
d214 1
a214 1
            else if (!bInQuote && *cs_comment != NUL && strchr(cs_comment, c) != NULL) {
d220 1
a220 1
            else if (!bInQuote && *cs_quote != NUL && strchr(cs_quote, c) != NULL) {
d244 1
a244 1
            if (!bInQuote && *cs_delim != NUL && strchr(cs_delim, *p) != NULL) {
d250 1
a250 1
            else if (!bInQuote && *cs_comment != NUL && strchr(cs_comment, *p) != NULL) {
d256 1
a256 1
            else if (!bInQuote && *cs_quote != NUL && strchr(cs_quote, *p) != NULL) {
d284 1
a284 1
        while (*(*s) != NUL && strchr(cs_delim, *(*s)))
@