head 1.2; access; symbols; locks; strict; comment @// @; 1.2 date 2000.12.13.17.37.25; author simons; state dead; branches; next 1.1; 1.1 date 2000.12.13.15.46.39; author simons; state Exp; branches; next ; desc @@ 1.2 log @Removed unused C++ code. @ text @/* * $Source: /d1/e/petidomo/cvs/petidomo/source/libtext/RegexTokenizer.cpp,v $ * $Revision: 1.1 $ * $Date: 2000/12/13 15:46:39 $ * * Copyright (c) 1999 by CyberSolutions GmbH, Germany. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by CyberSolutions GmbH. * * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "text.hpp" char RegexTokenizer::workspace[RegexTokenizer::N_substring+1]=""; RegexTokenizer::RegexTokenizer(){ } void RegexTokenizer::reset(){ input= string(); int i=N_pm; while(--i>0){ pm[i].rm_so=-1; pm[i].rm_eo=-1; } so= 0; eo= 0; previous_eo= -1; error= 0; } int RegexTokenizer::set(string _input,list _regex){ reset(); input= _input; list::iterator first= _regex.begin(); list::iterator last = _regex.end(); while(first!=last){ regex_t re; int i; //REG_EXTENDED //use extended regular expressions //REG_NEWLINE //makes ^...$ work to match newline/endofline i= regcomp (&re, *first, REG_EXTENDED|REG_NEWLINE); if(i) return i; regex.push_back(re); regex_src.push_back(*first); ++first; } } RegexTokenizer::RegexTokenizer(string _input,Mode _mode){ mode= _mode; //create a list listalist; switch(_mode){ case Word: alist.push_back("([^ \t\n]*)([ \t\n]*)"); break; case Line: alist.push_back("^(.*)$\n"); break; case RFC: alist.push_back("((^.*$)((\n)^[ \t]+.*$)*)(\n)?"); //this works, but output is confusing // that is, how to remove the glue ? break; case Custom: //break; default: cerr<<"RegexTokenizer mode constructor called with pointless mode."<alist; alist.push_back(oneregex); set(_input,alist); } RegexTokenizer::RegexTokenizer(string _input,list _regex){ set(_input,_regex); } RegexTokenizer::RegexTokenizer(const RegexTokenizer &r){ //cerr<<"(copy constructor)"<::iterator first= regex.begin(); list::iterator last = regex.end(); error= 1; previous_eo= eo; while(error && result[0].empty() && first!=last){//check for empty buffer { //cerr<0){ result[i]= string(); } i=0; while(i=0 && pm[i].rm_eo>0 && pm[i].rm_so::iterator first= regex.begin(); list::iterator last = regex.end(); while(first!=last){ //cerr<<"freeing "<<&*first<