Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members  

ixlib_re.hh

Go to the documentation of this file.
00001 // ----------------------------------------------------------------------------
00002 //  Description      : Regular expressions string object
00003 // ----------------------------------------------------------------------------
00004 //  (c) Copyright 1998 by iXiONmedia, all rights reserved.
00005 // ----------------------------------------------------------------------------
00006 
00007 
00008 
00009 
00010 #ifndef IXLIB_RE
00011 #define IXLIB_RE
00012 
00013 
00014 
00015 
00016 #include <vector>
00017 #include <memory>
00018 #include <ixlib_exgen.hh>
00019 #include <ixlib_string.hh>
00020 
00021 
00022 
00023 
00024 // Regex exceptions -----------------------------------------------------------
00025 #define ECRE_INVQUANTIFIER              0
00026 #define ECRE_UNBALBACKREF               1
00027 #define ECRE_INVESCAPE                  2
00028 #define ECRE_INVBACKREF                 3
00029 #define ECRE_UNTERMCLASS                4
00030 #define ECRE_NOPATTERN                  5
00031 
00032 
00033 
00034 
00035 namespace ixion {
00036   class regex_exception : public base_exception {
00037     public:
00038     regex_exception(TErrorCode error,char const *info = NULL,char *module = NULL,TIndex line = 0);
00039     virtual char *getText() const;
00040     };
00041   }
00042 
00043 
00044 
00045 
00046 // Regex token macros ---------------------------------------------------------
00047 #define XSTRRE_LITERAL          '\\'
00048 #define XSTRRE_BACKREF          '\\'
00049 #define XSTRRE_ESCAPESEQ        '\\'
00050 #define XSTRRE_ANYCHAR          '.'
00051 #define XSTRRE_START            '^'
00052 #define XSTRRE_END              '$'
00053 #define XSTRRE_ALTERNATIVE      '|'
00054 #define XSTRRE_CLASSSTART       '['
00055 #define XSTRRE_CLASSNEG         '^'
00056 #define XSTRRE_CLASSRANGE       '-'
00057 #define XSTRRE_CLASSSTOP        ']'
00058 
00059 #define XSTRRE_BACKREFSTART     '('
00060 #define XSTRRE_BACKREFSTOP      ')'
00061 
00062 #define XSTRREQ_0PLUS           '*'
00063 #define XSTRREQ_1PLUS           '+'
00064 #define XSTRREQ_01              '?'
00065 #define XSTRREQ_START           '{'
00066 #define XSTRREQ_RANGE           ','
00067 #define XSTRREQ_STOP            '}'
00068 #define XSTRREQ_NONGREEDY       '?'
00069 
00070 
00071 
00072 
00073 namespace ixion {
00083   template<class T>
00084   class regex {
00085     protected:
00086       // various helper classes -----------------------------------------------
00087       class backref_stack {
00088         private:
00089           struct backref_entry {
00090             enum { OPEN,CLOSE }                 Type;
00091             TIndex                              Index;
00092             };
00093           
00094           typedef std::vector<backref_entry>    internal_stack;
00095           
00096           internal_stack                        Stack;
00097         
00098         public:
00099           typedef TSize                         rewind_info;
00100           
00101           void open(TIndex index);
00102           void close(TIndex index);
00103           
00104           rewind_info getRewindInfo() const;
00105           void rewind(rewind_info ri);
00106           void clear();
00107           
00108           TSize size();
00109           T get(TIndex number,T const &candidate) const;
00110         };
00111 
00112 
00113 
00114 
00115       // matchers -------------------------------------------------------------
00116       class matcher {
00117         protected:
00118           matcher                   *Next;
00119           bool                      OwnNext;
00120           TSize                     MatchLength;
00121       
00122         public:
00123           matcher();
00124           virtual ~matcher();
00125           
00126           virtual matcher *duplicate() const = 0;
00127           
00128           TSize getMatchLength() const { 
00129             return MatchLength; 
00130             }
00131           TSize subsequentMatchLength() const;
00132           virtual TSize minimumMatchLength() const = 0;
00133           TSize minimumSubsequentMatchLength() const;
00134   
00135           matcher *getNext() const { 
00136             return Next; 
00137             }
00138           virtual void setNext(matcher *next,bool ownnext = true) {
00139             Next = next;
00140             OwnNext = ownnext;
00141             }
00142           
00143           // this routine must set the MatchLength member correctly.
00144           virtual bool match(backref_stack &brstack,T const &candidate,TIndex at)
00145             = 0;
00146       
00147         protected:
00148           bool matchNext(backref_stack &brstack,T const &candidate,TIndex at) const { 
00149             return Next ? Next->match(brstack,candidate,at) : true; 
00150             }
00151           void copy(matcher const *src);
00152         };
00153     
00154   
00155   
00156   
00157       class quantifier : public matcher {
00158         private:
00159           typedef matcher           super;
00160           bool                      Greedy,MaxValid;
00161           TSize                     MinCount,MaxCount;
00162           matcher                   *Quantified;
00163     
00164         struct backtrack_stack_entry {
00165           TIndex                          Index;
00166           backref_stack::rewind_info      RewindInfo;
00167           };
00168   
00169         public:
00170           quantifier() 
00171             : Quantified(NULL) {
00172             }
00173           quantifier(bool greedy,TSize mincount);
00174           quantifier(bool greedy,TSize mincount,TSize maxcount);
00175           ~quantifier();
00176       
00177           matcher *duplicate() const;
00178           
00179           TSize minimumMatchLength() const;
00180           
00181           void setQuantified(matcher *quantified) { 
00182             Quantified = quantified;
00183             }
00184           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00185 
00186         protected:
00187           void copy(quantifier const *src);
00188         };
00189     
00190   
00191   
00192   
00193       class sequence_matcher : public matcher {
00194           T               MatchStr;
00195       
00196         public:
00197           sequence_matcher(T const &matchstr);
00198 
00199           matcher *duplicate() const;
00200 
00201           TSize minimumMatchLength() const {
00202             return MatchStr.size();
00203             }
00204           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00205         };
00206     
00207   
00208   
00209   
00210       class any_matcher : public matcher {
00211         public:
00212           any_matcher() { 
00213             MatchLength = 1; 
00214             }
00215 
00216           matcher *duplicate() const;
00217 
00218           TSize minimumMatchLength() const {
00219             return 1;
00220             }
00221           bool match(backref_stack &brstack,T const &candidate,TIndex at) { 
00222             return at < candidate.size() && matchNext(brstack,candidate,at+1); 
00223             }
00224         };
00225     
00226   
00227   
00228   
00229       class start_matcher : public matcher {
00230         public:
00231           start_matcher() { 
00232             MatchLength = 0; 
00233             }
00234 
00235           matcher *duplicate() const;
00236 
00237           TSize minimumMatchLength() const {
00238             return 0;
00239             }
00240           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00241         };
00242     
00243   
00244   
00245   
00246       class end_matcher : public matcher {
00247         public:
00248           end_matcher() { 
00249             MatchLength = 0;
00250             }
00251 
00252           matcher *duplicate() const;
00253 
00254           TSize minimumMatchLength() const {
00255             return 0;
00256             }
00257           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00258         };
00259     
00260   
00261   
00262   
00263       class backref_open_matcher : public matcher {
00264         public:
00265           backref_open_matcher() { 
00266             MatchLength = 0; 
00267             }
00268 
00269           matcher *duplicate() const;
00270 
00271           TSize minimumMatchLength() const {
00272             return 0;
00273             }
00274           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00275         };
00276     
00277   
00278 
00279 
00280       class backref_close_matcher : public matcher {
00281         public:
00282           backref_close_matcher() { 
00283             MatchLength = 0; 
00284             }
00285 
00286           matcher *duplicate() const;
00287 
00288           TSize minimumMatchLength() const {
00289             return 0;
00290             }
00291           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00292         };
00293 
00294 
00295 
00296 
00297       class alternative_matcher : public matcher {
00298           // The connector serves two purposes:
00299           // a) be a null-matcher that re-unites the different alternate token
00300           //    sequences
00301           // b) make the end of each sequence identifiable to be able to compute
00302           //    the match length
00303         
00304           class connector : public matcher {
00305             public:
00306               matcher *duplicate() const {
00307                 return NULL;
00308                 }
00309 
00310               TSize minimumMatchLength() const {
00311                 return 0;
00312                 }
00313               bool match(backref_stack &brstack,T const &candidate,TIndex at);
00314             };
00315 
00316           typedef matcher                 super;
00317           typedef std::vector<matcher *>  alt_list;
00318           alt_list                        AltList;
00319           connector                       Connector;
00320       
00321         public:
00322           ~alternative_matcher();
00323   
00324           matcher *duplicate() const;
00325 
00326           TSize minimumMatchLength() const;
00327           void setNext(matcher *next,bool ownnext = true);
00328           void addAlternative(matcher *alternative);
00329           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00330 
00331         protected:
00332           void copy(alternative_matcher const *src);
00333         };
00334     
00335 
00336 
00337 
00338       class backref_matcher : public matcher {
00339           TIndex Backref;
00340     
00341         public:
00342           backref_matcher(TIndex backref);
00343 
00344           matcher *duplicate() const;
00345 
00346           TSize minimumMatchLength() const {
00347             return 0;
00348             }
00349           bool match(backref_stack &brstack,T const &candidate,TIndex at);
00350         };
00351     
00352       // instance data --------------------------------------------------------
00353       std::auto_ptr<matcher>      ParsedRegex;
00354       backref_stack               BackrefStack;
00355       T                           LastCandidate;
00356       TIndex                      MatchIndex;
00357       TSize                       MatchLength;
00358   
00359     public:
00360       // interface ------------------------------------------------------------
00361       regex();
00362       regex(regex const &src);
00363 
00364       regex &operator=(regex const &src);
00365       
00366       bool match(T const &candidate,TIndex from = 0);
00367       bool matchAt(T const &candidate,TIndex at = 0);
00368     
00369       // Queries pertaining to the last match
00370       TIndex getMatchIndex() { 
00371         return MatchIndex; 
00372         }
00373       TSize getMatchLength() { 
00374         return MatchLength; 
00375         }
00376       std::string getMatch() { 
00377         return T(LastCandidate.begin()+MatchIndex,
00378            LastCandidate.begin()+MatchIndex+MatchLength); 
00379         }
00380       TSize countBackrefs() { 
00381         return BackrefStack.size(); 
00382         }
00383       T getBackref(TIndex index) { 
00384         return BackrefStack.get(index,LastCandidate); 
00385         }
00386     };
00387   
00388 
00389 
00418   class regex_string : public regex<std::string> {
00419     private:
00420       class class_matcher : public regex<std::string>::matcher {
00421         private:
00422           typedef regex<std::string>::matcher   super;
00423           static TSize const                    CharValues = 256;
00424           bool                                  Set[CharValues];
00425           bool                                  Negated;
00426 
00427         public:
00428           class_matcher();
00429           class_matcher(std::string const &cls);
00430 
00431           matcher *duplicate() const;
00432 
00433           TSize minimumMatchLength() const {
00434             return 1;
00435             }
00436           bool match(backref_stack &brstack,std::string const &candidate,TIndex at);
00437       
00438         private:
00439           void expandClass(std::string const &cls);
00440         
00441         protected:
00442           void copy(class_matcher const *src);
00443         };
00444 
00445 
00446 
00447 
00448       class special_class_matcher : public regex<std::string>::matcher {
00449         public:
00450           enum type { DIGIT,NONDIGIT,ALNUM,NONALNUM,SPACE,NONSPACE };
00451   
00452         private:
00453           type          Type;
00454   
00455         public:
00456           special_class_matcher(type tp);
00457 
00458           matcher *duplicate() const;
00459 
00460           TSize minimumMatchLength() const {
00461             return 1;
00462             }
00463           bool match(backref_stack &brstack,std::string const &candidate,TIndex at);
00464         };
00465 
00466 
00467 
00468 
00469     public:
00470       regex_string() {
00471         }
00472       regex_string(std::string const &str) {
00473         parse(str);
00474         }
00475       regex_string(char const *s) {
00476         parse(s);
00477         }
00478 
00479       void parse(std::string const &expr);
00480 
00481       std::string replaceAll(std::string const &candidate,std::string const &replacement,
00482         TIndex from = 0);
00483 
00484     private:
00485       regex<std::string>::matcher *parseRegex(std::string const &expr);
00486       quantifier *parseQuantifier(std::string const &expr,TIndex &at);
00487       bool isGreedy(std::string const &expr,TIndex &at);
00488     };
00489   }
00490 
00491 
00492 
00493 #endif

Generated on Wed Oct 31 17:12:24 2001 for ixlib by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001