00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef IXLIB_RE
00011 #define IXLIB_RE
00012
00013
00014
00015
00016 #include <vector>
00017 #include <memory>
00018 #include <ixlib_exgen.hh>
00019 #include <ixlib_string.hh>
00020
00021
00022
00023
00024
00025 #define ECRE_INVQUANTIFIER 0
00026 #define ECRE_UNBALBACKREF 1
00027 #define ECRE_INVESCAPE 2
00028 #define ECRE_INVBACKREF 3
00029 #define ECRE_UNTERMCLASS 4
00030 #define ECRE_NOPATTERN 5
00031
00032
00033
00034
00035 namespace ixion {
00036 class regex_exception : public base_exception {
00037 public:
00038 regex_exception(TErrorCode error,char const *info = NULL,char *module = NULL,TIndex line = 0);
00039 virtual char *getText() const;
00040 };
00041 }
00042
00043
00044
00045
00046
00047 #define XSTRRE_LITERAL '\\'
00048 #define XSTRRE_BACKREF '\\'
00049 #define XSTRRE_ESCAPESEQ '\\'
00050 #define XSTRRE_ANYCHAR '.'
00051 #define XSTRRE_START '^'
00052 #define XSTRRE_END '$'
00053 #define XSTRRE_ALTERNATIVE '|'
00054 #define XSTRRE_CLASSSTART '['
00055 #define XSTRRE_CLASSNEG '^'
00056 #define XSTRRE_CLASSRANGE '-'
00057 #define XSTRRE_CLASSSTOP ']'
00058
00059 #define XSTRRE_BACKREFSTART '('
00060 #define XSTRRE_BACKREFSTOP ')'
00061
00062 #define XSTRREQ_0PLUS '*'
00063 #define XSTRREQ_1PLUS '+'
00064 #define XSTRREQ_01 '?'
00065 #define XSTRREQ_START '{'
00066 #define XSTRREQ_RANGE ','
00067 #define XSTRREQ_STOP '}'
00068 #define XSTRREQ_NONGREEDY '?'
00069
00070
00071
00072
00073 namespace ixion {
00083 template<class T>
00084 class regex {
00085 protected:
00086
00087 class backref_stack {
00088 private:
00089 struct backref_entry {
00090 enum { OPEN,CLOSE } Type;
00091 TIndex Index;
00092 };
00093
00094 typedef std::vector<backref_entry> internal_stack;
00095
00096 internal_stack Stack;
00097
00098 public:
00099 typedef TSize rewind_info;
00100
00101 void open(TIndex index);
00102 void close(TIndex index);
00103
00104 rewind_info getRewindInfo() const;
00105 void rewind(rewind_info ri);
00106 void clear();
00107
00108 TSize size();
00109 T get(TIndex number,T const &candidate) const;
00110 };
00111
00112
00113
00114
00115
00116 class matcher {
00117 protected:
00118 matcher *Next;
00119 bool OwnNext;
00120 TSize MatchLength;
00121
00122 public:
00123 matcher();
00124 virtual ~matcher();
00125
00126 virtual matcher *duplicate() const = 0;
00127
00128 TSize getMatchLength() const {
00129 return MatchLength;
00130 }
00131 TSize subsequentMatchLength() const;
00132 virtual TSize minimumMatchLength() const = 0;
00133 TSize minimumSubsequentMatchLength() const;
00134
00135 matcher *getNext() const {
00136 return Next;
00137 }
00138 virtual void setNext(matcher *next,bool ownnext = true) {
00139 Next = next;
00140 OwnNext = ownnext;
00141 }
00142
00143
00144 virtual bool match(backref_stack &brstack,T const &candidate,TIndex at)
00145 = 0;
00146
00147 protected:
00148 bool matchNext(backref_stack &brstack,T const &candidate,TIndex at) const {
00149 return Next ? Next->match(brstack,candidate,at) : true;
00150 }
00151 void copy(matcher const *src);
00152 };
00153
00154
00155
00156
00157 class quantifier : public matcher {
00158 private:
00159 typedef matcher super;
00160 bool Greedy,MaxValid;
00161 TSize MinCount,MaxCount;
00162 matcher *Quantified;
00163
00164 struct backtrack_stack_entry {
00165 TIndex Index;
00166 backref_stack::rewind_info RewindInfo;
00167 };
00168
00169 public:
00170 quantifier()
00171 : Quantified(NULL) {
00172 }
00173 quantifier(bool greedy,TSize mincount);
00174 quantifier(bool greedy,TSize mincount,TSize maxcount);
00175 ~quantifier();
00176
00177 matcher *duplicate() const;
00178
00179 TSize minimumMatchLength() const;
00180
00181 void setQuantified(matcher *quantified) {
00182 Quantified = quantified;
00183 }
00184 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00185
00186 protected:
00187 void copy(quantifier const *src);
00188 };
00189
00190
00191
00192
00193 class sequence_matcher : public matcher {
00194 T MatchStr;
00195
00196 public:
00197 sequence_matcher(T const &matchstr);
00198
00199 matcher *duplicate() const;
00200
00201 TSize minimumMatchLength() const {
00202 return MatchStr.size();
00203 }
00204 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00205 };
00206
00207
00208
00209
00210 class any_matcher : public matcher {
00211 public:
00212 any_matcher() {
00213 MatchLength = 1;
00214 }
00215
00216 matcher *duplicate() const;
00217
00218 TSize minimumMatchLength() const {
00219 return 1;
00220 }
00221 bool match(backref_stack &brstack,T const &candidate,TIndex at) {
00222 return at < candidate.size() && matchNext(brstack,candidate,at+1);
00223 }
00224 };
00225
00226
00227
00228
00229 class start_matcher : public matcher {
00230 public:
00231 start_matcher() {
00232 MatchLength = 0;
00233 }
00234
00235 matcher *duplicate() const;
00236
00237 TSize minimumMatchLength() const {
00238 return 0;
00239 }
00240 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00241 };
00242
00243
00244
00245
00246 class end_matcher : public matcher {
00247 public:
00248 end_matcher() {
00249 MatchLength = 0;
00250 }
00251
00252 matcher *duplicate() const;
00253
00254 TSize minimumMatchLength() const {
00255 return 0;
00256 }
00257 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00258 };
00259
00260
00261
00262
00263 class backref_open_matcher : public matcher {
00264 public:
00265 backref_open_matcher() {
00266 MatchLength = 0;
00267 }
00268
00269 matcher *duplicate() const;
00270
00271 TSize minimumMatchLength() const {
00272 return 0;
00273 }
00274 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00275 };
00276
00277
00278
00279
00280 class backref_close_matcher : public matcher {
00281 public:
00282 backref_close_matcher() {
00283 MatchLength = 0;
00284 }
00285
00286 matcher *duplicate() const;
00287
00288 TSize minimumMatchLength() const {
00289 return 0;
00290 }
00291 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00292 };
00293
00294
00295
00296
00297 class alternative_matcher : public matcher {
00298
00299
00300
00301
00302
00303
00304 class connector : public matcher {
00305 public:
00306 matcher *duplicate() const {
00307 return NULL;
00308 }
00309
00310 TSize minimumMatchLength() const {
00311 return 0;
00312 }
00313 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00314 };
00315
00316 typedef matcher super;
00317 typedef std::vector<matcher *> alt_list;
00318 alt_list AltList;
00319 connector Connector;
00320
00321 public:
00322 ~alternative_matcher();
00323
00324 matcher *duplicate() const;
00325
00326 TSize minimumMatchLength() const;
00327 void setNext(matcher *next,bool ownnext = true);
00328 void addAlternative(matcher *alternative);
00329 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00330
00331 protected:
00332 void copy(alternative_matcher const *src);
00333 };
00334
00335
00336
00337
00338 class backref_matcher : public matcher {
00339 TIndex Backref;
00340
00341 public:
00342 backref_matcher(TIndex backref);
00343
00344 matcher *duplicate() const;
00345
00346 TSize minimumMatchLength() const {
00347 return 0;
00348 }
00349 bool match(backref_stack &brstack,T const &candidate,TIndex at);
00350 };
00351
00352
00353 std::auto_ptr<matcher> ParsedRegex;
00354 backref_stack BackrefStack;
00355 T LastCandidate;
00356 TIndex MatchIndex;
00357 TSize MatchLength;
00358
00359 public:
00360
00361 regex();
00362 regex(regex const &src);
00363
00364 regex &operator=(regex const &src);
00365
00366 bool match(T const &candidate,TIndex from = 0);
00367 bool matchAt(T const &candidate,TIndex at = 0);
00368
00369
00370 TIndex getMatchIndex() {
00371 return MatchIndex;
00372 }
00373 TSize getMatchLength() {
00374 return MatchLength;
00375 }
00376 std::string getMatch() {
00377 return T(LastCandidate.begin()+MatchIndex,
00378 LastCandidate.begin()+MatchIndex+MatchLength);
00379 }
00380 TSize countBackrefs() {
00381 return BackrefStack.size();
00382 }
00383 T getBackref(TIndex index) {
00384 return BackrefStack.get(index,LastCandidate);
00385 }
00386 };
00387
00388
00389
00418 class regex_string : public regex<std::string> {
00419 private:
00420 class class_matcher : public regex<std::string>::matcher {
00421 private:
00422 typedef regex<std::string>::matcher super;
00423 static TSize const CharValues = 256;
00424 bool Set[CharValues];
00425 bool Negated;
00426
00427 public:
00428 class_matcher();
00429 class_matcher(std::string const &cls);
00430
00431 matcher *duplicate() const;
00432
00433 TSize minimumMatchLength() const {
00434 return 1;
00435 }
00436 bool match(backref_stack &brstack,std::string const &candidate,TIndex at);
00437
00438 private:
00439 void expandClass(std::string const &cls);
00440
00441 protected:
00442 void copy(class_matcher const *src);
00443 };
00444
00445
00446
00447
00448 class special_class_matcher : public regex<std::string>::matcher {
00449 public:
00450 enum type { DIGIT,NONDIGIT,ALNUM,NONALNUM,SPACE,NONSPACE };
00451
00452 private:
00453 type Type;
00454
00455 public:
00456 special_class_matcher(type tp);
00457
00458 matcher *duplicate() const;
00459
00460 TSize minimumMatchLength() const {
00461 return 1;
00462 }
00463 bool match(backref_stack &brstack,std::string const &candidate,TIndex at);
00464 };
00465
00466
00467
00468
00469 public:
00470 regex_string() {
00471 }
00472 regex_string(std::string const &str) {
00473 parse(str);
00474 }
00475 regex_string(char const *s) {
00476 parse(s);
00477 }
00478
00479 void parse(std::string const &expr);
00480
00481 std::string replaceAll(std::string const &candidate,std::string const &replacement,
00482 TIndex from = 0);
00483
00484 private:
00485 regex<std::string>::matcher *parseRegex(std::string const &expr);
00486 quantifier *parseQuantifier(std::string const &expr,TIndex &at);
00487 bool isGreedy(std::string const &expr,TIndex &at);
00488 };
00489 }
00490
00491
00492
00493 #endif