appkit  1.5.1
RoadNarrows Robotics Application Kit
RegEx.h
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Package: RoadNarrows Robotics Application Tool Kit
4 //
5 // Link: https://github.com/roadnarrows-robotics/rnr-sdk
6 //
7 // Library: librnr_appkit
8 //
9 // File: RegEx.h
10 //
11 /*! \file
12  *
13  * \brief The Regular Expression Class interface.
14  *
15  * RegEx provides a wrapper around the regex C library calls. See REGEX(3) man
16  * page.
17  *
18  * \note Generalized from dynashell_regex.h source found in RoadNarrows Robotics
19  * Dynamixel SDK package.
20  *
21  * \author Robin Knight (robin.knight@roadnarrows.com)
22  *
23  * \par Copyright
24  * \h_copy 2017-2017. RoadNarrows LLC.\n
25  * http://www.roadnarrows.com\n
26  * All Rights Reserved
27  */
28 /*
29  * @EulaBegin@
30  *
31  * Unless otherwise stated explicitly, all materials contained are copyrighted
32  * and may not be used without RoadNarrows LLC's written consent,
33  * except as provided in these terms and conditions or in the copyright
34  * notice (documents and software) or other proprietary notice provided with
35  * the relevant materials.
36  *
37  * IN NO EVENT SHALL THE AUTHOR, ROADNARROWS LLC, OR ANY
38  * MEMBERS/EMPLOYEES/CONTRACTORS OF ROADNARROWS OR DISTRIBUTORS OF THIS SOFTWARE
39  * BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR
40  * CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
41  * DOCUMENTATION, EVEN IF THE AUTHORS OR ANY OF THE ABOVE PARTIES HAVE BEEN
42  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43  *
44  * THE AUTHORS AND ROADNARROWS LLC SPECIFICALLY DISCLAIM ANY WARRANTIES,
45  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
46  * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN
47  * "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO
48  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
49  *
50  * @EulaEnd@
51  */
52 ////////////////////////////////////////////////////////////////////////////////
53 
54 #ifndef _RNR_REGEX_H
55 #define _RNR_REGEX_H
56 
57 #include <sys/types.h>
58 #include <stdio.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <regex.h>
62 
63 #include <iostream>
64 #include <string>
65 #include <vector>
66 
67 #include "rnr/rnrconfig.h"
68 
69 /*!
70  * \brief RoadNarrows Robotics
71  */
72 namespace rnr
73 {
74  // --------------------------------------------------------------------------
75  // Class RegEx
76  // --------------------------------------------------------------------------
77 
78  /*!
79  * \brief Regular Expression Class.
80  *
81  * Regular expressions are evaluated using the POSIX Extended Regular
82  * Expression syntax.
83  */
84  class RegEx
85  {
86  public:
87  /*!
88  * \brief Special return and error codes.
89  *
90  * \note The error codes must be 'out-of-band' from the standard
91  * regex REG_* codes. Large negative values should do the trick.
92  */
93  static const int ReOk = REG_NOERROR; ///< regex operation success
94  static const int ReENoExpr = -1000; ///< no pre-compiled expression
95  static const int ReENotComp = -1001; ///< not compiled
96 
97  /*!
98  * \brief Regular expression compile and matching behavior flags.
99  *
100  * See regex(3) for further description of flags.
101  *
102  * Flags are a bitwise-or of zero or more of these values.
103  */
104  static const int ReFlagDefaults = 0; ///< default flags
105 
106  // supported compile flags
107  static const int ReFlagICase = REG_ICASE; ///< ignore case when matching
108  static const int ReFlagNewLine = REG_NEWLINE; ///< \n force bol/eol matching
109 
110  // supported matching flags
111  static const int ReFlagNotBoL = REG_NOTBOL; ///< input not begin of line
112  static const int ReFlagNotEoL = REG_NOTEOL; ///< input not end of line
113 
114  /*!
115  * \brief String used in regard to invalid regular expressions.
116  */
117  static const char *ReInvalid;
118 
119  /*!
120  * \brief Regular expression match structure.
121  */
122  struct ReMatch
123  {
124  size_t m_uStart; ///< starting index in input where match is found
125  size_t m_uEnd; ///< ending index in input where match is found
126  std::string m_strMatch; ///< matching (sub)string
127  };
128 
129  /*! default maximum submatches per match */
130  static const size_t ReMaxSubMatchesDft = 32;
131 
132  typedef std::vector<ReMatch> ReMatchVec; ///< vector of matches
133 
134  /*!
135  * \brief Default constructor.
136  *
137  * No regular expression compile will be attempted.
138  *
139  * \param nFlags Bitwise-or of compile behavior flags.
140  */
141  RegEx(int nFlags = ReFlagDefaults);
142 
143  /*!
144  * \brief String initialization constructor.
145  *
146  * Regular expression compile will be attempted.
147  *
148  * \param strRegEx Pre-compile regular expression string.
149  * \param nFlags Bitwise-or of compile behavior flags.
150  */
151  RegEx(const std::string &strRegEx, int nFlags = ReFlagDefaults);
152 
153  /*!
154  * \brief Null-terminated char* initialization constructor.
155  *
156  * If not NULL or empty, regular expression compile will be attempted.
157  *
158  * \param sRegEx Null-terminated pre-compile regular expression string.
159  * \param nFlags Bitwise-or of compile behavior flags.
160  */
161  RegEx(const char *sRegEx, int nFlags = ReFlagDefaults);
162 
163  /*!
164  * \brief Copy constructor.
165  *
166  * Regular expression compile will be attempted.
167  *
168  * \param src Source object.
169  */
170  RegEx(const RegEx &src);
171 
172  /*!
173  * \brief Default destructor.
174  */
175  virtual ~RegEx();
176 
177  /*!
178  * \brief Assignment copy operator.
179  *
180  * Regular expression compile will be attempted.
181  *
182  * \param rhs Regular expression class object.
183  *
184  * \return This regular expression object.
185  */
186  RegEx &operator=(const RegEx &rhs);
187 
188  /*!
189  * \brief Assignment operator.
190  *
191  * Regular expression compile will be attempted.
192  *
193  * \param rhs String regular expression.
194  *
195  * \return This regular expression object.
196  */
197  RegEx &operator=(const std::string &rhs);
198 
199  /*!
200  * \brief Assignment operator.
201  *
202  * If not NULL or empty, regular expression compile will be attempted.
203  *
204  * \param rhs Null-terminated string expression object.
205  *
206  * \return This regular expression object.
207  */
208  RegEx &operator=(const char *rhs);
209 
210  /*!
211  * \{
212  *
213  * \brief Match the input string against the regular expression.
214  *
215  * The entire input must match the regular expression.
216  *
217  * \param [in] strInput Input string to match.
218  * \param nFlags Bitwise-or of matching behavior flags.
219  * \return
220  * Returns true if a match, false otherwise.
221  * For the non-constant version, an error is set if no match.
222  */
223  bool match(const std::string &strInput, const int nFlags = ReFlagDefaults);
224 
225  bool match(const std::string &strInput,
226  const int nFlags = ReFlagDefaults) const;
227  /*!
228  * \}
229  */
230 
231  /*! \{
232  * \brief Match the input char* against the regular expression.
233  *
234  * The entire input must match the regular expression.
235  *
236  * \param [in] sInput Null-terminated input char* to match.
237  * \param nFlags Bitwise-or of matching behavior flags.
238  * \return
239  * Returns true if a match, false otherwise.
240  * For the non-constant version, an error is set if no match.
241  */
242  bool match(const char *sInput, const int nFlags = ReFlagDefaults);
243 
244  bool match(const char *sInput, const int nFlags = ReFlagDefaults) const;
245  /*!
246  * \}
247  */
248 
249  /*! \{
250  * \brief Find all substrings in the input that match the regular
251  * expression.
252  *
253  * As an example, the following C++ code snippet:
254  * ~~~~~~~~~~{.cxx}
255  * rnr::RegEx re("[A-Z][a-z ]*[ ]+(cat)[ ]+[a-z ]+[ ]+(dog)[a-z ]*\\.");
256  *
257  * std::string input("My cat is not a dog. But my cat has dog breath.");
258  *
259  * rnr::RegEx::ReMatchVec matches;
260  *
261  * re.match(input, matches, 4);
262  *
263  * for(size_t i = 0; i < matches.size(); ++i)
264  * {
265  * std::cout << i << ". (" << matches[i].m_uStart << ","
266  * << matches[i].m_uEnd << ") '"
267  * << matches[i].m_strMatch << "'" << std::endl;
268  * }
269  * ~~~~~~~~~~
270  *
271  * Produces the output:
272  * ~~~~~~~~~~
273  * 0. (0,19) 'My cat is not a dog.'
274  * 1. (3,5) 'cat'
275  * 2. (16,18) 'dog'
276  * 3. (21,46) 'But my cat has dog breath.'
277  * 4. (28,30) 'cat'
278  * 5. (36,38) 'dog'
279  * ~~~~~~~~~~
280  *
281  * Notes:
282  * - The match start and end values are indices into the original input
283  * string, marking the location of the matched string.
284  * - In the call match(), the maximum number of specified submatches is 4,
285  * but the number of matches found is 6. This is because 2 entire
286  * matches occurred, listed on output 0 and 3. Each occurrence matched
287  * 3 (sub)strings, which is less than the 4 maximum.
288  *
289  * \param [in] strInput Input string.
290  * \param [in,out] matches Vector of matches.
291  * \param uMaxSubMatches Maximum number of submatches per match.
292  * \param nFlags Bitwise-or of matching behavior flags.
293  *
294  * \return Returns number of matches.
295  * For the non-constant version, an error is set if no match.
296  */
297  size_t match(const std::string &strInput,
298  ReMatchVec &matches,
299  const size_t uMaxSubMatches = ReMaxSubMatchesDft,
300  const int nFlags = ReFlagDefaults);
301 
302  size_t match(const std::string &strInput,
303  ReMatchVec &matches,
304  const size_t uMaxSubMatches = ReMaxSubMatchesDft,
305  const int nFlags = ReFlagDefaults) const;
306  /*!
307  * \}
308  */
309 
310  /*! \{
311  * \brief Find all substrings in input that match the regular expression.
312  *
313  * \sa See above for description.
314  *
315  * \param [in] sInput Null-terminated input char*.
316  * \param [in,out] matches Vector of matches.
317  * \param uMaxSubMatches Maximum number of submatches per match.
318  * \param nFlags Bitwise-or of matching behavior flags.
319  *
320  * \return Returns number of matches.
321  * For the non-constant version, an error is set if no match.
322  */
323  size_t match(const char *sInput,
324  ReMatchVec &matches,
325  const size_t uMaxSubMatches = ReMaxSubMatchesDft,
326  const int nFlags = ReFlagDefaults);
327 
328  size_t match(const char *sInput,
329  ReMatchVec &matches,
330  const size_t uMaxSubMatches = ReMaxSubMatchesDft,
331  const int nFlags = ReFlagDefaults) const;
332  /*!
333  * \}
334  */
335 
336  /*!
337  * \brief Get the pre-compiled regular expression.
338  *
339  * \return Reference to the stored regular expression string. (Presumably
340  * empty if no expression has been set -- confirm against RegEx.cxx.)
341  */
342  const std::string &getRegEx() const
343  {
344  return m_strRegEx;
345  }
346 
347  /*!
348  * \brief Test if in a valid state (i.e. compiled).
349  *
350  * If valid, match operation may be applied.
351  *
352  * \return Returns true if valid, false otherwise.
353  */
354  bool isValid() const
355  {
356  return m_bIsValid;
357  }
358 
359  /*!
360  * \brief Get compile behavior flags.
361  *
362  * \return Bitwise-or of compile behavior flags.
363  */
364  int getFlags() const
365  {
366  return m_nFlags;
367  }
368 
369  /*!
370  * \brief Set new compile behavior flags.
371  *
372  * \note May result in a new regular expression re-compile.
373  *
374  * \param nFlags Bitwise-or of compile behavior flags.
375  */
376  void setFlags(int nFlags);
377 
378  /*!
379  * \brief Get the extended return code from the last RegEx operation.
380  *
381  * Return Code | Description
382  * ------------------ | -----------
383  * ReOk (REG_NOERROR) | No error
384  * REG_NOMATCH | Input does not match regular expression
385  * REG_BAD*, REG_E* | The regex C interface standard compile errors
386  * ReENoExpr | No regular expression extended error
387  * ReENotComp | Not compiled extended error
388  *
389  * \return Extended return code.
390  */
391  int getReturnCode() const
392  {
393  return m_nReCode;
394  }
395 
396  /*!
397  * \brief Get the last RegEx operation error string.
398  *
399  * \return Error string.
400  */
401  const std::string &getErrorStr() const
402  {
403  return m_strReError;
404  }
405 
406  //
407  // Friends
408  //
409  friend std::ostream &operator<<(std::ostream &os, const RegEx &re);
410  friend std::istream &operator>>(std::istream &is, RegEx &re);
411 
412  protected:
413  std::string m_strRegEx; ///< pre-compiled regular expression string
414  int m_nFlags; ///< compile and matching flags
415  bool m_bIsValid; ///< expression [not] successfully compiled
416  regex_t m_regex; ///< compiled regular expression
417  int m_nReCode; ///< compiled regular expression return code
418  std::string m_strReError; ///< compiled regular expression error string
419 
420  /*!
421  * \brief Compile the regular expression string.
422  *
423  * \return Returns true on success, false otherwise. On failure, check the
424  * reason with getReturnCode() and/or getErrorStr().
425  */
426  bool compile();
427 
428  /*!
429  * \brief Groom compile behavior flags, disabling any unsupported flags.
430  *
431  * \param nFlags Bitwise-or of compile behavior flags.
432  */
433  void groomFlags(const int nFlags);
434 
435  /*!
436  * \brief Set the error code and associated error string.
437  *
438  * \param nCode Extended return code.
439  */
440  void setError(const int nCode);
441 
442  /*!
443  * \brief Free compiled regular expression memory.
444  */
445  void freeReMem();
446  };
447 
448 
449  // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
450  // Stream Manipulators
451  // . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
452 
453  /*!
454  * \brief Insert object into output stream.
455  *
456  * Output: re"REGEX" string where REGEX is the regular expression pattern.
457  *
458  * \param os Output stream.
459  * \param re Object to insert.
460  *
461  * \return Reference to output stream.
462  */
463  extern std::ostream &operator<<(std::ostream &os, const RegEx &re);
464 
465  /*!
466  * \brief Extract from input stream to object.
467  *
468  * Input: re"REGEX" string where REGEX is the regular expression pattern.
469  *
470  * \param is Input stream.
471  * \param re Object to extract into.
472  *
473  * \return Reference to input stream.
474  */
475  extern std::istream &operator>>(std::istream &is, RegEx &re);
476 
477 } // namespace rnr
478 
479 #endif // _RNR_REGEX_H
static const int ReFlagICase
ignore case when matching
Definition: RegEx.h:107
std::vector< ReMatch > ReMatchVec
vector of matches
Definition: RegEx.h:132
static const int ReOk
Special return and error codes.
Definition: RegEx.h:93
size_t m_uEnd
ending index in input where match is found
Definition: RegEx.h:125
static const int ReENotComp
not compiled
Definition: RegEx.h:95
void setError(const int nCode)
Set the error code and associated error string.
Definition: RegEx.cxx:415
int getReturnCode() const
Get the extended return code from the last RegEx operation.
Definition: RegEx.h:391
const std::string & getRegEx() const
Get the pre-compiled regular expression.
Definition: RegEx.h:342
Regular Expression Class.
Definition: RegEx.h:84
int m_nFlags
compile and matching flags
Definition: RegEx.h:414
void freeReMem()
Free compiled regular expression memory.
Definition: RegEx.cxx:442
std::string m_strReError
compiled regular expression error string
Definition: RegEx.h:418
static const size_t ReMaxSubMatchesDft
Definition: RegEx.h:130
regex_t m_regex
compiled regular expression
Definition: RegEx.h:416
static const int ReFlagNotBoL
input not begin of line
Definition: RegEx.h:111
size_t m_uStart
starting index in input where match is found
Definition: RegEx.h:124
friend std::istream & operator>>(std::istream &is, RegEx &re)
Extract from input stream to object.
const std::string & getErrorStr() const
Get the last RegEx operation error string.
Definition: RegEx.h:401
std::string m_strMatch
matching (sub)string
Definition: RegEx.h:126
static const int ReENoExpr
no pre-compiled expression
Definition: RegEx.h:94
RegEx & operator=(const RegEx &rhs)
Assignment copy operator.
Definition: RegEx.cxx:131
void groomFlags(const int nFlags)
Groom compile behavior flags, disabling any unsupported flags.
Definition: RegEx.cxx:409
Regular expression match structure.
Definition: RegEx.h:122
std::string m_strRegEx
pre-compiled regular expression string
Definition: RegEx.h:413
friend std::ostream & operator<<(std::ostream &os, const RegEx &re)
Insert object into output stream.
static const int ReFlagNewLine
force bol/eol matching
Definition: RegEx.h:108
bool match(const std::string &strInput, const int nFlags=ReFlagDefaults)
Match the input string against the regular expression.
bool m_bIsValid
expression [not] successfully compiled
Definition: RegEx.h:415
int getFlags() const
Get compile behavior flags.
Definition: RegEx.h:364
int m_nReCode
compiled regular expression return code
Definition: RegEx.h:417
virtual ~RegEx()
Default destructor.
Definition: RegEx.cxx:126
RegEx(int nFlags=ReFlagDefaults)
Default constructor.
Definition: RegEx.cxx:81
static const int ReFlagNotEoL
input not end of line
Definition: RegEx.h:112
RoadNarrows Robotics.
Definition: Camera.h:74
bool isValid() const
Test if in a valid state (i.e. compiled).
Definition: RegEx.h:354
static const int ReFlagDefaults
Regular expression compile and matching behavior flags.
Definition: RegEx.h:104
static const char * ReInvalid
Concerning, in reality, in regards to invalid regular expressions.
Definition: RegEx.h:117
bool compile()
Compile the regular expression string.
Definition: RegEx.cxx:373
void setFlags(int nFlags)
Set new compile behavior flags.
Definition: RegEx.cxx:397