appkit  1.5.1
RoadNarrows Robotics Application Kit
RegEx.cxx
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Package: RoadNarrows Robotics Application Tool Kit
4 //
5 // Link: https://github.com/roadnarrows-robotics/rnr-sdk
6 //
7 // Library: librnr_appkit
8 //
9 // File: RegEx.cxx
10 //
11 /*! \file
12  *
13  * \brief The Regular Expression class implementation.
14  *
15  * RegEx provides a wrapper around the regex C library calls.
16  *
17  * \note Generalized from dynashell_regex.cxx source found in RoadNarrows
18  * Robotics Dynamixel SDK package.
19  *
20  * \author Robin Knight (robin.knight@roadnarrows.com)
21  *
22  * \par Copyright
23  * \h_copy 2017-2017. RoadNarrows LLC.\n
24  * http://www.roadnarrows.com\n
25  * All Rights Reserved
26  */
27 /*
28  * @EulaBegin@
29  *
30  * Unless otherwise stated explicitly, all materials contained are copyrighted
31  * and may not be used without RoadNarrows LLC's written consent,
32  * except as provided in these terms and conditions or in the copyright
33  * notice (documents and software) or other proprietary notice provided with
34  * the relevant materials.
35  *
36  * IN NO EVENT SHALL THE AUTHOR, ROADNARROWS LLC, OR ANY
37  * MEMBERS/EMPLOYEES/CONTRACTORS OF ROADNARROWS OR DISTRIBUTORS OF THIS SOFTWARE
38  * BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR
39  * CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
40  * DOCUMENTATION, EVEN IF THE AUTHORS OR ANY OF THE ABOVE PARTIES HAVE BEEN
41  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42  *
43  * THE AUTHORS AND ROADNARROWS LLC SPECIFICALLY DISCLAIM ANY WARRANTIES,
44  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
45  * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN
46  * "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE NO OBLIGATION TO
47  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
48  *
49  * @EulaEnd@
50  */
51 ////////////////////////////////////////////////////////////////////////////////
52 
53 
54 #include <sys/types.h>
55 #include <stdio.h>
56 #include <string.h>
57 #include <unistd.h>
58 #include <regex.h>
59 
60 #include <iostream>
61 #include <iomanip>
62 #include <cstdio>
63 #include <string>
64 #include <vector>
65 
66 #include <rnr/rnrconfig.h>
67 #include <rnr/log.h>
68 
69 #include "rnr/appkit/RegEx.h"
70 
71 using namespace std;
72 using namespace rnr;
73 
74 
75 // ----------------------------------------------------------------------------
76 // Class RegEx
77 // ----------------------------------------------------------------------------
78 
/*! \brief Text written by the stream insertion operator in place of a RegEx that is not in a valid (compiled) state. */
79 const char *RegEx::ReInvalid = "inre";
80 
81 RegEx::RegEx(int nFlags)
82 {
83  m_bIsValid = false;
84 
85  groomFlags(nFlags);
86  setError(ReENoExpr);
87 }
88 
89 RegEx::RegEx(const string &strRegEx, int nFlags)
90 {
91  m_strRegEx = strRegEx;
92  m_bIsValid = false;
93 
94  groomFlags(nFlags);
95 
96  compile();
97 }
98 
99 RegEx::RegEx(const char *sRegEx, int nFlags)
100 {
101  m_bIsValid = false;
102 
103  groomFlags(nFlags);
104 
105  if( (sRegEx != NULL) && (*sRegEx != 0) )
106  {
107  m_strRegEx = sRegEx;
108  compile();
109  }
110  else
111  {
112  setError(ReENoExpr);
113  }
114 }
115 
116 RegEx::RegEx(const RegEx &src)
117 {
118  m_strRegEx = src.m_strRegEx;
119  m_bIsValid = false;
120 
121  groomFlags(src.m_nFlags);
122 
123  compile();
124 }
125 
126 RegEx::~RegEx()
127 {
128  freeReMem();
129 }
130 
131 RegEx &RegEx::operator=(const RegEx &rhs)
132 {
133  freeReMem();
134 
135  m_strRegEx = rhs.m_strRegEx;
136 
137  groomFlags(rhs.m_nFlags);
138 
139  compile();
140 
141  return *this;
142 }
143 
144 RegEx &RegEx::operator=(const string &rhs)
145 {
146  freeReMem();
147 
148  m_strRegEx = rhs;
149 
150  compile();
151 
152  return *this;
153 }
154 
155 RegEx &RegEx::operator=(const char *rhs)
156 {
157  freeReMem();
158 
159  if( (rhs != NULL) && (*rhs != 0) )
160  {
161  m_strRegEx = rhs;
162  compile();
163  }
164  else
165  {
166  m_strRegEx.clear();
167  setError(ReENoExpr);
168  }
169 
170  return *this;
171 }
172 
173 bool RegEx::match(const string &strInput, int nFlags) const
174 {
175  if( !m_bIsValid )
176  {
177  return false;
178  }
179  else if( regexec(&m_regex, strInput.c_str(), 0, NULL, nFlags) == ReOk )
180  {
181  return true;
182  }
183  else
184  {
185  return false;
186  }
187 }
188 
189 bool RegEx::match(const string &strInput, int nFlags)
190 {
191  int rc; // return code
192 
193  if( !m_bIsValid )
194  {
195  setError(ReENotComp);
196  return false;
197  }
198  else if( (rc = regexec(&m_regex, strInput.c_str(), 0, NULL, nFlags) == ReOk) )
199  {
200  setError(ReOk); // clear
201  return true;
202  }
203  else
204  {
205  setError(rc);
206  return false;
207  }
208 }
209 
210 bool RegEx::match(const char *sInput, int nFlags) const
211 {
212  string strInput;
213 
214  if( sInput != NULL )
215  {
216  strInput = sInput;
217  }
218 
219  return match(strInput, nFlags);
220 }
221 
222 bool RegEx::match(const char *sInput, int nFlags)
223 {
224  string strInput;
225 
226  if( sInput != NULL )
227  {
228  strInput = sInput;
229  }
230 
231  return match(strInput, nFlags);
232 }
233 
234 size_t RegEx::match(const string &strInput,
235  ReMatchVec &matches,
236  const size_t uMaxSubMatches,
237  const int nFlags) const
238 {
239  regmatch_t pos[uMaxSubMatches];
240  const char *sIn; // fixed input start pointer
241  const char *s; // working pointer
242  int flags; // working matching behavior flags
243 
244  matches.clear();
245 
246  if( !m_bIsValid )
247  {
248  return matches.size();
249  }
250 
251  sIn = s = strInput.c_str();
252  flags = nFlags;
253 
254  while( regexec(&m_regex, s, uMaxSubMatches, pos, flags) == ReOk )
255  {
256  for(size_t i = 0; i < uMaxSubMatches; ++i)
257  {
258  ReMatch m;
259 
260  if( pos[i].rm_so == -1 )
261  {
262  break;
263  }
264 
265  m.m_uStart = pos[i].rm_so + (s - sIn);
266  m.m_uEnd = pos[i].rm_eo + (s - sIn);
267  m.m_strMatch = strInput.substr(m.m_uStart, m.m_uEnd-m.m_uStart);
268  m.m_uEnd -= 1;
269 
270  //cerr << "DBG: " << i << ". "
271  // << "(" << m.m_uStart << "," << m.m_uEnd << ") "
272  // << "'" << m.m_strMatch << "'" << endl;
273 
274  matches.push_back(m);
275  }
276 
277  flags |= REG_NOTBOL;
278 
279  s += pos[0].rm_eo;
280  }
281 
282  return matches.size();
283 }
284 
285 size_t RegEx::match(const string &strInput,
286  ReMatchVec &matches,
287  const size_t uMaxSubMatches,
288  const int nFlags)
289 {
290  regmatch_t pos[uMaxSubMatches];
291  const char *sIn; // fixed input start pointer
292  const char *s; // working pointer
293  int flags; // working matching behavior flags
294  int rc; // return code
295 
296  matches.clear();
297 
298  if( !m_bIsValid )
299  {
300  setError(ReENotComp);
301  return matches.size();
302  }
303 
304  sIn = s = strInput.c_str();
305  flags = nFlags;
306 
307  while( (rc = regexec(&m_regex, s, uMaxSubMatches, pos, flags)) == ReOk )
308  {
309  for(size_t i = 0; i < uMaxSubMatches; ++i)
310  {
311  ReMatch m;
312 
313  if( pos[i].rm_so == -1 )
314  {
315  break;
316  }
317 
318  m.m_uStart = pos[i].rm_so + (s - sIn);
319  m.m_uEnd = pos[i].rm_eo + (s - sIn);
320  m.m_strMatch = strInput.substr(m.m_uStart, m.m_uEnd-m.m_uStart);
321  m.m_uEnd -= 1;
322 
323  //cerr << "DBG: " << i << ". "
324  // << "(" << m.m_uStart << "," << m.m_uEnd << ") "
325  // << "'" << m.m_strMatch << "'" << endl;
326 
327  matches.push_back(m);
328  }
329 
330  flags |= REG_NOTBOL;
331 
332  s += pos[0].rm_eo;
333  }
334 
335  if( matches.size() == 0 )
336  {
337  setError(rc);
338  }
339 
340  return matches.size();
341 }
342 
343 size_t RegEx::match(const char *sInput,
344  ReMatchVec &matches,
345  const size_t uMaxSubMatches,
346  const int nFlags) const
347 {
348  string strInput;
349 
350  if( sInput != NULL )
351  {
352  strInput = sInput;
353  }
354 
355  return match(strInput, matches, uMaxSubMatches, nFlags);
356 }
357 
358 size_t RegEx::match(const char *sInput,
359  ReMatchVec &matches,
360  const size_t uMaxSubMatches,
361  const int nFlags)
362 {
363  string strInput;
364 
365  if( sInput != NULL )
366  {
367  strInput = sInput;
368  }
369 
370  return match(strInput, matches, uMaxSubMatches, nFlags);
371 }
372 
373 bool RegEx::compile()
374 {
375  int rc; // return code
376 
377  freeReMem();
378 
379  if( m_strRegEx.length() == 0 )
380  {
381  setError(ReENoExpr);
382  }
383  else if( (rc = regcomp(&m_regex, m_strRegEx.c_str(), m_nFlags)) == ReOk )
384  {
385  m_bIsValid = true;
386  setError(ReOk); // not an error
387  }
388  else
389  {
390  setError(rc);
391  LOGERROR("'%s': %s", m_strRegEx.c_str(), m_strReError.c_str());
392  }
393 
394  return m_bIsValid;
395 }
396 
397 void RegEx::setFlags(const int nFlags)
398 {
399  int oldFlags = m_nFlags;
400 
401  groomFlags(nFlags);
402 
403  if( (m_nFlags != oldFlags) && !m_strRegEx.empty() )
404  {
405  compile();
406  }
407 }
408 
409 void RegEx::groomFlags(const int nFlags)
410 {
411  m_nFlags = nFlags & (ReFlagICase | ReFlagNewLine);
412  m_nFlags |= REG_EXTENDED;
413 }
414 
415 void RegEx::setError(const int nCode)
416 {
417  m_nReCode = nCode;
418 
419  switch( m_nReCode )
420  {
421  case ReOk:
422  m_strReError.clear();
423  break;
424  case ReENoExpr:
425  m_strReError = "No pre-compiled regular expression";
426  break;
427  case ReENotComp:
428  m_strReError = "Not compiled";
429  break;
430  default:
431  {
432  char buf[256];
433 
434  regerror(m_nReCode, &m_regex, buf, sizeof(buf));
435 
436  m_strReError = buf;
437  }
438  break;
439  }
440 }
441 
442 void RegEx::freeReMem()
443 {
444  if( m_bIsValid )
445  {
446  regfree(&m_regex);
447  m_bIsValid = false;
448  }
449 }
450 
451 ostream &rnr::operator<<(ostream &os, const rnr::RegEx &re)
452 {
453  // invalid regular expression
454  if( !re.isValid() )
455  {
456  os << RegEx::ReInvalid;
457  return os;
458  }
459 
460  string strRe = re.getRegEx();
461 
462  os << "re\"";
463 
464  for(size_t i = 0; i < strRe.size(); ++i)
465  {
466  switch( strRe[i] )
467  {
468  case '\x1b':
469  os << "\\e";
470  break;
471  case '\f':
472  os << "\\f";
473  break;
474  case '\n':
475  os << "\\n";
476  break;
477  case '\r':
478  os << "\\r";
479  break;
480  case '\t':
481  os << "\\t";
482  break;
483  case '\v':
484  os << "\\v";
485  break;
486  case '"':
487  os << "\\\"";
488  break;
489  default:
490  os << strRe[i];
491  break;
492  }
493  }
494 
495  os << "\"";
496 
497  return os;
498 }
499 
500 istream &rnr::operator>>(istream &is, rnr::RegEx &re)
501 {
502  // state
503  string strRe;
504  char expected = 'r';
505  bool bHasRe = false;
506  bool bInEsc = false;
507 
508  char c;
509 
510  // skip leading whitespace
511  while( !is.fail() && !is.eof() && isspace(is.peek()) )
512  {
513  is.get(c); // eat
514  }
515 
516  while( !bHasRe && !is.fail() && !is.eof() )
517  {
518  is.get(c);
519 
520  if( is.fail() || is.eof() )
521  {
522  break;
523  }
524 
525  //
526  // Step through expected state to get re prefix.
527  //
528  else if( expected != 0 )
529  {
530  if( c == expected )
531  {
532  switch( c )
533  {
534  case 'r':
535  expected = 'e';
536  break;
537  case 'e':
538  expected = '"';
539  break;
540  case '"':
541  default:
542  expected = 0;
543  break;
544  }
545  strRe.push_back(c);
546  }
547  else
548  {
549  LOGERROR("%s: Got '%c', expected '%c' after '%s'. "
550  "Required format: re\"REGEX\"",
551  LOGFUNCNAME, c, expected, strRe.c_str());
552  cin.setstate(ios::failbit);
553  }
554  }
555 
556  //
557  // In regular expression escape sequence.
558  //
559  else if( bInEsc )
560  {
561  strRe.push_back(c);
562  bInEsc = false;
563  }
564 
565  //
566  // Unescaped regular expression.
567  //
568  else
569  {
570  switch( c )
571  {
572  case '"':
573  bHasRe = true;
574  break;
575  case '\\':
576  strRe.push_back(c);
577  bInEsc = true;
578  break;
579  case '\n':
580  case '\r':
581  LOGERROR("%s: Unexpected end-of-line after '%s'.",
582  LOGFUNCNAME, strRe.c_str());
583  cin.setstate(ios::failbit);
584  break;
585  default:
586  if( isascii(c) && !iscntrl(c) )
587  {
588  strRe.push_back(c);
589  }
590  else
591  {
592  LOGERROR("%s: Unexpected non-ascii character 0x%02x after %s.",
593  LOGFUNCNAME, c, strRe.c_str());
594  cin.setstate(ios::failbit);
595  }
596  break;
597  }
598  }
599  }
600 
601  if( cin.good() )
602  {
603  re = strRe.substr(3);
604  }
605 
606  return is;
607 }
std::vector< ReMatch > ReMatchVec
vector of matches
Definition: RegEx.h:132
size_t m_uEnd
ending index in input where match is found
Definition: RegEx.h:125
const std::string & getRegEx() const
Get the pre-compiled regular expression.
Definition: RegEx.h:342
Regular Expression Class.
Definition: RegEx.h:84
int m_nFlags
compile and matching flags
Definition: RegEx.h:414
std::ostream & operator<<(std::ostream &os, const LogBook &log)
Stream insertion operator.
size_t m_uStart
starting index in input where match is found
Definition: RegEx.h:124
std::string m_strMatch
matching (sub)string
Definition: RegEx.h:126
Regular expression match structure.
Definition: RegEx.h:122
std::string m_strRegEx
pre-compiled regular expression string
Definition: RegEx.h:413
The Regular Expression Class interface.
std::istream & operator>>(std::istream &is, RegEx &re)
Extract from input stream to object.
RoadNarrows Robotics.
Definition: Camera.h:74
bool isValid() const
Test if in a valid state (i.e. compiled).
Definition: RegEx.h:354