2
0

RegExp.h 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. /**
  2. * Copyright 2008-2009 Cheng Shi. All rights reserved.
  3. * Email: shicheng107@hotmail.com
  4. */
  5. #ifndef REGEXP_H
  6. #define REGEXP_H
  7. #include <iostream>
  8. #include <string>
  9. #include <vector>
  10. using namespace std;
  11. #pragma warning(push)
  12. #pragma warning(disable: 6385 6011 4127)
  13. #include "..\..\ThirdParty\ATLRegExp\atlrx.h"
  14. #pragma warning(pop)
  15. /*
  16. * Parameters
  17. * [in] regExp: Value of type string which is the input regular expression.
  18. * [in] caseSensitive: Value of type bool which indicates whether the parse is case sensitive.
  19. * [in] groupCount: Value of type int which is the group count of the regular expression.
  20. * [in] source: Value of type string reference which is the source to parse.
  21. * [out] result: Value of type vector of strings which is the output of the parse.
  22. * [in] allowDuplicate: Value of type bool which indicates whether duplicate items are added to the output result.
  23. *
  24. * Return Value
  25. * Returns true if the function succeeds, or false otherwise.
  26. *
  27. * Remarks
  28. * The output result is divided into groups. User should get the groups according to the group count. For example:
  29. * 1. RegExp = L"{ab}", source = L"abcabe", then result = L"ab", L"ab".
  30. * 2. RegExp = L"{ab}{cd}", source = L"abcdeabecd", then result = L"ab", L"cd", L"ab", L"cd".
  31. */
  32. inline bool ParseRegExp(const wstring &regExp, bool caseSensitive, int groupCount, const wstring &source, vector<wstring> &result, bool allowDuplicate = false)
  33. {
  34. result.clear();
  35. if (regExp.size() <= 0)
  36. {
  37. return false;
  38. }
  39. if (groupCount <= 0)
  40. {
  41. return false;
  42. }
  43. if (source.size() <= 0)
  44. {
  45. return false;
  46. }
  47. CAtlRegExp<> re;
  48. REParseError error = re.Parse(regExp.c_str(), caseSensitive);
  49. if (error != REPARSE_ERROR_OK)
  50. {
  51. return false;
  52. }
  53. wchar_t *pSource = new wchar_t[source.size()+1];
  54. wchar_t *pSourceEnd = pSource + source.size();
  55. if (pSource == NULL)
  56. {
  57. return false;
  58. }
  59. wcscpy_s(pSource, source.size()+1, source.c_str());
  60. BOOL bSucceed = TRUE;
  61. CAtlREMatchContext<> mc;
  62. const wchar_t *pFrom = pSource;
  63. const wchar_t *pTo = NULL;
  64. while (bSucceed)
  65. {
  66. bSucceed = re.Match(pFrom, &mc, &pTo);
  67. if (bSucceed)
  68. {
  69. const wchar_t *pStart = NULL;
  70. const wchar_t *pEnd = NULL;
  71. vector<wstring> tempMatch;
  72. for (int i = 0; i < groupCount; i++)
  73. {
  74. mc.GetMatch(i, &pStart, &pEnd);
  75. if (pStart != NULL && pEnd != NULL)
  76. {
  77. wstring match(pStart, pEnd-pStart);
  78. tempMatch.push_back(match);
  79. }
  80. else
  81. {
  82. break;
  83. }
  84. }
  85. bool bAdd = true;
  86. if (!allowDuplicate)
  87. {
  88. // Check whether this match already exists in the vector.
  89. for (vector<wstring>::iterator it = result.begin(); it != result.end();)
  90. {
  91. bool bEqual = true;
  92. for (vector<wstring>::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++, it++)
  93. {
  94. bool bGroupEqual = true;
  95. if (caseSensitive)
  96. {
  97. bGroupEqual = (wcscmp(it->c_str(), tempMatchIt->c_str()) == 0);
  98. }
  99. else
  100. {
  101. bGroupEqual = (_wcsicmp(it->c_str(), tempMatchIt->c_str()) == 0);
  102. }
  103. if (!bGroupEqual)
  104. {
  105. bEqual = false;
  106. }
  107. }
  108. if (bEqual)
  109. {
  110. bAdd = false;
  111. break;
  112. }
  113. }
  114. }
  115. if (bAdd)
  116. {
  117. for (vector<wstring>::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++)
  118. {
  119. result.push_back(*tempMatchIt);
  120. }
  121. }
  122. if (pTo < pSourceEnd)
  123. {
  124. pFrom = pTo;
  125. }
  126. else
  127. {
  128. break;
  129. }
  130. }
  131. else
  132. {
  133. break;
  134. }
  135. }
  136. delete[] pSource;
  137. return true;
  138. }
  139. #endif // REGEXP_H