// StringProcess.h
  1. /**
  2. * Copyright 2008-2009 Cheng Shi. All rights reserved.
  3. * Email: shicheng107@hotmail.com
  4. */
  5. #ifndef STRINGPROCESS_H
  6. #define STRINGPROCESS_H
#include "RegExp.h"

#include <Windows.h>
#include <comutil.h>
#pragma warning(push)
#pragma warning(disable: 4127)
#include <atlcomtime.h>
#pragma warning(pop)

#include <cwctype>
#include <iostream>
#include <string>
  16. using namespace std;
  17. inline wstring Trim(const wstring &source, const wstring &targets)
  18. {
  19. wstring::size_type start = 0;
  20. wstring::size_type end = 0;
  21. for (start = 0; start < source.size(); start++)
  22. {
  23. bool bIsTarget = false;
  24. for (wstring::size_type i = 0; i < targets.size(); i++)
  25. {
  26. if (source[start] == targets[i])
  27. {
  28. bIsTarget = true;
  29. break;
  30. }
  31. }
  32. if (!bIsTarget)
  33. {
  34. break;
  35. }
  36. }
  37. for (end = source.size() - 1; (int)end >= 0; end--)
  38. {
  39. bool bIsTarget = false;
  40. for (wstring::size_type i = 0; i < targets.size(); i++)
  41. {
  42. if (source[end] == targets[i])
  43. {
  44. bIsTarget = true;
  45. break;
  46. }
  47. }
  48. if (!bIsTarget)
  49. {
  50. break;
  51. }
  52. }
  53. wstring result = L"";
  54. if (end >= start && start < source.size() && end >= 0)
  55. {
  56. result = source.substr(start, end-start+1);
  57. }
  58. return result;
  59. }
  60. inline bool PrepareString(wchar_t *dest, size_t *size, const wstring &src)
  61. {
  62. if (dest == NULL)
  63. {
  64. if (size != NULL)
  65. {
  66. *size = src.size();
  67. }
  68. return false;
  69. }
  70. else
  71. {
  72. if (size != NULL)
  73. {
  74. wcsncpy_s(dest, *size, src.c_str(), _TRUNCATE);
  75. if (*size <= src.size())
  76. {
  77. ::SetLastError(ERROR_INSUFFICIENT_BUFFER);
  78. return false;
  79. }
  80. }
  81. }
  82. return true;
  83. }
  84. inline wstring ReplaceString(const wstring &srcStr, const wstring &oldStr, const wstring &newStr)
  85. {
  86. if (srcStr.size() <= 0 || oldStr.size() <= 0)
  87. {
  88. return srcStr;
  89. }
  90. wstring strReturn = srcStr;
  91. wstring::size_type offset = 0;
  92. wstring::size_type start = strReturn.find(oldStr);
  93. while (start != wstring::npos)
  94. {
  95. offset = start + newStr.size();
  96. strReturn.replace(start, oldStr.size(), newStr);
  97. start = strReturn.find(oldStr, offset);
  98. }
  99. return strReturn;
  100. }
  101. inline int StringToInteger(const wstring &number)
  102. {
  103. if (number.size() <= 0)
  104. {
  105. return 0;
  106. }
  107. wstring num = ReplaceString(number, L",", L"");
  108. num = ReplaceString(num, L" ", L"");
  109. return _wtoi(num.c_str());
  110. }
  111. inline wstring LowerString(const wstring &text)
  112. {
  113. if (text.size() <= 0)
  114. {
  115. return L"";
  116. }
  117. unsigned int iLength = text.size() + 1;
  118. wchar_t *pTemp = new wchar_t[iLength];
  119. if (pTemp == NULL)
  120. {
  121. return L"";
  122. }
  123. wcscpy_s(pTemp, iLength, text.c_str());
  124. _wcslwr_s(pTemp, iLength);
  125. wstring retStr = pTemp;
  126. delete[] pTemp;
  127. return retStr;
  128. }
  129. inline wstring UpperString(const wstring &text)
  130. {
  131. if (text.size() <= 0)
  132. {
  133. return L"";
  134. }
  135. unsigned int iLength = text.size() + 1;
  136. wchar_t *pTemp = new wchar_t[iLength];
  137. if (pTemp == NULL)
  138. {
  139. return L"";
  140. }
  141. wcscpy_s(pTemp, iLength, text.c_str());
  142. _wcsupr_s(pTemp, iLength);
  143. wstring retStr = pTemp;
  144. delete[] pTemp;
  145. return retStr;
  146. }
  147. inline wstring GetAnchorText(const wstring &anchor)
  148. {
  149. wstring regExp = L"<a.*?>[ \t\r\n]*{.*?}[ \t\r\n]*</a>";
  150. vector<wstring> result;
  151. if (ParseRegExp(regExp, false, 1, anchor, result) && result.size() == 1)
  152. {
  153. wstring text = result[0];
  154. return text;
  155. }
  156. return L"";
  157. }
  158. inline wstring GetAnchorLink(const wstring &anchor)
  159. {
  160. wstring regExp = L"<a.*?href=\"|\'{.*?}\"|\'.*?>.*?</a>";
  161. vector<wstring> result;
  162. if (ParseRegExp(regExp, false, 1, anchor, result) && result.size() == 1)
  163. {
  164. wstring link = result[0];
  165. return link;
  166. }
  167. return L"";
  168. }
  169. inline bool SeparateString(const wstring &content, const wstring &delimiter, vector<wstring> &result)
  170. {
  171. if (content.size() <= 0 || delimiter.size() <= 0)
  172. {
  173. return false;
  174. }
  175. result.clear();
  176. wstring::size_type start = 0;
  177. wstring::size_type index = 0;
  178. index = content.find(delimiter, start);
  179. while (index != wstring::npos)
  180. {
  181. wstring::size_type size = index - start;
  182. if (size > 0)
  183. {
  184. wstring temp = content.substr(start, size);
  185. if (temp.size() > 0)
  186. {
  187. result.push_back(temp);
  188. }
  189. }
  190. start += size + delimiter.size();
  191. index = content.find(delimiter, start);
  192. }
  193. if (content.find(delimiter) != wstring::npos)
  194. {
  195. wstring last = content.substr(start);
  196. if (last.size() > 0)
  197. {
  198. result.push_back(last);
  199. }
  200. }
  201. else
  202. {
  203. false;
  204. }
  205. return true;
  206. }
  207. inline wstring URLEncoding(const wstring &keyword, bool convertToUTF8 = true)
  208. {
  209. int iLength = 0;
  210. char *szKeyword = NULL;
  211. if (convertToUTF8)
  212. {
  213. iLength = ::WideCharToMultiByte(CP_UTF8,
  214. 0,
  215. keyword.c_str(),
  216. keyword.length(),
  217. NULL,
  218. 0,
  219. NULL,
  220. NULL);
  221. if (iLength <= 0)
  222. {
  223. return L"";
  224. }
  225. szKeyword = new char[iLength];
  226. if (szKeyword == NULL)
  227. {
  228. return L"";
  229. }
  230. iLength = ::WideCharToMultiByte(CP_UTF8,
  231. 0,
  232. keyword.c_str(),
  233. keyword.length(),
  234. szKeyword,
  235. iLength,
  236. NULL,
  237. NULL);
  238. }
  239. else
  240. {
  241. string strKeyword = (char *)(_bstr_t)keyword.c_str();
  242. iLength = (int)strKeyword.length();
  243. szKeyword = new char[strKeyword.length() + 1];
  244. strcpy_s(szKeyword, strKeyword.length() + 1, strKeyword.c_str());
  245. }
  246. wstring encodedKeyword = L"";
  247. string strEncodedKeyword = "";
  248. for (int i = 0; i < iLength; i++)
  249. {
  250. unsigned char c = (unsigned char)szKeyword[i];
  251. char temp[MAX_PATH] = "";
  252. sprintf_s(temp, MAX_PATH, "%%%2X", c);
  253. if (temp[1] == ' ')
  254. {
  255. temp[1] = '0';
  256. }
  257. strEncodedKeyword += temp;
  258. }
  259. if (szKeyword != NULL)
  260. {
  261. delete[] szKeyword;
  262. }
  263. encodedKeyword = (wchar_t *)(_bstr_t)strEncodedKeyword.c_str();
  264. encodedKeyword = ReplaceString(encodedKeyword, L" ", L"+");
  265. return encodedKeyword;
  266. }
  267. inline unsigned int GetSeparateKeywordMatchGrade(const wstring &source, const wstring &keyword)
  268. {
  269. if (source.length() <= 0 || keyword.length() <= 0)
  270. {
  271. return 0;
  272. }
  273. wstring lowerSource = LowerString(source);
  274. wstring lowerKeyword = LowerString(keyword);
  275. unsigned int grade = 0;
  276. if (lowerKeyword.length() <= 3)
  277. {
  278. if (lowerSource.find(lowerKeyword) != wstring::npos)
  279. {
  280. grade = 100;
  281. }
  282. else
  283. {
  284. grade = 0;
  285. }
  286. }
  287. else
  288. {
  289. unsigned int matchLength = 0;
  290. unsigned int index = 0;
  291. while (index < lowerKeyword.length())
  292. {
  293. unsigned int compareLength = lowerKeyword.length() - index;
  294. while (compareLength > 0 && index < lowerKeyword.length())
  295. {
  296. wstring subKeyword = lowerKeyword.substr(index, compareLength);
  297. if (lowerSource.find(subKeyword) != wstring::npos)
  298. {
  299. matchLength += compareLength;
  300. index += compareLength;
  301. }
  302. else
  303. {
  304. compareLength--;
  305. }
  306. }
  307. index++;
  308. }
  309. grade = matchLength * 100 / lowerKeyword.length();
  310. }
  311. return grade;
  312. }
  313. inline unsigned int GetKeywordMatchGrade(const wstring &source, const wstring & keyword)
  314. {
  315. if (source.length() <= 0 || keyword.length() <= 0)
  316. {
  317. return 0;
  318. }
  319. unsigned int grade = 0;
  320. wstring src = source;
  321. while (src.find(L"\t") != wstring::npos)
  322. {
  323. src = ReplaceString(src, L"\t", L" ");
  324. }
  325. while (src.find(L" ") != wstring::npos)
  326. {
  327. src = ReplaceString(src, L" ", L" ");
  328. }
  329. vector<wstring> results;
  330. if (SeparateString(keyword, L" ", results) && results.size() > 0)
  331. {
  332. unsigned int keywordTotalLength = 0;
  333. for (vector<wstring>::size_type index = 0; index < results.size(); index++)
  334. {
  335. keywordTotalLength += results[index].length();
  336. }
  337. for (vector<wstring>::size_type index = 0; index < results.size(); index++)
  338. {
  339. grade += GetSeparateKeywordMatchGrade(src, results[index]) * results[index].length() / keywordTotalLength;
  340. }
  341. }
  342. else
  343. {
  344. grade = GetSeparateKeywordMatchGrade(src, keyword);
  345. }
  346. return grade;
  347. }
  348. inline wstring GetDateString(const COleDateTime &time, const wstring &separator = L"-", bool align = true)
  349. {
  350. wstring date = L"";
  351. wchar_t szTemp[MAX_PATH] = L"";
  352. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetYear());
  353. date += szTemp;
  354. date += separator;
  355. memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH);
  356. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetMonth());
  357. if (time.GetMonth() < 10 && align)
  358. {
  359. date += L"0";
  360. }
  361. date += szTemp;
  362. date += separator;
  363. memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH);
  364. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetDay());
  365. if (time.GetDay() < 10 && align)
  366. {
  367. date += L"0";
  368. }
  369. date += szTemp;
  370. return date;
  371. }
  372. inline wstring GetDateString(int dayOffset, const wstring &separator = L"-", bool align = true)
  373. {
  374. COleDateTime time = COleDateTime::GetCurrentTime();
  375. time += COleDateTimeSpan(dayOffset, 0, 0, 0);
  376. return GetDateString(time, separator, align);
  377. }
  378. inline wstring GetTimeString(const COleDateTime &time, const wstring &separator = L":", bool align = true)
  379. {
  380. wstring date = L"";
  381. wchar_t szTemp[MAX_PATH] = L"";
  382. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetHour());
  383. date += szTemp;
  384. date += separator;
  385. memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH);
  386. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetMinute());
  387. if (time.GetMinute() < 10 && align)
  388. {
  389. date += L"0";
  390. }
  391. date += szTemp;
  392. date += separator;
  393. memset(szTemp, 0, sizeof(wchar_t) * MAX_PATH);
  394. swprintf_s(szTemp, MAX_PATH, L"%d", time.GetSecond());
  395. if (time.GetSecond() < 10 && align)
  396. {
  397. date += L"0";
  398. }
  399. date += szTemp;
  400. return date;
  401. }
  402. inline wstring MD5(const wstring &text)
  403. {
  404. if (text.size() <= 0)
  405. {
  406. return L"";
  407. }
  408. string asciiText = (char *)(_bstr_t)text.c_str();
  409. wstring encrypted = L"";
  410. HCRYPTPROV hCryptProv = NULL;
  411. if (::CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET))
  412. {
  413. HCRYPTHASH hHash = NULL;
  414. if (::CryptCreateHash(hCryptProv, CALG_MD5, 0, 0, &hHash))
  415. {
  416. if (::CryptHashData(hHash, (BYTE *)asciiText.c_str(), asciiText.size(), 0))
  417. {
  418. BYTE result[16];
  419. DWORD dwSize = 16;
  420. wchar_t temp[3] = L"";
  421. if (::CryptGetHashParam(hHash, HP_HASHVAL, result, &dwSize, 0))
  422. {
  423. for (unsigned int i = 0; i < 16; i++)
  424. {
  425. memset(temp, 0, 6);
  426. swprintf(temp, 3, L"%02x", result[i]);
  427. encrypted += temp;
  428. }
  429. }
  430. }
  431. ::CryptDestroyHash(hHash);
  432. ::CryptReleaseContext(hCryptProv, 0);
  433. }
  434. }
  435. return encrypted;
  436. }
  437. inline wstring FilterFileName(const wstring &name)
  438. {
  439. if (name.size() <= 0)
  440. {
  441. return L"";
  442. }
  443. wstring filteredName = name;
  444. filteredName = ReplaceString(filteredName, L"/", L"_");
  445. filteredName = ReplaceString(filteredName, L"\\", L"_");
  446. filteredName = ReplaceString(filteredName, L":", L"_");
  447. filteredName = ReplaceString(filteredName, L"*", L"_");
  448. filteredName = ReplaceString(filteredName, L"?", L"_");
  449. filteredName = ReplaceString(filteredName, L"\"", L"_");
  450. filteredName = ReplaceString(filteredName, L"<", L"_");
  451. filteredName = ReplaceString(filteredName, L">", L"_");
  452. filteredName = ReplaceString(filteredName, L"|", L"_");
  453. return filteredName;
  454. }
  455. inline wstring GetMagic(unsigned int length)
  456. {
  457. srand(::GetTickCount());
  458. if (length <= 0)
  459. {
  460. return L"";
  461. }
  462. wstring margic = L"";
  463. for (unsigned int i = 0; i < length; i++)
  464. {
  465. wchar_t szMargic[50] = L"";
  466. swprintf_s(szMargic, 50, L"%c", rand() % 26 + L'a');
  467. margic += szMargic;
  468. }
  469. return margic;
  470. }
  471. inline wstring GetHost(const wstring &url)
  472. {
  473. if (url.size() <= 0)
  474. {
  475. return L"";
  476. }
  477. wstring urlWidthoutHttp = ReplaceString(LowerString(url), L"http://", L"");
  478. unsigned int index = urlWidthoutHttp.find(L"/");
  479. if (index == wstring::npos)
  480. {
  481. index = urlWidthoutHttp.find(L"\\");
  482. }
  483. if (index == wstring ::npos)
  484. {
  485. return urlWidthoutHttp;
  486. }
  487. return urlWidthoutHttp.substr(0, index);
  488. }
  489. inline wstring GetValidFileName(const wstring &fileName)
  490. {
  491. if (fileName.size() == 0)
  492. {
  493. return L"";
  494. }
  495. wstring tempFileName = fileName;
  496. tempFileName = ReplaceString(tempFileName, L"\\", L"_");
  497. tempFileName = ReplaceString(tempFileName, L"/", L"_");
  498. tempFileName = ReplaceString(tempFileName, L":", L"_");
  499. tempFileName = ReplaceString(tempFileName, L"*", L"_");
  500. tempFileName = ReplaceString(tempFileName, L"?", L"_");
  501. tempFileName = ReplaceString(tempFileName, L"\"", L"_");
  502. tempFileName = ReplaceString(tempFileName, L"<", L"_");
  503. tempFileName = ReplaceString(tempFileName, L">", L"_");
  504. tempFileName = ReplaceString(tempFileName, L"|", L"_");
  505. tempFileName = ReplaceString(tempFileName, L"\r", L"_");
  506. tempFileName = ReplaceString(tempFileName, L"\n", L"_");
  507. tempFileName = ReplaceString(tempFileName, L"%", L"_");
  508. return tempFileName;
  509. }
  510. #endif // STRINGPROCESS_H