| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013 |
- // This is a part of the Active Template Library.
- // Copyright (C) Microsoft Corporation
- // All rights reserved.
- //
- // This source code is only intended as a supplement to the
- // Active Template Library Reference and related
- // electronic documentation provided with the library.
- // See these sources for detailed information regarding the
- // Active Template Library product.
- #ifndef __ATLRX_H__
- #define __ATLRX_H__
- #pragma once
- #include <atlbase.h>
- #include <atlcoll.h>
- #include <mbstring.h>
- #ifndef ATL_REGEXP_MIN_STACK
- #define ATL_REGEXP_MIN_STACK 256
- #endif
- /*
- Regular Expression Grammar
- R - top level grammar rule
- RE - regular expression
- AltE - Alternative expression
- E - expression
- SE - simple expression
- R -> RE
- '^'RE (matches beginning of string)
- RE -> AltE RE
- AltE
- AltE -> E
- E '|' AltE
- E -> SE (RepeatOp '?'?)?
- SE -> Arg
- Group
- CharClass
- '\'Abbrev (see below)
- '\'EscapedChar (any character including reserved symbols)
- '\'Digit+ (Arg back reference)
- '!' (not)
- '.' (any char)
- '$' (end of input)
- Symbol (any non-reserved character)
- Arg -> '{'RE'}'
- Group -> '('RE')'
- CharClass -> '[' '^'? CharSet ']'
- CharSet -> CharItem+
- CharItem -> Char('-'Char)?
- RepeatOp -> '*'
- '+'
- '?'
- Abbrev -> Abbreviation defined in CAtlRECharTraits
- Abbrev Expansion Meaning
- a ([a-zA-Z0-9]) alpha numeric
- b ([ \\t]) white space (blank)
- c ([a-zA-Z]) alpha
- d ([0-9]) digit
- h ([0-9a-fA-F]) hex digit
- n (\r|(\r?\n)) newline
- q (\"[^\"]*\")|(\'[^\']*\') quoted string
- w ([a-zA-Z]+) simple word
- z ([0-9]+) integer
- */
- #pragma pack(push,_ATL_PACKING)
- namespace ATL {
- // Conversion utility classes used to convert char* to RECHAR.
- // Used by the rx debug printing code.
- // Primary template: wraps a narrow string and hands the very same pointer
- // back through the conversion operator (no copy, no conversion).
- template <typename RECHARTYPE=char>
- class CAToREChar
- {
- public:
-     const char* m_psz;   // the pointer passed to the constructor, stored as-is
-     CAToREChar(const char* psz) throw()
-     {
-         m_psz = psz;
-     }
-     operator const RECHARTYPE*() const throw()
-     {
-         return m_psz;
-     }
- };
- // wchar_t specialization: the narrow input is handed to a CA2W member at
- // construction, and the conversion operator exposes that member's buffer.
- template<>
- class CAToREChar<wchar_t>
- {
- private:
-     CA2W m_a2w;
- public:
-     CAToREChar(const char* psz) throw() : m_a2w(psz)
-     {
-     }
-     operator const wchar_t*() const throw()
-     {
-         return (wchar_t*)m_a2w;
-     }
- };
- // Character traits for single-byte `char` patterns and input.
- // Character sets are stored as a bit field (UseBitFieldForRange() is TRUE).
- class CAtlRECharTraitsA
- {
- public:
-     typedef char RECHARTYPE;
-     // Bit-field position of the character at sz: its value viewed as an
-     // unsigned char.
-     static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
-     {
- #ifndef ATL_NO_CHECK_BIT_FIELD
-         ATLASSERT(UseBitFieldForRange());
- #endif
-         const unsigned char uch = static_cast<unsigned char>(*sz);
-         return static_cast<size_t>(uch);
-     }
-     // Pointer to the character following sz (always one char further).
-     static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
-     {
-         const RECHARTYPE *szNext = sz + 1;
-         return (RECHARTYPE *) szNext;
-     }
-     // Case-sensitive comparison of at most nCount characters.
-     static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = strncmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Case-insensitive comparison of at most nCount characters.
-     static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = _strnicmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Lower-cases sz in place without a size bound (deprecated overload).
-     _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsA::Strlwr must be passed a buffer size.")
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
-     {
- #pragma warning (push)
- #pragma warning(disable : 4996)
-         return _strlwr(sz);
- #pragma warning (pop)
-     }
-     // Lower-cases sz in place; nSize is forwarded to Checked::strlwr_s.
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
-     {
-         Checked::strlwr_s(sz, nSize);
-         return sz;
-     }
-     // Thin wrapper over strtol.
-     static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
-     {
-         const long lValue = strtol(sz, szEnd, nBase);
-         return lValue;
-     }
-     // Non-zero when ch is a decimal digit; ch is widened through
-     // unsigned char before it reaches isdigit.
-     static int Isdigit(RECHARTYPE ch) throw()
-     {
-         const unsigned char uch = static_cast<unsigned char>(ch);
-         return isdigit(uch);
-     }
-     // NULL-terminated abbreviation table. In each entry the first character
-     // is the abbreviation letter and the rest is its expansion.
-     static const RECHARTYPE** GetAbbrevs()
-     {
-         static const RECHARTYPE *s_szAbbrevs[] =
-         {
-             "a([a-zA-Z0-9])", // alpha numeric
-             "b([ \\t])", // white space (blank)
-             "c([a-zA-Z])", // alpha
-             "d([0-9])", // digit
-             "h([0-9a-fA-F])", // hex digit
-             "n(\r|(\r?\n))", // newline
-             "q(\"[^\"]*\")|(\'[^\']*\')", // quoted string
-             "w([a-zA-Z]+)", // simple word
-             "z([0-9]+)", // integer
-             NULL
-         };
-         return s_szAbbrevs;
-     }
-     // This traits class uses the bit-field representation for ranges.
-     static BOOL UseBitFieldForRange() throw()
-     {
-         return TRUE;
-     }
-     // Length of sz in bytes, excluding the terminator.
-     static int ByteLen(const RECHARTYPE *sz) throw()
-     {
-         return static_cast<int>(strlen(sz));
-     }
- };
- // Character traits for wide (WCHAR) patterns and input.
- // Character sets are NOT stored as a bit field (UseBitFieldForRange() is FALSE).
- class CAtlRECharTraitsW
- {
- public:
-     typedef WCHAR RECHARTYPE;
-     // Value of the character at sz as a size_t. The assertion fires unless
-     // ATL_NO_CHECK_BIT_FIELD is defined, because this traits class reports
-     // that it does not use bit fields.
-     static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
-     {
- #ifndef ATL_NO_CHECK_BIT_FIELD
-         ATLASSERT(UseBitFieldForRange());
- #endif
-         const RECHARTYPE wch = *sz;
-         return static_cast<size_t>(wch);
-     }
-     // Pointer to the character following sz (always one WCHAR further).
-     static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
-     {
-         const RECHARTYPE *szNext = sz + 1;
-         return (RECHARTYPE *) szNext;
-     }
-     // Case-sensitive comparison of at most nCount characters.
-     static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = wcsncmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Case-insensitive comparison of at most nCount characters.
-     static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = _wcsnicmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Lower-cases sz in place without a size bound (deprecated overload).
-     _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsW::Strlwr must be passed a buffer size.")
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
-     {
- #pragma warning (push)
- #pragma warning(disable : 4996)
-         return _wcslwr(sz);
- #pragma warning (pop)
-     }
-     // Lower-cases sz in place; nSize is forwarded to Checked::wcslwr_s.
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
-     {
-         Checked::wcslwr_s(sz, nSize);
-         return sz;
-     }
-     // Thin wrapper over wcstol.
-     static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
-     {
-         const long lValue = wcstol(sz, szEnd, nBase);
-         return lValue;
-     }
-     // Non-zero when ch is a decimal digit according to iswdigit.
-     static int Isdigit(RECHARTYPE ch) throw()
-     {
-         const int nIsDigit = iswdigit(ch);
-         return nIsDigit;
-     }
-     // NULL-terminated abbreviation table. In each entry the first character
-     // is the abbreviation letter and the rest is its expansion.
-     static const RECHARTYPE** GetAbbrevs()
-     {
-         static const RECHARTYPE *s_szAbbrevs[] =
-         {
-             L"a([a-zA-Z0-9])", // alpha numeric
-             L"b([ \\t])", // white space (blank)
-             L"c([a-zA-Z])", // alpha
-             L"d([0-9])", // digit
-             L"h([0-9a-fA-F])", // hex digit
-             L"n(\r|(\r?\n))", // newline
-             L"q(\"[^\"]*\")|(\'[^\']*\')", // quoted string
-             L"w([a-zA-Z]+)", // simple word
-             L"z([0-9]+)", // integer
-             NULL
-         };
-         return s_szAbbrevs;
-     }
-     // This traits class does not use the bit-field representation for ranges.
-     static BOOL UseBitFieldForRange() throw()
-     {
-         return FALSE;
-     }
-     // Length of sz in bytes (character count times sizeof(WCHAR)),
-     // excluding the terminator.
-     static int ByteLen(const RECHARTYPE *sz) throw()
-     {
-         return static_cast<int>(wcslen(sz) * sizeof(WCHAR));
-     }
- };
- // Character traits for multi-byte (MBCS) strings, built on the _mbs* CRT
- // routines. Character sets are NOT stored as a bit field.
- class CAtlRECharTraitsMB
- {
- public:
-     typedef unsigned char RECHARTYPE;
-     // Value of the byte at sz as a size_t. The assertion fires unless
-     // ATL_NO_CHECK_BIT_FIELD is defined, because this traits class reports
-     // that it does not use bit fields.
-     static size_t GetBitFieldForRangeArrayIndex(const RECHARTYPE *sz) throw()
-     {
- #ifndef ATL_NO_CHECK_BIT_FIELD
-         ATLASSERT(UseBitFieldForRange());
- #endif
-         const RECHARTYPE uch = *sz;
-         return static_cast<size_t>(uch);
-     }
-     // Pointer to the next character, as computed by _mbsinc.
-     static RECHARTYPE *Next(const RECHARTYPE *sz) throw()
-     {
-         return _mbsinc(sz);
-     }
-     // Case-sensitive comparison via _mbsncmp.
-     static int Strncmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = _mbsncmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Case-insensitive comparison via _mbsnicmp.
-     static int Strnicmp(const RECHARTYPE *szLeft, const RECHARTYPE *szRight, size_t nCount) throw()
-     {
-         const int nResult = _mbsnicmp(szLeft, szRight, nCount);
-         return nResult;
-     }
-     // Lower-cases sz in place without a size bound (deprecated overload).
-     _ATL_INSECURE_DEPRECATE("CAtlRECharTraitsMB::Strlwr must be passed a buffer size.")
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz) throw()
-     {
- #pragma warning (push)
- #pragma warning(disable : 4996)
-         return _mbslwr(sz);
- #pragma warning (pop)
-     }
-     // Lower-cases sz in place; nSize is forwarded to Checked::mbslwr_s.
-     static RECHARTYPE *Strlwr(RECHARTYPE *sz, int nSize) throw()
-     {
-         Checked::mbslwr_s(sz, nSize);
-         return sz;
-     }
-     // Thin wrapper over strtol; the buffers are reinterpreted as char.
-     static long Strtol(const RECHARTYPE *sz, RECHARTYPE **szEnd, int nBase) throw()
-     {
-         const char *szNarrow = (const char *) sz;
-         char **ppszNarrowEnd = (char **) szEnd;
-         return strtol(szNarrow, ppszNarrowEnd, nBase);
-     }
-     // Non-zero when ch is a decimal digit according to _ismbcdigit.
-     static int Isdigit(RECHARTYPE ch) throw()
-     {
-         const unsigned int uch = (unsigned int) ch;
-         return _ismbcdigit(uch);
-     }
-     // Reuses the single-byte abbreviation table of CAtlRECharTraitsA,
-     // reinterpreted as unsigned char strings.
-     static const RECHARTYPE** GetAbbrevs()
-     {
-         return reinterpret_cast<const RECHARTYPE **>(CAtlRECharTraitsA::GetAbbrevs());
-     }
-     // This traits class does not use the bit-field representation for ranges.
-     static BOOL UseBitFieldForRange() throw()
-     {
-         return FALSE;
-     }
-     // Length of sz in bytes, excluding the terminator.
-     static int ByteLen(const RECHARTYPE *sz) throw()
-     {
-         const size_t cb = strlen((const char *) sz);
-         return (int) cb;
-     }
- };
- // Default traits: wide characters in _UNICODE builds, single-byte otherwise.
- #ifdef _UNICODE
- typedef CAtlRECharTraitsW CAtlRECharTraits;
- #else // !_UNICODE
- typedef CAtlRECharTraitsA CAtlRECharTraits;
- #endif // _UNICODE
- // Note: CAtlRECharTraitsMB is never selected automatically; to use it you
- // must pass it in explicitly as a template argument.
- template <class CharTraits=CAtlRECharTraits>
- class CAtlRegExp; // forward declaration
- // Per-match state used by CAtlRegExp::Match: the overall match span, the
- // span of every captured group, the matcher's scratch memory slots and its
- // explicit stack.
- template <class CharTraits=CAtlRECharTraits>
- class CAtlREMatchContext
- {
- public:
-     friend class CAtlRegExp<CharTraits>;
-     typedef typename CharTraits::RECHARTYPE RECHAR;
-     // Half-open [szStart, szEnd) span inside the matched input.
-     struct MatchGroup
-     {
-         const RECHAR *szStart;
-         const RECHAR *szEnd;
-     };
-     UINT m_uNumGroups;   // number of valid entries in m_Matches
-     MatchGroup m_Match;  // span of the overall match
-     // Copies the span of group nIndex into *szStart / *szEnd.
-     // ATLENSURE rejects NULL out-pointers and nIndex >= m_uNumGroups.
-     void GetMatch(UINT nIndex, const RECHAR **szStart, const RECHAR **szEnd)
-     {
-         ATLENSURE(szStart != NULL);
-         ATLENSURE(szEnd != NULL);
-         // nIndex is unsigned, so only the upper bound needs checking
-         ATLENSURE(nIndex < m_uNumGroups);
-         *szStart = m_Matches[nIndex].szStart;
-         *szEnd = m_Matches[nIndex].szEnd;
-     }
-     // Copies the span of group nIndex into *pGroup.
-     // ATLENSURE rejects a NULL pGroup and nIndex >= m_uNumGroups.
-     void GetMatch(UINT nIndex, MatchGroup *pGroup)
-     {
-         ATLENSURE(pGroup != NULL);
-         ATLENSURE(nIndex < m_uNumGroups);
-         pGroup->szStart = m_Matches[nIndex].szStart;
-         pGroup->szEnd = m_Matches[nIndex].szEnd;
-     }
- protected:
-     CAutoVectorPtr<void *> m_Mem;          // scratch slots addressed by the *_MEMORY / *_CHARPOS / *_STACKPOS instructions
-     CAutoVectorPtr<MatchGroup> m_Matches;  // one entry per group
-     CAtlArray<void *> m_stack;             // matcher stack; slot 0 is never used
-     size_t m_nTos;                         // index of the top element, 0 when empty
- public:
-     // Creates an empty context and pre-sizes the stack to nInitStackSize
-     // entries. A failed pre-size is not reported here; Push grows the
-     // stack on demand and reports failure itself.
-     CAtlREMatchContext(size_t nInitStackSize=ATL_REGEXP_MIN_STACK)
-     {
-         m_uNumGroups = 0;
-         m_nTos = 0;
-         m_stack.SetCount(nInitStackSize);
-         m_Match.szStart = NULL;
-         m_Match.szEnd = NULL;
-     }
- protected:
-     // Prepares the context for a new match: empties the stack and
-     // reallocates zero-filled storage for uNumGroups groups and
-     // uRequiredMem scratch slots. Returns FALSE if an allocation fails.
-     BOOL Initialize(UINT uRequiredMem, UINT uNumGroups) throw()
-     {
-         m_nTos = 0;
-         m_uNumGroups = 0;
-         m_Matches.Free();
-         if (!m_Matches.Allocate(uNumGroups))
-             return FALSE;
-         // Zero the groups before publishing the count, so that GetMatch
-         // never hands out uninitialized pointers even when the scratch
-         // allocation below fails.
-         memset(m_Matches.m_p, 0x00, uNumGroups * sizeof(MatchGroup));
-         m_uNumGroups = uNumGroups;
-         m_Mem.Free();
-         if (!m_Mem.Allocate(uRequiredMem))
-             return FALSE;
-         memset(m_Mem.m_p, 0x00, uRequiredMem*sizeof(void *));
-         return TRUE;
-     }
-     // Pushes p, doubling the stack when it is full. On allocation failure
-     // the stack is left unchanged and FALSE is returned.
-     BOOL Push(void *p)
-     {
-         m_nTos++;
-         if (m_stack.GetCount() <= (UINT) m_nTos)
-         {
-             if (!m_stack.SetCount((m_nTos+1)*2))
-             {
-                 m_nTos--;
-                 return FALSE;
-             }
-         }
-         m_stack[m_nTos] = p;
-         return TRUE;
-     }
-     // Pushes an integer value (stored as a pointer-sized value).
-     BOOL Push(size_t n)
-     {
-         return Push((void *) n);
-     }
-     // Pops and returns the top element; asserts and returns NULL when the
-     // stack is empty.
-     void *Pop() throw()
-     {
-         if (m_nTos==0)
-         {
-             // stack underflow
-             // this should never happen at match time.
-             // (the parsing succeeded when it shouldn't have)
-             ATLASSERT(FALSE);
-             return NULL;
-         }
-         void *p = m_stack[m_nTos];
-         m_nTos--;
-         return p;
-     }
- };
- // Result codes reported by CAtlRegExp::Parse.
- enum REParseError
- {
-     REPARSE_ERROR_OK               = 0,  // No error occurred
-     REPARSE_ERROR_OUTOFMEMORY      = 1,  // Out of memory
-     REPARSE_ERROR_BRACE_EXPECTED   = 2,  // A closing brace was expected
-     REPARSE_ERROR_PAREN_EXPECTED   = 3,  // A closing parenthesis was expected
-     REPARSE_ERROR_BRACKET_EXPECTED = 4,  // A closing bracket was expected
-     REPARSE_ERROR_UNEXPECTED       = 5,  // An unspecified fatal error occurred
-     REPARSE_ERROR_EMPTY_RANGE      = 6,  // A range expression was empty
-     REPARSE_ERROR_INVALID_GROUP    = 7,  // A backreference was made to a group
-                                          // that did not exist
-     REPARSE_ERROR_INVALID_RANGE    = 8,  // An invalid range was specified
-     REPARSE_ERROR_EMPTY_REPEATOP   = 9,  // A possibly empty * or + was detected
-     REPARSE_ERROR_INVALID_INPUT    = 10  // The input string was invalid
- };
- template <class CharTraits /* =CAtlRECharTraits */>
- class CAtlRegExp
- {
- public:
- // Creates an empty, case-sensitive expression with no groups, no scratch
- // memory requirement and no recorded parse error.
- CAtlRegExp() throw()
-     : m_LastError(REPARSE_ERROR_OK)
-     , m_uNumGroups(0)
-     , m_uRequiredMem(0)
-     , m_bCaseSensitive(TRUE)
- {
- }
- typedef typename CharTraits::RECHARTYPE RECHAR;
- // CAtlRegExp::Parse
- // Compiles the regular expression szRE into this object's instruction list,
- // discarding any previously parsed expression first.
- //   szRE           - null-terminated pattern; NULL yields REPARSE_ERROR_INVALID_INPUT.
- //   bCaseSensitive - when FALSE the pattern is parsed from a lower-cased
- //                    private copy, and Match lower-cases its input likewise.
- // Returns REPARSE_ERROR_OK if successful, an REParseError otherwise.
- REParseError Parse(const RECHAR *szRE, BOOL bCaseSensitive=TRUE)
- {
-     ATLASSERT(szRE);
-     if (!szRE)
-         return REPARSE_ERROR_INVALID_INPUT;
-     Reset();
-     m_bCaseSensitive = bCaseSensitive;
-     const RECHAR *szInput = szRE;
-     if (!bCaseSensitive)
-     {
-         // copy the string (including its terminator) and lower-case the copy
-         int nSize = CharTraits::ByteLen(szRE)+sizeof(RECHAR);
-         szInput = (const RECHAR *) malloc(nSize);
-         if (!szInput)
-             return REPARSE_ERROR_OUTOFMEMORY;
-         Checked::memcpy_s((char *) szInput, nSize, szRE, nSize);
-         CharTraits::Strlwr(const_cast<RECHAR *>(szInput), nSize/sizeof(RECHAR));
-     }
-     const RECHAR *sz = szInput;
-     // From here on every exit goes through the cleanup at the bottom, so
-     // the lower-cased copy is released on the out-of-memory paths as well.
-     bool bOutOfMemory = false;
-     // Instruction 0 is a call into the expression body; instruction 1 is
-     // where that call returns to: RE_FAIL for an anchored ('^') pattern,
-     // RE_ADVANCE otherwise.
-     int nCall = AddInstruction(RE_CALL);
-     if (nCall < 0)
-     {
-         bOutOfMemory = true;
-     }
-     else if (*sz == '^')
-     {
-         if (AddInstruction(RE_FAIL) < 0)
-             bOutOfMemory = true;
-         else
-             sz++;
-     }
-     else if (AddInstruction(RE_ADVANCE) < 0)
-     {
-         bOutOfMemory = true;
-     }
-     if (!bOutOfMemory)
-     {
-         bool bEmpty = true;
-         ParseRE(&sz, bEmpty);
-         if (!GetLastParseError())
-         {
-             // the expression body starts right after the two prologue instructions
-             GetInstruction(nCall).call.nTarget = 2;
-             if (AddInstruction(RE_MATCH) < 0)
-                 bOutOfMemory = true;
-         }
-     }
-     if (szInput != szRE)
-         free((void *) szInput);
-     if (bOutOfMemory)
-         return REPARSE_ERROR_OUTOFMEMORY;
-     return GetLastParseError();
- }
- // CAtlRegExp::Match
- // Executes the compiled instruction list against szIn.
- //   szIn     - null-terminated input; must not be NULL.
- //   pContext - receives the overall match span (m_Match) and the group
- //              spans; must not be NULL.
- //   ppszEnd  - optional. Set to NULL on entry; once the interpreter has run
- //              it is set to the position in szIn corresponding to where the
- //              interpreter stopped (on success and on failure alike).
- // Returns TRUE when an RE_MATCH instruction is reached. Returns FALSE for
- // NULL arguments, when an allocation fails, or when the search ends through
- // RE_FAIL, RE_ADVANCE reaching the end of the input, or an unknown opcode.
- // For a case-insensitive expression the search runs on a lower-cased copy
- // of szIn; FixupMatchContext (defined elsewhere) is called before returning,
- // presumably to translate the recorded spans back to szIn.
- // NOTE(review): the BOOL results of pContext->Push are not checked here.
- BOOL Match(const RECHAR *szIn, CAtlREMatchContext<CharTraits> *pContext, const RECHAR **ppszEnd=NULL)
- {
-     ATLASSERT(szIn);
-     ATLASSERT(pContext);
-     if (!szIn || !pContext)
-         return FALSE;
-     if (ppszEnd)
-         *ppszEnd = NULL;
-     const RECHAR *szInput = szIn;
-     if (!m_bCaseSensitive)
-     {
-         // work on a lower-cased private copy (terminator included)
-         int nSize = CharTraits::ByteLen(szIn)+sizeof(RECHAR);
-         szInput = (const RECHAR *) malloc(nSize);
-         if (!szInput)
-             return FALSE;
-         Checked::memcpy_s((char *) szInput, nSize, szIn, nSize);
-         CharTraits::Strlwr(const_cast<RECHAR *>(szInput), nSize/sizeof(RECHAR));
-     }
-     if (!pContext->Initialize(m_uRequiredMem, m_uNumGroups))
-     {
-         if (szInput != szIn)
-             free((void *) szInput);
-         return FALSE;
-     }
-     size_t ip = 0;                         // index of the instruction being executed
-     const RECHAR *sz = szInput;            // current position in the input
-     const RECHAR *szCurrInput = szInput;   // position the current attempt started from
- #pragma warning(push)
- #pragma warning(disable:4127) // conditional expression is constant
-     while (1)
-     {
- #ifdef ATLRX_DEBUG
-         OnDebugEvent(ip, szInput, sz, pContext);
- #endif
-         // instruction 0 is (re)entered at the start of every attempt
-         if (ip == 0)
-             pContext->m_Match.szStart = sz;
-         switch (GetInstruction(ip).type)
-         {
-         case RE_NOP:
-             ip++;
-             break;
-         case RE_SYMBOL:
-             // literal character: consume it, or resume at the popped address
-             if (GetInstruction(ip).symbol.nSymbol == static_cast<size_t>(*sz))
-             {
-                 sz = CharTraits::Next(sz);
-                 ip++;
-             }
-             else
-             {
-                 ip = (size_t) pContext->Pop();
-             }
-             break;
-         case RE_ANY:
-             // any character except the terminator
-             if (*sz)
-             {
-                 sz = CharTraits::Next(sz);
-                 ip++;
-             }
-             else
-             {
-                 ip = (size_t) pContext->Pop();
-             }
-             break;
-         case RE_GROUP_START:
-             pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = sz;
-             ip++;
-             break;
-         case RE_GROUP_END:
-             pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = sz;
-             ip++;
-             break;
-         case RE_PUSH_CHARPOS:
-             pContext->Push((void *) sz);
-             ip++;
-             break;
-         case RE_POP_CHARPOS:
-             sz = (RECHAR *) pContext->Pop();
-             ip++;
-             break;
-         case RE_CALL:
-             // push the return address, then jump
-             pContext->Push(ip+1);
-             ip = GetInstruction(ip).call.nTarget;
-             break;
-         case RE_JMP:
-             ip = GetInstruction(ip).jmp.nTarget;
-             break;
-         case RE_RETURN:
-             ip = (size_t) pContext->Pop();
-             break;
-         case RE_PUSH_MEMORY:
-             pContext->Push((void *) (pContext->m_Mem[GetInstruction(ip).memory.nIndex]));
-             ip++;
-             break;
-         case RE_POP_MEMORY:
-             pContext->m_Mem[GetInstruction(ip).memory.nIndex] = pContext->Pop();
-             ip++;
-             break;
-         case RE_STORE_CHARPOS:
-             pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) sz;
-             ip++;
-             break;
-         case RE_GET_CHARPOS:
-             sz = (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex];
-             ip++;
-             break;
-         case RE_STORE_STACKPOS:
-             pContext->m_Mem[GetInstruction(ip).memory.nIndex] = (void *) pContext->m_nTos;
-             ip++;
-             break;
-         case RE_GET_STACKPOS:
-             pContext->m_nTos = (size_t) pContext->m_Mem[GetInstruction(ip).memory.nIndex];
-             ip++;
-             break;
-         case RE_RET_NOMATCH:
-             // return if the input position equals the one stored in the slot
-             if (sz == (RECHAR *) pContext->m_Mem[GetInstruction(ip).memory.nIndex])
-             {
-                 // do a return
-                 ip = (size_t) pContext->Pop();
-             }
-             else
-                 ip++;
-             break;
-         case RE_ADVANCE:
-             // restart the whole program one character further into the input
-             sz = CharTraits::Next(szCurrInput);
-             szCurrInput = sz;
-             if (*sz == '\0')
-                 goto Error;
-             ip = 0;
-             pContext->m_nTos = 0;
-             break;
-         case RE_FAIL:
-             goto Error;
-         case RE_RANGE:
-             {
-                 if (*sz == '\0')
-                 {
-                     ip = (size_t) pContext->Pop();
-                     break;
-                 }
-                 // the 256-bit bitmap lives in the instruction slots after ip
-                 const unsigned char *pBits = reinterpret_cast<const unsigned char *>((&m_Instructions[ip]+1));
-                 size_t u = CharTraits::GetBitFieldForRangeArrayIndex(sz);
-                 if (pBits[u >> 3] & 1 << (u & 0x7))
-                 {
-                     ip += InstructionsPerRangeBitField();
-                     ip++;
-                     sz = CharTraits::Next(sz);
-                 }
-                 else
-                 {
-                     ip = (size_t) pContext->Pop();
-                 }
-             }
-             break;
-         case RE_NOTRANGE:
-             {
-                 if (*sz == '\0')
-                 {
-                     ip = (size_t) pContext->Pop();
-                     break;
-                 }
-                 const unsigned char *pBits = reinterpret_cast<const unsigned char *>((&m_Instructions[ip]+1));
-                 // Take the bit index from the traits, exactly as RE_RANGE does.
-                 // Converting *sz directly would turn a signed char above 0x7F
-                 // into a huge index and read far outside the 32-byte bitmap.
-                 size_t u = CharTraits::GetBitFieldForRangeArrayIndex(sz);
-                 if (pBits[u >> 3] & 1 << (u & 0x7))
-                 {
-                     ip = (size_t) pContext->Pop();
-                 }
-                 else
-                 {
-                     ip += InstructionsPerRangeBitField();
-                     ip++;
-                     sz = CharTraits::Next(sz);
-                 }
-             }
-             break;
-         case RE_RANGE_EX:
-             {
-                 if (*sz == '\0')
-                 {
-                     ip = (size_t) pContext->Pop();
-                     break;
-                 }
-                 BOOL bMatch = FALSE;
-                 // (low, high) pairs occupy the slots from ip+1 up to inEnd
-                 size_t inEnd = GetInstruction(ip).range.nTarget;
-                 ip++;
-                 while (ip < inEnd)
-                 {
-                     if (static_cast<size_t>(*sz) >= GetInstruction(ip).memory.nIndex &&
-                         static_cast<size_t>(*sz) <= GetInstruction(ip+1).memory.nIndex)
-                     {
-                         // if we match, we jump to the end
-                         sz = CharTraits::Next(sz);
-                         ip = inEnd;
-                         bMatch = TRUE;
-                     }
-                     else
-                     {
-                         ip += 2;
-                     }
-                 }
-                 if (!bMatch)
-                 {
-                     ip = (size_t) pContext->Pop();
-                 }
-             }
-             break;
-         case RE_NOTRANGE_EX:
-             {
-                 if (*sz == '\0')
-                 {
-                     ip = (size_t) pContext->Pop();
-                     break;
-                 }
-                 BOOL bMatch = TRUE;
-                 size_t inEnd = GetInstruction(ip).range.nTarget;
-                 ip++;
-                 while (ip < inEnd)
-                 {
-                     if (static_cast<size_t>(*sz) >= GetInstruction(ip).memory.nIndex &&
-                         static_cast<size_t>(*sz) <= GetInstruction(ip+1).memory.nIndex)
-                     {
-                         // the character is inside an excluded pair
-                         ip = (size_t) pContext->Pop();
-                         bMatch = FALSE;
-                         break;
-                     }
-                     else
-                     {
-                         // not in this pair, try the next one
-                         ip += 2;
-                     }
-                 }
-                 if (bMatch)
-                     sz = CharTraits::Next(sz);
-             }
-             break;
-         case RE_PREVIOUS:
-             {
-                 // back reference: compare the input with the text captured
-                 // by the referenced group
-                 const size_t nGroup = GetInstruction(ip).prev.nGroup;
-                 const RECHAR *szPrevStart = pContext->m_Matches[nGroup].szStart;
-                 const RECHAR *szPrevEnd = pContext->m_Matches[nGroup].szEnd;
-                 BOOL bMatch = FALSE;
-                 if (m_bCaseSensitive)
-                 {
-                     bMatch = !CharTraits::Strncmp(sz, szPrevStart, szPrevEnd-szPrevStart);
-                 }
-                 else
-                 {
-                     bMatch = !CharTraits::Strnicmp(sz, szPrevStart, szPrevEnd-szPrevStart);
-                 }
-                 if (bMatch)
-                 {
-                     sz += szPrevEnd-szPrevStart;
-                     ip++;
-                     break;
-                 }
-                 ip = (size_t) pContext->Pop();
-             }
-             break;
-         case RE_MATCH:
-             pContext->m_Match.szEnd = sz;
-             if (!m_bCaseSensitive)
-                 FixupMatchContext(pContext, szIn, szInput);
-             if (ppszEnd)
-                 *ppszEnd = szIn + (sz - szInput);
-             if (szInput != szIn)
-                 free((void *) szInput);
-             return TRUE;
-             break;
-         case RE_PUSH_GROUP:
-             pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart);
-             pContext->Push((void *) pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd);
-             ip++;
-             break;
-         case RE_POP_GROUP:
-             // popped in the reverse order of RE_PUSH_GROUP
-             pContext->m_Matches[GetInstruction(ip).group.nGroup].szEnd = (const RECHAR *) pContext->Pop();
-             pContext->m_Matches[GetInstruction(ip).group.nGroup].szStart = (const RECHAR *) pContext->Pop();
-             ip++;
-             break;
-         default:
-             // Unknown opcode: ip would never change, so without the jump
-             // a release build would spin here forever.
-             ATLASSERT(FALSE);
-             goto Error;
-         }
-     }
- #pragma warning(pop) // 4127
-     ATLASSERT(FALSE);
- Error:
-     pContext->m_Match.szEnd = sz;
-     if (!m_bCaseSensitive)
-         FixupMatchContext(pContext, szIn, szInput);
-     if (ppszEnd)
-         *ppszEnd = szIn + (sz - szInput);
-     if (szInput != szIn)
-         free((void *) szInput);
-     return FALSE;
- }
- protected:
- REParseError m_LastError;
- REParseError GetLastParseError() throw()
- {
- return m_LastError;
- }
- void SetLastParseError(REParseError Error) throw()
- {
- m_LastError = Error;
- }
- // CAtlRegExp::Reset
- // Returns the object to its freshly constructed state so that another
- // expression can be parsed into it: no instructions, no groups, no scratch
- // memory requirement, case-sensitive, no recorded error.
- void Reset() throw()
- {
-     m_LastError = REPARSE_ERROR_OK;
-     m_bCaseSensitive = TRUE;
-     m_uNumGroups = 0;
-     m_uRequiredMem = 0;
-     m_Instructions.RemoveAll();
- }
- enum REInstructionType { // opcodes of the compiled program; the notes describe what Match does with each
- RE_NOP, // step to the next instruction
- RE_GROUP_START, // record the current input position as the start of group.nGroup
- RE_GROUP_END, // record the current input position as the end of group.nGroup
- RE_SYMBOL, // consume one character equal to symbol.nSymbol, otherwise resume at the popped address
- RE_ANY, // consume any one character except the terminator
- RE_RANGE, // consume one character whose bit is set in the bitmap stored after this instruction
- RE_NOTRANGE, // consume one character whose bit is clear in the bitmap stored after this instruction
- RE_RANGE_EX, // consume one character inside one of the (low, high) pairs stored up to range.nTarget
- RE_NOTRANGE_EX, // consume one character outside all (low, high) pairs stored up to range.nTarget
- RE_PLUS, // no case for this value in Match
- RE_NG_PLUS, // no case for this value in Match
- RE_QUESTION, // no case for this value in Match
- RE_NG_QUESTION, // no case for this value in Match
- RE_JMP, // continue at jmp.nTarget
- RE_PUSH_CHARPOS, // push the current input position
- RE_POP_CHARPOS, // pop the current input position
- RE_CALL, // push the index of the next instruction, then continue at call.nTarget
- RE_RETURN, // continue at the popped instruction index
- RE_STAR_BEGIN, // no case for this value in Match
- RE_NG_STAR_BEGIN, // no case for this value in Match
- RE_PUSH_MEMORY, // push the content of memory slot memory.nIndex
- RE_POP_MEMORY, // pop into memory slot memory.nIndex
- RE_STORE_CHARPOS, // store the current input position in memory slot memory.nIndex
- RE_STORE_STACKPOS, // store the current stack top index in memory slot memory.nIndex
- RE_GET_CHARPOS, // load the current input position from memory slot memory.nIndex
- RE_GET_STACKPOS, // load the stack top index from memory slot memory.nIndex
- RE_RET_NOMATCH, // return (pop) if the input position equals the one in memory slot memory.nIndex
- RE_PREVIOUS, // back reference: consume the text previously captured by group prev.nGroup
- RE_FAIL, // end the search without a match
- RE_ADVANCE, // restart at instruction 0 one character further; fail at the end of the input
- RE_MATCH, // success: record the end of the match and return TRUE
- RE_PUSH_GROUP, // push the start and end of group.nGroup
- RE_POP_GROUP, // pop the end and start of group.nGroup
- };
- struct INSTRUCTION_SYMBOL // operand of RE_SYMBOL
- {
- size_t nSymbol; // compared by Match against the current input character converted to size_t
- };
- struct INSTRUCTION_JMP // operand of RE_JMP
- {
- size_t nTarget; // instruction index to continue at
- };
- struct INSTRUCTION_GROUP // operand of the RE_GROUP_* / RE_PUSH_GROUP / RE_POP_GROUP instructions
- {
- size_t nGroup; // index into the match context's group array
- };
- struct INSTRUCTION_CALL // operand of RE_CALL
- {
- size_t nTarget; // instruction index to call
- };
- struct INSTRUCTION_MEMORY // operand of the memory-slot instructions; also holds the bounds of RE_*RANGE_EX pairs
- {
- size_t nIndex; // memory slot index, or a character bound when used as a range pair entry
- };
- struct INSTRUCTION_PREVIOUS // operand of RE_PREVIOUS
- {
- size_t nGroup; // group whose captured text must repeat
- };
- struct INSTRUCTION_RANGE_EX // operand of RE_RANGE_EX / RE_NOTRANGE_EX
- {
- size_t nTarget; // index of the first instruction after the (low, high) pairs
- };
- struct INSTRUCTION // one slot of the compiled program: an opcode plus the operand selected by it
- {
- REInstructionType type;
- union
- {
- INSTRUCTION_SYMBOL symbol;
- INSTRUCTION_JMP jmp;
- INSTRUCTION_GROUP group;
- INSTRUCTION_CALL call;
- INSTRUCTION_MEMORY memory;
- INSTRUCTION_PREVIOUS prev;
- INSTRUCTION_RANGE_EX range;
- };
- };
- // Number of INSTRUCTION slots needed to hold the 256-bit (32-byte) character
- // bitmap that follows an RE_RANGE / RE_NOTRANGE instruction, rounded up.
- inline int InstructionsPerRangeBitField() throw()
- {
-     const size_t cbBitField = 256/8;
-     const size_t cbSlot = sizeof(INSTRUCTION);
-     size_t nSlots = cbBitField / cbSlot;
-     if (cbBitField % cbSlot)
-         nSlots += 1;
-     return (int) nSlots;
- }
- CAtlArray<INSTRUCTION> m_Instructions;
- UINT m_uNumGroups;
- UINT m_uRequiredMem;
- BOOL m_bCaseSensitive;
- // Snapshot of the parser's progress (instruction count, group count and
- // scratch memory requirement), used internally to roll the parser back
- // when unwinding.
- class CParseState
- {
- public:
-     int m_nNumInstructions;
-     UINT m_uNumGroups;
-     UINT m_uRequiredMem;
-     // Captures the current state of *pRegExp.
-     CParseState(CAtlRegExp *pRegExp) throw()
-         : m_nNumInstructions((int) pRegExp->m_Instructions.GetCount())
-         , m_uNumGroups(pRegExp->m_uNumGroups)
-         , m_uRequiredMem(pRegExp->m_uRequiredMem)
-     {
-     }
-     // Puts *pRegExp back into the captured state; the instruction list is
-     // resized to the captured count.
-     void Restore(CAtlRegExp *pRegExp)
-     {
-         pRegExp->m_uRequiredMem = m_uRequiredMem;
-         pRegExp->m_uNumGroups = m_uNumGroups;
-         pRegExp->m_Instructions.SetCount(m_nNumInstructions);
-     }
- };
- // Appends one instruction of the given type (its operand is left unset).
- // Returns the index of the new instruction, or -1 after recording
- // REPARSE_ERROR_OUTOFMEMORY when the list cannot grow.
- int AddInstruction(REInstructionType type)
- {
-     const size_t nNewIndex = m_Instructions.GetCount();
-     if (m_Instructions.SetCount(nNewIndex+1))
-     {
-         m_Instructions[nNewIndex].type = type;
-         return (int) nNewIndex;
-     }
-     SetLastParseError(REPARSE_ERROR_OUTOFMEMORY);
-     return -1;
- }
- // TRUE when the character at the current parse position equals ch.
- // The parse position is not moved.
- BOOL PeekToken(const RECHAR **ppszRE, int ch) throw()
- {
-     return (**ppszRE == ch) ? TRUE : FALSE;
- }
- // Consumes the character at the current parse position if it equals ch.
- // Returns TRUE when it was consumed, FALSE (position unchanged) otherwise.
- BOOL MatchToken(const RECHAR **ppszRE, int ch) throw()
- {
-     if (PeekToken(ppszRE, ch))
-     {
-         *ppszRE = CharTraits::Next(*ppszRE);
-         return TRUE;
-     }
-     return FALSE;
- }
- // Returns a reference to the instruction stored at nIndex.
- INSTRUCTION &GetInstruction(size_t nIndex) throw()
- {
-     INSTRUCTION &instruction = m_Instructions[nIndex];
-     return instruction;
- }
- // ParseArg: parse grammar rule Arg (the part after '{', up to and including '}').
- // Emits, in this order: RE_PUSH_GROUP, RE_GROUP_START, RE_CALL (to the body),
- // RE_POP_GROUP, RE_RETURN, the body itself and finally RE_GROUP_END.
- // Returns the index of the RE_PUSH_GROUP instruction, or -1 on error.
- int ParseArg(const RECHAR **ppszRE, bool &bEmpty)
- {
-     const int nPushGroup = AddInstruction(RE_PUSH_GROUP);
-     if (nPushGroup < 0)
-         return -1;
-     // every instruction emitted for this argument refers to the same group
-     const size_t nThisGroup = m_uNumGroups;
-     GetInstruction(nPushGroup).group.nGroup = nThisGroup;
-     const int nGroupStart = AddInstruction(RE_GROUP_START);
-     if (nGroupStart < 0)
-         return -1;
-     GetInstruction(nGroupStart).group.nGroup = nThisGroup;
-     m_uNumGroups++;
-     const int nCall = AddInstruction(RE_CALL);
-     if (nCall < 0)
-         return -1;
-     const int nPopGroup = AddInstruction(RE_POP_GROUP);
-     if (nPopGroup < 0)
-         return -1;
-     GetInstruction(nPopGroup).group.nGroup = nThisGroup;
-     if (AddInstruction(RE_RETURN) < 0)
-         return -1;
-     int nBody = ParseRE(ppszRE, bEmpty);
-     if (nBody < 0)
-     {
-         if (GetLastParseError())
-             return -1;
-         if (!PeekToken(ppszRE, '}'))
-         {
-             SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED);
-             return -1;
-         }
-         // in the case of an empty group, we add a nop
-         nBody = AddInstruction(RE_NOP);
-         if (nBody < 0)
-             return -1;
-     }
-     GetInstruction(nCall).call.nTarget = nBody;
-     if (!MatchToken(ppszRE, '}'))
-     {
-         SetLastParseError(REPARSE_ERROR_BRACE_EXPECTED);
-         return -1;
-     }
-     const int nGroupEnd = AddInstruction(RE_GROUP_END);
-     if (nGroupEnd < 0)
-         return -1;
-     GetInstruction(nGroupEnd).group.nGroup = nThisGroup;
-     return nPushGroup;
- }
- // ParseGroup: parse grammar rule Group (the part after '(', up to and including ')').
- // Emits RE_CALL (to the body), RE_RETURN and then the body.
- // Returns the index of the RE_CALL instruction, or -1 on error.
- int ParseGroup(const RECHAR **ppszRE, bool &bEmpty)
- {
-     const int nCall = AddInstruction(RE_CALL);
-     if (nCall < 0 || AddInstruction(RE_RETURN) < 0)
-         return -1;
-     int nBody = ParseRE(ppszRE, bEmpty);
-     if (nBody < 0)
-     {
-         if (GetLastParseError())
-             return -1;
-         if (!PeekToken(ppszRE, ')'))
-         {
-             SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED);
-             return -1;
-         }
-         // in the case of an empty group, we add a nop
-         nBody = AddInstruction(RE_NOP);
-         if (nBody < 0)
-             return -1;
-     }
-     GetInstruction(nCall).call.nTarget = nBody;
-     if (MatchToken(ppszRE, ')'))
-         return nCall;
-     SetLastParseError(REPARSE_ERROR_PAREN_EXPECTED);
-     return -1;
- }
- // Translates the character following a backslash: 't' becomes a tab,
- // every other character stands for itself.
- RECHAR GetEscapedChar(RECHAR ch) throw()
- {
-     return (ch == 't') ? static_cast<RECHAR>('\t') : ch;
- }
- // ParseCharItem: parse grammar rule CharItem -> Char('-'Char)?
- //   ppszRE       - parse position; advanced past the item on success.
- //   pchStartChar - receives the first character of the item (a backslash
- //                  escape is translated by GetEscapedChar).
- //   pchEndChar   - receives the last character of the range, or the start
- //                  character again when the item is a single character.
- // Returns 0 on success. Returns -1 after recording
- // REPARSE_ERROR_BRACKET_EXPECTED when the pattern ends inside the item, or
- // REPARSE_ERROR_INVALID_RANGE when the range end is below its start.
- int ParseCharItem(const RECHAR **ppszRE, RECHAR *pchStartChar, RECHAR *pchEndChar) throw()
- {
-     // never step over the terminator
-     if (!**ppszRE)
-     {
-         SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
-         return -1;
-     }
-     if (**ppszRE == '\\')
-     {
-         *ppszRE = CharTraits::Next(*ppszRE);
-         // A backslash as the last character of the pattern has nothing to
-         // escape. Advancing again would move past the terminator.
-         if (!**ppszRE)
-         {
-             SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
-             return -1;
-         }
-         *pchStartChar = GetEscapedChar(**ppszRE);
-     }
-     else
-         *pchStartChar = **ppszRE;
-     *ppszRE = CharTraits::Next(*ppszRE);
-     if (!MatchToken(ppszRE, '-'))
-     {
-         // single character: the range collapses to [start, start]
-         *pchEndChar = *pchStartChar;
-         return 0;
-     }
-     // check for unterminated range
-     if (!**ppszRE || PeekToken(ppszRE, ']'))
-     {
-         SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
-         return -1;
-     }
-     *pchEndChar = **ppszRE;
-     *ppszRE = CharTraits::Next(*ppszRE);
-     if (*pchEndChar < *pchStartChar)
-     {
-         SetLastParseError(REPARSE_ERROR_INVALID_RANGE);
-         return -1;
-     }
-     return 0;
- }
- // Appends nNumInstructions instruction slots in one step.
- // Returns the index of the first new slot, or -1 after recording
- // REPARSE_ERROR_OUTOFMEMORY when the list cannot grow.
- int AddInstructions(int nNumInstructions)
- {
-     const size_t nFirstNew = m_Instructions.GetCount();
-     if (m_Instructions.SetCount(nFirstNew+nNumInstructions))
-         return (int) nFirstNew;
-     SetLastParseError(REPARSE_ERROR_OUTOFMEMORY);
-     return -1;
- }
- // ParseCharSet: parse grammar rule CharSet
- // Parses the items of a character class up to (not including) the closing
- // ']' and emits the matching range instruction.
- //   ppszRE - in/out cursor into the pattern
- //   bNot   - TRUE for a negated class; selects the NOTRANGE instruction forms
- // When CharTraits::UseBitFieldForRange() is true the set is stored as a
- // 256-bit field placed right after the range instruction. Otherwise each
- // item is stored as a pair of NOP instructions holding the start and end
- // character, and the range instruction's nTarget is set to the instruction
- // count after the last pair.
- // Returns the index of the range instruction, or -1 on failure.
- int ParseCharSet(const RECHAR **ppszRE, BOOL bNot)
- {
-     int p = -1;
-     unsigned char *pBits = NULL;
-     if (CharTraits::UseBitFieldForRange())
-     {
-         // we use a bit field to represent the characters
-         // a 1 bit means match against the character
-         // the upper 5 bits of the character value select the byte
-         // in the array, and the lower 3 bits select the bit
-         // within that byte
-         p = AddInstruction(bNot ? RE_NOTRANGE : RE_RANGE);
-         if (p < 0)
-             return -1;
-         // add the required space to hold the character
-         // set. We use one bit per character for ansi
-         if (AddInstructions(InstructionsPerRangeBitField()) < 0)
-             return -1;
-         pBits = (unsigned char *) (&m_Instructions[p+1]);
-         memset(pBits, 0x00, 256/8);
-     }
-     else
-     {
-         p = AddInstruction(bNot ? RE_NOTRANGE_EX : RE_RANGE_EX);
-         if (p < 0)
-             return -1;
-     }
-     RECHAR chStart;
-     RECHAR chEnd;
-     while (**ppszRE && **ppszRE != ']')
-     {
-         if (ParseCharItem(ppszRE, &chStart, &chEnd))
-             return -1;
-         if (CharTraits::UseBitFieldForRange())
-         {
-             for (int i=chStart; i<=chEnd; i++)
-             {
-                 // index with the unsigned 8-bit value: if RECHAR is a signed
-                 // type, characters above 127 are negative and would
-                 // otherwise address memory in front of the bit field
-                 const unsigned int uBit = ((unsigned int) i) & 0xFF;
-                 pBits[uBit >> 3] |= 1 << (uBit & 0x7);
-             }
-         }
-         else
-         {
-             // the NOP pair only carries data: start and end of the item
-             int nStart = AddInstruction(RE_NOP);
-             if (nStart < 0)
-                 return -1;
-             int nEnd = AddInstruction(RE_NOP);
-             if (nEnd < 0)
-                 return -1;
-             GetInstruction(nStart).memory.nIndex = (int) chStart;
-             GetInstruction(nEnd).memory.nIndex = (int) chEnd;
-         }
-     }
-     if (!CharTraits::UseBitFieldForRange())
-         GetInstruction(p).range.nTarget = m_Instructions.GetCount();
-     return p;
- }
- // ParseCharClass: parse grammar rule CharClass
- // Handles the optional '^' negation, rejects an empty class ("[]" or "[^]")
- // with REPARSE_ERROR_EMPTY_RANGE, parses the set and requires the closing
- // ']'. bEmpty is always set to false.
- // Returns the index produced by ParseCharSet, or a negative value on failure.
- int ParseCharClass(const RECHAR **ppszRE, bool &bEmpty)
- {
-     bEmpty = false;
-     BOOL bNegated = FALSE;
-     if (!MatchToken(ppszRE, ']'))
-     {
-         if (MatchToken(ppszRE, '^'))
-             bNegated = TRUE;
-         if (!MatchToken(ppszRE, ']'))
-         {
-             const int nRange = ParseCharSet(ppszRE, bNegated);
-             if (nRange < 0)
-                 return nRange;
-             if (MatchToken(ppszRE, ']'))
-                 return nRange;
-             SetLastParseError(REPARSE_ERROR_BRACKET_EXPECTED);
-             return -1;
-         }
-     }
-     // reached only when ']' directly followed '[' or '[^'
-     SetLastParseError(REPARSE_ERROR_EMPTY_RANGE);
-     return -1;
- }
- // AddMemInstruction: add an instruction of the given type and assign it the
- // next free memory slot (m_uRequiredMem, which is then incremented).
- // Returns the instruction index, or the negative result of AddInstruction.
- int AddMemInstruction(REInstructionType type)
- {
-     const int nInstr = AddInstruction(type);
-     if (nInstr >= 0)
-     {
-         GetInstruction(nInstr).memory.nIndex = m_uRequiredMem;
-         ++m_uRequiredMem;
-     }
-     return nInstr;
- }
- // helper for parsing !SE
- // Emits, in order: STORE_CHARPOS, STORE_STACKPOS, CALL, GET_CHARPOS,
- // GET_STACKPOS, JMP, <instructions of the SE>, GET_CHARPOS, GET_STACKPOS,
- // RETURN. The CALL targets the SE, the JMP targets the instruction after the
- // RETURN, and both GET pairs read the memory slots written by the two STOREs.
- //   ppszRE - in/out cursor into the pattern, positioned after the '!'
- //   bEmpty - passed through to ParseSE
- // Returns the index of the STORE_CHARPOS instruction, or a negative value on
- // failure.
- int ParseNot(const RECHAR **ppszRE, bool &bEmpty)
- {
-     int nStoreCP = AddMemInstruction(RE_STORE_CHARPOS);
-     // both store results are used as instruction indices below, so a failed
-     // add must stop here just like every other add in this function
-     if (nStoreCP < 0)
-         return -1;
-     int nStoreSP = AddMemInstruction(RE_STORE_STACKPOS);
-     if (nStoreSP < 0)
-         return -1;
-     int nCall = AddInstruction(RE_CALL);
-     if (nCall < 0)
-         return -1;
-     int nGetCP = AddInstruction(RE_GET_CHARPOS);
-     if (nGetCP < 0)
-         return -1;
-     GetInstruction(nGetCP).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex;
-     int nGetSP = AddInstruction(RE_GET_STACKPOS);
-     if (nGetSP < 0)
-         return -1;
-     GetInstruction(nGetSP).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex;
-     int nJmp = AddInstruction(RE_JMP);
-     if (nJmp < 0)
-         return -1;
-     int nSE = ParseSE(ppszRE, bEmpty);
-     if (nSE < 0)
-         return nSE;
-     // patch the call
-     GetInstruction(nCall).call.nTarget = nSE;
-     int nGetCP1 = AddInstruction(RE_GET_CHARPOS);
-     if (nGetCP1 < 0)
-         return -1;
-     GetInstruction(nGetCP1).memory.nIndex = GetInstruction(nStoreCP).memory.nIndex;
-     int nGetSP1 = AddInstruction(RE_GET_STACKPOS);
-     if (nGetSP1 < 0)
-         return -1;
-     GetInstruction(nGetSP1).memory.nIndex = GetInstruction(nStoreSP).memory.nIndex;
-     int nRet = AddInstruction(RE_RETURN);
-     if (nRet < 0)
-         return -1;
-     // the jump skips over the SE and its trailing RETURN
-     GetInstruction(nJmp).jmp.nTarget = nRet+1;
-     return nStoreCP;
- }
- // ParseAbbrev: parse grammar rule Abbrev
- // Looks the current pattern character up in the NULL-terminated table from
- // CharTraits::GetAbbrevs(). The first character of a table entry is its key;
- // the rest of the entry is parsed with ParseE in place of the abbreviation.
- // Returns the result of ParseE for the matching entry (recording
- // REPARSE_ERROR_UNEXPECTED if that fails), or -1 when no entry matches.
- int ParseAbbrev(const RECHAR **ppszRE, bool &bEmpty)
- {
-     for (const RECHAR **ppszEntry = CharTraits::GetAbbrevs(); *ppszEntry; ++ppszEntry)
-     {
-         if (**ppszRE != **ppszEntry)
-             continue;
-         const RECHAR *szExpansion = (*ppszEntry)+1;
-         const int nExpr = ParseE(&szExpansion, bEmpty);
-         if (nExpr < 0)
-             SetLastParseError(REPARSE_ERROR_UNEXPECTED);
-         else
-             *ppszRE = CharTraits::Next(*ppszRE);   // consume the abbreviation character
-         return nExpr;
-     }
-     return -1;
- }
- // ParseSE: parse grammar rule SE (simple expression)
- // Dispatches on the next pattern character:
- //   '{' -> ParseArg, '(' -> ParseGroup, '[' -> ParseCharClass, '!' -> ParseNot
- //   backslash + digit -> RE_PREVIOUS (reference to an earlier group)
- //   backslash + other -> abbreviation if one matches, else an escaped literal
- //   '.' -> RE_ANY
- //   '$' as the last pattern character -> RE_SYMBOL with symbol 0
- //   anything else -> RE_SYMBOL for that character
- //   ppszRE - in/out cursor into the pattern
- //   bEmpty - set to false by every branch handled here directly; the
- //            delegated branches leave it to the called parser
- // Returns the index of the instruction that starts the expression. Returns
- // -1 without recording an error when the cursor is at '}', ']', ')' or the
- // end of the pattern, and -1 with an error recorded for malformed input.
- int ParseSE(const RECHAR **ppszRE, bool &bEmpty)
- {
-     if (MatchToken(ppszRE, '{'))
-         return ParseArg(ppszRE, bEmpty);
-     if (MatchToken(ppszRE, '('))
-         return ParseGroup(ppszRE, bEmpty);
-     if (MatchToken(ppszRE, '['))
-         return ParseCharClass(ppszRE, bEmpty);
-     if (MatchToken(ppszRE, '\\'))
-     {
-         if (!CharTraits::Isdigit(**ppszRE))
-         {
-             // a backslash that ends the pattern escapes nothing; carrying on
-             // would advance the cursor past the terminator
-             if (**ppszRE == '\0')
-             {
-                 SetLastParseError(REPARSE_ERROR_UNEXPECTED);
-                 return -1;
-             }
-             // check for abbreviations
-             int p;
-             p = ParseAbbrev(ppszRE, bEmpty);
-             if (p >= 0)
-                 return p;
-             if (GetLastParseError())
-                 return -1;
-             // escaped char
-             p = AddInstruction(RE_SYMBOL);
-             if (p < 0)
-                 return -1;
-             GetInstruction(p).symbol.nSymbol = (int) **ppszRE;
-             // an escaped literal consumes exactly one character, just like
-             // the unescaped symbol at the end of this function
-             bEmpty = false;
-             *ppszRE = CharTraits::Next(*ppszRE);
-             return p;
-         }
-         // previous match
-         bEmpty = false;
-         int nPrev = AddInstruction(RE_PREVIOUS);
-         if (nPrev < 0)
-             return -1;
-         UINT uValue = (UINT) CharTraits::Strtol(*ppszRE, (RECHAR **) ppszRE, 10);
-         if (uValue >= m_uNumGroups)
-         {
-             SetLastParseError(REPARSE_ERROR_INVALID_GROUP);
-             return -1;
-         }
-         GetInstruction(nPrev).prev.nGroup = (size_t) uValue;
-         return nPrev;
-     }
-     if (MatchToken(ppszRE, '!'))
-         return ParseNot(ppszRE, bEmpty);
-     // a closing token belongs to an enclosing rule; not an error here
-     if (**ppszRE == '}' || **ppszRE == ']' || **ppszRE == ')')
-     {
-         return -1;
-     }
-     if (**ppszRE == '\0')
-     {
-         return -1;
-     }
-     int p;
-     if (**ppszRE == '.')
-     {
-         p = AddInstruction(RE_ANY);
-         if (p < 0)
-             return -1;
-         bEmpty = false;
-     }
-     else if (**ppszRE == '$' && (*ppszRE)[1] == '\0')
-     {
-         // '$' at the very end of the pattern is emitted as symbol 0
-         p = AddInstruction(RE_SYMBOL);
-         if (p < 0)
-             return -1;
-         GetInstruction(p).symbol.nSymbol = 0;
-         bEmpty = false;
-     }
-     else
-     {
-         p = AddInstruction(RE_SYMBOL);
-         if (p < 0)
-             return -1;
-         GetInstruction(p).symbol.nSymbol = (int) **ppszRE;
-         bEmpty = false;
-     }
-     *ppszRE = CharTraits::Next(*ppszRE);
-     return p;
- }
- // ParseE: parse grammar rule E (expression): one SE optionally followed by a repeat operator ('*', '+', '?', or the non-greedy forms "*?", "+?", "??"). The SE is parsed once to find the operator and, if there is one, parsed a second time after the loop prologue. Returns an instruction index (nFirst, nCall or nPushMem below) or a negative value on failure.
- int ParseE(const RECHAR **ppszRE, bool &bEmpty)
- {
- CParseState ParseState(this); // saved parser state, restored or consulted once the operator is known
- const RECHAR *sz = *ppszRE; // start of the SE, kept so it can be parsed a second time
- int nSE;
- int nFirst = ParseSE(ppszRE, bEmpty); // first pass over the SE
- if (nFirst < 0)
- return nFirst;
- REInstructionType type = RE_MATCH; // RE_MATCH doubles as "no repeat operator"
- if (MatchToken(ppszRE, '*'))
- if(MatchToken(ppszRE, '?'))
- type = RE_NG_STAR_BEGIN;
- else
- type = RE_STAR_BEGIN;
- else if (MatchToken(ppszRE, '+'))
- if(MatchToken(ppszRE, '?'))
- type = RE_NG_PLUS;
- else
- type = RE_PLUS;
- else if (MatchToken(ppszRE, '?'))
- if(MatchToken(ppszRE, '?'))
- type = RE_NG_QUESTION;
- else
- type = RE_QUESTION;
- if (type == RE_MATCH)
- return nFirst; // plain SE: the first pass is the result
- if (type == RE_STAR_BEGIN || type == RE_QUESTION|| type == RE_NG_STAR_BEGIN || type == RE_NG_QUESTION)
- {
- ParseState.Restore(this); // presumably discards what the first pass emitted - TODO confirm against CParseState
- }
- else
- {
- m_uNumGroups = ParseState.m_uNumGroups; // '+' forms keep the first pass (nFirst is returned below); only the group counter goes back to its saved value
- }
- *ppszRE = sz; // rewind the pattern so the same SE is parsed again
- int nE;
- if (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS || type == RE_NG_QUESTION) // Non-Greedy
- {
- int nCall = AddInstruction(RE_CALL);
- if (nCall < 0)
- return -1;
- bEmpty = false;
- nSE = ParseSE(ppszRE, bEmpty); // second pass over the SE
- if (nSE < 0)
- return nSE;
- if (bEmpty && (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS))
- {
- SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP);
- return -1;
- }
- bEmpty = true;
- *ppszRE = CharTraits::Next(*ppszRE); // presumably steps over the two operator characters matched in the first pass
- *ppszRE = CharTraits::Next(*ppszRE);
- if (type == RE_NG_STAR_BEGIN || type == RE_NG_PLUS)
- {
- int nJmp = AddInstruction(RE_JMP);
- if (nJmp < 0)
- return -1;
- GetInstruction(nCall).call.nTarget = nJmp+1; // the call goes to the instruction after the jump
- GetInstruction(nJmp).jmp.nTarget = nCall; // the jump goes back to the call
- }
- else
- GetInstruction(nCall).call.nTarget = nSE+1;
- if (type == RE_NG_PLUS)
- nE = nFirst; // entry point is the SE emitted by the first pass
- else
- nE = nCall;
- }
- else // Greedy
- {
- int nPushMem = AddInstruction(RE_PUSH_MEMORY);
- if (nPushMem < 0)
- return -1;
- int nStore = AddInstruction(RE_STORE_CHARPOS);
- if (nStore < 0)
- return -1;
- if (AddInstruction(RE_PUSH_CHARPOS) < 0)
- return -1;
- int nCall = AddInstruction(RE_CALL);
- if (nCall < 0)
- return -1;
- if (AddInstruction(RE_POP_CHARPOS) < 0)
- return -1;
- int nPopMem = AddInstruction(RE_POP_MEMORY);
- if (nPopMem < 0)
- return -1;
- int nJmp = AddInstruction(RE_JMP);
- if (nJmp < 0)
- return -1;
- GetInstruction(nPushMem).memory.nIndex = m_uRequiredMem++; // one new memory slot, shared by all memory instructions of this operator
- GetInstruction(nStore).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
- GetInstruction(nCall).call.nTarget = nJmp+1; // the call goes to the instruction after the jump
- GetInstruction(nPopMem).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
- bEmpty = false;
- nSE = ParseSE(ppszRE, bEmpty); // second pass over the SE
- if (nSE < 0)
- return nSE;
- if (bEmpty && (type == RE_STAR_BEGIN || type == RE_PLUS))
- {
- SetLastParseError(REPARSE_ERROR_EMPTY_REPEATOP);
- return -1;
- }
- if (type != RE_PLUS && type != RE_NG_PLUS)
- bEmpty = true;
- *ppszRE = CharTraits::Next(*ppszRE); // presumably steps over the operator character matched in the first pass
- int nRetNoMatch = AddInstruction(RE_RET_NOMATCH);
- if (nRetNoMatch < 0)
- return -1;
- int nStore1 = AddInstruction(RE_STORE_CHARPOS);
- if (nStore1 < 0)
- return -1;
- GetInstruction(nRetNoMatch).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
- GetInstruction(nStore1).memory.nIndex = GetInstruction(nPushMem).memory.nIndex;
- if (type != RE_QUESTION)
- {
- int nJmp1 = AddInstruction(RE_JMP);
- if (nJmp1 < 0)
- return -1;
- GetInstruction(nJmp1).jmp.nTarget = nPushMem; // '*' and '+' jump back to the prologue
- }
- GetInstruction(nJmp).jmp.nTarget = m_Instructions.GetCount(); // the prologue jump goes to the first instruction after this operator
- if (type == RE_PLUS)
- nE = nFirst; // entry point is the SE emitted by the first pass
- else
- nE = nPushMem;
- }
- return nE;
- }
- // ParseAltE: parse grammar rule AltE (E, or E '|' AltE)
- // Emits PUSH_CHARPOS, CALL, POP_CHARPOS and JMP ahead of the first
- // alternative and another JMP after it. When no '|' follows, the saved parse
- // state is restored and the expression is parsed again on its own.
- //   ppszRE - in/out cursor into the pattern
- //   bEmpty - set by the sub-parsers; for an alternation it ends up as the
- //            OR of the flags of both sides
- // Returns the index of the PUSH_CHARPOS instruction for an alternation, the
- // result of ParseE when there is no '|', or a negative value on failure.
- int ParseAltE(const RECHAR **ppszRE, bool &bEmpty)
- {
-     const RECHAR *sz = *ppszRE;
-     CParseState ParseState(this);
-     int nPush = AddInstruction(RE_PUSH_CHARPOS);
-     if (nPush < 0)
-         return -1;
-     int nCall = AddInstruction(RE_CALL);
-     if (nCall < 0)
-         return -1;
-     // the first alternative starts right after the four prologue instructions
-     GetInstruction(nCall).call.nTarget = nPush+4;
-     if (AddInstruction(RE_POP_CHARPOS) < 0)
-         return -1;
-     int nJmpNext = AddInstruction(RE_JMP);
-     if (nJmpNext < 0)
-         return -1;
-     int nE = ParseE(ppszRE, bEmpty);
-     if (nE < 0)
-     {
-         if (GetLastParseError())
-             return -1;
-         ParseState.Restore(this);
-         return nE;
-     }
-     int nJmpEnd = AddInstruction(RE_JMP);
-     if (nJmpEnd < 0)
-         return -1;
-     GetInstruction(nJmpNext).jmp.nTarget = nJmpEnd+1;
-     if (!MatchToken(ppszRE, '|'))
-     {
-         // not an alternation after all: drop the prologue and parse plain E
-         ParseState.Restore(this);
-         *ppszRE = sz;
-         return ParseE(ppszRE, bEmpty);
-     }
-     // initialised because not every sub-parser assigns its bEmpty argument;
-     // the value is read below
-     bool bEmptyAltE = false;
-     int nAltE = ParseAltE(ppszRE, bEmptyAltE);
-     if (nAltE < 0)
-     {
-         // checked before patching so a negative index is never stored as a
-         // jump target
-         if (GetLastParseError())
-             return -1;
-         ParseState.Restore(this);
-         return nAltE;
-     }
-     GetInstruction(nJmpEnd).jmp.nTarget = m_Instructions.GetCount();
-     GetInstruction(nJmpNext).jmp.nTarget = nAltE;
-     bEmpty = bEmpty || bEmptyAltE;
-     return nPush;
- }
- // ParseRE: parse grammar rule RE (regular expression)
- // Parses a sequence of AltE items. The first item must parse; further items
- // are consumed until the pattern ends or one fails to parse.
- //   ppszRE - in/out cursor into the pattern
- //   bEmpty - on success, ANDed with the flag of every following item
- // Returns the index returned for the first item, the negative result of the
- // first item when it fails, or -1 when the pattern is already at its end or
- // a parse error is recorded afterwards.
- int ParseRE(const RECHAR **ppszRE, bool &bEmpty)
- {
-     if (**ppszRE == '\0')
-         return -1;
-     const int nHead = ParseAltE(ppszRE, bEmpty);
-     if (nHead < 0)
-         return nHead;
-     // flag accumulated over the items after the first one
-     bool bEmptyTail = true;
-     while (**ppszRE != '\0')
-     {
-         bool bEmptyItem = true;
-         const int nItem = ParseAltE(ppszRE, bEmptyItem);
-         bEmptyTail = bEmptyTail && bEmptyItem;
-         if (nItem < 0)
-             break;
-     }
-     if (GetLastParseError())
-         return -1;
-     bEmpty = bEmpty && bEmptyTail;
-     return nHead;
- }
- // FixupMatchContext: rebase the match pointers stored in pContext.
- // The overall match and the group matches currently point into szNew, an
- // internally allocated buffer holding a copy of the input string. Each
- // pointer is moved to the same offset within szOrig, the buffer the user
- // supplied as the first parameter of the Match method.
- // Example: a pointer equal to szNew+3 becomes szOrig+3.
- // Groups whose start or end pointer is NULL did not match and are left alone.
- void FixupMatchContext(CAtlREMatchContext<CharTraits> *pContext, const RECHAR *szOrig, const RECHAR *szNew)
- {
-     ATLENSURE(pContext);
-     ATLASSERT(szOrig);
-     ATLASSERT(szNew);
-     pContext->m_Match.szStart = szOrig + (pContext->m_Match.szStart - szNew);
-     pContext->m_Match.szEnd = szOrig + (pContext->m_Match.szEnd - szNew);
-     for (UINT uGroup = 0; uGroup < pContext->m_uNumGroups; ++uGroup)
-     {
-         if (pContext->m_Matches[uGroup].szStart != NULL && pContext->m_Matches[uGroup].szEnd != NULL)
-         {
-             pContext->m_Matches[uGroup].szStart = szOrig + (pContext->m_Matches[uGroup].szStart - szNew);
-             pContext->m_Matches[uGroup].szEnd = szOrig + (pContext->m_Matches[uGroup].szEnd - szNew);
-         }
-     }
- }
- // implementation
- // helpers for dumping and debugging the rx engine
- public:
- #ifdef ATL_REGEXP_DUMP
- // DumpInstruction: print the instruction at index ip to stdout.
- // Returns the index of the next instruction: ip+1, plus
- // InstructionsPerRangeBitField() for RE_RANGE / RE_NOTRANGE, whose bit field
- // occupies the following instruction slots.
- // Every printed value is cast to the type its conversion specifier expects
- // (%x -> unsigned int, %d -> int), because several of them are size_t-sized
- // and passing those straight to printf is undefined where size_t is wider
- // than int.
- size_t DumpInstruction(size_t ip)
- {
-     printf("%08x ", (unsigned int) ip);
-     size_t ipNext = ip + 1;
-     switch (GetInstruction(ip).type)
-     {
-     case RE_NOP:
-         printf("NOP\n");
-         break;
-     case RE_SYMBOL:
-         AtlprintfT<RECHAR>(CAToREChar<RECHAR>("Symbol %c\n"),GetInstruction(ip).symbol.nSymbol);
-         break;
-     case RE_ANY:
-         printf("Any\n");
-         break;
-     case RE_RANGE:
-         printf("Range\n");
-         ipNext += InstructionsPerRangeBitField();
-         break;
-     case RE_NOTRANGE:
-         printf("NOT Range\n");
-         ipNext += InstructionsPerRangeBitField();
-         break;
-     case RE_RANGE_EX:
-         printf("RangeEx %08x\n", (unsigned int) GetInstruction(ip).range.nTarget);
-         break;
-     case RE_NOTRANGE_EX:
-         printf("NotRangeEx %08x\n", (unsigned int) GetInstruction(ip).range.nTarget);
-         break;
-     case RE_GROUP_START:
-         printf("Start group %d\n", (int) GetInstruction(ip).group.nGroup);
-         break;
-     case RE_GROUP_END:
-         printf("Group end %d\n", (int) GetInstruction(ip).group.nGroup);
-         break;
-     case RE_PUSH_CHARPOS:
-         printf("Push char pos\n");
-         break;
-     case RE_POP_CHARPOS:
-         printf("Pop char pos\n");
-         break;
-     case RE_STORE_CHARPOS:
-         printf("Store char pos %d\n", (int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_GET_CHARPOS:
-         printf("Get char pos %d\n", (int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_STORE_STACKPOS:
-         printf("Store stack pos %d\n", (int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_GET_STACKPOS:
-         printf("Get stack pos %d\n", (int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_CALL:
-         printf("Call %08x\n", (unsigned int) GetInstruction(ip).call.nTarget);
-         break;
-     case RE_JMP:
-         printf("Jump %08x\n", (unsigned int) GetInstruction(ip).jmp.nTarget);
-         break;
-     case RE_RETURN:
-         printf("return\n");
-         break;
-     case RE_PUSH_MEMORY:
-         printf("Push memory %08x\n", (unsigned int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_POP_MEMORY:
-         printf("Pop memory %08x\n", (unsigned int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_RET_NOMATCH:
-         printf("Return no match %08x\n", (unsigned int) GetInstruction(ip).memory.nIndex);
-         break;
-     case RE_MATCH:
-         printf("END\n");
-         break;
-     case RE_ADVANCE:
-         printf("ADVANCE\n");
-         break;
-     case RE_FAIL:
-         printf("FAIL\n");
-         break;
-     case RE_PREVIOUS:
-         printf("Prev %d\n", (int) GetInstruction(ip).prev.nGroup);
-         break;
-     case RE_PUSH_GROUP:
-         printf("Push group %d\n", (int) GetInstruction(ip).group.nGroup);
-         break;
-     case RE_POP_GROUP:
-         printf("Pop group %d\n", (int) GetInstruction(ip).group.nGroup);
-         break;
-     default:
-         printf("????\n");
-         break;
-     }
-     return ipNext;
- }
- // Dump: print every instruction of the compiled program, marking the one at
- // index ipCurrent with "->".
- void Dump(size_t ipCurrent = 0)
- {
-     // DumpInstruction returns the index of the next instruction to print
-     for (size_t ipWalk = 0; ipWalk < m_Instructions.GetCount(); ipWalk = DumpInstruction(ipWalk))
-     {
-         if (ipWalk == ipCurrent)
-             printf("->");
-     }
- }
- #endif
- #ifdef ATLRX_DEBUG
- /* cls: blank the console screen buffer behind hConsole, reapply its current
-    text attribute to every cell and home the cursor. Does nothing when the
-    buffer size cannot be queried. */
- void cls( HANDLE hConsole )
- {
-     COORD coordScreen = { 0, 0 }; /* here's where we'll home the cursor */
-     DWORD cCharsWritten;
-     CONSOLE_SCREEN_BUFFER_INFO csbi; /* to get buffer info */
-     /* get the number of character cells in the current buffer; if the query
-        fails csbi holds no valid data, so there is nothing sensible to fill */
-     if (!GetConsoleScreenBufferInfo( hConsole, &csbi ))
-         return;
-     DWORD dwConSize = csbi.dwSize.X * csbi.dwSize.Y;
-     /* fill the entire screen with blanks */
-     FillConsoleOutputCharacter( hConsole, (TCHAR) ' ',
-         dwConSize, coordScreen, &cCharsWritten );
-     /* get the current text attribute and set the buffer's attributes
-        accordingly; skipped when the attribute cannot be read */
-     if (GetConsoleScreenBufferInfo( hConsole, &csbi ))
-         FillConsoleOutputAttribute( hConsole, csbi.wAttributes,
-             dwConSize, coordScreen, &cCharsWritten );
-     /* put the cursor at (0, 0) */
-     SetConsoleCursorPosition( hConsole, coordScreen );
-     return;
- }
- // DumpStack: print the match stack from the top (m_nTos) down to slot 1.
- // A value below the instruction count is printed as a pointer; anything else
- // is printed as a string.
- void DumpStack(CAtlREMatchContext<CharTraits> *pContext)
- {
-     size_t nSlot = pContext->m_nTos;
-     while (nSlot > 0)
-     {
-         if (pContext->m_stack[nSlot] < (void *) m_Instructions.GetCount())
-         {
-             printf("0x%p\n", pContext->m_stack[nSlot]);
-         }
-         else
-         {
-             // assume a pointer into the input
-             AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%s\n"), pContext->m_stack[nSlot]);
-         }
-         --nSlot;
-     }
- }
- // DumpMemory: print each of the m_uRequiredMem memory slots of the match
- // context as "<index>: <string>".
- void DumpMemory(CAtlREMatchContext<CharTraits> *pContext)
- {
-     UINT uSlot = 0;
-     while (uSlot < m_uRequiredMem)
-     {
-         AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%d: %s\n"), uSlot, pContext->m_Mem.m_p[uSlot]);
-         ++uSlot;
-     }
- }
- // OnDebugEvent: clear the console, then show the program (with the
- // instruction at ip marked), the input with a caret under position sz, the
- // memory slots and the stack, and wait for a key press.
- //   ip       - index of the instruction to mark in the listing
- //   szIn     - start of the input string
- //   sz       - position within the input to mark with the caret
- //   pContext - match context whose memory and stack are dumped
- virtual void OnDebugEvent(size_t ip, const RECHAR *szIn, const RECHAR *sz, CAtlREMatchContext<CharTraits> *pContext)
- {
-     cls(GetStdHandle(STD_OUTPUT_HANDLE));
-     printf("----------Code---------\n");
-     Dump(ip);
-     printf("----------Input---------\n");
-     AtlprintfT<RECHAR>(CAToREChar<RECHAR>("%s\n"), szIn);
-     // one blank per input character in front of sz, so the caret lines up
-     for (const RECHAR *szColumn = szIn; szColumn < sz; ++szColumn)
-         printf(" ");
-     printf("^\n");
-     printf("----------Memory---------\n");
-     DumpMemory(pContext);
-     printf("----------Stack---------\n");
-     DumpStack(pContext);
-     getchar();
- }
- #endif
- };
- } // namespace ATL
- #pragma pack(pop)
- #endif // __ATLRX_H__
|