#pragma once #include "LiteHTMLCommon.h" #include "LiteHTMLAttributes.h" #pragma warning(push, 4) #pragma warning (disable : 4290) // C++ Exception Specification ignored class CLiteHTMLTag { // Construction/Destruction public: CLiteHTMLTag() : m_pcollAttr(NULL) { } CLiteHTMLTag(CLiteHTMLTag &rSource, bool bCopy = false) :m_pcollAttr(NULL) { m_strTagName = rSource.m_strTagName; if (!bCopy) { m_pcollAttr = rSource.m_pcollAttr; rSource.m_pcollAttr = NULL; } else if (rSource.m_pcollAttr != NULL) { m_pcollAttr = new CLiteHTMLAttributes(*(rSource.m_pcollAttr), true); } } virtual ~CLiteHTMLTag() { SAFE_DELETE_POINTER(m_pcollAttr); } // Attributes public: CStringW getTagName(void) const { return (m_strTagName); } const CLiteHTMLAttributes* getAttributes(void) const { return (m_pcollAttr); } // Parsing Helpers public: // parses tag information from the given string UINT parseFromStr(LPCWSTR lpszString, bool &bIsOpeningTag, bool &bIsClosingTag, bool bParseAttrib = true); // Data Members private: CLiteHTMLAttributes *m_pcollAttr; CStringW m_strTagName; }; inline UINT CLiteHTMLTag::parseFromStr(LPCWSTR lpszString, bool &bIsOpeningTag, bool &bIsClosingTag, bool bParseAttrib /* = true */) { ASSERT(lpszString); //ASSERT(AfxIsValidString(lpszString)); bool bClosingTag = false; bool bOpeningTag = false; CLiteHTMLAttributes *pcollAttr = NULL; CStringW strTagName; UINT nRetVal = 0U, nTemp = 0U; LPCWSTR lpszBegin = lpszString; LPCWSTR lpszEnd = NULL; // skip leading white-space characters while (::iswspace(*lpszBegin)) //lpszBegin = ::_tcsinc(lpszBegin); lpszBegin++; // HTML tags always begin with a less-than symbol if (*lpszBegin != L'<') return (0U); // skip tag's opening delimeter '<' //lpszBegin = ::_tcsinc(lpszBegin); lpszBegin++; // optimization for empty opening tags if (*lpszBegin == L'>') { ASSERT(strTagName.IsEmpty()); ASSERT(pcollAttr == NULL); ASSERT(!bClosingTag); nRetVal = UINT(lpszBegin - lpszString); goto LUpdateAndExit; } // tag names always begin with an alphabet if (!::iswalnum(*lpszBegin)) { bClosingTag = (*lpszBegin == L'/'); if (bClosingTag) //lpszBegin = ::_tcsinc(lpszBegin); lpszBegin++; else return (0U); } bOpeningTag = !bClosingTag; lpszEnd = lpszBegin; do { // tag name may contain letters (a-z, A-Z), digits (0-9), // underscores '_', hyphen '-', colons ':', and periods '.' if ( (!::iswalnum(*lpszEnd)) && (*lpszEnd != L'-') && (*lpszEnd != L':') && (*lpszEnd != L'_') && (*lpszEnd != L'.') ) { ASSERT(lpszEnd != lpszBegin); // only white-space characters, a null-character, a // greater-than symbol, or a forward-slash can break // a tag name if (*lpszEnd == NULL || ::iswspace(*lpszEnd) || *lpszEnd == L'>' || (*lpszEnd == L'/' && (!bClosingTag)) ) { break; } return (0U); // any other character will fail parsing process } //lpszEnd = ::_tcsinc(lpszEnd); lpszEnd++; } while(true); // store tag name for later use strTagName = CStringW(lpszBegin, UINT(lpszEnd - lpszBegin)); // is this a closing tag? if (bClosingTag) { // in a closing tag, there can be only one symbol after // tag name, i.e., the tag end delimeter '>'. Anything // else will result in parsing failure if (*lpszEnd != L'>') return (0U); // skip tag's ending delimeter //lpszEnd = ::_tcsinc(lpszEnd); lpszEnd++; ASSERT(strTagName.GetLength()); ASSERT(pcollAttr == NULL); nRetVal = UINT(lpszEnd - lpszString); goto LUpdateAndExit; } // tag contains attribute/value pairs? if (*lpszEnd != L'>') { lpszBegin = lpszEnd; lpszEnd = NULL; // skip white-space characters after tag name while (::iswspace(*lpszBegin)) //lpszBegin = ::_tcsinc(lpszBegin); lpszBegin++; nTemp = 0U; if (bParseAttrib) // parse attribute/value pairs? { ASSERT(pcollAttr == NULL); // instantiate collection ... if ((pcollAttr = new CLiteHTMLAttributes) == NULL) { //TRACE0("(Error) CLiteHTMLTag::parseFromStr: Out of memory.\n"; return (0U); } // ... and delegate parsing process nTemp = pcollAttr->parseFromStr(lpszBegin); } if (nTemp == 0) // attribute/value pair parsing is disabled? // - OR - // attribute/value pairs could not be parsed? { SAFE_DELETE_POINTER(pcollAttr); if ((lpszEnd = ::wcsstr(lpszBegin, L"/>")) == NULL) { if ((lpszEnd = ::wcschr(lpszBegin, L'>')) == NULL) return (0U); } } else { lpszEnd = lpszBegin + nTemp; // skip white-space after attribute/value pairs while (::iswspace(*lpszEnd)) //lpszEnd = ::_tcsinc(lpszEnd); lpszEnd++; // tag's ending delimeter could not be found? if (*lpszEnd == NULL) { SAFE_DELETE_POINTER(pcollAttr); return (0U); } } // a tag like this one:
if (*lpszEnd == L'/') { ASSERT(bOpeningTag); bClosingTag = true; //lpszEnd = ::_tcsinc(lpszEnd); lpszEnd++; } } // HTML tags always end with a greater-than '>' symbol if (*lpszEnd != L'>') { SAFE_DELETE_POINTER(pcollAttr); return (0U); } else //lpszEnd = ::_tcsinc(lpszEnd); lpszEnd++; nRetVal = UINT(lpszEnd - lpszString); goto LUpdateAndExit; // just to show the flow-of-control LUpdateAndExit: bIsClosingTag = bClosingTag; bIsOpeningTag = bOpeningTag; m_strTagName = strTagName; m_strTagName.TrimLeft(); m_strTagName.TrimRight(); // just-in-case! SAFE_DELETE_POINTER(m_pcollAttr); m_pcollAttr = pcollAttr; pcollAttr = NULL; return (nRetVal); } #pragma warning (default : 4290) // C++ Exception Specification ignored #pragma warning(pop)