#pragma once
#include "LiteHTMLTag.h"
#pragma warning(push, 4)
class CLiteHTMLReader; // forward declaration
// Callback interface used by CLiteHTMLReader to report parsing events.
//
// All handlers are protected and CLiteHTMLReader is declared a friend, so the
// callbacks can be invoked by the reader but not by arbitrary client code.
// Each handler receives a DWORD dwAppData value (presumably the value set via
// CLiteHTMLReader::setAppData() -- confirm against parseDocument()) and, for
// every event except EndParse, a bAbort reference (presumably set to true by
// the handler to request that parsing stop -- confirm against parseDocument()).
class ILiteHTMLReaderEvents
{
    friend class CLiteHTMLReader;

// Events
protected:
    // Corresponds to the notifyStartStop flag (together with EndParse).
    virtual void BeginParse(DWORD dwAppData, bool &bAbort) = 0;
    // Corresponds to the notifyTagStart flag; pTag describes the tag.
    virtual void StartTag(CLiteHTMLTag *pTag, DWORD dwAppData, bool &bAbort) = 0;
    // Corresponds to the notifyTagEnd flag; pTag describes the tag.
    virtual void EndTag(CLiteHTMLTag *pTag, DWORD dwAppData, bool &bAbort) = 0;
    // Corresponds to the notifyCharacters flag; rText is the character data.
    virtual void Characters(const CStringW &rText, DWORD dwAppData, bool &bAbort) = 0;
    // Corresponds to the notifyComment flag; rComment is the comment text.
    virtual void Comment(const CStringW &rComment, DWORD dwAppData, bool &bAbort) = 0;
    // Corresponds to the notifyStartStop flag; bIsAborted is passed by value.
    virtual void EndParse(DWORD dwAppData, bool bIsAborted) = 0;

public:
    // Virtual so that handlers can be destroyed through an interface pointer.
    // The destructor is intentionally NOT pure: combining a pure-specifier with
    // an in-class body ("= 0 { }") is a compiler extension rather than standard
    // C++, and the class is already abstract because of the handlers above.
    virtual ~ILiteHTMLReaderEvents()
    {
    }
};
class CLiteHTMLReader
{
public:
enum EventMaskEnum
{
notifyStartStop = 0x00000001L, // raise BeginParse and EndParse?
notifyTagStart = 0x00000002L, // raise StartTag?
notifyTagEnd = 0x00000004L, // raise EndTag?
notifyCharacters = 0x00000008L, // raise Characters?
notifyComment = 0x00000010L, // raise Comment?
};
enum ReaderOptionsEnum
{
resolveEntities, // determines whether entity references should be resolved
// TODO:
// TODO: add more reader options
// TODO:
};
// Construction/Destruction
public:
CLiteHTMLReader()
{
m_bResolveEntities = true; // entities are resolved, by default
m_dwAppData = 0L; // reasonable default!
m_dwBufPos = 0L; // start from the very beginning
m_dwBufLen = 0L; // buffer length is unknown yet
// default is to raise all of the events
m_eventMask = (EventMaskEnum)(notifyStartStop |
notifyTagStart |
notifyTagEnd |
notifyCharacters |
notifyComment );
m_pEventHandler = NULL; // no event handler is associated
m_lpszBuffer = NULL;
}
public:
EventMaskEnum getEventMask(void) const
{ return (m_eventMask); }
EventMaskEnum setEventMask(DWORD dwNewEventMask)
{
EventMaskEnum oldMask = m_eventMask;
m_eventMask = (EventMaskEnum)dwNewEventMask;
return (oldMask);
}
EventMaskEnum setEventMask(DWORD addFlags, DWORD removeFlags)
{
DWORD dwOldMask = (DWORD)m_eventMask;
DWORD dwNewMask = (dwOldMask | addFlags) & ~removeFlags;
m_eventMask = (EventMaskEnum)dwNewMask;
return ((EventMaskEnum)dwOldMask);
}
DWORD getAppData(void) const
{ return (m_dwAppData); }
DWORD setAppData(DWORD dwNewAppData)
{
DWORD dwOldAppData = m_dwAppData;
m_dwAppData = dwNewAppData;
return (dwOldAppData);
}
ILiteHTMLReaderEvents* getEventHandler(void) const
{ return (m_pEventHandler); }
ILiteHTMLReaderEvents* setEventHandler(ILiteHTMLReaderEvents* pNewHandler)
{
ILiteHTMLReaderEvents *pOldHandler = m_pEventHandler;
m_pEventHandler = pNewHandler;
return (pOldHandler);
}
// returns the current value for the specified option
bool getBoolOption(ReaderOptionsEnum option, bool& bCurVal) const;
// sets a new value for the specified option
bool setBoolOption(ReaderOptionsEnum option, bool bNewVal);
// Operations
public:
// Note : wfopen()¸¦ ÀÌ¿ëÇØ¼ À¯´ÏÄÚµå·Î ÀÛ¼ºµÈ HTML¹®¼¸¦ ºÐ¼®ÇÑ´Ù.
UINT ReadFile( const wchar_t *filename );
UINT ReadFile( const char *filename );
// parses an HTML document from the specified string
UINT Read(LPCWSTR lpszString);
// parses an HTML document from a file given its HANDLE
//UINT ReadFile(HANDLE hFile);
// Helpers
protected:
virtual UINT parseDocument(void);
virtual bool parseComment(CStringW &rComment);
virtual bool parseTag(CLiteHTMLTag &rTag, bool &bIsOpeningTag, bool &bIsClosingTag);
virtual void NormalizeCharacters(CStringW &rCharacters)
{
rCharacters.Replace(L"\r\n", L"");
rCharacters.Remove(L'\n');
rCharacters.Replace(L'\r', L' ');
rCharacters.Replace(L'\t', L' ');
//UNUSED_ALWAYS(rCharacters);
//rCharacters;
}
void ResetSeekPointer(void)
{ m_dwBufPos = 0L; }
wchar_t ReadChar(void)
{
ASSERT(m_lpszBuffer != NULL);
if (m_dwBufPos >= m_dwBufLen)
return (NULL);
return (m_lpszBuffer[m_dwBufPos++]);
}
wchar_t UngetChar(void)
{
ASSERT(m_lpszBuffer != NULL);
ASSERT(m_dwBufPos);
return (m_lpszBuffer[--m_dwBufPos]);
}
bool getEventNotify(DWORD dwEvent) const
{
ASSERT(dwEvent == notifyStartStop ||
dwEvent == notifyTagStart ||
dwEvent == notifyTagEnd ||
dwEvent == notifyCharacters ||
dwEvent == notifyComment);
if (m_pEventHandler == NULL)
return (false);
return ((m_eventMask & dwEvent) == dwEvent);
}
bool isWhiteSpace(wchar_t ch) const
{ return (::iswspace(ch) ? true : false); }
protected:
bool m_bResolveEntities;
DWORD m_dwAppData;
DWORD m_dwBufPos;
DWORD m_dwBufLen;
EventMaskEnum m_eventMask;
ILiteHTMLReaderEvents* m_pEventHandler;
LPCWSTR m_lpszBuffer;
};
// Retrieves the current value of a boolean reader option.
//   option  - the option to query
//   bCurVal - receives the option's value; left untouched when the option
//             is not recognized
// Returns true if the option is recognized, false otherwise.
inline bool CLiteHTMLReader::getBoolOption(ReaderOptionsEnum option, bool& bCurVal) const
{
    // resolveEntities is the only boolean option at the moment
    if (option == resolveEntities)
    {
        bCurVal = m_bResolveEntities;
        return (true);
    }

    // unknown option
    return (false);
}
// Assigns a new value to a boolean reader option.
//   option  - the option to change
//   bNewVal - the value to store
// Returns true if the option is recognized (and was updated), false otherwise.
inline bool CLiteHTMLReader::setBoolOption(ReaderOptionsEnum option, bool bNewVal)
{
    // resolveEntities is the only boolean option at the moment
    if (option == resolveEntities)
    {
        m_bResolveEntities = bNewVal;
        return (true);
    }

    // unknown option; nothing is modified
    return (false);
}
// Parses an HTML comment starting at the current buffer position.
//   rComment - receives the comment text (strComment) on success
// Returns true on success, in which case m_dwBufPos has been advanced past the
// comment's closing '>'; returns false when the comment is not terminated
// properly (and in the failure case whose condition is lost, see below).
//
// NOTE(review): this function appears to be truncated in this copy of the
// file. The line that starts "HTML comments begin with" has swallowed the
// statements that followed it (what looks like the tail of a bounds check
// against m_lpszBuffer + m_dwBufLen), and the declarations of lpszEnd and
// strComment are missing. Restore the body from the original source before
// relying on this function.
inline bool CLiteHTMLReader::parseComment(CStringW &rComment)
{
ASSERT(m_lpszBuffer != NULL);
// NOTE(review): m_dwBufPos is a DWORD (unsigned on Windows), so this check
// can never fail.
ASSERT(m_dwBufPos >= 0L);
// expects more than four characters to remain in the buffer
ASSERT(m_dwBufPos + 4 < m_dwBufLen);
// NOTE(review): the next line is garbled (see the note above the function);
// as written, the "return (false)" that follows is unconditional.
// HTML comments begin with '= m_lpszBuffer + m_dwBufLen)
return (false);
// Note : skip comment ending delimiter '--'
//
while( *lpszEnd == L'-' )
//lpszEnd = ::_tcsinc(lpszEnd);
lpszEnd++;
// Note : skip white-space characters after comment ending delimiter '--'
//
while (::iswspace(*lpszEnd))
//lpszEnd = ::_tcsinc(lpszEnd);
lpszEnd++;
// comment has not been terminated properly
if (*lpszEnd != L'>')
return (false);
// step over the closing '>'
//lpszEnd = ::_tcsinc(lpszEnd);
lpszEnd++;
// advance the seek position by the number of characters consumed
m_dwBufPos += UINT(lpszEnd - &m_lpszBuffer[m_dwBufPos]);
rComment = strComment;
return (true);
}
// Parses a tag starting at the current buffer position by delegating to
// CLiteHTMLTag::parseFromStr().
//   rTag          - tag object that performs the parsing
//   bIsOpeningTag - passed through to parseFromStr()
//   bIsClosingTag - passed through to parseFromStr()
// Returns true and advances m_dwBufPos by the value parseFromStr() returned
// when that value is non-zero; otherwise returns false and leaves the seek
// position where it was.
inline bool CLiteHTMLReader::parseTag(CLiteHTMLTag &rTag,
                                      bool &bIsOpeningTag,
                                      bool &bIsClosingTag)
{
    ASSERT(m_lpszBuffer != NULL);
    ASSERT(m_dwBufPos >= 0L);
    ASSERT(m_dwBufPos + 4 < m_dwBufLen);

    // let the tag object parse itself from the unread part of the buffer
    LPCWSTR lpszCursor = m_lpszBuffer + m_dwBufPos;
    const UINT nConsumed = rTag.parseFromStr(lpszCursor,
                                             bIsOpeningTag, bIsClosingTag);

    // a zero result means nothing could be parsed
    const bool bParsed = (nConsumed != 0);
    if (bParsed)
        m_dwBufPos += nConsumed;

    return (bParsed);
}
#pragma warning(pop)