DragonNest/Common/Utility/LiteHTMLReader.cpp

330 lines
7.3 KiB
C++
Raw Normal View History

2024-12-19 09:48:26 +08:00
#include "stdafx.h"
#include "LiteHTMLReader.h"
#include "LiteHTMLEntityResolver.h"
#ifdef _DEBUG
#define new new(_NORMAL_BLOCK,__FILE__,__LINE__)
#endif
#pragma warning(push, 4)
/**
 * Parses the buffer currently attached to the reader (m_lpszBuffer with
 * m_dwBufLen characters) and reports what it finds to m_pEventHandler.
 *
 * The buffer is scanned one character at a time:
 *  - '<' is tried first as a comment (parseComment) and then as a tag
 *    (parseTag); if neither matches, the '<' is counted as character data.
 *  - '&' is passed to CLiteHTMLEntityResolver::resolveEntity when
 *    m_bResolveEntities is set; otherwise it is counted as character data.
 *  - every other character is counted as character data.
 *
 * Character data is tracked as a window into the buffer
 * (dwCharDataStart / dwCharDataLen) plus already-collected text in
 * strCharacters, and is flushed to Characters() before each comment/tag
 * notification and once more after the loop.
 *
 * Each notification is only sent when getEventNotify() reports the
 * corresponding notify* flag as enabled. A handler that sets bAbort makes
 * the function jump to LEndParse, where EndParse() is still invoked (with
 * bAbort passed along) when notifyStartStop is enabled.
 *
 * On the normal/abort exit path the reader detaches from the buffer
 * (m_lpszBuffer = NULL, m_dwBufLen = 0).
 *
 * @return m_dwBufPos at the time parsing stopped, or 0 when the buffer is
 *         NULL or empty. Note that this early return happens before
 *         BeginParse() and bypasses LEndParse, so no notification is sent
 *         and the members are not reset in that case.
 */
UINT CLiteHTMLReader::parseDocument(void)
{
// Debug-time check only; the run-time guard for a missing buffer follows below.
ASSERT(m_lpszBuffer != NULL);
bool bAbort = false; // continue parsing or abort?
bool bIsClosingTag = false; // tag parsed is a closing tag?
bool bIsOpeningTag = false; // tag parsed is an opening tag?
CStringW strCharacters; // character data
CStringW strComment; // comment data
CStringW strT; // temporary storage
DWORD dwCharDataStart = 0L; // starting position of character data
DWORD dwCharDataLen = 0L; // length of character data
LONG lTemp = 0L; // temporary storage
wchar_t ch = 0; // character at current buffer position
CLiteHTMLTag oTag; // tag information
// nothing to parse: bail out without sending any notification
if ( (!m_lpszBuffer) || (!m_dwBufLen) )
return (0U);
// reset seek pointer to beginning
ResetSeekPointer();
// notify event handler about parsing startup
if (getEventNotify(notifyStartStop))
{
bAbort = false;
m_pEventHandler->BeginParse(m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
// skip leading white-space characters
while (isWhiteSpace(ReadChar()))
;
// Presumably steps back over the first non-white-space character that the
// loop above consumed; the value stored in ch is overwritten by the next
// ReadChar() below.
// NOTE(review): dwCharDataStart is still 0 here, so if the document begins
// with white-space followed by text (not '<'), the first character-data
// window appears to start at buffer offset 0 instead of at the current
// position - confirm against ReadChar()/UngetChar() semantics.
ch = UngetChar();
// main scan loop: ends when ReadChar() yields 0
while ((ch = ReadChar()) != NULL)
{
switch (ch)
{
// tag starting delimiter?
case L'<':
{
// step back so parseComment()/parseTag() see the '<' itself
UngetChar();
strComment.Empty();
if (!parseComment(strComment))
{
// not a comment: try to parse it as a tag
bIsOpeningTag = false;
bIsClosingTag = false;
if (!parseTag(oTag, bIsOpeningTag, bIsClosingTag))
{
// neither comment nor tag: treat '<' as ordinary character data
++dwCharDataLen;
// manually advance buffer position
// because the last call to UngetChar()
// moved it back one character
ch = ReadChar();
break;
}
}
// clear pending notifications
// (flush character data collected before this comment/tag)
if ( (dwCharDataLen) || (strCharacters.GetLength()) )
{
strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen);
NormalizeCharacters(strCharacters);
//strCharacters.Replace(L"\r\n", L"");
//strCharacters.Remove(L'\n');
//strCharacters.Replace(L'\r', L' ');
//strCharacters.Replace(L'\t', L' ');
// only notify when something is left after normalization
if ( (strCharacters.GetLength()) &&
(getEventNotify(notifyCharacters)) )
{
bAbort = false;
m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
strCharacters.Empty();
}
// start a fresh character-data window at the current buffer position
dwCharDataLen = 0L;
dwCharDataStart = m_dwBufPos;
// A non-empty strComment is what distinguishes "comment" from "tag" here.
// NOTE(review): bIsOpeningTag/bIsClosingTag (and oTag) are only reset when
// parseComment() fails, so if parseComment() can succeed with an empty
// comment the else-branch below would act on values left over from an
// earlier tag - confirm whether that case can occur.
if (strComment.GetLength())
{
if (getEventNotify(notifyComment))
{
bAbort = false;
m_pEventHandler->Comment(strComment, m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
}
else
{
// a tag may be reported as start, end, or both (two separate checks)
if ( (bIsOpeningTag) && (getEventNotify(notifyTagStart)) )
{
bAbort = false;
m_pEventHandler->StartTag(&oTag, m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
if ( (bIsClosingTag) && (getEventNotify(notifyTagEnd)) )
{
bAbort = false;
m_pEventHandler->EndTag(&oTag, m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
}
break;
}
// entity reference beginning delimiter?
case L'&':
{
// step back so the resolver is handed text starting at '&'
UngetChar();
lTemp = 0;
// resolveEntity() appears to return how many buffer characters the entity
// occupies (0 = not an entity) and to store the resolved character in ch
// - TODO confirm against CLiteHTMLEntityResolver.
if (m_bResolveEntities)
lTemp = CLiteHTMLEntityResolver::resolveEntity(&m_lpszBuffer[m_dwBufPos], ch);
if (lTemp)
{
// keep the text seen so far plus the resolved character, then skip the
// entity and restart the character-data window right after it
strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen) + ch;
m_dwBufPos += lTemp;
dwCharDataStart = m_dwBufPos;
dwCharDataLen = 0L;
}
else
{
// not resolved: re-consume the '&' and count it as character data
ch = ReadChar();
++dwCharDataLen;
}
break;
}
// any other character
default:
{
++dwCharDataLen;
break;
}
}
}
// clear pending notifications
// (flush whatever character data remains at the end of the buffer)
if ( (dwCharDataLen) || (strCharacters.GetLength()) )
{
// NOTE(review): the loop above only falls through to here once ReadChar()
// has returned 0, so ch is 0 at this point; appending it looks unintended
// - confirm how CStringW handles the "+ ch" with a zero character.
strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen) + ch;
NormalizeCharacters(strCharacters);
strCharacters.TrimRight(); // explicit trailing white-space removal
if ( (strCharacters.GetLength()) &&
(getEventNotify(notifyCharacters)) )
{
bAbort = false;
m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
if (bAbort) goto LEndParse;
}
}
LEndParse:
// notify event handler about parsing completion
// (reached both on normal completion and when a handler requested abort)
if (getEventNotify(notifyStartStop))
m_pEventHandler->EndParse(m_dwAppData, bAbort);
// detach from the caller's buffer; the reader keeps no reference to it
m_lpszBuffer = NULL;
m_dwBufLen = 0L;
return (m_dwBufPos);
}
namespace
{
	// Closes the wrapped stdio stream when it goes out of scope, so the
	// handle is released even if building the std::wstring throws.
	class CScopedStdioFile
	{
	public:
		explicit CScopedStdioFile(FILE* pFile) : m_pFile(pFile) {}
		~CScopedStdioFile() { if (m_pFile) fclose(m_pFile); }
	private:
		CScopedStdioFile(const CScopedStdioFile&);            // not copyable
		CScopedStdioFile& operator=(const CScopedStdioFile&); // not copyable
		FILE* m_pFile;
	};

	// Reads the remaining content of an already-opened stream into a wide
	// string and closes the stream before returning.
	//
	// The stream is read in chunks of 1024 wchar_t. If ferror() reports a
	// failure, "Read error" is written via perror() and the text collected
	// up to that point is returned. Each chunk is appended as a
	// NUL-terminated string, i.e. a chunk is cut at an embedded NUL.
	std::wstring ReadStreamAndClose(FILE* fileHandle)
	{
		const size_t kChunkChars = 1024;

		CScopedStdioFile closeOnExit(fileHandle);
		wchar_t wszBuf[kChunkChars + 1] = {0};	// +1 for the terminator
		std::wstring strString;

		while (!feof(fileHandle))
		{
			const size_t nRetSize = fread(wszBuf, sizeof(wchar_t), kChunkChars, fileHandle);
			if (ferror(fileHandle))
			{
				perror("Read error");
				break;
			}
			wszBuf[nRetSize] = L'\0';
			strString += wszBuf;
		}
		return strString;
	}
}

/**
 * Opens the file named by a wide-character path in text mode with
 * "ccs=UTF-8", reads its content into memory and parses it via Read().
 *
 * @param filename path of the file to parse; NULL is treated like a file
 *                 that could not be opened.
 * @return the value returned by Read(), or 0 if the file could not be
 *         opened.
 */
UINT CLiteHTMLReader::ReadFile( const wchar_t *filename )
{
	if (filename == NULL)
		return (0U);

	FILE* fileHandle = NULL;
	if (_wfopen_s(&fileHandle, filename, L"rt,ccs=UTF-8") != 0)
	{
		//wprintf(L"CLiteHTMLReader::ReadFile, the file was not opened!\n";
		return (0U);
	}

	// The stream is closed inside the helper, i.e. before parsing starts.
	const std::wstring strString = ReadStreamAndClose(fileHandle);
	return Read( strString.c_str() );
}

/**
 * Opens the file named by a narrow-character path in text mode with
 * "ccs=UTF-8", reads its content into memory and parses it via Read().
 *
 * @param filename path of the file to parse; NULL is treated like a file
 *                 that could not be opened.
 * @return the value returned by Read(), or 0 if the file could not be
 *         opened.
 */
UINT CLiteHTMLReader::ReadFile( const char *filename )
{
	if (filename == NULL)
		return (0U);

	FILE* fileHandle = NULL;
	if (fopen_s(&fileHandle, filename, "rt,ccs=UTF-8") != 0)
	{
		//wprintf(L"CLiteHTMLReader::ReadFile, the file was not opened!\n";
		return (0U);
	}

	// The stream is closed inside the helper, i.e. before parsing starts.
	const std::wstring strString = ReadStreamAndClose(fileHandle);
	return Read( strString.c_str() );
}
/**
 * Parses a NUL-terminated wide-character string.
 *
 * The reader does not copy the text: it stores the caller's pointer in
 * m_lpszBuffer and its length in m_dwBufLen, then runs parseDocument().
 * The string therefore has to stay valid until this call returns.
 *
 * @param lpszString text to parse. Passing NULL is a caller error (it
 *                   trips the ASSERT); it is additionally rejected at run
 *                   time so builds without the assertion do not
 *                   dereference a null pointer.
 * @return the value returned by parseDocument(), or 0 when lpszString is
 *         NULL or empty.
 */
UINT CLiteHTMLReader::Read(LPCWSTR lpszString)
{
	ASSERT(lpszString);
	//ASSERT(AfxIsValidString(lpszString));

	// Run-time guard: wcslen() on a null pointer is undefined behaviour.
	if (lpszString == NULL)
		return (0U);

	m_dwBufLen = static_cast<UINT>(::wcslen(lpszString));
	if (!m_dwBufLen)
		return (0U);	// empty string: nothing to parse

	m_lpszBuffer = lpszString;
	return (parseDocument());
}
//UINT CLiteHTMLReader::ReadFile(HANDLE hFile)
//{
// ASSERT(hFile != INVALID_HANDLE_VALUE);
// ASSERT(::GetFileType(hFile) == FILE_TYPE_DISK);
//
// HANDLE hFileMap;
// LPCWSTR lpsz;
// UINT nRetVal;
//
// // determine file size
// m_dwBufLen = ::GetFileSize(hFile, NULL);
// if (m_dwBufLen == INVALID_FILE_SIZE)
// {
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
// // " GetFileSize() failed;"
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
// goto LError;
// }
//
// // calculate length, in wchar_ts, of the buffer
// m_dwBufLen /= sizeof(wchar_t);
// if (!m_dwBufLen)
// return (0U);
//
// // create a file-mapping object for the file
// hFileMap = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, 0L, 0L, NULL);
// if (hFileMap == NULL)
// {
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
// // " CreateFileMapping() failed;"
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
// goto LError;
// }
//
// // map the entire file into the address-space of the application
// lpsz = (LPCWSTR)::MapViewOfFile(hFileMap, FILE_MAP_READ, 0L, 0L, 0L);
// if (lpsz == NULL)
// {
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
// // " MapViewOfFile() failed;"
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
// goto LError;
// }
//
// m_lpszBuffer = lpsz;
// nRetVal = parseDocument();
// goto LCleanExit;
//
//LError:
// nRetVal = 0U;
// m_dwBufLen = 0L;
//
//LCleanExit:
// if (lpsz != NULL)
// VERIFY(::UnmapViewOfFile(lpsz));
// if (hFileMap)
// VERIFY(::CloseHandle(hFileMap));
// return (nRetVal);
//}
#pragma warning(pop)