初步修复
This commit is contained in:
parent
8fc4357cc6
commit
e4714f3f0e
46705 changed files with 12004901 additions and 0 deletions
329
Common/Utility/LiteHTMLReader.cpp
Normal file
329
Common/Utility/LiteHTMLReader.cpp
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
#include "stdafx.h"
|
||||
#include "LiteHTMLReader.h"
|
||||
#include "LiteHTMLEntityResolver.h"
|
||||
|
||||
#ifdef _DEBUG
|
||||
#define new new(_NORMAL_BLOCK,__FILE__,__LINE__)
|
||||
#endif
|
||||
|
||||
#pragma warning(push, 4)
|
||||
/**
 * Parses the buffer currently attached to the reader (m_lpszBuffer with
 * length m_dwBufLen) and reports what it finds to m_pEventHandler.
 *
 * The buffer is consumed one character at a time through ReadChar():
 *   - '<' is first offered to parseComment(), then to parseTag(). If neither
 *     accepts it, the '<' is counted as ordinary character data.
 *   - '&' is offered to CLiteHTMLEntityResolver::resolveEntity() when
 *     m_bResolveEntities is set; otherwise it is ordinary character data.
 *   - every other character extends the current run of character data.
 *
 * Character data is tracked as a window (dwCharDataStart, dwCharDataLen) over
 * the buffer plus strCharacters, which accumulates text that can no longer be
 * expressed as a single window (i.e. after an entity has been substituted).
 * The pending text is flushed to Characters() before every comment/tag
 * notification and once more when the end of the buffer is reached.
 *
 * Any handler callback may set its bAbort argument; parsing then stops and
 * control jumps straight to the EndParse notification.
 *
 * @return 0 if no buffer is attached or its length is zero (no notification
 *         is sent in that case); otherwise the value of m_dwBufPos when
 *         parsing stopped. m_lpszBuffer and m_dwBufLen are reset before
 *         returning from a parse.
 */
UINT CLiteHTMLReader::parseDocument(void)
{
	ASSERT(m_lpszBuffer != NULL);

	bool	bAbort = false;			// continue parsing or abort?
	bool	bIsClosingTag = false;	// tag parsed is a closing tag?
	bool	bIsOpeningTag = false;	// tag parsed is an opening tag?
	CStringW	strCharacters;		// character data accumulated so far
	CStringW	strComment;			// comment data
	DWORD	dwCharDataStart = 0L;	// starting position of character data
	DWORD	dwCharDataLen = 0L;		// length of character data
	LONG	lTemp = 0L;				// value returned by resolveEntity()
	wchar_t	ch = 0;					// character at current buffer position
	CLiteHTMLTag	oTag;			// tag information

	// release builds have no ASSERT, so check again at run time
	if ( (!m_lpszBuffer) || (!m_dwBufLen) )
		return (0U);

	// reset seek pointer to beginning
	ResetSeekPointer();

	// notify event handler about parsing startup
	if (getEventNotify(notifyStartStop))
	{
		bAbort = false;
		m_pEventHandler->BeginParse(m_dwAppData, bAbort);
		if (bAbort)	goto LEndParse;
	}

	// skip leading white-space characters
	while (isWhiteSpace(ReadChar()))
		;

	// the loop above consumed the first non-white-space character; put it back
	ch = UngetChar();

	while ((ch = ReadChar()) != NULL)
	{
		switch (ch)
		{

		// tag starting delimiter?
		case L'<':
			{
				// parseComment()/parseTag() expect to start on the '<'
				UngetChar();

				strComment.Empty();
				if (!parseComment(strComment))
				{
					bIsOpeningTag = false;
					bIsClosingTag = false;
					if (!parseTag(oTag, bIsOpeningTag, bIsClosingTag))
					{
						// neither a comment nor a tag: treat '<' as text
						++dwCharDataLen;

						// manually advance buffer position
						// because the last call to UngetChar()
						// moved it back one character
						ch = ReadChar();

						break;
					}
				}

				// clear pending notifications
				if ( (dwCharDataLen) || (strCharacters.GetLength()) )
				{
					strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen);
					NormalizeCharacters(strCharacters);
					//strCharacters.Replace(L"\r\n", L"");
					//strCharacters.Remove(L'\n');
					//strCharacters.Replace(L'\r', L' ');
					//strCharacters.Replace(L'\t', L' ');

					if ( (strCharacters.GetLength()) &&
					     (getEventNotify(notifyCharacters)) )
					{
						bAbort = false;
						m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}

					strCharacters.Empty();
				}

				// the next run of character data starts right after the
				// comment/tag that was just consumed
				dwCharDataLen = 0L;
				dwCharDataStart = m_dwBufPos;

				if (strComment.GetLength())
				{
					if (getEventNotify(notifyComment))
					{
						bAbort = false;
						m_pEventHandler->Comment(strComment, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}
				}
				else
				{
					// a single tag may raise both notifications when
					// parseTag() sets both flags
					if ( (bIsOpeningTag) && (getEventNotify(notifyTagStart)) )
					{
						bAbort = false;
						m_pEventHandler->StartTag(&oTag, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}

					if ( (bIsClosingTag) && (getEventNotify(notifyTagEnd)) )
					{
						bAbort = false;
						m_pEventHandler->EndTag(&oTag, m_dwAppData, bAbort);
						if (bAbort)	goto LEndParse;
					}
				}

				break;
			}

		// entity reference beginning delimiter?
		case L'&':
			{
				// resolveEntity() is handed the text starting at the '&'
				UngetChar();

				lTemp = 0;
				if (m_bResolveEntities)
					lTemp = CLiteHTMLEntityResolver::resolveEntity(&m_lpszBuffer[m_dwBufPos], ch);

				if (lTemp)
				{
					// entity resolved: keep the text seen so far plus the
					// substitution character written to ch, then skip the
					// lTemp characters the entity occupied in the buffer
					strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen) + ch;
					m_dwBufPos += lTemp;
					dwCharDataStart = m_dwBufPos;
					dwCharDataLen = 0L;
				}
				else
				{
					// not resolved: the '&' is ordinary character data
					ch = ReadChar();
					++dwCharDataLen;
				}

				break;
			}

		// any other character
		default:
			{
				++dwCharDataLen;
				break;
			}
		}
	}

	// clear pending notifications
	if ( (dwCharDataLen) || (strCharacters.GetLength()) )
	{
		// The loop above only terminates when ch is the NUL value, so ch must
		// not be appended here: doing so would add a terminator character to
		// the reported text (ATL's CStringT operator+ with a character always
		// concatenates exactly one character).
		// NOTE(review): assumes CStringW follows ATL CStringT semantics - confirm.
		strCharacters += CStringW(&m_lpszBuffer[dwCharDataStart], dwCharDataLen);
		NormalizeCharacters(strCharacters);
		strCharacters.TrimRight();	// explicit trailing white-space removal

		if ( (strCharacters.GetLength()) &&
		     (getEventNotify(notifyCharacters)) )
		{
			bAbort = false;
			m_pEventHandler->Characters(strCharacters, m_dwAppData, bAbort);
			if (bAbort)	goto LEndParse;
		}
	}

LEndParse:
	// notify event handler about parsing completion
	if (getEventNotify(notifyStartStop))
		m_pEventHandler->EndParse(m_dwAppData, bAbort);

	// detach from the caller's buffer; it is not referenced after this point
	m_lpszBuffer = NULL;
	m_dwBufLen = 0L;
	return (m_dwBufPos);
}
|
||||
|
||||
/**
 * Reads everything that is left in an already opened stream into a wide
 * string, in chunks of 1024 wchar_t elements.
 *
 * Shared by both ReadFile() overloads, which differ only in how the stream
 * is opened. The caller keeps ownership of the stream and must close it.
 *
 * If the stream reports an error, "Read error" is written via perror() and
 * the text collected up to that point is returned.
 *
 * @param fileHandle  open stream to read from; must not be NULL
 * @return the text read from the stream
 */
static std::wstring readStreamAsWideText(FILE* fileHandle)
{
	const size_t	kChunkChars = 1024;
	wchar_t	wszBuf[kChunkChars + 1] = {0};	// +1 leaves room for the terminator
	std::wstring	strString;

	while (!feof(fileHandle))
	{
		const size_t nRetSize = fread(wszBuf, sizeof(wchar_t), kChunkChars, fileHandle);

		if (ferror(fileHandle))
		{
			perror("Read error");
			break;
		}

		// terminate the chunk so it can be appended as a C string; note that
		// this append stops at the first NUL character inside the chunk
		wszBuf[nRetSize] = L'\0';
		strString += wszBuf;
	}

	return strString;
}

/**
 * Opens the named file with mode "rt,ccs=UTF-8", reads its whole content and
 * parses it through Read().
 *
 * @param filename  path of the file to parse (wide-character string)
 * @return the value returned by Read(), or 0 when filename is NULL or the
 *         file cannot be opened
 */
UINT CLiteHTMLReader::ReadFile( const wchar_t *filename )
{
	// _wfopen_s treats a NULL file name as an invalid parameter; fail softly
	if (filename == NULL)
		return (0U);

	FILE*	fileHandle = NULL;
	if ( (_wfopen_s(&fileHandle, filename, L"rt,ccs=UTF-8") != 0) || (fileHandle == NULL) )
	{
		//wprintf(L"CLiteHTMLReader::ReadFile, the file was not opened!\n";
		return (0U);
	}

	const std::wstring strString = readStreamAsWideText(fileHandle);
	fclose(fileHandle);

	// strString outlives the call, so the pointer stays valid while parsing
	return Read(strString.c_str());
}

/**
 * Opens the named file with mode "rt,ccs=UTF-8", reads its whole content and
 * parses it through Read().
 *
 * @param filename  path of the file to parse (narrow-character string)
 * @return the value returned by Read(), or 0 when filename is NULL or the
 *         file cannot be opened
 */
UINT CLiteHTMLReader::ReadFile( const char *filename )
{
	// fopen_s treats a NULL file name as an invalid parameter; fail softly
	if (filename == NULL)
		return (0U);

	FILE*	fileHandle = NULL;
	if ( (fopen_s(&fileHandle, filename, "rt,ccs=UTF-8") != 0) || (fileHandle == NULL) )
	{
		//wprintf(L"CLiteHTMLReader::ReadFile, the file was not opened!\n";
		return (0U);
	}

	const std::wstring strString = readStreamAsWideText(fileHandle);
	fclose(fileHandle);

	// strString outlives the call, so the pointer stays valid while parsing
	return Read(strString.c_str());
}
|
||||
|
||||
/**
 * Parses a NUL-terminated wide-character string.
 *
 * The reader only stores the pointer (no copy is made), so lpszString must
 * stay valid until this call returns.
 *
 * @param lpszString  text to parse
 * @return the value returned by parseDocument(), or 0 when lpszString is
 *         NULL or empty (parseDocument() is not called in that case)
 */
UINT CLiteHTMLReader::Read(LPCWSTR lpszString)
{
	ASSERT(lpszString);
	//ASSERT(AfxIsValidString(lpszString));

	// ASSERT is compiled out of release builds; without this check a NULL
	// argument would be passed straight to wcslen()
	if (!lpszString)
	{
		m_dwBufLen = 0L;
		return (0U);
	}

	m_dwBufLen = static_cast<UINT>(::wcslen(lpszString));
	if (!m_dwBufLen)
		return (0U);

	m_lpszBuffer = lpszString;
	return (parseDocument());
}
|
||||
|
||||
//UINT CLiteHTMLReader::ReadFile(HANDLE hFile)
|
||||
//{
|
||||
// ASSERT(hFile != INVALID_HANDLE_VALUE);
|
||||
// ASSERT(::GetFileType(hFile) == FILE_TYPE_DISK);
|
||||
//
|
||||
// HANDLE hFileMap;
|
||||
// LPCWSTR lpsz;
|
||||
// UINT nRetVal;
|
||||
//
|
||||
// // determine file size
|
||||
// m_dwBufLen = ::GetFileSize(hFile, NULL);
|
||||
// if (m_dwBufLen == INVALID_FILE_SIZE)
|
||||
// {
|
||||
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
|
||||
// // " GetFileSize() failed;"
|
||||
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
|
||||
// goto LError;
|
||||
// }
|
||||
//
|
||||
// // calculate length, in wchar_ts, of the buffer
|
||||
// m_dwBufLen /= sizeof(wchar_t);
|
||||
// if (!m_dwBufLen)
|
||||
// return (0U);
|
||||
//
|
||||
// // create a file-mapping object for the file
|
||||
// hFileMap = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, 0L, 0L, NULL);
|
||||
// if (hFileMap == NULL)
|
||||
// {
|
||||
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
|
||||
// // " CreateFileMapping() failed;"
|
||||
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
|
||||
// goto LError;
|
||||
// }
|
||||
//
|
||||
// // map the entire file into the address-space of the application
|
||||
// lpsz = (LPCWSTR)::MapViewOfFile(hFileMap, FILE_MAP_READ, 0L, 0L, 0L);
|
||||
// if (lpsz == NULL)
|
||||
// {
|
||||
// //TRACE1( "(Error) CLiteHTMLReader::Read:"
|
||||
// // " MapViewOfFile() failed;"
|
||||
// // " GetLastError() returns 0x%08x.\n", ::GetLastError());
|
||||
// goto LError;
|
||||
// }
|
||||
//
|
||||
// m_lpszBuffer = lpsz;
|
||||
// nRetVal = parseDocument();
|
||||
// goto LCleanExit;
|
||||
//
|
||||
//LError:
|
||||
// nRetVal = 0U;
|
||||
// m_dwBufLen = 0L;
|
||||
//
|
||||
//LCleanExit:
|
||||
// if (lpsz != NULL)
|
||||
// VERIFY(::UnmapViewOfFile(lpsz));
|
||||
// if (hFileMap)
|
||||
// VERIFY(::CloseHandle(hFileMap));
|
||||
// return (nRetVal);
|
||||
//}
|
||||
|
||||
#pragma warning(pop)
|
||||
Loading…
Add table
Add a link
Reference in a new issue