167 lines
No EOL
3.4 KiB
C++
167 lines
No EOL
3.4 KiB
C++
#include "Stdafx.h"
|
|
#include "SlangFilter.h"
|
|
|
|
// Constructs an empty filter.
// Every byte of the root trie node is cleared, so its child buckets start out
// as null pointers and no word is registered yet.
CSlangFilter::CSlangFilter()
{
    memset(&m_Root, 0, sizeof m_Root);
}
|
|
|
|
|
|
// Releases everything this filter obtained from CLfhHeap:
//   1. the replacement strings recorded in m_MemList by InsertSlang(), and
//   2. every trie node created by InsertTree().
// m_Root is a data member of the filter, so it is never handed to _DeAlloc.
CSlangFilter::~CSlangFilter()
{
    for (std::vector<TCHAR*>::iterator it = m_MemList.begin(); it != m_MemList.end(); ++it)
        CLfhHeap::GetInstance()->_DeAlloc(*it);
    m_MemList.clear();

    // The trie nodes are not tracked in m_MemList, so walk the tree to find
    // them. An explicit stack is used instead of recursion so that a very long
    // dictionary entry cannot exhaust the call stack.
    // NOTE(review): assumes _DeAlloc accepts any pointer returned by _Alloc
    // (it is already used that way for the replacement strings) - confirm.
    const size_t bucketCount = sizeof(m_Root.child) / sizeof(m_Root.child[0]);
    std::vector<_WORD*> pending;

    for (size_t b = 0; b < bucketCount; ++b)
        for (_WORD * node = m_Root.child[b]; node != NULL; node = node->next)
            pending.push_back(node);

    while (!pending.empty())
    {
        _WORD * node = pending.back();
        pending.pop_back();

        // Queue the children before the node itself is released.
        for (size_t b = 0; b < bucketCount; ++b)
            for (_WORD * kid = node->child[b]; kid != NULL; kid = kid->next)
                pending.push_back(kid);

        CLfhHeap::GetInstance()->_DeAlloc(node);
    }
}
|
|
|
|
|
|
void CSlangFilter::InsertSlang(const TCHAR *words, const TCHAR * replacewords)
|
|
{
|
|
_WORD * tree = &m_Root, * child;
|
|
for (int i=0; words[i] != NULL; i++)
|
|
{
|
|
child = Find(tree, words[i]);
|
|
if (child == NULL)
|
|
child = InsertTree(tree, words[i]);
|
|
tree = child;
|
|
}
|
|
|
|
tree->tail = true;
|
|
|
|
if (replacewords != NULL)
|
|
{
|
|
tree->replaceword = static_cast<TCHAR*>(CLfhHeap::GetInstance()->_Alloc((int)_tcslen(replacewords)+1));
|
|
_tcscpy(tree->replaceword, replacewords);
|
|
|
|
m_MemList.push_back(tree->replaceword);
|
|
} else
|
|
tree->replaceword = NULL;
|
|
}
|
|
|
|
|
|
int CSlangFilter::ReplaceSlang(const TCHAR *src, TCHAR *out, int outlen, int type)
|
|
{
|
|
int i, match, j=0, k;
|
|
const TCHAR * rep ;
|
|
#ifdef _DEBUG
|
|
int len = (int) _tcslen(src);
|
|
#endif
|
|
for(i=0; src[i] && j+1 < outlen;)
|
|
{
|
|
match = MatchSlang(&src[i], &rep);
|
|
|
|
#ifdef _DEBUG
|
|
_ASSERT(i+match<=len);
|
|
#endif
|
|
|
|
if (match > 0)
|
|
{
|
|
if (rep != NULL)
|
|
{
|
|
int strl = (int) _tcslen(rep);
|
|
if (strl + j + 1 < outlen)
|
|
{
|
|
memcpy(&out[j], rep, strl*sizeof(TCHAR));
|
|
j += strl;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(k=0; k<match;)
|
|
{
|
|
if (outlen > j+1)
|
|
out[j++] = '*';
|
|
#ifdef UNICODE
|
|
k += 1;
|
|
#else
|
|
k += (src[i+k]&0x80) ? 2 : 1;
|
|
#endif
|
|
}
|
|
_ASSERT(match == k);
|
|
}
|
|
i+=match;
|
|
}
|
|
else
|
|
{
|
|
#ifdef UNICODE
|
|
//허용문자 체크는 다른걸 사용한다.
|
|
#else
|
|
if ((src[i] & 0x80) != NULL) //한글일 경우
|
|
{
|
|
if (src[i+1] == '\0')
|
|
break; // 만약을 대비해서....(이런 문자열이 넘어온다면 곤란)
|
|
|
|
if (((unsigned char)src[i+1] >= 0xA1 && (unsigned char)src[i+1] <= 0xFE) &&
|
|
(((unsigned char)src[i] >= 0xB0 && (unsigned char)src[i] <= 0xC8) || // 한글
|
|
(type&FILTER_IRREGULAR_HANAGUL) == 0 ||
|
|
// ((unsigned char)src[i] >= 0xCA && (unsigned char)src[i] <= 0xFD)) // 한자
|
|
// ((type&FILTER_INCOMPLETION_HANAGUL) == 0 && ((unsigned char)src[i] >= 0xA4 && (unsigned char)src[i] <= 0xAC)))) // 낯자
|
|
((type&FILTER_INCOMPLETION_HANAGUL) == 0 && (unsigned char)src[i] == 0xA4))) // 낯자
|
|
{
|
|
if (outlen > j+2)
|
|
{
|
|
out[j++] = src[i];
|
|
out[j++] = src[i+1];
|
|
}
|
|
} else
|
|
out[j++] = '*'; //정상한글이아님
|
|
i += 2;
|
|
}
|
|
else //한글이 아닐때!!!
|
|
#endif
|
|
{
|
|
if (outlen > j+1)
|
|
out[j++] = src[i++];
|
|
}
|
|
}
|
|
}
|
|
_ASSERT(j<outlen);
|
|
out[j] = 0;
|
|
return j;
|
|
}
|
|
|
|
// Looks up the direct child of `tree` that stores exactly `code`.
// Children are grouped into buckets selected by the low four bits of the
// character; each bucket is a singly linked list chained through `next`.
// Returns the matching node, or NULL when `tree` has no such child.
CSlangFilter::_WORD * CSlangFilter::Find(_WORD * tree, TCHAR code) const
{
    _WORD * node = tree->child[code & 15];
    while (node != NULL && node->code != code)
        node = node->next;
    return node;
}
|
|
|
|
|
|
int CSlangFilter::MatchSlang(const TCHAR * szText, const TCHAR ** replace_str) const
|
|
{
|
|
const _WORD * t = &m_Root;
|
|
TCHAR code;
|
|
int i;
|
|
int ret = 0;
|
|
for(i=0; szText[i];)
|
|
{
|
|
code = szText[i];
|
|
for(t = t->child[code&15]; t && !_tcsicmp(&t->code, &code); t = t->next);
|
|
if (t == NULL)
|
|
return ret;
|
|
i++;
|
|
if (t->tail == true)
|
|
{
|
|
*replace_str = t->replaceword;
|
|
ret = i;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
// Allocates a zero-filled trie node for `code` from CLfhHeap and links it at
// the head of the matching child bucket of `tree` (bucket index = low four
// bits of the character). Returns the new node.
CSlangFilter::_WORD * CSlangFilter::InsertTree(_WORD * tree, TCHAR code)
{
    _WORD * node = static_cast<_WORD*>(CLfhHeap::GetInstance()->_Alloc(sizeof(_WORD)));
    memset(node, 0, sizeof(_WORD));
    node->code = code;

    // Push the node onto the front of the bucket's singly linked list.
    const int bucket = code & 15;
    node->next = tree->child[bucket];
    tree->child[bucket] = node;

    return node;
}