对JSON字符进行转义的原理跟上一节讨论的那几个函数不太一样。
在Javascript中的字符有多种表达形式:
1. ASCII码的8进制转义和16进制转义,比如换行符可以用 "\x0A" 和 "\12" 表示。代码实现如下:
std::string CXCode::encodeJSONComponent(const std::string& sData)
{
std::string sUCValue = UCS2(sData);
T_UC* bpos = (T_UC*)&sUCValue[0];
const T_UC* epos = bpos + (sUCValue.size()/sizeof(T_UC));
T_UC * tUC = new T_UC[sData.size() * 4];
T_UC * ptUC = tUC;
while (bpos < epos)
{
if (*bpos == ‘\\‘ || *bpos== ‘\"‘ )
{
*ptUC++ = ‘\\‘;
*ptUC++ = *bpos;
}
else if (*bpos == ‘\n‘)
{
*ptUC++ = ‘\\‘;
*ptUC++ = ‘n‘;
}
else if (*bpos == ‘\r‘)
{
*ptUC++ = ‘\\‘;
*ptUC++ = ‘r‘;
}
else if (*bpos == ‘\b‘)
{
*ptUC++ = ‘\\‘;
*ptUC++ = ‘b‘;
}
else if (*bpos == ‘\f‘)
{
*ptUC++ = ‘\\‘;
*ptUC++ = ‘f‘;
}
else if (*bpos == ‘\t‘)
{
*ptUC++ = ‘\\‘;
*ptUC++ = ‘t‘;
}
else
{
*ptUC++ = *bpos;
}
++bpos;
}
bpos = tUC;
epos = ptUC;
const static bool s_esc[256] =
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
std::string sValue = __encodeBase(s_esc, bpos, epos, "\\u00", "\\u", "");
if (tUC != NULL)
{
delete[] tUC;
}
if (CXCode::GetCharSet()==CXCode::CHARSET_UCS2)
{
CXCode x2(CXCode::CHARSET_UTF8);
return UCS2(sValue);
}
return sValue;
}我们这里为了兼容性,统一用实体编号来进行转义。
XML中有几个字符必须要转义,它们是 & < > " ' ,在HTML中还有空格。它们的实体名字和16进制编码分别为:
std::string encodeXMLComponent(const std::string& sData)
{
const static bool s_esc[256] =
{
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
std::string sUCValue = UCS2(sData);
T_UC* bpos = (T_UC*)&sUCValue[0];
const T_UC* epos = bpos + (sUCValue.size()/sizeof(T_UC));
while (bpos < epos)
{
if (*bpos < 32 && *bpos!=13 && *bpos!=10)
{
*bpos = T_UC(‘?‘);
}
++bpos;
}
bpos = (T_UC*)&sUCValue[0];
std::string sValue = __encodeBase(s_esc, bpos, epos, "&#x", "&#x", ";");
return sValue;
}原文:http://blog.csdn.net/yulongli/article/details/24270515