对JSON字符进行转义的原理跟上一节讨论的那几个函数不太一样。
在JavaScript中,字符有多种表示形式:
1. ASCII码的8进制转义和16进制转义,比如换行符可以用 "\x0A" 和 "\12" 表示。代码实现如下:
std::string CXCode::encodeJSONComponent(const std::string& sData) { std::string sUCValue = UCS2(sData); T_UC* bpos = (T_UC*)&sUCValue[0]; const T_UC* epos = bpos + (sUCValue.size()/sizeof(T_UC)); T_UC * tUC = new T_UC[sData.size() * 4]; T_UC * ptUC = tUC; while (bpos < epos) { if (*bpos == ‘\\‘ || *bpos== ‘\"‘ ) { *ptUC++ = ‘\\‘; *ptUC++ = *bpos; } else if (*bpos == ‘\n‘) { *ptUC++ = ‘\\‘; *ptUC++ = ‘n‘; } else if (*bpos == ‘\r‘) { *ptUC++ = ‘\\‘; *ptUC++ = ‘r‘; } else if (*bpos == ‘\b‘) { *ptUC++ = ‘\\‘; *ptUC++ = ‘b‘; } else if (*bpos == ‘\f‘) { *ptUC++ = ‘\\‘; *ptUC++ = ‘f‘; } else if (*bpos == ‘\t‘) { *ptUC++ = ‘\\‘; *ptUC++ = ‘t‘; } else { *ptUC++ = *bpos; } ++bpos; } bpos = tUC; epos = ptUC; const static bool s_esc[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; std::string sValue = __encodeBase(s_esc, bpos, epos, "\\u00", "\\u", ""); if (tUC != NULL) { delete[] tUC; } if (CXCode::GetCharSet()==CXCode::CHARSET_UCS2) { CXCode x2(CXCode::CHARSET_UTF8); return UCS2(sValue); } return sValue; }
我们这里为了兼容性,统一用实体编号来进行转义。
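XML中有几个字符必须转义,它们是 & < > " ',在HTML中还有空格。它们的实体名字和16进制编码分别为:&(&amp;,&#x26;)、<(&lt;,&#x3C;)、>(&gt;,&#x3E;)、"(&quot;,&#x22;)、'(&apos;,&#x27;),HTML中的不间断空格为 &nbsp;(&#xA0;)。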
std::string encodeXMLComponent(const std::string& sData) { const static bool s_esc[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; std::string sUCValue = UCS2(sData); T_UC* bpos = (T_UC*)&sUCValue[0]; const T_UC* epos = bpos + (sUCValue.size()/sizeof(T_UC)); while (bpos < epos) { if (*bpos < 32 && *bpos!=13 && *bpos!=10) { *bpos = T_UC(‘?‘); } ++bpos; } bpos = (T_UC*)&sUCValue[0]; std::string sValue = __encodeBase(s_esc, bpos, epos, "&#x", "&#x", ";"); return sValue; }
原文:http://blog.csdn.net/yulongli/article/details/24270515