最近项目的计算节点用的最多的是一个笛卡尔乘积算法,由于字典集合比较多,总高度有时会达到很高的一个量级,例如:1G。这样笛卡尔算法所 耗费的时间将很大,如何提交笛卡尔算法效率已经是当务之急。
笛卡尔乘积的算法很简单,复杂度就摆在这里O(high*n) :n指字典集合个数,high是所有集合的高度乘积。复杂度没法降低,所以我们从循环里面的运算下手,尽量降低循环内的计算,性能有所提升,下面是老版笛卡尔和新版笛卡尔的代码:
新版: 对比旧版,减少了high*(maxHigh-1)*2*n次除法,效率有所提高
int DICJob::descarte(DICComputationTask* dicTask,map<string,charset_mem*>& dicMap,uint8 segSIndex,uint8 segEIndex,uint8** resData,uint64* resHigh,int* reWidth){
#define MAX_WORD_LEN 64
cout<<"begin descarte!"<<endl;
uint64 i,j,k;// 循环变量
// maxDicHigh为所有字典库高度最大值, maxDicInx为最高字典库的下标 (均要除去star字典库)
// maxDicOffset为最高字典库在表达式口令中的偏移量, expressionLen为表达式口令长度
uint64 maxDicHigh = 0, maxDicInx = 0, maxDicOffset = 0, expressionLen = 0;
// highProduct为所有字典库高度乘积 (除去star字典库)
// outerIter为外层迭代次数, innerIter为内层迭代次数
uint64 highProduct = 1, outerIter, innerIter;
charset_mem* dicCharsetArray[MAX_WORD_LEN]={NULL};
for(int i=segSIndex; i<=segEIndex; i++){
map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]);
if(iterDic==dicMap.end()){
cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl;
LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist";
return -1;
}
int temHigh=dicTask->cn_re.segInfo[i].end-dicTask->cn_re.segInfo[i].begin+1;
if(temHigh > maxDicHigh) {
maxDicHigh = temHigh;
maxDicInx = i;
}
dicCharsetArray[i-segSIndex]=iterDic->second;
highProduct *= temHigh;
expressionLen += iterDic->second->width;
}
uint8 *result = new uint8[highProduct*expressionLen];
memset(result,0,highProduct*expressionLen);
// 每条口令后添加一个换行符,所以表达式宽度需要加1
//expressionLen += 1;
// 求最高字典库在表达式口令中的偏移量
for(i = 0; i < maxDicInx; i++) {
map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]);
if(iterDic==dicMap.end()){
cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl;
LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist";
return -1;
}
maxDicOffset += iterDic->second->width;
}
cout<<"descarte 11!"<<endl;
// 内层迭代次数为最高字典库高度
// 外层迭代次数为除了star位和最高字典库的高度乘积
innerIter = maxDicHigh;
outerIter = highProduct / maxDicHigh;
// 外层循环,程序热点(hotspot)
int thread_num = omp_get_max_threads();
#pragma omp parallel for num_threads(thread_num) shared(result) private(i, j, k)
for(i = 0; i < outerIter; i++) {
// star字典库字段设为0
// 构造外层模板
uint32 memOffset = 0;
uint8 outerPwdTemplate[expressionLen];
uint8 outerPwdTemplate_32[32 * expressionLen];
memset(outerPwdTemplate, 0, expressionLen * sizeof(uint8));
//outerPwdTemplate[expressionLen - 1] = '\n'; // 口令后面添加一个换行
memset(outerPwdTemplate_32, 0, 32 * expressionLen * sizeof(uint8));
uint32 remainder , quotient = i; // 除数和余数
for(int k=segSIndex;k<=segEIndex;++k){
// 构造外层模板时,排除star字典库和最高字典库(用作内层模板)
if( k != maxDicInx) {
remainder = quotient % dicCharsetArray[k-segSIndex]->high;
quotient = quotient / dicCharsetArray[k-segSIndex]->high;
memcpy(outerPwdTemplate + memOffset, dicCharsetArray[k-segSIndex]->data + remainder * dicCharsetArray[k-segSIndex]->width,dicCharsetArray[k-segSIndex]->width);
}
memOffset += dicCharsetArray[k-segSIndex]->width;
}
for(k = 0; k < 32; k++)
memcpy(outerPwdTemplate_32 + k * expressionLen, outerPwdTemplate, expressionLen);
// 内层循环
uint8 innerPwdTemplate_32[32 * expressionLen];
memset(innerPwdTemplate_32, 0, 32 * expressionLen * sizeof(uint8));
for(j = 0; j < innerIter; j+=32) {
uint32 kNum = (j + 32) < innerIter ? 32 : innerIter - j;
// 构造内层模板
for(k = 0; k < kNum; k++)
memcpy(innerPwdTemplate_32 + k * expressionLen + maxDicOffset, dicCharsetArray[maxDicInx]->data + (j + k) * dicCharsetArray[maxDicInx]->width, dicCharsetArray[maxDicInx]->width);
// 将内层模板和外层模板做或操作,得到结果数组
for(k = 0; k < kNum * expressionLen; k++)
result[(i * innerIter + j) * expressionLen + k] = innerPwdTemplate_32[k] | outerPwdTemplate_32[k];
}
}
cout<<"descarte 22!"<<endl;
*resData=result;
*resHigh=highProduct;
*reWidth=expressionLen;
return 0;
//return highProduct * expressionLen;
}老版:
int Old_descarte(DICComputationTask* dicTask,map<string,charset_mem*>& dicMap,uint8 segSIndex,uint8 segEIndex,uint8** resData,uint64* resHigh,int* reWidth){
#define MAX_WORD_LEN 64
uint64 s[MAX_DIC_NUM]={0}; //字典段进制位
uint64 high=1;
int width=0;
charset_mem* dicCharsetArray[MAX_WORD_LEN]={NULL};
for(int i=segEIndex; i>=segSIndex; --i){
s[i]=high;
map<string,charset_mem*>::iterator iterDic=dicMap.find(dicTask->cn_re.dicName[i]);
if(iterDic==dicMap.end()){
cout<<dicTask->cn_re.dicName[i]<<" do not exist"<<endl;
LOG(ERROR)<<dicTask->cn_re.dicName[i]<<" do not exist";
return -1;
}
dicCharsetArray[i-segSIndex]=iterDic->second;
high*=dicTask->cn_re.segInfo[i].end-dicTask->cn_re.segInfo[i].begin+1;
width+=iterDic->second->width;
}
uint8* data=new uint8[high*width];
memset(data,0,high*width);
//笛卡尔乘积
int thread_num=omp_get_max_threads();
#pragma omp parallel for num_threads(thread_num)
for(uint64 i=0;i<high;++i){
uint8 wordTmp[MAX_WORD_LEN]={0};
int offset=0;
int charpos=i;
for(int j=segSIndex;j<=segEIndex;++j){
int indexDic=charpos/s[j];
int offsetDic=(indexDic+dicTask->cn_re.segInfo[j].begin)*dicCharsetArray[j-segSIndex]->width;
memcpy(wordTmp+offset,dicCharsetArray[j-segSIndex]->data+offsetDic,dicCharsetArray[j-segSIndex]->width);
charpos=charpos%s[j];
offset+=dicCharsetArray[j-segSIndex]->width;
}
memcpy(data+i*width,wordTmp,width);
}
*resData=data;
*resHigh=high;
*reWidth=width;
return 0;
}
原文:http://blog.csdn.net/kingbaiyulong/article/details/51333975