<span style="font-size:18px;">/*** * @author YangXin * @info 基于性别的IDRscorer * 对于在乎性别的用户,IDRscorer能够对物品或用户档案进行过滤。 * 首先,可以先通过检查已经评价过的档案的性别,来猜测该用户所偏好 * 的性别。然后,就可以过滤与之性别相反的档案。 */ package unitFive; import java.io.File; import java.io.IOException; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.impl.common.FastIDSet; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.model.PreferenceArray; import org.apache.mahout.cf.taste.recommender.IDRescorer; import org.apache.mahout.common.iterator.FileLineIterable; public class GenderRescorer implements IDRescorer { /*** * 缓存更对对男性评价的用户 */ private final FastIDSet men; private final FastIDSet women; private final FastIDSet usersRateMoreMen; private final FastIDSet usersRateLessMen; private final boolean filterMen; /** * 构造函数 */ public GenderRescorer(FastIDSet men, FastIDSet women, FastIDSet usersRateMoreMen, FastIDSet usersRateLessMen, long userID, DataModel model) throws TasteException{ // TODO Auto-generated constructor stub this.men = men; this.women = women; this.usersRateMoreMen = usersRateMoreMen; this.usersRateLessMen = usersRateLessMen; this.filterMen = ratesMoreMen(userID, model); } /** * 解析gender.dat并创建两个档案ID * */ public static FastIDSet[] parseMenWomen(File genderFile) throws IOException{ FastIDSet men = new FastIDSet(50000); FastIDSet women = new FastIDSet(50000); for(String line : new FileLineIterable(genderFile)){ int comma = line.indexOf(','); char gender = line.charAt(comma + 1); if(gender == 'U'){ continue; } long profileID = Long.parseLong(line.substring(0, comma)); if(gender == 'M'){ men.add(profileID); }else{ women.add(profileID); } } men.rehash(); //刷新 women.rehash(); //刷新 return new FastIDSet[]{men, women}; } public boolean ratesMoreMen(long userID, DataModel model) throws TasteException{ if(usersRateMoreMen.contains(userID)){ return true; } if(usersRateLessMen.contains(userID)){ return false; } PreferenceArray prefs = 
model.getPreferencesFromUser(userID); int menCount = 0; int womenCount = 0; for(int i = 0; i < prefs.length(); i++){ long profileID = prefs.get(i).getItemID(); if(men.contains(profileID)){ menCount++; }else if(women.contains(profileID)){ womenCount++; } } boolean ratesMoreMen = menCount > womenCount; //对男性评分的用户可能更喜欢男性 if(ratesMoreMen){ usersRateMoreMen.add(userID); }else{ usersRateLessMen.add(userID); } return ratesMoreMen; } @Override public double rescore(long profileID, double originalScore) { // TODO Auto-generated method stub return isFiltered(profileID) ? Double.NaN : originalScore; //将被排除的值赋值为NaN } @Override public boolean isFiltered(long profileID) { // TODO Auto-generated method stub return filterMen ? men.contains(profileID) : women.contains(profileID); } } </span>
// Original article: http://blog.csdn.net/u012965373/article/details/50692832