package com.panguoyuan.mahout.itemcf; import java.io.File; import java.io.IOException; import java.util.List; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.Recommender; import org.apache.mahout.cf.taste.similarity.UserSimilarity; public class UserCF { final static int NEIGHBORHOOD_NUM = 2; final static int RECOMMENDER_NUM = 3; public static void main(String[] args) throws IOException, TasteException { String file = "inputdata/item.csv"; DataModel model = new FileDataModel(new File(file)); UserSimilarity user = new EuclideanDistanceSimilarity(model); NearestNUserNeighborhood neighbor = new NearestNUserNeighborhood(NEIGHBORHOOD_NUM, user, model); Recommender r = new GenericUserBasedRecommender(model, neighbor, user); LongPrimitiveIterator iter = model.getUserIDs(); while (iter.hasNext()) { long uid = iter.nextLong(); List<RecommendedItem> list = r.recommend(uid, RECOMMENDER_NUM); System.out.printf("uid:%s", uid); for (RecommendedItem ritem : list) { System.out.printf("(%s,%f)", ritem.getItemID(), ritem.getValue()); } System.out.println(); } } }
package com.panguoyuan.mahout.itemcf; import java.io.File; import java.util.List; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; import org.apache.mahout.cf.taste.similarity.UserSimilarity; public class BasedUserBookRecommender2 { public static void main(String[] args) throws Exception { long userId = 188; //构建数据模型 DataModel model = new FileDataModel(new File("inputdata/rating.csv")); //创建相似度 UserSimilarity itemSimilarity = new PearsonCorrelationSimilarity(model); //UserSimilarity itemSimilarity = new EuclideanDistanceSimilarity(model); //GenericUserSimilarity genericItemSimilarity = new GenericUserSimilarity(itemSimilarity, model); //构建近邻算法 UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, itemSimilarity, model); //构建推荐模型 UserBasedRecommender userBasedRecommender = new GenericUserBasedRecommender(model, neighborhood, itemSimilarity); //计算并返回图书推荐结果 List<RecommendedItem> recommendations = userBasedRecommender.recommend(188, 5); //打印推荐结果 showItems(userId, recommendations, true); } public static void showItems(long uid, List<RecommendedItem> recommendations, boolean skip) { if (skip || recommendations.size() > 0) { System.out.printf("userId:%s,", uid); for (RecommendedItem r : recommendations) { System.out.printf("(%s,%f)", r.getItemID(), r.getValue()); } System.out.println(); } } }
4、输出结果
userId:188,(885,9.500000)(396,7.000000)(688,6.000000)

5、用R语言对推荐结果进行人工分析
(1)读取评分数据和用户数据
# Use forward slashes in the paths: inside an R string literal, single
# backslashes such as "\w", "\m" and "\i" are unrecognized escapes and the
# original lines fail to parse.
ratings=read.csv("F:/workspace1/mahout/inputdata/rating.csv", header=FALSE)
users=read.csv("F:/workspace1/mahout/inputdata/user.csv", header=FALSE)
(2)修改列名
ratings=data.frame('userid'=ratings$V1,'bookid'=ratings$V2,'grade'=ratings$V3)
users=data.frame('userid'=users$V1,'sex'=users$V2,'age'=users$V3)

(3)查看用户188都看了哪些书
> ratings[c(ratings$userid==188),] userid bookid grade 3760 188 798 6 3761 188 653 3 3762 188 426 6 3763 188 742 7 3764 188 549 2 3765 188 520 8 3766 188 312 2 3767 188 213 10 3768 188 954 5 3769 188 121 10 3770 188 204 9 3771 188 684 3 3772 188 493 4 3773 188 452 1 3774 188 622 3 3775 188 298 8
(4)图书885推荐分数最高,下面查看该图书有哪些人评过分
ratings[c(ratings$bookid==885),] userid bookid grade 182 9 885 8 1225 60 885 10 3691 184 885 9
(5)查看用户9、用户60、用户184、用户188的信息
> users[c(9,60,184,188),] userid sex age 9 9 M 50 60 60 F 49 184 184 M 27 188 188 F 24
(6)查看用户9、用户60、用户184分别与用户188共同看了哪些图书
> rating188=ratings[which(ratings$userid==188),]
> rating9=ratings[which(ratings$userid==9),]
> rating60=ratings[which(ratings$userid==60),]
> rating184=ratings[which(ratings$userid==184),]
> intersect(rating188$bookid,rating9$bookid)
integer(0)
> intersect(rating188$bookid,rating60$bookid)
[1] 312 298
> intersect(rating188$bookid,rating184$bookid)
[1] 121 684

从上面可以看出,用户188与用户60共同看了312和298这两本书,与用户184共同看了121和684这两本书,说明他们有共同的偏好,所以给用户188推荐图书885是合理的。
package com.panguoyuan.mahout.itemcf; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity; import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.recommender.IDRescorer; import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.similarity.ItemSimilarity; public class BookFilterGenderRecommender3 { public static void main(String[] args) throws Exception { DataModel model = new FileDataModel(new File("inputdata/rating.csv")); ItemSimilarity otherSimilarity = new EuclideanDistanceSimilarity(model); GenericItemSimilarity similarity = new GenericItemSimilarity(otherSimilarity, model); ItemBasedRecommender recommender = new GenericItemBasedRecommender(model, similarity); filterRecommender(188, recommender, model); } public static void showItems(long uid,List<RecommendedItem> recommendations, boolean skip) { if (skip || recommendations.size() > 0) { System.out.printf("userId:%s,", uid); for (RecommendedItem r : recommendations) { System.out.printf("Item:(%s,%f)", r.getItemID(), r.getValue()); System.out.println(); } } } /** * 对用户性别进行过滤 */ public static void filterRecommender(long uid, ItemBasedRecommender recommender, DataModel dataModel) throws TasteException, IOException { Set<Long> userids = getMale("datafile/book/user.csv"); //计算男性用户打分过的图书 Set<Long> bookids = new HashSet<Long>(); for (long uids : 
userids) { LongPrimitiveIterator iter = dataModel.getItemIDsFromUser(uids).iterator(); while (iter.hasNext()) { long bookid = iter.next(); bookids.add(bookid); } } IDRescorer rescorer = new FilterRescorer(bookids); List<RecommendedItem> list = recommender.recommend(uid, 10, rescorer); showItems(uid, list, false); } /** * 返回所有男性id */ public static Set<Long> getMale(String file) throws IOException { BufferedReader br = new BufferedReader(new FileReader(new File(file))); Set<Long> userids = new HashSet<Long>(); String s = null; while ((s = br.readLine()) != null) { String[] cols = s.split(","); if (cols[1].equals("M")) { userids.add(Long.parseLong(cols[0])); } } br.close(); return userids; } } /** * 对结果重计算 */ class FilterRescorer implements IDRescorer { final private Set<Long> userids; public FilterRescorer(Set<Long> userids) { this.userids = userids; } @Override public double rescore(long id, double originalScore) { return isFiltered(id) ? Double.NaN : originalScore; } @Override public boolean isFiltered(long id) { return userids.contains(id); } }
3、打印推荐结果
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
userId:188,Item:(365,8.800000)
Item:(725,8.583333)
Item:(427,8.000000)
Item:(403,7.987013)
Item:(734,7.676371)
Item:(256,7.533333)
Item:(300,7.428571)
Item:(743,7.333333)
Item:(356,6.875000)
Item:(579,6.777778)

4、人工对数据进行分析
(1)查看图书365都有哪些用户评过分
> ratings[c(ratings$bookid==365),]
     userid bookid grade
1046     51    365     9
2206    111    365     9
2632    134    365     4
> users[c(51,111,134),]
    userid sex age
51      51   F  18
111    111   F  40
134    134   F  74

(2)利用intersect函数找出用户188分别与用户51、111、134这三个用户共同评分过的图书
说明:intersect(A,B)返回同时出现在A和B中的元素,即A与B的交集
> rating188=ratings[which(ratings$userid==188),]
> rating51=ratings[which(ratings$userid==51),]
> rating111=ratings[which(ratings$userid==111),]
> rating134=ratings[which(ratings$userid==134),]
> intersect(rating188$bookid,rating51$bookid)
integer(0)
> intersect(rating188$bookid,rating134$bookid)
[1] 204
> intersect(rating188$bookid,rating111$bookid)
[1] 742

(3)从上面可以看出用户188与用户134共同看了图书204,与用户111共同看了图书742
> rating188
     userid bookid grade
3760    188    798     6
3761    188    653     3
3762    188    426     6
3763    188    742     7
3764    188    549     2
3765    188    520     8
3766    188    312     2
3767    188    213    10
3768    188    954     5
3769    188    121    10
3770    188    204     9
3771    188    684     3
3772    188    493     4
3773    188    452     1
3774    188    622     3
3775    188    298     8

综上所述,把图书365推荐给用户188是合理的。
原文:http://blog.csdn.net/panguoyuan/article/details/43524507