原来的算法模型没有考虑"买了又买"(重复购买)的物品。对于钢材行业,用户的需求比较稳定,经常重复购买同一种物品,所以增加此功能。
a 模型训练中增加如下代码 //buy item for users JavaPairRDD<String, String> buyItemForUser = data.getBuyEvents().mapToPair(new PairFunction<UserItemEvent, Tuple2<String, String>, Integer>() { @Override public Tuple2<Tuple2<String, String>, Integer> call(UserItemEvent buyEvent) throws Exception { return new Tuple2<>(new Tuple2<>(buyEvent.getUser(), buyEvent.getItem()), 1); } }).mapToPair(new PairFunction<Tuple2<Tuple2<String, String>, Integer>, String, Integer>() { @Override public Tuple2<String, Integer> call(Tuple2<Tuple2<String, String>, Integer> element) throws Exception { return new Tuple2<>(element._1()._1()+":::"+element._1()._2(), element._2()); } }).reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer integer, Integer integer2) throws Exception { return integer + integer2; } }).mapToPair(new PairFunction<Tuple2<String, Integer>, String, String>() { @Override public Tuple2<String, String> call(Tuple2<String, Integer> element) throws Exception { String temp[]=element._1().split(":::"); if(temp.length==2){ return new Tuple2<>(temp[0], temp[1]); } return new Tuple2<>("", ""); } }); b 增加预测算法 private List<ItemScore> buyItemForUser(Model model, Query query){ logger.info("start to add buy item for the user"); final JavaRDD<ItemScore> matchedUser = model.getUserBuyItem().filter(new Function<Tuple2<String, String>, Boolean>() { @Override public Boolean call(Tuple2<String, String> userIndex) throws Exception { return userIndex._1().equals(query.getUserEntityId()); } }).map(new Function<Tuple2<String,String>,ItemScore>() { @Override public ItemScore call(Tuple2<String, String> arg0) throws Exception { return new ItemScore(arg0._2(),10); } }); return matchedUser.collect(); } c topItemsForUser 按照你的业务逻辑出来两者的排序规则
基于物品相似性的推荐也是我们期望的功能,为此加入以下代码:
//根据事件查找物品的属性 private List<Set<String>> getRecentProductCategory(Query query, Model model) { try { List<Set<String>> result = new ArrayList<>(); List<Event> events = LJavaEventStore.findByEntity( ap.getAppName(), "user", query.getUserEntityId(), OptionHelper.<String>none(), OptionHelper.some(ap.getSimilarItemEvents()), OptionHelper.some(OptionHelper.some("item")), OptionHelper.<Option<String>>none(), OptionHelper.<DateTime>none(), OptionHelper.<DateTime>none(), OptionHelper.some(10), true, Duration.apply(10, TimeUnit.SECONDS)); for (final Event event : events) { if (event.targetEntityId().isDefined()) { JavaPairRDD<String, Integer> filtered = model.getItemIndex().filter(new Function<Tuple2<String, Integer>, Boolean>() { @Override public Boolean call(Tuple2<String, Integer> element) throws Exception { return element._1().equals(event.targetEntityId().get()); } }); final String itemIndex = filtered.first()._1(); Item item = model.getItems().get(itemIndex); if(item.getCategories()!=null && item.getCategories().size()>0){ result.add(item.getCategories()); } } } return result; } catch (Exception e) { logger.error("Error reading recent events for user " + query.getUserEntityId()); throw new RuntimeException(e.getMessage(), e); } } //相似性比较 private List<ItemScore> similarItemsByCategory(final List<Set<String>> category, Model model, Query query) { Map<String, Item> items =model.getItems(); if(items==null || items.size()==0){ return null; } if(category==null || category.size()==0){ return null; } JavaRDD<ItemScore> itemScores = model.getItemIndex().map(new Function<Tuple2<String, Integer>, ItemScore>() { @Override public ItemScore call(Tuple2<String, Integer> idItem) throws Exception { String itemid= idItem._1(); Item item = items.get(itemid); double similarity = 0.0; for(int i=0 ; i<category.size(); i++){ similarity+=getDistance(category.get(i),item.getCategories()); } logger.info(itemid+"->"+similarity); return (new ItemScore(itemid, similarity)); } }); itemScores = 
validScores(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId()); return sortAndTake(itemScores, query.getNumber()); /* List<ItemScore> itemScores=new ArrayList<ItemScore>(); for (Map.Entry<String, Item> entry : items.entrySet()) { Item it = entry.getValue(); double similarity = 0.0; for(int i=0 ; i<category.size(); i++){ similarity+=getDistance(category.get(i),it.getCategories()); } itemScores.add(new ItemScore(it.getEntityId(), similarity)); } itemScores = validScoresForList(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId()); return sortAndTake(itemScores, query.getNumber());*/ } //相似算法,比较简单 public static int getDistance(Set<String> t, Set<String> s) { if (t==null || t.size()==0 || s==null || s.size()==0 || t.size() != s.size()) { return 0; } HashSet<String> t_temp=new HashSet<String>(t);//必须转一下 HashSet<String> s_temp=new HashSet<String>(s); t_temp.retainAll(s_temp); return t_temp.size(); } 最后按照你的业务逻辑,加入相似的物品。
predictionIO E-Commerce Recommendation 买了又买-物相似
原文:http://12597095.blog.51cto.com/12587095/1983709